test: stop CPU load monitoring at the end of a test

pb8o · pb8o · commit 45842cba2804 · 2023-04-20T11:32:55.000+02:00
We see errors during teardown. It is then unclear whether the CPU usage
happened during the call phase of the test (which would be legitimate) or
during teardown (which would be a false positive).

Restrict CPU load monitoring to only the part of the test we are
interested in.

Also remove the CPU load monitoring from Microvm, since this test is the
only user and the monitor fits better as a context manager.

Signed-off-by: Pablo Barbáchano <pablob@amazon.com>
diff --git a/tests/framework/microvm.py b/tests/framework/microvm.py
@@ -27,7 +27,6 @@
 from retry import retry
 
 import host_tools.cargo_build as build_tools
-import host_tools.cpu_load as cpu_tools
 import host_tools.logging as log_tools
 import host_tools.memory as mem_tools
 import host_tools.network as net_tools
@@ -167,9 +166,6 @@ def __init__(
         if monitor_memory:
             self.memory_monitor = mem_tools.MemoryMonitor()
 
-        # Cpu load monitoring has to be explicitly enabled using
-        # the `enable_cpu_load_monitor` method.
-        self._cpu_load_monitor = None
         self.vcpus_count = None
 
         # External clone/exec tool, because Python can't into clone
@@ -221,11 +217,6 @@ def kill(self):
                 self.memory_monitor.join(timeout=1)
             self.memory_monitor.check_samples()
 
-        if self._cpu_load_monitor:
-            self._cpu_load_monitor.signal_stop()
-            self._cpu_load_monitor.join()
-            self._cpu_load_monitor.check_samples()
-
     @property
     def firecracker_version(self):
         """Return the version of the Firecracker executable."""
@@ -333,18 +324,6 @@ def append_to_log_data(self, data):
         with data_lock:
             self.__log_data += data
 
-    def enable_cpu_load_monitor(self, threshold):
-        """Enable the cpu load monitor."""
-        process_pid = self.jailer_clone_pid
-        # We want to monitor the emulation thread, which is currently
-        # the first one created.
-        # A possible improvement is to find it by name.
-        thread_pid = self.jailer_clone_pid
-        self._cpu_load_monitor = cpu_tools.CpuLoadMonitor(
-            process_pid, thread_pid, threshold
-        )
-        self._cpu_load_monitor.start()
-
     def copy_to_jail_ramfs(self, src):
         """Copy a file to a jail ramfs."""
         filename = os.path.basename(src)
diff --git a/tests/host_tools/cpu_load.py b/tests/host_tools/cpu_load.py
@@ -6,12 +6,6 @@
 
 from framework import utils
 
-# /proc/<pid>/stat output taken from
-# https://www.man7.org/linux/man-pages/man5/proc.5.html
-STAT_UTIME_IDX = 13
-STAT_STIME_IDX = 14
-STAT_STARTTIME_IDX = 21
-
 
 class CpuLoadExceededException(Exception):
     """A custom exception containing details on excessive cpu load."""
@@ -88,3 +82,18 @@ def check_samples(self):
         """Check that there are no samples above the threshold."""
         if len(self.cpu_load_samples) > 0:
             raise CpuLoadExceededException(self._cpu_load_samples, self._threshold)
+
+    def __enter__(self):
+        """Start monitoring and return the monitor, for use in `with`.
+
+        >>> with CpuLoadMonitor(1000, 1000, 45) as clm:
+        ...     pass  # do stuff
+        """
+        self.start()
+        return self
+
+    def __exit__(self, _type, _value, _traceback):
+        """Stop the monitor thread first, then raise if the load was exceeded."""
+        self.signal_stop()
+        self.join()
+        self.check_samples()
diff --git a/tests/integration_tests/functional/test_rate_limiter.py b/tests/integration_tests/functional/test_rate_limiter.py
@@ -4,6 +4,7 @@
 import time
 
 from framework import utils
+from host_tools import cpu_load
 
 # The iperf version to run this tests with
 IPERF_BINARY = "iperf3"
@@ -146,16 +147,8 @@ def test_rx_rate_limiting_cpu_load(test_microvm_with_api, network_config):
     """
     test_microvm = test_microvm_with_api
     test_microvm.spawn()
-
     test_microvm.basic_config()
 
-    # Enable monitor that checks if the cpu load is over the threshold.
-    # After multiple runs, the average value for the cpu load
-    # seems to be around 10%. Setting the threshold a little
-    # higher to skip false positives.
-    threshold = 20
-    test_microvm.enable_cpu_load_monitor(threshold)
-
     # Create interface with aggressive rate limiting enabled.
     rx_rate_limiter_no_burst = {
         "bandwidth": {"size": 65536, "refill_time": 1000}  # 64KBytes  # 1s
@@ -165,6 +158,7 @@ def test_rx_rate_limiting_cpu_load(test_microvm_with_api, network_config):
     )
 
     test_microvm.start()
+
     # Start iperf server on guest.
     _start_iperf_on_guest(test_microvm, guest_ip)
 
@@ -175,7 +169,21 @@ def test_rx_rate_limiting_cpu_load(test_microvm_with_api, network_config):
         guest_ip,
         IPERF_TRANSMIT_TIME * 5,
     )
-    _iperf_out = _run_local_iperf(iperf_cmd)
+
+    # Enable monitor that checks if the cpu load is over the threshold.
+    # After multiple runs, the average value for the cpu load
+    # seems to be around 10%. Setting the threshold a little
+    # higher to skip false positives.
+    # We want to monitor the emulation thread, which is currently
+    # the first one created.
+    # A possible improvement is to find it by name.
+    cpu_load_monitor = cpu_load.CpuLoadMonitor(
+        process_pid=test_microvm.jailer_clone_pid,
+        thread_pid=test_microvm.jailer_clone_pid,
+        threshold=20,
+    )
+    with cpu_load_monitor:
+        _run_local_iperf(iperf_cmd)
 
 
 def _check_tx_rate_limiting(test_microvm, guest_ips, host_ips):