Update lxc_utils.py

should fix [this issue](#14)
fabriziosalmi · Nov 30, 2024 · 9e06b3c · 9e06b3c
1 parent 5e90000
commit 9e06b3c
Showing 1 changed file with 100 additions and 112 deletions.
diff --git a/lxc_autoscale/lxc_utils.py b/lxc_autoscale/lxc_utils.py
@@ -202,6 +202,9 @@ def get_total_memory():
     return available_memory
 
 
+import time
+import logging
+
 def get_cpu_usage(ctid):
     """
     Retrieve the CPU usage of a container using multiple methods with fallbacks.
@@ -212,136 +215,120 @@ def get_cpu_usage(ctid):
     Returns:
         float: The CPU usage percentage, or 0.0 if all methods fail.
     """
-    def loadavg_method(ctid):
+    def run_command(command):
         """
-        Retrieve CPU usage based on the system's load average.
-        
+        Helper to execute a shell command and return its output.
         Args:
-            ctid (str): The container ID.
-        
+            command (str): The command to run.
         Returns:
-            float: The CPU usage percentage.
+            str: The output of the command.
         """
-        # Get load average using /proc/loadavg
-        cmd_loadavg = f"pct exec {ctid} -- cat /proc/loadavg"
-        loadavg_output = run_command(cmd_loadavg)
-        loadavg = float(loadavg_output.split()[0])  # 1-minute load average
-
-        # Get number of CPUs
-        cmd_nproc = f"pct exec {ctid} -- nproc"
-        nproc_output = run_command(cmd_nproc)
-        num_cpus = int(nproc_output)
-
-        if num_cpus == 0:
-            raise ValueError("Number of CPUs is zero.")
-
-        # Calculate CPU usage percentage
-        cpu_usage = (loadavg / num_cpus) * 100
-        cpu_usage = min(cpu_usage, 100.0)  # Cap at 100%
-        return round(cpu_usage, 2)
+        import subprocess
+        try:
+            result = subprocess.run(
+                command, shell=True, capture_output=True, text=True, check=True
+            )
+            return result.stdout.strip()
+        except subprocess.CalledProcessError as e:
+            logging.warning(f"Command failed: {command}, Error: {e}")
+            return ""
+
+    def loadavg_method(ctid):
+        """Calculate CPU usage based on system's load average."""
+        try:
+            cmd_loadavg = f"pct exec {ctid} -- cat /proc/loadavg"
+            loadavg_output = run_command(cmd_loadavg)
+            loadavg = float(loadavg_output.split()[0])
+
+            cmd_nproc = f"pct exec {ctid} -- nproc"
+            nproc_output = run_command(cmd_nproc)
+            num_cpus = int(nproc_output)
+
+            if num_cpus == 0:
+                raise ValueError("Number of CPUs is zero.")
+
+            cpu_usage = min((loadavg / num_cpus) * 100, 100.0)
+            return round(cpu_usage, 2)
+        except Exception as e:
+            raise RuntimeError(f"Loadavg method failed: {e}")
 
     def load_method(ctid):
-        """
-        Retrieve CPU usage by reading /proc/stat.
-        
-        Args:
-            ctid (str): The container ID.
-        
-        Returns:
-            float: The CPU usage percentage.
-        """
-        cmd = f"pct exec {ctid} -- cat /proc/stat | grep '^cpu '"
-        result = run_command(cmd)
-        initial_cpu_times = list(map(float, result.split()[1:]))
-        initial_total_time = sum(initial_cpu_times)
-        initial_idle_time = initial_cpu_times[3]  # idle time is the 4th field
+        """Calculate CPU usage using /proc/stat."""
+        try:
+            cmd = f"pct exec {ctid} -- cat /proc/stat | grep '^cpu '"
+            result = run_command(cmd)
+            initial_cpu_times = list(map(float, result.split()[1:]))
+            initial_total_time = sum(initial_cpu_times)
+            initial_idle_time = initial_cpu_times[3]
 
-        time.sleep(1)
+            time.sleep(1)
 
-        result = run_command(cmd)
-        new_cpu_times = list(map(float, result.split()[1:]))
-        new_total_time = sum(new_cpu_times)
-        new_idle_time = new_cpu_times[3]
+            result = run_command(cmd)
+            new_cpu_times = list(map(float, result.split()[1:]))
+            new_total_time = sum(new_cpu_times)
+            new_idle_time = new_cpu_times[3]
 
-        total_diff = new_total_time - initial_total_time
-        idle_diff = new_idle_time - initial_idle_time
+            total_diff = new_total_time - initial_total_time
+            idle_diff = new_idle_time - initial_idle_time
 
-        if total_diff == 0:
-            raise ValueError("Total CPU time did not change.")
+            if total_diff == 0:
+                raise ValueError("Total CPU time did not change.")
 
-        cpu_usage = 100.0 * (total_diff - idle_diff) / total_diff
-        return round(max(min(cpu_usage, 100.0), 0.0), 2)
+            cpu_usage = 100.0 * (total_diff - idle_diff) / total_diff
+            return round(max(min(cpu_usage, 100.0), 0.0), 2)
+        except Exception as e:
+            raise RuntimeError(f"Load method failed: {e}")
 
     def cgroup_method(ctid):
-        """
-        Retrieve CPU usage from cgroup statistics.
-        
-        Args:
-            ctid (str): The container ID.
-        
-        Returns:
-            float: The CPU usage percentage.
-        """
-        cmd = f"pct exec {ctid} -- cat /sys/fs/cgroup/cpu/cpuacct.usage"
-        initial_usage = float(run_command(cmd))
-
-        time.sleep(1)
-
-        usage_after = float(run_command(cmd))
-        usage_diff = usage_after - initial_usage
-
-        # Convert nanoseconds to seconds
-        cpu_usage_seconds = usage_diff / 1e9  # Assuming 1 second interval
-        cpu_usage = cpu_usage_seconds * 100  # Convert to percentage
-
-        return round(cpu_usage, 2)
+        """Retrieve CPU usage from cgroup stats."""
+        try:
+            cmd = f"pct exec {ctid} -- cat /sys/fs/cgroup/cpu/cpuacct.usage"
+            initial_usage = float(run_command(cmd))
+            time.sleep(1)
+            usage_after = float(run_command(cmd))
+
+            usage_diff = usage_after - initial_usage
+            cpu_usage_seconds = usage_diff / 1e9
+            cpu_usage = min(cpu_usage_seconds * 100, 100.0)
+            return round(cpu_usage, 2)
+        except Exception as e:
+            raise RuntimeError(f"CGroup method failed: {e}")
 
     def top_method(ctid):
-        """
-        Retrieve CPU usage using the top command.
-        
-        Args:
-            ctid (str): The container ID.
-        
-        Returns:
-            float: The CPU usage percentage.
-        """
-        cmd = f"pct exec {ctid} -- top -bn1 | grep 'Cpu(s)'"
-        result = run_command(cmd)
-        # Example output: Cpu(s):  1.3%us,  0.7%sy,  0.0%ni, 97.5%id,  0.5%wa,  0.0%hi,  0.0%si,  0.0%st
-        parts = result.split(',')
-        idle_part = next((p for p in parts if 'id' in p), None)
-        if idle_part:
-            idle = float(idle_part.strip().split('%')[0])
-            cpu_usage = 100.0 - idle
-            return round(cpu_usage, 2)
-        raise ValueError("Idle CPU information not found.")
+        """Retrieve CPU usage using the top command."""
+        try:
+            cmd = f"pct exec {ctid} -- top -bn1 | grep 'Cpu(s)'"
+            result = run_command(cmd)
+            parts = result.split(',')
+            idle_part = next((p for p in parts if 'id' in p), None)
+            if idle_part:
+                idle = float(idle_part.strip().split('%')[0])
+                cpu_usage = 100.0 - idle
+                return round(cpu_usage, 2)
+            raise ValueError("Idle CPU information not found.")
+        except Exception as e:
+            raise RuntimeError(f"Top method failed: {e}")
 
     def ps_method(ctid):
-        """
-        Retrieve CPU usage by aggregating the CPU usage of all processes.
-        
-        Args:
-            ctid (str): The container ID.
-        
-        Returns:
-            float: The CPU usage percentage.
-        """
-        cmd = f"pct exec {ctid} -- ps -eo %cpu --no-headers"
-        result = run_command(cmd)
-        if not result:
-            return 0.0
-        cpu_usages = list(map(float, result.split()))
-        cpu_usage = sum(cpu_usages)
-        return round(min(cpu_usage, 100.0), 2)
-
-    # List of methods in order of priority
+        """Retrieve CPU usage by aggregating CPU usage of processes."""
+        try:
+            cmd = f"pct exec {ctid} -- ps -eo %cpu --no-headers"
+            result = run_command(cmd)
+            if not result:
+                return 0.0
+            cpu_usages = list(map(float, result.split()))
+            cpu_usage = min(sum(cpu_usages), 100.0)
+            return round(cpu_usage, 2)
+        except Exception as e:
+            raise RuntimeError(f"PS method failed: {e}")
+
+    # Methods in priority order
     methods = [
-        ('Load Average Method', loadavg_method),
-        ('Load Method', load_method),
-        ('CGroup Method', cgroup_method),
-        ('Top Command Method', top_method),
-        ('PS Command Method', ps_method),
+        ("Load Average Method", loadavg_method),
+        ("Load Method", load_method),
+        ("CGroup Method", cgroup_method),
+        ("Top Command Method", top_method),
+        ("PS Command Method", ps_method),
     ]
 
     for method_name, method in methods:
@@ -358,6 +345,7 @@ def ps_method(ctid):
 
 
 
+
 def get_memory_usage(ctid):
     """
     Retrieve the memory usage of a container.