Skip to content

Commit

Permalink
Update lxc_utils.py
Browse files Browse the repository at this point in the history
Should fix [this issue](#14).
  • Loading branch information
fabriziosalmi authored Nov 30, 2024
1 parent 5e90000 commit 9e06b3c
Showing 1 changed file with 100 additions and 112 deletions.
212 changes: 100 additions & 112 deletions lxc_autoscale/lxc_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,9 @@ def get_total_memory():
return available_memory


import time
import logging

def get_cpu_usage(ctid):
"""
Retrieve the CPU usage of a container using multiple methods with fallbacks.
Expand All @@ -212,136 +215,120 @@ def get_cpu_usage(ctid):
Returns:
float: The CPU usage percentage, or 0.0 if all methods fail.
"""
def loadavg_method(ctid):
def run_command(command):
    """
    Execute a shell command and return its standard output.

    The command is passed to the shell as a single string because callers
    build pipelines (e.g. ``... | grep ...``) that need shell interpretation.

    Args:
        command (str): The command to run.

    Returns:
        str: The command's stdout with surrounding whitespace stripped, or
            an empty string if the command exits with a non-zero status.
    """
    import subprocess

    try:
        result = subprocess.run(
            command, shell=True, capture_output=True, text=True, check=True
        )
    except subprocess.CalledProcessError as e:
        # stderr is captured, so include it: the exception text alone only
        # carries the exit status, not the reason the command failed.
        logging.warning(
            "Command failed: %s, Error: %s, stderr: %s",
            command,
            e,
            (e.stderr or "").strip(),
        )
        return ""
    return result.stdout.strip()

def loadavg_method(ctid):
    """
    Estimate CPU usage from the container's 1-minute load average.

    Reads /proc/loadavg and the CPU count (``nproc``) inside the container
    via ``pct exec`` and expresses load-per-CPU as a percentage.

    Args:
        ctid (str): The container ID.

    Returns:
        float: The CPU usage percentage, capped at 100 and rounded to two
            decimals.

    Raises:
        RuntimeError: If a command produces no usable output, a value cannot
            be parsed, or the CPU count is zero. The underlying error is
            chained as the cause.
    """
    try:
        loadavg_fields = run_command(
            f"pct exec {ctid} -- cat /proc/loadavg"
        ).split()
        if not loadavg_fields:
            # An empty result would otherwise surface as an opaque IndexError.
            raise ValueError("No output from /proc/loadavg.")
        loadavg = float(loadavg_fields[0])  # first field: 1-minute average

        nproc_output = run_command(f"pct exec {ctid} -- nproc")
        if not nproc_output:
            raise ValueError("No output from nproc.")
        num_cpus = int(nproc_output)

        if num_cpus == 0:
            raise ValueError("Number of CPUs is zero.")

        cpu_usage = min((loadavg / num_cpus) * 100, 100.0)
        return round(cpu_usage, 2)
    except Exception as e:
        raise RuntimeError(f"Loadavg method failed: {e}") from e

def load_method(ctid):
    """
    Calculate CPU usage from two /proc/stat samples taken one second apart.

    The aggregate ``cpu`` line is read inside the container via ``pct exec``;
    usage is the non-idle share of the CPU time that elapsed between the
    two samples.

    Args:
        ctid (str): The container ID.

    Returns:
        float: The CPU usage percentage, clamped to [0, 100] and rounded to
            two decimals.

    Raises:
        RuntimeError: If the output cannot be parsed or the total CPU time
            did not change between samples. The underlying error is chained
            as the cause.
    """
    cmd = f"pct exec {ctid} -- cat /proc/stat | grep '^cpu '"

    def sample():
        """Return (total_time, idle_time) from the aggregate cpu line."""
        fields = run_command(cmd).split()[1:]  # drop the leading "cpu" label
        if len(fields) < 4:
            # Empty or truncated output; fail with a message that says so.
            raise ValueError("Unexpected /proc/stat output.")
        cpu_times = [float(field) for field in fields]
        return sum(cpu_times), cpu_times[3]  # idle time is the 4th field

    try:
        initial_total_time, initial_idle_time = sample()

        time.sleep(1)

        new_total_time, new_idle_time = sample()

        total_diff = new_total_time - initial_total_time
        idle_diff = new_idle_time - initial_idle_time

        if total_diff == 0:
            raise ValueError("Total CPU time did not change.")

        cpu_usage = 100.0 * (total_diff - idle_diff) / total_diff
        return round(max(min(cpu_usage, 100.0), 0.0), 2)
    except Exception as e:
        raise RuntimeError(f"Load method failed: {e}") from e

def cgroup_method(ctid):
    """
    Retrieve CPU usage from the container's cgroup CPU accounting counter.

    Reads ``cpuacct.usage`` (cumulative CPU time in nanoseconds) twice and
    divides the CPU time consumed by the wall-clock time that actually
    elapsed between the two reads.

    NOTE(review): the path used here looks like the cgroup v1 layout; on a
    unified (v2) hierarchy the read presumably fails and this method raises
    so the caller can fall back -- confirm.

    Args:
        ctid (str): The container ID.

    Returns:
        float: The CPU usage percentage, clamped to [0, 100] and rounded to
            two decimals.

    Raises:
        RuntimeError: If the counter cannot be read or parsed. The
            underlying error is chained as the cause.
    """
    try:
        cmd = f"pct exec {ctid} -- cat /sys/fs/cgroup/cpu/cpuacct.usage"

        initial_usage = float(run_command(cmd))
        started = time.monotonic()

        time.sleep(1)

        usage_after = float(run_command(cmd))
        # Each `pct exec` adds latency, so the sampling window is longer
        # than the nominal one-second sleep; measure it instead of assuming.
        elapsed = time.monotonic() - started

        usage_diff = usage_after - initial_usage
        cpu_usage_seconds = usage_diff / 1e9  # nanoseconds -> seconds
        cpu_usage = 100.0 * cpu_usage_seconds / elapsed
        return round(max(min(cpu_usage, 100.0), 0.0), 2)
    except Exception as e:
        raise RuntimeError(f"CGroup method failed: {e}") from e

def top_method(ctid):
    """
    Retrieve CPU usage from the summary line of the ``top`` command.

    Runs ``top -bn1`` inside the container via ``pct exec``, extracts the
    idle percentage from the ``Cpu(s)`` line and returns ``100 - idle``.

    Both summary formats are accepted:
        Cpu(s):  1.3%us,  0.7%sy,  0.0%ni, 97.5%id,  0.5%wa, ...
        %Cpu(s):  1.3 us,  0.7 sy,  0.0 ni, 97.5 id,  0.5 wa, ...

    Args:
        ctid (str): The container ID.

    Returns:
        float: The CPU usage percentage, clamped to [0, 100] and rounded to
            two decimals.

    Raises:
        RuntimeError: If the idle value cannot be found or parsed. The
            underlying error is chained as the cause.
    """
    import re  # local import keeps this block self-contained

    try:
        cmd = f"pct exec {ctid} -- top -bn1 | grep 'Cpu(s)'"
        result = run_command(cmd)

        # The number may be followed by "%id" or by " id".
        idle_match = re.search(r"(\d+(?:\.\d+)?)\s*%?\s*id\b", result)
        if idle_match is None:
            raise ValueError("Idle CPU information not found.")

        idle = float(idle_match.group(1))
        cpu_usage = 100.0 - idle
        return round(max(min(cpu_usage, 100.0), 0.0), 2)
    except Exception as e:
        raise RuntimeError(f"Top method failed: {e}") from e

def ps_method(ctid):
    """
    Retrieve CPU usage by summing the %CPU column of every process.

    Runs ``ps -eo %cpu --no-headers`` inside the container via ``pct exec``
    and adds up the per-process values.

    Args:
        ctid (str): The container ID.

    Returns:
        float: The summed CPU usage percentage, capped at 100 and rounded
            to two decimals; 0.0 when the command produces no output.

    Raises:
        RuntimeError: If a value in the output cannot be parsed. The
            underlying error is chained as the cause.
    """
    try:
        cmd = f"pct exec {ctid} -- ps -eo %cpu --no-headers"
        result = run_command(cmd)
        if not result:
            return 0.0
        # The per-process sum can exceed 100, hence the cap.
        cpu_usage = min(sum(float(value) for value in result.split()), 100.0)
        return round(cpu_usage, 2)
    except Exception as e:
        raise RuntimeError(f"PS method failed: {e}") from e

# Methods in priority order
methods = [
('Load Average Method', loadavg_method),
('Load Method', load_method),
('CGroup Method', cgroup_method),
('Top Command Method', top_method),
('PS Command Method', ps_method),
("Load Average Method", loadavg_method),
("Load Method", load_method),
("CGroup Method", cgroup_method),
("Top Command Method", top_method),
("PS Command Method", ps_method),
]

for method_name, method in methods:
Expand All @@ -358,6 +345,7 @@ def ps_method(ctid):




def get_memory_usage(ctid):
"""
Retrieve the memory usage of a container.
Expand Down

0 comments on commit 9e06b3c

Please sign in to comment.