Skip to content

Commit

Permalink
metrics: show podman container cpu usage
Browse files Browse the repository at this point in the history
Cockpit only showed CPU usage status for all running systemd
services. Administrators these days also run tasks in containers and
want to know if they are eating up CPU time. Podman containers are
conveniently using Linux cgroups so we can identify them with a simple
regex.
  • Loading branch information
jelly authored and martinpitt committed May 12, 2022
1 parent dfb9c8b commit cd14c22
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 8 deletions.
46 changes: 38 additions & 8 deletions pkg/metrics/metrics.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ const _ = cockpit.gettext;
// format Date as YYYY-MM-DD HH:mm:ss UTC which is human friendly and systemd compatible
// month, day, hour, minute and second are zero-padded to two digits so the
// result really has the fixed-width shape promised above (e.g. 2022-05-02 03:04:05 UTC)
const formatUTC_ISO = t => {
    const pad2 = n => String(n).padStart(2, "0");
    const date = [t.getUTCFullYear(), pad2(t.getUTCMonth() + 1), pad2(t.getUTCDate())].join("-");
    const time = [t.getUTCHours(), t.getUTCMinutes(), t.getUTCSeconds()].map(pad2).join(":");
    return `${date} ${time} UTC`;
};

// podman's containers cgroup: the basename is "libpod-<64 character container id>.scope"
// note: inside a character class "|" is a literal pipe, not alternation, so it must not be listed
const podmanCgroupRe = /libpod-(?<containerid>[a-z0-9]{64})\.scope$/;

// keep track of maximum values for unbounded data, so that we can normalize it properly
// pre-init them to avoid inflating noise
let scaleSatCPU = 4;
Expand Down Expand Up @@ -377,7 +380,7 @@ class CurrentMetrics extends React.Component {
}
}

// return [ { [key, value, is_user] } ] list of the biggest n values
// return [ { [key, value, is_user, is_container] } ] list of the biggest n values
function n_biggest(names, values, n) {
const merged = [];
names.forEach((k, i) => {
Expand All @@ -387,21 +390,48 @@ class CurrentMetrics extends React.Component {
const is_user = k.match(/^user.*user@\d+\.service.+/);
const label = k.replace(/.*\//, '').replace(/\.service$/, '');
// only keep cgroup basenames, and drop redundant .service suffix
merged.push([label, v, is_user]);
merged.push([label, v, is_user, false]);
}
// also pick up podman containers, identified by their libpod-<id>.scope cgroup
const matches = k.match(podmanCgroupRe);
if (matches && v) {
// truncate to 12 chars like the podman output
const containerid = matches.groups.containerid.substr(0, 12);
const is_user = k.match(/^user.slice/);
merged.push([containerid, v, is_user, true]);
}
});
merged.sort((a, b) => b[1] - a[1]);
return merged.slice(0, n);
}

function serviceRow(name, value, is_user) {
const name_text = (
<Button variant="link" isInline component="a" key={name} onClick={ e => cockpit.jump("/system/services#/" + name + ".service" + (is_user ? "?owner=user" : "")) }>
/**
 * Click handler for a row in the "top cgroups" tables.
 *
 * @param name          label shown in the row; used as the unit name (without ".service") for services
 * @param is_user       truthy when the entry is a user unit, which adds "?owner=user" to the services link
 * @param is_container  truthy when the entry is a podman container
 */
function cgroupClickHandler(name, is_user, is_container) {
    // containers all jump to the podman page; the name is not part of the target
    if (is_container) {
        cockpit.jump("/podman");
        return;
    }

    const ownerQuery = is_user ? "?owner=user" : "";
    cockpit.jump("/system/services#/" + name + ".service" + ownerQuery);
}

function cgroupRow(name, value, is_user, is_container) {
const podman_installed = cockpit.manifests && cockpit.manifests.podman;
let name_text = (
<Button variant="link" isInline component="a" key={name}
onClick={() => cgroupClickHandler(name, is_user, is_container)}
isDisabled={is_container && !podman_installed}>
<TableText wrapModifier="truncate">
{name}
{is_container ? _("pod") + " " + name : name}
</TableText>
</Button>
);
if (is_container && !podman_installed) {
name_text = (
<Tooltip content={_("cockpit-podman is not installed")} key={name + "_tooltip"}>
<div>
{name_text}
</div>
</Tooltip>);
}
const value_text = <TableText wrapModifier="nowrap">{value}</TableText>;
return {
cells: [{ title: name_text }, { title: value_text }]
Expand All @@ -410,11 +440,11 @@ class CurrentMetrics extends React.Component {

// top 5 CPU and memory consuming systemd units
newState.topServicesCPU = n_biggest(this.cgroupCPUNames, this.samples[9], 5).map(
([key, value, is_user]) => serviceRow(key, Number(value / 10 / numCpu).toFixed(1), is_user) // usec/s → percent
([key, value, is_user, is_container]) => cgroupRow(key, Number(value / 10 / numCpu).toFixed(1), is_user, is_container) // usec/s → percent
);

newState.topServicesMemory = n_biggest(this.cgroupMemoryNames, this.samples[10], 5).map(
([key, value, is_user]) => serviceRow(key, cockpit.format_bytes(value), is_user)
([key, value, is_user, is_container]) => cgroupRow(key, cockpit.format_bytes(value), is_user, is_container)
);

this.setState(newState);
Expand Down
41 changes: 41 additions & 0 deletions test/verify/check-metrics
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import parent
import packagelib
from testlib import *

from machine_core import ssh_connection
from machine_core.constants import TEST_OS_DEFAULT


Expand Down Expand Up @@ -782,6 +783,46 @@ class TestCurrentMetrics(MachineCase):
b.wait(lambda: float(b.text("#load-avg .pf-l-flex div:first-child").split()[-1].rstrip(',')) > 15)
m.execute("systemctl stop load-hog 2>/dev/null || true") # ok to fail, as the command exits by itself

# Test podman containers
# HACK: coreos does not have a busybox image
if m.image != "fedora-coreos":
container_name = "pod-cpu-hog"
m.execute(f"podman run --rm -d --name {container_name} quay.io/libpod/busybox /bin/dd if=/dev/urandom of=/dev/null")

container_sha = m.execute(f"podman inspect --format '{{{{.Id}}}}' {container_name}").strip()
shortid = container_sha[:12]

# On some test images the container takes a while to show up
with b.wait_timeout(300):
b.wait_in_text("#current-metrics-card-cpu", f"pod {shortid}")
b.wait(lambda: topServiceValue(self, "Top 5 CPU services", "%", 1) > 70)
m.execute(f"podman stop -t 0 {container_name}")

# RHEL-8 / CentOS-8's podman user containers do not show up as
# libpod-$containerid but as podman-3679.scope.
if m.image != "centos-8-stream" and not m.image.startswith("rhel-8"):
# copy images for user podman tests; podman insists on user session
m.execute("""
podman save quay.io/libpod/busybox | sudo -i -u admin podman load
""")

# Test user containers
admin_s = ssh_connection.SSHConnection(user="admin",
address=m.ssh_address,
ssh_port=m.ssh_port,
identity_file=m.identity_file)
user_container_name = "user-cpu-hog"
admin_s.execute(f"podman run --rm -d --name {user_container_name} quay.io/libpod/busybox /bin/dd if=/dev/urandom of=/dev/null")

container_sha = admin_s.execute(f"podman inspect --format '{{{{.Id}}}}' {user_container_name}").strip()
shortid = container_sha[:12]

# On some test images the container takes a while to show up
with b.wait_timeout(300):
b.wait_in_text("#current-metrics-card-cpu", f"pod {shortid}")
b.wait(lambda: topServiceValue(self, "Top 5 CPU services", "%", 1) > 70)
admin_s.execute(f"podman stop -t 0 {user_container_name}")

# this settles down slowly, don't wait for becoming really quiet
with b.wait_timeout(300):
b.wait(lambda: float(b.text("#load-avg .pf-l-flex div:first-child").split()[-1].rstrip(',')) < 10)
Expand Down

0 comments on commit cd14c22

Please sign in to comment.