Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cgroup rewrite: uses systemctl for expressing desired configuration instead of drop-in files #3269

Merged
merged 11 commits into from
Dec 17, 2024
6 changes: 4 additions & 2 deletions azurelinuxagent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from azurelinuxagent.ga import logcollector, cgroupconfigurator
from azurelinuxagent.ga.cgroupcontroller import AGENT_LOG_COLLECTOR
from azurelinuxagent.ga.cpucontroller import _CpuController
from azurelinuxagent.ga.cgroupapi import get_cgroup_api, InvalidCgroupMountpointException
from azurelinuxagent.ga.cgroupapi import create_cgroup_api, InvalidCgroupMountpointException
from azurelinuxagent.ga.firewall_manager import FirewallManager

import azurelinuxagent.common.conf as conf
Expand Down Expand Up @@ -216,7 +216,7 @@ def collect_logs(self, is_full_mode):
tracked_controllers = []
if CollectLogsHandler.is_enabled_monitor_cgroups_check():
try:
cgroup_api = get_cgroup_api()
cgroup_api = create_cgroup_api()
except InvalidCgroupMountpointException as e:
event.warn(WALAEventOperation.LogCollection, "The agent does not support cgroups if the default systemd mountpoint is not being used: {0}", ustr(e))
sys.exit(logcollector.INVALID_CGROUPS_ERRCODE)
Expand All @@ -226,6 +226,8 @@ def collect_logs(self, is_full_mode):

log_collector_cgroup = cgroup_api.get_process_cgroup(process_id="self", cgroup_name=AGENT_LOG_COLLECTOR)
tracked_controllers = log_collector_cgroup.get_controllers()
for controller in tracked_controllers:
logger.info("{0} controller for cgroup: {1}".format(controller.get_controller_type(), controller))

if len(tracked_controllers) != len(log_collector_cgroup.get_supported_controller_names()):
event.warn(WALAEventOperation.LogCollection, "At least one required controller is missing. The following controllers are required for the log collector to run: {0}", log_collector_cgroup.get_supported_controller_names())
Expand Down
49 changes: 48 additions & 1 deletion azurelinuxagent/common/osutil/systemd.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,13 @@ def get_version():
# systemd 245 (245.4-4ubuntu3)
# +PAM +AUDIT +SELINUX +IMA +APPARMOR +SMACK +SYSVINIT +UTMP etc
#
return shellutil.run_command(['systemctl', '--version'])
# return only the first line, e.g. systemd 245 (245.4-4ubuntu3)
try:
output = shellutil.run_command(['systemctl', '--version'])
version = output.split('\n')[0]
return version
except Exception:
return "unknown"


def get_unit_file_install_path():
Expand Down Expand Up @@ -84,3 +90,44 @@ def get_unit_property(unit_name, property_name):
raise ValueError("Can't find property {0} of {1}".format(property_name, unit_name))
return match.group('value')


def set_unit_run_time_property(unit_name, property_name, value):
    """
    Apply a single property assignment to a unit using 'systemctl set-property ... --runtime'.

    Note: because --runtime is passed, the setting only applies until the next reboot.

    Raises ValueError (wrapping the underlying shellutil.CommandError) if the command fails.
    """
    assignment = "{0}={1}".format(property_name, value)
    # Resulting command, e.g.: systemctl set-property foobar.service CPUWeight=200 --runtime
    command = ["systemctl", "set-property", unit_name, assignment, "--runtime"]
    try:
        shellutil.run_command(command)
    except shellutil.CommandError as error:
        raise ValueError("Can't set property {0} of {1}: {2}".format(property_name, unit_name, error))


def set_unit_run_time_properties(unit_name, property_names, values):
    """
    Apply several property assignments to a unit with one 'systemctl set-property ... --runtime' call.

    property_names and values are paired by position, so both must have the same length;
    a ValueError is raised otherwise.

    Note: because --runtime is passed, the settings only apply until the next reboot.

    Raises ValueError (wrapping the underlying shellutil.CommandError) if the command fails.
    """
    if len(property_names) != len(values):
        raise ValueError("The number of property names:{0} and values:{1} must be the same".format(property_names, values))

    # Build the NAME=VALUE arguments in the order the caller supplied them
    properties = []
    for name, value in zip(property_names, values):
        properties.append("{0}={1}".format(name, value))

    # Resulting command, e.g.: systemctl set-property foobar.service CPUWeight=200 MemoryMax=2G IPAccounting=yes --runtime
    command = ["systemctl", "set-property", unit_name]
    command.extend(properties)
    command.append("--runtime")

    try:
        shellutil.run_command(command)
    except shellutil.CommandError as error:
        raise ValueError("Can't set properties {0} of {1}: {2}".format(properties, unit_name, error))


def is_unit_loaded(unit_name):
"""
Determine if a unit is loaded
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does it mean for a unit to be loaded?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A unit being loaded means the unit has been parsed and is available to the systemd process. We check this for extension services when we set quotas. If systemd is not yet aware of an extension service (i.e., its unit is not loaded in the system), we get an error while setting quotas. Hence, I added the loaded check.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If systemd is not yet aware of an extension service (i.e., its unit is not loaded in the system), we get an error while setting quotas. Hence, I added the loaded check.

can you add this as a comment where you call this function?

"""
try:
value = get_unit_property(unit_name, "LoadState")
return value.lower() == "loaded"
except shellutil.CommandError:
return False
10 changes: 2 additions & 8 deletions azurelinuxagent/ga/cgroupapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ class CGroupUtil(object):
Cgroup utility methods which are independent of systemd cgroup api.
"""
@staticmethod
def cgroups_supported():
def distro_supported():
distro_info = get_distro()
distro_name = distro_info[0]
try:
Expand Down Expand Up @@ -149,7 +149,7 @@ def __init__(self, msg=None):
super(InvalidCgroupMountpointException, self).__init__(msg)


def get_cgroup_api():
def create_cgroup_api():
"""
Determines which version of Cgroup should be used for resource enforcement and monitoring by the Agent and returns
the corresponding Api.
Expand All @@ -172,7 +172,6 @@ def get_cgroup_api():
root_hierarchy_mode = shellutil.run_command(["stat", "-f", "--format=%T", CGROUP_FILE_SYSTEM_ROOT]).rstrip()

if root_hierarchy_mode == "cgroup2fs":
log_cgroup_info("Using cgroup v2 for resource enforcement and monitoring")
return SystemdCgroupApiv2()

elif root_hierarchy_mode == "tmpfs":
Expand All @@ -192,7 +191,6 @@ def get_cgroup_api():
# mounted in a location other than the systemd default, raise Exception.
if not cgroup_api_v1.are_mountpoints_systemd_created():
raise InvalidCgroupMountpointException("Expected cgroup controllers to be mounted at '{0}', but at least one is not. v1 mount points: \n{1}".format(CGROUP_FILE_SYSTEM_ROOT, json.dumps(cgroup_api_v1.get_controller_mountpoints())))
log_cgroup_info("Using cgroup v1 for resource enforcement and monitoring")
return cgroup_api_v1

raise CGroupsException("{0} has an unexpected file type: {1}".format(CGROUP_FILE_SYSTEM_ROOT, root_hierarchy_mode))
Expand Down Expand Up @@ -650,8 +648,6 @@ def get_controllers(self, expected_relative_path=None):
controller = MemoryControllerV1(self._cgroup_name, controller_path)

if controller is not None:
msg = "{0} controller for cgroup: {1}".format(supported_controller_name, controller)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Logging here is a side effect of calling get_controllers, which is called everywhere. As a result, we end up logging at random places where it's not needed, so I am removing it here.

log_cgroup_info(msg)
controllers.append(controller)

return controllers
Expand Down Expand Up @@ -729,8 +725,6 @@ def get_controllers(self, expected_relative_path=None):
controller = MemoryControllerV2(self._cgroup_name, self._cgroup_path)

if controller is not None:
msg = "{0} controller for cgroup: {1}".format(supported_controller_name, controller)
log_cgroup_info(msg)
controllers.append(controller)

return controllers
Expand Down
Loading
Loading