From d508e2ae744b00529a5ff250f5bf2f1b1f56dc4e Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Tue, 19 Nov 2024 13:09:57 -0500 Subject: [PATCH 01/10] feat(slurm_ops): install `prometheus` exporter only on slurmctld machines Currently the Slurm prometheus exporter is only activated on slurmctld machines, so it doesn't make sense to install the exporter on all nodes. If other nodes need the Slurm exporter, then we can add directly to the package list for that specific service type. Signed-off-by: Jason C. Nucciarone --- lib/charms/hpc_libs/v0/slurm_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index 83cc47d..c458f3e 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -635,10 +635,10 @@ def _install_service(self) -> None: Raises: SlurmOpsError: Raised if `apt` fails to install the required Slurm packages. """ - packages = [self._service_name, "munge", "mungectl", "prometheus-slurm-exporter"] + packages = [self._service_name, "munge", "mungectl"] match self._service_name: case "slurmctld": - packages.extend(["libpmix-dev", "mailutils"]) + packages.extend(["libpmix-dev", "mailutils", "prometheus-slurm-exporter"]) case "slurmd": packages.extend(["libpmix-dev", "openmpi-bin"]) case "slurmrestd": From ee7199af8356aa031cdbd6f5cd24b91a1b1d3f10 Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Wed, 20 Nov 2024 09:50:30 -0500 Subject: [PATCH 02/10] feat(slurm_ops)!: implement SackdManager BREAKING CHANGES: No longer uses the PPA `ubuntu-hpc/slurm-wlm-23.02` as it only supports Jammy, and it does not provide a `sackd` package. Instead, `_AptManager` now pulls the relevant Slurm packages from either `ubuntu-hpc/experimental` or `universe`. . Eventually we'll need to identify how we want to enable the latest and greatest Slurm version in deb format, but we'll boil that pot later. Signed-off-by: Jason C. Nucciarone --- lib/charms/hpc_libs/v0/slurm_ops.py | 113 +++++++++++++++------------- 1 file changed, 59 insertions(+), 54 deletions(-) diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index c458f3e..50d1a2c 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -51,6 +51,7 @@ def _on_install(self, _) -> None: """ __all__ = [ + "SackdManager", "SlurmOpsError", "SlurmctldManager", "SlurmdManager", @@ -179,6 +180,7 @@ class _ServiceType(Enum): MUNGE = "munge" PROMETHEUS_EXPORTER = "prometheus-slurm-exporter" + SACKD = "sackd" SLURMD = "slurmd" SLURMCTLD = "slurmctld" SLURMDBD = "slurmdbd" @@ -425,12 +427,12 @@ class _SnapManager(_OpsManager): def install(self) -> None: """Install Slurm using the `slurm` snap.""" # TODO: https://github.com/charmed-hpc/hpc-libs/issues/35 - - # Pin Slurm snap to stable channel. + # Pin Slurm snap to stable channel. _snap("install", "slurm", "--channel", "latest/candidate", "--classic") # TODO: https://github.com/charmed-hpc/slurm-snap/issues/49 - - # Request automatic alias for the Slurm snap so we don't need to do it here. - # We will possibly need to account for a third-party Slurm snap installation - # where aliasing is not automatically performed. + # Request automatic alias for the Slurm snap so we don't need to do it here. + # We will possibly need to account for a third-party Slurm snap installation + # where aliasing is not automatically performed. _snap("alias", "slurm.mungectl", "mungectl") def version(self) -> str: @@ -514,49 +516,6 @@ def _init_ubuntu_hpc_ppa() -> None: SlurmOpsError: Raised if `apt` fails to update with Ubuntu HPC repositories enabled. """ _logger.debug("initializing apt to use ubuntu hpc debian package repositories") - slurm_wlm = apt.DebianRepository( - enabled=True, - repotype="deb", - uri="https://ppa.launchpadcontent.net/ubuntu-hpc/slurm-wlm-23.02/ubuntu", - release=distro.codename(), - groups=["main"], - ) - slurm_wlm.import_key( - textwrap.dedent( - """ - -----BEGIN PGP PUBLIC KEY BLOCK----- - Comment: Hostname: - Version: Hockeypuck 2.2 - - xsFNBGTuZb8BEACtJ1CnZe6/hv84DceHv+a54y3Pqq0gqED0xhTKnbj/E2ByJpmT - NlDNkpeITwPAAN1e3824Me76Qn31RkogTMoPJ2o2XfG253RXd67MPxYhfKTJcnM3 - CEkmeI4u2Lynh3O6RQ08nAFS2AGTeFVFH2GPNWrfOsGZW03Jas85TZ0k7LXVHiBs - W6qonbsFJhshvwC3SryG4XYT+z/+35x5fus4rPtMrrEOD65hij7EtQNaE8owuAju - Kcd0m2b+crMXNcllWFWmYMV0VjksQvYD7jwGrWeKs+EeHgU8ZuqaIP4pYHvoQjag - umqnH9Qsaq5NAXiuAIAGDIIV4RdAfQIR4opGaVgIFJdvoSwYe3oh2JlrLPBlyxyY - dayDifd3X8jxq6/oAuyH1h5K/QLs46jLSR8fUbG98SCHlRmvozTuWGk+e07ALtGe - sGv78ToHKwoM2buXaTTHMwYwu7Rx8LZ4bZPHdersN1VW/m9yn1n5hMzwbFKy2s6/ - D4Q2ZBsqlN+5aW2q0IUmO+m0GhcdaDv8U7RVto1cWWPr50HhiCi7Yvei1qZiD9jq - 57oYZVqTUNCTPxi6NeTOdEc+YqNynWNArx4PHh38LT0bqKtlZCGHNfoAJLPVYhbB - b2AHj9edYtHU9AAFSIy+HstET6P0UDxy02IeyE2yxoUBqdlXyv6FL44E+wARAQAB - zRxMYXVuY2hwYWQgUFBBIGZvciBVYnVudHUgSFBDwsGOBBMBCgA4FiEErocSHcPk - oLD4H/Aj9tDF1ca+s3sFAmTuZb8CGwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AA - CgkQ9tDF1ca+s3sz3w//RNawsgydrutcbKf0yphDhzWS53wgfrs2KF1KgB0u/H+u - 6Kn2C6jrVM0vuY4NKpbEPCduOj21pTCepL6PoCLv++tICOLVok5wY7Zn3WQFq0js - Iy1wO5t3kA1cTD/05v/qQVBGZ2j4DsJo33iMcQS5AjHvSr0nu7XSvDDEE3cQE55D - 87vL7lgGjuTOikPh5FpCoS1gpemBfwm2Lbm4P8vGOA4/witRjGgfC1fv1idUnZLM - TbGrDlhVie8pX2kgB6yTYbJ3P3kpC1ZPpXSRWO/cQ8xoYpLBTXOOtqwZZUnxyzHh - gM+hv42vPTOnCo+apD97/VArsp59pDqEVoAtMTk72fdBqR+BB77g2hBkKESgQIEq - EiE1/TOISioMkE0AuUdaJ2ebyQXugSHHuBaqbEC47v8t5DVN5Qr9OriuzCuSDNFn - 6SBHpahN9ZNi9w0A/Yh1+lFfpkVw2t04Q2LNuupqOpW+h3/62AeUqjUIAIrmfeML - IDRE2VdquYdIXKuhNvfpJYGdyvx/wAbiAeBWg0uPSepwTfTG59VPQmj0FtalkMnN - ya2212K5q68O5eXOfCnGeMvqIXxqzpdukxSZnLkgk40uFJnJVESd/CxHquqHPUDE - fy6i2AnB3kUI27D4HY2YSlXLSRbjiSxTfVwNCzDsIh7Czefsm6ITK2+cVWs0hNQ= - =cs1s - -----END PGP PUBLIC KEY BLOCK----- - """ - ) - ) experimental = apt.DebianRepository( enabled=True, repotype="deb", @@ -601,7 +560,6 @@ def _init_ubuntu_hpc_ppa() -> None: ) ) repositories = apt.RepositoryMapping() - repositories.add(slurm_wlm) repositories.add(experimental) try: @@ -637,6 +595,8 @@ def _install_service(self) -> None: """ packages = [self._service_name, "munge", "mungectl"] match self._service_name: + case "sackd": + packages.extend(["slurm-client"]) case "slurmctld": packages.extend(["libpmix-dev", "mailutils", "prometheus-slurm-exporter"]) case "slurmd": @@ -674,6 +634,28 @@ def _create_state_save_location(self) -> None: def _apply_overrides(self) -> None: """Override defaults supplied provided by Slurm Debian packages.""" match self._service_name: + case "sackd": + _logger.debug("overriding default sackd service configuration") + config_override = Path( + "/etc/systemd/system/sackd.service.d/10-sackd-config-server.conf" + ) + config_override.mkdir(parents=True, exist_ok=True) + config_override.write_text( + textwrap.dedent( + """ + [Service] + ExecStart= + ExecStart=/usr/bin/sh -c "/usr/sbin/sackd --systemd $${SACKD_CONFIG_SERVER:+--conf-server $$SACKD_CONFIG_SERVER} $$SACKD_OPTIONS" + """ + ) + ) + + # TODO: https://github.com/charmed-hpc/hpc-libs/issues/54 - + # Make `sackd` create its service environment file so that we + # aren't required to manually create it here. + _logger.debug("creating sackd environment file") + self._env_file.touch(mode=0o644) + dotenv.set_key(self._env_file, "SACKD_OPTIONS", "") case "slurmctld": _logger.debug("overriding default slurmctld service configuration") self._set_ulimit() @@ -792,11 +774,11 @@ def _apply_overrides(self) -> None: # TODO: https://github.com/charmed-hpc/hpc-libs/issues/36 - -# Use `jwtctl` to provide backend for generating, setting, and getting -# jwt signing key used by `slurmctld` and `slurmdbd`. This way we also -# won't need to pass the keyfile path to the `__init__` constructor. -# . -# Also, enable `jwtctl` to set the user and group for the keyfile. +# Use `jwtctl` to provide backend for generating, setting, and getting +# jwt signing key used by `slurmctld` and `slurmdbd`. This way we also +# won't need to pass the keyfile path to the `__init__` constructor. +# . +# Also, enable `jwtctl` to set the user and group for the keyfile. class _JWTKeyManager: """Control the jwt signing key used by Slurm.""" @@ -828,7 +810,7 @@ def generate(self) -> None: # TODO: https://github.com/charmed-hpc/mungectl/issues/5 - -# Have `mungectl` set user and group permissions on the munge.key file. +# Have `mungectl` set user and group permissions on the munge.key file. class _MungeKeyManager: """Control the munge key via `mungectl ...` commands.""" @@ -908,6 +890,29 @@ def scontrol(*args) -> str: return _call("scontrol", *args).stdout +class SackdManager(_SlurmManagerBase): + """Manager for the Sackd service.""" + + def __init__(self, *args, **kwargs) -> None: + super().__init__(service=_ServiceType.SACKD, *args, **kwargs) + self._env_manager = self._ops_manager.env_manager_for(_ServiceType.SACKD) + + @property + def config_server(self) -> str: + """Get the config server address of this Sackd node.""" + return self._env_manager.get("SACKD_CONFIG_SERVER") + + @config_server.setter + def config_server(self, addr: str) -> None: + """Set the config server address of this Sackd node.""" + self._env_manager.set({"SACKD_CONFIG_SERVER": addr}) + + @config_server.deleter + def config_server(self) -> None: + """Unset the config server address of this Sackd node.""" + self._env_manager.unset("SACKD_CONFIG_SERVER") + + class SlurmctldManager(_SlurmManagerBase): """Manager for the Slurmctld service.""" From c0dcd42106c41e5252287166bf2d0c848c7e884f Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Wed, 20 Nov 2024 09:56:27 -0500 Subject: [PATCH 03/10] tests(slurm_ops): add unit tests for SackdManager Signed-off-by: Jason C. Nucciarone --- tests/unit/test_slurm_ops.py | 38 +++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_slurm_ops.py b/tests/unit/test_slurm_ops.py index 9994232..c44a10f 100644 --- a/tests/unit/test_slurm_ops.py +++ b/tests/unit/test_slurm_ops.py @@ -15,6 +15,7 @@ import charms.operator_libs_linux.v0.apt as apt import dotenv from charms.hpc_libs.v0.slurm_ops import ( + SackdManager, SlurmctldManager, SlurmdbdManager, SlurmdManager, @@ -272,6 +273,7 @@ class TestAptPackageManager(TestCase): def setUp(self) -> None: self.setUpPyfakefs() + self.sackd = SackdManager(snap=False) self.slurmctld = SlurmctldManager(snap=False) self.slurmd = SlurmdManager(snap=False) self.slurmdbd = SlurmdbdManager(snap=False) @@ -339,7 +341,17 @@ def test_set_ulimit(self, *_) -> None: @patch("charms.operator_libs_linux.v0.apt.add_package") def test_install_service(self, add_package, *_) -> None: """Test that `_install_service` installs the correct packages for each service.""" - # Install slurmctld. + self.sackd._ops_manager._install_service() + self.assertListEqual( + add_package.call_args[0][0], + [ + "sackd", + "munge", + "mungectl", + "slurm-client", + ], + ) + self.slurmctld._ops_manager._install_service() self.assertListEqual( add_package.call_args[0][0], @@ -347,9 +359,9 @@ def test_install_service(self, add_package, *_) -> None: "slurmctld", "munge", "mungectl", - "prometheus-slurm-exporter", "libpmix-dev", "mailutils", + "prometheus-slurm-exporter", ], ) @@ -360,7 +372,6 @@ def test_install_service(self, add_package, *_) -> None: "slurmd", "munge", "mungectl", - "prometheus-slurm-exporter", "libpmix-dev", "openmpi-bin", ], @@ -369,7 +380,7 @@ def test_install_service(self, add_package, *_) -> None: self.slurmdbd._ops_manager._install_service() self.assertListEqual( add_package.call_args[0][0], - ["slurmdbd", "munge", "mungectl", "prometheus-slurm-exporter"], + ["slurmdbd", "munge", "mungectl"], ) self.slurmrestd._ops_manager._install_service() @@ -379,7 +390,6 @@ def test_install_service(self, add_package, *_) -> None: "slurmrestd", "munge", "mungectl", - "prometheus-slurm-exporter", "slurm-wlm-basic-plugins", ], ) @@ -573,6 +583,24 @@ def test_scontrol(self, subcmd) -> None: ) +@patch("charms.hpc_libs.v0.slurm_ops.subprocess.run") +class TestSackdConfig(TestCase): + """Test the `sackd` service configuration manager.""" + + def setUp(self): + self.setUpPyfakefs() + self.manager = SackdManager(snap=False) + self.fs.create_file("/etc/default/sackd") + + def test_config_server(self, *_) -> None: + """Test that `SACKD_CONFIG_SERVER` is configured correctly.""" + self.manager.config_server = "localhost" + self.assertEqual(self.manager.config_server, "localhost") + self.assertEqual(dotenv.get_key("/etc/default/sackd", "SACKD_CONFIG_SERVER"), "localhost") + del self.manager.config_server + self.assertIsNone(self.manager.config_server) + + @patch("charms.hpc_libs.v0.slurm_ops.subprocess.run") class TestSlurmctldConfig(TestCase): """Test the Slurmctld service config manager.""" From d90ba3822e291f5d39cb3b7dfe313c1899a4f91b Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Wed, 20 Nov 2024 09:57:02 -0500 Subject: [PATCH 04/10] tests(all): bump gambol scene runners to noble Signed-off-by: Jason C. Nucciarone --- tests/integration/test_hpc_libs.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_hpc_libs.yaml b/tests/integration/test_hpc_libs.yaml index 0e280bf..031c7d9 100644 --- a/tests/integration/test_hpc_libs.yaml +++ b/tests/integration/test_hpc_libs.yaml @@ -18,7 +18,7 @@ provider: acts: test-is-container: name: "Test the is_container library" - run-on: jammy + run-on: noble input: - host-path: lib path: lib @@ -43,7 +43,7 @@ acts: is_container test-slurm-ops-snap: name: "Test the slurm_ops library (snap)" - run-on: jammy + run-on: noble input: - host-path: lib path: lib @@ -76,7 +76,7 @@ acts: slurm_ops test-slurm-ops-apt: name: "Test the slurm_ops library (apt)" - run-on: jammy + run-on: noble input: - host-path: lib path: lib From 9694276c616c9d5e0fbf8b93e188ca929f402441 Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Fri, 6 Dec 2024 10:51:46 -0500 Subject: [PATCH 05/10] fix(slurm_ops): bump apt charm library to 0.15 New version contains fixes for adding third-party repositories on Noble Numbat. Signed-off-by: Jason C. Nucciarone --- lib/charms/operator_libs_linux/v0/apt.py | 583 +++++++++++++++++++---- 1 file changed, 481 insertions(+), 102 deletions(-) diff --git a/lib/charms/operator_libs_linux/v0/apt.py b/lib/charms/operator_libs_linux/v0/apt.py index b8913c0..1c67d40 100644 --- a/lib/charms/operator_libs_linux/v0/apt.py +++ b/lib/charms/operator_libs_linux/v0/apt.py @@ -106,10 +106,9 @@ import os import re import subprocess -from collections.abc import Mapping from enum import Enum from subprocess import PIPE, CalledProcessError, check_output -from typing import Iterable, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple, Union from urllib.parse import urlparse logger = logging.getLogger(__name__) @@ -122,11 +121,12 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 14 +LIBPATCH = 15 VALID_SOURCE_TYPES = ("deb", "deb-src") OPTIONS_MATCHER = re.compile(r"\[.*?\]") +_GPG_KEY_DIR = "/etc/apt/trusted.gpg.d/" class Error(Exception): @@ -814,9 +814,9 @@ def remove_package( package_names: the name of a package Raises: - PackageNotFoundError if the package is not found. + TypeError: if no packages are provided """ - packages = [] + packages: List[DebianPackage] = [] package_names = [package_names] if isinstance(package_names, str) else package_names if not package_names: @@ -837,7 +837,17 @@ def remove_package( def update() -> None: """Update the apt cache via `apt-get update`.""" - subprocess.run(["apt-get", "update", "--error-on=any"], capture_output=True, check=True) + cmd = ["apt-get", "update", "--error-on=any"] + try: + subprocess.run(cmd, capture_output=True, check=True) + except CalledProcessError as e: + logger.error( + "%s:\nstdout:\n%s\nstderr:\n%s", + " ".join(cmd), + e.stdout.decode(), + e.stderr.decode(), + ) + raise def import_key(key: str) -> str: @@ -876,7 +886,7 @@ def import_key(key: str) -> str: key_bytes = key.encode("utf-8") key_name = DebianRepository._get_keyid_by_gpg_key(key_bytes) key_gpg = DebianRepository._dearmor_gpg_key(key_bytes) - gpg_key_filename = "/etc/apt/trusted.gpg.d/{}.gpg".format(key_name) + gpg_key_filename = os.path.join(_GPG_KEY_DIR, "{}.gpg".format(key_name)) DebianRepository._write_apt_gpg_keyfile( key_name=gpg_key_filename, key_material=key_gpg ) @@ -897,7 +907,7 @@ def import_key(key: str) -> str: key_asc = DebianRepository._get_key_by_keyid(key) # write the key in GPG format so that apt-key list shows it key_gpg = DebianRepository._dearmor_gpg_key(key_asc.encode("utf-8")) - gpg_key_filename = "/etc/apt/trusted.gpg.d/{}.gpg".format(key) + gpg_key_filename = os.path.join(_GPG_KEY_DIR, "{}.gpg".format(key)) DebianRepository._write_apt_gpg_keyfile(key_name=gpg_key_filename, key_material=key_gpg) return gpg_key_filename @@ -913,6 +923,9 @@ class GPGKeyError(Error): class DebianRepository: """An abstraction to represent a repository.""" + _deb822_stanza: Optional["_Deb822Stanza"] = None + """set by Deb822Stanza after creating a DebianRepository""" + def __init__( self, enabled: bool, @@ -920,9 +933,9 @@ def __init__( uri: str, release: str, groups: List[str], - filename: Optional[str] = "", - gpg_key_filename: Optional[str] = "", - options: Optional[dict] = None, + filename: str = "", + gpg_key_filename: str = "", + options: Optional[Dict[str, str]] = None, ): self._enabled = enabled self._repotype = repotype @@ -970,14 +983,15 @@ def filename(self, fname: str) -> None: Args: fname: a filename to write the repository information to. """ - if not fname.endswith(".list"): - raise InvalidSourceError("apt source filenames should end in .list!") - + if not fname.endswith((".list", ".sources")): + raise InvalidSourceError("apt source filenames should end in .list or .sources!") self._filename = fname @property def gpg_key(self): """Returns the path to the GPG key for this repository.""" + if not self._gpg_key_filename and self._deb822_stanza is not None: + self._gpg_key_filename = self._deb822_stanza.get_gpg_key_filename() return self._gpg_key_filename @property @@ -985,21 +999,19 @@ def options(self): """Returns any additional repo options which are set.""" return self._options - def make_options_string(self) -> str: - """Generate the complete options string for a a repository. + def make_options_string(self, include_signed_by: bool = True) -> str: + """Generate the complete one-line-style options string for a repository. - Combining `gpg_key`, if set, and the rest of the options to find - a complex repo string. + Combining `gpg_key`, if set (and include_signed_by is True), with any other + provided options to form the options section of a one-line-style definition. """ options = self._options if self._options else {} - if self._gpg_key_filename: - options["signed-by"] = self._gpg_key_filename - - return ( - "[{}] ".format(" ".join(["{}={}".format(k, v) for k, v in options.items()])) - if options - else "" - ) + if include_signed_by and self.gpg_key: + options["signed-by"] = self.gpg_key + if not options: + return "" + pairs = ("{}={}".format(k, v) for k, v in sorted(options.items())) + return "[{}] ".format(" ".join(pairs)) @staticmethod def prefix_from_uri(uri: str) -> str: @@ -1012,47 +1024,46 @@ def prefix_from_uri(uri: str) -> str: @staticmethod def from_repo_line(repo_line: str, write_file: Optional[bool] = True) -> "DebianRepository": - """Instantiate a new `DebianRepository` a `sources.list` entry line. + """Instantiate a new `DebianRepository` from a `sources.list` entry line. Args: repo_line: a string representing a repository entry - write_file: boolean to enable writing the new repo to disk + write_file: boolean to enable writing the new repo to disk. True by default. + Expect it to result in an add-apt-repository call under the hood, like: + add-apt-repository --no-update --sourceslist="$repo_line" """ - repo = RepositoryMapping._parse(repo_line, "UserInput") - fname = "{}-{}.list".format( - DebianRepository.prefix_from_uri(repo.uri), repo.release.replace("/", "-") + repo = RepositoryMapping._parse( # pyright: ignore[reportPrivateUsage] + repo_line, filename="UserInput" # temp filename ) - repo.filename = fname - - options = repo.options if repo.options else {} - if repo.gpg_key: - options["signed-by"] = repo.gpg_key - - # For Python 3.5 it's required to use sorted in the options dict in order to not have - # different results in the order of the options between executions. - options_str = ( - "[{}] ".format(" ".join(["{}={}".format(k, v) for k, v in sorted(options.items())])) - if options - else "" - ) - + repo.filename = repo._make_filename() if write_file: - with open(fname, "wb") as f: - f.write( - ( - "{}".format("#" if not repo.enabled else "") - + "{} {}{} ".format(repo.repotype, options_str, repo.uri) - + "{} {}\n".format(repo.release, " ".join(repo.groups)) - ).encode("utf-8") - ) - + _add_repository(repo) return repo + def _make_filename(self) -> str: + """Construct a filename from uri and release. + + For internal use when a filename isn't set. + Should match the filename written to by add-apt-repository. + """ + return "{}-{}.list".format( + DebianRepository.prefix_from_uri(self.uri), + self.release.replace("/", "-"), + ) + def disable(self) -> None: - """Remove this repository from consideration. + """Remove this repository by disabling it in the source file. + + WARNING: This method does NOT alter the `self.enabled` flag. - Disable it instead of removing from the repository file. + WARNING: disable is currently not implemented for repositories defined + by a deb822 stanza. Raises a NotImplementedError in this case. """ + if self._deb822_stanza is not None: + raise NotImplementedError( + "Disabling a repository defined by a deb822 format source is not implemented." + " Please raise an issue if you require this feature." + ) searcher = "{} {}{} {}".format( self.repotype, self.make_options_string(), self.uri, self.release ) @@ -1181,7 +1192,27 @@ def _write_apt_gpg_keyfile(key_name: str, key_material: bytes) -> None: keyf.write(key_material) -class RepositoryMapping(Mapping): +def _repo_to_identifier(repo: DebianRepository) -> str: + """Return str identifier derived from repotype, uri, and release. + + Private method used to produce the identifiers used by RepositoryMapping. + """ + return "{}-{}-{}".format(repo.repotype, repo.uri, repo.release) + + +def _repo_to_line(repo: DebianRepository, include_signed_by: bool = True) -> str: + """Return the one-per-line format repository definition.""" + return "{prefix}{repotype} {options}{uri} {release} {groups}".format( + prefix="" if repo.enabled else "#", + repotype=repo.repotype, + options=repo.make_options_string(include_signed_by=include_signed_by), + uri=repo.uri, + release=repo.release, + groups=" ".join(repo.groups), + ) + + +class RepositoryMapping(Mapping[str, DebianRepository]): """An representation of known repositories. Instantiation of `RepositoryMapping` will iterate through the @@ -1197,29 +1228,53 @@ class RepositoryMapping(Mapping): )) """ + _apt_dir = "/etc/apt" + _sources_subdir = "sources.list.d" + _default_list_name = "sources.list" + _default_sources_name = "ubuntu.sources" + _last_errors: Tuple[Error, ...] = () + def __init__(self): - self._repository_map = {} - # Repositories that we're adding -- used to implement mode param - self.default_file = "/etc/apt/sources.list" + self._repository_map: Dict[str, DebianRepository] = {} + self.default_file = os.path.join(self._apt_dir, self._default_list_name) + # ^ public attribute for backwards compatibility only + sources_dir = os.path.join(self._apt_dir, self._sources_subdir) + default_sources = os.path.join(sources_dir, self._default_sources_name) # read sources.list if it exists + # ignore InvalidSourceError if ubuntu.sources also exists + # -- in this case, sources.list just contains a comment if os.path.isfile(self.default_file): - self.load(self.default_file) + try: + self.load(self.default_file) + except InvalidSourceError: + if not os.path.isfile(default_sources): + raise # read sources.list.d - for file in glob.iglob("/etc/apt/sources.list.d/*.list"): + for file in glob.iglob(os.path.join(sources_dir, "*.list")): self.load(file) + for file in glob.iglob(os.path.join(sources_dir, "*.sources")): + self.load_deb822(file) - def __contains__(self, key: str) -> bool: - """Magic method for checking presence of repo in mapping.""" + def __contains__(self, key: Any) -> bool: + """Magic method for checking presence of repo in mapping. + + Checks against the string names used to identify repositories. + """ return key in self._repository_map def __len__(self) -> int: """Return number of repositories in map.""" return len(self._repository_map) - def __iter__(self) -> Iterable[DebianRepository]: - """Return iterator for RepositoryMapping.""" + def __iter__(self) -> Iterator[DebianRepository]: + """Return iterator for RepositoryMapping. + + Iterates over the DebianRepository values rather than the string names. + FIXME: this breaks the expectations of the Mapping abstract base class + for example when it provides methods like keys and items + """ return iter(self._repository_map.values()) def __getitem__(self, repository_uri: str) -> DebianRepository: @@ -1230,16 +1285,69 @@ def __setitem__(self, repository_uri: str, repository: DebianRepository) -> None """Add a `DebianRepository` to the cache.""" self._repository_map[repository_uri] = repository + def load_deb822(self, filename: str) -> None: + """Load a deb822 format repository source file into the cache. + + In contrast to one-line-style, the deb822 format specifies a repository + using a multi-line stanza. Stanzas are separated by whitespace, + and each definition consists of lines that are either key: value pairs, + or continuations of the previous value. + + Read more about the deb822 format here: + https://manpages.ubuntu.com/manpages/noble/en/man5/sources.list.5.html + For instance, ubuntu 24.04 (noble) lists its sources using deb822 style in: + /etc/apt/sources.list.d/ubuntu.sources + """ + with open(filename, "r") as f: + repos, errors = self._parse_deb822_lines(f, filename=filename) + for repo in repos: + self._repository_map[_repo_to_identifier(repo)] = repo + if errors: + self._last_errors = tuple(errors) + logger.debug( + "the following %d error(s) were encountered when reading deb822 sources:\n%s", + len(errors), + "\n".join(str(e) for e in errors), + ) + if repos: + logger.info("parsed %d apt package repositories from %s", len(repos), filename) + else: + raise InvalidSourceError("all repository lines in '{}' were invalid!".format(filename)) + + @classmethod + def _parse_deb822_lines( + cls, + lines: Iterable[str], + filename: str = "", + ) -> Tuple[List[DebianRepository], List[InvalidSourceError]]: + """Parse lines from a deb822 file into a list of repos and a list of errors. + + The semantics of `_parse_deb822_lines` slightly different to `_parse`: + `_parse` reads a commented out line as an entry that is not enabled + `_parse_deb822_lines` strips out comments entirely when parsing a file into stanzas, + instead only reading the 'Enabled' key to determine if an entry is enabled + """ + repos: List[DebianRepository] = [] + errors: List[InvalidSourceError] = [] + for numbered_lines in _iter_deb822_stanzas(lines): + try: + stanza = _Deb822Stanza(numbered_lines=numbered_lines, filename=filename) + except InvalidSourceError as e: + errors.append(e) + else: + repos.extend(stanza.repos) + return repos, errors + def load(self, filename: str): - """Load a repository source file into the cache. + """Load a one-line-style format repository source file into the cache. Args: filename: the path to the repository file """ - parsed = [] - skipped = [] + parsed: List[int] = [] + skipped: List[int] = [] with open(filename, "r") as f: - for n, line in enumerate(f): + for n, line in enumerate(f, start=1): # 1 indexed line numbers try: repo = self._parse(line, filename) except InvalidSourceError: @@ -1255,7 +1363,7 @@ def load(self, filename: str): logger.debug("skipped the following lines in file '%s': %s", filename, skip_list) if parsed: - logger.info("parsed %d apt package repositories", len(parsed)) + logger.info("parsed %d apt package repositories from %s", len(parsed), filename) else: raise InvalidSourceError("all repository lines in '{}' were invalid!".format(filename)) @@ -1314,48 +1422,319 @@ def _parse(line: str, filename: str) -> DebianRepository: else: raise InvalidSourceError("An invalid sources line was found in %s!", filename) - def add(self, repo: DebianRepository, default_filename: Optional[bool] = False) -> None: - """Add a new repository to the system. + def add( # noqa: D417 # undocumented-param: default_filename intentionally undocumented + self, repo: DebianRepository, default_filename: Optional[bool] = False + ) -> None: + """Add a new repository to the system using add-apt-repository. Args: - repo: a `DebianRepository` object - default_filename: an (Optional) filename if the default is not desirable - """ - new_filename = "{}-{}.list".format( - DebianRepository.prefix_from_uri(repo.uri), repo.release.replace("/", "-") - ) + repo: a DebianRepository object + if repo.enabled is falsey, will return without adding the repository + Raises: + CalledProcessError: if there's an error running apt-add-repository + + WARNING: Does not associate the repository with a signing key. + Use `import_key` to add a signing key globally. - fname = repo.filename or new_filename + WARNING: if repo.enabled is falsey, will return without adding the repository - options = repo.options if repo.options else {} - if repo.gpg_key: - options["signed-by"] = repo.gpg_key + WARNING: Don't forget to call `apt.update` before installing any packages! + Or call `apt.add_package` with `update_cache=True`. - with open(fname, "wb") as f: - f.write( + WARNING: the default_filename keyword argument is provided for backwards compatibility + only. It is not used, and was not used in the previous revision of this library. + """ + if not repo.enabled: + logger.warning( ( - "{}".format("#" if not repo.enabled else "") - + "{} {}{} ".format(repo.repotype, repo.make_options_string(), repo.uri) - + "{} {}\n".format(repo.release, " ".join(repo.groups)) - ).encode("utf-8") + "Returning from RepositoryMapping.add(repo=%s) without adding the repo" + " because repo.enabled is %s" + ), + repo, + repo.enabled, ) - - self._repository_map["{}-{}-{}".format(repo.repotype, repo.uri, repo.release)] = repo + return + _add_repository(repo) + self._repository_map[_repo_to_identifier(repo)] = repo def disable(self, repo: DebianRepository) -> None: - """Remove a repository. Disable by default. + """Remove a repository by disabling it in the source file. - Args: - repo: a `DebianRepository` to disable + WARNING: disable is currently not implemented for repositories defined + by a deb822 stanza, and will raise a NotImplementedError if called on one. + + WARNING: This method does NOT alter the `.enabled` flag on the DebianRepository. """ - searcher = "{} {}{} {}".format( - repo.repotype, repo.make_options_string(), repo.uri, repo.release + repo.disable() + self._repository_map[_repo_to_identifier(repo)] = repo + # ^ adding to map on disable seems like a bug, but this is the previous behaviour + + +def _add_repository( + repo: DebianRepository, + remove: bool = False, + update_cache: bool = False, +) -> None: + line = _repo_to_line(repo, include_signed_by=False) + key_file = repo.gpg_key + if key_file and not remove and not os.path.exists(key_file): + msg = ( + "Adding repository '{line}' with add-apt-repository." + " Key file '{key_file}' does not exist." + " Ensure it is imported correctly to use this repository." + ).format(line=line, key_file=key_file) + logger.warning(msg) + cmd = [ + "add-apt-repository", + "--yes", + "--sourceslist=" + line, + ] + if remove: + cmd.append("--remove") + if not update_cache: + cmd.append("--no-update") + logger.info("%s", cmd) + try: + subprocess.run(cmd, check=True, capture_output=True) + except CalledProcessError as e: + logger.error( + "subprocess.run(%s):\nstdout:\n%s\nstderr:\n%s", + cmd, + e.stdout.decode(), + e.stderr.decode(), ) + raise + + +class _Deb822Stanza: + """Representation of a stanza from a deb822 source file. + + May define multiple DebianRepository objects. + """ + + def __init__(self, numbered_lines: List[Tuple[int, str]], filename: str = ""): + self._filename = filename + self._numbered_lines = numbered_lines + if not numbered_lines: + self._repos = () + self._gpg_key_filename = "" + self._gpg_key_from_stanza = None + return + options, line_numbers = _deb822_stanza_to_options(numbered_lines) + repos, gpg_key_info = _deb822_options_to_repos( + options, line_numbers=line_numbers, filename=filename + ) + for repo in repos: + repo._deb822_stanza = self # pyright: ignore[reportPrivateUsage] + self._repos = repos + self._gpg_key_filename, self._gpg_key_from_stanza = gpg_key_info + + @property + def repos(self) -> Tuple[DebianRepository, ...]: + """The repositories defined by this deb822 stanza.""" + return self._repos + + def get_gpg_key_filename(self) -> str: + """Return the path to the GPG key for this stanza. + + Import the key first, if the key itself was provided in the stanza. + Return an empty string if no filename or key was provided. + """ + if self._gpg_key_filename: + return self._gpg_key_filename + if self._gpg_key_from_stanza is None: + return "" + # a gpg key was provided in the stanza + # and we haven't already imported it + self._gpg_key_filename = import_key(self._gpg_key_from_stanza) + return self._gpg_key_filename + + +class MissingRequiredKeyError(InvalidSourceError): + """Missing a required value in a source file.""" + + def __init__(self, message: str = "", *, file: str, line: Optional[int], key: str) -> None: + super().__init__(message, file, line, key) + self.file = file + self.line = line + self.key = key + + +class BadValueError(InvalidSourceError): + """Bad value for an entry in a source file.""" + + def __init__( + self, + message: str = "", + *, + file: str, + line: Optional[int], + key: str, + value: str, + ) -> None: + super().__init__(message, file, line, key, value) + self.file = file + self.line = line + self.key = key + self.value = value - for line in fileinput.input(repo.filename, inplace=True): - if re.match(r"^{}\s".format(re.escape(searcher)), line): - print("# {}".format(line), end="") - else: - print(line, end="") - self._repository_map["{}-{}-{}".format(repo.repotype, repo.uri, repo.release)] = repo +def _iter_deb822_stanzas(lines: Iterable[str]) -> Iterator[List[Tuple[int, str]]]: + """Given lines from a deb822 format file, yield a stanza of lines. + + Args: + lines: an iterable of lines from a deb822 sources file + + Yields: + lists of numbered lines (a tuple of line number and line) that make up + a deb822 stanza, with comments stripped out (but accounted for in line numbering) + """ + current_stanza: List[Tuple[int, str]] = [] + for n, line in enumerate(lines, start=1): # 1 indexed line numbers + if not line.strip(): # blank lines separate stanzas + if current_stanza: + yield current_stanza + current_stanza = [] + continue + content, _delim, _comment = line.partition("#") + if content.strip(): # skip (potentially indented) comment line + current_stanza.append((n, content.rstrip())) # preserve indent + if current_stanza: + yield current_stanza + + +def _deb822_stanza_to_options( + lines: Iterable[Tuple[int, str]], +) -> Tuple[Dict[str, str], Dict[str, int]]: + """Turn numbered lines into a dict of options and a dict of line numbers. + + Args: + lines: an iterable of numbered lines (a tuple of line number and line) + + Returns: + a dictionary of option names to (potentially multiline) values, and + a dictionary of option names to starting line number + """ + parts: Dict[str, List[str]] = {} + line_numbers: Dict[str, int] = {} + current = None + for n, line in lines: + assert "#" not in line # comments should be stripped out + if line.startswith(" "): # continuation of previous key's value + assert current is not None + parts[current].append(line.rstrip()) # preserve indent + continue + raw_key, _, raw_value = line.partition(":") + current = raw_key.strip() + parts[current] = [raw_value.strip()] + line_numbers[current] = n + options = {k: "\n".join(v) for k, v in parts.items()} + return options, line_numbers + + +def _deb822_options_to_repos( + options: Dict[str, str], line_numbers: Mapping[str, int] = {}, filename: str = "" +) -> Tuple[Tuple[DebianRepository, ...], Tuple[str, Optional[str]]]: + """Return a collections of DebianRepository objects defined by this deb822 stanza. + + Args: + options: a dictionary of deb822 field names to string options + line_numbers: a dictionary of field names to line numbers (for error messages) + filename: the file the options were read from (for repository object and errors) + + Returns: + a tuple of `DebianRepository`s, and + a tuple of the gpg key filename and optional in-stanza provided key itself + + Raises: + InvalidSourceError if any options are malformed or required options are missing + """ + # Enabled + enabled_field = options.pop("Enabled", "yes") + if enabled_field == "yes": + enabled = True + elif enabled_field == "no": + enabled = False + else: + raise BadValueError( + "Must be one of yes or no (default: yes).", + file=filename, + line=line_numbers.get("Enabled"), + key="Enabled", + value=enabled_field, + ) + # Signed-By + gpg_key_file = options.pop("Signed-By", "") + gpg_key_from_stanza: Optional[str] = None + if "\n" in gpg_key_file: + # actually a literal multi-line gpg-key rather than a filename + gpg_key_from_stanza = gpg_key_file + gpg_key_file = "" + # Types + try: + repotypes = options.pop("Types").split() + uris = options.pop("URIs").split() + suites = options.pop("Suites").split() + except KeyError as e: + [key] = e.args + raise MissingRequiredKeyError( + key=key, + line=min(line_numbers.values()) if line_numbers else None, + file=filename, + ) + # Components + # suite can specify an exact path, in which case the components must be omitted and suite must end with a slash (/). + # If suite does not specify an exact path, at least one component must be present. + # https://manpages.ubuntu.com/manpages/noble/man5/sources.list.5.html + components: List[str] + if len(suites) == 1 and suites[0].endswith("/"): + if "Components" in options: + msg = ( + "Since 'Suites' (line {suites_line}) specifies" + " a path relative to 'URIs' (line {uris_line})," + " 'Components' must be ommitted." + ).format( + suites_line=line_numbers.get("Suites"), + uris_line=line_numbers.get("URIs"), + ) + raise BadValueError( + msg, + file=filename, + line=line_numbers.get("Components"), + key="Components", + value=options["Components"], + ) + components = [] + else: + if "Components" not in options: + msg = ( + "Since 'Suites' (line {suites_line}) does not specify" + " a path relative to 'URIs' (line {uris_line})," + " 'Components' must be present in this stanza." + ).format( + suites_line=line_numbers.get("Suites"), + uris_line=line_numbers.get("URIs"), + ) + raise MissingRequiredKeyError( + msg, + file=filename, + line=min(line_numbers.values()) if line_numbers else None, + key="Components", + ) + components = options.pop("Components").split() + repos = tuple( + DebianRepository( + enabled=enabled, + repotype=repotype, + uri=uri, + release=suite, + groups=components, + filename=filename, + gpg_key_filename=gpg_key_file, + options=options, + ) + for repotype in repotypes + for uri in uris + for suite in suites + ) + return repos, (gpg_key_file, gpg_key_from_stanza) From 1553f51e6c5df0a0d26f85d9bda0ff1ea54e3156 Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Fri, 6 Dec 2024 11:52:21 -0500 Subject: [PATCH 06/10] fix(slurm_ops): use shell variable substitution in /etc/default/sackd Variable substitution in the environment file prevents us from needing to set the service type to `Type=forking` in systemd. `sackd` is started the way it should be with the `--systemd` rather than run as a child process in the shell. `slurmd` needs some further work around `juju-systemd-notices` before it can use this approach as well. Other changes: * Clean up docstrings to be accurate and reflect the current implementation of the library. Signed-off-by: Jason C. Nucciarone --- lib/charms/hpc_libs/v0/slurm_ops.py | 95 ++++++++++++++--------------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index 35029b6..f249c96 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -39,13 +39,13 @@ class ApplicationCharm(CharmBase): def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) - self._slurm_manager = SlurmctldManager(snap=True) + self._slurmctld = SlurmctldManager(snap=True) self.framework.observe(self.on.install, self._on_install) def _on_install(self, _) -> None: self._slurmctld.install() self.unit.set_workload_version(self._slurmctld.version()) - with self._slurmctld.config() as config: + with self._slurmctld.config.edit() as config: config.cluster_name = "cluster" ``` """ @@ -125,11 +125,11 @@ def _call( ) -> subprocess.CompletedProcess: """Call a command with logging. - If the `check` argument is set to `False`, the command call will not raise an error if the command - fails. + If the `check` argument is set to `False`, the command call + will not raise an error if the command fails. Raises: - SlurmOpsError: Raised if the command fails. + SlurmOpsError: Raised if the executed command fails. """ cmd = [cmd, *args] _logger.debug(f"executing command {cmd}") @@ -414,7 +414,7 @@ def active(self) -> bool: class _OpsManager(ABC): - """Manager to control the installation, creation and configuration of Slurm-related services.""" + """Manager to control the lifecycle of Slurm-related services.""" @abstractmethod def install(self) -> None: @@ -444,7 +444,7 @@ def env_manager_for(self, service: _ServiceType) -> _EnvManager: class _SnapManager(_OpsManager): - """Slurm ops manager that uses Snap as its package manager.""" + """Operations manager for the Slurm snap backend.""" def install(self) -> None: """Install Slurm using the `slurm` snap.""" @@ -486,7 +486,7 @@ def env_manager_for(self, service: _ServiceType) -> _EnvManager: class _AptManager(_OpsManager): - """Slurm ops manager that uses apt as its package manager. + """Operations manager for the Slurm Debian package backend. Notes: This manager provides some environment variables that are automatically passed to the @@ -499,7 +499,7 @@ def __init__(self, service: _ServiceType) -> None: self._env_file = Path(f"/etc/default/{self._service_name}") def install(self) -> None: - """Install Slurm using the `slurm` snap.""" + """Install Slurm using the `slurm-wlm` Debian package set.""" self._init_ubuntu_hpc_ppa() self._install_service() self._create_state_save_location() @@ -657,27 +657,19 @@ def _apply_overrides(self) -> None: """Override defaults supplied provided by Slurm Debian packages.""" match self._service_name: case "sackd": - _logger.debug("overriding default sackd service configuration") - config_override = Path( - "/etc/systemd/system/sackd.service.d/10-sackd-config-server.conf" - ) - config_override.mkdir(parents=True, exist_ok=True) - config_override.write_text( + # TODO: https://github.com/charmed-hpc/hpc-libs/issues/54 - + # Make `sackd` create its service environment file so that we + # aren't required to manually create it here. + _logger.debug("creating sackd environment file") + self._env_file.touch(mode=0o644, exist_ok=True) + self._env_file.write_text( textwrap.dedent( """ - [Service] - ExecStart= - ExecStart=/usr/bin/sh -c "/usr/sbin/sackd --systemd $${SACKD_CONFIG_SERVER:+--conf-server $$SACKD_CONFIG_SERVER} $$SACKD_OPTIONS" + SACKD_CONFIG_SERVER='' + SACKD_OPTIONS="${SACKD_CONFIG_SERVER:+--conf-server $SACKD_CONFIG_SERVER}" """ ) ) - - # TODO: https://github.com/charmed-hpc/hpc-libs/issues/54 - - # Make `sackd` create its service environment file so that we - # aren't required to manually create it here. - _logger.debug("creating sackd environment file") - self._env_file.touch(mode=0o644) - dotenv.set_key(self._env_file, "SACKD_OPTIONS", "") case "slurmctld": _logger.debug("overriding default slurmctld service configuration") self._set_ulimit() @@ -907,13 +899,13 @@ def scontrol(*args) -> str: """Control Slurm via `scontrol` commands. Raises: - SlurmOpsError: Raised if scontrol command fails. + SlurmOpsError: Raised if `scontrol` command fails. """ return _call("scontrol", *args).stdout class SackdManager(_SlurmManagerBase): - """Manager for the Sackd service.""" + """Manager for the `sackd` service.""" def __init__(self, *args, **kwargs) -> None: super().__init__(service=_ServiceType.SACKD, *args, **kwargs) @@ -921,22 +913,32 @@ def __init__(self, *args, **kwargs) -> None: @property def config_server(self) -> str: - """Get the config server address of this Sackd node.""" + """Get the configuration server address of this `sackd` node.""" return self._env_manager.get("SACKD_CONFIG_SERVER") @config_server.setter def config_server(self, addr: str) -> None: - """Set the config server address of this Sackd node.""" + """Set the configuration server address of this `sackd` node. + + Sets the `--conf-server` option for `sackd`. + """ self._env_manager.set({"SACKD_CONFIG_SERVER": addr}) @config_server.deleter def config_server(self) -> None: - """Unset the config server address of this Sackd node.""" - self._env_manager.unset("SACKD_CONFIG_SERVER") + """Unset the configuration server address of this `sackd` node. + + Warnings: + The `SACKD_CONFIG_SERVER` is set to an emtpy string rather than + deleted from the environment file to preserve the line order necessary + to successfully use variable substitution within the `SACKD_OPTIONS` + environment variable. + """ + self._env_manager.set({"SACKD_CONFIG_SERVER": ""}) class SlurmctldManager(_SlurmManagerBase): - """Manager for the Slurmctld service.""" + """Manager for the `slurmctld` service.""" def __init__(self, *args, **kwargs) -> None: super().__init__(service=_ServiceType.SLURMCTLD, *args, **kwargs) @@ -952,13 +954,7 @@ def __init__(self, *args, **kwargs) -> None: class SlurmdManager(_SlurmManagerBase): - """Manager for the Slurmd service. - - This service will additionally provide some environment variables that need to be - passed through to the service in case the default service is overriden (e.g. a systemctl file override). - - - SLURMD_CONFIG_SERVER. Sets the `--conf-server` option for `slurmd`. - """ + """Manager for the `slurmd` service.""" def __init__(self, *args, **kwargs) -> None: super().__init__(service=_ServiceType.SLURMD, *args, **kwargs) @@ -976,22 +972,25 @@ def group(self) -> str: @property def config_server(self) -> str: - """Get the config server address of this Slurmd node.""" + """Get the configuration server address of this `slurmd` node.""" return self._env_manager.get("SLURMD_CONFIG_SERVER") @config_server.setter def config_server(self, addr: str) -> None: - """Set the config server address of this Slurmd node.""" + """Set the configuration server address of this `slurmd` node. + + Sets the `--conf-server` option for `slurmd`. + """ self._env_manager.set({"SLURMD_CONFIG_SERVER": addr}) @config_server.deleter def config_server(self) -> None: - """Unset the config server address of this Slurmd node.""" + """Unset the configuration server address of this `slurmd` node.""" self._env_manager.unset("SLURMD_CONFIG_SERVER") class SlurmdbdManager(_SlurmManagerBase): - """Manager for the Slurmdbd service.""" + """Manager for the `slurmdbd` service.""" def __init__(self, *args, **kwargs) -> None: super().__init__(service=_ServiceType.SLURMDBD, *args, **kwargs) @@ -1002,12 +1001,12 @@ def __init__(self, *args, **kwargs) -> None: @property def mysql_unix_port(self) -> str: - """Get the URI of the unix socket slurmdbd uses to communicate with MySQL.""" + """Get the URI of the unix socket `slurmdbd` uses to communicate with MySQL.""" return self._env_manager.get("MYSQL_UNIX_PORT") @mysql_unix_port.setter def mysql_unix_port(self, socket_path: Union[str, os.PathLike]) -> None: - """Set the unix socket URI that slurmdbd will use to communicate with MySQL.""" + """Set the unix socket URI that `slurmdbd` will use to communicate with MySQL.""" self._env_manager.set({"MYSQL_UNIX_PORT": socket_path}) @mysql_unix_port.deleter @@ -1017,7 +1016,7 @@ def mysql_unix_port(self) -> None: class SlurmrestdManager(_SlurmManagerBase): - """Manager for the Slurmrestd service.""" + """Manager for the `slurmrestd` service.""" def __init__(self, *args, **kwargs) -> None: super().__init__(service=_ServiceType.SLURMRESTD, *args, **kwargs) @@ -1027,10 +1026,10 @@ def __init__(self, *args, **kwargs) -> None: @property def user(self) -> str: - """Get the user that the slurmrestd service will run as.""" + """Get the user that the `slurmrestd` service will run as.""" return "slurmrestd" @property def group(self): - """Get the group that the slurmrestd service will run as.""" + """Get the group that the `slurmrestd` service will run as.""" return "slurmrestd" From b51c2fe77ef48968a656ee6d16a42abc61fac180 Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Fri, 6 Dec 2024 11:56:32 -0500 Subject: [PATCH 07/10] tests(slurm_ops): set `StateSaveLocation` in integration tests Slurm 23.11 requires permissions to be properly set on the `StateSaveLocation`. The default is `/var/spool` which isn't set up the way Slurm wants. Instead provide `/var/lib/slurm/checkpoint` which is set up the way Slurm wants by the Debian package. Signed-off-by: Jason C. Nucciarone --- tests/integration/slurm_ops/test_manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/slurm_ops/test_manager.py b/tests/integration/slurm_ops/test_manager.py index d549bcb..0b2bc01 100644 --- a/tests/integration/slurm_ops/test_manager.py +++ b/tests/integration/slurm_ops/test_manager.py @@ -40,6 +40,7 @@ def test_slurm_config(slurmctld: SlurmctldManager) -> None: """Test that the slurm config can be changed.""" with slurmctld.config.edit() as config: config.slurmctld_host = [slurmctld.hostname] + config.state_save_location = "/var/lib/slurm/checkpoint" config.cluster_name = "test-cluster" for line in str(slurmctld.config.load()).splitlines(): From 935cd26da244397bf1bc9a21843959456f2b60fe Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Fri, 6 Dec 2024 12:05:54 -0500 Subject: [PATCH 08/10] fix(slurm_ops): use correct type hints for environment variables getters Environment variable getters can reuturn str or None, not just str. None is returned if the environment variable is not defined in the corresponding environment file. Signed-off-by: Jason C. Nucciarone --- lib/charms/hpc_libs/v0/slurm_ops.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index f249c96..9963bf3 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -912,9 +912,10 @@ def __init__(self, *args, **kwargs) -> None: self._env_manager = self._ops_manager.env_manager_for(_ServiceType.SACKD) @property - def config_server(self) -> str: + def config_server(self) -> str | None: """Get the configuration server address of this `sackd` node.""" - return self._env_manager.get("SACKD_CONFIG_SERVER") + result = self._env_manager.get("SACKD_CONFIG_SERVER") + return None if result == "" else result @config_server.setter def config_server(self, addr: str) -> None: @@ -971,7 +972,7 @@ def group(self) -> str: return "root" @property - def config_server(self) -> str: + def config_server(self) -> str | None: """Get the configuration server address of this `slurmd` node.""" return self._env_manager.get("SLURMD_CONFIG_SERVER") @@ -1000,7 +1001,7 @@ def __init__(self, *args, **kwargs) -> None: ) @property - def mysql_unix_port(self) -> str: + def mysql_unix_port(self) -> str | None: """Get the URI of the unix socket `slurmdbd` uses to communicate with MySQL.""" return self._env_manager.get("MYSQL_UNIX_PORT") From ef7df18f49083a72ddb511d6050f698bc65e6510 Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Fri, 6 Dec 2024 12:08:46 -0500 Subject: [PATCH 09/10] chore(release): bump `slurm_ops` to 0.10 Also bump dependency versions. `cryptography` is now pinned to the v44 API version rather than v43. Signed-off-by: Jason C. Nucciarone --- lib/charms/hpc_libs/v0/slurm_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index 9963bf3..cb60de3 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -97,11 +97,11 @@ def _on_install(self, _) -> None: # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 9 +LIBPATCH = 10 # Charm library dependencies to fetch during `charmcraft pack`. PYDEPS = [ - "cryptography~=43.0.1", + "cryptography~=44.0.0", "pyyaml>=6.0.2", "python-dotenv~=1.0.1", "slurmutils~=0.9.0", From 1b967905297d813a2dd0731bd2b153054b13286d Mon Sep 17 00:00:00 2001 From: "Jason C. Nucciarone" Date: Fri, 6 Dec 2024 15:23:40 -0500 Subject: [PATCH 10/10] fix(slurm_ops): manadate that config server is provided in service file Require the `--conf-server` to be provided for start-up. Variable substitution cannot be used in environment files. Declared environment variables are treated as independent key-value pairs. Signed-off-by: Jason C. Nucciarone --- lib/charms/hpc_libs/v0/slurm_ops.py | 37 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/lib/charms/hpc_libs/v0/slurm_ops.py b/lib/charms/hpc_libs/v0/slurm_ops.py index cb60de3..5652db6 100644 --- a/lib/charms/hpc_libs/v0/slurm_ops.py +++ b/lib/charms/hpc_libs/v0/slurm_ops.py @@ -657,19 +657,26 @@ def _apply_overrides(self) -> None: """Override defaults supplied provided by Slurm Debian packages.""" match self._service_name: case "sackd": - # TODO: https://github.com/charmed-hpc/hpc-libs/issues/54 - - # Make `sackd` create its service environment file so that we - # aren't required to manually create it here. - _logger.debug("creating sackd environment file") - self._env_file.touch(mode=0o644, exist_ok=True) - self._env_file.write_text( + _logger.debug("overriding default sackd service configuration") + config_override = Path( + "/etc/systemd/system/sackd.service.d/10-sackd-config-server.conf" + ) + config_override.parent.mkdir(parents=True, exist_ok=True) + config_override.write_text( textwrap.dedent( """ - SACKD_CONFIG_SERVER='' - SACKD_OPTIONS="${SACKD_CONFIG_SERVER:+--conf-server $SACKD_CONFIG_SERVER}" + [Service] + ExecStart= + ExecStart=/usr/sbin/sackd --systemd --conf-server $SACKD_CONFIG_SERVER """ ) ) + + # TODO: https://github.com/charmed-hpc/hpc-libs/issues/54 - + # Make `sackd` create its service environment file so that we + # aren't required to manually create it here. + _logger.debug("creating sackd environment file") + self._env_file.touch(mode=0o644, exist_ok=True) case "slurmctld": _logger.debug("overriding default slurmctld service configuration") self._set_ulimit() @@ -914,8 +921,7 @@ def __init__(self, *args, **kwargs) -> None: @property def config_server(self) -> str | None: """Get the configuration server address of this `sackd` node.""" - result = self._env_manager.get("SACKD_CONFIG_SERVER") - return None if result == "" else result + return self._env_manager.get("SACKD_CONFIG_SERVER") @config_server.setter def config_server(self, addr: str) -> None: @@ -927,15 +933,8 @@ def config_server(self, addr: str) -> None: @config_server.deleter def config_server(self) -> None: - """Unset the configuration server address of this `sackd` node. - - Warnings: - The `SACKD_CONFIG_SERVER` is set to an emtpy string rather than - deleted from the environment file to preserve the line order necessary - to successfully use variable substitution within the `SACKD_OPTIONS` - environment variable. - """ - self._env_manager.set({"SACKD_CONFIG_SERVER": ""}) + """Unset the configuration server address of this `sackd` node.""" + self._env_manager.unset("SACKD_CONFIG_SERVER") class SlurmctldManager(_SlurmManagerBase):