From 0b571ba2502fb407b64ed34877fa9242285cdb41 Mon Sep 17 00:00:00 2001 From: Mia Altieri <32723809+MiaAltieri@users.noreply.github.com> Date: Fri, 13 Dec 2024 15:28:47 +0100 Subject: [PATCH] [DPE-6124] optimise max map for VM deployments (#522) ## Context Solves issue #518 which reports ` 2024-11-28T21:05:16.089+00:00: vm.max_map_count is too low` This message comes from the fact that each network connection requires 2 mapped virtual memory areas. So mongodb is pointing out that if the server accepts all of the connections allowed, it will not have enough maps and possibly cause the mongod to crash. MongoDB defines this value to be 2x the possible of soft ulimit. We could automatically set `vm.max_map` to 2x of soft ulimit. But I believe this is not advisable since each memory-mapped area consumes resources, and having too many can lead to increased memory usage and slower performance. **Question - is there a programmatic way to determine at what point the number of memory mapped areas is detrimental? If so we could always consider decreasing the ulimt.** With that said I believe setting it to `262144` is a good spot to start and we could come back to these questions later in the future ## Solution We adopted [an approach similar to Kafka](https://github.com/canonical/kafka-operator/commit/db3fe14a45a4f539deeafe626f1ad30a475a5255#diff-0580ab9aef31b44580478eb270ec3422c5e6fef35df585fecb3a68317e246809R347) here which is to set vm_max when possible. If it is not possible we log a warning and do not attempt to set it again ## Testing tested by hand in OpenStack deployment to verify that the following warning does not appear when connecting to Mongodb. But it was necessary to restart `mongod` to have the message go away. ``` 2024-11-28T21:05:16.089+00:00: vm.max_map_count is too low ``` ## know issue ``` vm.max_map_count is too low ``` is present until ` charmed-mongodb.mongod` is restarted (despite the value being set before the installation of the`charmed-mongodb` snap.) --- lib/charms/operator_libs_linux/v0/sysctl.py | 288 ++++++++++++++++++++ src/charm.py | 22 ++ src/config.py | 7 + tests/unit/test_charm.py | 14 + 4 files changed, 331 insertions(+) create mode 100644 lib/charms/operator_libs_linux/v0/sysctl.py diff --git a/lib/charms/operator_libs_linux/v0/sysctl.py b/lib/charms/operator_libs_linux/v0/sysctl.py new file mode 100644 index 000000000..8245835e8 --- /dev/null +++ b/lib/charms/operator_libs_linux/v0/sysctl.py @@ -0,0 +1,288 @@ +# Copyright 2023 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Handler for the sysctl config. + +This library allows your charm to create and configure sysctl options to the machine. + +Validation and merge capabilities are added, for situations where more than one application +are setting values. The following files can be created: + +- /etc/sysctl.d/90-juju- + Requirements from one application requesting to configure the values. + +- /etc/sysctl.d/95-juju-sysctl.conf + Merged file resulting from all other `90-juju-*` application files. + + +A charm using the sysctl lib will need a data structure like the following: +``` +{ +"vm.swappiness": "1", +"vm.max_map_count": "262144", +"vm.dirty_ratio": "80", +"vm.dirty_background_ratio": "5", +"net.ipv4.tcp_max_syn_backlog": "4096", +} +``` + +Now, it can use that template within the charm, or just declare the values directly: + +```python +from charms.operator_libs_linux.v0 import sysctl + +class MyCharm(CharmBase): + + def __init__(self, *args): + ... + self.sysctl = sysctl.Config(self.meta.name) + + self.framework.observe(self.on.install, self._on_install) + self.framework.observe(self.on.remove, self._on_remove) + + def _on_install(self, _): + # Altenatively, read the values from a template + sysctl_data = {"net.ipv4.tcp_max_syn_backlog": "4096"}} + + try: + self.sysctl.configure(config=sysctl_data) + except (sysctl.ApplyError, sysctl.ValidationError) as e: + logger.error(f"Error setting values on sysctl: {e.message}") + self.unit.status = BlockedStatus("Sysctl config not possible") + except sysctl.CommandError: + logger.error("Error on sysctl") + + def _on_remove(self, _): + self.sysctl.remove() +``` +""" + +import logging +import re +from pathlib import Path +from subprocess import STDOUT, CalledProcessError, check_output +from typing import Dict, List + +logger = logging.getLogger(__name__) + +# The unique Charmhub library identifier, never change it +LIBID = "17a6cd4d80104d15b10f9c2420ab3266" + +# Increment this major API version when introducing breaking changes +LIBAPI = 0 + +# Increment this PATCH version before using `charmcraft publish-lib` or reset +# to 0 if you are raising the major API version +LIBPATCH = 4 + +CHARM_FILENAME_PREFIX = "90-juju-" +SYSCTL_DIRECTORY = Path("/etc/sysctl.d") +SYSCTL_FILENAME = SYSCTL_DIRECTORY / "95-juju-sysctl.conf" +SYSCTL_HEADER = f"""# This config file was produced by sysctl lib v{LIBAPI}.{LIBPATCH} +# +# This file represents the output of the sysctl lib, which can combine multiple +# configurations into a single file like. +""" + + +class Error(Exception): + """Base class of most errors raised by this library.""" + + @property + def message(self): + """Return the message passed as an argument.""" + return self.args[0] + + +class CommandError(Error): + """Raised when there's an error running sysctl command.""" + + +class ApplyError(Error): + """Raised when there's an error applying values in sysctl.""" + + +class ValidationError(Error): + """Exception representing value validation error.""" + + +class Config(Dict): + """Represents the state of the config that a charm wants to enforce.""" + + _apply_re = re.compile(r"sysctl: permission denied on key \"([a-z_\.]+)\", ignoring$") + + def __init__(self, name: str) -> None: + self.name = name + self._data = self._load_data() + + def __contains__(self, key: str) -> bool: + """Check if key is in config.""" + return key in self._data + + def __len__(self): + """Get size of config.""" + return len(self._data) + + def __iter__(self): + """Iterate over config.""" + return iter(self._data) + + def __getitem__(self, key: str) -> str: + """Get value for key form config.""" + return self._data[key] + + @property + def charm_filepath(self) -> Path: + """Name for resulting charm config file.""" + return SYSCTL_DIRECTORY / f"{CHARM_FILENAME_PREFIX}{self.name}" + + def configure(self, config: Dict[str, str]) -> None: + """Configure sysctl options with a desired set of params. + + Args: + config: dictionary with keys to configure: + ``` + {"vm.swappiness": "10", ...} + ``` + """ + self._parse_config(config) + + # NOTE: case where own charm calls configure() more than once. + if self.charm_filepath.exists(): + self._merge(add_own_charm=False) + + conflict = self._validate() + if conflict: + raise ValidationError(f"Validation error for keys: {conflict}") + + snapshot = self._create_snapshot() + logger.debug("Created snapshot for keys: %s", snapshot) + try: + self._apply() + except ApplyError: + self._restore_snapshot(snapshot) + raise + + self._create_charm_file() + self._merge() + + def remove(self) -> None: + """Remove config for charm. + + The removal process won't apply any sysctl configuration. It will only merge files from + remaining charms. + """ + self.charm_filepath.unlink(missing_ok=True) + logger.info("Charm config file %s was removed", self.charm_filepath) + self._merge() + + def _validate(self) -> List[str]: + """Validate the desired config params against merged ones.""" + common_keys = set(self._data.keys()) & set(self._desired_config.keys()) + conflict_keys = [] + for key in common_keys: + if self._data[key] != self._desired_config[key]: + logger.warning( + "Values for key '%s' are different: %s != %s", + key, + self._data[key], + self._desired_config[key], + ) + conflict_keys.append(key) + + return conflict_keys + + def _create_charm_file(self) -> None: + """Write the charm file.""" + with open(self.charm_filepath, "w") as f: + f.write(f"# {self.name}\n") + for key, value in self._desired_config.items(): + f.write(f"{key}={value}\n") + + def _merge(self, add_own_charm=True) -> None: + """Create the merged sysctl file. + + Args: + add_own_charm : bool, if false it will skip the charm file from the merge. + """ + # get all files that start by 90-juju- + data = [SYSCTL_HEADER] + paths = set(SYSCTL_DIRECTORY.glob(f"{CHARM_FILENAME_PREFIX}*")) + if not add_own_charm: + paths.discard(self.charm_filepath) + + for path in paths: + with open(path, "r") as f: + data += f.readlines() + with open(SYSCTL_FILENAME, "w") as f: + f.writelines(data) + + # Reload data with newly created file. + self._data = self._load_data() + + def _apply(self) -> None: + """Apply values to machine.""" + cmd = [f"{key}={value}" for key, value in self._desired_config.items()] + result = self._sysctl(cmd) + failed_values = [ + self._apply_re.match(line) for line in result if self._apply_re.match(line) + ] + logger.debug("Failed values: %s", failed_values) + + if failed_values: + msg = f"Unable to set params: {[f.group(1) for f in failed_values]}" + logger.error(msg) + raise ApplyError(msg) + + def _create_snapshot(self) -> Dict[str, str]: + """Create a snapshot of config options that are going to be set.""" + cmd = ["-n"] + list(self._desired_config.keys()) + values = self._sysctl(cmd) + return dict(zip(list(self._desired_config.keys()), values)) + + def _restore_snapshot(self, snapshot: Dict[str, str]) -> None: + """Restore a snapshot to the machine.""" + values = [f"{key}={value}" for key, value in snapshot.items()] + self._sysctl(values) + + def _sysctl(self, cmd: List[str]) -> List[str]: + """Execute a sysctl command.""" + cmd = ["sysctl"] + cmd + logger.debug("Executing sysctl command: %s", cmd) + try: + return check_output(cmd, stderr=STDOUT, universal_newlines=True).splitlines() + except CalledProcessError as e: + msg = f"Error executing '{cmd}': {e.stdout}" + logger.error(msg) + raise CommandError(msg) + + def _parse_config(self, config: Dict[str, str]) -> None: + """Parse a config passed to the lib.""" + self._desired_config = {k: str(v) for k, v in config.items()} + + def _load_data(self) -> Dict[str, str]: + """Get merged config.""" + config = {} + if not SYSCTL_FILENAME.exists(): + return config + + with open(SYSCTL_FILENAME, "r") as f: + for line in f: + if line.startswith(("#", ";")) or not line.strip() or "=" not in line: + continue + + key, _, value = line.partition("=") + config[key.strip()] = value.strip() + + return config diff --git a/src/charm.py b/src/charm.py index 2823879af..ebfa47a7c 100755 --- a/src/charm.py +++ b/src/charm.py @@ -40,6 +40,7 @@ MonitorUser, OperatorUser, ) +from charms.operator_libs_linux.v0 import sysctl from charms.operator_libs_linux.v1.systemd import service_running from charms.operator_libs_linux.v2 import snap from data_platform_helpers.version_check import ( @@ -162,6 +163,9 @@ def __init__(self, *args): self.secrets = SecretCache(self) + self.sysctl_config = sysctl.Config(name=self.app.name) + self.framework.observe(getattr(self.on, "remove"), self._on_remove) + # BEGIN: properties def _mongo_scrape_config(self) -> List[Dict]: """Generates scrape config for the mongo metrics endpoint.""" @@ -373,6 +377,7 @@ def get_charm_internal_revision(self) -> str: def _on_install(self, event: InstallEvent) -> None: """Handle the install event (fired on startup).""" + self._set_os_config() self.status.set_and_share_status(MaintenanceStatus("installing MongoDB")) try: self.install_snap_packages(packages=Config.SNAP_PACKAGES) @@ -647,6 +652,10 @@ def _on_storage_detaching(self, event: StorageDetachingEvent) -> None: except PyMongoError as e: logger.error("Failed to remove %s from replica set, error=%r", self.unit.name, e) + def _on_remove(self, _) -> None: + """Handler for removal.""" + self.sysctl_config.remove() + def _on_update_status(self, event: UpdateStatusEvent): # user-made mistakes might result in other incorrect statues. Prioritise informing users of # their mistake. @@ -1070,6 +1079,19 @@ def install_snap_packages(self, packages: List[Package]) -> None: ) raise + def _set_os_config(self) -> None: + """Sets sysctl config.""" + try: + self.sysctl_config.configure(Config.Sysctl.OS_REQUIREMENTS) + except (sysctl.ApplyError, sysctl.ValidationError, sysctl.CommandError) as e: + # we allow events to continue in the case that we are not able to correctly configure + # sysctl config, since we can still run the workload with wrong sysctl parameters + # even if it is not optimal. + logger.error(f"Error setting values on sysctl: {e.message}") + # containers share the kernel with the host system, and some sysctl parameters are + # set at kernel level. + logger.warning("sysctl params cannot be set. Is the machine running on a container?") + def _instatiate_keyfile(self, event: StartEvent) -> None: # wait for keyFile to be created by leader unit if not (keyfile := self.get_secret(APP_SCOPE, Config.Secrets.SECRET_KEYFILE_NAME)): diff --git a/src/config.py b/src/config.py index b394049a0..fdc3459dc 100644 --- a/src/config.py +++ b/src/config.py @@ -133,6 +133,13 @@ class Substrate: VM = "vm" K8S = "k8s" + class Sysctl: + """Constants for Sysctl.""" + + OS_REQUIREMENTS = { + "vm.max_map_count": "262144", + } + class Upgrade: """Upgrade related constants.""" diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index e63b3ac43..0a7852233 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -16,6 +16,7 @@ from tenacity import stop_after_attempt from charm import MongodbOperatorCharm, NotReadyError, subprocess +from config import Config from .helpers import patch_network_get @@ -188,6 +189,19 @@ def test_install_snap_packages_failure(self, _call, snap_cache, update_mongod_se self.harness.charm.on.install.emit() self.assertTrue(isinstance(self.harness.charm.unit.status, BlockedStatus)) + @patch_network_get(private_address="1.1.1.1") + @patch("charm.update_mongod_service") + @patch("charm.snap.SnapCache") + @patch("charm.setup_logrotate_and_cron") + @patch("charm.copy_licenses_to_unit") + @patch("subprocess.check_call") + @patch("builtins.open") + @patch("charm.sysctl.Config.configure") + def test_install(self, patched_os_config, *unused): + """Test verifies the correct functions get called when installing apt packages.""" + self.harness.charm.on.install.emit() + patched_os_config.assert_called_once_with(Config.Sysctl.OS_REQUIREMENTS) + @patch_network_get(private_address="1.1.1.1") def test_app_hosts(self): rel_id = self.harness.charm.model.get_relation("database-peers").id