Skip to content

Commit

Permalink
[DPE-6124] optimise max map for VM deployments (#522)
Browse files Browse the repository at this point in the history
## Context
Solves issue #518 which reports ` 2024-11-28T21:05:16.089+00:00:
vm.max_map_count is too low`

This message comes from the fact that each network connection requires 2
mapped virtual memory areas. So mongodb is pointing out that if the
server accepts all of the connections allowed, it will not have enough
maps and possibly cause the mongod to crash.

MongoDB defines this value to be 2x the possible of soft ulimit. We
could automatically set `vm.max_map` to 2x of soft ulimit. But I believe
this is not advisable since each memory-mapped area consumes resources,
and having too many can lead to increased memory usage and slower
performance. **Question - is there a programmatic way to determine at
what point the number of memory mapped areas is detrimental? If so we
could always consider decreasing the ulimt.**

With that said I believe setting it to `262144` is a good spot to start
and we could come back to these questions later in the future

## Solution
We adopted [an approach similar to
Kafka](canonical/kafka-operator@db3fe14#diff-0580ab9aef31b44580478eb270ec3422c5e6fef35df585fecb3a68317e246809R347)
here which is to set vm_max when possible. If it is not possible we log
a warning and do not attempt to set it again

## Testing
tested by hand in OpenStack deployment to verify that the following
warning does not appear when connecting to Mongodb. But it was necessary
to restart `mongod` to have the message go away.
```
   2024-11-28T21:05:16.089+00:00: vm.max_map_count is too low
```

## know issue
```
vm.max_map_count is too low
```
is present until ` charmed-mongodb.mongod` is restarted (despite the
value being set before the installation of the`charmed-mongodb` snap.)
  • Loading branch information
MiaAltieri authored Dec 13, 2024
1 parent 8b7e872 commit 0b571ba
Show file tree
Hide file tree
Showing 4 changed files with 331 additions and 0 deletions.
288 changes: 288 additions & 0 deletions lib/charms/operator_libs_linux/v0/sysctl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
# Copyright 2023 Canonical Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Handler for the sysctl config.
This library allows your charm to create and configure sysctl options to the machine.
Validation and merge capabilities are added, for situations where more than one application
are setting values. The following files can be created:
- /etc/sysctl.d/90-juju-<app-name>
Requirements from one application requesting to configure the values.
- /etc/sysctl.d/95-juju-sysctl.conf
Merged file resulting from all other `90-juju-*` application files.
A charm using the sysctl lib will need a data structure like the following:
```
{
"vm.swappiness": "1",
"vm.max_map_count": "262144",
"vm.dirty_ratio": "80",
"vm.dirty_background_ratio": "5",
"net.ipv4.tcp_max_syn_backlog": "4096",
}
```
Now, it can use that template within the charm, or just declare the values directly:
```python
from charms.operator_libs_linux.v0 import sysctl
class MyCharm(CharmBase):
def __init__(self, *args):
...
self.sysctl = sysctl.Config(self.meta.name)
self.framework.observe(self.on.install, self._on_install)
self.framework.observe(self.on.remove, self._on_remove)
def _on_install(self, _):
# Altenatively, read the values from a template
sysctl_data = {"net.ipv4.tcp_max_syn_backlog": "4096"}}
try:
self.sysctl.configure(config=sysctl_data)
except (sysctl.ApplyError, sysctl.ValidationError) as e:
logger.error(f"Error setting values on sysctl: {e.message}")
self.unit.status = BlockedStatus("Sysctl config not possible")
except sysctl.CommandError:
logger.error("Error on sysctl")
def _on_remove(self, _):
self.sysctl.remove()
```
"""

import logging
import re
from pathlib import Path
from subprocess import STDOUT, CalledProcessError, check_output
from typing import Dict, List

logger = logging.getLogger(__name__)

# The unique Charmhub library identifier, never change it
LIBID = "17a6cd4d80104d15b10f9c2420ab3266"

# Increment this major API version when introducing breaking changes
LIBAPI = 0

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 4

CHARM_FILENAME_PREFIX = "90-juju-"
SYSCTL_DIRECTORY = Path("/etc/sysctl.d")
SYSCTL_FILENAME = SYSCTL_DIRECTORY / "95-juju-sysctl.conf"
SYSCTL_HEADER = f"""# This config file was produced by sysctl lib v{LIBAPI}.{LIBPATCH}
#
# This file represents the output of the sysctl lib, which can combine multiple
# configurations into a single file like.
"""


class Error(Exception):
"""Base class of most errors raised by this library."""

@property
def message(self):
"""Return the message passed as an argument."""
return self.args[0]


class CommandError(Error):
"""Raised when there's an error running sysctl command."""


class ApplyError(Error):
"""Raised when there's an error applying values in sysctl."""


class ValidationError(Error):
"""Exception representing value validation error."""


class Config(Dict):
"""Represents the state of the config that a charm wants to enforce."""

_apply_re = re.compile(r"sysctl: permission denied on key \"([a-z_\.]+)\", ignoring$")

def __init__(self, name: str) -> None:
self.name = name
self._data = self._load_data()

def __contains__(self, key: str) -> bool:
"""Check if key is in config."""
return key in self._data

def __len__(self):
"""Get size of config."""
return len(self._data)

def __iter__(self):
"""Iterate over config."""
return iter(self._data)

def __getitem__(self, key: str) -> str:
"""Get value for key form config."""
return self._data[key]

@property
def charm_filepath(self) -> Path:
"""Name for resulting charm config file."""
return SYSCTL_DIRECTORY / f"{CHARM_FILENAME_PREFIX}{self.name}"

def configure(self, config: Dict[str, str]) -> None:
"""Configure sysctl options with a desired set of params.
Args:
config: dictionary with keys to configure:
```
{"vm.swappiness": "10", ...}
```
"""
self._parse_config(config)

# NOTE: case where own charm calls configure() more than once.
if self.charm_filepath.exists():
self._merge(add_own_charm=False)

conflict = self._validate()
if conflict:
raise ValidationError(f"Validation error for keys: {conflict}")

snapshot = self._create_snapshot()
logger.debug("Created snapshot for keys: %s", snapshot)
try:
self._apply()
except ApplyError:
self._restore_snapshot(snapshot)
raise

self._create_charm_file()
self._merge()

def remove(self) -> None:
"""Remove config for charm.
The removal process won't apply any sysctl configuration. It will only merge files from
remaining charms.
"""
self.charm_filepath.unlink(missing_ok=True)
logger.info("Charm config file %s was removed", self.charm_filepath)
self._merge()

def _validate(self) -> List[str]:
"""Validate the desired config params against merged ones."""
common_keys = set(self._data.keys()) & set(self._desired_config.keys())
conflict_keys = []
for key in common_keys:
if self._data[key] != self._desired_config[key]:
logger.warning(
"Values for key '%s' are different: %s != %s",
key,
self._data[key],
self._desired_config[key],
)
conflict_keys.append(key)

return conflict_keys

def _create_charm_file(self) -> None:
"""Write the charm file."""
with open(self.charm_filepath, "w") as f:
f.write(f"# {self.name}\n")
for key, value in self._desired_config.items():
f.write(f"{key}={value}\n")

def _merge(self, add_own_charm=True) -> None:
"""Create the merged sysctl file.
Args:
add_own_charm : bool, if false it will skip the charm file from the merge.
"""
# get all files that start by 90-juju-
data = [SYSCTL_HEADER]
paths = set(SYSCTL_DIRECTORY.glob(f"{CHARM_FILENAME_PREFIX}*"))
if not add_own_charm:
paths.discard(self.charm_filepath)

for path in paths:
with open(path, "r") as f:
data += f.readlines()
with open(SYSCTL_FILENAME, "w") as f:
f.writelines(data)

# Reload data with newly created file.
self._data = self._load_data()

def _apply(self) -> None:
"""Apply values to machine."""
cmd = [f"{key}={value}" for key, value in self._desired_config.items()]
result = self._sysctl(cmd)
failed_values = [
self._apply_re.match(line) for line in result if self._apply_re.match(line)
]
logger.debug("Failed values: %s", failed_values)

if failed_values:
msg = f"Unable to set params: {[f.group(1) for f in failed_values]}"
logger.error(msg)
raise ApplyError(msg)

def _create_snapshot(self) -> Dict[str, str]:
"""Create a snapshot of config options that are going to be set."""
cmd = ["-n"] + list(self._desired_config.keys())
values = self._sysctl(cmd)
return dict(zip(list(self._desired_config.keys()), values))

def _restore_snapshot(self, snapshot: Dict[str, str]) -> None:
"""Restore a snapshot to the machine."""
values = [f"{key}={value}" for key, value in snapshot.items()]
self._sysctl(values)

def _sysctl(self, cmd: List[str]) -> List[str]:
"""Execute a sysctl command."""
cmd = ["sysctl"] + cmd
logger.debug("Executing sysctl command: %s", cmd)
try:
return check_output(cmd, stderr=STDOUT, universal_newlines=True).splitlines()
except CalledProcessError as e:
msg = f"Error executing '{cmd}': {e.stdout}"
logger.error(msg)
raise CommandError(msg)

def _parse_config(self, config: Dict[str, str]) -> None:
"""Parse a config passed to the lib."""
self._desired_config = {k: str(v) for k, v in config.items()}

def _load_data(self) -> Dict[str, str]:
"""Get merged config."""
config = {}
if not SYSCTL_FILENAME.exists():
return config

with open(SYSCTL_FILENAME, "r") as f:
for line in f:
if line.startswith(("#", ";")) or not line.strip() or "=" not in line:
continue

key, _, value = line.partition("=")
config[key.strip()] = value.strip()

return config
22 changes: 22 additions & 0 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
MonitorUser,
OperatorUser,
)
from charms.operator_libs_linux.v0 import sysctl
from charms.operator_libs_linux.v1.systemd import service_running
from charms.operator_libs_linux.v2 import snap
from data_platform_helpers.version_check import (
Expand Down Expand Up @@ -162,6 +163,9 @@ def __init__(self, *args):

self.secrets = SecretCache(self)

self.sysctl_config = sysctl.Config(name=self.app.name)
self.framework.observe(getattr(self.on, "remove"), self._on_remove)

# BEGIN: properties
def _mongo_scrape_config(self) -> List[Dict]:
"""Generates scrape config for the mongo metrics endpoint."""
Expand Down Expand Up @@ -373,6 +377,7 @@ def get_charm_internal_revision(self) -> str:

def _on_install(self, event: InstallEvent) -> None:
"""Handle the install event (fired on startup)."""
self._set_os_config()
self.status.set_and_share_status(MaintenanceStatus("installing MongoDB"))
try:
self.install_snap_packages(packages=Config.SNAP_PACKAGES)
Expand Down Expand Up @@ -647,6 +652,10 @@ def _on_storage_detaching(self, event: StorageDetachingEvent) -> None:
except PyMongoError as e:
logger.error("Failed to remove %s from replica set, error=%r", self.unit.name, e)

def _on_remove(self, _) -> None:
"""Handler for removal."""
self.sysctl_config.remove()

def _on_update_status(self, event: UpdateStatusEvent):
# user-made mistakes might result in other incorrect statues. Prioritise informing users of
# their mistake.
Expand Down Expand Up @@ -1070,6 +1079,19 @@ def install_snap_packages(self, packages: List[Package]) -> None:
)
raise

def _set_os_config(self) -> None:
"""Sets sysctl config."""
try:
self.sysctl_config.configure(Config.Sysctl.OS_REQUIREMENTS)
except (sysctl.ApplyError, sysctl.ValidationError, sysctl.CommandError) as e:
# we allow events to continue in the case that we are not able to correctly configure
# sysctl config, since we can still run the workload with wrong sysctl parameters
# even if it is not optimal.
logger.error(f"Error setting values on sysctl: {e.message}")
# containers share the kernel with the host system, and some sysctl parameters are
# set at kernel level.
logger.warning("sysctl params cannot be set. Is the machine running on a container?")

def _instatiate_keyfile(self, event: StartEvent) -> None:
# wait for keyFile to be created by leader unit
if not (keyfile := self.get_secret(APP_SCOPE, Config.Secrets.SECRET_KEYFILE_NAME)):
Expand Down
7 changes: 7 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,13 @@ class Substrate:
VM = "vm"
K8S = "k8s"

class Sysctl:
"""Constants for Sysctl."""

OS_REQUIREMENTS = {
"vm.max_map_count": "262144",
}

class Upgrade:
"""Upgrade related constants."""

Expand Down
14 changes: 14 additions & 0 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from tenacity import stop_after_attempt

from charm import MongodbOperatorCharm, NotReadyError, subprocess
from config import Config

from .helpers import patch_network_get

Expand Down Expand Up @@ -188,6 +189,19 @@ def test_install_snap_packages_failure(self, _call, snap_cache, update_mongod_se
self.harness.charm.on.install.emit()
self.assertTrue(isinstance(self.harness.charm.unit.status, BlockedStatus))

@patch_network_get(private_address="1.1.1.1")
@patch("charm.update_mongod_service")
@patch("charm.snap.SnapCache")
@patch("charm.setup_logrotate_and_cron")
@patch("charm.copy_licenses_to_unit")
@patch("subprocess.check_call")
@patch("builtins.open")
@patch("charm.sysctl.Config.configure")
def test_install(self, patched_os_config, *unused):
"""Test verifies the correct functions get called when installing apt packages."""
self.harness.charm.on.install.emit()
patched_os_config.assert_called_once_with(Config.Sysctl.OS_REQUIREMENTS)

@patch_network_get(private_address="1.1.1.1")
def test_app_hosts(self):
rel_id = self.harness.charm.model.get_relation("database-peers").id
Expand Down

0 comments on commit 0b571ba

Please sign in to comment.