From 4743da51827875b96ee5097a12c3e268087ca697 Mon Sep 17 00:00:00 2001 From: Mia Altieri <32723809+MiaAltieri@users.noreply.github.com> Date: Wed, 20 Sep 2023 08:57:32 +0200 Subject: [PATCH] [DPE-2561] POC - Start with provided config option: config-server, shard (#246) This PR enables the user to pass the following config options: `replication` (default), `shard`, and `config-server`. This PR supports these options by starting the charm as a `shard` or a `config-server`. If the config option `config-server` is provided, then the charm starts an internal `mongos` service that runs on 0.0.0.0 and is configured to the provided config server. As a POC there are no tests included in this PR. Testing was performed by hand with: ``` charmcraft pack juju deploy ./*charm --config role="shard" shard-one juju deploy ./*charm --config role="config-server" config-server watch -n1 --color juju status --color juju ssh shard-one/0 systemctl status snap.charmed-mongodb.mongod.service exit juju ssh config-server/0 systemctl status snap.charmed-mongodb.mongod.service systemctl status snap.charmed-mongodb.mongos.service exit ``` New snap revision packages a new version of the PBM tool which updated how errors were handled when querying PBM status; commit [4bf9d5f9608976cf84d813f521826ea25eb9e04b](https://github.com/canonical/mongodb-operator/pull/246/commits/4bf9d5f9608976cf84d813f521826ea25eb9e04b) reflects these necessary changes. Follow up PR is to be made immediately after merging of this feature is completed and is a requirement of finishing [DPE-2561](https://warthogs.atlassian.net/browse/DPE-2561). For this PR this will be starting `mongos` with a `--auth` and the same `--keyFile` used to start the `mongod` service. Future PR is to be started after the follow up PR has been made. Once `mongos` is started with auth [DPE-2562](https://warthogs.atlassian.net/browse/DPE-2562) will be started. This includes creating a basic shared library between `config-server` and `shard` components. 
In this PR we will: - implement keyfile sharing across shard and config server components - implement adding shards to cluster (Shard removal is saved for later.) As a POC this PR doesn't include handling edge cases, intelligent status reporting, unit tests, or integration tests. These will be handled later on down the line. Specifically: 1. block config change events for changing role of charm 2. report status of internal `mongos` in `update_status` 3. unit tests and integration tests [DPE-2561]: https://warthogs.atlassian.net/browse/DPE-2561?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ --------- Co-authored-by: Pedro Guimaraes --- config.yaml | 6 ++ lib/charms/mongodb/v0/helpers.py | 36 ++++++++++- lib/charms/mongodb/v0/mongodb_backups.py | 77 +++++++++++++++++++----- src/charm.py | 63 +++++++++++++++---- src/config.py | 13 +++- src/machine_helpers.py | 27 ++++++--- 6 files changed, 182 insertions(+), 40 deletions(-) diff --git a/config.yaml b/config.yaml index 0e820d039..fb9659b65 100644 --- a/config.yaml +++ b/config.yaml @@ -8,3 +8,9 @@ options: When a relation is removed, auto-delete ensures that any relevant databases associated with the relation are also removed default: false + role: + description: | + role config option exists to deploy the charmed-mongodb application as a shard, + config-server, or as a replica set. 
+ type: string + default: replication diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index 4f626f125..b7fb2ca8f 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -19,6 +19,8 @@ ) from pymongo.errors import AutoReconnect, ServerSelectionTimeoutError +from config import Config + # The unique Charmhub library identifier, never change it LIBID = "b9a7fe0c38d8486a9d1ce94c27d4758e" @@ -27,8 +29,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 7 - +LIBPATCH = 8 # path to store mongodb ketFile KEY_FILE = "keyFile" @@ -80,10 +81,35 @@ def get_create_user_cmd( ] +def get_mongos_args(config: MongoDBConfiguration) -> str: + """Returns the arguments used for starting mongos on a config-server side application. + + Returns: + A string representing the arguments to be passed to mongos. + """ + # mongos running on the config server communicates through localhost + config_server_uri = f"{config.replset}/localhost" + + # todo follow up PR add TLS + cmd = [ + # mongos on config server side should run on 0.0.0.0 so it can be accessed by other units + # in the sharded cluster + "--bind_ip_all", + f"--configdb {config_server_uri}", + # config server is already using 27017 + f"--port {Config.MONGOS_PORT}", + # todo followup PR add keyfile and auth + "\n", + ] + + return " ".join(cmd) + + def get_mongod_args( config: MongoDBConfiguration, auth: bool = True, snap_install: bool = False, + role: str = "replication", ) -> str: """Construct the MongoDB startup command line. 
@@ -137,6 +163,12 @@ def get_mongod_args( ] ) + if role == "config-server": + cmd.append("--configsvr") + + if role == "shard": + cmd.append("--shardsvr") + cmd.append("\n") return " ".join(cmd) diff --git a/lib/charms/mongodb/v0/mongodb_backups.py b/lib/charms/mongodb/v0/mongodb_backups.py index c20ade76d..03fe112e6 100644 --- a/lib/charms/mongodb/v0/mongodb_backups.py +++ b/lib/charms/mongodb/v0/mongodb_backups.py @@ -13,14 +13,10 @@ import re import subprocess import time -from typing import Dict, List +from typing import Dict, List, Optional, Union from charms.data_platform_libs.v0.s3 import CredentialsChangedEvent, S3Requirer -from charms.mongodb.v0.helpers import ( - current_pbm_op, - process_pbm_error, - process_pbm_status, -) +from charms.mongodb.v0.helpers import current_pbm_op, process_pbm_status from charms.operator_libs_linux.v1 import snap from ops.framework import Object from ops.model import BlockedStatus, MaintenanceStatus, StatusBase, WaitingStatus @@ -34,6 +30,8 @@ wait_fixed, ) +from config import Config + # The unique Charmhub library identifier, never change it LIBID = "18c461132b824ace91af0d7abe85f40e" @@ -42,7 +40,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 6 +LIBPATCH = 7 logger = logging.getLogger(__name__) @@ -63,6 +61,9 @@ BACKUP_RESTORE_ATTEMPT_COOLDOWN = 15 +_StrOrBytes = Union[str, bytes] + + class ResyncError(Exception): """Raised when pbm is resyncing configurations and is not ready to be used.""" @@ -316,7 +317,7 @@ def _configure_pbm_options(self, event) -> None: ), return except ExecError as e: - self.charm.unit.status = BlockedStatus(process_pbm_error(e.stdout)) + self.charm.unit.status = BlockedStatus(self.process_pbm_error(e.stdout)) return except subprocess.CalledProcessError as e: logger.error("Syncing configurations failed: %s", str(e)) @@ -418,7 +419,7 @@ def _wait_pbm_status(self) -> None: ) raise ResyncError except ExecError 
as e: - self.charm.unit.status = BlockedStatus(process_pbm_error(e.stdout)) + self.charm.unit.status = BlockedStatus(self.process_pbm_error(e.stdout)) def _get_pbm_status(self) -> StatusBase: """Retrieve pbm status.""" @@ -428,15 +429,14 @@ def _get_pbm_status(self) -> StatusBase: try: previous_pbm_status = self.charm.unit.status pbm_status = self.charm.run_pbm_command(PBM_STATUS_CMD) + + # pbm errors are outputted in json and do not raise CLI errors + pbm_error = self.process_pbm_error(pbm_status) + if pbm_error: + return BlockedStatus(pbm_error) + self._log_backup_restore_result(pbm_status, previous_pbm_status) return process_pbm_status(pbm_status) - except ExecError as e: - logger.error(f"Failed to get pbm status. {e}") - return BlockedStatus(process_pbm_error(e.stdout)) - except subprocess.CalledProcessError as e: - # pbm pipes a return code of 1, but its output shows the true error code so it is - # necessary to parse the output - return BlockedStatus(process_pbm_error(e.output)) except Exception as e: # pbm pipes a return code of 1, but its output shows the true error code so it is # necessary to parse the output @@ -652,3 +652,48 @@ def _get_backup_restore_operation_result(self, current_pbm_status, previous_pbm_ return f"Backup {backup_id} completed successfully" return "Unknown operation result" + + def retrieve_error_message(self, pbm_status: Dict) -> str: + """Parses pbm status for an error message from the current unit. + + If pbm_agent is in the error state, the command `pbm status` does not raise an error. + Instead, it is in the log messages. pbm_agent also shows all the error messages for other + replicas in the set. 
+ """ + try: + clusters = pbm_status["cluster"] + for cluster in clusters: + if cluster["rs"] == self.charm.app.name: + break + + for host_info in cluster["nodes"]: + replica_info = ( + f"mongodb/{self.charm._unit_ip(self.charm.unit)}:{Config.MONGOS_PORT}" + ) + if host_info["host"] == replica_info: + break + + return str(host_info["errors"]) + except KeyError: + return "" + + def process_pbm_error(self, pbm_status: Optional[_StrOrBytes]) -> str: + """Returns errors found in PBM status.""" + if type(pbm_status) == bytes: + pbm_status = pbm_status.decode("utf-8") + + try: + error_message = self.retrieve_error_message(json.loads(pbm_status)) + except json.decoder.JSONDecodeError: + # if pbm status doesn't return a parsable dictionary it is an error message + # represented as a string + error_message = pbm_status + + message = None + if "status code: 403" in error_message: + message = "s3 credentials are incorrect." + elif "status code: 404" in error_message: + message = "s3 configurations are incompatible." + elif "status code: 301" in error_message: + message = "s3 configurations are incompatible." 
+ return message diff --git a/src/charm.py b/src/charm.py index 2a116682f..1d64824b9 100755 --- a/src/charm.py +++ b/src/charm.py @@ -92,7 +92,6 @@ class MongodbOperatorCharm(CharmBase): def __init__(self, *args): super().__init__(*args) self._port = Config.MONGODB_PORT - self.framework.observe(self.on.install, self._on_install) self.framework.observe(self.on.start, self._on_start) self.framework.observe(self.on.update_status, self._on_update_status) @@ -234,6 +233,15 @@ def db_initialised(self) -> bool: """Check if MongoDB is initialised.""" return "db_initialised" in self.app_peer_data + @property + def role(self) -> str: + """Returns role of MongoDB deployment.""" + return self.model.config["role"] + + def is_role(self, role_name: str) -> bool: + """Checks if application is running in provided role.""" + return self.role == role_name + @db_initialised.setter def db_initialised(self, value): """Set the db_initialised flag.""" @@ -278,7 +286,10 @@ def _on_install(self, event: InstallEvent) -> None: # Construct the mongod startup commandline args for systemd and reload the daemon. 
update_mongod_service( - auth=auth, machine_ip=self._unit_ip(self.unit), config=self.mongodb_config + auth=auth, + machine_ip=self._unit_ip(self.unit), + config=self.mongodb_config, + role=self.role, ) # add licenses @@ -297,9 +308,7 @@ def _on_start(self, event: StartEvent) -> None: try: logger.debug("starting MongoDB.") self.unit.status = MaintenanceStatus("starting MongoDB") - snap_cache = snap.SnapCache() - mongodb_snap = snap_cache["charmed-mongodb"] - mongodb_snap.start(services=["mongod"], enable=True) + self.start_mongod_service() self.unit.status = ActiveStatus() except snap.SnapError as e: logger.error("An exception occurred when starting mongod agent, error: %s.", str(e)) @@ -950,12 +959,17 @@ def _initialise_replica_set(self, event: StartEvent) -> None: self._peers.data[self.app]["replica_set_hosts"] = json.dumps( [self._unit_ip(self.unit)] ) + logger.info("User initialization") self._init_operator_user() self._init_backup_user() self._init_monitor_user() - logger.info("Manage relations") - self.client_relations.oversee_users(None, None) + + # in sharding, user management is handled by mongos subordinate charm + if self.is_role(Config.Role.REPLICATION): + logger.info("Manage user") + self.client_relations.oversee_users(None, None) + except subprocess.CalledProcessError as e: logger.error( "Deferring on_start: exit code: %i, stderr: %s", e.exit_code, e.stderr @@ -1021,21 +1035,48 @@ def set_secret(self, scope: str, key: str, value: Optional[str]) -> Optional[str else: raise RuntimeError("Unknown secret scope.") + def start_mongod_service(self): + """Starts the mongod service and if necessary starts mongos. 
+ + Raises: + snap.SnapError + """ + snap_cache = snap.SnapCache() + mongodb_snap = snap_cache["charmed-mongodb"] + mongodb_snap.start(services=["mongod"], enable=True) + + # charms running as config server are responsible for maintaining a server side mongos + if self.is_role(Config.Role.CONFIG_SERVER): + mongodb_snap.start(services=["mongos"], enable=True) + + def stop_mongod_service(self): + """Stops the mongod service and if necessary stops mongos. + + Raises: + snap.SnapError + """ + snap_cache = snap.SnapCache() + mongodb_snap = snap_cache["charmed-mongodb"] + mongodb_snap.stop(services=["mongod"]) + + # charms running as config server are responsible for maintaining a server side mongos + if self.is_role(Config.Role.CONFIG_SERVER): + mongodb_snap.stop(services=["mongos"]) + def restart_mongod_service(self, auth=None): """Restarts the mongod service with its associated configuration.""" if auth is None: auth = self.auth_enabled() try: - snap_cache = snap.SnapCache() - mongodb_snap = snap_cache["charmed-mongodb"] - mongodb_snap.stop(services=["mongod"]) + self.stop_mongod_service() update_mongod_service( auth, self._unit_ip(self.unit), config=self.mongodb_config, + role=self.role, ) - mongodb_snap.start(services=["mongod"], enable=True) + self.start_mongod_service() except snap.SnapError as e: logger.error("An exception occurred when starting mongod agent, error: %s.", str(e)) self.unit.status = BlockedStatus("couldn't start MongoDB") diff --git a/src/config.py b/src/config.py index 2d24f9f80..da3261864 100644 --- a/src/config.py +++ b/src/config.py @@ -9,14 +9,21 @@ class Config: """Configuration for MongoDB Charm.""" - SUBSTRATE = "vm" - # We expect the MongoDB container to use the default ports + MONGOS_PORT = 27018 MONGODB_PORT = 27017 + SUBSTRATE = "vm" ENV_VAR_PATH = "/etc/environment" MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current" MONGOD_CONF_DIR = f"{MONGODB_SNAP_DATA_DIR}/etc/mongod" MONGOD_CONF_FILE_PATH = 
f"{MONGOD_CONF_DIR}/mongod.conf" - SNAP_PACKAGES = [("charmed-mongodb", "5/edge", 82)] + SNAP_PACKAGES = [("charmed-mongodb", "5/edge", 84)] + + class Role: + """Role config names for MongoDB Charm.""" + + CONFIG_SERVER = "config-server" + REPLICATION = "replication" + SHARD = "shard" class Actions: """Actions related config for MongoDB Charm.""" diff --git a/src/machine_helpers.py b/src/machine_helpers.py index ba4b18fc8..37512266a 100644 --- a/src/machine_helpers.py +++ b/src/machine_helpers.py @@ -6,7 +6,7 @@ import pwd from pathlib import Path -from charms.mongodb.v0.helpers import get_mongod_args +from charms.mongodb.v0.helpers import get_mongod_args, get_mongos_args from charms.mongodb.v0.mongodb import MongoDBConfiguration from config import Config @@ -18,24 +18,35 @@ MONGO_USER = "snap_daemon" -def update_mongod_service(auth: bool, machine_ip: str, config: MongoDBConfiguration) -> None: +def update_mongod_service( + auth: bool, machine_ip: str, config: MongoDBConfiguration, role: str = "replication" +) -> None: """Updates the mongod service file with the new options for starting.""" - with open(Config.ENV_VAR_PATH, "r") as env_var_file: - env_vars = env_var_file.readlines() - # write our arguments and write them to /etc/environment - the environment variable here is # read in in the charmed-mongob.mongod.service file. 
mongod_start_args = get_mongod_args(config, auth, snap_install=True) + add_args_to_env("MONGOD_ARGS", mongod_start_args) + + if role == "config-server": + mongos_start_args = get_mongos_args(config) + add_args_to_env("MONGOS_ARGS", mongos_start_args) + + +def add_args_to_env(var: str, args: str): + """Adds the provided arguments to the environment as the provided variable.""" + with open(Config.ENV_VAR_PATH, "r") as env_var_file: + env_vars = env_var_file.readlines() + args_added = False for index, line in enumerate(env_vars): - if "MONGOD_ARGS" in line: + if var in line: args_added = True - env_vars[index] = f"MONGOD_ARGS={mongod_start_args}" + env_vars[index] = f"{var}={args}" # if it is the first time adding these args to the file - will will need to append them to the # file if not args_added: - env_vars.append(f"MONGOD_ARGS={mongod_start_args}") + env_vars.append(f"{var}={args}") with open(Config.ENV_VAR_PATH, "w") as service_file: service_file.writelines(env_vars)