From ac24eed18b8c32621f11e5de8073d4c9f0c62d2f Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Wed, 13 Sep 2023 09:09:24 +0000 Subject: [PATCH 01/12] charm can start as shard or config server --- config.yaml | 6 ++++++ lib/charms/mongodb/v0/helpers.py | 7 +++++++ src/charm.py | 6 +++++- src/machine_helpers.py | 10 ++++++---- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/config.yaml b/config.yaml index 0e820d039..fb9659b65 100644 --- a/config.yaml +++ b/config.yaml @@ -8,3 +8,9 @@ options: When a relation is removed, auto-delete ensures that any relevant databases associated with the relation are also removed default: false + role: + description: | + role config option exists to deploy the charmed-mongodb application as a shard, + config-server, or as a replica set. + type: string + default: replication diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index ba33c09d9..58b42258c 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -84,6 +84,7 @@ def get_mongod_args( config: MongoDBConfiguration, auth: bool = True, snap_install: bool = False, + role: str = "replication", ) -> str: """Construct the MongoDB startup command line. @@ -137,6 +138,12 @@ def get_mongod_args( ] ) + if role == "config-server": + cmd.append("--configsvr") + + if role == "shard": + cmd.append("--shardsvr") + cmd.append("\n") return " ".join(cmd) diff --git a/src/charm.py b/src/charm.py index a6c1effcc..e9dc9c742 100755 --- a/src/charm.py +++ b/src/charm.py @@ -253,7 +253,10 @@ def _on_install(self, event: InstallEvent) -> None: # Construct the mongod startup commandline args for systemd and reload the daemon. update_mongod_service( - auth=auth, machine_ip=self._unit_ip(self.unit), config=self.mongodb_config + auth=auth, + machine_ip=self._unit_ip(self.unit), + config=self.mongodb_config, + role=self.model.config["role"], ) # add licenses @@ -947,6 +950,7 @@ def restart_mongod_service(self, auth=None): auth, self._unit_ip(self.unit), config=self.mongodb_config, + role=self.model.config["role"], ) mongodb_snap.start(services=["mongod"]) except snap.SnapError as e: diff --git a/src/machine_helpers.py b/src/machine_helpers.py index eab2050e8..ebb9b2519 100644 --- a/src/machine_helpers.py +++ b/src/machine_helpers.py @@ -18,24 +18,26 @@ MONGO_USER = "snap_daemon" -def update_mongod_service(auth: bool, machine_ip: str, config: MongoDBConfiguration) -> None: +def update_mongod_service( + auth: bool, machine_ip: str, config: MongoDBConfiguration, role: str = "replication" +) -> None: """Updates the mongod service file with the new options for starting.""" with open(Config.ENV_VAR_PATH, "r") as env_var_file: env_vars = env_var_file.readlines() # write our arguments and write them to /etc/environment - the environment variable here is # read in in the charmed-mongob.mongod.service file. 
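# Illustrative sketch (not part of the applied diff): how the new `role` config
# option introduced in this commit maps onto mongod startup flags, mirroring the
# branches added to get_mongod_args() above. The helper name below is
# hypothetical; only the role values and flag names come from the diff.
def role_to_mongod_flags(role: str = "replication") -> list:
    """Extra mongod flags implied by the charm's role ("replication" adds none)."""
    if role == "config-server":
        return ["--configsvr"]
    if role == "shard":
        return ["--shardsvr"]
    return []

# e.g. role_to_mongod_flags("shard") == ["--shardsvr"]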
- mongod_start_args = get_mongod_args(config, auth, snap_install=True) + mongo_start_args = get_mongod_args(config, auth, snap_install=True) args_added = False for index, line in enumerate(env_vars): if "MONGOD_ARGS" in line: args_added = True - env_vars[index] = f"MONGOD_ARGS={mongod_start_args}" + env_vars[index] = f"MONGOD_ARGS={mongo_start_args}" # if it is the first time adding these args to the file - will will need to append them to the # file if not args_added: - env_vars.append(f"MONGOD_ARGS={mongod_start_args}") + env_vars.append(f"MONGOD_ARGS={mongo_start_args}") with open(Config.ENV_VAR_PATH, "w") as service_file: service_file.writelines(env_vars) From f8ff8a1bb71fc74495a6d6df7660b431129c606e Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Thu, 14 Sep 2023 16:27:40 +0000 Subject: [PATCH 02/12] mongos, shard, and config server all start without error --- lib/charms/mongodb/v0/helpers.py | 25 ++++++++++++ src/charm.py | 67 +++++++++++++++++++++++++------- src/config.py | 8 ++-- src/machine_helpers.py | 23 +++++++---- 4 files changed, 98 insertions(+), 25 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index 58b42258c..789aa1791 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -80,6 +80,31 @@ def get_create_user_cmd( ] +def get_mongos_args(config: MongoDBConfiguration) -> str: + """Returns the arguments used for starting mongos on a config-server side application. + + Returns: + A string representing the arguments to be passed to mongos. + """ + # mongos running on the config server communicates through localhost + config_server_uri = f"{config.replset}/localhost" + + # no need to add TLS since no network calls are used, since mongos is configured to listen + # on local host + cmd = [ + # mongos on config server side only runs on local host + "--bind_ip localhost", + # todo figure out this one + f"--configdb {config_server_uri}", + # config server is already using 27017 + "--port 27018", + # todo followup PR add keyfile and auth + "\n", + ] + + return " ".join(cmd) + + def get_mongod_args( config: MongoDBConfiguration, auth: bool = True, diff --git a/src/charm.py b/src/charm.py index e9dc9c742..ba275c067 100755 --- a/src/charm.py +++ b/src/charm.py @@ -75,7 +75,6 @@ class MongodbOperatorCharm(CharmBase): def __init__(self, *args): super().__init__(*args) self._port = Config.MONGODB_PORT - self.framework.observe(self.on.install, self._on_install) self.framework.observe(self.on.start, self._on_start) self.framework.observe(self.on.update_status, self._on_update_status) @@ -213,6 +212,15 @@ def db_initialised(self) -> bool: """Check if MongoDB is initialised.""" return "db_initialised" in self.app_peer_data + @property + def role(self) -> str: + """Returns role of MongoDB deployment.""" + return self.model.config["role"] + + def is_role(self, role_name: str) -> bool: + """Checks if application is running in provided role.""" + return self.role == role_name + @db_initialised.setter def db_initialised(self, value): """Set the db_initialised flag.""" @@ -256,7 +264,7 @@ def _on_install(self, event: InstallEvent) -> None: auth=auth, machine_ip=self._unit_ip(self.unit), config=self.mongodb_config, - role=self.model.config["role"], + role=self.role, ) # add licenses @@ -275,9 +283,7 @@ def _on_start(self, event: StartEvent) -> None: try: logger.debug("starting MongoDB.") self.unit.status = MaintenanceStatus("starting MongoDB") - snap_cache = snap.SnapCache() - mongodb_snap = snap_cache["charmed-mongodb"] 
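# Illustrative sketch (not part of the applied diff): a self-contained example of
# the argument string get_mongos_args() (added above) assembles for the
# config-server-side mongos. The replica-set name "rs0" is hypothetical; the
# flags, the localhost bind address, and port 27018 are taken from the diff at
# this point in the series (later commits change the bind address and read the
# port from Config.MONGOS_PORT).
replset = "rs0"
mongos_args = " ".join(
    [
        "--bind_ip localhost",              # mongos only listens locally here
        f"--configdb {replset}/localhost",  # config server replica set reached via localhost
        "--port 27018",                     # mongod already occupies 27017
        "\n",
    ]
)
# mongos_args == "--bind_ip localhost --configdb rs0/localhost --port 27018 \n"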
- mongodb_snap.start(services=["mongod"]) + self.start_mongod_service() self.unit.status = ActiveStatus() except snap.SnapError as e: logger.error("An exception occurred when starting mongod agent, error: %s.", str(e)) @@ -291,7 +297,7 @@ def _on_start(self, event: StartEvent) -> None: return # check if this unit's deployment of MongoDB is ready - with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: if not direct_mongo.is_ready: logger.debug("mongodb service is not ready yet.") self.unit.status = WaitingStatus("waiting for MongoDB to start") @@ -464,7 +470,7 @@ def _on_update_status(self, event: UpdateStatusEvent): return # Cannot check more advanced MongoDB statuses if mongod hasn't started. - with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: if not direct_mongo.is_ready: self.unit.status = WaitingStatus("Waiting for MongoDB to start") return @@ -871,19 +877,24 @@ def _initialise_replica_set(self, event: StartEvent) -> None: # can be corrupted. return - with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: try: logger.info("Replica Set initialization") direct_mongo.init_replset() self._peers.data[self.app]["replica_set_hosts"] = json.dumps( [self._unit_ip(self.unit)] ) + logger.info("User initialization") self._init_operator_user() self._init_backup_user() self._init_monitor_user() - logger.info("Manage relations") - self.client_relations.oversee_users(None, None) + + # in sharding, user management is handled by mongos subordinate charm + if self.is_role(Config.REPLICATION): + logger.info("Manage user") + self.client_relations.oversee_users(None, None) + except subprocess.CalledProcessError as e: logger.error( "Deferring on_start: exit code: %i, stderr: %s", e.exit_code, e.stderr @@ -937,22 +948,48 @@ def set_secret(self, scope: str, key: str, value: Optional[str]) -> None: else: raise RuntimeError("Unknown secret scope.") + def start_mongod_service(self): + """Starts the mongod service and if necessary starts mongos. + + Raises: + snap.SnapError + """ + snap_cache = snap.SnapCache() + mongodb_snap = snap_cache["charmed-mongodb"] + mongodb_snap.start(services=["mongod"]) + + # charms running as config server are responsible for maintaing a server side mongos + if self.is_role(Config.CONFIG_SERVER): + mongodb_snap.start(services=["mongos"]) + + def stop_mongod_service(self): + """Stops the mongod service and if necessary stops mongos. 
+ + Raises: + snap.SnapError + """ + snap_cache = snap.SnapCache() + mongodb_snap = snap_cache["charmed-mongodb"] + mongodb_snap.stop(services=["mongod"]) + + # charms running as config server are responsible for maintaing a server side mongos + if self.is_role(Config.CONFIG_SERVER): + mongodb_snap.stop(services=["mongos"]) + def restart_mongod_service(self, auth=None): """Restarts the mongod service with its associated configuration.""" if auth is None: auth = self.auth_enabled() try: - snap_cache = snap.SnapCache() - mongodb_snap = snap_cache["charmed-mongodb"] - mongodb_snap.stop(services=["mongod"]) + self.stop_mongod_service() update_mongod_service( auth, self._unit_ip(self.unit), config=self.mongodb_config, - role=self.model.config["role"], + role=self.role, ) - mongodb_snap.start(services=["mongod"]) + self.start_mongod_service() except snap.SnapError as e: logger.error("An exception occurred when starting mongod agent, error: %s.", str(e)) self.unit.status = BlockedStatus("couldn't start MongoDB") diff --git a/src/config.py b/src/config.py index 8d8279f87..7c13a5c02 100644 --- a/src/config.py +++ b/src/config.py @@ -6,14 +6,16 @@ class Config: """Configuration for MongoDB Charm.""" - SUBSTRATE = "vm" - # We expect the MongoDB container to use the default ports MONGODB_PORT = 27017 + SUBSTRATE = "vm" ENV_VAR_PATH = "/etc/environment" MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current" MONGOD_CONF_DIR = f"{MONGODB_SNAP_DATA_DIR}/etc/mongod" MONGOD_CONF_FILE_PATH = f"{MONGOD_CONF_DIR}/mongod.conf" - SNAP_PACKAGES = [("charmed-mongodb", "5/edge", 82)] + SNAP_PACKAGES = [("charmed-mongodb", "5/edge/mongos", 83)] + CONFIG_SERVER = "config-server" + REPLICATION = "replication" + SHARD = "shard" class Actions: """Actions related config for MongoDB Charm.""" diff --git a/src/machine_helpers.py b/src/machine_helpers.py index ebb9b2519..ddd3f031e 100644 --- a/src/machine_helpers.py +++ b/src/machine_helpers.py @@ -6,7 +6,7 @@ import pwd from pathlib import Path -from charms.mongodb.v0.helpers import get_mongod_args +from charms.mongodb.v0.helpers import get_mongod_args, get_mongos_args from charms.mongodb.v0.mongodb import MongoDBConfiguration from config import Config @@ -22,22 +22,31 @@ def update_mongod_service( auth: bool, machine_ip: str, config: MongoDBConfiguration, role: str = "replication" ) -> None: """Updates the mongod service file with the new options for starting.""" + # write our arguments and write them to /etc/environment - the environment variable here is + # read in in the charmed-mongob.mongod.service file. + mongod_start_args = get_mongod_args(config, auth, snap_install=True) + add_args_to_env("MONGOD_ARGS", mongod_start_args) + + if role == "config-server": + mongos_start_args = get_mongos_args(config) + add_args_to_env("MONGOS_ARGS", mongos_start_args) + + +def add_args_to_env(var: str, args: str): + """Adds the provided arguments to the environment as the provided variable.""" with open(Config.ENV_VAR_PATH, "r") as env_var_file: env_vars = env_var_file.readlines() - # write our arguments and write them to /etc/environment - the environment variable here is - # read in in the charmed-mongob.mongod.service file. 
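# Illustrative sketch (not part of the applied diff): the replace-or-append
# behaviour of the new add_args_to_env() helper, demonstrated on an in-memory
# list rather than Config.ENV_VAR_PATH so the example is self-contained. The
# function name and sample values below are hypothetical; the logic mirrors the
# hunk above.
def upsert_env_line(env_vars: list, var: str, args: str) -> list:
    """Replace an existing VAR=... line if present, otherwise append one."""
    for index, line in enumerate(env_vars):
        if var in line:
            env_vars[index] = f"{var}={args}"
            return env_vars
    env_vars.append(f"{var}={args}")
    return env_vars

# A config-server unit ends up with both entries in /etc/environment, which the
# snap's mongod and mongos services read at start-up:
lines = ["PATH=/usr/bin\n"]
upsert_env_line(lines, "MONGOD_ARGS", "<args from get_mongod_args>")
upsert_env_line(lines, "MONGOS_ARGS", "<args from get_mongos_args>")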
- mongo_start_args = get_mongod_args(config, auth, snap_install=True) args_added = False for index, line in enumerate(env_vars): - if "MONGOD_ARGS" in line: + if var in line: args_added = True - env_vars[index] = f"MONGOD_ARGS={mongo_start_args}" + env_vars[index] = f"{var}={args}" # if it is the first time adding these args to the file - will will need to append them to the # file if not args_added: - env_vars.append(f"MONGOD_ARGS={mongo_start_args}") + env_vars.append(f"{var}={args}") with open(Config.ENV_VAR_PATH, "w") as service_file: service_file.writelines(env_vars) From 9b64f319b4be0d2ece66889d22422a7afbb36d13 Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Fri, 15 Sep 2023 08:09:37 +0000 Subject: [PATCH 03/12] use correct snap --- src/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.py b/src/config.py index 7c13a5c02..85a9fe3fa 100644 --- a/src/config.py +++ b/src/config.py @@ -12,7 +12,7 @@ class Config: MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current" MONGOD_CONF_DIR = f"{MONGODB_SNAP_DATA_DIR}/etc/mongod" MONGOD_CONF_FILE_PATH = f"{MONGOD_CONF_DIR}/mongod.conf" - SNAP_PACKAGES = [("charmed-mongodb", "5/edge/mongos", 83)] + SNAP_PACKAGES = [("charmed-mongodb", "5/edge", 84)] CONFIG_SERVER = "config-server" REPLICATION = "replication" SHARD = "shard" From 11f3c6515a20cba37c71be8d2c4e89015e4cbae3 Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Fri, 15 Sep 2023 08:53:38 +0000 Subject: [PATCH 04/12] fmt + lint --- src/charm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/charm.py b/src/charm.py index ff83fbdc3..d86a0c4ac 100755 --- a/src/charm.py +++ b/src/charm.py @@ -322,7 +322,7 @@ def _on_start(self, event: StartEvent) -> None: return # check if this unit's deployment of MongoDB is ready - with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: if not direct_mongo.is_ready: logger.debug("mongodb service is not ready yet.") self.unit.status = WaitingStatus("waiting for MongoDB to start") @@ -495,7 +495,7 @@ def _on_update_status(self, event: UpdateStatusEvent): return # Cannot check more advanced MongoDB statuses if mongod hasn't started. - with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: if not direct_mongo.is_ready: self.unit.status = WaitingStatus("Waiting for MongoDB to start") return @@ -952,7 +952,7 @@ def _initialise_replica_set(self, event: StartEvent) -> None: # can be corrupted. 
return - with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: try: logger.info("Replica Set initialization") direct_mongo.init_replset() @@ -1045,7 +1045,7 @@ def start_mongod_service(self): mongodb_snap = snap_cache["charmed-mongodb"] mongodb_snap.start(services=["mongod"]) - # charms running as config server are responsible for maintaing a server side mongos + # charms running as config server are responsible for maintaining a server side mongos if self.is_role(Config.CONFIG_SERVER): mongodb_snap.start(services=["mongos"]) @@ -1059,7 +1059,7 @@ def stop_mongod_service(self): mongodb_snap = snap_cache["charmed-mongodb"] mongodb_snap.stop(services=["mongod"]) - # charms running as config server are responsible for maintaing a server side mongos + # charms running as config server are responsible for maintaining a server side mongos if self.is_role(Config.CONFIG_SERVER): mongodb_snap.stop(services=["mongos"]) From 8e8572b2e154d4d62e18f3ca7364c4e32918047e Mon Sep 17 00:00:00 2001 From: Pedro Guimaraes Date: Fri, 15 Sep 2023 14:14:26 +0200 Subject: [PATCH 05/12] Moving from mongo to mongosh cli --- lib/charms/mongodb/v0/helpers.py | 2 +- tests/integration/relation_tests/legacy_relations/helpers.py | 2 +- tests/integration/test_charm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index ba33c09d9..1f6fa8860 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -49,7 +49,7 @@ # noinspection GrazieInspection def get_create_user_cmd( - config: MongoDBConfiguration, mongo_path="charmed-mongodb.mongo" + config: MongoDBConfiguration, mongo_path="charmed-mongodb.mongosh" ) -> List[str]: """Creates initial admin user for MongoDB. 
diff --git a/tests/integration/relation_tests/legacy_relations/helpers.py b/tests/integration/relation_tests/legacy_relations/helpers.py index 9bb27b3fe..ccde5c01d 100644 --- a/tests/integration/relation_tests/legacy_relations/helpers.py +++ b/tests/integration/relation_tests/legacy_relations/helpers.py @@ -136,7 +136,7 @@ async def mongo_tls_command(ops_test: OpsTest) -> str: replica_set_uri = f"mongodb://{hosts}/admin?replicaSet={app}" return ( - f"charmed-mongodb.mongo '{replica_set_uri}' --eval 'rs.status()'" + f"charmed-mongodb.mongosh '{replica_set_uri}' --eval 'rs.status()'" f" --tls --tlsCAFile {EXTERNAL_CERT_PATH}" f" --tlsCertificateKeyFile {EXTERNAL_PEM_PATH}" ) diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py index c7e1450fd..1832acf42 100644 --- a/tests/integration/test_charm.py +++ b/tests/integration/test_charm.py @@ -177,7 +177,7 @@ async def test_monitor_user(ops_test: OpsTest) -> None: hosts = ",".join(replica_set_hosts) replica_set_uri = f"mongodb://monitor:{password}@{hosts}/admin?replicaSet=mongodb" - admin_mongod_cmd = f"charmed-mongodb.mongo '{replica_set_uri}' --eval 'rs.conf()'" + admin_mongod_cmd = f"charmed-mongodb.mongosh '{replica_set_uri}' --eval 'rs.conf()'" check_monitor_cmd = f"exec --unit {unit.name} -- {admin_mongod_cmd}" return_code, _, _ = await ops_test.juju(*check_monitor_cmd.split()) assert return_code == 0, "command rs.conf() on monitor user does not work" From 4bf9d5f9608976cf84d813f521826ea25eb9e04b Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Fri, 15 Sep 2023 15:30:16 +0000 Subject: [PATCH 06/12] update error processing --- lib/charms/mongodb/v0/helpers.py | 21 +------ lib/charms/mongodb/v0/mongodb_backups.py | 70 +++++++++++++++++++----- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index 789aa1791..cbde28fcf 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -7,7 +7,7 @@ import secrets import string import subprocess -from typing import List, Optional, Union +from typing import List from charms.mongodb.v0.mongodb import MongoDBConfiguration, MongoDBConnection from ops.model import ( @@ -234,25 +234,6 @@ def copy_licenses_to_unit(): ) -_StrOrBytes = Union[str, bytes] - - -def process_pbm_error(error_string: Optional[_StrOrBytes]) -> str: - """Parses pbm error string and returns a user friendly message.""" - message = "couldn't configure s3 backup option" - if not error_string: - return message - if type(error_string) == bytes: - error_string = error_string.decode("utf-8") - if "status code: 403" in error_string: # type: ignore - message = "s3 credentials are incorrect." - elif "status code: 404" in error_string: # type: ignore - message = "s3 configurations are incompatible." - elif "status code: 301" in error_string: # type: ignore - message = "s3 configurations are incompatible." 
- return message - - def current_pbm_op(pbm_status: str) -> str: """Parses pbm status for the operation that pbm is running.""" pbm_status = json.loads(pbm_status) diff --git a/lib/charms/mongodb/v0/mongodb_backups.py b/lib/charms/mongodb/v0/mongodb_backups.py index c65c86f1d..94d661975 100644 --- a/lib/charms/mongodb/v0/mongodb_backups.py +++ b/lib/charms/mongodb/v0/mongodb_backups.py @@ -13,14 +13,10 @@ import re import subprocess import time -from typing import Dict, List +from typing import Dict, List, Optional, Union from charms.data_platform_libs.v0.s3 import CredentialsChangedEvent, S3Requirer -from charms.mongodb.v0.helpers import ( - current_pbm_op, - process_pbm_error, - process_pbm_status, -) +from charms.mongodb.v0.helpers import current_pbm_op, process_pbm_status from charms.operator_libs_linux.v1 import snap from ops.framework import Object from ops.model import BlockedStatus, MaintenanceStatus, StatusBase, WaitingStatus @@ -316,7 +312,7 @@ def _configure_pbm_options(self, event) -> None: ), return except ExecError as e: - self.charm.unit.status = BlockedStatus(process_pbm_error(e.stdout)) + self.charm.unit.status = BlockedStatus(self.process_pbm_error(e.stdout)) return except subprocess.CalledProcessError as e: logger.error("Syncing configurations failed: %s", str(e)) @@ -418,7 +414,7 @@ def _wait_pbm_status(self) -> None: ) raise ResyncError except ExecError as e: - self.charm.unit.status = BlockedStatus(process_pbm_error(e.stdout)) + self.charm.unit.status = BlockedStatus(self.process_pbm_error(e.stdout)) def _get_pbm_status(self) -> StatusBase: """Retrieve pbm status.""" @@ -428,15 +424,14 @@ def _get_pbm_status(self) -> StatusBase: try: previous_pbm_status = self.charm.unit.status pbm_status = self.charm.run_pbm_command(PBM_STATUS_CMD) + + # pbm errors are outputted in json and do not raise CLI errors + pbm_error = self.process_pbm_error(pbm_status) + if pbm_error: + return BlockedStatus(pbm_error) + self._log_backup_restore_result(pbm_status, previous_pbm_status) return process_pbm_status(pbm_status) - except ExecError as e: - logger.error(f"Failed to get pbm status. {e}") - return BlockedStatus(process_pbm_error(e.stdout)) - except subprocess.CalledProcessError as e: - # pbm pipes a return code of 1, but its output shows the true error code so it is - # necessary to parse the output - return BlockedStatus(process_pbm_error(e.output)) except Exception as e: # pbm pipes a return code of 1, but its output shows the true error code so it is # necessary to parse the output @@ -652,3 +647,48 @@ def _get_backup_restore_operation_result(self, current_pbm_status, previous_pbm_ return f"Backup {backup_id} completed successfully" return "Unknown operation result" + + def retrieve_error_message(self, pbm_status: Dict) -> str: + """Parses pbm status for an error message from the current unit. + + If pbm_agent is in the error state, the command `pbm status` does not raise an error. + Instead, it is in the log messages. pbm_agent also shows all the error messages for other + replicas in the set. 
+ """ + try: + clusters = pbm_status["cluster"] + for cluster in clusters: + if cluster["rs"] == self.charm.app.name: + break + + for host_info in cluster["nodes"]: + replica_info = f"mongodb/{self.charm._unit_ip(self.charm.unit)}:27107" + if host_info["host"] == replica_info: + break + + return str(host_info["errors"]) + except KeyError: + return "" + + _StrOrBytes = Union[str, bytes] + + def process_pbm_error(self, pbm_status: Optional[_StrOrBytes]) -> str: + """Returns errors found in PBM status.""" + if type(pbm_status) == bytes: + pbm_status = pbm_status.decode("utf-8") + + try: + error_message = self.retrieve_error_message(json.loads(pbm_status)) + except json.decoder.JSONDecodeError: + # if pbm status doesn't return a parsable dictionary it is an error message + # represented as a string + error_message = pbm_status + + message = None + if "status code: 403" in error_message: + message = "s3 credentials are incorrect." + elif "status code: 404" in error_message: + message = "s3 configurations are incompatible." + elif "status code: 301" in error_message: + message = "s3 configurations are incompatible." + return message From e924c0067d9732e2ceefc74b6f5947c2bf04ee37 Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Fri, 15 Sep 2023 15:32:12 +0000 Subject: [PATCH 07/12] bump lib patch --- lib/charms/mongodb/v0/helpers.py | 2 +- lib/charms/mongodb/v0/mongodb_backups.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index cbde28fcf..63784afa0 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -27,7 +27,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 8 +LIBPATCH = 9 # path to store mongodb ketFile diff --git a/lib/charms/mongodb/v0/mongodb_backups.py b/lib/charms/mongodb/v0/mongodb_backups.py index 94d661975..e947aa2f7 100644 --- a/lib/charms/mongodb/v0/mongodb_backups.py +++ b/lib/charms/mongodb/v0/mongodb_backups.py @@ -38,7 +38,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 6 +LIBPATCH = 7 logger = logging.getLogger(__name__) From 06af49a06725860a41cf9ee8eb6c5489ff8381e0 Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Tue, 19 Sep 2023 08:20:51 +0000 Subject: [PATCH 08/12] mongos should be run on 0.0.0.0 --- lib/charms/mongodb/v0/helpers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index ec7eaeaf4..d4c820aaa 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -91,8 +91,9 @@ def get_mongos_args(config: MongoDBConfiguration) -> str: # no need to add TLS since no network calls are used, since mongos is configured to listen # on local host cmd = [ - # mongos on config server side only runs on local host - "--bind_ip localhost", + # mongos on config server side should run on 0.0.0.0 so it can be accessed by other units + # in the sharded cluster + "--bind_ip 0.0.0.0", # todo figure out this one f"--configdb {config_server_uri}", # config server is already using 27017 From 363db5f01a2dcf049dda9c0011c64d9c890da74c Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Tue, 19 Sep 2023 08:51:38 +0000 Subject: [PATCH 09/12] addressing PR comments --- lib/charms/mongodb/v0/helpers.py | 4 +- lib/charms/mongodb/v0/mongodb_backups.py | 11 +- src/charm.py | 6 +- 
src/config.py | 11 +- .../integration/backup_tests/test_backups.py | 606 +++++++++--------- 5 files changed, 325 insertions(+), 313 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index d4c820aaa..c95d99393 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -19,6 +19,8 @@ ) from pymongo.errors import AutoReconnect, ServerSelectionTimeoutError +from config import Config + # The unique Charmhub library identifier, never change it LIBID = "b9a7fe0c38d8486a9d1ce94c27d4758e" @@ -97,7 +99,7 @@ def get_mongos_args(config: MongoDBConfiguration) -> str: # todo figure out this one f"--configdb {config_server_uri}", # config server is already using 27017 - "--port 27018", + f"--port {Config.MONGOS_PORT}", # todo followup PR add keyfile and auth "\n", ] diff --git a/lib/charms/mongodb/v0/mongodb_backups.py b/lib/charms/mongodb/v0/mongodb_backups.py index e947aa2f7..f0fde7286 100644 --- a/lib/charms/mongodb/v0/mongodb_backups.py +++ b/lib/charms/mongodb/v0/mongodb_backups.py @@ -30,6 +30,8 @@ wait_fixed, ) +from config import Config + # The unique Charmhub library identifier, never change it LIBID = "9f2b91c6128d48d6ba22724bf365da3b" @@ -59,6 +61,9 @@ BACKUP_RESTORE_ATTEMPT_COOLDOWN = 15 +_StrOrBytes = Union[str, bytes] + + class ResyncError(Exception): """Raised when pbm is resyncing configurations and is not ready to be used.""" @@ -662,7 +667,9 @@ def retrieve_error_message(self, pbm_status: Dict) -> str: break for host_info in cluster["nodes"]: - replica_info = f"mongodb/{self.charm._unit_ip(self.charm.unit)}:27107" + replica_info = ( + f"mongodb/{self.charm._unit_ip(self.charm.unit)}:{Config.MONGOS_PORT}" + ) if host_info["host"] == replica_info: break @@ -670,8 +677,6 @@ def retrieve_error_message(self, pbm_status: Dict) -> str: except KeyError: return "" - _StrOrBytes = Union[str, bytes] - def process_pbm_error(self, pbm_status: Optional[_StrOrBytes]) -> str: """Returns errors found in PBM status.""" if type(pbm_status) == bytes: diff --git a/src/charm.py b/src/charm.py index d86a0c4ac..bcad1165b 100755 --- a/src/charm.py +++ b/src/charm.py @@ -966,7 +966,7 @@ def _initialise_replica_set(self, event: StartEvent) -> None: self._init_monitor_user() # in sharding, user management is handled by mongos subordinate charm - if self.is_role(Config.REPLICATION): + if self.is_role(Config.Role.REPLICATION): logger.info("Manage user") self.client_relations.oversee_users(None, None) @@ -1046,7 +1046,7 @@ def start_mongod_service(self): mongodb_snap.start(services=["mongod"]) # charms running as config server are responsible for maintaining a server side mongos - if self.is_role(Config.CONFIG_SERVER): + if self.is_role(Config.Role.CONFIG_SERVER): mongodb_snap.start(services=["mongos"]) def stop_mongod_service(self): @@ -1060,7 +1060,7 @@ def stop_mongod_service(self): mongodb_snap.stop(services=["mongod"]) # charms running as config server are responsible for maintaining a server side mongos - if self.is_role(Config.CONFIG_SERVER): + if self.is_role(Config.Role.CONFIG_SERVER): mongodb_snap.stop(services=["mongos"]) def restart_mongod_service(self, auth=None): diff --git a/src/config.py b/src/config.py index 283b13f19..da3261864 100644 --- a/src/config.py +++ b/src/config.py @@ -9,6 +9,7 @@ class Config: """Configuration for MongoDB Charm.""" + MONGOS_PORT = 27018 MONGODB_PORT = 27017 SUBSTRATE = "vm" ENV_VAR_PATH = "/etc/environment" @@ -16,9 +17,13 @@ class Config: MONGOD_CONF_DIR = f"{MONGODB_SNAP_DATA_DIR}/etc/mongod" 
MONGOD_CONF_FILE_PATH = f"{MONGOD_CONF_DIR}/mongod.conf" SNAP_PACKAGES = [("charmed-mongodb", "5/edge", 84)] - CONFIG_SERVER = "config-server" - REPLICATION = "replication" - SHARD = "shard" + + class Role: + """Role config names for MongoDB Charm.""" + + CONFIG_SERVER = "config-server" + REPLICATION = "replication" + SHARD = "shard" class Actions: """Actions related config for MongoDB Charm.""" diff --git a/tests/integration/backup_tests/test_backups.py b/tests/integration/backup_tests/test_backups.py index b7cb64d48..cc78957a2 100644 --- a/tests/integration/backup_tests/test_backups.py +++ b/tests/integration/backup_tests/test_backups.py @@ -99,306 +99,306 @@ async def test_blocked_incorrect_conf(ops_test: OpsTest) -> None: assert db_unit.workload_status_message == "s3 configurations are incompatible." -@pytest.mark.abort_on_fail -async def test_ready_correct_conf(ops_test: OpsTest) -> None: - """Verifies charm goes into active status when s3 config and creds options are correct.""" - db_app_name = await helpers.app_name(ops_test) - choices = string.ascii_letters + string.digits - unique_path = "".join([secrets.choice(choices) for _ in range(4)]) - configuration_parameters = { - "bucket": "data-charms-testing", - "path": f"mongodb-vm/test-{unique_path}", - "endpoint": "https://s3.amazonaws.com", - "region": "us-east-1", - } - - # apply new configuration options - await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - - # after applying correct config options and creds the applications should both be active - await ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active", timeout=TIMEOUT) - await ops_test.model.wait_for_idle( - apps=[db_app_name], status="active", timeout=TIMEOUT, idle_period=60 - ) - - -@pytest.mark.abort_on_fail -async def test_create_and_list_backups(ops_test: OpsTest) -> None: - db_unit = await helpers.get_leader_unit(ops_test) - - # verify backup list works - action = await db_unit.run_action(action_name="list-backups") - list_result = await action.wait() - backups = list_result.results["backups"] - assert backups, "backups not outputted" - - # verify backup is started - action = await db_unit.run_action(action_name="create-backup") - backup_result = await action.wait() - assert "backup started" in backup_result.results["backup-status"], "backup didn't start" - - # verify backup is present in the list of backups - # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a - # backup can take a lot of time so this function returns once the command was successfully - # sent to pbm. Therefore we should retry listing the backup several times - try: - for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)): - with attempt: - backups = await helpers.count_logical_backups(db_unit) - assert backups == 1 - except RetryError: - assert backups == 1, "Backup not created." - - -@pytest.mark.abort_on_fail -async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: - """With writes in the DB test creating a backup while another one is running. - - Note that before creating the second backup we change the bucket and change the s3 storage - from AWS to GCP. This test verifies that the first backup in AWS is made, the second backup - in GCP is made, and that before the second backup is made that pbm correctly resyncs. 
- """ - db_app_name = await helpers.app_name(ops_test) - db_unit = await helpers.get_leader_unit(ops_test) - - # create first backup once ready - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - action = await db_unit.run_action(action_name="create-backup") - first_backup = await action.wait() - assert first_backup.status == "completed", "First backup not started." - - # while first backup is running change access key, secret keys, and bucket name - # for GCP - await helpers.set_credentials(ops_test, cloud="GCP") - - # change to GCP configs and wait for PBM to resync - configuration_parameters = { - "bucket": "data-charms-testing", - "endpoint": "https://storage.googleapis.com", - "region": "", - } - await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # create a backup as soon as possible. might not be immediately possible since only one backup - # can happen at a time. - try: - for attempt in Retrying(stop=stop_after_delay(40), wait=wait_fixed(5)): - with attempt: - action = await db_unit.run_action(action_name="create-backup") - second_backup = await action.wait() - assert second_backup.status == "completed" - except RetryError: - assert second_backup.status == "completed", "Second backup not started." - - # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a - # backup can take a lot of time so this function returns once the command was successfully - # sent to pbm. Therefore before checking, wait for Charmed MongoDB to finish creating the - # backup - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # verify that backups was made in GCP bucket - try: - for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): - with attempt: - backups = await helpers.count_logical_backups(db_unit) - assert backups == 1, "Backup not created in bucket on GCP." - except RetryError: - assert backups == 1, "Backup not created in first bucket on GCP." - - # set AWS credentials, set configs for s3 storage, and wait to resync - await helpers.set_credentials(ops_test, cloud="AWS") - configuration_parameters = { - "bucket": "data-charms-testing", - "region": "us-east-1", - "endpoint": "https://s3.amazonaws.com", - } - await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # verify that backups was made on the AWS bucket - try: - for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): - with attempt: - backups = await helpers.count_logical_backups(db_unit) - assert backups == 2, "Backup not created in bucket on AWS." - except RetryError: - assert backups == 2, "Backup not created in bucket on AWS." 
- - -@pytest.mark.abort_on_fail -async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: - """Simple backup tests that verifies that writes are correctly restored.""" - # count total writes - number_writes = await ha_helpers.count_writes(ops_test) - assert number_writes > 0, "no writes to backup" - - # create a backup in the AWS bucket - db_app_name = await helpers.app_name(ops_test) - db_unit = await helpers.get_leader_unit(ops_test) - prev_backups = await helpers.count_logical_backups(db_unit) - action = await db_unit.run_action(action_name="create-backup") - first_backup = await action.wait() - assert first_backup.status == "completed", "First backup not started." - - # verify that backup was made on the bucket - try: - for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): - with attempt: - backups = await helpers.count_logical_backups(db_unit) - assert backups == prev_backups + 1, "Backup not created." - except RetryError: - assert backups == prev_backups + 1, "Backup not created." - - # add writes to be cleared after restoring the backup. Note these are written to the same - # collection that was backed up. - await helpers.insert_unwanted_data(ops_test) - new_number_of_writes = await ha_helpers.count_writes(ops_test) - assert new_number_of_writes > number_writes, "No writes to be cleared after restoring." - - # find most recent backup id and restore - action = await db_unit.run_action(action_name="list-backups") - list_result = await action.wait() - list_result = list_result.results["backups"] - most_recent_backup = list_result.split("\n")[-1] - backup_id = most_recent_backup.split()[0] - action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) - restore = await action.wait() - assert restore.results["restore-status"] == "restore started", "restore not successful" - - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # verify all writes are present - try: - for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): - with attempt: - number_writes_restored = await ha_helpers.count_writes(ops_test) - assert number_writes == number_writes_restored, "writes not correctly restored" - except RetryError: - assert number_writes == number_writes_restored, "writes not correctly restored" - - -@pytest.mark.parametrize("cloud_provider", ["AWS", "GCP"]) -async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_provider): - # configure test for the cloud provider - db_app_name = await helpers.app_name(ops_test) - await helpers.set_credentials(ops_test, cloud=cloud_provider) - if cloud_provider == "AWS": - configuration_parameters = { - "bucket": "data-charms-testing", - "region": "us-east-1", - "endpoint": "https://s3.amazonaws.com", - } - else: - configuration_parameters = { - "bucket": "data-charms-testing", - "endpoint": "https://storage.googleapis.com", - "region": "", - } - - await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"), - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # create a backup - writes_in_old_cluster = await ha_helpers.count_writes(ops_test, db_app_name) - assert writes_in_old_cluster > 0, "old cluster has no writes." - await helpers.create_and_verify_backup(ops_test) - - # save old password, since after restoring we will need this password to authenticate. 
- old_password = await ha_helpers.get_password(ops_test, db_app_name) - - # deploy a new cluster with a different name - db_charm = await ops_test.build_charm(".") - await ops_test.model.deploy(db_charm, num_units=3, application_name=NEW_CLUSTER) - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), - ) - - db_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER) - action = await db_unit.run_action("set-password", **{"password": old_password}) - action = await action.wait() - assert action.status == "completed" - - # relate to s3 - s3 has the necessary configurations - await ops_test.model.add_relation(S3_APP_NAME, NEW_CLUSTER) - await ops_test.model.block_until( - lambda: helpers.is_relation_joined(ops_test, ENDPOINT, ENDPOINT) is True, - timeout=TIMEOUT, - ) - - # wait for new cluster to sync - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), - ) - - # verify that the listed backups from the old cluster are not listed as failed. - assert ( - await helpers.count_failed_backups(db_unit) == 0 - ), "Backups from old cluster are listed as failed" - - # find most recent backup id and restore - action = await db_unit.run_action(action_name="list-backups") - list_result = await action.wait() - list_result = list_result.results["backups"] - most_recent_backup = list_result.split("\n")[-1] - backup_id = most_recent_backup.split()[0] - action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) - restore = await action.wait() - assert restore.results["restore-status"] == "restore started", "restore not successful" - - # verify all writes are present - try: - for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): - with attempt: - writes_in_new_cluster = await ha_helpers.count_writes(ops_test, NEW_CLUSTER) - assert ( - writes_in_new_cluster == writes_in_old_cluster - ), "new cluster writes do not match old cluster writes after restore" - except RetryError: - assert ( - writes_in_new_cluster == writes_in_old_cluster - ), "new cluster writes do not match old cluster writes after restore" - - await helpers.destroy_cluster(ops_test, cluster_name=NEW_CLUSTER) - - -@pytest.mark.abort_on_fail -async def test_update_backup_password(ops_test: OpsTest) -> None: - """Verifies that after changing the backup password the pbm tool is updated and functional.""" - db_app_name = await helpers.app_name(ops_test) - db_unit = await helpers.get_leader_unit(ops_test) - - # wait for charm to be idle before setting password - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - parameters = {"username": "backup"} - action = await db_unit.run_action("set-password", **parameters) - action = await action.wait() - assert action.status == "completed", "failed to set backup password" - - # wait for charm to be idle after setting password - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # verify we still have connection to pbm via creating a backup - action = await db_unit.run_action(action_name="create-backup") - backup_result = await action.wait() - assert "backup started" in backup_result.results["backup-status"], "backup didn't start" +# @pytest.mark.abort_on_fail +# async def test_ready_correct_conf(ops_test: OpsTest) -> None: +# """Verifies charm goes into active status when s3 config and creds options are correct.""" +# 
db_app_name = await helpers.app_name(ops_test) +# choices = string.ascii_letters + string.digits +# unique_path = "".join([secrets.choice(choices) for _ in range(4)]) +# configuration_parameters = { +# "bucket": "data-charms-testing", +# "path": f"mongodb-vm/test-{unique_path}", +# "endpoint": "https://s3.amazonaws.com", +# "region": "us-east-1", +# } + +# # apply new configuration options +# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) + +# # after applying correct config options and creds the applications should both be active +# await ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active", timeout=TIMEOUT) +# await ops_test.model.wait_for_idle( +# apps=[db_app_name], status="active", timeout=TIMEOUT, idle_period=60 +# ) + + +# @pytest.mark.abort_on_fail +# async def test_create_and_list_backups(ops_test: OpsTest) -> None: +# db_unit = await helpers.get_leader_unit(ops_test) + +# # verify backup list works +# action = await db_unit.run_action(action_name="list-backups") +# list_result = await action.wait() +# backups = list_result.results["backups"] +# assert backups, "backups not outputted" + +# # verify backup is started +# action = await db_unit.run_action(action_name="create-backup") +# backup_result = await action.wait() +# assert "backup started" in backup_result.results["backup-status"], "backup didn't start" + +# # verify backup is present in the list of backups +# # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a +# # backup can take a lot of time so this function returns once the command was successfully +# # sent to pbm. Therefore we should retry listing the backup several times +# try: +# for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)): +# with attempt: +# backups = await helpers.count_logical_backups(db_unit) +# assert backups == 1 +# except RetryError: +# assert backups == 1, "Backup not created." + + +# @pytest.mark.abort_on_fail +# async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: +# """With writes in the DB test creating a backup while another one is running. + +# Note that before creating the second backup we change the bucket and change the s3 storage +# from AWS to GCP. This test verifies that the first backup in AWS is made, the second backup +# in GCP is made, and that before the second backup is made that pbm correctly resyncs. +# """ +# db_app_name = await helpers.app_name(ops_test) +# db_unit = await helpers.get_leader_unit(ops_test) + +# # create first backup once ready +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# action = await db_unit.run_action(action_name="create-backup") +# first_backup = await action.wait() +# assert first_backup.status == "completed", "First backup not started." + +# # while first backup is running change access key, secret keys, and bucket name +# # for GCP +# await helpers.set_credentials(ops_test, cloud="GCP") + +# # change to GCP configs and wait for PBM to resync +# configuration_parameters = { +# "bucket": "data-charms-testing", +# "endpoint": "https://storage.googleapis.com", +# "region": "", +# } +# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) + +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # create a backup as soon as possible. 
might not be immediately possible since only one backup +# # can happen at a time. +# try: +# for attempt in Retrying(stop=stop_after_delay(40), wait=wait_fixed(5)): +# with attempt: +# action = await db_unit.run_action(action_name="create-backup") +# second_backup = await action.wait() +# assert second_backup.status == "completed" +# except RetryError: +# assert second_backup.status == "completed", "Second backup not started." + +# # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a +# # backup can take a lot of time so this function returns once the command was successfully +# # sent to pbm. Therefore before checking, wait for Charmed MongoDB to finish creating the +# # backup +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # verify that backups was made in GCP bucket +# try: +# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): +# with attempt: +# backups = await helpers.count_logical_backups(db_unit) +# assert backups == 1, "Backup not created in bucket on GCP." +# except RetryError: +# assert backups == 1, "Backup not created in first bucket on GCP." + +# # set AWS credentials, set configs for s3 storage, and wait to resync +# await helpers.set_credentials(ops_test, cloud="AWS") +# configuration_parameters = { +# "bucket": "data-charms-testing", +# "region": "us-east-1", +# "endpoint": "https://s3.amazonaws.com", +# } +# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # verify that backups was made on the AWS bucket +# try: +# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): +# with attempt: +# backups = await helpers.count_logical_backups(db_unit) +# assert backups == 2, "Backup not created in bucket on AWS." +# except RetryError: +# assert backups == 2, "Backup not created in bucket on AWS." + + +# @pytest.mark.abort_on_fail +# async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: +# """Simple backup tests that verifies that writes are correctly restored.""" +# # count total writes +# number_writes = await ha_helpers.count_writes(ops_test) +# assert number_writes > 0, "no writes to backup" + +# # create a backup in the AWS bucket +# db_app_name = await helpers.app_name(ops_test) +# db_unit = await helpers.get_leader_unit(ops_test) +# prev_backups = await helpers.count_logical_backups(db_unit) +# action = await db_unit.run_action(action_name="create-backup") +# first_backup = await action.wait() +# assert first_backup.status == "completed", "First backup not started." + +# # verify that backup was made on the bucket +# try: +# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): +# with attempt: +# backups = await helpers.count_logical_backups(db_unit) +# assert backups == prev_backups + 1, "Backup not created." +# except RetryError: +# assert backups == prev_backups + 1, "Backup not created." + +# # add writes to be cleared after restoring the backup. Note these are written to the same +# # collection that was backed up. +# await helpers.insert_unwanted_data(ops_test) +# new_number_of_writes = await ha_helpers.count_writes(ops_test) +# assert new_number_of_writes > number_writes, "No writes to be cleared after restoring." 
+ +# # find most recent backup id and restore +# action = await db_unit.run_action(action_name="list-backups") +# list_result = await action.wait() +# list_result = list_result.results["backups"] +# most_recent_backup = list_result.split("\n")[-1] +# backup_id = most_recent_backup.split()[0] +# action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) +# restore = await action.wait() +# assert restore.results["restore-status"] == "restore started", "restore not successful" + +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # verify all writes are present +# try: +# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): +# with attempt: +# number_writes_restored = await ha_helpers.count_writes(ops_test) +# assert number_writes == number_writes_restored, "writes not correctly restored" +# except RetryError: +# assert number_writes == number_writes_restored, "writes not correctly restored" + + +# @pytest.mark.parametrize("cloud_provider", ["AWS", "GCP"]) +# async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_provider): +# # configure test for the cloud provider +# db_app_name = await helpers.app_name(ops_test) +# await helpers.set_credentials(ops_test, cloud=cloud_provider) +# if cloud_provider == "AWS": +# configuration_parameters = { +# "bucket": "data-charms-testing", +# "region": "us-east-1", +# "endpoint": "https://s3.amazonaws.com", +# } +# else: +# configuration_parameters = { +# "bucket": "data-charms-testing", +# "endpoint": "https://storage.googleapis.com", +# "region": "", +# } + +# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"), +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # create a backup +# writes_in_old_cluster = await ha_helpers.count_writes(ops_test, db_app_name) +# assert writes_in_old_cluster > 0, "old cluster has no writes." +# await helpers.create_and_verify_backup(ops_test) + +# # save old password, since after restoring we will need this password to authenticate. +# old_password = await ha_helpers.get_password(ops_test, db_app_name) + +# # deploy a new cluster with a different name +# db_charm = await ops_test.build_charm(".") +# await ops_test.model.deploy(db_charm, num_units=3, application_name=NEW_CLUSTER) +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), +# ) + +# db_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER) +# action = await db_unit.run_action("set-password", **{"password": old_password}) +# action = await action.wait() +# assert action.status == "completed" + +# # relate to s3 - s3 has the necessary configurations +# await ops_test.model.add_relation(S3_APP_NAME, NEW_CLUSTER) +# await ops_test.model.block_until( +# lambda: helpers.is_relation_joined(ops_test, ENDPOINT, ENDPOINT) is True, +# timeout=TIMEOUT, +# ) + +# # wait for new cluster to sync +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), +# ) + +# # verify that the listed backups from the old cluster are not listed as failed. 
+# assert ( +# await helpers.count_failed_backups(db_unit) == 0 +# ), "Backups from old cluster are listed as failed" + +# # find most recent backup id and restore +# action = await db_unit.run_action(action_name="list-backups") +# list_result = await action.wait() +# list_result = list_result.results["backups"] +# most_recent_backup = list_result.split("\n")[-1] +# backup_id = most_recent_backup.split()[0] +# action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) +# restore = await action.wait() +# assert restore.results["restore-status"] == "restore started", "restore not successful" + +# # verify all writes are present +# try: +# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): +# with attempt: +# writes_in_new_cluster = await ha_helpers.count_writes(ops_test, NEW_CLUSTER) +# assert ( +# writes_in_new_cluster == writes_in_old_cluster +# ), "new cluster writes do not match old cluster writes after restore" +# except RetryError: +# assert ( +# writes_in_new_cluster == writes_in_old_cluster +# ), "new cluster writes do not match old cluster writes after restore" + +# await helpers.destroy_cluster(ops_test, cluster_name=NEW_CLUSTER) + + +# @pytest.mark.abort_on_fail +# async def test_update_backup_password(ops_test: OpsTest) -> None: +# """Verifies that after changing the backup password the pbm tool is updated and functional.""" +# db_app_name = await helpers.app_name(ops_test) +# db_unit = await helpers.get_leader_unit(ops_test) + +# # wait for charm to be idle before setting password +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# parameters = {"username": "backup"} +# action = await db_unit.run_action("set-password", **parameters) +# action = await action.wait() +# assert action.status == "completed", "failed to set backup password" + +# # wait for charm to be idle after setting password +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # verify we still have connection to pbm via creating a backup +# action = await db_unit.run_action(action_name="create-backup") +# backup_result = await action.wait() +# assert "backup started" in backup_result.results["backup-status"], "backup didn't start" From ac78ed38c3a58b73998a95d3d00c7fe178ab502f Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Tue, 19 Sep 2023 09:20:37 +0000 Subject: [PATCH 10/12] PR comments --- lib/charms/mongodb/v0/helpers.py | 6 +- .../integration/backup_tests/test_backups.py | 606 +++++++++--------- 2 files changed, 305 insertions(+), 307 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index c95d99393..7f9085250 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -90,13 +90,11 @@ def get_mongos_args(config: MongoDBConfiguration) -> str: # mongos running on the config server communicates through localhost config_server_uri = f"{config.replset}/localhost" - # no need to add TLS since no network calls are used, since mongos is configured to listen - # on local host + # todo follow up PR add TLS cmd = [ # mongos on config server side should run on 0.0.0.0 so it can be accessed by other units # in the sharded cluster - "--bind_ip 0.0.0.0", - # todo figure out this one + "--bind_ip", f"--configdb {config_server_uri}", # config server is already using 27017 f"--port {Config.MONGOS_PORT}", diff --git a/tests/integration/backup_tests/test_backups.py 
b/tests/integration/backup_tests/test_backups.py index cc78957a2..b7cb64d48 100644 --- a/tests/integration/backup_tests/test_backups.py +++ b/tests/integration/backup_tests/test_backups.py @@ -99,306 +99,306 @@ async def test_blocked_incorrect_conf(ops_test: OpsTest) -> None: assert db_unit.workload_status_message == "s3 configurations are incompatible." -# @pytest.mark.abort_on_fail -# async def test_ready_correct_conf(ops_test: OpsTest) -> None: -# """Verifies charm goes into active status when s3 config and creds options are correct.""" -# db_app_name = await helpers.app_name(ops_test) -# choices = string.ascii_letters + string.digits -# unique_path = "".join([secrets.choice(choices) for _ in range(4)]) -# configuration_parameters = { -# "bucket": "data-charms-testing", -# "path": f"mongodb-vm/test-{unique_path}", -# "endpoint": "https://s3.amazonaws.com", -# "region": "us-east-1", -# } - -# # apply new configuration options -# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - -# # after applying correct config options and creds the applications should both be active -# await ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active", timeout=TIMEOUT) -# await ops_test.model.wait_for_idle( -# apps=[db_app_name], status="active", timeout=TIMEOUT, idle_period=60 -# ) - - -# @pytest.mark.abort_on_fail -# async def test_create_and_list_backups(ops_test: OpsTest) -> None: -# db_unit = await helpers.get_leader_unit(ops_test) - -# # verify backup list works -# action = await db_unit.run_action(action_name="list-backups") -# list_result = await action.wait() -# backups = list_result.results["backups"] -# assert backups, "backups not outputted" - -# # verify backup is started -# action = await db_unit.run_action(action_name="create-backup") -# backup_result = await action.wait() -# assert "backup started" in backup_result.results["backup-status"], "backup didn't start" - -# # verify backup is present in the list of backups -# # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a -# # backup can take a lot of time so this function returns once the command was successfully -# # sent to pbm. Therefore we should retry listing the backup several times -# try: -# for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)): -# with attempt: -# backups = await helpers.count_logical_backups(db_unit) -# assert backups == 1 -# except RetryError: -# assert backups == 1, "Backup not created." - - -# @pytest.mark.abort_on_fail -# async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: -# """With writes in the DB test creating a backup while another one is running. - -# Note that before creating the second backup we change the bucket and change the s3 storage -# from AWS to GCP. This test verifies that the first backup in AWS is made, the second backup -# in GCP is made, and that before the second backup is made that pbm correctly resyncs. -# """ -# db_app_name = await helpers.app_name(ops_test) -# db_unit = await helpers.get_leader_unit(ops_test) - -# # create first backup once ready -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# action = await db_unit.run_action(action_name="create-backup") -# first_backup = await action.wait() -# assert first_backup.status == "completed", "First backup not started." 
- -# # while first backup is running change access key, secret keys, and bucket name -# # for GCP -# await helpers.set_credentials(ops_test, cloud="GCP") - -# # change to GCP configs and wait for PBM to resync -# configuration_parameters = { -# "bucket": "data-charms-testing", -# "endpoint": "https://storage.googleapis.com", -# "region": "", -# } -# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # create a backup as soon as possible. might not be immediately possible since only one backup -# # can happen at a time. -# try: -# for attempt in Retrying(stop=stop_after_delay(40), wait=wait_fixed(5)): -# with attempt: -# action = await db_unit.run_action(action_name="create-backup") -# second_backup = await action.wait() -# assert second_backup.status == "completed" -# except RetryError: -# assert second_backup.status == "completed", "Second backup not started." - -# # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a -# # backup can take a lot of time so this function returns once the command was successfully -# # sent to pbm. Therefore before checking, wait for Charmed MongoDB to finish creating the -# # backup -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # verify that backups was made in GCP bucket -# try: -# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): -# with attempt: -# backups = await helpers.count_logical_backups(db_unit) -# assert backups == 1, "Backup not created in bucket on GCP." -# except RetryError: -# assert backups == 1, "Backup not created in first bucket on GCP." - -# # set AWS credentials, set configs for s3 storage, and wait to resync -# await helpers.set_credentials(ops_test, cloud="AWS") -# configuration_parameters = { -# "bucket": "data-charms-testing", -# "region": "us-east-1", -# "endpoint": "https://s3.amazonaws.com", -# } -# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # verify that backups was made on the AWS bucket -# try: -# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): -# with attempt: -# backups = await helpers.count_logical_backups(db_unit) -# assert backups == 2, "Backup not created in bucket on AWS." -# except RetryError: -# assert backups == 2, "Backup not created in bucket on AWS." - - -# @pytest.mark.abort_on_fail -# async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: -# """Simple backup tests that verifies that writes are correctly restored.""" -# # count total writes -# number_writes = await ha_helpers.count_writes(ops_test) -# assert number_writes > 0, "no writes to backup" - -# # create a backup in the AWS bucket -# db_app_name = await helpers.app_name(ops_test) -# db_unit = await helpers.get_leader_unit(ops_test) -# prev_backups = await helpers.count_logical_backups(db_unit) -# action = await db_unit.run_action(action_name="create-backup") -# first_backup = await action.wait() -# assert first_backup.status == "completed", "First backup not started." 
- -# # verify that backup was made on the bucket -# try: -# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): -# with attempt: -# backups = await helpers.count_logical_backups(db_unit) -# assert backups == prev_backups + 1, "Backup not created." -# except RetryError: -# assert backups == prev_backups + 1, "Backup not created." - -# # add writes to be cleared after restoring the backup. Note these are written to the same -# # collection that was backed up. -# await helpers.insert_unwanted_data(ops_test) -# new_number_of_writes = await ha_helpers.count_writes(ops_test) -# assert new_number_of_writes > number_writes, "No writes to be cleared after restoring." - -# # find most recent backup id and restore -# action = await db_unit.run_action(action_name="list-backups") -# list_result = await action.wait() -# list_result = list_result.results["backups"] -# most_recent_backup = list_result.split("\n")[-1] -# backup_id = most_recent_backup.split()[0] -# action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) -# restore = await action.wait() -# assert restore.results["restore-status"] == "restore started", "restore not successful" - -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # verify all writes are present -# try: -# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): -# with attempt: -# number_writes_restored = await ha_helpers.count_writes(ops_test) -# assert number_writes == number_writes_restored, "writes not correctly restored" -# except RetryError: -# assert number_writes == number_writes_restored, "writes not correctly restored" - - -# @pytest.mark.parametrize("cloud_provider", ["AWS", "GCP"]) -# async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_provider): -# # configure test for the cloud provider -# db_app_name = await helpers.app_name(ops_test) -# await helpers.set_credentials(ops_test, cloud=cloud_provider) -# if cloud_provider == "AWS": -# configuration_parameters = { -# "bucket": "data-charms-testing", -# "region": "us-east-1", -# "endpoint": "https://s3.amazonaws.com", -# } -# else: -# configuration_parameters = { -# "bucket": "data-charms-testing", -# "endpoint": "https://storage.googleapis.com", -# "region": "", -# } - -# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"), -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # create a backup -# writes_in_old_cluster = await ha_helpers.count_writes(ops_test, db_app_name) -# assert writes_in_old_cluster > 0, "old cluster has no writes." -# await helpers.create_and_verify_backup(ops_test) - -# # save old password, since after restoring we will need this password to authenticate. 
-# old_password = await ha_helpers.get_password(ops_test, db_app_name) - -# # deploy a new cluster with a different name -# db_charm = await ops_test.build_charm(".") -# await ops_test.model.deploy(db_charm, num_units=3, application_name=NEW_CLUSTER) -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), -# ) - -# db_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER) -# action = await db_unit.run_action("set-password", **{"password": old_password}) -# action = await action.wait() -# assert action.status == "completed" - -# # relate to s3 - s3 has the necessary configurations -# await ops_test.model.add_relation(S3_APP_NAME, NEW_CLUSTER) -# await ops_test.model.block_until( -# lambda: helpers.is_relation_joined(ops_test, ENDPOINT, ENDPOINT) is True, -# timeout=TIMEOUT, -# ) - -# # wait for new cluster to sync -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), -# ) - -# # verify that the listed backups from the old cluster are not listed as failed. -# assert ( -# await helpers.count_failed_backups(db_unit) == 0 -# ), "Backups from old cluster are listed as failed" - -# # find most recent backup id and restore -# action = await db_unit.run_action(action_name="list-backups") -# list_result = await action.wait() -# list_result = list_result.results["backups"] -# most_recent_backup = list_result.split("\n")[-1] -# backup_id = most_recent_backup.split()[0] -# action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) -# restore = await action.wait() -# assert restore.results["restore-status"] == "restore started", "restore not successful" - -# # verify all writes are present -# try: -# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): -# with attempt: -# writes_in_new_cluster = await ha_helpers.count_writes(ops_test, NEW_CLUSTER) -# assert ( -# writes_in_new_cluster == writes_in_old_cluster -# ), "new cluster writes do not match old cluster writes after restore" -# except RetryError: -# assert ( -# writes_in_new_cluster == writes_in_old_cluster -# ), "new cluster writes do not match old cluster writes after restore" - -# await helpers.destroy_cluster(ops_test, cluster_name=NEW_CLUSTER) - - -# @pytest.mark.abort_on_fail -# async def test_update_backup_password(ops_test: OpsTest) -> None: -# """Verifies that after changing the backup password the pbm tool is updated and functional.""" -# db_app_name = await helpers.app_name(ops_test) -# db_unit = await helpers.get_leader_unit(ops_test) - -# # wait for charm to be idle before setting password -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# parameters = {"username": "backup"} -# action = await db_unit.run_action("set-password", **parameters) -# action = await action.wait() -# assert action.status == "completed", "failed to set backup password" - -# # wait for charm to be idle after setting password -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # verify we still have connection to pbm via creating a backup -# action = await db_unit.run_action(action_name="create-backup") -# backup_result = await action.wait() -# assert "backup started" in backup_result.results["backup-status"], "backup didn't start" +@pytest.mark.abort_on_fail +async def test_ready_correct_conf(ops_test: OpsTest) -> None: + """Verifies charm goes into active 
status when s3 config and creds options are correct.""" + db_app_name = await helpers.app_name(ops_test) + choices = string.ascii_letters + string.digits + unique_path = "".join([secrets.choice(choices) for _ in range(4)]) + configuration_parameters = { + "bucket": "data-charms-testing", + "path": f"mongodb-vm/test-{unique_path}", + "endpoint": "https://s3.amazonaws.com", + "region": "us-east-1", + } + + # apply new configuration options + await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) + + # after applying correct config options and creds the applications should both be active + await ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active", timeout=TIMEOUT) + await ops_test.model.wait_for_idle( + apps=[db_app_name], status="active", timeout=TIMEOUT, idle_period=60 + ) + + +@pytest.mark.abort_on_fail +async def test_create_and_list_backups(ops_test: OpsTest) -> None: + db_unit = await helpers.get_leader_unit(ops_test) + + # verify backup list works + action = await db_unit.run_action(action_name="list-backups") + list_result = await action.wait() + backups = list_result.results["backups"] + assert backups, "backups not outputted" + + # verify backup is started + action = await db_unit.run_action(action_name="create-backup") + backup_result = await action.wait() + assert "backup started" in backup_result.results["backup-status"], "backup didn't start" + + # verify backup is present in the list of backups + # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a + # backup can take a lot of time so this function returns once the command was successfully + # sent to pbm. Therefore we should retry listing the backup several times + try: + for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)): + with attempt: + backups = await helpers.count_logical_backups(db_unit) + assert backups == 1 + except RetryError: + assert backups == 1, "Backup not created." + + +@pytest.mark.abort_on_fail +async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: + """With writes in the DB test creating a backup while another one is running. + + Note that before creating the second backup we change the bucket and change the s3 storage + from AWS to GCP. This test verifies that the first backup in AWS is made, the second backup + in GCP is made, and that before the second backup is made that pbm correctly resyncs. + """ + db_app_name = await helpers.app_name(ops_test) + db_unit = await helpers.get_leader_unit(ops_test) + + # create first backup once ready + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + action = await db_unit.run_action(action_name="create-backup") + first_backup = await action.wait() + assert first_backup.status == "completed", "First backup not started." + + # while first backup is running change access key, secret keys, and bucket name + # for GCP + await helpers.set_credentials(ops_test, cloud="GCP") + + # change to GCP configs and wait for PBM to resync + configuration_parameters = { + "bucket": "data-charms-testing", + "endpoint": "https://storage.googleapis.com", + "region": "", + } + await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) + + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + # create a backup as soon as possible. might not be immediately possible since only one backup + # can happen at a time. 
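+    # (Retrying re-runs the action every 5 seconds for up to 40 seconds; if pbm is still busy
+    # with the first backup after that, the assert in the except branch reports the failure)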
+    try:
+        for attempt in Retrying(stop=stop_after_delay(40), wait=wait_fixed(5)):
+            with attempt:
+                action = await db_unit.run_action(action_name="create-backup")
+                second_backup = await action.wait()
+                assert second_backup.status == "completed"
+    except RetryError:
+        assert second_backup.status == "completed", "Second backup not started."
+
+    # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a
+    # backup can take a lot of time so this function returns once the command was successfully
+    # sent to pbm. Therefore before checking, wait for Charmed MongoDB to finish creating the
+    # backup
+    await asyncio.gather(
+        ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
+    )
+
+    # verify that a backup was made in the GCP bucket
+    try:
+        for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)):
+            with attempt:
+                backups = await helpers.count_logical_backups(db_unit)
+                assert backups == 1, "Backup not created in bucket on GCP."
+    except RetryError:
+        assert backups == 1, "Backup not created in first bucket on GCP."
+
+    # set AWS credentials, set configs for s3 storage, and wait to resync
+    await helpers.set_credentials(ops_test, cloud="AWS")
+    configuration_parameters = {
+        "bucket": "data-charms-testing",
+        "region": "us-east-1",
+        "endpoint": "https://s3.amazonaws.com",
+    }
+    await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters)
+    await asyncio.gather(
+        ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
+    )
+
+    # verify that a backup was made on the AWS bucket
+    try:
+        for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)):
+            with attempt:
+                backups = await helpers.count_logical_backups(db_unit)
+                assert backups == 2, "Backup not created in bucket on AWS."
+    except RetryError:
+        assert backups == 2, "Backup not created in bucket on AWS."
+
+
+@pytest.mark.abort_on_fail
+async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None:
+    """Simple backup test that verifies that writes are correctly restored."""
+    # count total writes
+    number_writes = await ha_helpers.count_writes(ops_test)
+    assert number_writes > 0, "no writes to backup"
+
+    # create a backup in the AWS bucket
+    db_app_name = await helpers.app_name(ops_test)
+    db_unit = await helpers.get_leader_unit(ops_test)
+    prev_backups = await helpers.count_logical_backups(db_unit)
+    action = await db_unit.run_action(action_name="create-backup")
+    first_backup = await action.wait()
+    assert first_backup.status == "completed", "First backup not started."
+
+    # verify that backup was made on the bucket
+    try:
+        for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)):
+            with attempt:
+                backups = await helpers.count_logical_backups(db_unit)
+                assert backups == prev_backups + 1, "Backup not created."
+    except RetryError:
+        assert backups == prev_backups + 1, "Backup not created."
+
+    # add writes to be cleared after restoring the backup. Note these are written to the same
+    # collection that was backed up.
+    await helpers.insert_unwanted_data(ops_test)
+    new_number_of_writes = await ha_helpers.count_writes(ops_test)
+    assert new_number_of_writes > number_writes, "No writes to be cleared after restoring."
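+
+    # note: the restore steps below assume that `list-backups` prints one backup per line, oldest
+    # first, so the last line is the most recent backup and its id is the first field of that line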
+ + # find most recent backup id and restore + action = await db_unit.run_action(action_name="list-backups") + list_result = await action.wait() + list_result = list_result.results["backups"] + most_recent_backup = list_result.split("\n")[-1] + backup_id = most_recent_backup.split()[0] + action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) + restore = await action.wait() + assert restore.results["restore-status"] == "restore started", "restore not successful" + + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + # verify all writes are present + try: + for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): + with attempt: + number_writes_restored = await ha_helpers.count_writes(ops_test) + assert number_writes == number_writes_restored, "writes not correctly restored" + except RetryError: + assert number_writes == number_writes_restored, "writes not correctly restored" + + +@pytest.mark.parametrize("cloud_provider", ["AWS", "GCP"]) +async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_provider): + # configure test for the cloud provider + db_app_name = await helpers.app_name(ops_test) + await helpers.set_credentials(ops_test, cloud=cloud_provider) + if cloud_provider == "AWS": + configuration_parameters = { + "bucket": "data-charms-testing", + "region": "us-east-1", + "endpoint": "https://s3.amazonaws.com", + } + else: + configuration_parameters = { + "bucket": "data-charms-testing", + "endpoint": "https://storage.googleapis.com", + "region": "", + } + + await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"), + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + # create a backup + writes_in_old_cluster = await ha_helpers.count_writes(ops_test, db_app_name) + assert writes_in_old_cluster > 0, "old cluster has no writes." + await helpers.create_and_verify_backup(ops_test) + + # save old password, since after restoring we will need this password to authenticate. + old_password = await ha_helpers.get_password(ops_test, db_app_name) + + # deploy a new cluster with a different name + db_charm = await ops_test.build_charm(".") + await ops_test.model.deploy(db_charm, num_units=3, application_name=NEW_CLUSTER) + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), + ) + + db_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER) + action = await db_unit.run_action("set-password", **{"password": old_password}) + action = await action.wait() + assert action.status == "completed" + + # relate to s3 - s3 has the necessary configurations + await ops_test.model.add_relation(S3_APP_NAME, NEW_CLUSTER) + await ops_test.model.block_until( + lambda: helpers.is_relation_joined(ops_test, ENDPOINT, ENDPOINT) is True, + timeout=TIMEOUT, + ) + + # wait for new cluster to sync + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), + ) + + # verify that the listed backups from the old cluster are not listed as failed. 
+ assert ( + await helpers.count_failed_backups(db_unit) == 0 + ), "Backups from old cluster are listed as failed" + + # find most recent backup id and restore + action = await db_unit.run_action(action_name="list-backups") + list_result = await action.wait() + list_result = list_result.results["backups"] + most_recent_backup = list_result.split("\n")[-1] + backup_id = most_recent_backup.split()[0] + action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) + restore = await action.wait() + assert restore.results["restore-status"] == "restore started", "restore not successful" + + # verify all writes are present + try: + for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): + with attempt: + writes_in_new_cluster = await ha_helpers.count_writes(ops_test, NEW_CLUSTER) + assert ( + writes_in_new_cluster == writes_in_old_cluster + ), "new cluster writes do not match old cluster writes after restore" + except RetryError: + assert ( + writes_in_new_cluster == writes_in_old_cluster + ), "new cluster writes do not match old cluster writes after restore" + + await helpers.destroy_cluster(ops_test, cluster_name=NEW_CLUSTER) + + +@pytest.mark.abort_on_fail +async def test_update_backup_password(ops_test: OpsTest) -> None: + """Verifies that after changing the backup password the pbm tool is updated and functional.""" + db_app_name = await helpers.app_name(ops_test) + db_unit = await helpers.get_leader_unit(ops_test) + + # wait for charm to be idle before setting password + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + parameters = {"username": "backup"} + action = await db_unit.run_action("set-password", **parameters) + action = await action.wait() + assert action.status == "completed", "failed to set backup password" + + # wait for charm to be idle after setting password + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + # verify we still have connection to pbm via creating a backup + action = await db_unit.run_action(action_name="create-backup") + backup_result = await action.wait() + assert "backup started" in backup_result.results["backup-status"], "backup didn't start" From 563f0495a6288219439f2d37998d3178ac4bf731 Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Tue, 19 Sep 2023 09:26:16 +0000 Subject: [PATCH 11/12] correct ip binding --- lib/charms/mongodb/v0/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index 7f9085250..f63366e42 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -94,7 +94,7 @@ def get_mongos_args(config: MongoDBConfiguration) -> str: cmd = [ # mongos on config server side should run on 0.0.0.0 so it can be accessed by other units # in the sharded cluster - "--bind_ip", + "--bind_ip_all", f"--configdb {config_server_uri}", # config server is already using 27017 f"--port {Config.MONGOS_PORT}", From beacd0ea84905f43a92d03729ad9112612f536fa Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Wed, 20 Sep 2023 06:54:22 +0000 Subject: [PATCH 12/12] mongosh not packaged in snap, remove it --- lib/charms/mongodb/v0/helpers.py | 2 +- tests/integration/relation_tests/legacy_relations/helpers.py | 2 +- tests/integration/test_charm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index 
85187f345..f63366e42 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -50,7 +50,7 @@ # noinspection GrazieInspection def get_create_user_cmd( - config: MongoDBConfiguration, mongo_path="charmed-mongodb.mongosh" + config: MongoDBConfiguration, mongo_path="charmed-mongodb.mongo" ) -> List[str]: """Creates initial admin user for MongoDB. diff --git a/tests/integration/relation_tests/legacy_relations/helpers.py b/tests/integration/relation_tests/legacy_relations/helpers.py index ccde5c01d..9bb27b3fe 100644 --- a/tests/integration/relation_tests/legacy_relations/helpers.py +++ b/tests/integration/relation_tests/legacy_relations/helpers.py @@ -136,7 +136,7 @@ async def mongo_tls_command(ops_test: OpsTest) -> str: replica_set_uri = f"mongodb://{hosts}/admin?replicaSet={app}" return ( - f"charmed-mongodb.mongosh '{replica_set_uri}' --eval 'rs.status()'" + f"charmed-mongodb.mongo '{replica_set_uri}' --eval 'rs.status()'" f" --tls --tlsCAFile {EXTERNAL_CERT_PATH}" f" --tlsCertificateKeyFile {EXTERNAL_PEM_PATH}" ) diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py index b4688b649..0f75d43ce 100644 --- a/tests/integration/test_charm.py +++ b/tests/integration/test_charm.py @@ -180,7 +180,7 @@ async def test_monitor_user(ops_test: OpsTest) -> None: ] hosts = ",".join(replica_set_hosts) replica_set_uri = f"mongodb://monitor:{password}@{hosts}/admin?replicaSet=mongodb" - admin_mongod_cmd = f"charmed-mongodb.mongosh '{replica_set_uri}' --eval 'rs.conf()'" + admin_mongod_cmd = f"charmed-mongodb.mongo '{replica_set_uri}' --eval 'rs.conf()'" check_monitor_cmd = f"exec --unit {unit.name} -- {admin_mongod_cmd}" return_code, _, _ = await ops_test.juju(*check_monitor_cmd.split()) assert return_code == 0, "command rs.conf() on monitor user does not work"
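
The restore tests above all parse the `list-backups` action output the same way (take the last
line, then its first field). A follow-up could factor that into a shared test helper. Below is a
minimal sketch, not part of these patches; the helper name and its suggested home
(tests/integration/backup_tests/helpers.py) are only assumptions, and it mirrors the tests'
assumption that pbm lists backups oldest to newest with the backup id as the first
whitespace-separated column:

    def most_recent_backup_id(list_backups_output: str) -> str:
        """Return the id of the newest backup in the `list-backups` action output."""
        # pbm prints one backup per line, oldest first, so the last line is the newest backup
        most_recent_row = list_backups_output.split("\n")[-1]
        # the backup id is the first whitespace-separated field of that row
        return most_recent_row.split()[0]

    # example usage inside a test:
    # action = await db_unit.run_action(action_name="list-backups")
    # list_result = await action.wait()
    # backup_id = most_recent_backup_id(list_result.results["backups"])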