From 4c40d3d212909e3c75c36715b0004a4f7f68294c Mon Sep 17 00:00:00 2001
From: Mia Altieri <32723809+MiaAltieri@users.noreply.github.com>
Date: Wed, 10 May 2023 14:53:46 +0200
Subject: [PATCH] only leader should be able to restore backups (#216)

Only the leader should allow restores - this prevents multiple restores
from occurring at the same time, which can be destructive.
---
 lib/charms/mongodb/v0/mongodb_backups.py      | 10 ++++-
 lib/charms/mongodb/v0/mongodb_provider.py     | 10 ++++-
 .../integration/backup_tests/test_backups.py  | 39 +++++++++----------
 3 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/lib/charms/mongodb/v0/mongodb_backups.py b/lib/charms/mongodb/v0/mongodb_backups.py
index 5048e0d15..0ea6de914 100644
--- a/lib/charms/mongodb/v0/mongodb_backups.py
+++ b/lib/charms/mongodb/v0/mongodb_backups.py
@@ -43,7 +43,7 @@
 # Increment this PATCH version before using `charmcraft publish-lib` or reset
 # to 0 if you are raising the major API version
-LIBPATCH = 4
+LIBPATCH = 5
 
 MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current"
 
 logger = logging.getLogger(__name__)
@@ -359,6 +359,12 @@ def _on_restore_action(self, event) -> None:
             event.fail("Missing backup-id to restore")
             return
 
+        # only leader can restore backups. This prevents multiple restores from being attempted at
+        # once.
+        if not self.charm.unit.is_leader():
+            event.fail("The action can be run only on leader unit.")
+            return
+
         # cannot restore backup if pbm is not ready. This could be due to: resyncing, incompatible,
         # options, incorrect credentials, creating a backup, or already performing a restore.
         pbm_status = self._get_pbm_status()
@@ -371,7 +377,7 @@ def _on_restore_action(self, event) -> None:
             logger.debug("Sync-ing configurations needs more time, must wait before restoring.")
             return
         if isinstance(pbm_status, BlockedStatus):
-            event.fail(f"Cannot create backup {pbm_status.message}.")
+            event.fail(f"Cannot restore backup {pbm_status.message}.")
             return
 
         try:
diff --git a/lib/charms/mongodb/v0/mongodb_provider.py b/lib/charms/mongodb/v0/mongodb_provider.py
index 34f452c86..3738108b7 100644
--- a/lib/charms/mongodb/v0/mongodb_provider.py
+++ b/lib/charms/mongodb/v0/mongodb_provider.py
@@ -76,8 +76,7 @@ def _on_relation_event(self, event):
             return
 
         # legacy relations have auth disabled, which new relations require
-        legacy_relation_users = self._get_users_from_relations(None, rel=LEGACY_REL_NAME)
-        if len(legacy_relation_users) > 0:
+        if self.model.get_relation(LEGACY_REL_NAME):
             self.charm.unit.status = BlockedStatus("cannot have both legacy and new relations")
             logger.error("Auth disabled due to existing connections to legacy relations")
             return
@@ -117,6 +116,13 @@ def oversee_users(self, departed_relation_id: Optional[int], event):
         relation is still on the list of all relations. Therefore, for proper work of the
         function, we need to exclude departed relation from the list.
""" + # This hook gets called from other contexts within the charm so it is necessary to check + # for legacy relations which have auth disabled, which new relations require + if self.model.get_relation(LEGACY_REL_NAME): + self.charm.unit.status = BlockedStatus("cannot have both legacy and new relations") + logger.error("Auth disabled due to existing connections to legacy relations") + return + with MongoDBConnection(self.charm.mongodb_config) as mongo: database_users = mongo.get_users() relation_users = self._get_users_from_relations(departed_relation_id) diff --git a/tests/integration/backup_tests/test_backups.py b/tests/integration/backup_tests/test_backups.py index 16f3038b7..1436b047e 100644 --- a/tests/integration/backup_tests/test_backups.py +++ b/tests/integration/backup_tests/test_backups.py @@ -75,7 +75,7 @@ async def test_blocked_incorrect_creds(ops_test: OpsTest) -> None: # verify that Charmed MongoDB is blocked and reports incorrect credentials await asyncio.gather( ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"), - ops_test.model.wait_for_idle(apps=[db_app_name], status="blocked"), + ops_test.model.wait_for_idle(apps=[db_app_name], status="blocked", idle_period=20), ) db_unit = ops_test.model.applications[db_app_name].units[0] @@ -93,7 +93,7 @@ async def test_blocked_incorrect_conf(ops_test: OpsTest) -> None: # wait for both applications to be idle with the correct statuses await asyncio.gather( ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"), - ops_test.model.wait_for_idle(apps=[db_app_name], status="blocked"), + ops_test.model.wait_for_idle(apps=[db_app_name], status="blocked", idle_period=20), ) db_unit = ops_test.model.applications[db_app_name].units[0] assert db_unit.workload_status_message == "s3 configurations are incompatible." @@ -162,10 +162,9 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: db_unit = await helpers.get_leader_unit(ops_test) # create first backup once ready - async with ops_test.fast_forward(): - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active"), - ) + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) action = await db_unit.run_action(action_name="create-backup") first_backup = await action.wait() @@ -184,7 +183,7 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active"), + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), ) # create a backup as soon as possible. might not be immediately possible since only one backup @@ -202,10 +201,9 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: # backup can take a lot of time so this function returns once the command was successfully # sent to pbm. 
     # backup
-    async with ops_test.fast_forward():
-        await asyncio.gather(
-            ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
-        )
+    await asyncio.gather(
+        ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
+    )
 
     # verify that backups was made in GCP bucket
     try:
@@ -225,7 +223,7 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None:
     }
     await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters)
     await asyncio.gather(
-        ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
+        ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
     )
 
     # verify that backups was made on the AWS bucket
@@ -278,10 +276,9 @@ async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None:
     restore = await action.wait()
     assert restore.results["restore-status"] == "restore started", "restore not successful"
 
-    async with ops_test.fast_forward():
-        await asyncio.gather(
-            ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
-        )
+    await asyncio.gather(
+        ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
+    )
 
     # verify all writes are present
     try:
@@ -314,7 +311,7 @@ async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_pr
     await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters)
     await asyncio.gather(
         ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"),
-        ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
+        ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
     )
 
     # create a backup
@@ -329,7 +326,7 @@ async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_pr
     db_charm = await ops_test.build_charm(".")
     await ops_test.model.deploy(db_charm, num_units=3, application_name=NEW_CLUSTER)
     await asyncio.gather(
-        ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active"),
+        ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20),
     )
 
     db_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER)
@@ -346,7 +343,7 @@ async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_pr
 
     # wait for new cluster to sync
     await asyncio.gather(
-        ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active"),
+        ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20),
     )
 
     # verify that the listed backups from the old cluster are not listed as failed.
@@ -388,7 +385,7 @@ async def test_update_backup_password(ops_test: OpsTest) -> None:
 
     # wait for charm to be idle before setting password
     await asyncio.gather(
-        ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
+        ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
     )
 
     parameters = {"username": "backup"}
@@ -398,7 +395,7 @@ async def test_update_backup_password(ops_test: OpsTest) -> None:
 
     # wait for charm to be idle after setting password
     await asyncio.gather(
-        ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
+        ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
     )
 
     # verify we still have connection to pbm via creating a backup
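
Note for reviewers: the sketch below shows the leader-only action guard this patch applies, written as a minimal standalone charm on the ops framework. It is an illustration only, not code from this repository: ExampleCharm, the "restore" action wiring, and the _do_restore helper are hypothetical. In the library itself the guard lives in MongoDBBackups, which holds a reference to the charm, hence self.charm.unit.is_leader() in the diff rather than self.unit.is_leader().

#!/usr/bin/env python3
# Minimal sketch, assuming a "restore" action is declared in actions.yaml.

from ops.charm import ActionEvent, CharmBase
from ops.main import main


class ExampleCharm(CharmBase):
    """Hypothetical charm demonstrating a leader-only action guard."""

    def __init__(self, *args):
        super().__init__(*args)
        self.framework.observe(self.on.restore_action, self._on_restore_action)

    def _on_restore_action(self, event: ActionEvent) -> None:
        backup_id = event.params.get("backup-id")
        if not backup_id:
            event.fail("Missing backup-id to restore")
            return

        # Only the leader may run a restore, so two units can never start
        # restores concurrently - the failure mode this patch guards against.
        if not self.unit.is_leader():
            event.fail("The action can be run only on leader unit.")
            return

        self._do_restore(backup_id)  # hypothetical helper doing the real work
        event.set_results({"restore-status": "restore started"})

    def _do_restore(self, backup_id: str) -> None:
        """Placeholder for the actual restore call (e.g. via pbm)."""


if __name__ == "__main__":
    main(ExampleCharm)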