Skip to content

Commit

Permalink
only leader should be able to restore backups (#216)
Browse files Browse the repository at this point in the history
only leader should allow restores - this prevents multiple restores from
occurring at a time which can be destructive
  • Loading branch information
MiaAltieri authored May 10, 2023
1 parent 459c734 commit 4c40d3d
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 25 deletions.
10 changes: 8 additions & 2 deletions lib/charms/mongodb/v0/mongodb_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 4
LIBPATCH = 5
MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current"

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -359,6 +359,12 @@ def _on_restore_action(self, event) -> None:
event.fail("Missing backup-id to restore")
return

# only leader can restore backups. This prevents multiple restores from being attempted at
# once.
if not self.charm.unit.is_leader():
event.fail("The action can be run only on leader unit.")
return

# cannot restore backup if pbm is not ready. This could be due to: resyncing, incompatible,
# options, incorrect credentials, creating a backup, or already performing a restore.
pbm_status = self._get_pbm_status()
Expand All @@ -371,7 +377,7 @@ def _on_restore_action(self, event) -> None:
logger.debug("Sync-ing configurations needs more time, must wait before restoring.")
return
if isinstance(pbm_status, BlockedStatus):
event.fail(f"Cannot create backup {pbm_status.message}.")
event.fail(f"Cannot restore backup {pbm_status.message}.")
return

try:
Expand Down
10 changes: 8 additions & 2 deletions lib/charms/mongodb/v0/mongodb_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ def _on_relation_event(self, event):
return

# legacy relations have auth disabled, which new relations require
legacy_relation_users = self._get_users_from_relations(None, rel=LEGACY_REL_NAME)
if len(legacy_relation_users) > 0:
if self.model.get_relation(LEGACY_REL_NAME):
self.charm.unit.status = BlockedStatus("cannot have both legacy and new relations")
logger.error("Auth disabled due to existing connections to legacy relations")
return
Expand Down Expand Up @@ -117,6 +116,13 @@ def oversee_users(self, departed_relation_id: Optional[int], event):
relation is still on the list of all relations. Therefore, for proper
work of the function, we need to exclude departed relation from the list.
"""
# This hook gets called from other contexts within the charm so it is necessary to check
# for legacy relations which have auth disabled, which new relations require
if self.model.get_relation(LEGACY_REL_NAME):
self.charm.unit.status = BlockedStatus("cannot have both legacy and new relations")
logger.error("Auth disabled due to existing connections to legacy relations")
return

with MongoDBConnection(self.charm.mongodb_config) as mongo:
database_users = mongo.get_users()
relation_users = self._get_users_from_relations(departed_relation_id)
Expand Down
39 changes: 18 additions & 21 deletions tests/integration/backup_tests/test_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ async def test_blocked_incorrect_creds(ops_test: OpsTest) -> None:
# verify that Charmed MongoDB is blocked and reports incorrect credentials
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"),
ops_test.model.wait_for_idle(apps=[db_app_name], status="blocked"),
ops_test.model.wait_for_idle(apps=[db_app_name], status="blocked", idle_period=20),
)
db_unit = ops_test.model.applications[db_app_name].units[0]

Expand All @@ -93,7 +93,7 @@ async def test_blocked_incorrect_conf(ops_test: OpsTest) -> None:
# wait for both applications to be idle with the correct statuses
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"),
ops_test.model.wait_for_idle(apps=[db_app_name], status="blocked"),
ops_test.model.wait_for_idle(apps=[db_app_name], status="blocked", idle_period=20),
)
db_unit = ops_test.model.applications[db_app_name].units[0]
assert db_unit.workload_status_message == "s3 configurations are incompatible."
Expand Down Expand Up @@ -162,10 +162,9 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None:
db_unit = await helpers.get_leader_unit(ops_test)

# create first backup once ready
async with ops_test.fast_forward():
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
)
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
)

action = await db_unit.run_action(action_name="create-backup")
first_backup = await action.wait()
Expand All @@ -184,7 +183,7 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None:
await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters)

await asyncio.gather(
ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
)

# create a backup as soon as possible. might not be immediately possible since only one backup
Expand All @@ -202,10 +201,9 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None:
# backup can take a lot of time so this function returns once the command was successfully
# sent to pbm. Therefore before checking, wait for Charmed MongoDB to finish creating the
# backup
async with ops_test.fast_forward():
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
)
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
)

# verify that backups was made in GCP bucket
try:
Expand All @@ -225,7 +223,7 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None:
}
await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters)
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
)

# verify that backups was made on the AWS bucket
Expand Down Expand Up @@ -278,10 +276,9 @@ async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None:
restore = await action.wait()
assert restore.results["restore-status"] == "restore started", "restore not successful"

async with ops_test.fast_forward():
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
)
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
)

# verify all writes are present
try:
Expand Down Expand Up @@ -314,7 +311,7 @@ async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_pr
await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters)
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"),
ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
)

# create a backup
Expand All @@ -329,7 +326,7 @@ async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_pr
db_charm = await ops_test.build_charm(".")
await ops_test.model.deploy(db_charm, num_units=3, application_name=NEW_CLUSTER)
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active"),
ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20),
)

db_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER)
Expand All @@ -346,7 +343,7 @@ async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_pr

# wait for new cluster to sync
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active"),
ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20),
)

# verify that the listed backups from the old cluster are not listed as failed.
Expand Down Expand Up @@ -388,7 +385,7 @@ async def test_update_backup_password(ops_test: OpsTest) -> None:

# wait for charm to be idle before setting password
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
)

parameters = {"username": "backup"}
Expand All @@ -398,7 +395,7 @@ async def test_update_backup_password(ops_test: OpsTest) -> None:

# wait for charm to be idle after setting password
await asyncio.gather(
ops_test.model.wait_for_idle(apps=[db_app_name], status="active"),
ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
)

# verify we still have connection to pbm via creating a backup
Expand Down

0 comments on commit 4c40d3d

Please sign in to comment.