Skip to content

Commit

Permalink
[DPE-3502] support backup password rotation across sharded cluster (#358
Browse files Browse the repository at this point in the history
)

## Issue
Rotating the backup password on the config-server breaks the sharded cluster, because the new password is not propagated to the shards.

## Solution
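Support backup password rotation by propagating the backup user's new password from the config-server to the shards, the same way the operator user's password is already propagated.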

---------

Co-authored-by: Mehdi Bendriss <[email protected]>
  • Loading branch information
MiaAltieri and Mehdi-Bendriss authored Feb 21, 2024
1 parent 5b8c969 commit 8f459ab
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 3 deletions.
2 changes: 1 addition & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ def _on_set_password(self, event: ActionEvent) -> None:

# rotate password to shards
# both the operator and the backup user passwords are propagated to the shards
if username == OperatorUser.get_username():
if username in [OperatorUser.get_username(), BackupUser.get_username()]:
self.config_server.update_credentials(
MongoDBUser.get_password_key_name_for_user(username),
new_password,
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ async def find_unit(ops_test: OpsTest, leader: bool, app_name=None) -> ops.model
return ret_unit


async def get_leader_id(ops_test: OpsTest) -> int:
async def get_leader_id(ops_test: OpsTest, app_name=None) -> int:
"""Returns the unit number of the juju leader unit."""
app_name = await get_app_name(ops_test)
app_name = app_name or await get_app_name(ops_test)
for unit in ops_test.model.applications[app_name].units:
if await unit.is_leader_from_status():
return int(unit.name.split("/")[1])
Expand Down
58 changes: 58 additions & 0 deletions tests/integration/sharding_tests/test_sharding_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed

from ..backup_tests import helpers as backup_helpers
from ..helpers import get_leader_id, get_password, set_password

S3_APP_NAME = "s3-integrator"
SHARD_ONE_APP_NAME = "shard-one"
Expand Down Expand Up @@ -53,6 +54,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None:


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_set_credentials_in_cluster(ops_test: OpsTest, github_secrets) -> None:
"""Tests that sharded cluster can be configured for s3 configurations."""
await backup_helpers.set_credentials(ops_test, github_secrets, cloud="AWS")
Expand Down Expand Up @@ -97,6 +99,7 @@ async def test_set_credentials_in_cluster(ops_test: OpsTest, github_secrets) ->


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_create_and_list_backups_in_cluster(ops_test: OpsTest, github_secrets) -> None:
"""Tests that sharded cluster can successfully create and list backups."""
leader_unit = await backup_helpers.get_leader_unit(
Expand Down Expand Up @@ -125,3 +128,58 @@ async def test_create_and_list_backups_in_cluster(ops_test: OpsTest, github_secr
assert backups == 1
except RetryError:
assert backups == 1, "Backup not created."


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_rotate_backup_password(ops_test: OpsTest) -> None:
    """Tests that rotating the backup password on the config-server reaches every shard.

    The test:
      1. checks that no shard already uses the new password,
      2. rotates the "backup" user's password via the config-server leader,
      3. checks that every shard now reports the new password,
      4. checks that a backup can still be created and listed after the rotation.
    """
    config_leader_id = await get_leader_id(ops_test, app_name=CONFIG_SERVER_APP_NAME)
    new_password = "new-password"
    shard_apps = [SHARD_ONE_APP_NAME, SHARD_TWO_APP_NAME]

    # NOTE(review): `get_password` is awaited on the assumption that it is a coroutine like the
    # other integration helpers used here (`get_leader_id`, `set_password`). Without the await
    # the comparisons below would be made against a coroutine object and be vacuous - confirm
    # against tests/integration/helpers.py.
    for shard_app in shard_apps:
        shard_backup_password = await get_password(
            ops_test, username="backup", app_name=shard_app
        )
        assert (
            shard_backup_password != new_password
        ), f"{shard_app} is incorrectly already set to the new password."

    await set_password(
        ops_test, unit_id=config_leader_id, username="backup", password=new_password
    )
    await ops_test.model.wait_for_idle(
        apps=[CONFIG_SERVER_APP_NAME, SHARD_ONE_APP_NAME, SHARD_TWO_APP_NAME],
        idle_period=20,
        timeout=TIMEOUT,
    )

    # after the rotation every shard must report the *new* password; the comparison has to be
    # `==`, otherwise a rotation that never reached the shards would go unnoticed
    for shard_app in shard_apps:
        shard_backup_password = await get_password(
            ops_test, username="backup", app_name=shard_app
        )
        assert (
            shard_backup_password == new_password
        ), f"Application {shard_app} did not rotate password"

    # verify backup actions work after password rotation
    leader_unit = await backup_helpers.get_leader_unit(
        ops_test, db_app_name=CONFIG_SERVER_APP_NAME
    )
    action = await leader_unit.run_action(action_name="create-backup")
    backup_result = await action.wait()
    assert (
        "backup started" in backup_result.results["backup-status"]
    ), "backup didn't start after password rotation"

    # verify backup is present in the list of backups
    # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a
    # backup can take a lot of time so this function returns once the command was successfully
    # sent to pbm. Therefore we should retry listing the backup several times
    try:
        for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)):
            with attempt:
                backups = await backup_helpers.count_logical_backups(leader_unit)
                # one backup was created by the earlier backup test, this is the second
                assert backups == 2
    except RetryError:
        assert backups == 2, "Backup not created after password rotation."

0 comments on commit 8f459ab

Please sign in to comment.