
[DPE-3500] Share backup password + create&list backups #351

Merged: 17 commits, Feb 20, 2024
13 changes: 12 additions & 1 deletion lib/charms/mongodb/v0/mongodb.py
@@ -22,6 +22,8 @@
wait_fixed,
)

from config import Config

# The unique Charmhub library identifier, never change it
LIBID = "49c69d9977574dd7942eb7b54f43355b"

@@ -30,7 +32,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 7
LIBPATCH = 8

# path to store mongodb keyFile
logger = logging.getLogger(__name__)
@@ -57,6 +59,7 @@ class MongoDBConfiguration:
roles: Set[str]
tls_external: bool
tls_internal: bool
standalone: bool = False

@property
def uri(self):
@@ -66,6 +69,14 @@ def uri(self):
auth_source = ""
if self.database != "admin":
auth_source = "&authSource=admin"

if self.standalone:
return (
f"mongodb://{quote_plus(self.username)}:"
f"{quote_plus(self.password)}@"
f"localhost:{Config.MONGODB_PORT}/?authSource=admin"
)

return (
f"mongodb://{quote_plus(self.username)}:"
f"{quote_plus(self.password)}@"
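
Note: with `standalone=True`, `uri` skips replica-set host enumeration and always targets the local mongod, which is what the pbm backup agent authenticates against. A minimal sketch of the resulting connection string, assuming `Config.MONGODB_PORT` is the MongoDB default 27017 and using hypothetical credentials:

from urllib.parse import quote_plus

username, password = "backup", "s3cr3t/pass"  # hypothetical values; real ones come from charm secrets
MONGODB_PORT = 27017  # assumed value of Config.MONGODB_PORT

# quote_plus guards against URI-breaking characters like "/" in generated passwords.
uri = (
    f"mongodb://{quote_plus(username)}:{quote_plus(password)}@"
    f"localhost:{MONGODB_PORT}/?authSource=admin"
)
print(uri)  # mongodb://backup:s3cr3t%2Fpass@localhost:27017/?authSource=admin
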
4 changes: 2 additions & 2 deletions lib/charms/mongodb/v1/mongodb_backups.py
@@ -40,7 +40,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 2
LIBPATCH = 3

logger = logging.getLogger(__name__)

@@ -57,7 +57,7 @@
REMAPPING_PATTERN = r"\ABackup doesn't match current cluster topology - it has different replica set names. Extra shards in the backup will cause this, for a simple example. The extra/unknown replica set names found in the backup are: ([^,\s]+)([.] Backup has no data for the config server or sole replicaset)?\Z"
PBM_STATUS_CMD = ["status", "-o", "json"]
MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current"
BACKUP_RESTORE_MAX_ATTEMPTS = 5
BACKUP_RESTORE_MAX_ATTEMPTS = 10
BACKUP_RESTORE_ATTEMPT_COOLDOWN = 15


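
Note: with the cooldown unchanged at 15 seconds, raising `BACKUP_RESTORE_MAX_ATTEMPTS` from 5 to 10 roughly doubles the worst-case retry window from ~75 s to ~150 s. A sketch of how these constants could drive a tenacity retry loop (an illustration only; the actual call site is outside this diff):

from tenacity import Retrying, stop_after_attempt, wait_fixed

BACKUP_RESTORE_MAX_ATTEMPTS = 10
BACKUP_RESTORE_ATTEMPT_COOLDOWN = 15  # seconds between attempts

def resync_pbm_config():
    """Hypothetical stand-in for the pbm operation that can fail transiently."""

# Up to 10 attempts spaced 15 s apart: ~150 s worst case before reraising.
for attempt in Retrying(
    stop=stop_after_attempt(BACKUP_RESTORE_MAX_ATTEMPTS),
    wait=wait_fixed(BACKUP_RESTORE_ATTEMPT_COOLDOWN),
    reraise=True,
):
    with attempt:
        resync_pbm_config()
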
35 changes: 23 additions & 12 deletions lib/charms/mongodb/v1/shards_interface.py
@@ -30,7 +30,7 @@
ShardNotInClusterError,
ShardNotPlannedForRemovalError,
)
from charms.mongodb.v1.users import MongoDBUser, OperatorUser
from charms.mongodb.v1.users import BackupUser, MongoDBUser, OperatorUser
from ops.charm import CharmBase, EventBase, RelationBrokenEvent
from ops.framework import Object
from ops.model import (
@@ -55,10 +55,11 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 6
LIBPATCH = 7
KEYFILE_KEY = "key-file"
HOSTS_KEY = "host"
OPERATOR_PASSWORD_KEY = MongoDBUser.get_password_key_name_for_user(OperatorUser.get_username())
BACKUP_PASSWORD_KEY = MongoDBUser.get_password_key_name_for_user(BackupUser.get_username())
FORBIDDEN_REMOVAL_ERR_CODE = 20
AUTH_FAILED_CODE = 18

@@ -118,6 +119,10 @@ def _on_relation_joined(self, event):
Config.Relations.APP_SCOPE,
OPERATOR_PASSWORD_KEY,
),
BACKUP_PASSWORD_KEY: self.charm.get_secret(
Config.Relations.APP_SCOPE,
BACKUP_PASSWORD_KEY,
),
KEYFILE_KEY: self.charm.get_secret(
Config.Relations.APP_SCOPE, Config.Secrets.SECRET_KEYFILE_NAME
),
@@ -450,7 +455,7 @@ def __init__(
self.database_requires = DatabaseRequires(
self.charm,
relation_name=self.relation_name,
additional_secret_fields=[KEYFILE_KEY, OPERATOR_PASSWORD_KEY],
additional_secret_fields=[KEYFILE_KEY, OPERATOR_PASSWORD_KEY, BACKUP_PASSWORD_KEY],
# a database isn't required for the relation between shards + config servers, but is a
# requirement for using `DatabaseRequires`
database_name="",
@@ -478,7 +483,6 @@ def _on_relation_changed(self, event):

# if re-using an old shard, re-set drained flag.
self.charm.unit_peer_data["drained"] = json.dumps(False)

self.charm.unit.status = MaintenanceStatus("Adding shard to config-server")

# shards rely on the config server for secrets
@@ -507,13 +511,19 @@ def _on_relation_changed(self, event):
operator_password = self.database_requires.fetch_relation_field(
event.relation.id, OPERATOR_PASSWORD_KEY
)
if not operator_password:
backup_password = self.database_requires.fetch_relation_field(
event.relation.id, BACKUP_PASSWORD_KEY
)
if not operator_password or not backup_password:
event.defer()
self.charm.unit.status = WaitingStatus("Waiting for secrets from config-server")
return

try:
self.update_operator_password(new_password=operator_password)
self.update_password(
username=OperatorUser.get_username(), new_password=operator_password
)
self.update_password(BackupUser.get_username(), new_password=backup_password)
except RetryError:
self.charm.unit.status = BlockedStatus("Shard not added to config-server")
logger.error(
@@ -522,6 +532,8 @@
event.defer()
return

# after updating the password of the backup user, restart pbm with the correct password
self.charm._connect_pbm_agent()
self.charm.app_peer_data["mongos_hosts"] = json.dumps(self.get_mongos_hosts())

def pass_hook_checks(self, event):
Expand Down Expand Up @@ -685,8 +697,8 @@ def drained(self, mongos_hosts: Set[str], shard_name: str) -> bool:
self.charm.unit_peer_data["drained"] = json.dumps(drained)
return drained

def update_operator_password(self, new_password: str) -> None:
"""Updates the password for the operator user.
def update_password(self, username: str, new_password: str) -> None:
"""Updates the password for the given user.

Raises:
RetryError
@@ -696,8 +708,7 @@ def update_operator_password(self, new_password: str) -> None:

current_password = (
self.charm.get_secret(
Config.Relations.APP_SCOPE,
OPERATOR_PASSWORD_KEY,
Config.Relations.APP_SCOPE, MongoDBUser.get_password_key_name_for_user(username)
),
)

@@ -712,7 +723,7 @@ def update_operator_password(self, new_password: str) -> None:
# a library, for exceptions used in both charm code and lib code.
with MongoDBConnection(self.charm.mongodb_config) as mongo:
try:
mongo.set_user_password(OperatorUser.get_username(), new_password)
mongo.set_user_password(username, new_password)
except NotReadyError:
logger.error(
"Failed changing the password: Not all members healthy or finished initial sync."
@@ -724,7 +735,7 @@ def update_operator_password(self, new_password: str) -> None:

self.charm.set_secret(
Config.Relations.APP_SCOPE,
OPERATOR_PASSWORD_KEY,
MongoDBUser.get_password_key_name_for_user(username),
new_password,
)

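
Note: collapsing `update_operator_password` into `update_password` gives the shard one rotation path for any internal user; the secret key is looked up per user instead of being hard-coded to the operator's. A minimal sketch of the pattern, assuming the key scheme is `<username>-password` (the real scheme lives in `charms.mongodb.v1.users` and is not shown in this diff):

def get_password_key_name_for_user(username: str) -> str:
    # Assumed naming scheme: one app-scoped secret key per internal user.
    return f"{username}-password"

def update_password(charm, username: str, new_password: str) -> None:
    """Set the user's password in MongoDB, then persist it in the app secret."""
    key = get_password_key_name_for_user(username)
    if charm.get_secret("app", key) == new_password:
        return  # already rotated, nothing to do
    with charm.mongodb_connection() as mongo:  # hypothetical connection helper
        mongo.set_user_password(username, new_password)
    charm.set_secret("app", key, new_password)

# One entry point now serves both users shared by the config server:
# update_password(charm, OperatorUser.get_username(), operator_password)
# update_password(charm, BackupUser.get_username(), backup_password)
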
7 changes: 5 additions & 2 deletions src/charm.py
@@ -225,7 +225,9 @@ def monitor_config(self) -> MongoDBConfiguration:
def backup_config(self) -> MongoDBConfiguration:
"""Generates a MongoDBConfiguration object for backup."""
self._check_or_set_user_password(BackupUser)
return self._get_mongodb_config_for_user(BackupUser, BackupUser.get_hosts())
return self._get_mongodb_config_for_user(
BackupUser, BackupUser.get_hosts(), standalone=True
)

@property
def unit_peer_data(self) -> Dict:
@@ -773,7 +775,7 @@ def _get_mongos_config_for_user(
)

def _get_mongodb_config_for_user(
self, user: MongoDBUser, hosts: Set[str]
self, user: MongoDBUser, hosts: Set[str], standalone: bool = False
) -> MongoDBConfiguration:
external_ca, _ = self.tls.get_tls_files(UNIT_SCOPE)
internal_ca, _ = self.tls.get_tls_files(APP_SCOPE)
@@ -787,6 +789,7 @@
roles=user.get_roles(),
tls_external=external_ca is not None,
tls_internal=internal_ca is not None,
standalone=standalone,
)

def _get_user_or_fail_event(self, event: ActionEvent, default_username: str) -> Optional[str]:
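
Note: because `standalone` defaults to `False` both on the dataclass and in `_get_mongodb_config_for_user`, every existing caller keeps replica-set URIs and only `backup_config` opts in. A cut-down sketch of that split, with hypothetical hosts and only the URI-relevant fields:

from dataclasses import dataclass
from typing import Set
from urllib.parse import quote_plus

MONGODB_PORT = 27017  # assumed value of Config.MONGODB_PORT

@dataclass
class MiniConfig:
    """Stand-in for MongoDBConfiguration, stripped to URI-relevant fields."""
    username: str
    password: str
    hosts: Set[str]
    standalone: bool = False  # new field; the default keeps old callers working

    @property
    def uri(self) -> str:
        creds = f"{quote_plus(self.username)}:{quote_plus(self.password)}"
        if self.standalone:
            return f"mongodb://{creds}@localhost:{MONGODB_PORT}/?authSource=admin"
        hosts = ",".join(f"{h}:{MONGODB_PORT}" for h in sorted(self.hosts))
        return f"mongodb://{creds}@{hosts}/admin"

print(MiniConfig("backup", "pw", {"10.1.0.5"}, standalone=True).uri)  # localhost URI for pbm
print(MiniConfig("operator", "pw", {"10.1.0.5", "10.1.0.6"}).uri)     # replica-set style URI
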
2 changes: 1 addition & 1 deletion src/config.py
@@ -17,7 +17,7 @@ class Config:
MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current"
MONGOD_CONF_DIR = f"{MONGODB_SNAP_DATA_DIR}/etc/mongod"
MONGOD_CONF_FILE_PATH = f"{MONGOD_CONF_DIR}/mongod.conf"
SNAP_PACKAGES = [("charmed-mongodb", "6/edge", 93)]
SNAP_PACKAGES = [("charmed-mongodb", "6/edge", 111)]

# Keep these alphabetically sorted
class Actions:
127 changes: 127 additions & 0 deletions tests/integration/sharding_tests/test_sharding_backups.py
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
# Copyright 2024 Canonical Ltd.
# See LICENSE file for licensing details.

import secrets
import string

import pytest
from pytest_operator.plugin import OpsTest
from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed

from ..backup_tests import helpers as backup_helpers

S3_APP_NAME = "s3-integrator"
SHARD_ONE_APP_NAME = "shard-one"
SHARD_TWO_APP_NAME = "shard-two"
SHARD_APPS = [SHARD_ONE_APP_NAME, SHARD_TWO_APP_NAME]
CONFIG_SERVER_APP_NAME = "config-server-one"
SHARD_REL_NAME = "sharding"
CONFIG_SERVER_REL_NAME = "config-server"
S3_REL_NAME = "s3-credentials"
TIMEOUT = 10 * 60


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_build_and_deploy(ops_test: OpsTest) -> None:
"""Build and deploy a sharded cluster."""
my_charm = await ops_test.build_charm(".")
await ops_test.model.deploy(
my_charm,
num_units=2,
config={"role": "config-server"},
application_name=CONFIG_SERVER_APP_NAME,
)
await ops_test.model.deploy(
my_charm, num_units=2, config={"role": "shard"}, application_name=SHARD_ONE_APP_NAME
)
await ops_test.model.deploy(
my_charm, num_units=1, config={"role": "shard"}, application_name=SHARD_TWO_APP_NAME
)

# deploy the s3 integrator charm
await ops_test.model.deploy(S3_APP_NAME, channel="edge")

await ops_test.model.wait_for_idle(
apps=[S3_APP_NAME, CONFIG_SERVER_APP_NAME, SHARD_ONE_APP_NAME, SHARD_TWO_APP_NAME],
idle_period=20,
raise_on_blocked=False,
timeout=TIMEOUT,
raise_on_error=False,
)


@pytest.mark.group(1)
async def test_set_credentials_in_cluster(ops_test: OpsTest, github_secrets) -> None:
"""Tests that sharded cluster can be configured for s3 configurations."""
await backup_helpers.set_credentials(ops_test, github_secrets, cloud="AWS")
choices = string.ascii_letters + string.digits
unique_path = "".join([secrets.choice(choices) for _ in range(4)])
configuration_parameters = {
"bucket": "data-charms-testing",
"path": f"mongodb-vm/test-{unique_path}",
"endpoint": "https://s3.amazonaws.com",
"region": "us-east-1",
}

# apply new configuration options
await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters)
await ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active", timeout=TIMEOUT)

# provide config-server to entire cluster and s3-integrator to config-server - integrations
# made in succession to test race conditions.
await ops_test.model.integrate(
f"{S3_APP_NAME}:{S3_REL_NAME}",
f"{CONFIG_SERVER_APP_NAME}:{S3_REL_NAME}",
)
await ops_test.model.integrate(
f"{SHARD_ONE_APP_NAME}:{SHARD_REL_NAME}",
f"{CONFIG_SERVER_APP_NAME}:{CONFIG_SERVER_REL_NAME}",
)
await ops_test.model.integrate(
f"{SHARD_TWO_APP_NAME}:{SHARD_REL_NAME}",
f"{CONFIG_SERVER_APP_NAME}:{CONFIG_SERVER_REL_NAME}",
)

await ops_test.model.wait_for_idle(
apps=[
CONFIG_SERVER_APP_NAME,
SHARD_ONE_APP_NAME,
SHARD_TWO_APP_NAME,
],
idle_period=20,
status="active",
timeout=TIMEOUT,
)


@pytest.mark.group(1)
async def test_create_and_list_backups_in_cluster(ops_test: OpsTest, github_secrets) -> None:
"""Tests that sharded cluster can successfully create and list backups."""
leader_unit = await backup_helpers.get_leader_unit(
ops_test, db_app_name=CONFIG_SERVER_APP_NAME
)
await backup_helpers.set_credentials(ops_test, github_secrets, cloud="AWS")
# verify backup list works
action = await leader_unit.run_action(action_name="list-backups")
list_result = await action.wait()
backups = list_result.results["backups"]
assert backups, "backups not outputted"

# verify backup is started
action = await leader_unit.run_action(action_name="create-backup")
backup_result = await action.wait()
assert "backup started" in backup_result.results["backup-status"], "backup didn't start"

# verify backup is present in the list of backups
# the `create-backup` action only confirms that the command was sent to pbm. Creating a
# backup can take a long time, so the action returns as soon as the command is
# successfully sent. Therefore we retry listing the backups several times.
try:
for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)):
with attempt:
backups = await backup_helpers.count_logical_backups(leader_unit)
assert backups == 1
except RetryError:
assert backups == 1, "Backup not created."