Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DPE-3499] prevent shards from recieving backup actions + integrating on the s3 interface #357

Merged
merged 24 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 52 additions & 19 deletions lib/charms/mongodb/v1/mongodb_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 3
LIBPATCH = 4

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -112,18 +112,34 @@ def __init__(self, charm):

# s3 relation handles the config options for s3 backups
self.s3_client = S3Requirer(self.charm, S3_RELATION)
self.framework.observe(
self.charm.on[S3_RELATION].relation_joined, self.on_s3_relation_joined
)
self.framework.observe(
self.s3_client.on.credentials_changed, self._on_s3_credential_changed
)
self.framework.observe(self.charm.on.create_backup_action, self._on_create_backup_action)
self.framework.observe(self.charm.on.list_backups_action, self._on_list_backups_action)
self.framework.observe(self.charm.on.restore_action, self._on_restore_action)

def on_s3_relation_joined(self, _) -> None:
"""Checks for valid integration for s3-integrations."""
if not self.is_valid_s3_integration():
logger.debug(
"Shard does not support s3 relations, please relate s3-integrator to config-server only."
)
self.charm.unit.status = BlockedStatus(
"Relation to s3-integrator is not supported, config role must be config-server"
)

def _on_s3_credential_changed(self, event: CredentialsChangedEvent):
"""Sets pbm credentials, resyncs if necessary and reports config errors."""
# handling PBM configurations requires that MongoDB is running and the pbm snap is
# installed.
action = "configure-pbm"
if not self._pass_sanity_checks(event, action):
return

if not self.charm.db_initialised:
self._defer_action_with_info_log(
event, action, "Set PBM credentials, MongoDB not ready."
Expand All @@ -140,12 +156,7 @@ def _on_s3_credential_changed(self, event: CredentialsChangedEvent):

def _on_create_backup_action(self, event) -> None:
action = "backup"
if self.model.get_relation(S3_RELATION) is None:
self._fail_action_with_error_log(
event,
action,
"Relation with s3-integrator charm missing, cannot create backup.",
)
if not self._pass_sanity_checks(event, action):
return

# only leader can create backups. This prevents multiple backups from being attempted at
Expand Down Expand Up @@ -195,12 +206,7 @@ def _on_create_backup_action(self, event) -> None:

def _on_list_backups_action(self, event) -> None:
action = "list-backups"
if self.model.get_relation(S3_RELATION) is None:
self._fail_action_with_error_log(
event,
action,
"Relation with s3-integrator charm missing, cannot list backups.",
)
if not self._pass_sanity_checks(event, action):
return

# cannot list backups if pbm is resyncing, or has incompatible options or incorrect
Expand Down Expand Up @@ -229,12 +235,7 @@ def _on_list_backups_action(self, event) -> None:

def _on_restore_action(self, event) -> None:
action = "restore"
if self.model.get_relation(S3_RELATION) is None:
self._fail_action_with_error_log(
event,
action,
"Relation with s3-integrator charm missing, cannot restore from a backup.",
)
if not self._pass_sanity_checks(event, action):
return

backup_id = event.params.get("backup-id")
Expand Down Expand Up @@ -289,6 +290,38 @@ def _on_restore_action(self, event) -> None:
self._fail_action_with_error_log(event, action, str(restore_error))

# BEGIN: helper functions
def is_valid_s3_integration(self) -> bool:
"""Return true if relation to s3-integrator is valid.

Only replica sets and config servers can integrate to s3-integrator.
"""
if self.charm.is_role(Config.Role.SHARD) and self.model.get_relation(S3_RELATION):
return False

return True

def _pass_sanity_checks(self, event, action) -> bool:
"""Return True if basic pre-conditions for running backup actions are met.

No matter what backup-action is being run, these requirements must be met.
"""
if not self.is_valid_s3_integration():
self._fail_action_with_error_log(
event,
action,
"Shards do not support backup operations, please run action on config-server.",
)
return False

if self.model.get_relation(S3_RELATION) is None:
self._fail_action_with_error_log(
event,
action,
"Relation with s3-integrator charm missing, cannot restore from a backup.",
)
return False

return True

def _configure_pbm_options(self, event) -> None:
action = "configure-pbm"
Expand Down
5 changes: 5 additions & 0 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1368,6 +1368,11 @@ def get_invalid_integration_status(self) -> Optional[StatusBase]:
"Relation to mongos not supported, config role must be config-server"
)

if not self.backups.is_valid_s3_integration():
return BlockedStatus(
"Relation to s3-integrator is not supported, config role must be config-server"
)

def get_status(self) -> StatusBase:
"""Returns the status with the highest priority from backups, sharding, and mongod.

Expand Down
17 changes: 17 additions & 0 deletions tests/integration/sharding_tests/test_sharding_backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,23 @@ async def test_create_and_list_backups_in_cluster(ops_test: OpsTest, github_secr
assert backups == 1, "Backup not created."


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_shards_cannot_run_backup_actions(ops_test: OpsTest) -> None:
shard_unit = await backup_helpers.get_leader_unit(ops_test, db_app_name=SHARD_ONE_APP_NAME)
action = await shard_unit.run_action(action_name="create-backup")
attempted_backup = await action.wait()
assert attempted_backup.status == "failed", "shard ran create-backup command."

action = await shard_unit.run_action(action_name="list-backups")
attempted_backup = await action.wait()
assert attempted_backup.status == "failed", "shard ran list-backup command."

action = await shard_unit.run_action(action_name="restore")
attempted_backup = await action.wait()
assert attempted_backup.status == "failed", "shard ran list-backup command."


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_rotate_backup_password(ops_test: OpsTest) -> None:
Expand Down
51 changes: 44 additions & 7 deletions tests/integration/sharding_tests/test_sharding_relations.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
#!/usr/bin/env python3
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.
import time

import pytest
from juju.errors import JujuAPIError
from pytest_operator.plugin import OpsTest

S3_APP_NAME = "s3-integrator"
SHARD_ONE_APP_NAME = "shard"
CONFIG_SERVER_ONE_APP_NAME = "config-server-one"
CONFIG_SERVER_TWO_APP_NAME = "config-server-two"
Expand Down Expand Up @@ -58,6 +57,7 @@ async def test_build_and_deploy(
channel="6/edge",
revision=3,
)
await ops_test.model.deploy(S3_APP_NAME, channel="edge")

# TODO: Future PR, once data integrator works with mongos charm deploy that charm instead of
# packing and deploying the charm in the application dir.
Expand Down Expand Up @@ -287,11 +287,12 @@ async def test_replication_mongos_relation(ops_test: OpsTest) -> None:
f"{MONGOS_APP_NAME}:cluster",
)

# TODO remove this and wait for mongos to be active
# right now we cannot wait for `mongos` to be active after removing the relation due to a bug
# in the mongos charm. To fix the bug it is first necessary to publish the updated library
# lib/charms/mongodb/v0/config_server.py
time.sleep(60)
await ops_test.model.wait_for_idle(
apps=[SHARD_ONE_APP_NAME],
idle_period=20,
raise_on_blocked=False,
timeout=TIMEOUT,
)


@pytest.mark.group(1)
Expand Down Expand Up @@ -321,3 +322,39 @@ async def test_shard_mongos_relation(ops_test: OpsTest) -> None:
f"{MONGOS_APP_NAME}:cluster",
f"{SHARD_ONE_APP_NAME}:cluster",
)

await ops_test.model.wait_for_idle(
apps=[SHARD_ONE_APP_NAME],
idle_period=20,
raise_on_blocked=False,
timeout=TIMEOUT,
)


@pytest.mark.group(1)
async def test_shard_s3_relation(ops_test: OpsTest) -> None:
"""Verifies integrating a shard to s3-integrator fails."""
# attempt to add a replication deployment as a shard to the config server.
await ops_test.model.integrate(
f"{SHARD_ONE_APP_NAME}",
f"{S3_APP_NAME}",
)

await ops_test.model.wait_for_idle(
apps=[SHARD_ONE_APP_NAME],
idle_period=20,
raise_on_blocked=False,
timeout=TIMEOUT,
)

shard_unit = ops_test.model.applications[SHARD_ONE_APP_NAME].units[0]
assert (
shard_unit.workload_status_message
== "Relation to s3-integrator is not supported, config role must be config-server"
), "Shard cannot be related to s3-integrator."

# clean up relations
await ops_test.model.applications[SHARD_ONE_APP_NAME].remove_relation(
f"{S3_APP_NAME}:s3-credentials",
f"{SHARD_ONE_APP_NAME}:s3-credentials",
)
Loading