Merge branch '6/edge' into dratushnyy/fix_mongodb_logs
dmitry-ratushnyy authored Jan 15, 2024
2 parents a136906 + c28e7b9 commit 66c48da
Showing 16 changed files with 957 additions and 682 deletions.
1 change: 0 additions & 1 deletion .github/workflows/ci.yaml
@@ -100,7 +100,6 @@ jobs:
with:
provider: lxd
juju-channel: 3.1/stable
bootstrap-options: "--agent-version 3.1.6"
- name: Download packed charm(s)
uses: actions/download-artifact@v3
with:
32 changes: 29 additions & 3 deletions lib/charms/mongodb/v0/config_server_interface.py
@@ -35,7 +35,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 2
LIBPATCH = 3


class ClusterProvider(Object):
@@ -54,8 +54,13 @@ def __init__(
charm.on[self.relation_name].relation_changed, self._on_relation_changed
)

# TODO Future PRs handle scale down
# TODO Future PRs handle changing of units/passwords to be propagated to mongos
self.framework.observe(
charm.on[self.relation_name].relation_departed,
self.charm.check_relation_broken_or_scale_down,
)
self.framework.observe(
charm.on[self.relation_name].relation_broken, self._on_relation_broken
)

def pass_hook_checks(self, event: EventBase) -> bool:
"""Runs the pre-hooks checks for ClusterProvider, returns True if all pass."""
@@ -97,6 +102,26 @@ def _on_relation_changed(self, event) -> None:
},
)

def _on_relation_broken(self, event) -> None:
# Only relation_departed events can check if we are scaling down
departed_relation_id = event.relation.id
if not self.charm.has_departed_run(departed_relation_id):
logger.info(
"Deferring, must wait for relation departed hook to decide if relation should be removed."
)
event.defer()
return

if not self.pass_hook_checks(event):
logger.info("Skipping relation broken event: hook checks did not pass")
return

if not self.charm.proceed_on_broken_event(event):
logger.info("Skipping relation broken event, broken event due to scale down")
return

self.charm.client_relations.oversee_users(departed_relation_id, event)

def update_config_server_db(self, event):
"""Provides related mongos applications with new config server db."""
if not self.pass_hook_checks(event):
@@ -157,6 +182,7 @@ def __init__(
self.framework.observe(
charm.on[self.relation_name].relation_changed, self._on_relation_changed
)

# TODO Future PRs handle scale down

def _on_database_created(self, event) -> None:
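The new relation_broken handler depends on Juju's hook ordering: relation_departed always fires before relation_broken, so the broken hook defers until the departed hook has recorded whether this unit itself is leaving. A condensed sketch of the two-hook handshake, using the helper names from this diff (the helpers themselves live in src/charm.py, shown further down):

    # relation_departed: record whether this unit is the one leaving (scale down)
    def _on_relation_departed(self, event) -> None:
        key = self._generate_relation_departed_key(event.relation.id)
        self.unit_peer_data[key] = json.dumps(event.departing_unit == self.unit)

    # relation_broken: wait for that record, then decide whether to clean up
    def _on_relation_broken(self, event) -> None:
        if not self.has_departed_run(event.relation.id):
            event.defer()  # the departed hook has not stored its verdict yet
            return
        if self.is_scaling_down(event.relation.id):
            return  # application is scaling down; keep the relation's users
        # a genuine relation removal: proceed to remove the relation's users
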
18 changes: 16 additions & 2 deletions lib/charms/mongodb/v0/mongodb_secrets.py
@@ -6,7 +6,7 @@

from ops import Secret, SecretInfo
from ops.charm import CharmBase
from ops.model import SecretNotFoundError
from ops.model import ModelError, SecretNotFoundError

from config import Config
from exceptions import SecretAlreadyExistsError
@@ -93,7 +93,21 @@ def get_content(self) -> Dict[str, str]:
"""Getting cached secret content."""
if not self._secret_content:
if self.meta:
self._secret_content = self.meta.get_content()
try:
self._secret_content = self.meta.get_content(refresh=True)
except (ValueError, ModelError) as err:
# https://bugs.launchpad.net/juju/+bug/2042596
# Only triggered when 'refresh' is set
known_model_errors = [
"ERROR either URI or label should be used for getting an owned secret but not both",
"ERROR secret owner cannot use --refresh",
]
if isinstance(err, ModelError) and not any(
msg in str(err) for msg in known_model_errors
):
raise
# Due to: ValueError: Secret owner cannot use refresh=True
self._secret_content = self.meta.get_content()
return self._secret_content

def set_content(self, content: Dict[str, str]) -> None:
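The new get_content logic works around a Juju bug (LP#2042596): a secret consumer must pass refresh=True to read the latest revision, while a secret owner doing the same is rejected with one of the two known errors above. Restated as a standalone sketch (hypothetical helper name; the error substrings are abbreviated from the diff):

    from typing import Dict
    from ops import Secret
    from ops.model import ModelError

    KNOWN_OWNER_ERRORS = (
        "either URI or label should be used for getting an owned secret but not both",
        "secret owner cannot use --refresh",
    )

    def get_latest_content(secret: Secret) -> Dict[str, str]:
        try:
            # consumers: fetch the newest revision
            return secret.get_content(refresh=True)
        except (ValueError, ModelError) as err:
            # owners cannot refresh their own secret; re-raise anything unexpected
            if isinstance(err, ModelError) and not any(m in str(err) for m in KNOWN_OWNER_ERRORS):
                raise
            # an owner already holds the latest revision, so a plain read is safe
            return secret.get_content()
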
26 changes: 2 additions & 24 deletions lib/charms/mongodb/v1/shards_interface.py
@@ -51,7 +51,7 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 2
LIBPATCH = 3
KEYFILE_KEY = "key-file"
HOSTS_KEY = "host"
OPERATOR_PASSWORD_KEY = MongoDBUser.get_password_key_name_for_user(OperatorUser.get_username())
@@ -148,28 +148,6 @@ def pass_hook_checks(self, event: EventBase) -> bool:

return True

def _proceed_on_broken_event(self, event) -> int:
"""Returns relation_id if relation broken event occurred due to a removed relation."""
departed_relation_id = None

# Only relation_departed events can check if we are scaling down
departed_relation_id = event.relation.id
if not self.charm.has_departed_run(departed_relation_id):
logger.info(
"Deferring, must wait for relation departed hook to decide if relation should be removed."
)
event.defer()
return

# check if we're scaling down and add a log message
if self.charm.is_scaling_down(event.relation.id):
logger.info(
"Relation broken event occurring due to scale down, do not proceed to remove users."
)
return

return departed_relation_id

def _on_relation_event(self, event):
"""Handles adding and removing of shards.
@@ -181,7 +159,7 @@ def _on_relation_event(self, event):

departed_relation_id = None
if isinstance(event, RelationBrokenEvent):
departed_relation_id = self._proceed_on_broken_event(event)
departed_relation_id = self.charm.proceed_on_broken_event(event)
if not departed_relation_id:
return

26 changes: 24 additions & 2 deletions src/charm.py
@@ -1319,10 +1319,32 @@ def set_scaling_down(self, event: RelationDepartedEvent) -> bool:
# check if relation departed is due to current unit being removed. (i.e. scaling down the
# application.)
rel_departed_key = self._generate_relation_departed_key(event.relation.id)
scaling_down = json.dumps(event.departing_unit == self.unit)
self.unit_peer_data[rel_departed_key] = scaling_down
scaling_down = event.departing_unit == self.unit
self.unit_peer_data[rel_departed_key] = json.dumps(scaling_down)
return scaling_down

def proceed_on_broken_event(self, event) -> Optional[int]:
"""Returns the relation id if the broken event is due to a removed relation, otherwise None."""
departed_relation_id = None

# Only relation_departed events can check if we are scaling down
departed_relation_id = event.relation.id
if not self.has_departed_run(departed_relation_id):
logger.info(
"Deferring, must wait for relation departed hook to decide if relation should be removed."
)
event.defer()
return

# check if we're scaling down and add a log message
if self.is_scaling_down(departed_relation_id):
logger.info(
"Relation broken event occurring due to scale down, do not proceed to remove users."
)
return

return departed_relation_id

@staticmethod
def _generate_relation_departed_key(rel_id: int) -> str:
"""Generates the relation departed key for a specified relation id."""
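Hoisting _proceed_on_broken_event out of shards_interface.py onto the charm lets the sharding provider and the new ClusterProvider broken-event handler share a single scale-down check. The call-site pattern, as seen in both interface libraries above:

    departed_relation_id = self.charm.proceed_on_broken_event(event)
    if not departed_relation_id:
        return  # either deferred (departed hook not done) or a scale down

Note that the method returns None both when it defers and when it detects a scale down, so callers bail out the same way in either case.
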
29 changes: 6 additions & 23 deletions tests/integration/backup_tests/helpers.py
@@ -9,6 +9,7 @@
from tenacity import RetryError, Retrying, stop_after_attempt, wait_fixed

from ..ha_tests import helpers as ha_helpers
from ..helpers import get_app_name

S3_APP_NAME = "s3-integrator"
TIMEOUT = 10 * 60
@@ -62,30 +63,12 @@ async def create_and_verify_backup(ops_test: OpsTest) -> None:

async def get_leader_unit(ops_test: OpsTest, db_app_name=None) -> ops.model.Unit:
"""Returns the leader unit of the database charm."""
db_app_name = db_app_name or await app_name(ops_test)
db_app_name = db_app_name or await get_app_name(ops_test)
for unit in ops_test.model.applications[db_app_name].units:
if await unit.is_leader_from_status():
return unit


async def app_name(ops_test: OpsTest) -> str:
"""Returns the name of the cluster running MongoDB.
This is important since not all deployments of the MongoDB charm have the application name
"mongodb".
Note: if multiple clusters are running MongoDB this will return the first one found.
"""
status = await ops_test.model.get_status()
for app in ops_test.model.applications:
# note that format of the charm field is not exactly "mongodb" but instead takes the form
# of `local:focal/mongodb-6`
if "mongodb" in status["applications"][app]["charm"]:
return app

return None


async def count_logical_backups(db_unit: ops.model.Unit) -> int:
"""Count the number of logical backups."""
action = await db_unit.run_action(action_name="list-backups")
@@ -142,11 +125,11 @@ def is_relation_joined(ops_test: OpsTest, endpoint_one: str, endpoint_two: str)

async def insert_unwanted_data(ops_test: OpsTest) -> None:
"""Inserts the data into the MongoDB cluster via primary replica."""
app = await app_name(ops_test)
ip_addresses = [unit.public_address for unit in ops_test.model.applications[app].units]
app_name = await get_app_name(ops_test)
ip_addresses = [unit.public_address for unit in ops_test.model.applications[app_name].units]
primary = (await ha_helpers.replica_set_primary(ip_addresses, ops_test)).public_address
password = await ha_helpers.get_password(ops_test, app)
client = MongoClient(ha_helpers.unit_uri(primary, password, app), directConnection=True)
password = await ha_helpers.get_password(ops_test, app_name)
client = MongoClient(ha_helpers.unit_uri(primary, password, app_name), directConnection=True)
db = client["new-db"]
test_collection = db["test_collection"]
test_collection.insert_one({"unwanted_data": "bad data 1"})
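The local app_name helper is removed in favour of the shared get_app_name imported from ..helpers. That shared implementation is not part of this diff; judging by the removed code it replaces, it presumably looks something like this (an inferred sketch, not the actual file):

    from typing import Optional
    from pytest_operator.plugin import OpsTest

    async def get_app_name(ops_test: OpsTest) -> Optional[str]:
        """Returns the name of the application running MongoDB, or None if absent."""
        status = await ops_test.model.get_status()
        for app in ops_test.model.applications:
            # the charm field reads like `local:focal/mongodb-6`, not plain "mongodb"
            if "mongodb" in status["applications"][app]["charm"]:
                return app
        return None
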
34 changes: 21 additions & 13 deletions tests/integration/backup_tests/test_backups.py
@@ -2,6 +2,7 @@
# Copyright 2023 Canonical Ltd.
# See LICENSE file for licensing details.
import asyncio
import logging
import secrets
import string
import time
@@ -10,6 +11,8 @@
from pytest_operator.plugin import OpsTest
from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed

from tests.integration.helpers import get_app_name

from ..ha_tests import helpers as ha_helpers
from . import helpers

@@ -18,6 +21,8 @@
ENDPOINT = "s3-credentials"
NEW_CLUSTER = "new-mongodb"

logger = logging.getLogger(__name__)


@pytest.fixture()
async def continuous_writes_to_db(ops_test: OpsTest):
@@ -43,7 +48,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None:
"""Build and deploy one unit of MongoDB."""
# it is possible for users to provide their own cluster for testing. Hence check if there
# is a pre-existing cluster.
if not await helpers.app_name(ops_test):
if not await get_app_name(ops_test):
db_charm = await ops_test.build_charm(".")
await ops_test.model.deploy(db_charm, num_units=3)

@@ -56,7 +61,7 @@
@pytest.mark.abort_on_fail
async def test_blocked_incorrect_creds(ops_test: OpsTest) -> None:
"""Verifies that the charm goes into blocked status when s3 creds are incorrect."""
db_app_name = await helpers.app_name(ops_test)
db_app_name = await get_app_name(ops_test)

# set incorrect s3 credentials
s3_integrator_unit = ops_test.model.applications[S3_APP_NAME].units[0]
@@ -86,7 +91,7 @@ async def test_blocked_incorrect_creds(ops_test: OpsTest) -> None:
@pytest.mark.abort_on_fail
async def test_blocked_incorrect_conf(ops_test: OpsTest) -> None:
"""Verifies that the charm goes into blocked status when s3 config options are incorrect."""
db_app_name = await helpers.app_name(ops_test)
db_app_name = await get_app_name(ops_test)

# set correct AWS credentials for s3 storage but incorrect configs
await helpers.set_credentials(ops_test, cloud="AWS")
@@ -105,7 +110,7 @@
@pytest.mark.abort_on_fail
async def test_ready_correct_conf(ops_test: OpsTest) -> None:
"""Verifies charm goes into active status when s3 config and creds options are correct."""
db_app_name = await helpers.app_name(ops_test)
db_app_name = await get_app_name(ops_test)
choices = string.ascii_letters + string.digits
unique_path = "".join([secrets.choice(choices) for _ in range(4)])
configuration_parameters = {
@@ -127,16 +132,19 @@

@pytest.mark.abort_on_fail
async def test_create_and_list_backups(ops_test: OpsTest) -> None:
db_unit = await helpers.get_leader_unit(ops_test)

db_app_name = await get_app_name(ops_test)
leader_unit = await helpers.get_leader_unit(ops_test, db_app_name=db_app_name)
await helpers.set_credentials(ops_test, cloud="AWS")
# verify backup list works
action = await db_unit.run_action(action_name="list-backups")
logger.error("!!!!! test_create_and_list_backups >>> %s", leader_unit)
action = await leader_unit.run_action(action_name="list-backups")
list_result = await action.wait()
logger.error("!!!!! test_create_and_list_backups >>> %s", list_result.results)
backups = list_result.results["backups"]
assert backups, "backups not outputted"

# verify backup is started
action = await db_unit.run_action(action_name="create-backup")
action = await leader_unit.run_action(action_name="create-backup")
backup_result = await action.wait()
assert "backup started" in backup_result.results["backup-status"], "backup didn't start"

@@ -147,7 +155,7 @@ async def test_create_and_list_backups(ops_test: OpsTest) -> None:
try:
for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)):
with attempt:
backups = await helpers.count_logical_backups(db_unit)
backups = await helpers.count_logical_backups(leader_unit)
assert backups == 1
except RetryError:
assert backups == 1, "Backup not created."
@@ -161,7 +169,7 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None:
from AWS to GCP. This test verifies that the first backup in AWS is made, that the second
backup in GCP is made, and that pbm correctly resyncs before the second backup is made.
"""
db_app_name = await helpers.app_name(ops_test)
db_app_name = await get_app_name(ops_test)
db_unit = await helpers.get_leader_unit(ops_test)

# create first backup once ready
@@ -247,7 +255,7 @@ async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None:
assert number_writes > 0, "no writes to backup"

# create a backup in the AWS bucket
db_app_name = await helpers.app_name(ops_test)
db_app_name = await get_app_name(ops_test)
db_unit = await helpers.get_leader_unit(ops_test)
prev_backups = await helpers.count_logical_backups(db_unit)
action = await db_unit.run_action(action_name="create-backup")
@@ -296,7 +304,7 @@ async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None:
@pytest.mark.parametrize("cloud_provider", ["AWS", "GCP"])
async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_provider):
# configure test for the cloud provider
db_app_name = await helpers.app_name(ops_test)
db_app_name = await get_app_name(ops_test)
await helpers.set_credentials(ops_test, cloud=cloud_provider)
if cloud_provider == "AWS":
configuration_parameters = {
@@ -383,7 +391,7 @@ async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_pr
@pytest.mark.abort_on_fail
async def test_update_backup_password(ops_test: OpsTest) -> None:
"""Verifies that after changing the backup password the pbm tool is updated and functional."""
db_app_name = await helpers.app_name(ops_test)
db_app_name = await get_app_name(ops_test)
db_unit = await helpers.get_leader_unit(ops_test)

# wait for charm to be idle before setting password
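Throughout these tests, eventually-consistent state is polled with tenacity instead of fixed sleeps; the recurring pattern (see test_create_and_list_backups) re-runs an assertion until it passes or the deadline expires, then surfaces the real assertion error. A generic sketch of the idiom (hypothetical helper):

    from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed

    def wait_for(check, timeout: int = 20, interval: int = 3) -> None:
        """Re-run `check` (which raises AssertionError on failure) until it passes."""
        try:
            for attempt in Retrying(stop=stop_after_delay(timeout), wait=wait_fixed(interval)):
                with attempt:
                    check()
        except RetryError:
            check()  # one final run so the genuine assertion reaches the report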