diff --git a/lib/charms/mongodb/v0/config_server_interface.py b/lib/charms/mongodb/v0/config_server_interface.py index e9b3ab0ca..70df7aa37 100644 --- a/lib/charms/mongodb/v0/config_server_interface.py +++ b/lib/charms/mongodb/v0/config_server_interface.py @@ -35,7 +35,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 4 +LIBPATCH = 5 class ClusterProvider(Object): @@ -64,6 +64,11 @@ def __init__( def pass_hook_checks(self, event: EventBase) -> bool: """Runs the pre-hooks checks for ClusterProvider, returns True if all pass.""" + if not self.charm.db_initialised: + logger.info("Deferring %s. db is not initialised.", type(event)) + event.defer() + return False + if not self.charm.is_role(Config.Role.CONFIG_SERVER): logger.info( "Skipping %s. ShardingProvider is only be executed by config-server", type(event) @@ -73,11 +78,6 @@ def pass_hook_checks(self, event: EventBase) -> bool: if not self.charm.unit.is_leader(): return False - if not self.charm.db_initialised: - logger.info("Deferring %s. db is not initialised.", type(event)) - event.defer() - return False - return True def _on_relation_changed(self, event) -> None: diff --git a/lib/charms/mongodb/v1/mongodb_provider.py b/lib/charms/mongodb/v1/mongodb_provider.py index 691997a8b..ff9bf688c 100644 --- a/lib/charms/mongodb/v1/mongodb_provider.py +++ b/lib/charms/mongodb/v1/mongodb_provider.py @@ -31,7 +31,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 3 +LIBPATCH = 4 logger = logging.getLogger(__name__) REL_NAME = "database" @@ -87,6 +87,11 @@ def __init__(self, charm: CharmBase, substrate="k8s", relation_name: str = "data def pass_hook_checks(self) -> bool: """Runs the pre-hooks checks for MongoDBProvider, returns True if all pass.""" + # We shouldn't try to create or update users if the database is not + # initialised. We will create users as part of initialisation. + if not self.charm.db_initialised: + return False + if not self.charm.is_relation_feasible(self.relation_name): logger.info("Skipping code for relations.") return False @@ -100,11 +105,6 @@ def pass_hook_checks(self) -> bool: if not self.charm.unit.is_leader(): return False - # We shouldn't try to create or update users if the database is not - # initialised. We will create users as part of initialisation. - if not self.charm.db_initialised: - return False - return True def _on_relation_event(self, event): diff --git a/lib/charms/mongodb/v1/shards_interface.py b/lib/charms/mongodb/v1/shards_interface.py index a9cd9f637..b9a876aec 100644 --- a/lib/charms/mongodb/v1/shards_interface.py +++ b/lib/charms/mongodb/v1/shards_interface.py @@ -51,7 +51,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 4 +LIBPATCH = 5 KEYFILE_KEY = "key-file" HOSTS_KEY = "host" OPERATOR_PASSWORD_KEY = MongoDBUser.get_password_key_name_for_user(OperatorUser.get_username()) @@ -121,6 +121,11 @@ def _on_relation_joined(self, event): def pass_hook_checks(self, event: EventBase) -> bool: """Runs the pre-hooks checks for ShardingProvider, returns True if all pass.""" + if not self.charm.db_initialised: + logger.info("Deferring %s. db is not initialised.", type(event)) + event.defer() + return False + if not self.charm.is_relation_feasible(self.relation_name): logger.info("Skipping event %s , relation not feasible.", type(event)) return False @@ -134,11 +139,6 @@ def pass_hook_checks(self, event: EventBase) -> bool: if not self.charm.unit.is_leader(): return False - if not self.charm.db_initialised: - logger.info("Deferring %s. db is not initialised.", type(event)) - event.defer() - return False - # adding/removing shards while a backup/restore is in progress can be disastrous pbm_status = self.charm.backups.get_pbm_status() if isinstance(pbm_status, MaintenanceStatus): @@ -507,6 +507,11 @@ def _on_relation_changed(self, event): def pass_hook_checks(self, event): """Runs the pre-hooks checks for ConfigServerRequirer, returns True if all pass.""" + if not self.charm.db_initialised: + logger.info("Deferring %s. db is not initialised.", type(event)) + event.defer() + return False + if not self.charm.is_relation_feasible(self.relation_name): logger.info("Skipping event %s , relation not feasible.", type(event)) return False @@ -515,11 +520,6 @@ def pass_hook_checks(self, event): logger.info("skipping %s is only be executed by shards", type(event)) return False - if not self.charm.db_initialised: - logger.info("Deferring %s. db is not initialised.", type(event)) - event.defer() - return False - mongos_hosts = event.relation.data[event.relation.app].get(HOSTS_KEY, None) if isinstance(event, RelationBrokenEvent) and not mongos_hosts: logger.info("Config-server relation never set up, no need to process broken event.") diff --git a/tests/integration/sharding_tests/test_sharding_race_conds.py b/tests/integration/sharding_tests/test_sharding_race_conds.py new file mode 100644 index 000000000..f21a97851 --- /dev/null +++ b/tests/integration/sharding_tests/test_sharding_race_conds.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python3 +# Copyright 2023 Canonical Ltd. +# See LICENSE file for licensing details. + +import pytest +from pytest_operator.plugin import OpsTest + +from .helpers import generate_mongodb_client, has_correct_shards + +SHARD_ONE_APP_NAME = "shard-one" +SHARD_TWO_APP_NAME = "shard-two" +SHARD_THREE_APP_NAME = "shard-three" +SHARD_APPS = [SHARD_ONE_APP_NAME, SHARD_TWO_APP_NAME] +CONFIG_SERVER_APP_NAME = "config-server-one" +SHARD_REL_NAME = "sharding" +CONFIG_SERVER_REL_NAME = "config-server" +MONGODB_KEYFILE_PATH = "/var/snap/charmed-mongodb/current/etc/mongod/keyFile" + +TIMEOUT = 60 * 10 + + +@pytest.mark.abort_on_fail +async def test_build_and_deploy(ops_test: OpsTest) -> None: + """Build and deploy a sharded cluster.""" + my_charm = await ops_test.build_charm(".") + await ops_test.model.deploy( + my_charm, + num_units=2, + config={"role": "config-server"}, + application_name=CONFIG_SERVER_APP_NAME, + ) + await ops_test.model.deploy( + my_charm, num_units=2, config={"role": "shard"}, application_name=SHARD_ONE_APP_NAME + ) + await ops_test.model.deploy( + my_charm, num_units=2, config={"role": "shard"}, application_name=SHARD_TWO_APP_NAME + ) + await ops_test.model.deploy( + my_charm, num_units=2, config={"role": "shard"}, application_name=SHARD_THREE_APP_NAME + ) + + +@pytest.mark.abort_on_fail +async def test_immediate_relate(ops_test: OpsTest) -> None: + """Tests the immediate integration of cluster components works without error.""" + await ops_test.model.integrate( + f"{SHARD_ONE_APP_NAME}:{SHARD_REL_NAME}", + f"{CONFIG_SERVER_APP_NAME}:{CONFIG_SERVER_REL_NAME}", + ) + await ops_test.model.integrate( + f"{SHARD_TWO_APP_NAME}:{SHARD_REL_NAME}", + f"{CONFIG_SERVER_APP_NAME}:{CONFIG_SERVER_REL_NAME}", + ) + await ops_test.model.integrate( + f"{SHARD_THREE_APP_NAME}:{SHARD_REL_NAME}", + f"{CONFIG_SERVER_APP_NAME}:{CONFIG_SERVER_REL_NAME}", + ) + + await ops_test.model.wait_for_idle( + apps=[ + CONFIG_SERVER_APP_NAME, + SHARD_ONE_APP_NAME, + SHARD_TWO_APP_NAME, + SHARD_THREE_APP_NAME, + ], + idle_period=20, + status="active", + timeout=TIMEOUT, + raise_on_error=False, + ) + + mongos_client = await generate_mongodb_client( + ops_test, app_name=CONFIG_SERVER_APP_NAME, mongos=True + ) + + # verify sharded cluster config + assert has_correct_shards( + mongos_client, + expected_shards=[SHARD_ONE_APP_NAME, SHARD_TWO_APP_NAME, SHARD_THREE_APP_NAME], + ), "Config server did not process config properly" diff --git a/tests/unit/test_config_server_lib.py b/tests/unit/test_config_server_lib.py index 45b7d4d90..3a7f27fb2 100644 --- a/tests/unit/test_config_server_lib.py +++ b/tests/unit/test_config_server_lib.py @@ -84,3 +84,59 @@ def is_config_mock_call(*args): self.harness.set_leader(False) self.harness.charm.cluster.update_config_server_db(mock.Mock()) self.harness.charm.cluster.database_provides.update_relation_data.assert_not_called() + + def test_pass_hooks_check_waits_for_start_config_server(self): + """Ensure that pass_hooks defers until the database is initialized. + + Note: in some cases sharding related hooks execute before config and leader elected hooks, + therefore it is important that the `pass_hooks_check` defers an event until the database + has been started + """ + + def is_shard_mock_call(*args): + return args == ("shard") + + self.harness.charm.is_role = is_shard_mock_call + + event = mock.Mock() + event.params = {} + + self.harness.set_leader(False) + self.harness.charm.config_server.pass_hook_checks(event) + event.defer.assert_called() + + # once the database has been initialised, pass hooks check should no longer defer if the + # unit is not the leader nor is the wrong wrole + event = mock.Mock() + event.params = {} + self.harness.charm.app_peer_data["db_initialised"] = "True" + self.harness.charm.config_server.pass_hook_checks(event) + event.defer.assert_not_called() + + def test_pass_hooks_check_waits_for_start_shard(self): + """Ensure that pass_hooks defers until the database is initialized. + + Note: in some cases sharding related hooks execute before config and leader elected hooks, + therefore it is important that the `pass_hooks_check` defers an event until the database + has been started + """ + + def is_config_mock_call(*args): + return args == ("config-server") + + self.harness.charm.is_role = is_config_mock_call + + event = mock.Mock() + event.params = {} + + self.harness.set_leader(False) + self.harness.charm.shard.pass_hook_checks(event) + event.defer.assert_called() + + # once the database has been initialised, pass hooks check should no longer defer if the + # unit is not the leader nor is the wrong wrole + event = mock.Mock() + event.params = {} + self.harness.charm.app_peer_data["db_initialised"] = "True" + self.harness.charm.shard.pass_hook_checks(event) + event.defer.assert_not_called() diff --git a/tox.ini b/tox.ini index 34c97d6e5..700b12c10 100644 --- a/tox.ini +++ b/tox.ini @@ -216,6 +216,22 @@ deps = commands = pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/sharding_tests/test_sharding_relations.py +[testenv:sharding-race-conditions] +description = Run sharding race condition tests +pass_env = + {[testenv]pass_env} + CI +deps = + pytest + juju==3.2.0.1 + pytest-mock + pytest-operator + protobuf==3.20 # temporary fix until new libjuju is released + git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache + -r {tox_root}/requirements.txt +commands = + pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/sharding_tests/test_sharding_race_conds.py + [testenv:integration] description = Run all integration tests