From b6c9f25165c7e1e6682937f4e9d124b8b23cd7c2 Mon Sep 17 00:00:00 2001 From: Mia Altieri <32723809+MiaAltieri@users.noreply.github.com> Date: Thu, 14 Mar 2024 12:55:38 +0100 Subject: [PATCH] [DPE-3813] make cluster password sync more robust (#377) ## Issue During sharding integration tests, sharding components will occasionally go into error state when executing `update_status`. However, when the hook gets fired again the component goes out of error. The errors are either `ServerSelectionError` or `OperationFailure` (code 18) indicating the cluster is still syncing either password or internal membership. The current check for `cluster_password_synced` is not robust enough to catch these. ## Solution Update `cluster_password_synced` and its dependent functions to catch these errors right away. --- lib/charms/mongodb/v1/shards_interface.py | 27 +++++++++++++---------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/lib/charms/mongodb/v1/shards_interface.py b/lib/charms/mongodb/v1/shards_interface.py index 34b5d92ca..5070d99f7 100644 --- a/lib/charms/mongodb/v1/shards_interface.py +++ b/lib/charms/mongodb/v1/shards_interface.py @@ -460,14 +460,13 @@ def cluster_password_synced(self) -> bool: if not self.charm.is_role(Config.Role.CONFIG_SERVER): return True - # base case: no cluster relation - if not self.model.relations[self.relation_name]: - return True - try: - # check our ability to use connect to cluster + # check our ability to use connect to mongos with MongosConnection(self.charm.mongos_config) as mongos: mongos.get_shard_members() + # check our ability to use connect to mongod + with MongoDBConnection(self.charm.mongodb_config) as mongod: + mongod.get_replset_status() except OperationFailure as e: if e.code == 18: # Unauthorized Error - i.e.
password is not in sync return False @@ -967,7 +966,7 @@ def _update_relation_data(self, relation_id: int, data: dict) -> None: """ self.database_requires.update_relation_data(relation_id, data) - def _is_mongos_reachable(self) -> bool: + def _is_mongos_reachable(self, with_auth=False) -> bool: """Returns True if mongos is reachable.""" if not self.model.get_relation(self.relation_name): logger.info("Mongos is not reachable, no relation to config-sever") @@ -979,11 +978,15 @@ def _is_mongos_reachable(self) -> bool: config = self.charm.remote_mongos_config(set(mongos_hosts)) - # use a URI that is not dependent on the operator password, as we are not guaranteed that - # the shard has received the password yet. - uri = f"mongodb://{','.join(mongos_hosts)}" - with MongosConnection(config, uri) as mongo: - return mongo.is_ready + if not with_auth: + # use a URI that is not dependent on the operator password, as we are not guaranteed + # that the shard has received the password yet. + uri = f"mongodb://{','.join(mongos_hosts)}" + with MongosConnection(config, uri) as mongo: + return mongo.is_ready + else: + with MongosConnection(self.charm.remote_mongos_config(set(mongos_hosts))) as mongo: + return mongo.is_ready def _is_added_to_cluster(self) -> bool: """Returns True if the shard has been added to the cluster.""" @@ -1017,7 +1020,7 @@ def cluster_password_synced(self) -> bool: try: # check our ability to use connect to both mongos and our current replica set. - mongos_reachable = self._is_mongos_reachable() + mongos_reachable = self._is_mongos_reachable(with_auth=True) with MongoDBConnection(self.charm.mongodb_config) as mongo: mongod_reachable = mongo.is_ready except OperationFailure as e: