Skip to content

Commit

Permalink
[DPE-3813] make cluster password sync more robust (#377)
Browse files Browse the repository at this point in the history
## Issue
During sharding integration tests, sharding components will occasionally
go into an error state when executing `update_status`. However, when the
hook fires again, the component leaves the error state. The errors are
either `ServerSelectionError` or `OperationFailure` (code 18), indicating
that the cluster is still syncing either the password or its internal
membership. The current check for `cluster_password_synced` is not robust
enough to catch these.

## Solution
Update `cluster_password_synced` and its dependent functions to catch
these errors right away.
  • Loading branch information
MiaAltieri authored Mar 14, 2024
1 parent 1f9c2cf commit b6c9f25
Showing 1 changed file with 15 additions and 12 deletions.
27 changes: 15 additions & 12 deletions lib/charms/mongodb/v1/shards_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,14 +460,13 @@ def cluster_password_synced(self) -> bool:
if not self.charm.is_role(Config.Role.CONFIG_SERVER):
return True

# base case: no cluster relation
if not self.model.relations[self.relation_name]:
return True

try:
# check our ability to use connect to cluster
# check our ability to use connect to mongos
with MongosConnection(self.charm.mongos_config) as mongos:
mongos.get_shard_members()
# check our ability to use connect to mongod
with MongoDBConnection(self.charm.mongodb_config) as mongod:
mongod.get_replset_status()
except OperationFailure as e:
if e.code == 18: # Unauthorized Error - i.e. password is not in sync
return False
Expand Down Expand Up @@ -967,7 +966,7 @@ def _update_relation_data(self, relation_id: int, data: dict) -> None:
"""
self.database_requires.update_relation_data(relation_id, data)

def _is_mongos_reachable(self) -> bool:
def _is_mongos_reachable(self, with_auth=False) -> bool:
"""Returns True if mongos is reachable."""
if not self.model.get_relation(self.relation_name):
logger.info("Mongos is not reachable, no relation to config-sever")
Expand All @@ -979,11 +978,15 @@ def _is_mongos_reachable(self) -> bool:

config = self.charm.remote_mongos_config(set(mongos_hosts))

# use a URI that is not dependent on the operator password, as we are not guaranteed that
# the shard has received the password yet.
uri = f"mongodb://{','.join(mongos_hosts)}"
with MongosConnection(config, uri) as mongo:
return mongo.is_ready
if not with_auth:
# use a URI that is not dependent on the operator password, as we are not guaranteed
# that the shard has received the password yet.
uri = f"mongodb://{','.join(mongos_hosts)}"
with MongosConnection(config, uri) as mongo:
return mongo.is_ready
else:
with MongosConnection(self.charm.remote_mongos_config(set(mongos_hosts))) as mongo:
return mongo.is_ready

def _is_added_to_cluster(self) -> bool:
"""Returns True if the shard has been added to the cluster."""
Expand Down Expand Up @@ -1017,7 +1020,7 @@ def cluster_password_synced(self) -> bool:

try:
# check our ability to use connect to both mongos and our current replica set.
mongos_reachable = self._is_mongos_reachable()
mongos_reachable = self._is_mongos_reachable(with_auth=True)
with MongoDBConnection(self.charm.mongodb_config) as mongo:
mongod_reachable = mongo.is_ready
except OperationFailure as e:
Expand Down

0 comments on commit b6c9f25

Please sign in to comment.