diff --git a/lib/charms/mongodb/v1/shards_interface.py b/lib/charms/mongodb/v1/shards_interface.py index 4669974ff..8f5afffad 100644 --- a/lib/charms/mongodb/v1/shards_interface.py +++ b/lib/charms/mongodb/v1/shards_interface.py @@ -55,7 +55,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 7 +LIBPATCH = 8 KEYFILE_KEY = "key-file" HOSTS_KEY = "host" OPERATOR_PASSWORD_KEY = MongoDBUser.get_password_key_name_for_user(OperatorUser.get_username()) @@ -442,6 +442,27 @@ def get_draining_shards(self) -> List[str]: return draining_shards + def cluster_password_synced(self) -> bool: + """Returns True if the cluster password is synced.""" + # base case: unit is a shard (not the config-server) + if self.charm.is_role(Config.Role.SHARD): + return True + + # base case: no cluster relation + if not self.model.relations[self.relation_name]: + return True + + try: + # check our ability to connect to the cluster + with MongosConnection(self.charm.mongos_config) as mongos: + mongos.get_shard_members() + except OperationFailure as e: + if e.code == 18: # AuthenticationFailed (MongoDB error code 18) - i.e. password is not in sync + return False + raise + + return True + class ConfigServerRequirer(Object): """Manage relations between the config server and the shard, on the shard's side.""" @@ -483,7 +504,9 @@ def _on_relation_changed(self, event): # if re-using an old shard, re-set drained flag. 
self.charm.unit_peer_data["drained"] = json.dumps(False) - self.charm.unit.status = MaintenanceStatus("Adding shard to config-server") + + if not self._is_added_to_cluster(): + self.charm.unit.status = MaintenanceStatus("Adding shard to config-server") # shards rely on the config server for secrets key_file_contents = self.database_requires.fetch_relation_field( @@ -809,6 +832,37 @@ def _is_added_to_cluster(self) -> bool: raise + def cluster_password_synced(self) -> bool: + """Returns True if the cluster password is synced for the shard.""" + # base case: config-server (i.e. cluster password maintainer) + if self.charm.is_role(Config.Role.CONFIG_SERVER): + return True + + # base case: no cluster relation + if not self.model.get_relation(self.relation_name): + return True + + try: + # check our ability to connect to both mongos and our current replica set. + mongos_reachable = self._is_mongos_reachable() + with MongoDBConnection(self.charm.mongodb_config) as mongo: + mongod_reachable = mongo.is_ready + except OperationFailure as e: + if e.code == 18: # AuthenticationFailed (MongoDB error code 18) - i.e. password is not in sync + return False + raise + + return mongos_reachable and mongod_reachable + + def get_shard_members(self) -> List[str]: + """Returns a list of shard members. 
+ + Raises: PyMongoError + """ + mongos_hosts = self.get_mongos_hosts() + with MongosConnection(self.charm.remote_mongos_config(set(mongos_hosts))) as mongo: + return mongo.get_shard_members() + def _is_shard_aware(self) -> bool: """Returns True if shard is in cluster and shard aware.""" if not self.model.get_relation(self.relation_name): diff --git a/src/charm.py b/src/charm.py index ba86bae5a..dba040dac 100755 --- a/src/charm.py +++ b/src/charm.py @@ -548,6 +548,17 @@ def _on_update_status(self, event: UpdateStatusEvent): self.unit.status = WaitingStatus("Waiting for MongoDB to start") return + # Cannot check more advanced MongoDB statuses if the cluster doesn't have passwords synced + # this can occur in two cases: + # 1. password rotation + # 2. race conditions when a new shard is added. + if ( + not self.shard.cluster_password_synced() + or not self.config_server.cluster_password_synced() + ): + self.unit.status = WaitingStatus("Waiting to sync passwords across the cluster") + return + # leader should periodically handle configuring the replica set. Incidents such as network # cuts can lead to new IP addresses and therefore will require a reconfigure. Especially # in the case that the leader a change in IP address it will not receive a relation event.