Skip to content

Commit

Permalink
Sharding relations test are flaky (#473)
Browse files Browse the repository at this point in the history
## Issue

 * Test fails 90% of the time.
 * The real issue here is an unprotected call to `charm.client_provider.update_app_relation_data` which would create the password only for an invalid relation (integrating a shard as database).
 * This call is run in every status check.

## Solution

 * Protect the call by running some sanity checks.
 * Update all tests that started failing due to this (needed to add some monkey patching for all methods used in this checks.

## Bug fixes

 * The `charm.db_initialised` function was buggy: setting `charm.db_initialised = False` would still return `charm.db_initialised == True`. This is now fixed in a retro compatible way to ensure that nothing breaks with upgrades.
 * This fix needs to be ported to mongodb k8s as well.
 * Also fix a bug with HA tests and juju 3.5.3
  • Loading branch information
Gu1nness authored Sep 9, 2024
1 parent e932849 commit 3623043
Show file tree
Hide file tree
Showing 10 changed files with 164 additions and 65 deletions.
20 changes: 15 additions & 5 deletions lib/charms/mongodb/v1/mongodb_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ def __init__(self, charm: CharmBase, substrate="k8s", relation_name: str = REL_N
self.database_provides.on.database_requested, self._on_relation_event
)

def pass_hook_checks(self, event: EventBase) -> bool:
"""Runs the pre-hooks checks for MongoDBProvider, returns True if all pass."""
def pass_sanity_hook_checks(self) -> bool:
"""Runs reusable and event agnostic checks."""
# We shouldn't try to create or update users if the database is not
# initialised. We will create users as part of initialisation.
if not self.charm.db_initialised:
Expand All @@ -92,6 +92,13 @@ def pass_hook_checks(self, event: EventBase) -> bool:
if not self.charm.unit.is_leader():
return False

return True

def pass_hook_checks(self, event: EventBase) -> bool:
"""Runs the pre-hooks checks for MongoDBProvider, returns True if all pass."""
if not self.pass_sanity_hook_checks():
return False

if self.charm.upgrade_in_progress:
logger.warning(
"Adding relations is not supported during an upgrade. The charm may be in a broken, unrecoverable state."
Expand Down Expand Up @@ -173,7 +180,7 @@ def oversee_users(self, departed_relation_id: Optional[int], event):
mongo.drop_user(username)

for username in relation_users - database_users:
config = self._get_config(username, None)
config = self._get_config(username, None, event)
if config.database is None:
# We need to wait for the moment when the provider library
# set the database name into the relation.
Expand Down Expand Up @@ -240,7 +247,7 @@ def _diff(self, event: RelationChangedEvent) -> Diff:

def update_app_relation_data(self) -> None:
"""Helper function to update application relation data."""
if not self.charm.db_initialised:
if not self.pass_sanity_hook_checks():
return

database_users = set()
Expand Down Expand Up @@ -282,7 +289,9 @@ def _get_or_set_password(self, relation: Relation) -> str:
self.database_provides.update_relation_data(relation.id, {"password": password})
return password

def _get_config(self, username: str, password: Optional[str]) -> MongoConfiguration:
def _get_config(
self, username: str, password: Optional[str], event=None
) -> MongoConfiguration:
"""Construct the config object for future user creation."""
relation = self._get_relation_from_username(username)
if not password:
Expand All @@ -299,6 +308,7 @@ def _get_config(self, username: str, password: Optional[str]) -> MongoConfigurat
"tls_external": False,
"tls_internal": False,
}

if self.charm.is_role(Config.Role.MONGOS):
mongo_args["port"] = Config.MONGOS_PORT
if self.substrate == Config.Substrate.K8S:
Expand Down
32 changes: 16 additions & 16 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,17 @@ coverage = {extras = ["toml"], version = "^7.5.0"}
pytest = "^8.1.1"
parameterized = "^0.9.0"
pymongo = "^4.7.3"
juju = "~3.5.0"
# should not be updated unless https://github.com/juju/python-libjuju/issues/1093 is fixed
juju = "==3.5.0"

[tool.poetry.group.integration.dependencies]
allure-pytest = "^2.13.5"
ops = "^2.15.0"
tenacity = "^8.2.3"
pymongo = "^4.7.3"
parameterized = "^0.9.0"
juju = "~3.5.0"
# should not be updated unless https://github.com/juju/python-libjuju/issues/1093 is fixed
juju = "==3.5.0"
pytest = "^8.1.1"
pytest-asyncio = "^0.21.1"
pytest-mock = "^3.14.0"
Expand Down
5 changes: 3 additions & 2 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,8 @@ def peers_units(self) -> list[Unit]:
@property
def db_initialised(self) -> bool:
"""Check if MongoDB is initialised."""
return "db_initialised" in self.app_peer_data
# Needs to lowercase it so it also work with older versions
return json.loads(self.app_peer_data.get("db_initialised", "false").lower())

@property
def role(self) -> str:
Expand Down Expand Up @@ -352,7 +353,7 @@ def is_role(self, role_name: str) -> bool:
def db_initialised(self, value):
"""Set the db_initialised flag."""
if isinstance(value, bool):
self.app_peer_data["db_initialised"] = str(value)
self.app_peer_data["db_initialised"] = json.dumps(value)
else:
raise ValueError(
f"'db_initialised' must be a boolean value. Proivded: {value} is of type {type(value)}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ async def test_build_and_deploy(
await ops_test.model.deploy(
MONGOS_APP_NAME,
channel="6/edge",
revision=3,
)
await ops_test.model.deploy(S3_APP_NAME, channel="edge")

Expand Down
41 changes: 27 additions & 14 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,16 +208,21 @@ def test_mongodb_relation_joined_non_leader_does_nothing(self, connection):

@patch_network_get(private_address="1.1.1.1")
@patch("charm.MongoDBConnection")
@patch("charm.CrossAppVersionChecker.is_local_charm")
@patch("charm.CrossAppVersionChecker.is_integrated_to_locally_built_charm")
@patch("charm.get_charm_revision")
@patch("charm.MongodbOperatorCharm._connect_mongodb_exporter")
def test_mongodb_relation_joined_all_replicas_not_ready(self, _, connection):
def test_mongodb_relation_joined_all_replicas_not_ready(
self, _, rev, local, is_local, connection
):
"""Tests that we go into waiting when current ReplicaSet hosts are not ready.
Tests the scenario that if current replica set hosts are not ready, the leader goes into
WaitingStatus and no attempt to reconfigure is made.
"""
# preset values
self.harness.set_leader(True)
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
connection.return_value.__enter__.return_value.is_ready = False
connection.return_value.__enter__.return_value.get_replset_members.return_value = {
"1.1.1.1"
Expand All @@ -236,8 +241,13 @@ def test_mongodb_relation_joined_all_replicas_not_ready(self, _, connection):
@patch("ops.framework.EventBase.defer")
@patch("charm.MongoDBConnection")
@patch("charms.mongodb.v0.mongo.MongoClient")
@patch("charm.CrossAppVersionChecker.is_local_charm")
@patch("charm.CrossAppVersionChecker.is_integrated_to_locally_built_charm")
@patch("charm.get_charm_revision")
@patch("charm.MongodbOperatorCharm._connect_mongodb_exporter")
def test_relation_joined_get_members_failure(self, _, client, connection, defer):
def test_relation_joined_get_members_failure(
self, _, rev, local, is_local, client, connection, defer
):
"""Tests reconfigure does not execute when unable to get the replica set members.
Verifies in case of relation_joined and relation departed, that when the the database
Expand All @@ -246,7 +256,7 @@ def test_relation_joined_get_members_failure(self, _, client, connection, defer)
"""
# presets
self.harness.set_leader(True)
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
rel = self.harness.charm.model.get_relation("database-peers")

for exception in PYMONGO_EXCEPTIONS:
Expand All @@ -271,16 +281,19 @@ def test_relation_joined_get_members_failure(self, _, client, connection, defer)
@patch_network_get(private_address="1.1.1.1")
@patch("ops.framework.EventBase.defer")
@patch("charm.MongoDBConnection")
@patch("charm.CrossAppVersionChecker.is_local_charm")
@patch("charm.CrossAppVersionChecker.is_integrated_to_locally_built_charm")
@patch("charm.get_charm_revision")
@patch("charm.MongodbOperatorCharm._connect_mongodb_exporter")
def test_reconfigure_add_member_failure(self, _, connection, defer):
def test_reconfigure_add_member_failure(self, _, rev, local, is_local, connection, defer):
"""Tests reconfigure does not proceed when unable to add a member.
Verifies in relation joined events, that when the database cannot add a member that the
event is deferred.
"""
# presets
self.harness.set_leader(True)
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
connection.return_value.__enter__.return_value.get_replset_members.return_value = {
"1.1.1.1"
}
Expand Down Expand Up @@ -357,7 +370,7 @@ def test_update_status_mongodb_error(
"""Tests that when MongoDB is not active, that is reported instead of pbm."""
# assume leader has already initialised the replica set
self.harness.set_leader(True)
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
connection.return_value.__enter__.return_value.is_ready = True

pbm_statuses = [
Expand Down Expand Up @@ -403,7 +416,7 @@ def test_update_status_pbm_error(
"""Tests when MongoDB is active and pbm is in the error state, pbm status is reported."""
# assume leader has already initialised the replica set
self.harness.set_leader(True)
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
connection.return_value.__enter__.return_value.is_ready = True

pbm_statuses = [
Expand Down Expand Up @@ -444,7 +457,7 @@ def test_update_status_pbm_and_mongodb_ready(
"""Tests when both Mongodb and pbm are ready that MongoDB status is reported."""
# assume leader has already initialised the replica set
self.harness.set_leader(True)
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
connection.return_value.__enter__.return_value.is_ready = True

self.harness.add_relation(S3_RELATION_NAME, "s3-integrator")
Expand Down Expand Up @@ -477,7 +490,7 @@ def test_update_status_no_s3(
"""Tests when the s3 relation isn't present that the MongoDB status is reported."""
# assume leader has already initialised the replica set
self.harness.set_leader(True)
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
connection.return_value.__enter__.return_value.is_ready = True
has_backup_service.return_value = True

Expand Down Expand Up @@ -506,7 +519,7 @@ def test_update_status_primary(
"""Tests that update status identifies the primary unit and updates status."""
# assume leader has already initialised the replica set
self.harness.set_leader(True)
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
pbm_status.return_value = ActiveStatus("")

self.harness.set_leader(False)
Expand Down Expand Up @@ -538,7 +551,7 @@ def test_update_status_secondary(
"""Tests that update status identifies secondary units and doesn't update status."""
# assume leader has already initialised the replica set
self.harness.set_leader(True)
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
pbm_status.return_value = ActiveStatus("")

self.harness.set_leader(False)
Expand Down Expand Up @@ -570,7 +583,7 @@ def test_update_status_additional_messages(
"""Tests status updates are correct for non-primary and non-secondary cases."""
# assume leader has already initialised the replica set
self.harness.set_leader(True)
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
pbm_status.return_value = ActiveStatus("")

# Case 1: Unit has not been added to replica set yet
Expand Down Expand Up @@ -614,7 +627,7 @@ def test_update_status_not_ready(
"""Tests that if mongod is not running on this unit it restarts it."""
get_secret.return_value = "pass123"
connection.return_value.__enter__.return_value.is_ready = False
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"

self.harness.charm.on.update_status.emit()
self.assertEqual(
Expand Down
12 changes: 6 additions & 6 deletions tests/unit/test_config_server_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def is_not_config_mock_call(*args):
assert args == ("config-server",)
return False

self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"

# fails due to being run on non-config-server
self.harness.charm.is_role = is_not_config_mock_call
Expand Down Expand Up @@ -96,7 +96,7 @@ def is_not_config_mock_call(*args):
assert args == ("config-server",)
return False

self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
self.harness.add_relation("cluster", "mongos")

# fails due to being run on non-config-server
Expand Down Expand Up @@ -155,7 +155,7 @@ def is_shard_mock_call(*args):
# unit is not the leader nor is the wrong wrole
event = mock.Mock()
event.params = {}
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
self.harness.charm.config_server.pass_hook_checks(event)
event.defer.assert_not_called()

Expand Down Expand Up @@ -190,7 +190,7 @@ def is_config_mock_call(*args):
# unit is not the leader nor is the wrong wrole
event = mock.Mock()
event.params = {}
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"
self.harness.charm.shard.pass_hook_checks(event)
event.defer.assert_not_called()

Expand All @@ -214,7 +214,7 @@ def get_cluster_mismatched_revision_status_mock_success(*unused):

event = mock.Mock()
event.params = {}
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"

self.harness.charm.config_server.pass_hook_checks(event)
event.defer.assert_called()
Expand Down Expand Up @@ -249,7 +249,7 @@ def get_cluster_mismatched_revision_status_mock_success(*unused):

event = mock.Mock()
event.params = {}
self.harness.charm.app_peer_data["db_initialised"] = "True"
self.harness.charm.app_peer_data["db_initialised"] = "true"

self.harness.charm.shard.pass_hook_checks(event)
event.defer.assert_called()
Expand Down
Loading

0 comments on commit 3623043

Please sign in to comment.