diff --git a/charm_internal_version b/charm_internal_version
index 8351c1939..60d3b2f4a 100644
--- a/charm_internal_version
+++ b/charm_internal_version
@@ -1 +1 @@
-14
+15
diff --git a/src/upgrades/machine_upgrade.py b/src/upgrades/machine_upgrade.py
index 628a14c47..d0cd25f9c 100644
--- a/src/upgrades/machine_upgrade.py
+++ b/src/upgrades/machine_upgrade.py
@@ -185,11 +185,6 @@ def upgrade_unit(self, *, charm) -> None:
         self._unit_workload_version = self._current_versions["workload"]
         logger.debug(f"Saved {_SNAP_REVISION} in unit databag after upgrade")
 
-        # once the last unit has upgrade, notify relevant integrated applications of the new
-        # version.
-        if charm.unit == self._sorted_units[-1]:
-            charm.version_checker.set_version_across_all_relations()
-
         # post upgrade check should be retried in case of failure, for this it is necessary to
         # emit a separate event.
         charm.upgrade.post_app_upgrade_event.emit()
diff --git a/src/upgrades/mongodb_upgrade.py b/src/upgrades/mongodb_upgrade.py
index df58292c5..fdbbef7e2 100644
--- a/src/upgrades/mongodb_upgrade.py
+++ b/src/upgrades/mongodb_upgrade.py
@@ -129,6 +129,7 @@ def _on_upgrade_charm(self, _):
             logger.info("Charm upgraded. MongoDB version unchanged")
 
         self._upgrade.upgrade_resumed = False
+        self.charm.version_checker.set_version_across_all_relations()
         # Only call `_reconcile_upgrade` on leader unit to avoid race conditions with
         # `upgrade_resumed`
         self._reconcile_upgrade()
diff --git a/tests/integration/sharding_tests/helpers.py b/tests/integration/sharding_tests/helpers.py
index ef3bf21f7..aee9e3b6c 100644
--- a/tests/integration/sharding_tests/helpers.py
+++ b/tests/integration/sharding_tests/helpers.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # Copyright 2023 Canonical Ltd.
 # See LICENSE file for licensing details.
-from typing import Dict, List, Optional, Tuple
+from typing import List, Optional, Tuple
 from urllib.parse import quote_plus
 
 from pymongo import MongoClient
@@ -13,9 +13,10 @@
     get_secret_data,
 )
 
-TIMEOUT = 10 * 60
+TIMEOUT = 15 * 60
 MONGOS_PORT = 27018
 MONGOD_PORT = 27017
+MONGODB_CHARM_NAME = "mongodb"
 SHARD_ONE_APP_NAME = "shard-one"
 SHARD_TWO_APP_NAME = "shard-two"
 CONFIG_SERVER_APP_NAME = "config-server"
@@ -117,7 +118,7 @@ def count_users(mongos_client: MongoClient) -> int:
 
 
 async def deploy_cluster_components(
-    ops_test: OpsTest, num_units_cluster_config: Dict = None
+    ops_test: OpsTest, num_units_cluster_config: dict | None = None, channel: str | None = None
 ) -> None:
     if not num_units_cluster_config:
         num_units_cluster_config = {
@@ -126,24 +127,30 @@ async def deploy_cluster_components(
             SHARD_TWO_APP_NAME: 1,
         }
 
-    my_charm = await ops_test.build_charm(".")
+    if channel is None:
+        my_charm = await ops_test.build_charm(".")
+    else:
+        my_charm = MONGODB_CHARM_NAME
     await ops_test.model.deploy(
         my_charm,
         num_units=num_units_cluster_config[CONFIG_SERVER_APP_NAME],
         config={"role": "config-server"},
         application_name=CONFIG_SERVER_APP_NAME,
+        channel=channel,
     )
     await ops_test.model.deploy(
         my_charm,
         num_units=num_units_cluster_config[SHARD_ONE_APP_NAME],
         config={"role": "shard"},
         application_name=SHARD_ONE_APP_NAME,
+        channel=channel,
    )
     await ops_test.model.deploy(
         my_charm,
         num_units=num_units_cluster_config[SHARD_TWO_APP_NAME],
         config={"role": "shard"},
         application_name=SHARD_TWO_APP_NAME,
+        channel=channel,
     )
 
     await ops_test.model.wait_for_idle(
diff --git a/tests/integration/upgrade/test_sharding_rollback.py b/tests/integration/upgrade/test_sharding_rollback.py
index c7913ffe7..93802f56b 100644
--- a/tests/integration/upgrade/test_sharding_rollback.py
+++ b/tests/integration/upgrade/test_sharding_rollback.py
@@ -2,6 +2,8 @@
 # Copyright 2024 Canonical Ltd.
 # See LICENSE file for licensing details.
 
+from pathlib import Path
+
 import pytest
 from pytest_operator.plugin import OpsTest
 
@@ -27,22 +29,18 @@
 MEDIAN_REELECTION_TIME = 12
 
 
-@pytest.mark.skip("re-enable these tests once upgrades are available on charmhub")
 @pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"])
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
+@pytest.mark.skip("Need a new version published with upgrade bug fixed")
 async def test_build_and_deploy(ops_test: OpsTest) -> None:
-    """Build deploy, and integrate, a sharded cluster.
-
-    TODO: When upgrades are supported, deploy with most recent revision (6/stable when possible,
-    but 6/edge as soon as available)
-    """
+    """Build, deploy, and integrate a sharded cluster."""
     num_units_cluster_config = {
         CONFIG_SERVER_APP_NAME: 3,
         SHARD_ONE_APP_NAME: 3,
         SHARD_TWO_APP_NAME: 1,
     }
-    await deploy_cluster_components(ops_test, num_units_cluster_config)
+    await deploy_cluster_components(ops_test, num_units_cluster_config, channel="6/edge")
     await ops_test.model.wait_for_idle(
         apps=CLUSTER_COMPONENTS, idle_period=20, timeout=TIMEOUT, raise_on_blocked=False
     )
@@ -53,10 +51,10 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None:
     )
 
 
-@pytest.mark.skip("re-enable these tests once upgrades are available on charmhub")
 @pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"])
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
+@pytest.mark.skip("Need a new version published with upgrade bug fixed")
 async def test_rollback_on_config_server(
     ops_test: OpsTest, continuous_writes_to_shard_one, continuous_writes_to_shard_two
 ) -> None:
@@ -73,9 +71,11 @@ async def test_rollback_on_config_server(
     )
 
     # instead of resuming upgrade refresh with the old version
-    # TODO: instead of using new_charm - use the one deployed on charmhub - cannot do this until
-    # the newest revision is published
-    await ops_test.model.applications[CONFIG_SERVER_APP_NAME].refresh(path=new_charm)
+    # TODO: Use this when https://github.com/juju/python-libjuju/issues/1086 is fixed
+    # await ops_test.model.applications[CONFIG_SERVER_APP_NAME].refresh(
+    #     channel="6/edge", switch="ch:mongodb"
+    # )
+    await refresh_with_juju(ops_test, CONFIG_SERVER_APP_NAME, "6/stable")
 
     # verify no writes were skipped during upgrade/rollback process
     shard_one_expected_writes = await stop_continous_writes(
@@ -106,33 +106,32 @@ async def test_rollback_on_config_server(
     # TODO implement this check once we have implemented the post-cluster-upgrade code DPE-4143
 
 
-@pytest.mark.skip("re-enable these tests once upgrades are available on charmhub")
 @pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"])
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
+@pytest.mark.skip("Need a new version published with upgrade bug fixed")
 async def test_rollback_on_shard_and_config_server(
     ops_test: OpsTest, continuous_writes_to_shard_one, continuous_writes_to_shard_two
 ) -> None:
     """Verify that a config-server and shard can safely rollback without losing writes."""
     new_charm = await ops_test.build_charm(".")
-    await run_upgrade_sequence(ops_test, CONFIG_SERVER_APP_NAME, new_charm)
+    await run_upgrade_sequence(ops_test, CONFIG_SERVER_APP_NAME, new_charm=new_charm)
 
     shard_unit = await find_unit(ops_test, leader=True, app_name=SHARD_ONE_APP_NAME)
     action = await shard_unit.run_action("pre-upgrade-check")
     await action.wait()
     assert action.status == "completed", "pre-upgrade-check failed, expected to succeed."
 
-    # instead of resuming upgrade refresh with the old version
-    # TODO: instead of using new_charm - use the one deployed on charmhub - cannot do this until
-    # the newest revision is published
-    await ops_test.model.applications[SHARD_ONE_APP_NAME].refresh(path=new_charm)
+    # TODO: Use this when https://github.com/juju/python-libjuju/issues/1086 is fixed
+    # await ops_test.model.applications[SHARD_ONE_APP_NAME].refresh(
+    #     channel="6/edge", switch="ch:mongodb"
+    # )
+    await refresh_with_juju(ops_test, SHARD_ONE_APP_NAME, "6/stable")
 
     await ops_test.model.wait_for_idle(
         apps=[CONFIG_SERVER_APP_NAME], timeout=1000, idle_period=120
     )
-    # TODO: instead of using new_charm - use the one deployed on charmhub - cannot do this until
-    # the newest revision is published
-    await run_upgrade_sequence(ops_test, CONFIG_SERVER_APP_NAME, new_charm)
+    await run_upgrade_sequence(ops_test, CONFIG_SERVER_APP_NAME, channel="6/edge")
 
     # verify no writes were skipped during upgrade process
     shard_one_expected_writes = await stop_continous_writes(
@@ -163,16 +162,42 @@ async def test_rollback_on_shard_and_config_server(
     # TODO implement this check once we have implemented the post-cluster-upgrade code DPE-4143
 
 
-async def run_upgrade_sequence(ops_test: OpsTest, app_name: str, new_charm) -> None:
+async def refresh_with_juju(ops_test: OpsTest, app_name: str, channel: str) -> None:
+    refresh_cmd = f"refresh {app_name} --channel {channel} --switch ch:mongodb"
+    await ops_test.juju(*refresh_cmd.split())
+
+
+async def run_upgrade_sequence(
+    ops_test: OpsTest, app_name: str, new_charm: Path | None = None, channel: str | None = None
+) -> None:
     """Runs the upgrade sequence on a given app."""
     leader_unit = await find_unit(ops_test, leader=True, app_name=app_name)
     action = await leader_unit.run_action("pre-upgrade-check")
     await action.wait()
     assert action.status == "completed", "pre-upgrade-check failed, expected to succeed."
 
-    await ops_test.model.applications[app_name].refresh(path=new_charm)
+    if new_charm is not None:
+        await ops_test.model.applications[app_name].refresh(path=new_charm)
+    elif channel is not None:
+        # TODO: Use this when https://github.com/juju/python-libjuju/issues/1086 is fixed
+        # await ops_test.model.applications[app_name].refresh(
+        #     channel=channel, switch="ch:mongodb"
+        # )
+        await refresh_with_juju(ops_test, app_name, channel)
+    else:
+        raise ValueError("Either new_charm or channel must be provided.")
+
     await ops_test.model.wait_for_idle(apps=[app_name], timeout=1000, idle_period=120)
 
+    # resume upgrade only needs to be run when:
+    # 1. there is more than one unit in the application
+    # 2. AND the underlying workload was updated
+    if not len(ops_test.model.applications[app_name].units) > 1:
+        return
+
+    if "resume-upgrade" not in ops_test.model.applications[app_name].status_message:
+        return
+
     action = await leader_unit.run_action("resume-upgrade")
     await action.wait()
     assert action.status == "completed", "resume-upgrade failed, expected to succeed."
diff --git a/tests/integration/upgrade/test_sharding_upgrade.py b/tests/integration/upgrade/test_sharding_upgrade.py
index 856e7ffab..1356519e5 100644
--- a/tests/integration/upgrade/test_sharding_upgrade.py
+++ b/tests/integration/upgrade/test_sharding_upgrade.py
@@ -38,17 +38,13 @@
 @pytest.mark.group(1)
 @pytest.mark.abort_on_fail
 async def test_build_and_deploy(ops_test: OpsTest) -> None:
-    """Build deploy, and integrate, a sharded cluster.
-
-    TODO: When upgrades are supported, deploy with most recent revision (6/stable when possible,
-    but 6/edge as soon as available)
-    """
+    """Build, deploy, and integrate a sharded cluster."""
     num_units_cluster_config = {
         CONFIG_SERVER_APP_NAME: 3,
         SHARD_ONE_APP_NAME: 3,
         SHARD_TWO_APP_NAME: 3,
     }
-    await deploy_cluster_components(ops_test, num_units_cluster_config)
+    await deploy_cluster_components(ops_test, num_units_cluster_config, channel="6/edge")
 
     await ops_test.model.wait_for_idle(
         apps=CLUSTER_COMPONENTS, idle_period=20, timeout=TIMEOUT, raise_on_blocked=False
@@ -72,11 +68,15 @@ async def test_upgrade(
     assert action.status == "completed", "pre-upgrade-check failed, expected to succeed."
 
     new_charm = await ops_test.build_charm(".")
-    await run_upgrade_sequence(ops_test, CONFIG_SERVER_APP_NAME, new_charm)
+    await run_upgrade_sequence(ops_test, CONFIG_SERVER_APP_NAME, new_charm=new_charm)
 
     for shard_app_name in SHARD_COMPONENTS:
-        await run_upgrade_sequence(ops_test, shard_app_name, new_charm)
+        await run_upgrade_sequence(ops_test, shard_app_name, new_charm=new_charm)
 
+    # We want to be sure that everything has settled down
+    await ops_test.model.wait_for_idle(
+        CLUSTER_COMPONENTS, status="active", idle_period=20, timeout=TIMEOUT
+    )
     # verify no writes were skipped during upgrade process
     shard_one_expected_writes = await stop_continous_writes(
         ops_test,
@@ -130,7 +130,7 @@ async def test_pre_upgrade_check_failure(ops_test: OpsTest) -> None:
 
     # re-enable network on sharded cluster and wait for idle active
     ha_helpers.restore_network_for_unit(shard_one_host_name)
-    async with ops_test.fast_forward():
+    async with ops_test.fast_forward(fast_interval="1m"):
         # sleep for twice the median election time
         time.sleep(MEDIAN_REELECTION_TIME * 2)
 
@@ -153,7 +153,7 @@ async def run_upgrade_sequence(ops_test: OpsTest, app_name: str, new_charm) -> N
     assert action.status == "completed", "pre-upgrade-check failed, expected to succeed."
 
     await ops_test.model.applications[app_name].refresh(path=new_charm)
-    await ops_test.model.wait_for_idle(apps=[app_name], timeout=1000, idle_period=120)
+    await ops_test.model.wait_for_idle(apps=[app_name], timeout=1000, idle_period=30)
 
     # resume upgrade only needs to be ran when:
     # 1. there are more than one units in the application
@@ -168,4 +168,4 @@ async def run_upgrade_sequence(ops_test: OpsTest, app_name: str, new_charm) -> N
     await action.wait()
     assert action.status == "completed", "resume-upgrade failed, expected to succeed."
 
-    await ops_test.model.wait_for_idle(apps=[app_name], timeout=1000, idle_period=120)
+    await ops_test.model.wait_for_idle(apps=[app_name], timeout=1000, idle_period=30)
diff --git a/tests/integration/upgrade/test_upgrade.py b/tests/integration/upgrade/test_upgrade.py
index 99424276d..9486cfd5f 100644
--- a/tests/integration/upgrade/test_upgrade.py
+++ b/tests/integration/upgrade/test_upgrade.py
@@ -14,6 +14,7 @@
 
 
 MEDIAN_REELECTION_TIME = 12
+MONGODB_CHARM_NAME = "mongodb"
 
 
 @pytest.fixture()
@@ -35,10 +36,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None:
         await check_or_scale_app(ops_test, app_name, required_units=3)
         return
 
-    # TODO: When upgrades are supported, deploy with most recent revision (6/stable when possible,
-    # but 6/edge as soon as available)
-    charm = await ops_test.build_charm(".")
-    await ops_test.model.deploy(charm, channel="edge", num_units=3)
+    await ops_test.model.deploy(MONGODB_CHARM_NAME, channel="6/edge", num_units=3)
 
     await ops_test.model.wait_for_idle(
         apps=["mongodb"], status="active", timeout=1000, idle_period=120
@@ -55,6 +53,8 @@ async def test_upgrade(ops_test: OpsTest, continuous_writes) -> None:
     action = await leader_unit.run_action("pre-upgrade-check")
     await action.wait()
 
+    assert action.status == "completed", "pre-upgrade-check failed, expected to succeed"
+
     await ops_test.model.wait_for_idle(
         apps=[app_name], status="active", timeout=1000, idle_period=120
     )
@@ -65,7 +65,13 @@ async def test_upgrade(ops_test: OpsTest, continuous_writes) -> None:
     await ops_test.model.wait_for_idle(
         apps=[app_name], status="active", timeout=1000, idle_period=120
     )
-    # verify that the cluster is actually correctly configured after upgrade
+
+    if "resume-upgrade" in ops_test.model.applications[app_name].status_message:
+        action = await leader_unit.run_action("resume-upgrade")
+        await action.wait()
+        assert action.status == "completed", "resume-upgrade failed, expected to succeed"
+
+    await ops_test.model.wait_for_idle(apps=[app_name], timeout=1000, idle_period=120)
 
     # verify that the no writes were skipped
     total_expected_writes = await ha_helpers.stop_continous_writes(ops_test, app_name=app_name)
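
Below is a minimal usage sketch, not part of the patch above, showing how the reworked run_upgrade_sequence helper is meant to be driven down both of its refresh paths: from a locally built charm via Application.refresh(path=...), and from a Charmhub channel, where refresh_with_juju shells out to `juju refresh --switch ch:mongodb` while python-libjuju issue 1086 remains open. The test name, docstring, and import paths are illustrative assumptions; run_upgrade_sequence, refresh_with_juju, CONFIG_SERVER_APP_NAME, ops_test, and the channel names come from the diff.

import pytest
from pytest_operator.plugin import OpsTest

# Assumed import locations: run_upgrade_sequence is defined in
# tests/integration/upgrade/test_sharding_rollback.py and CONFIG_SERVER_APP_NAME in
# tests/integration/sharding_tests/helpers.py; adjust the imports to the real layout.
from .test_sharding_rollback import run_upgrade_sequence
from ..sharding_tests.helpers import CONFIG_SERVER_APP_NAME


@pytest.mark.group(1)
@pytest.mark.abort_on_fail
async def test_upgrade_then_rollback_sketch(ops_test: OpsTest) -> None:
    """Illustrative only: upgrade from a local build, then roll back via Charmhub."""
    # Upgrade path: refresh the config-server from a locally built charm
    # (run_upgrade_sequence calls Application.refresh(path=...) internally).
    new_charm = await ops_test.build_charm(".")
    await run_upgrade_sequence(ops_test, CONFIG_SERVER_APP_NAME, new_charm=new_charm)

    # Rollback path: refresh from a Charmhub channel. run_upgrade_sequence delegates to
    # refresh_with_juju, i.e. `juju refresh <app> --channel <channel> --switch ch:mongodb`,
    # because python-libjuju cannot yet combine channel= and switch=
    # (https://github.com/juju/python-libjuju/issues/1086).
    await run_upgrade_sequence(ops_test, CONFIG_SERVER_APP_NAME, channel="6/stable")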