From 3d98182b70c0b12e52668187b237112f476c142e Mon Sep 17 00:00:00 2001 From: Mia Altieri <32723809+MiaAltieri@users.noreply.github.com> Date: Fri, 16 Feb 2024 08:53:31 +0100 Subject: [PATCH 1/5] [DPE-3293] Changes external mongos (#350) ## Issue Mongos charm does not support external connections ## Solution Enable mongos to provide an external connection when it is requested by the host charm ## Note There are changes required in the shared MongoDB libs - but the bulk of this work will be done in the Mongos Charm --- lib/charms/mongodb/v0/config_server_interface.py | 15 ++++++++++++--- lib/charms/mongodb/v1/helpers.py | 9 +++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/lib/charms/mongodb/v0/config_server_interface.py b/lib/charms/mongodb/v0/config_server_interface.py index f0a9b20fe..9e05e6a1f 100644 --- a/lib/charms/mongodb/v0/config_server_interface.py +++ b/lib/charms/mongodb/v0/config_server_interface.py @@ -35,7 +35,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 6 +LIBPATCH = 7 class ClusterProvider(Object): @@ -275,7 +275,13 @@ def _on_relation_broken(self, event: RelationBrokenEvent) -> None: def is_mongos_running(self) -> bool: """Returns true if mongos service is running.""" - with MongosConnection(None, f"mongodb://{MONGOS_SOCKET_URI_FMT}") as mongo: + connection_uri = f"mongodb://{self.charm.get_mongos_host()}" + + # when running internally, connections through Unix Domain sockets do not need port. + if self.charm.is_external_client: + connection_uri = connection_uri + f":{Config.MONGOS_PORT}" + + with MongosConnection(None, connection_uri) as mongo: return mongo.is_ready def update_config_server_db(self, config_server_db) -> bool: @@ -285,7 +291,10 @@ def update_config_server_db(self, config_server_db) -> bool: mongos_config = self.charm.mongos_config mongos_start_args = get_mongos_args( - mongos_config, snap_install=True, config_server_db=config_server_db + mongos_config, + snap_install=True, + config_server_db=config_server_db, + external_connectivity=self.charm.is_external_client, ) add_args_to_env("MONGOS_ARGS", mongos_start_args) return True diff --git a/lib/charms/mongodb/v1/helpers.py b/lib/charms/mongodb/v1/helpers.py index 69459b971..9038198d1 100644 --- a/lib/charms/mongodb/v1/helpers.py +++ b/lib/charms/mongodb/v1/helpers.py @@ -30,7 +30,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 3 +LIBPATCH = 4 # path to store mongodb ketFile KEY_FILE = "keyFile" @@ -96,6 +96,7 @@ def get_mongos_args( config, snap_install: bool = False, config_server_db: str = None, + external_connectivity: bool = True, ) -> str: """Returns the arguments used for starting mongos on a config-server side application. 
@@ -104,9 +105,9 @@ def get_mongos_args( """ # suborinate charm which provides its own config_server_db, should only use unix domain socket binding_ips = ( - f"--bind_ip {MONGODB_COMMON_DIR}/var/mongodb-27018.sock" - if config_server_db - else "--bind_ip_all" + "--bind_ip_all" + if external_connectivity + else f"--bind_ip {MONGODB_COMMON_DIR}/var/mongodb-27018.sock" ) # mongos running on the config server communicates through localhost From 3016d4d65cf64dde592175e37b64c9a5f336e7a6 Mon Sep 17 00:00:00 2001 From: Mia Altieri <32723809+MiaAltieri@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:18:27 +0100 Subject: [PATCH 2/5] migrating to DPE int tests for CI (#354) ## Issue We are not using DP workflows for int tests, this causes two problems: 1. forked PRs fail due to inability to access secrets 2. we want to easily share secrets across the int tests ## Solution migrate to the new DP job for int tests --------- Co-authored-by: Carl Csaposs --- .github/workflows/ci.yaml | 103 +++------ .github/workflows/release.yaml | 15 +- tests/__init__.py | 2 - tests/integration/__init__.py | 1 - tests/integration/backup_tests/helpers.py | 7 +- .../integration/backup_tests/test_backups.py | 32 ++- tests/integration/ha_tests/test_ha.py | 16 +- .../integration/metrics_tests/test_metrics.py | 4 + .../test_charm_legacy_relations.py | 8 + .../relation_tests/new_relations/helpers.py | 2 +- .../new_relations/test_charm_relations.py | 9 + .../integration/sharding_tests/test_mongos.py | 3 + .../sharding_tests/test_sharding.py | 6 + .../test_sharding_race_conds.py | 2 + .../sharding_tests/test_sharding_relations.py | 8 + tests/integration/test_charm.py | 14 ++ tests/integration/tls_tests/test_tls.py | 5 + tox.ini | 201 ++---------------- 18 files changed, 144 insertions(+), 294 deletions(-) delete mode 100644 tests/__init__.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b6818d87d..6babe86dc 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -12,17 +12,6 @@ on: - cron: "53 0 * * *" # Daily at 00:53 UTC # Triggered on push to branch "main" by .github/workflows/release.yaml workflow_call: - secrets: - CHARMHUB_TOKEN: - required: true - AWS_ACCESS_KEY: - required: true - AWS_SECRET_KEY: - required: true - GCP_ACCESS_KEY: - required: true - GCP_SECRET_KEY: - required: true jobs: lint: @@ -66,73 +55,37 @@ jobs: github-token: "${{ secrets.GITHUB_TOKEN }}" build: - name: Build charms - uses: canonical/data-platform-workflows/.github/workflows/build_charms_with_cache.yaml@v8 - - integration-test: strategy: - fail-fast: false matrix: - tox-environments: - - charm-integration - - ha-integration - - relation-integration - - legacy-integration - - tls-integration - - backup-integration - - metric-integration - - sharding-integration - - sharding-relation-integration - - sharding-race-conditions-integration - - sharding-mongos-integration - name: ${{ matrix.tox-environments }} + path: + - . 
+ - tests/integration/sharding_tests/application + - tests/integration/relation_tests/new_relations/application-charm + - tests/integration/dummy_legacy_app + name: Build charm + uses: canonical/data-platform-workflows/.github/workflows/build_charm.yaml@v11.0.1 + with: + path-to-charm-directory: ${{ matrix.path }} + cache: true + + integration-test: + name: Integration test charm needs: - lint - unit-test - - lib-check - build - runs-on: ubuntu-latest - timeout-minutes: 120 - steps: - - name: Checkout - uses: actions/checkout@v3 - - name: Setup operator environment - # TODO: Replace with custom image on self-hosted runner - uses: charmed-kubernetes/actions-operator@main - with: - provider: lxd - juju-channel: 3.1/stable - - name: Download packed charm(s) - uses: actions/download-artifact@v3 - with: - name: ${{ needs.build.outputs.artifact-name }} - - name: Free disk space - run: | - echo "Free disk space before cleanup" - df -T - # free space in the runner - sudo rm -rf /usr/share/dotnet - sudo rm -rf /opt/ghc - sudo rm -rf /usr/local/share/boost - sudo rm -rf "$AGENT_TOOLSDIRECTORY" - echo "Free disk space after cleanup" - df -T - - name: Select tests - id: select-tests - run: | - if [ "${{ github.event_name }}" == "schedule" ] - then - echo Running unstable and stable tests - echo "mark_expression=" >> $GITHUB_OUTPUT - else - echo Skipping unstable tests - echo "mark_expression=not unstable" >> $GITHUB_OUTPUT - fi - - name: Run integration tests - run: tox run -e ${{ matrix.tox-environments }} -- -m '${{ steps.select-tests.outputs.mark_expression }}' - env: - CI_PACKED_CHARMS: ${{ needs.build.outputs.charms }} - AWS_ACCESS_KEY: ${{ matrix.tox-environments != 'backup-integration' || secrets.AWS_ACCESS_KEY }} - AWS_SECRET_KEY: ${{ matrix.tox-environments != 'backup-integration' || secrets.AWS_SECRET_KEY }} - GCP_ACCESS_KEY: ${{ matrix.tox-environments != 'backup-integration' || secrets.GCP_ACCESS_KEY }} - GCP_SECRET_KEY: ${{ matrix.tox-environments != 'backup-integration' || secrets.GCP_SECRET_KEY }} + uses: canonical/data-platform-workflows/.github/workflows/integration_test_charm.yaml@v11.0.1 + with: + artifact-prefix: packed-charm-cache-true + cloud: lxd + juju-agent-version: 3.1.6 + permissions: + contents: write # Needed for Allure Report beta + secrets: + integration-test: | + { + "AWS_ACCESS_KEY": "${{ secrets.AWS_ACCESS_KEY }}", + "AWS_SECRET_KEY": "${{ secrets.AWS_SECRET_KEY }}", + "GCP_ACCESS_KEY": "${{ secrets.GCP_ACCESS_KEY }}", + "GCP_SECRET_KEY": "${{ secrets.GCP_SECRET_KEY }}", + } diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 06fc80ee8..227fec404 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -8,25 +8,18 @@ on: jobs: ci-tests: uses: ./.github/workflows/ci.yaml - secrets: - CHARMHUB_TOKEN: "${{ secrets.CHARMHUB_TOKEN }}" - AWS_ACCESS_KEY: "${{ secrets.AWS_ACCESS_KEY }}" - AWS_SECRET_KEY: "${{ secrets.AWS_SECRET_KEY }}" - GCP_ACCESS_KEY: "${{ secrets.GCP_ACCESS_KEY }}" - GCP_SECRET_KEY: "${{ secrets.GCP_SECRET_KEY }}" + secrets: inherit build: name: Build charm - uses: canonical/data-platform-workflows/.github/workflows/build_charm_without_cache.yaml@v8 - with: - charmcraft-snap-channel: "latest/stable" + uses: canonical/data-platform-workflows/.github/workflows/build_charm_without_cache.yaml@v11.0.1 release-charm: name: Release charm needs: - ci-tests - build - uses: canonical/data-platform-workflows/.github/workflows/release_charm.yaml@v8 + uses: 
canonical/data-platform-workflows/.github/workflows/release_charm.yaml@v11.0.1 with: channel: 6/edge artifact-name: ${{ needs.build.outputs.artifact-name }} @@ -36,7 +29,7 @@ jobs: contents: write # Needed to create GitHub release release-libraries: - name: Release libraries + name: Release libraries runs-on: ubuntu-latest needs: - ci-tests diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index db3bfe1a6..000000000 --- a/tests/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py index db3bfe1a6..84043b992 100644 --- a/tests/integration/__init__.py +++ b/tests/integration/__init__.py @@ -1,2 +1 @@ # Copyright 2023 Canonical Ltd. -# See LICENSE file for licensing details. diff --git a/tests/integration/backup_tests/helpers.py b/tests/integration/backup_tests/helpers.py index 7ffcc4edd..a665be20a 100644 --- a/tests/integration/backup_tests/helpers.py +++ b/tests/integration/backup_tests/helpers.py @@ -1,6 +1,5 @@ # Copyright 2023 Canonical Ltd. # See LICENSE file for licensing details. -import os import subprocess import ops @@ -95,11 +94,11 @@ async def count_failed_backups(db_unit: ops.model.Unit) -> int: return failed_backups -async def set_credentials(ops_test: OpsTest, cloud: str) -> None: +async def set_credentials(ops_test: OpsTest, github_secrets, cloud: str) -> None: """Sets the s3 crednetials for the provided cloud, valid options are AWS or GCP.""" # set access key and secret keys - access_key = os.environ.get(f"{cloud}_ACCESS_KEY", False) - secret_key = os.environ.get(f"{cloud}_SECRET_KEY", False) + access_key = github_secrets[f"{cloud}_ACCESS_KEY"] + secret_key = github_secrets[f"{cloud}_SECRET_KEY"] assert access_key and secret_key, f"{cloud} access key and secret key not provided." s3_integrator_unit = ops_test.model.applications[S3_APP_NAME].units[0] diff --git a/tests/integration/backup_tests/test_backups.py b/tests/integration/backup_tests/test_backups.py index 4d73a6040..4d3311f63 100644 --- a/tests/integration/backup_tests/test_backups.py +++ b/tests/integration/backup_tests/test_backups.py @@ -11,9 +11,8 @@ from pytest_operator.plugin import OpsTest from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed -from tests.integration.helpers import get_app_name - from ..ha_tests import helpers as ha_helpers +from ..helpers import get_app_name from . import helpers S3_APP_NAME = "s3-integrator" @@ -43,6 +42,7 @@ async def add_writes_to_db(ops_test: OpsTest): await ha_helpers.clear_db_writes(ops_test) +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_build_and_deploy(ops_test: OpsTest) -> None: """Build and deploy one unit of MongoDB.""" @@ -58,6 +58,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None: await ops_test.model.wait_for_idle() +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_blocked_incorrect_creds(ops_test: OpsTest) -> None: """Verifies that the charm goes into blocked status when s3 creds are incorrect.""" @@ -88,13 +89,14 @@ async def test_blocked_incorrect_creds(ops_test: OpsTest) -> None: assert db_unit.workload_status_message == "s3 credentials are incorrect." 
+@pytest.mark.group(1) @pytest.mark.abort_on_fail -async def test_blocked_incorrect_conf(ops_test: OpsTest) -> None: +async def test_blocked_incorrect_conf(ops_test: OpsTest, github_secrets) -> None: """Verifies that the charm goes into blocked status when s3 config options are incorrect.""" db_app_name = await get_app_name(ops_test) # set correct AWS credentials for s3 storage but incorrect configs - await helpers.set_credentials(ops_test, cloud="AWS") + await helpers.set_credentials(ops_test, github_secrets, cloud="AWS") # wait for both applications to be idle with the correct statuses async with ops_test.fast_forward(): @@ -107,6 +109,7 @@ async def test_blocked_incorrect_conf(ops_test: OpsTest) -> None: assert db_unit.workload_status_message == "s3 configurations are incompatible." +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_ready_correct_conf(ops_test: OpsTest) -> None: """Verifies charm goes into active status when s3 config and creds options are correct.""" @@ -130,11 +133,12 @@ async def test_ready_correct_conf(ops_test: OpsTest) -> None: ) +@pytest.mark.group(1) @pytest.mark.abort_on_fail -async def test_create_and_list_backups(ops_test: OpsTest) -> None: +async def test_create_and_list_backups(ops_test: OpsTest, github_secrets) -> None: db_app_name = await get_app_name(ops_test) leader_unit = await helpers.get_leader_unit(ops_test, db_app_name=db_app_name) - await helpers.set_credentials(ops_test, cloud="AWS") + await helpers.set_credentials(ops_test, github_secrets, cloud="AWS") # verify backup list works logger.error("!!!!! test_create_and_list_backups >>> %s", leader_unit) action = await leader_unit.run_action(action_name="list-backups") @@ -161,8 +165,9 @@ async def test_create_and_list_backups(ops_test: OpsTest) -> None: assert backups == 1, "Backup not created." +@pytest.mark.group(1) @pytest.mark.abort_on_fail -async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: +async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db, github_secrets) -> None: """With writes in the DB test creating a backup while another one is running. Note that before creating the second backup we change the bucket and change the s3 storage @@ -183,7 +188,7 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: # while first backup is running change access key, secret keys, and bucket name # for GCP - await helpers.set_credentials(ops_test, cloud="GCP") + await helpers.set_credentials(ops_test, github_secrets, cloud="GCP") # change to GCP configs and wait for PBM to resync configuration_parameters = { @@ -226,7 +231,7 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: assert backups == 1, "Backup not created in first bucket on GCP." # set AWS credentials, set configs for s3 storage, and wait to resync - await helpers.set_credentials(ops_test, cloud="AWS") + await helpers.set_credentials(ops_test, github_secrets, cloud="AWS") configuration_parameters = { "bucket": "data-charms-testing", "region": "us-east-1", @@ -247,6 +252,7 @@ async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: assert backups == 2, "Backup not created in bucket on AWS." 
+@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: """Simple backup tests that verifies that writes are correctly restored.""" @@ -301,11 +307,14 @@ async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: assert number_writes == number_writes_restored, "writes not correctly restored" +@pytest.mark.group(1) @pytest.mark.parametrize("cloud_provider", ["AWS", "GCP"]) -async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_provider): +async def test_restore_new_cluster( + ops_test: OpsTest, add_writes_to_db, cloud_provider, github_secrets +): # configure test for the cloud provider db_app_name = await get_app_name(ops_test) - await helpers.set_credentials(ops_test, cloud=cloud_provider) + await helpers.set_credentials(ops_test, github_secrets, cloud=cloud_provider) if cloud_provider == "AWS": configuration_parameters = { "bucket": "data-charms-testing", @@ -388,6 +397,7 @@ async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_pr await helpers.destroy_cluster(ops_test, cluster_name=NEW_CLUSTER) +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_update_backup_password(ops_test: OpsTest) -> None: """Verifies that after changing the backup password the pbm tool is updated and functional.""" diff --git a/tests/integration/ha_tests/test_ha.py b/tests/integration/ha_tests/test_ha.py index 83bdcc690..b4e42014a 100644 --- a/tests/integration/ha_tests/test_ha.py +++ b/tests/integration/ha_tests/test_ha.py @@ -63,6 +63,7 @@ logger = logging.getLogger(__name__) +@pytest.mark.group(1) @pytest.mark.skipif( os.environ.get("PYTEST_SKIP_DEPLOY", False), reason="skipping deploy, model expected to be provided.", @@ -83,6 +84,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None: await ops_test.model.wait_for_idle() +@pytest.mark.group(1) async def test_storage_re_use(ops_test, continuous_writes): """Verifies that database units with attached storage correctly repurpose storage. @@ -122,6 +124,7 @@ async def test_storage_re_use(ops_test, continuous_writes): assert total_expected_writes["number"] == actual_writes +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_add_units(ops_test: OpsTest, continuous_writes) -> None: """Tests juju add-unit functionality. @@ -152,7 +155,7 @@ async def test_add_units(ops_test: OpsTest, continuous_writes) -> None: assert total_expected_writes["number"] == actual_writes -@pytest.mark.abort_on_fail +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_scale_down_capablities(ops_test: OpsTest, continuous_writes) -> None: """Tests clusters behavior when scaling down a minority and removing a primary replica. 
@@ -230,6 +233,7 @@ async def test_scale_down_capablities(ops_test: OpsTest, continuous_writes) -> N assert total_expected_writes["number"] == actual_writes +@pytest.mark.group(1) async def test_replication_across_members(ops_test: OpsTest, continuous_writes) -> None: """Check consistency, ie write to primary, read data from secondaries.""" # first find primary, write to primary, then read from each unit @@ -256,6 +260,7 @@ async def test_replication_across_members(ops_test: OpsTest, continuous_writes) assert total_expected_writes["number"] == actual_writes +@pytest.mark.group(1) async def test_unique_cluster_dbs(ops_test: OpsTest, continuous_writes) -> None: """Verify unique clusters do not share DBs.""" # first find primary, write to primary, @@ -304,6 +309,7 @@ async def test_unique_cluster_dbs(ops_test: OpsTest, continuous_writes) -> None: assert total_expected_writes["number"] == actual_writes +@pytest.mark.group(1) async def test_replication_member_scaling(ops_test: OpsTest, continuous_writes) -> None: """Verify newly added and newly removed members properly replica data. @@ -349,6 +355,7 @@ async def test_replication_member_scaling(ops_test: OpsTest, continuous_writes) assert total_expected_writes["number"] == actual_writes +@pytest.mark.group(1) async def test_kill_db_process(ops_test, continuous_writes): # locate primary unit app_name = await get_app_name(ops_test) @@ -385,6 +392,7 @@ async def test_kill_db_process(ops_test, continuous_writes): ), "secondary not up to date with the cluster after restarting." +@pytest.mark.group(1) async def test_freeze_db_process(ops_test, continuous_writes): # locate primary unit app_name = await get_app_name(ops_test) @@ -438,6 +446,7 @@ async def test_freeze_db_process(ops_test, continuous_writes): ), "secondary not up to date with the cluster after restarting." +@pytest.mark.group(1) async def test_restart_db_process(ops_test, continuous_writes, change_logging): # locate primary unit app_name = await get_app_name(ops_test) @@ -484,6 +493,7 @@ async def test_restart_db_process(ops_test, continuous_writes, change_logging): ), "secondary not up to date with the cluster after restarting." +@pytest.mark.group(1) async def test_full_cluster_crash(ops_test: OpsTest, continuous_writes, reset_restart_delay): app_name = await get_app_name(ops_test) @@ -534,6 +544,7 @@ async def test_full_cluster_crash(ops_test: OpsTest, continuous_writes, reset_re assert actual_writes == total_expected_writes["number"], "db writes missing." +@pytest.mark.group(1) async def test_full_cluster_restart(ops_test: OpsTest, continuous_writes, reset_restart_delay): app_name = await get_app_name(ops_test) @@ -582,6 +593,7 @@ async def test_full_cluster_restart(ops_test: OpsTest, continuous_writes, reset_ assert total_expected_writes["number"] == actual_writes, "writes to the db were missed." +@pytest.mark.group(1) async def test_network_cut(ops_test, continuous_writes): # locate primary unit app_name = await get_app_name(ops_test) @@ -661,6 +673,7 @@ async def test_network_cut(ops_test, continuous_writes): ), "secondary not up to date with the cluster after restarting." 
+@pytest.mark.group(1) @pytest.mark.abort_on_fail @pytest.mark.unstable async def test_scale_up_down(ops_test: OpsTest, continuous_writes): @@ -671,6 +684,7 @@ async def test_scale_up_down(ops_test: OpsTest, continuous_writes): await verify_writes(ops_test) +@pytest.mark.group(1) @pytest.mark.abort_on_fail @pytest.mark.unstable async def test_scale_up_down_removing_leader(ops_test: OpsTest, continuous_writes): diff --git a/tests/integration/metrics_tests/test_metrics.py b/tests/integration/metrics_tests/test_metrics.py index d7a9eec5c..eba60debf 100644 --- a/tests/integration/metrics_tests/test_metrics.py +++ b/tests/integration/metrics_tests/test_metrics.py @@ -23,6 +23,7 @@ MEDIAN_REELECTION_TIME = 12 +@pytest.mark.group(1) @pytest.mark.skipif( os.environ.get("PYTEST_SKIP_DEPLOY", False), reason="skipping deploy, model expected to be provided.", @@ -41,6 +42,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None: await ops_test.model.wait_for_idle() +@pytest.mark.group(1) async def test_endpoints(ops_test: OpsTest): """Sanity check that endpoints are running.""" app_name = await get_app_name(ops_test) @@ -50,6 +52,7 @@ async def test_endpoints(ops_test: OpsTest): await verify_endpoints(ops_test, unit) +@pytest.mark.group(1) async def test_endpoints_new_password(ops_test: OpsTest): """Verify that endpoints still function correctly after the monitor user password changes.""" app_name = await get_app_name(ops_test) @@ -64,6 +67,7 @@ async def test_endpoints_new_password(ops_test: OpsTest): await verify_endpoints(ops_test, unit) +@pytest.mark.group(1) async def test_endpoints_network_cut(ops_test: OpsTest): """Verify that endpoint still function correctly after a network cut.""" app_name = await get_app_name(ops_test) diff --git a/tests/integration/relation_tests/legacy_relations/test_charm_legacy_relations.py b/tests/integration/relation_tests/legacy_relations/test_charm_legacy_relations.py index 2b99d93a2..2ea4d75ba 100644 --- a/tests/integration/relation_tests/legacy_relations/test_charm_legacy_relations.py +++ b/tests/integration/relation_tests/legacy_relations/test_charm_legacy_relations.py @@ -32,6 +32,7 @@ APP_NAMES = [GRAYLOG_APP_NAME, ELASTIC_APP_NAME, DATABASE_APP_NAME] +@pytest.mark.group(1) @pytest.mark.skip("Reactive charms don't work with juju 3.1.5") async def test_build_deploy_charms(ops_test: OpsTest): """Deploy both charms (application and database) to use in the tests.""" @@ -64,6 +65,7 @@ async def test_build_deploy_charms(ops_test: OpsTest): ) +@pytest.mark.group(1) @pytest.mark.skip("Reactive charms don't work with juju 3.1.5") async def test_relation_data(ops_test: OpsTest) -> None: """Test the relation data is set correctly for this legacy relation.""" @@ -94,6 +96,7 @@ async def test_relation_data(ops_test: OpsTest) -> None: assert replset == DATABASE_APP_NAME +@pytest.mark.group(1) @pytest.mark.skip("Reactive charms don't work with juju 3.1.5") async def test_mongodb_auth_disabled(ops_test: OpsTest) -> None: """Test mongodb no longer uses auth after relating to a legacy relation.""" @@ -104,6 +107,7 @@ async def test_mongodb_auth_disabled(ops_test: OpsTest) -> None: ), "MongoDB requires authentication after legacy relation" +@pytest.mark.group(1) @pytest.mark.skip("Reactive charms don't work with juju 3.1.5") async def test_legacy_db_ops(ops_test: OpsTest) -> None: """Test graylog is able to do CRUD operations.""" @@ -132,6 +136,7 @@ async def test_legacy_db_ops(ops_test: OpsTest) -> None: assert "users:tokenlist" not in user_info["permissions"], "unable to 
perform delete operations" +@pytest.mark.group(1) @pytest.mark.skip("Reactive charms don't work with juju 3.1.5") async def test_add_unit_joins_without_auth(ops_test: OpsTest): """Verify scaling mongodb with legacy relations supports no auth.""" @@ -148,6 +153,7 @@ async def test_add_unit_joins_without_auth(ops_test: OpsTest): ), "MongoDB requires disabled authentication to support legacy relations" +@pytest.mark.group(1) @pytest.mark.unstable async def test_enable_tls(ops_test: OpsTest) -> None: """Verify each unit has TLS enabled after relating to the TLS application.""" @@ -174,6 +180,7 @@ async def test_enable_tls(ops_test: OpsTest) -> None: await ops_test.model.wait_for_idle(apps=[DATABASE_APP_NAME], status="active", timeout=1000) +@pytest.mark.group(1) @pytest.mark.unstable async def test_new_relation_fails_with_legacy(ops_test: OpsTest) -> None: """Verify new relation joining results in blocked when legacy relations exist. @@ -201,6 +208,7 @@ async def test_new_relation_fails_with_legacy(ops_test: OpsTest) -> None: ), "MongoDB requires disabled authentication to support legacy relations" +@pytest.mark.group(1) @pytest.mark.skip("Reactive charms don't work with juju 3.1.5") async def test_legacy_relation_fails_with_new(ops_test: OpsTest) -> None: """Verify legacy relation joining results in blocked when new relations exist.""" diff --git a/tests/integration/relation_tests/new_relations/helpers.py b/tests/integration/relation_tests/new_relations/helpers.py index dcaf62bc8..4ce19ae3c 100644 --- a/tests/integration/relation_tests/new_relations/helpers.py +++ b/tests/integration/relation_tests/new_relations/helpers.py @@ -7,7 +7,7 @@ from pytest_operator.plugin import OpsTest from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed -from tests.integration.helpers import get_application_relation_data +from ...helpers import get_application_relation_data async def verify_application_data( diff --git a/tests/integration/relation_tests/new_relations/test_charm_relations.py b/tests/integration/relation_tests/new_relations/test_charm_relations.py index 6be5f1aa9..8d1a8eade 100644 --- a/tests/integration/relation_tests/new_relations/test_charm_relations.py +++ b/tests/integration/relation_tests/new_relations/test_charm_relations.py @@ -33,6 +33,7 @@ APP_NAMES = [APPLICATION_APP_NAME, ANOTHER_DATABASE_APP_NAME] +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_deploy_charms(ops_test: OpsTest, application_charm, database_charm): """Deploy both charms (application and database) to use in the tests.""" @@ -84,6 +85,7 @@ async def test_deploy_charms(ops_test: OpsTest, application_charm, database_char ) +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_database_relation_with_charm_libraries(ops_test: OpsTest): """Test basic functionality of database relation interface.""" @@ -134,6 +136,7 @@ async def test_database_relation_with_charm_libraries(ops_test: OpsTest): client.close() +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_app_relation_metadata_change(ops_test: OpsTest) -> None: """Verifies that the app metadata changes with db relation joined and departed events.""" @@ -243,6 +246,7 @@ async def test_app_relation_metadata_change(ops_test: OpsTest) -> None: client.close() +@pytest.mark.group(1) async def test_user_with_extra_roles(ops_test: OpsTest): """Test superuser actions (ie creating a new user and creating a new database).""" connection_string = await get_connection_string( @@ -265,6 +269,7 @@ async def 
test_user_with_extra_roles(ops_test: OpsTest): client.close() +@pytest.mark.group(1) async def test_two_applications_doesnt_share_the_same_relation_data( ops_test: OpsTest, application_charm ): @@ -303,6 +308,7 @@ async def test_two_applications_doesnt_share_the_same_relation_data( assert application_connection_string != another_application_connection_string +@pytest.mark.group(1) async def test_an_application_can_connect_to_multiple_database_clusters(ops_test: OpsTest): """Test that an application can connect to different clusters of the same database.""" # Relate the application with both database clusters @@ -339,6 +345,7 @@ async def test_an_application_can_connect_to_multiple_database_clusters(ops_test assert application_connection_string != another_application_connection_string +@pytest.mark.group(1) async def test_an_application_can_connect_to_multiple_aliased_database_clusters( ops_test: OpsTest, database_charm ): @@ -381,6 +388,7 @@ async def test_an_application_can_connect_to_multiple_aliased_database_clusters( assert application_connection_string != another_application_connection_string +@pytest.mark.group(1) async def test_an_application_can_request_multiple_databases(ops_test: OpsTest, application_charm): """Test that an application can request additional databases using the same interface.""" # Relate the charms using another relation and wait for them exchanging some connection data. @@ -405,6 +413,7 @@ async def test_an_application_can_request_multiple_databases(ops_test: OpsTest, assert first_database_connection_string != second_database_connection_string +@pytest.mark.group(1) async def test_removed_relation_no_longer_has_access(ops_test: OpsTest): """Verify removed applications no longer have access to the database.""" # before removing relation we need its authorisation via connection string diff --git a/tests/integration/sharding_tests/test_mongos.py b/tests/integration/sharding_tests/test_mongos.py index a9cf28844..813ab3a99 100644 --- a/tests/integration/sharding_tests/test_mongos.py +++ b/tests/integration/sharding_tests/test_mongos.py @@ -20,6 +20,7 @@ TIMEOUT = 10 * 60 +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_build_and_deploy(ops_test: OpsTest, mongos_host_application_charm) -> None: """Build and deploy a sharded cluster.""" @@ -55,6 +56,7 @@ async def test_build_and_deploy(ops_test: OpsTest, mongos_host_application_charm ) +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_connect_to_cluster_creates_user(ops_test: OpsTest) -> None: """Verifies that when the cluster is formed a new user is created.""" @@ -113,6 +115,7 @@ async def test_connect_to_cluster_creates_user(ops_test: OpsTest) -> None: mongos_user_client.admin.command("dbStats") +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_disconnect_from_cluster_removes_user(ops_test: OpsTest) -> None: """Verifies that when the cluster is formed a the user is removed.""" diff --git a/tests/integration/sharding_tests/test_sharding.py b/tests/integration/sharding_tests/test_sharding.py index 987911541..28b15a3b1 100644 --- a/tests/integration/sharding_tests/test_sharding.py +++ b/tests/integration/sharding_tests/test_sharding.py @@ -28,6 +28,7 @@ TIMEOUT = 30 * 60 +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_build_and_deploy(ops_test: OpsTest) -> None: """Build and deploy a sharded cluster.""" @@ -80,6 +81,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None: assert shard_unit.workload_status_message == "missing relation to config 
server" +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_cluster_active(ops_test: OpsTest) -> None: """Tests the integration of cluster components works without error.""" @@ -120,6 +122,7 @@ async def test_cluster_active(ops_test: OpsTest) -> None: ), "Config server did not process config properly" +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_sharding(ops_test: OpsTest) -> None: """Tests writing data to mongos gets propagated to shards.""" @@ -173,6 +176,7 @@ async def test_sharding(ops_test: OpsTest) -> None: assert has_correct_data, "data not written to shard-three" +@pytest.mark.group(1) async def test_shard_removal(ops_test: OpsTest) -> None: """Test shard removal. @@ -231,6 +235,7 @@ async def test_shard_removal(ops_test: OpsTest) -> None: ), "Not all databases on final shard" +@pytest.mark.group(1) async def test_removal_of_non_primary_shard(ops_test: OpsTest): """Tests safe removal of a shard that is not primary.""" # add back a shard so we can safely remove a shard. @@ -282,6 +287,7 @@ async def test_removal_of_non_primary_shard(ops_test: OpsTest): ), "Not all databases on final shard" +@pytest.mark.group(1) async def test_unconventual_shard_removal(ops_test: OpsTest): """Tests that removing a shard application safely drains data. diff --git a/tests/integration/sharding_tests/test_sharding_race_conds.py b/tests/integration/sharding_tests/test_sharding_race_conds.py index 986ad7f47..59346e2ba 100644 --- a/tests/integration/sharding_tests/test_sharding_race_conds.py +++ b/tests/integration/sharding_tests/test_sharding_race_conds.py @@ -19,6 +19,7 @@ TIMEOUT = 60 * 30 +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_build_and_deploy(ops_test: OpsTest) -> None: """Build and deploy a sharded cluster.""" @@ -40,6 +41,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None: ) +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_immediate_relate(ops_test: OpsTest) -> None: """Tests the immediate integration of cluster components works without error.""" diff --git a/tests/integration/sharding_tests/test_sharding_relations.py b/tests/integration/sharding_tests/test_sharding_relations.py index 1f87a4b64..32c2dcfac 100644 --- a/tests/integration/sharding_tests/test_sharding_relations.py +++ b/tests/integration/sharding_tests/test_sharding_relations.py @@ -30,6 +30,7 @@ TIMEOUT = 30 * 60 +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_build_and_deploy( ops_test: OpsTest, @@ -93,6 +94,7 @@ async def test_build_and_deploy( ) +@pytest.mark.group(1) async def test_only_one_config_server_relation(ops_test: OpsTest) -> None: """Verify that a shard can only be related to one config server.""" await ops_test.model.integrate( @@ -124,6 +126,7 @@ async def test_only_one_config_server_relation(ops_test: OpsTest) -> None: ) +@pytest.mark.group(1) async def test_cannot_use_db_relation(ops_test: OpsTest) -> None: """Verify that sharding components cannot use the DB relation.""" for sharded_component in SHARDING_COMPONENTS: @@ -158,6 +161,7 @@ async def test_cannot_use_db_relation(ops_test: OpsTest) -> None: ) +@pytest.mark.group(1) async def test_cannot_use_legacy_db_relation(ops_test: OpsTest) -> None: """Verify that sharding components cannot use the legacy DB relation.""" for sharded_component in SHARDING_COMPONENTS: @@ -192,6 +196,7 @@ async def test_cannot_use_legacy_db_relation(ops_test: OpsTest) -> None: ) +@pytest.mark.group(1) async def test_replication_config_server_relation(ops_test: OpsTest): """Verifies that 
using a replica as a shard fails.""" # attempt to add a replication deployment as a shard to the config server. @@ -219,6 +224,7 @@ async def test_replication_config_server_relation(ops_test: OpsTest): ) +@pytest.mark.group(1) async def test_replication_shard_relation(ops_test: OpsTest): """Verifies that using a replica as a config-server fails.""" # attempt to add a shard to a replication deployment as a config server. @@ -253,6 +259,7 @@ async def test_replication_shard_relation(ops_test: OpsTest): ) +@pytest.mark.group(1) async def test_replication_mongos_relation(ops_test: OpsTest) -> None: """Verifies connecting a replica to a mongos router fails.""" # attempt to add a replication deployment as a shard to the config server. @@ -287,6 +294,7 @@ async def test_replication_mongos_relation(ops_test: OpsTest) -> None: time.sleep(60) +@pytest.mark.group(1) async def test_shard_mongos_relation(ops_test: OpsTest) -> None: """Verifies connecting a shard to a mongos router fails.""" # attempt to add a replication deployment as a shard to the config server. diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py index 69f1f2e1d..264535d7f 100644 --- a/tests/integration/test_charm.py +++ b/tests/integration/test_charm.py @@ -38,6 +38,7 @@ MEDIAN_REELECTION_TIME = 12 +@pytest.mark.group(1) @pytest.mark.skipif( os.environ.get("PYTEST_SKIP_DEPLOY", False), reason="skipping deploy, model expected to be provided.", @@ -56,6 +57,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None: await ops_test.model.wait_for_idle() +@pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_status(ops_test: OpsTest) -> None: """Verifies that the application and unit are active.""" @@ -64,6 +66,7 @@ async def test_status(ops_test: OpsTest) -> None: assert len(ops_test.model.applications[app_name].units) == len(UNIT_IDS) +@pytest.mark.group(1) @pytest.mark.parametrize("unit_id", UNIT_IDS) async def test_unit_is_running_as_replica_set(ops_test: OpsTest, unit_id: int) -> None: """Tests that mongodb is running as a replica set for the application unit.""" @@ -83,6 +86,7 @@ async def test_unit_is_running_as_replica_set(ops_test: OpsTest, unit_id: int) - client.close() +@pytest.mark.group(1) async def test_leader_is_primary_on_deployment(ops_test: OpsTest) -> None: """Tests that right after deployment that the primary unit is the leader.""" app_name = await get_app_name(ops_test) @@ -103,6 +107,7 @@ async def test_leader_is_primary_on_deployment(ops_test: OpsTest) -> None: client.close() +@pytest.mark.group(1) async def test_exactly_one_primary(ops_test: OpsTest) -> None: """Tests that there is exactly one primary in the deployed units.""" app_name = await get_app_name(ops_test) @@ -118,6 +123,7 @@ async def test_exactly_one_primary(ops_test: OpsTest) -> None: ) +@pytest.mark.group(1) async def test_get_primary_action(ops_test: OpsTest) -> None: """Tests that action get-primary outputs the correct unit with the primary replica.""" # determine which unit is the primary @@ -150,6 +156,7 @@ async def test_get_primary_action(ops_test: OpsTest) -> None: assert identified_primary == expected_primary +@pytest.mark.group(1) async def test_set_password_action(ops_test: OpsTest) -> None: """Tests that action set-password outputs resets the password on app data and mongod.""" # verify that password is correctly rotated by comparing old password with rotated one. 
@@ -195,6 +202,7 @@ async def test_set_password_action(ops_test: OpsTest) -> None: client.close() +@pytest.mark.group(1) async def test_monitor_user(ops_test: OpsTest) -> None: """Test verifies that the monitor user can perform operations such as 'rs.conf()'.""" app_name = await get_app_name(ops_test) @@ -212,6 +220,7 @@ async def test_monitor_user(ops_test: OpsTest) -> None: assert return_code == 0, "command rs.conf() on monitor user does not work" +@pytest.mark.group(1) async def test_only_leader_can_set_while_all_can_read_password_secret(ops_test: OpsTest) -> None: """Test verifies that only the leader can set a password, while all units can read it.""" # Setting existing password @@ -230,6 +239,7 @@ async def test_only_leader_can_set_while_all_can_read_password_secret(ops_test: assert password2 == password +@pytest.mark.group(1) async def test_reset_and_get_password_secret_same_as_cli(ops_test: OpsTest) -> None: """Test verifies that we can set and retrieve the correct password using Juju 3.x secrets.""" new_password = str(uuid4()) @@ -264,6 +274,7 @@ async def test_reset_and_get_password_secret_same_as_cli(ops_test: OpsTest) -> N assert data[secret_id]["content"]["Data"]["monitor-password"] == password +@pytest.mark.group(1) async def test_empty_password(ops_test: OpsTest) -> None: """Test that the password can't be set to an empty string.""" leader_id = await get_leader_id(ops_test) @@ -276,6 +287,7 @@ async def test_empty_password(ops_test: OpsTest) -> None: assert password1 == password2 +@pytest.mark.group(1) async def test_no_password_change_on_invalid_password(ops_test: OpsTest) -> None: """Test that in general, there is no change when password validation fails.""" leader_id = await get_leader_id(ops_test) @@ -289,6 +301,7 @@ async def test_no_password_change_on_invalid_password(ops_test: OpsTest) -> None assert password1 == password2 +@pytest.mark.group(1) async def test_exactly_one_primary_reported_by_juju(ops_test: OpsTest) -> None: """Tests that there is exactly one replica set primary unit reported by juju.""" @@ -340,6 +353,7 @@ def juju_reports_one_primary(unit_messages): await ops_test.model.destroy_unit(target_unit) +@pytest.mark.group(1) @pytest.mark.skip("Skipping until write to log files enabled") async def test_audit_log(ops_test: OpsTest) -> None: """Test that audit log was created and contains actual audit data.""" diff --git a/tests/integration/tls_tests/test_tls.py b/tests/integration/tls_tests/test_tls.py index 8ef6c32b1..36e821967 100644 --- a/tests/integration/tls_tests/test_tls.py +++ b/tests/integration/tls_tests/test_tls.py @@ -28,6 +28,7 @@ DB_SERVICE = "snap.charmed-mongodb.mongod.service" +@pytest.mark.group(1) @pytest.mark.skipif( os.environ.get("PYTEST_SKIP_DEPLOY", False), reason="skipping deploy, model expected to be provided.", @@ -54,6 +55,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None: ) +@pytest.mark.group(1) async def test_enable_tls(ops_test: OpsTest) -> None: """Verify each unit has TLS enabled after relating to the TLS application.""" # Relate it to the MongoDB to enable TLS. @@ -67,6 +69,7 @@ async def test_enable_tls(ops_test: OpsTest) -> None: assert await check_tls(ops_test, unit, enabled=True, app_name=app_name) +@pytest.mark.group(1) async def test_rotate_tls_key(ops_test: OpsTest) -> None: """Verify rotating tls private keys restarts mongod with new certificates. @@ -133,6 +136,7 @@ async def test_rotate_tls_key(ops_test: OpsTest) -> None: ), f"tls is not enabled for {unit.name}." 
+@pytest.mark.group(1) async def test_set_tls_key(ops_test: OpsTest) -> None: """Verify rotating tls private keys restarts mongod with new certificates. @@ -214,6 +218,7 @@ async def test_set_tls_key(ops_test: OpsTest) -> None: ), f"tls is not enabled for {unit.name}." +@pytest.mark.group(1) async def test_disable_tls(ops_test: OpsTest) -> None: """Verify each unit has TLS disabled after removing relation to the TLS application.""" # Remove the relation. diff --git a/tox.ini b/tox.ini index 7f04f028d..d654bcf4e 100644 --- a/tox.ini +++ b/tox.ini @@ -14,10 +14,8 @@ all_path = {[vars]src_path} {[vars]tests_path} {[vars]mongodb_lib_path} [testenv] set_env = - PYTHONPATH = {tox_root}/lib:{[vars]src_path} - PYTHONBREAKPOINT=ipdb.set_trace + PYTHONPATH = {[vars]src_path}:{tox_root}/lib PY_COLORS=1 - PYTHONDONTWRITEBYTECODE=1 pass_env = PYTHONPATH CHARM_BUILD_DIR @@ -67,203 +65,30 @@ commands = -m pytest -v --tb native -s {posargs} {[vars]tests_path}/unit coverage report -[testenv:charm-integration] -description = Run charm integration tests -pass_env = - {[testenv]pass_env} - CI -deps = - pytest - juju==3.2.0.1 - pytest-operator - protobuf==3.20 # temporary fix until new libjuju is released - pytest-mock - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache - -r {tox_root}/requirements.txt -commands = - pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/test_charm.py - -[testenv:ha-integration] -description = Run high availability integration tests -pass_env = - {[testenv]pass_env} - CI -deps = - pytest - juju==3.2.0.1 - pytest-mock - pytest-operator - protobuf==3.20 # temporary fix until new libjuju is released - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache - -r {tox_root}/requirements.txt -commands = - pytest -vvv --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/ha_tests/test_ha.py - -[testenv:relation-integration] -description = Run new relation integration tests -pass_env = - {[testenv]pass_env} - CI -deps = - pytest - juju==3.2.0.1 - pytest-mock - pytest-operator - protobuf==3.20 # temporary fix until new libjuju is released - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache - -r {tox_root}/requirements.txt -commands = - pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/relation_tests/new_relations/test_charm_relations.py - -[testenv:legacy-integration] -description = Run legacy relation integration tests -pass_env = - {[testenv]pass_env} - CI -deps = - pytest - juju==3.2.0.1 - pytest-mock - pytest-operator - protobuf==3.20 # temporary fix until new libjuju is released - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache - -r {tox_root}/requirements.txt -commands = - pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/relation_tests/legacy_relations/test_charm_legacy_relations.py - -[testenv:tls-integration] -description = Run tls integration tests -pass_env = - {[testenv]pass_env} - CI -deps = - pytest - juju==3.2.0.1 - pytest-mock - pytest-operator - protobuf==3.20 # temporary fix until new libjuju is released - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache - 
-r {tox_root}/requirements.txt -commands = - pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/tls_tests/test_tls.py - - -[testenv:backup-integration] -description = Run backup integration tests -pass_env = - {[testenv]pass_env} - CI - AWS_ACCESS_KEY - AWS_SECRET_KEY - GCP_ACCESS_KEY - GCP_SECRET_KEY -deps = - pytest - juju==3.2.0.1 - pytest-mock - pytest-operator - protobuf==3.20 # temporary fix until new libjuju is released - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache - -r {tox_root}/requirements.txt -commands = - pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/backup_tests/test_backups.py - -[testenv:metric-integration] -description = Run metrics integration tests -pass_env = - {[testenv]pass_env} - CI -deps = - pytest - juju==3.2.0.1 - pytest-mock - pytest-operator - protobuf==3.20 # temporary fix until new libjuju is released - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache - -r {tox_root}/requirements.txt -commands = - pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/metrics_tests/test_metrics.py - -[testenv:sharding-integration] -description = Run sharding integration tests -pass_env = - {[testenv]pass_env} - CI -deps = - pytest - juju==3.2.0.1 - pytest-mock - pytest-operator - protobuf==3.20 # temporary fix until new libjuju is released - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache - -r {tox_root}/requirements.txt -commands = - pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/sharding_tests/test_sharding.py - -[testenv:sharding-relation-integration] -description = Run sharding integration tests -pass_env = - {[testenv]pass_env} - CI -deps = - pytest - juju==3.2.0.1 - pytest-mock - pytest-operator - protobuf==3.20 # temporary fix until new libjuju is released - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache - -r {tox_root}/requirements.txt -commands = - pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/sharding_tests/test_sharding_relations.py - -[testenv:sharding-race-conditions-integration] -description = Run sharding race condition tests -pass_env = - {[testenv]pass_env} - CI -deps = - pytest - juju==3.2.0.1 - pytest-mock - pytest-operator - protobuf==3.20 # temporary fix until new libjuju is released - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache - -r {tox_root}/requirements.txt -commands = - pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/sharding_tests/test_sharding_race_conds.py - -[testenv:sharding-mongos-integration] -description = Run sharding mongos tests -pass_env = - {[testenv]pass_env} - CI -deps = - pytest - juju==3.2.0.1 - pytest-mock - pytest-operator - protobuf==3.20 # temporary fix until new libjuju is released - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache - -r {tox_root}/requirements.txt -commands = - pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} 
{[vars]tests_path}/integration/sharding_tests/test_mongos.py - - [testenv:integration] description = Run all integration tests +set_env = + {[testenv]set_env} + # Workaround for https://github.com/python-poetry/poetry/issues/6958 + POETRY_INSTALLER_PARALLEL = false pass_env = - {[testenv]pass_env} CI + GITHUB_OUTPUT + SECRETS_FROM_GITHUB +allowlist_externals = + {[testenv:build-wrapper]allowlist_externals} deps = pytest juju==3.2.0.1 pytest-mock pytest-operator protobuf==3.20 # temporary fix until new libjuju is released - git+https://github.com/canonical/data-platform-workflows@v8\#subdirectory=python/pytest_plugins/pytest_operator_cache + git+https://github.com/canonical/data-platform-workflows@v11.0.1\#subdirectory=python/pytest_plugins/github_secrets + git+https://github.com/canonical/data-platform-workflows@v11.0.1\#subdirectory=python/pytest_plugins/pytest_operator_groups + git+https://github.com/canonical/data-platform-workflows@v11.0.1\#subdirectory=python/pytest_plugins/pytest_operator_cache -r {tox_root}/requirements.txt commands = - pytest -v --tb native --log-cli-level=INFO -s --durations=0 {posargs} {[vars]tests_path}/integration/ + pytest -v --tb native --log-cli-level=INFO -s --ignore={[vars]tests_path}/unit/ {posargs} [testenv:cleanup_juju_models] description = Cleanup Juju models From 5b8c969a00ae9ee285c0f68cd1baa2bbcfb7fbe8 Mon Sep 17 00:00:00 2001 From: Mia Altieri <32723809+MiaAltieri@users.noreply.github.com> Date: Tue, 20 Feb 2024 14:00:04 +0100 Subject: [PATCH 3/5] [DPE-3500] Share backup password + create&list backups (#351) ## Issue Backups are not feasible in sharded clusters ## Solution Make backups possible in sharded cluster + add tests. Necessary changes to make backups feasible in sharded cluster: 1. pbm password shared across cluster 2. 
shards restart with new pbm uri ## Other Changes pbm was incorrectly configured to the wrong URI, according to the documentation it should use standalone URI --- lib/charms/mongodb/v0/mongodb.py | 13 +- lib/charms/mongodb/v1/mongodb_backups.py | 4 +- lib/charms/mongodb/v1/shards_interface.py | 35 +++-- src/charm.py | 7 +- src/config.py | 2 +- .../sharding_tests/test_sharding_backups.py | 127 ++++++++++++++++++ 6 files changed, 170 insertions(+), 18 deletions(-) create mode 100644 tests/integration/sharding_tests/test_sharding_backups.py diff --git a/lib/charms/mongodb/v0/mongodb.py b/lib/charms/mongodb/v0/mongodb.py index 2bc5c6657..f482f7eec 100644 --- a/lib/charms/mongodb/v0/mongodb.py +++ b/lib/charms/mongodb/v0/mongodb.py @@ -22,6 +22,8 @@ wait_fixed, ) +from config import Config + # The unique Charmhub library identifier, never change it LIBID = "49c69d9977574dd7942eb7b54f43355b" @@ -30,7 +32,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 7 +LIBPATCH = 8 # path to store mongodb ketFile logger = logging.getLogger(__name__) @@ -57,6 +59,7 @@ class MongoDBConfiguration: roles: Set[str] tls_external: bool tls_internal: bool + standalone: bool = False @property def uri(self): @@ -66,6 +69,14 @@ def uri(self): auth_source = "" if self.database != "admin": auth_source = "&authSource=admin" + + if self.standalone: + return ( + f"mongodb://{quote_plus(self.username)}:" + f"{quote_plus(self.password)}@" + f"localhost:{Config.MONGODB_PORT}/?authSource=admin" + ) + return ( f"mongodb://{quote_plus(self.username)}:" f"{quote_plus(self.password)}@" diff --git a/lib/charms/mongodb/v1/mongodb_backups.py b/lib/charms/mongodb/v1/mongodb_backups.py index 9dc6b274a..5f7d07427 100644 --- a/lib/charms/mongodb/v1/mongodb_backups.py +++ b/lib/charms/mongodb/v1/mongodb_backups.py @@ -40,7 +40,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 2 +LIBPATCH = 3 logger = logging.getLogger(__name__) @@ -57,7 +57,7 @@ REMAPPING_PATTERN = r"\ABackup doesn't match current cluster topology - it has different replica set names. Extra shards in the backup will cause this, for a simple example. The extra/unknown replica set names found in the backup are: ([^,\s]+)([.] 
Backup has no data for the config server or sole replicaset)?\Z" PBM_STATUS_CMD = ["status", "-o", "json"] MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current" -BACKUP_RESTORE_MAX_ATTEMPTS = 5 +BACKUP_RESTORE_MAX_ATTEMPTS = 10 BACKUP_RESTORE_ATTEMPT_COOLDOWN = 15 diff --git a/lib/charms/mongodb/v1/shards_interface.py b/lib/charms/mongodb/v1/shards_interface.py index ce5eb8add..4669974ff 100644 --- a/lib/charms/mongodb/v1/shards_interface.py +++ b/lib/charms/mongodb/v1/shards_interface.py @@ -30,7 +30,7 @@ ShardNotInClusterError, ShardNotPlannedForRemovalError, ) -from charms.mongodb.v1.users import MongoDBUser, OperatorUser +from charms.mongodb.v1.users import BackupUser, MongoDBUser, OperatorUser from ops.charm import CharmBase, EventBase, RelationBrokenEvent from ops.framework import Object from ops.model import ( @@ -55,10 +55,11 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 6 +LIBPATCH = 7 KEYFILE_KEY = "key-file" HOSTS_KEY = "host" OPERATOR_PASSWORD_KEY = MongoDBUser.get_password_key_name_for_user(OperatorUser.get_username()) +BACKUP_PASSWORD_KEY = MongoDBUser.get_password_key_name_for_user(BackupUser.get_username()) FORBIDDEN_REMOVAL_ERR_CODE = 20 AUTH_FAILED_CODE = 18 @@ -118,6 +119,10 @@ def _on_relation_joined(self, event): Config.Relations.APP_SCOPE, OPERATOR_PASSWORD_KEY, ), + BACKUP_PASSWORD_KEY: self.charm.get_secret( + Config.Relations.APP_SCOPE, + BACKUP_PASSWORD_KEY, + ), KEYFILE_KEY: self.charm.get_secret( Config.Relations.APP_SCOPE, Config.Secrets.SECRET_KEYFILE_NAME ), @@ -450,7 +455,7 @@ def __init__( self.database_requires = DatabaseRequires( self.charm, relation_name=self.relation_name, - additional_secret_fields=[KEYFILE_KEY, OPERATOR_PASSWORD_KEY], + additional_secret_fields=[KEYFILE_KEY, OPERATOR_PASSWORD_KEY, BACKUP_PASSWORD_KEY], # a database isn't required for the relation between shards + config servers, but is a # requirement for using `DatabaseRequires` database_name="", @@ -478,7 +483,6 @@ def _on_relation_changed(self, event): # if re-using an old shard, re-set drained flag. 
self.charm.unit_peer_data["drained"] = json.dumps(False) - self.charm.unit.status = MaintenanceStatus("Adding shard to config-server") # shards rely on the config server for secrets @@ -507,13 +511,19 @@ def _on_relation_changed(self, event): operator_password = self.database_requires.fetch_relation_field( event.relation.id, OPERATOR_PASSWORD_KEY ) - if not operator_password: + backup_password = self.database_requires.fetch_relation_field( + event.relation.id, BACKUP_PASSWORD_KEY + ) + if not operator_password or not backup_password: event.defer() self.charm.unit.status = WaitingStatus("Waiting for secrets from config-server") return try: - self.update_operator_password(new_password=operator_password) + self.update_password( + username=OperatorUser.get_username(), new_password=operator_password + ) + self.update_password(BackupUser.get_username(), new_password=backup_password) except RetryError: self.charm.unit.status = BlockedStatus("Shard not added to config-server") logger.error( @@ -522,6 +532,8 @@ def _on_relation_changed(self, event): event.defer() return + # after updating the password of the backup user, restart pbm with correct password + self.charm._connect_pbm_agent() self.charm.app_peer_data["mongos_hosts"] = json.dumps(self.get_mongos_hosts()) def pass_hook_checks(self, event): @@ -685,8 +697,8 @@ def drained(self, mongos_hosts: Set[str], shard_name: str) -> bool: self.charm.unit_peer_data["drained"] = json.dumps(drained) return drained - def update_operator_password(self, new_password: str) -> None: - """Updates the password for the operator user. + def update_password(self, username: str, new_password: str) -> None: + """Updates the password for the given user. Raises: RetryError @@ -696,8 +708,7 @@ def update_operator_password(self, new_password: str) -> None: current_password = ( self.charm.get_secret( - Config.Relations.APP_SCOPE, - OPERATOR_PASSWORD_KEY, + Config.Relations.APP_SCOPE, MongoDBUser.get_password_key_name_for_user(username) ), ) @@ -712,7 +723,7 @@ def update_operator_password(self, new_password: str) -> None: # a library, for exceptions used in both charm code and lib code. with MongoDBConnection(self.charm.mongodb_config) as mongo: try: - mongo.set_user_password(OperatorUser.get_username(), new_password) + mongo.set_user_password(username, new_password) except NotReadyError: logger.error( "Failed changing the password: Not all members healthy or finished initial sync." 
@@ -724,7 +735,7 @@ def update_operator_password(self, new_password: str) -> None: self.charm.set_secret( Config.Relations.APP_SCOPE, - OPERATOR_PASSWORD_KEY, + MongoDBUser.get_password_key_name_for_user(username), new_password, ) diff --git a/src/charm.py b/src/charm.py index cc3544c63..dfc8008f4 100755 --- a/src/charm.py +++ b/src/charm.py @@ -225,7 +225,9 @@ def monitor_config(self) -> MongoDBConfiguration: def backup_config(self) -> MongoDBConfiguration: """Generates a MongoDBConfiguration object for backup.""" self._check_or_set_user_password(BackupUser) - return self._get_mongodb_config_for_user(BackupUser, BackupUser.get_hosts()) + return self._get_mongodb_config_for_user( + BackupUser, BackupUser.get_hosts(), standalone=True + ) @property def unit_peer_data(self) -> Dict: @@ -773,7 +775,7 @@ def _get_mongos_config_for_user( ) def _get_mongodb_config_for_user( - self, user: MongoDBUser, hosts: Set[str] + self, user: MongoDBUser, hosts: Set[str], standalone: bool = False ) -> MongoDBConfiguration: external_ca, _ = self.tls.get_tls_files(UNIT_SCOPE) internal_ca, _ = self.tls.get_tls_files(APP_SCOPE) @@ -787,6 +789,7 @@ def _get_mongodb_config_for_user( roles=user.get_roles(), tls_external=external_ca is not None, tls_internal=internal_ca is not None, + standalone=standalone, ) def _get_user_or_fail_event(self, event: ActionEvent, default_username: str) -> Optional[str]: diff --git a/src/config.py b/src/config.py index 2e9feae4b..e6161ba8a 100644 --- a/src/config.py +++ b/src/config.py @@ -17,7 +17,7 @@ class Config: MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current" MONGOD_CONF_DIR = f"{MONGODB_SNAP_DATA_DIR}/etc/mongod" MONGOD_CONF_FILE_PATH = f"{MONGOD_CONF_DIR}/mongod.conf" - SNAP_PACKAGES = [("charmed-mongodb", "6/edge", 93)] + SNAP_PACKAGES = [("charmed-mongodb", "6/edge", 111)] # Keep these alphabetically sorted class Actions: diff --git a/tests/integration/sharding_tests/test_sharding_backups.py b/tests/integration/sharding_tests/test_sharding_backups.py new file mode 100644 index 000000000..3a807837f --- /dev/null +++ b/tests/integration/sharding_tests/test_sharding_backups.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. 
+ +import secrets +import string + +import pytest +from pytest_operator.plugin import OpsTest +from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed + +from ..backup_tests import helpers as backup_helpers + +S3_APP_NAME = "s3-integrator" +SHARD_ONE_APP_NAME = "shard-one" +SHARD_TWO_APP_NAME = "shard-two" +SHARD_APPS = [SHARD_ONE_APP_NAME, SHARD_TWO_APP_NAME] +CONFIG_SERVER_APP_NAME = "config-server-one" +SHARD_REL_NAME = "sharding" +CONFIG_SERVER_REL_NAME = "config-server" +S3_REL_NAME = "s3-credentials" +TIMEOUT = 10 * 60 + + +@pytest.mark.group(1) +@pytest.mark.abort_on_fail +async def test_build_and_deploy(ops_test: OpsTest) -> None: + """Build and deploy a sharded cluster.""" + my_charm = await ops_test.build_charm(".") + await ops_test.model.deploy( + my_charm, + num_units=2, + config={"role": "config-server"}, + application_name=CONFIG_SERVER_APP_NAME, + ) + await ops_test.model.deploy( + my_charm, num_units=2, config={"role": "shard"}, application_name=SHARD_ONE_APP_NAME + ) + await ops_test.model.deploy( + my_charm, num_units=1, config={"role": "shard"}, application_name=SHARD_TWO_APP_NAME + ) + + # deploy the s3 integrator charm + await ops_test.model.deploy(S3_APP_NAME, channel="edge") + + await ops_test.model.wait_for_idle( + apps=[S3_APP_NAME, CONFIG_SERVER_APP_NAME, SHARD_ONE_APP_NAME, SHARD_TWO_APP_NAME], + idle_period=20, + raise_on_blocked=False, + timeout=TIMEOUT, + raise_on_error=False, + ) + + +@pytest.mark.group(1) +async def test_set_credentials_in_cluster(ops_test: OpsTest, github_secrets) -> None: + """Tests that sharded cluster can be configured for s3 configurations.""" + await backup_helpers.set_credentials(ops_test, github_secrets, cloud="AWS") + choices = string.ascii_letters + string.digits + unique_path = "".join([secrets.choice(choices) for _ in range(4)]) + configuration_parameters = { + "bucket": "data-charms-testing", + "path": f"mongodb-vm/test-{unique_path}", + "endpoint": "https://s3.amazonaws.com", + "region": "us-east-1", + } + + # apply new configuration options + await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) + await ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active", timeout=TIMEOUT) + + # provide config-server to entire cluster and s3-integrator to config-server - integrations + # made in succession to test race conditions. 
+ await ops_test.model.integrate( + f"{S3_APP_NAME}:{S3_REL_NAME}", + f"{CONFIG_SERVER_APP_NAME}:{S3_REL_NAME}", + ) + await ops_test.model.integrate( + f"{SHARD_ONE_APP_NAME}:{SHARD_REL_NAME}", + f"{CONFIG_SERVER_APP_NAME}:{CONFIG_SERVER_REL_NAME}", + ) + await ops_test.model.integrate( + f"{SHARD_TWO_APP_NAME}:{SHARD_REL_NAME}", + f"{CONFIG_SERVER_APP_NAME}:{CONFIG_SERVER_REL_NAME}", + ) + + await ops_test.model.wait_for_idle( + apps=[ + CONFIG_SERVER_APP_NAME, + SHARD_ONE_APP_NAME, + SHARD_TWO_APP_NAME, + ], + idle_period=20, + status="active", + timeout=TIMEOUT, + ) + + +@pytest.mark.group(1) +async def test_create_and_list_backups_in_cluster(ops_test: OpsTest, github_secrets) -> None: + """Tests that sharded cluster can successfully create and list backups.""" + leader_unit = await backup_helpers.get_leader_unit( + ops_test, db_app_name=CONFIG_SERVER_APP_NAME + ) + await backup_helpers.set_credentials(ops_test, github_secrets, cloud="AWS") + # verify backup list works + action = await leader_unit.run_action(action_name="list-backups") + list_result = await action.wait() + backups = list_result.results["backups"] + assert backups, "backups not outputted" + + # verify backup is started + action = await leader_unit.run_action(action_name="create-backup") + backup_result = await action.wait() + assert "backup started" in backup_result.results["backup-status"], "backup didn't start" + + # verify backup is present in the list of backups + # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a + # backup can take a lot of time so this function returns once the command was successfully + # sent to pbm. Therefore we should retry listing the backup several times + try: + for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)): + with attempt: + backups = await backup_helpers.count_logical_backups(leader_unit) + assert backups == 1 + except RetryError: + assert backups == 1, "Backup not created." 
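Note on the retry loop above: `create-backup` returns as soon as the request is handed to `pbm`, so the test polls `count_logical_backups` until the expected backup appears. The same loop is repeated in the password-rotation test added in the next patch; if it keeps growing it could be folded into `tests/integration/backup_tests/helpers.py`. A sketch of such a helper follows — the name `wait_for_logical_backups` is hypothetical and not part of these patches:

```python
# Hypothetical helper (not part of this patch set): poll the leader's
# logical-backup count until it reaches the expected value or the retry
# window expires, then fail with a readable assertion message.
from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed

from ..backup_tests import helpers as backup_helpers


async def wait_for_logical_backups(leader_unit, expected_count: int, timeout: int = 20) -> None:
    """Retry counting logical backups until `expected_count` is reached."""
    backups = -1
    try:
        for attempt in Retrying(stop=stop_after_delay(timeout), wait=wait_fixed(3)):
            with attempt:
                backups = await backup_helpers.count_logical_backups(leader_unit)
                assert backups == expected_count
    except RetryError:
        raise AssertionError(
            f"expected {expected_count} logical backup(s), found {backups}"
        ) from None
```

With a helper like this, the two call sites would reduce to `await wait_for_logical_backups(leader_unit, expected_count=1)` here and `expected_count=2` after the password rotation.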
From 8f459ab8336fe96368cf399cd9adc3b8c10410a5 Mon Sep 17 00:00:00 2001 From: Mia Altieri <32723809+MiaAltieri@users.noreply.github.com> Date: Wed, 21 Feb 2024 09:18:19 +0100 Subject: [PATCH 4/5] [DPE-3502] support backup password rotation across sharded cluster (#358) ## Issue rotating backup password breaks the sharded cluster ## Solution support backup password rotation --------- Co-authored-by: Mehdi Bendriss --- src/charm.py | 2 +- tests/integration/helpers.py | 4 +- .../sharding_tests/test_sharding_backups.py | 58 +++++++++++++++++++ 3 files changed, 61 insertions(+), 3 deletions(-) diff --git a/src/charm.py b/src/charm.py index dfc8008f4..d5953b427 100755 --- a/src/charm.py +++ b/src/charm.py @@ -604,7 +604,7 @@ def _on_set_password(self, event: ActionEvent) -> None: # rotate password to shards # TODO in the future support rotating passwords of pbm across shards - if username == OperatorUser.get_username(): + if username in [OperatorUser.get_username(), BackupUser.get_username()]: self.config_server.update_credentials( MongoDBUser.get_password_key_name_for_user(username), new_password, diff --git a/tests/integration/helpers.py b/tests/integration/helpers.py index ddedaf408..b511e6f70 100644 --- a/tests/integration/helpers.py +++ b/tests/integration/helpers.py @@ -98,9 +98,9 @@ async def find_unit(ops_test: OpsTest, leader: bool, app_name=None) -> ops.model return ret_unit -async def get_leader_id(ops_test: OpsTest) -> int: +async def get_leader_id(ops_test: OpsTest, app_name=None) -> int: """Returns the unit number of the juju leader unit.""" - app_name = await get_app_name(ops_test) + app_name = app_name or await get_app_name(ops_test) for unit in ops_test.model.applications[app_name].units: if await unit.is_leader_from_status(): return int(unit.name.split("/")[1]) diff --git a/tests/integration/sharding_tests/test_sharding_backups.py b/tests/integration/sharding_tests/test_sharding_backups.py index 3a807837f..3f534c987 100644 --- a/tests/integration/sharding_tests/test_sharding_backups.py +++ b/tests/integration/sharding_tests/test_sharding_backups.py @@ -10,6 +10,7 @@ from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed from ..backup_tests import helpers as backup_helpers +from ..helpers import get_leader_id, get_password, set_password S3_APP_NAME = "s3-integrator" SHARD_ONE_APP_NAME = "shard-one" @@ -53,6 +54,7 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None: @pytest.mark.group(1) +@pytest.mark.abort_on_fail async def test_set_credentials_in_cluster(ops_test: OpsTest, github_secrets) -> None: """Tests that sharded cluster can be configured for s3 configurations.""" await backup_helpers.set_credentials(ops_test, github_secrets, cloud="AWS") @@ -97,6 +99,7 @@ async def test_set_credentials_in_cluster(ops_test: OpsTest, github_secrets) -> @pytest.mark.group(1) +@pytest.mark.abort_on_fail async def test_create_and_list_backups_in_cluster(ops_test: OpsTest, github_secrets) -> None: """Tests that sharded cluster can successfully create and list backups.""" leader_unit = await backup_helpers.get_leader_unit( @@ -125,3 +128,58 @@ async def test_create_and_list_backups_in_cluster(ops_test: OpsTest, github_secr assert backups == 1 except RetryError: assert backups == 1, "Backup not created." 
+
+
+@pytest.mark.group(1)
+@pytest.mark.abort_on_fail
+async def test_rotate_backup_password(ops_test: OpsTest) -> None:
+    """Tests that the backup password can be rotated across the sharded cluster."""
+    config_leader_id = await get_leader_id(ops_test, app_name=CONFIG_SERVER_APP_NAME)
+    new_password = "new-password"
+
+    shard_backup_password = await get_password(
+        ops_test, username="backup", app_name=SHARD_ONE_APP_NAME
+    )
+    assert (
+        shard_backup_password != new_password
+    ), "shard-one is incorrectly already set to the new password."
+
+    shard_backup_password = await get_password(
+        ops_test, username="backup", app_name=SHARD_TWO_APP_NAME
+    )
+    assert (
+        shard_backup_password != new_password
+    ), "shard-two is incorrectly already set to the new password."
+
+    await set_password(
+        ops_test, unit_id=config_leader_id, username="backup", password=new_password
+    )
+    await ops_test.model.wait_for_idle(
+        apps=[CONFIG_SERVER_APP_NAME, SHARD_ONE_APP_NAME, SHARD_TWO_APP_NAME],
+        idle_period=20,
+        timeout=TIMEOUT,
+    )
+
+    shard_backup_password = await get_password(
+        ops_test, username="backup", app_name=SHARD_ONE_APP_NAME
+    )
+    assert shard_backup_password == new_password, "Application shard-one did not rotate password"
+
+    shard_backup_password = await get_password(
+        ops_test, username="backup", app_name=SHARD_TWO_APP_NAME
+    )
+    assert shard_backup_password == new_password, "Application shard-two did not rotate password"
+
+    # verify backup actions work after password rotation
+    leader_unit = await backup_helpers.get_leader_unit(
+        ops_test, db_app_name=CONFIG_SERVER_APP_NAME
+    )
+    action = await leader_unit.run_action(action_name="create-backup")
+    backup_result = await action.wait()
+    assert (
+        "backup started" in backup_result.results["backup-status"]
+    ), "backup didn't start after password rotation"
+
+    # verify backup is present in the list of backups
+    # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a
+    # backup can take a lot of time so this function returns once the command was successfully
+    # sent to pbm. Therefore we should retry listing the backup several times
+    try:
+        for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)):
+            with attempt:
+                backups = await backup_helpers.count_logical_backups(leader_unit)
+                assert backups == 2
+    except RetryError:
+        assert backups == 2, "Backup not created after password rotation."

From 0422edb2318e984dd67304d0491bd45a11e177d1 Mon Sep 17 00:00:00 2001
From: Mia Altieri <32723809+MiaAltieri@users.noreply.github.com>
Date: Thu, 22 Feb 2024 13:04:25 +0100
Subject: [PATCH 5/5] [DPE-3499] prevent shards from receiving backup actions +
 integrating on the s3 interface (#357)

## Issue
1. Shards can be integrated on the s3 interface
2.
Shards can receive backup actions ## Solution Prevent this from occurring --- lib/charms/mongodb/v1/mongodb_backups.py | 71 ++++++++++++++----- src/charm.py | 5 ++ .../sharding_tests/test_sharding_backups.py | 17 +++++ .../sharding_tests/test_sharding_relations.py | 51 +++++++++++-- 4 files changed, 118 insertions(+), 26 deletions(-) diff --git a/lib/charms/mongodb/v1/mongodb_backups.py b/lib/charms/mongodb/v1/mongodb_backups.py index 5f7d07427..53d46b81c 100644 --- a/lib/charms/mongodb/v1/mongodb_backups.py +++ b/lib/charms/mongodb/v1/mongodb_backups.py @@ -40,7 +40,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 3 +LIBPATCH = 4 logger = logging.getLogger(__name__) @@ -112,6 +112,9 @@ def __init__(self, charm): # s3 relation handles the config options for s3 backups self.s3_client = S3Requirer(self.charm, S3_RELATION) + self.framework.observe( + self.charm.on[S3_RELATION].relation_joined, self.on_s3_relation_joined + ) self.framework.observe( self.s3_client.on.credentials_changed, self._on_s3_credential_changed ) @@ -119,11 +122,24 @@ def __init__(self, charm): self.framework.observe(self.charm.on.list_backups_action, self._on_list_backups_action) self.framework.observe(self.charm.on.restore_action, self._on_restore_action) + def on_s3_relation_joined(self, _) -> None: + """Checks for valid integration for s3-integrations.""" + if not self.is_valid_s3_integration(): + logger.debug( + "Shard does not support s3 relations, please relate s3-integrator to config-server only." + ) + self.charm.unit.status = BlockedStatus( + "Relation to s3-integrator is not supported, config role must be config-server" + ) + def _on_s3_credential_changed(self, event: CredentialsChangedEvent): """Sets pbm credentials, resyncs if necessary and reports config errors.""" # handling PBM configurations requires that MongoDB is running and the pbm snap is # installed. action = "configure-pbm" + if not self._pass_sanity_checks(event, action): + return + if not self.charm.db_initialised: self._defer_action_with_info_log( event, action, "Set PBM credentials, MongoDB not ready." @@ -140,12 +156,7 @@ def _on_s3_credential_changed(self, event: CredentialsChangedEvent): def _on_create_backup_action(self, event) -> None: action = "backup" - if self.model.get_relation(S3_RELATION) is None: - self._fail_action_with_error_log( - event, - action, - "Relation with s3-integrator charm missing, cannot create backup.", - ) + if not self._pass_sanity_checks(event, action): return # only leader can create backups. 
This prevents multiple backups from being attempted at @@ -195,12 +206,7 @@ def _on_create_backup_action(self, event) -> None: def _on_list_backups_action(self, event) -> None: action = "list-backups" - if self.model.get_relation(S3_RELATION) is None: - self._fail_action_with_error_log( - event, - action, - "Relation with s3-integrator charm missing, cannot list backups.", - ) + if not self._pass_sanity_checks(event, action): return # cannot list backups if pbm is resyncing, or has incompatible options or incorrect @@ -229,12 +235,7 @@ def _on_list_backups_action(self, event) -> None: def _on_restore_action(self, event) -> None: action = "restore" - if self.model.get_relation(S3_RELATION) is None: - self._fail_action_with_error_log( - event, - action, - "Relation with s3-integrator charm missing, cannot restore from a backup.", - ) + if not self._pass_sanity_checks(event, action): return backup_id = event.params.get("backup-id") @@ -289,6 +290,38 @@ def _on_restore_action(self, event) -> None: self._fail_action_with_error_log(event, action, str(restore_error)) # BEGIN: helper functions + def is_valid_s3_integration(self) -> bool: + """Return true if relation to s3-integrator is valid. + + Only replica sets and config servers can integrate to s3-integrator. + """ + if self.charm.is_role(Config.Role.SHARD) and self.model.get_relation(S3_RELATION): + return False + + return True + + def _pass_sanity_checks(self, event, action) -> bool: + """Return True if basic pre-conditions for running backup actions are met. + + No matter what backup-action is being run, these requirements must be met. + """ + if not self.is_valid_s3_integration(): + self._fail_action_with_error_log( + event, + action, + "Shards do not support backup operations, please run action on config-server.", + ) + return False + + if self.model.get_relation(S3_RELATION) is None: + self._fail_action_with_error_log( + event, + action, + "Relation with s3-integrator charm missing, cannot restore from a backup.", + ) + return False + + return True def _configure_pbm_options(self, event) -> None: action = "configure-pbm" diff --git a/src/charm.py b/src/charm.py index d5953b427..ba86bae5a 100755 --- a/src/charm.py +++ b/src/charm.py @@ -1368,6 +1368,11 @@ def get_invalid_integration_status(self) -> Optional[StatusBase]: "Relation to mongos not supported, config role must be config-server" ) + if not self.backups.is_valid_s3_integration(): + return BlockedStatus( + "Relation to s3-integrator is not supported, config role must be config-server" + ) + def get_status(self) -> StatusBase: """Returns the status with the highest priority from backups, sharding, and mongod. diff --git a/tests/integration/sharding_tests/test_sharding_backups.py b/tests/integration/sharding_tests/test_sharding_backups.py index 3f534c987..49a6e6190 100644 --- a/tests/integration/sharding_tests/test_sharding_backups.py +++ b/tests/integration/sharding_tests/test_sharding_backups.py @@ -130,6 +130,23 @@ async def test_create_and_list_backups_in_cluster(ops_test: OpsTest, github_secr assert backups == 1, "Backup not created." +@pytest.mark.group(1) +@pytest.mark.abort_on_fail +async def test_shards_cannot_run_backup_actions(ops_test: OpsTest) -> None: + shard_unit = await backup_helpers.get_leader_unit(ops_test, db_app_name=SHARD_ONE_APP_NAME) + action = await shard_unit.run_action(action_name="create-backup") + attempted_backup = await action.wait() + assert attempted_backup.status == "failed", "shard ran create-backup command." 
+ + action = await shard_unit.run_action(action_name="list-backups") + attempted_backup = await action.wait() + assert attempted_backup.status == "failed", "shard ran list-backup command." + + action = await shard_unit.run_action(action_name="restore") + attempted_backup = await action.wait() + assert attempted_backup.status == "failed", "shard ran list-backup command." + + @pytest.mark.group(1) @pytest.mark.abort_on_fail async def test_rotate_backup_password(ops_test: OpsTest) -> None: diff --git a/tests/integration/sharding_tests/test_sharding_relations.py b/tests/integration/sharding_tests/test_sharding_relations.py index 32c2dcfac..c1c24d381 100644 --- a/tests/integration/sharding_tests/test_sharding_relations.py +++ b/tests/integration/sharding_tests/test_sharding_relations.py @@ -1,12 +1,11 @@ #!/usr/bin/env python3 # Copyright 2023 Canonical Ltd. # See LICENSE file for licensing details. -import time - import pytest from juju.errors import JujuAPIError from pytest_operator.plugin import OpsTest +S3_APP_NAME = "s3-integrator" SHARD_ONE_APP_NAME = "shard" CONFIG_SERVER_ONE_APP_NAME = "config-server-one" CONFIG_SERVER_TWO_APP_NAME = "config-server-two" @@ -58,6 +57,7 @@ async def test_build_and_deploy( channel="6/edge", revision=3, ) + await ops_test.model.deploy(S3_APP_NAME, channel="edge") # TODO: Future PR, once data integrator works with mongos charm deploy that charm instead of # packing and deploying the charm in the application dir. @@ -287,11 +287,12 @@ async def test_replication_mongos_relation(ops_test: OpsTest) -> None: f"{MONGOS_APP_NAME}:cluster", ) - # TODO remove this and wait for mongos to be active - # right now we cannot wait for `mongos` to be active after removing the relation due to a bug - # in the mongos charm. To fix the bug it is first necessary to publish the updated library - # lib/charms/mongodb/v0/config_server.py - time.sleep(60) + await ops_test.model.wait_for_idle( + apps=[SHARD_ONE_APP_NAME], + idle_period=20, + raise_on_blocked=False, + timeout=TIMEOUT, + ) @pytest.mark.group(1) @@ -321,3 +322,39 @@ async def test_shard_mongos_relation(ops_test: OpsTest) -> None: f"{MONGOS_APP_NAME}:cluster", f"{SHARD_ONE_APP_NAME}:cluster", ) + + await ops_test.model.wait_for_idle( + apps=[SHARD_ONE_APP_NAME], + idle_period=20, + raise_on_blocked=False, + timeout=TIMEOUT, + ) + + +@pytest.mark.group(1) +async def test_shard_s3_relation(ops_test: OpsTest) -> None: + """Verifies integrating a shard to s3-integrator fails.""" + # attempt to add a replication deployment as a shard to the config server. + await ops_test.model.integrate( + f"{SHARD_ONE_APP_NAME}", + f"{S3_APP_NAME}", + ) + + await ops_test.model.wait_for_idle( + apps=[SHARD_ONE_APP_NAME], + idle_period=20, + raise_on_blocked=False, + timeout=TIMEOUT, + ) + + shard_unit = ops_test.model.applications[SHARD_ONE_APP_NAME].units[0] + assert ( + shard_unit.workload_status_message + == "Relation to s3-integrator is not supported, config role must be config-server" + ), "Shard cannot be related to s3-integrator." + + # clean up relations + await ops_test.model.applications[SHARD_ONE_APP_NAME].remove_relation( + f"{S3_APP_NAME}:s3-credentials", + f"{SHARD_ONE_APP_NAME}:s3-credentials", + )
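Taken together, this last patch gates every backup entry point on the unit's role: `on_s3_relation_joined` blocks a shard that is related to s3-integrator, and `_pass_sanity_checks` fails the create-backup, list-backups, and restore actions before any s3 state is touched. A self-contained sketch of that gating pattern is below; the names and messages are illustrative only, not the charm's actual API:

```python
# Illustrative sketch of the role-based gating added in this patch; the
# identifiers and messages here are examples, not the charm's real ones.
from enum import Enum
from typing import Optional


class Role(str, Enum):
    CONFIG_SERVER = "config-server"
    SHARD = "shard"
    REPLICATION = "replication"


def s3_integration_is_valid(role: Role, has_s3_relation: bool) -> bool:
    """Only replica sets and config servers may be related to s3-integrator."""
    return not (role is Role.SHARD and has_s3_relation)


def backup_action_error(role: Role, has_s3_relation: bool) -> Optional[str]:
    """Return the failure message a backup action would report, or None if it may proceed."""
    if not s3_integration_is_valid(role, has_s3_relation):
        return "Shards do not support backup operations, run the action on the config-server."
    if not has_s3_relation:
        return "Relation with s3-integrator missing, cannot run backup actions."
    return None


# Shards never run backup actions; a config server with s3 credentials may proceed.
assert backup_action_error(Role.SHARD, has_s3_relation=True) is not None
assert backup_action_error(Role.SHARD, has_s3_relation=False) is not None
assert backup_action_error(Role.CONFIG_SERVER, has_s3_relation=True) is None
```

The integration tests above then only need to assert that each action returns `failed` on a shard unit and that the shard's workload status carries the blocked message.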