From ac24eed18b8c32621f11e5de8073d4c9f0c62d2f Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Wed, 13 Sep 2023 09:09:24 +0000 Subject: [PATCH 01/12] charm can start as shard or config server --- config.yaml | 6 ++++++ lib/charms/mongodb/v0/helpers.py | 7 +++++++ src/charm.py | 6 +++++- src/machine_helpers.py | 10 ++++++---- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/config.yaml b/config.yaml index 0e820d039..fb9659b65 100644 --- a/config.yaml +++ b/config.yaml @@ -8,3 +8,9 @@ options: When a relation is removed, auto-delete ensures that any relevant databases associated with the relation are also removed default: false + role: + description: | + role config option exists to deploy the charmed-mongodb application as a shard, + config-server, or as a replica set. + type: string + default: replication diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index ba33c09d9..58b42258c 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -84,6 +84,7 @@ def get_mongod_args( config: MongoDBConfiguration, auth: bool = True, snap_install: bool = False, + role: str = "replication", ) -> str: """Construct the MongoDB startup command line. @@ -137,6 +138,12 @@ def get_mongod_args( ] ) + if role == "config-server": + cmd.append("--configsvr") + + if role == "shard": + cmd.append("--shardsvr") + cmd.append("\n") return " ".join(cmd) diff --git a/src/charm.py b/src/charm.py index a6c1effcc..e9dc9c742 100755 --- a/src/charm.py +++ b/src/charm.py @@ -253,7 +253,10 @@ def _on_install(self, event: InstallEvent) -> None: # Construct the mongod startup commandline args for systemd and reload the daemon. update_mongod_service( - auth=auth, machine_ip=self._unit_ip(self.unit), config=self.mongodb_config + auth=auth, + machine_ip=self._unit_ip(self.unit), + config=self.mongodb_config, + role=self.model.config["role"], ) # add licenses @@ -947,6 +950,7 @@ def restart_mongod_service(self, auth=None): auth, self._unit_ip(self.unit), config=self.mongodb_config, + role=self.model.config["role"], ) mongodb_snap.start(services=["mongod"]) except snap.SnapError as e: diff --git a/src/machine_helpers.py b/src/machine_helpers.py index eab2050e8..ebb9b2519 100644 --- a/src/machine_helpers.py +++ b/src/machine_helpers.py @@ -18,24 +18,26 @@ MONGO_USER = "snap_daemon" -def update_mongod_service(auth: bool, machine_ip: str, config: MongoDBConfiguration) -> None: +def update_mongod_service( + auth: bool, machine_ip: str, config: MongoDBConfiguration, role: str = "replication" +) -> None: """Updates the mongod service file with the new options for starting.""" with open(Config.ENV_VAR_PATH, "r") as env_var_file: env_vars = env_var_file.readlines() # write our arguments and write them to /etc/environment - the environment variable here is # read in in the charmed-mongob.mongod.service file. 
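# Illustrative sketch (not part of the applied diff): how the new `role` config
# option introduced in this commit maps onto mongod startup flags, mirroring the
# branches added to get_mongod_args() above. The helper name below is
# hypothetical; only the role values and flag names come from the diff.
def role_to_mongod_flags(role: str = "replication") -> list:
    """Extra mongod flags implied by the charm's role ("replication" adds none)."""
    if role == "config-server":
        return ["--configsvr"]
    if role == "shard":
        return ["--shardsvr"]
    return []

# e.g. role_to_mongod_flags("shard") == ["--shardsvr"]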
- mongod_start_args = get_mongod_args(config, auth, snap_install=True) + mongo_start_args = get_mongod_args(config, auth, snap_install=True) args_added = False for index, line in enumerate(env_vars): if "MONGOD_ARGS" in line: args_added = True - env_vars[index] = f"MONGOD_ARGS={mongod_start_args}" + env_vars[index] = f"MONGOD_ARGS={mongo_start_args}" # if it is the first time adding these args to the file - will will need to append them to the # file if not args_added: - env_vars.append(f"MONGOD_ARGS={mongod_start_args}") + env_vars.append(f"MONGOD_ARGS={mongo_start_args}") with open(Config.ENV_VAR_PATH, "w") as service_file: service_file.writelines(env_vars) From f8ff8a1bb71fc74495a6d6df7660b431129c606e Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Thu, 14 Sep 2023 16:27:40 +0000 Subject: [PATCH 02/12] mongos, shard, and config server all start without error --- lib/charms/mongodb/v0/helpers.py | 25 ++++++++++++ src/charm.py | 67 +++++++++++++++++++++++++------- src/config.py | 8 ++-- src/machine_helpers.py | 23 +++++++---- 4 files changed, 98 insertions(+), 25 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index 58b42258c..789aa1791 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -80,6 +80,31 @@ def get_create_user_cmd( ] +def get_mongos_args(config: MongoDBConfiguration) -> str: + """Returns the arguments used for starting mongos on a config-server side application. + + Returns: + A string representing the arguments to be passed to mongos. + """ + # mongos running on the config server communicates through localhost + config_server_uri = f"{config.replset}/localhost" + + # no need to add TLS since no network calls are used, since mongos is configured to listen + # on local host + cmd = [ + # mongos on config server side only runs on local host + "--bind_ip localhost", + # todo figure out this one + f"--configdb {config_server_uri}", + # config server is already using 27017 + "--port 27018", + # todo followup PR add keyfile and auth + "\n", + ] + + return " ".join(cmd) + + def get_mongod_args( config: MongoDBConfiguration, auth: bool = True, diff --git a/src/charm.py b/src/charm.py index e9dc9c742..ba275c067 100755 --- a/src/charm.py +++ b/src/charm.py @@ -75,7 +75,6 @@ class MongodbOperatorCharm(CharmBase): def __init__(self, *args): super().__init__(*args) self._port = Config.MONGODB_PORT - self.framework.observe(self.on.install, self._on_install) self.framework.observe(self.on.start, self._on_start) self.framework.observe(self.on.update_status, self._on_update_status) @@ -213,6 +212,15 @@ def db_initialised(self) -> bool: """Check if MongoDB is initialised.""" return "db_initialised" in self.app_peer_data + @property + def role(self) -> str: + """Returns role of MongoDB deployment.""" + return self.model.config["role"] + + def is_role(self, role_name: str) -> bool: + """Checks if application is running in provided role.""" + return self.role == role_name + @db_initialised.setter def db_initialised(self, value): """Set the db_initialised flag.""" @@ -256,7 +264,7 @@ def _on_install(self, event: InstallEvent) -> None: auth=auth, machine_ip=self._unit_ip(self.unit), config=self.mongodb_config, - role=self.model.config["role"], + role=self.role, ) # add licenses @@ -275,9 +283,7 @@ def _on_start(self, event: StartEvent) -> None: try: logger.debug("starting MongoDB.") self.unit.status = MaintenanceStatus("starting MongoDB") - snap_cache = snap.SnapCache() - mongodb_snap = snap_cache["charmed-mongodb"] 
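# Illustrative sketch (not part of the applied diff): a self-contained example of
# the argument string get_mongos_args() (added above) assembles for the
# config-server-side mongos. The replica-set name "rs0" is hypothetical; the
# flags, the localhost bind address, and port 27018 are taken from the diff at
# this point in the series (later commits change the bind address and read the
# port from Config.MONGOS_PORT).
replset = "rs0"
mongos_args = " ".join(
    [
        "--bind_ip localhost",              # mongos only listens locally here
        f"--configdb {replset}/localhost",  # config server replica set reached via localhost
        "--port 27018",                     # mongod already occupies 27017
        "\n",
    ]
)
# mongos_args == "--bind_ip localhost --configdb rs0/localhost --port 27018 \n"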
- mongodb_snap.start(services=["mongod"]) + self.start_mongod_service() self.unit.status = ActiveStatus() except snap.SnapError as e: logger.error("An exception occurred when starting mongod agent, error: %s.", str(e)) @@ -291,7 +297,7 @@ def _on_start(self, event: StartEvent) -> None: return # check if this unit's deployment of MongoDB is ready - with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: if not direct_mongo.is_ready: logger.debug("mongodb service is not ready yet.") self.unit.status = WaitingStatus("waiting for MongoDB to start") @@ -464,7 +470,7 @@ def _on_update_status(self, event: UpdateStatusEvent): return # Cannot check more advanced MongoDB statuses if mongod hasn't started. - with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: if not direct_mongo.is_ready: self.unit.status = WaitingStatus("Waiting for MongoDB to start") return @@ -871,19 +877,24 @@ def _initialise_replica_set(self, event: StartEvent) -> None: # can be corrupted. return - with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: try: logger.info("Replica Set initialization") direct_mongo.init_replset() self._peers.data[self.app]["replica_set_hosts"] = json.dumps( [self._unit_ip(self.unit)] ) + logger.info("User initialization") self._init_operator_user() self._init_backup_user() self._init_monitor_user() - logger.info("Manage relations") - self.client_relations.oversee_users(None, None) + + # in sharding, user management is handled by mongos subordinate charm + if self.is_role(Config.REPLICATION): + logger.info("Manage user") + self.client_relations.oversee_users(None, None) + except subprocess.CalledProcessError as e: logger.error( "Deferring on_start: exit code: %i, stderr: %s", e.exit_code, e.stderr @@ -937,22 +948,48 @@ def set_secret(self, scope: str, key: str, value: Optional[str]) -> None: else: raise RuntimeError("Unknown secret scope.") + def start_mongod_service(self): + """Starts the mongod service and if necessary starts mongos. + + Raises: + snap.SnapError + """ + snap_cache = snap.SnapCache() + mongodb_snap = snap_cache["charmed-mongodb"] + mongodb_snap.start(services=["mongod"]) + + # charms running as config server are responsible for maintaing a server side mongos + if self.is_role(Config.CONFIG_SERVER): + mongodb_snap.start(services=["mongos"]) + + def stop_mongod_service(self): + """Stops the mongod service and if necessary stops mongos. 
+ + Raises: + snap.SnapError + """ + snap_cache = snap.SnapCache() + mongodb_snap = snap_cache["charmed-mongodb"] + mongodb_snap.stop(services=["mongod"]) + + # charms running as config server are responsible for maintaing a server side mongos + if self.is_role(Config.CONFIG_SERVER): + mongodb_snap.stop(services=["mongos"]) + def restart_mongod_service(self, auth=None): """Restarts the mongod service with its associated configuration.""" if auth is None: auth = self.auth_enabled() try: - snap_cache = snap.SnapCache() - mongodb_snap = snap_cache["charmed-mongodb"] - mongodb_snap.stop(services=["mongod"]) + self.stop_mongod_service() update_mongod_service( auth, self._unit_ip(self.unit), config=self.mongodb_config, - role=self.model.config["role"], + role=self.role, ) - mongodb_snap.start(services=["mongod"]) + self.start_mongod_service() except snap.SnapError as e: logger.error("An exception occurred when starting mongod agent, error: %s.", str(e)) self.unit.status = BlockedStatus("couldn't start MongoDB") diff --git a/src/config.py b/src/config.py index 8d8279f87..7c13a5c02 100644 --- a/src/config.py +++ b/src/config.py @@ -6,14 +6,16 @@ class Config: """Configuration for MongoDB Charm.""" - SUBSTRATE = "vm" - # We expect the MongoDB container to use the default ports MONGODB_PORT = 27017 + SUBSTRATE = "vm" ENV_VAR_PATH = "/etc/environment" MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current" MONGOD_CONF_DIR = f"{MONGODB_SNAP_DATA_DIR}/etc/mongod" MONGOD_CONF_FILE_PATH = f"{MONGOD_CONF_DIR}/mongod.conf" - SNAP_PACKAGES = [("charmed-mongodb", "5/edge", 82)] + SNAP_PACKAGES = [("charmed-mongodb", "5/edge/mongos", 83)] + CONFIG_SERVER = "config-server" + REPLICATION = "replication" + SHARD = "shard" class Actions: """Actions related config for MongoDB Charm.""" diff --git a/src/machine_helpers.py b/src/machine_helpers.py index ebb9b2519..ddd3f031e 100644 --- a/src/machine_helpers.py +++ b/src/machine_helpers.py @@ -6,7 +6,7 @@ import pwd from pathlib import Path -from charms.mongodb.v0.helpers import get_mongod_args +from charms.mongodb.v0.helpers import get_mongod_args, get_mongos_args from charms.mongodb.v0.mongodb import MongoDBConfiguration from config import Config @@ -22,22 +22,31 @@ def update_mongod_service( auth: bool, machine_ip: str, config: MongoDBConfiguration, role: str = "replication" ) -> None: """Updates the mongod service file with the new options for starting.""" + # write our arguments and write them to /etc/environment - the environment variable here is + # read in in the charmed-mongob.mongod.service file. + mongod_start_args = get_mongod_args(config, auth, snap_install=True) + add_args_to_env("MONGOD_ARGS", mongod_start_args) + + if role == "config-server": + mongos_start_args = get_mongos_args(config) + add_args_to_env("MONGOS_ARGS", mongos_start_args) + + +def add_args_to_env(var: str, args: str): + """Adds the provided arguments to the environment as the provided variable.""" with open(Config.ENV_VAR_PATH, "r") as env_var_file: env_vars = env_var_file.readlines() - # write our arguments and write them to /etc/environment - the environment variable here is - # read in in the charmed-mongob.mongod.service file. 
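# Illustrative sketch (not part of the applied diff): the replace-or-append
# behaviour of the new add_args_to_env() helper, demonstrated on an in-memory
# list rather than Config.ENV_VAR_PATH so the example is self-contained. The
# function name and sample values below are hypothetical; the logic mirrors the
# hunk above.
def upsert_env_line(env_vars: list, var: str, args: str) -> list:
    """Replace an existing VAR=... line if present, otherwise append one."""
    for index, line in enumerate(env_vars):
        if var in line:
            env_vars[index] = f"{var}={args}"
            return env_vars
    env_vars.append(f"{var}={args}")
    return env_vars

# A config-server unit ends up with both entries in /etc/environment, which the
# snap's mongod and mongos services read at start-up:
lines = ["PATH=/usr/bin\n"]
upsert_env_line(lines, "MONGOD_ARGS", "<args from get_mongod_args>")
upsert_env_line(lines, "MONGOS_ARGS", "<args from get_mongos_args>")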
- mongo_start_args = get_mongod_args(config, auth, snap_install=True) args_added = False for index, line in enumerate(env_vars): - if "MONGOD_ARGS" in line: + if var in line: args_added = True - env_vars[index] = f"MONGOD_ARGS={mongo_start_args}" + env_vars[index] = f"{var}={args}" # if it is the first time adding these args to the file - will will need to append them to the # file if not args_added: - env_vars.append(f"MONGOD_ARGS={mongo_start_args}") + env_vars.append(f"{var}={args}") with open(Config.ENV_VAR_PATH, "w") as service_file: service_file.writelines(env_vars) From 9b64f319b4be0d2ece66889d22422a7afbb36d13 Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Fri, 15 Sep 2023 08:09:37 +0000 Subject: [PATCH 03/12] use correct snap --- src/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.py b/src/config.py index 7c13a5c02..85a9fe3fa 100644 --- a/src/config.py +++ b/src/config.py @@ -12,7 +12,7 @@ class Config: MONGODB_SNAP_DATA_DIR = "/var/snap/charmed-mongodb/current" MONGOD_CONF_DIR = f"{MONGODB_SNAP_DATA_DIR}/etc/mongod" MONGOD_CONF_FILE_PATH = f"{MONGOD_CONF_DIR}/mongod.conf" - SNAP_PACKAGES = [("charmed-mongodb", "5/edge/mongos", 83)] + SNAP_PACKAGES = [("charmed-mongodb", "5/edge", 84)] CONFIG_SERVER = "config-server" REPLICATION = "replication" SHARD = "shard" From 11f3c6515a20cba37c71be8d2c4e89015e4cbae3 Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Fri, 15 Sep 2023 08:53:38 +0000 Subject: [PATCH 04/12] fmt + lint --- src/charm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/charm.py b/src/charm.py index ff83fbdc3..d86a0c4ac 100755 --- a/src/charm.py +++ b/src/charm.py @@ -322,7 +322,7 @@ def _on_start(self, event: StartEvent) -> None: return # check if this unit's deployment of MongoDB is ready - with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: if not direct_mongo.is_ready: logger.debug("mongodb service is not ready yet.") self.unit.status = WaitingStatus("waiting for MongoDB to start") @@ -495,7 +495,7 @@ def _on_update_status(self, event: UpdateStatusEvent): return # Cannot check more advanced MongoDB statuses if mongod hasn't started. - with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: if not direct_mongo.is_ready: self.unit.status = WaitingStatus("Waiting for MongoDB to start") return @@ -952,7 +952,7 @@ def _initialise_replica_set(self, event: StartEvent) -> None: # can be corrupted. 
return - with MongoDBConnection(self.mongodb_config, f"localhost", direct=True) as direct_mongo: + with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: try: logger.info("Replica Set initialization") direct_mongo.init_replset() @@ -1045,7 +1045,7 @@ def start_mongod_service(self): mongodb_snap = snap_cache["charmed-mongodb"] mongodb_snap.start(services=["mongod"]) - # charms running as config server are responsible for maintaing a server side mongos + # charms running as config server are responsible for maintaining a server side mongos if self.is_role(Config.CONFIG_SERVER): mongodb_snap.start(services=["mongos"]) @@ -1059,7 +1059,7 @@ def stop_mongod_service(self): mongodb_snap = snap_cache["charmed-mongodb"] mongodb_snap.stop(services=["mongod"]) - # charms running as config server are responsible for maintaing a server side mongos + # charms running as config server are responsible for maintaining a server side mongos if self.is_role(Config.CONFIG_SERVER): mongodb_snap.stop(services=["mongos"]) From 8e8572b2e154d4d62e18f3ca7364c4e32918047e Mon Sep 17 00:00:00 2001 From: Pedro Guimaraes Date: Fri, 15 Sep 2023 14:14:26 +0200 Subject: [PATCH 05/12] Moving from mongo to mongosh cli --- lib/charms/mongodb/v0/helpers.py | 2 +- tests/integration/relation_tests/legacy_relations/helpers.py | 2 +- tests/integration/test_charm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index ba33c09d9..1f6fa8860 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -49,7 +49,7 @@ # noinspection GrazieInspection def get_create_user_cmd( - config: MongoDBConfiguration, mongo_path="charmed-mongodb.mongo" + config: MongoDBConfiguration, mongo_path="charmed-mongodb.mongosh" ) -> List[str]: """Creates initial admin user for MongoDB. 
diff --git a/tests/integration/relation_tests/legacy_relations/helpers.py b/tests/integration/relation_tests/legacy_relations/helpers.py index 9bb27b3fe..ccde5c01d 100644 --- a/tests/integration/relation_tests/legacy_relations/helpers.py +++ b/tests/integration/relation_tests/legacy_relations/helpers.py @@ -136,7 +136,7 @@ async def mongo_tls_command(ops_test: OpsTest) -> str: replica_set_uri = f"mongodb://{hosts}/admin?replicaSet={app}" return ( - f"charmed-mongodb.mongo '{replica_set_uri}' --eval 'rs.status()'" + f"charmed-mongodb.mongosh '{replica_set_uri}' --eval 'rs.status()'" f" --tls --tlsCAFile {EXTERNAL_CERT_PATH}" f" --tlsCertificateKeyFile {EXTERNAL_PEM_PATH}" ) diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py index c7e1450fd..1832acf42 100644 --- a/tests/integration/test_charm.py +++ b/tests/integration/test_charm.py @@ -177,7 +177,7 @@ async def test_monitor_user(ops_test: OpsTest) -> None: hosts = ",".join(replica_set_hosts) replica_set_uri = f"mongodb://monitor:{password}@{hosts}/admin?replicaSet=mongodb" - admin_mongod_cmd = f"charmed-mongodb.mongo '{replica_set_uri}' --eval 'rs.conf()'" + admin_mongod_cmd = f"charmed-mongodb.mongosh '{replica_set_uri}' --eval 'rs.conf()'" check_monitor_cmd = f"exec --unit {unit.name} -- {admin_mongod_cmd}" return_code, _, _ = await ops_test.juju(*check_monitor_cmd.split()) assert return_code == 0, "command rs.conf() on monitor user does not work" From 4bf9d5f9608976cf84d813f521826ea25eb9e04b Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Fri, 15 Sep 2023 15:30:16 +0000 Subject: [PATCH 06/12] update error processing --- lib/charms/mongodb/v0/helpers.py | 21 +------ lib/charms/mongodb/v0/mongodb_backups.py | 70 +++++++++++++++++++----- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index 789aa1791..cbde28fcf 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -7,7 +7,7 @@ import secrets import string import subprocess -from typing import List, Optional, Union +from typing import List from charms.mongodb.v0.mongodb import MongoDBConfiguration, MongoDBConnection from ops.model import ( @@ -234,25 +234,6 @@ def copy_licenses_to_unit(): ) -_StrOrBytes = Union[str, bytes] - - -def process_pbm_error(error_string: Optional[_StrOrBytes]) -> str: - """Parses pbm error string and returns a user friendly message.""" - message = "couldn't configure s3 backup option" - if not error_string: - return message - if type(error_string) == bytes: - error_string = error_string.decode("utf-8") - if "status code: 403" in error_string: # type: ignore - message = "s3 credentials are incorrect." - elif "status code: 404" in error_string: # type: ignore - message = "s3 configurations are incompatible." - elif "status code: 301" in error_string: # type: ignore - message = "s3 configurations are incompatible." 
- return message - - def current_pbm_op(pbm_status: str) -> str: """Parses pbm status for the operation that pbm is running.""" pbm_status = json.loads(pbm_status) diff --git a/lib/charms/mongodb/v0/mongodb_backups.py b/lib/charms/mongodb/v0/mongodb_backups.py index c65c86f1d..94d661975 100644 --- a/lib/charms/mongodb/v0/mongodb_backups.py +++ b/lib/charms/mongodb/v0/mongodb_backups.py @@ -13,14 +13,10 @@ import re import subprocess import time -from typing import Dict, List +from typing import Dict, List, Optional, Union from charms.data_platform_libs.v0.s3 import CredentialsChangedEvent, S3Requirer -from charms.mongodb.v0.helpers import ( - current_pbm_op, - process_pbm_error, - process_pbm_status, -) +from charms.mongodb.v0.helpers import current_pbm_op, process_pbm_status from charms.operator_libs_linux.v1 import snap from ops.framework import Object from ops.model import BlockedStatus, MaintenanceStatus, StatusBase, WaitingStatus @@ -316,7 +312,7 @@ def _configure_pbm_options(self, event) -> None: ), return except ExecError as e: - self.charm.unit.status = BlockedStatus(process_pbm_error(e.stdout)) + self.charm.unit.status = BlockedStatus(self.process_pbm_error(e.stdout)) return except subprocess.CalledProcessError as e: logger.error("Syncing configurations failed: %s", str(e)) @@ -418,7 +414,7 @@ def _wait_pbm_status(self) -> None: ) raise ResyncError except ExecError as e: - self.charm.unit.status = BlockedStatus(process_pbm_error(e.stdout)) + self.charm.unit.status = BlockedStatus(self.process_pbm_error(e.stdout)) def _get_pbm_status(self) -> StatusBase: """Retrieve pbm status.""" @@ -428,15 +424,14 @@ def _get_pbm_status(self) -> StatusBase: try: previous_pbm_status = self.charm.unit.status pbm_status = self.charm.run_pbm_command(PBM_STATUS_CMD) + + # pbm errors are outputted in json and do not raise CLI errors + pbm_error = self.process_pbm_error(pbm_status) + if pbm_error: + return BlockedStatus(pbm_error) + self._log_backup_restore_result(pbm_status, previous_pbm_status) return process_pbm_status(pbm_status) - except ExecError as e: - logger.error(f"Failed to get pbm status. {e}") - return BlockedStatus(process_pbm_error(e.stdout)) - except subprocess.CalledProcessError as e: - # pbm pipes a return code of 1, but its output shows the true error code so it is - # necessary to parse the output - return BlockedStatus(process_pbm_error(e.output)) except Exception as e: # pbm pipes a return code of 1, but its output shows the true error code so it is # necessary to parse the output @@ -652,3 +647,48 @@ def _get_backup_restore_operation_result(self, current_pbm_status, previous_pbm_ return f"Backup {backup_id} completed successfully" return "Unknown operation result" + + def retrieve_error_message(self, pbm_status: Dict) -> str: + """Parses pbm status for an error message from the current unit. + + If pbm_agent is in the error state, the command `pbm status` does not raise an error. + Instead, it is in the log messages. pbm_agent also shows all the error messages for other + replicas in the set. 
+ """ + try: + clusters = pbm_status["cluster"] + for cluster in clusters: + if cluster["rs"] == self.charm.app.name: + break + + for host_info in cluster["nodes"]: + replica_info = f"mongodb/{self.charm._unit_ip(self.charm.unit)}:27107" + if host_info["host"] == replica_info: + break + + return str(host_info["errors"]) + except KeyError: + return "" + + _StrOrBytes = Union[str, bytes] + + def process_pbm_error(self, pbm_status: Optional[_StrOrBytes]) -> str: + """Returns errors found in PBM status.""" + if type(pbm_status) == bytes: + pbm_status = pbm_status.decode("utf-8") + + try: + error_message = self.retrieve_error_message(json.loads(pbm_status)) + except json.decoder.JSONDecodeError: + # if pbm status doesn't return a parsable dictionary it is an error message + # represented as a string + error_message = pbm_status + + message = None + if "status code: 403" in error_message: + message = "s3 credentials are incorrect." + elif "status code: 404" in error_message: + message = "s3 configurations are incompatible." + elif "status code: 301" in error_message: + message = "s3 configurations are incompatible." + return message From e924c0067d9732e2ceefc74b6f5947c2bf04ee37 Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Fri, 15 Sep 2023 15:32:12 +0000 Subject: [PATCH 07/12] bump lib patch --- lib/charms/mongodb/v0/helpers.py | 2 +- lib/charms/mongodb/v0/mongodb_backups.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index cbde28fcf..63784afa0 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -27,7 +27,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 8 +LIBPATCH = 9 # path to store mongodb ketFile diff --git a/lib/charms/mongodb/v0/mongodb_backups.py b/lib/charms/mongodb/v0/mongodb_backups.py index 94d661975..e947aa2f7 100644 --- a/lib/charms/mongodb/v0/mongodb_backups.py +++ b/lib/charms/mongodb/v0/mongodb_backups.py @@ -38,7 +38,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 6 +LIBPATCH = 7 logger = logging.getLogger(__name__) From 06af49a06725860a41cf9ee8eb6c5489ff8381e0 Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Tue, 19 Sep 2023 08:20:51 +0000 Subject: [PATCH 08/12] mongos should be run on 0.0.0.0 --- lib/charms/mongodb/v0/helpers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index ec7eaeaf4..d4c820aaa 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -91,8 +91,9 @@ def get_mongos_args(config: MongoDBConfiguration) -> str: # no need to add TLS since no network calls are used, since mongos is configured to listen # on local host cmd = [ - # mongos on config server side only runs on local host - "--bind_ip localhost", + # mongos on config server side should run on 0.0.0.0 so it can be accessed by other units + # in the sharded cluster + "--bind_ip 0.0.0.0", # todo figure out this one f"--configdb {config_server_uri}", # config server is already using 27017 From 363db5f01a2dcf049dda9c0011c64d9c890da74c Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Tue, 19 Sep 2023 08:51:38 +0000 Subject: [PATCH 09/12] addressing PR comments --- lib/charms/mongodb/v0/helpers.py | 4 +- lib/charms/mongodb/v0/mongodb_backups.py | 11 +- src/charm.py | 6 +- 
src/config.py | 11 +- .../integration/backup_tests/test_backups.py | 606 +++++++++--------- 5 files changed, 325 insertions(+), 313 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index d4c820aaa..c95d99393 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -19,6 +19,8 @@ ) from pymongo.errors import AutoReconnect, ServerSelectionTimeoutError +from config import Config + # The unique Charmhub library identifier, never change it LIBID = "b9a7fe0c38d8486a9d1ce94c27d4758e" @@ -97,7 +99,7 @@ def get_mongos_args(config: MongoDBConfiguration) -> str: # todo figure out this one f"--configdb {config_server_uri}", # config server is already using 27017 - "--port 27018", + f"--port {Config.MONGOS_PORT}", # todo followup PR add keyfile and auth "\n", ] diff --git a/lib/charms/mongodb/v0/mongodb_backups.py b/lib/charms/mongodb/v0/mongodb_backups.py index e947aa2f7..f0fde7286 100644 --- a/lib/charms/mongodb/v0/mongodb_backups.py +++ b/lib/charms/mongodb/v0/mongodb_backups.py @@ -30,6 +30,8 @@ wait_fixed, ) +from config import Config + # The unique Charmhub library identifier, never change it LIBID = "9f2b91c6128d48d6ba22724bf365da3b" @@ -59,6 +61,9 @@ BACKUP_RESTORE_ATTEMPT_COOLDOWN = 15 +_StrOrBytes = Union[str, bytes] + + class ResyncError(Exception): """Raised when pbm is resyncing configurations and is not ready to be used.""" @@ -662,7 +667,9 @@ def retrieve_error_message(self, pbm_status: Dict) -> str: break for host_info in cluster["nodes"]: - replica_info = f"mongodb/{self.charm._unit_ip(self.charm.unit)}:27107" + replica_info = ( + f"mongodb/{self.charm._unit_ip(self.charm.unit)}:{Config.MONGOS_PORT}" + ) if host_info["host"] == replica_info: break @@ -670,8 +677,6 @@ def retrieve_error_message(self, pbm_status: Dict) -> str: except KeyError: return "" - _StrOrBytes = Union[str, bytes] - def process_pbm_error(self, pbm_status: Optional[_StrOrBytes]) -> str: """Returns errors found in PBM status.""" if type(pbm_status) == bytes: diff --git a/src/charm.py b/src/charm.py index d86a0c4ac..bcad1165b 100755 --- a/src/charm.py +++ b/src/charm.py @@ -966,7 +966,7 @@ def _initialise_replica_set(self, event: StartEvent) -> None: self._init_monitor_user() # in sharding, user management is handled by mongos subordinate charm - if self.is_role(Config.REPLICATION): + if self.is_role(Config.Role.REPLICATION): logger.info("Manage user") self.client_relations.oversee_users(None, None) @@ -1046,7 +1046,7 @@ def start_mongod_service(self): mongodb_snap.start(services=["mongod"]) # charms running as config server are responsible for maintaining a server side mongos - if self.is_role(Config.CONFIG_SERVER): + if self.is_role(Config.Role.CONFIG_SERVER): mongodb_snap.start(services=["mongos"]) def stop_mongod_service(self): @@ -1060,7 +1060,7 @@ def stop_mongod_service(self): mongodb_snap.stop(services=["mongod"]) # charms running as config server are responsible for maintaining a server side mongos - if self.is_role(Config.CONFIG_SERVER): + if self.is_role(Config.Role.CONFIG_SERVER): mongodb_snap.stop(services=["mongos"]) def restart_mongod_service(self, auth=None): diff --git a/src/config.py b/src/config.py index 283b13f19..da3261864 100644 --- a/src/config.py +++ b/src/config.py @@ -9,6 +9,7 @@ class Config: """Configuration for MongoDB Charm.""" + MONGOS_PORT = 27018 MONGODB_PORT = 27017 SUBSTRATE = "vm" ENV_VAR_PATH = "/etc/environment" @@ -16,9 +17,13 @@ class Config: MONGOD_CONF_DIR = f"{MONGODB_SNAP_DATA_DIR}/etc/mongod" 
MONGOD_CONF_FILE_PATH = f"{MONGOD_CONF_DIR}/mongod.conf" SNAP_PACKAGES = [("charmed-mongodb", "5/edge", 84)] - CONFIG_SERVER = "config-server" - REPLICATION = "replication" - SHARD = "shard" + + class Role: + """Role config names for MongoDB Charm.""" + + CONFIG_SERVER = "config-server" + REPLICATION = "replication" + SHARD = "shard" class Actions: """Actions related config for MongoDB Charm.""" diff --git a/tests/integration/backup_tests/test_backups.py b/tests/integration/backup_tests/test_backups.py index b7cb64d48..cc78957a2 100644 --- a/tests/integration/backup_tests/test_backups.py +++ b/tests/integration/backup_tests/test_backups.py @@ -99,306 +99,306 @@ async def test_blocked_incorrect_conf(ops_test: OpsTest) -> None: assert db_unit.workload_status_message == "s3 configurations are incompatible." -@pytest.mark.abort_on_fail -async def test_ready_correct_conf(ops_test: OpsTest) -> None: - """Verifies charm goes into active status when s3 config and creds options are correct.""" - db_app_name = await helpers.app_name(ops_test) - choices = string.ascii_letters + string.digits - unique_path = "".join([secrets.choice(choices) for _ in range(4)]) - configuration_parameters = { - "bucket": "data-charms-testing", - "path": f"mongodb-vm/test-{unique_path}", - "endpoint": "https://s3.amazonaws.com", - "region": "us-east-1", - } - - # apply new configuration options - await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - - # after applying correct config options and creds the applications should both be active - await ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active", timeout=TIMEOUT) - await ops_test.model.wait_for_idle( - apps=[db_app_name], status="active", timeout=TIMEOUT, idle_period=60 - ) - - -@pytest.mark.abort_on_fail -async def test_create_and_list_backups(ops_test: OpsTest) -> None: - db_unit = await helpers.get_leader_unit(ops_test) - - # verify backup list works - action = await db_unit.run_action(action_name="list-backups") - list_result = await action.wait() - backups = list_result.results["backups"] - assert backups, "backups not outputted" - - # verify backup is started - action = await db_unit.run_action(action_name="create-backup") - backup_result = await action.wait() - assert "backup started" in backup_result.results["backup-status"], "backup didn't start" - - # verify backup is present in the list of backups - # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a - # backup can take a lot of time so this function returns once the command was successfully - # sent to pbm. Therefore we should retry listing the backup several times - try: - for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)): - with attempt: - backups = await helpers.count_logical_backups(db_unit) - assert backups == 1 - except RetryError: - assert backups == 1, "Backup not created." - - -@pytest.mark.abort_on_fail -async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: - """With writes in the DB test creating a backup while another one is running. - - Note that before creating the second backup we change the bucket and change the s3 storage - from AWS to GCP. This test verifies that the first backup in AWS is made, the second backup - in GCP is made, and that before the second backup is made that pbm correctly resyncs. 
- """ - db_app_name = await helpers.app_name(ops_test) - db_unit = await helpers.get_leader_unit(ops_test) - - # create first backup once ready - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - action = await db_unit.run_action(action_name="create-backup") - first_backup = await action.wait() - assert first_backup.status == "completed", "First backup not started." - - # while first backup is running change access key, secret keys, and bucket name - # for GCP - await helpers.set_credentials(ops_test, cloud="GCP") - - # change to GCP configs and wait for PBM to resync - configuration_parameters = { - "bucket": "data-charms-testing", - "endpoint": "https://storage.googleapis.com", - "region": "", - } - await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # create a backup as soon as possible. might not be immediately possible since only one backup - # can happen at a time. - try: - for attempt in Retrying(stop=stop_after_delay(40), wait=wait_fixed(5)): - with attempt: - action = await db_unit.run_action(action_name="create-backup") - second_backup = await action.wait() - assert second_backup.status == "completed" - except RetryError: - assert second_backup.status == "completed", "Second backup not started." - - # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a - # backup can take a lot of time so this function returns once the command was successfully - # sent to pbm. Therefore before checking, wait for Charmed MongoDB to finish creating the - # backup - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # verify that backups was made in GCP bucket - try: - for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): - with attempt: - backups = await helpers.count_logical_backups(db_unit) - assert backups == 1, "Backup not created in bucket on GCP." - except RetryError: - assert backups == 1, "Backup not created in first bucket on GCP." - - # set AWS credentials, set configs for s3 storage, and wait to resync - await helpers.set_credentials(ops_test, cloud="AWS") - configuration_parameters = { - "bucket": "data-charms-testing", - "region": "us-east-1", - "endpoint": "https://s3.amazonaws.com", - } - await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # verify that backups was made on the AWS bucket - try: - for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): - with attempt: - backups = await helpers.count_logical_backups(db_unit) - assert backups == 2, "Backup not created in bucket on AWS." - except RetryError: - assert backups == 2, "Backup not created in bucket on AWS." 
- - -@pytest.mark.abort_on_fail -async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: - """Simple backup tests that verifies that writes are correctly restored.""" - # count total writes - number_writes = await ha_helpers.count_writes(ops_test) - assert number_writes > 0, "no writes to backup" - - # create a backup in the AWS bucket - db_app_name = await helpers.app_name(ops_test) - db_unit = await helpers.get_leader_unit(ops_test) - prev_backups = await helpers.count_logical_backups(db_unit) - action = await db_unit.run_action(action_name="create-backup") - first_backup = await action.wait() - assert first_backup.status == "completed", "First backup not started." - - # verify that backup was made on the bucket - try: - for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): - with attempt: - backups = await helpers.count_logical_backups(db_unit) - assert backups == prev_backups + 1, "Backup not created." - except RetryError: - assert backups == prev_backups + 1, "Backup not created." - - # add writes to be cleared after restoring the backup. Note these are written to the same - # collection that was backed up. - await helpers.insert_unwanted_data(ops_test) - new_number_of_writes = await ha_helpers.count_writes(ops_test) - assert new_number_of_writes > number_writes, "No writes to be cleared after restoring." - - # find most recent backup id and restore - action = await db_unit.run_action(action_name="list-backups") - list_result = await action.wait() - list_result = list_result.results["backups"] - most_recent_backup = list_result.split("\n")[-1] - backup_id = most_recent_backup.split()[0] - action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) - restore = await action.wait() - assert restore.results["restore-status"] == "restore started", "restore not successful" - - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # verify all writes are present - try: - for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): - with attempt: - number_writes_restored = await ha_helpers.count_writes(ops_test) - assert number_writes == number_writes_restored, "writes not correctly restored" - except RetryError: - assert number_writes == number_writes_restored, "writes not correctly restored" - - -@pytest.mark.parametrize("cloud_provider", ["AWS", "GCP"]) -async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_provider): - # configure test for the cloud provider - db_app_name = await helpers.app_name(ops_test) - await helpers.set_credentials(ops_test, cloud=cloud_provider) - if cloud_provider == "AWS": - configuration_parameters = { - "bucket": "data-charms-testing", - "region": "us-east-1", - "endpoint": "https://s3.amazonaws.com", - } - else: - configuration_parameters = { - "bucket": "data-charms-testing", - "endpoint": "https://storage.googleapis.com", - "region": "", - } - - await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"), - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # create a backup - writes_in_old_cluster = await ha_helpers.count_writes(ops_test, db_app_name) - assert writes_in_old_cluster > 0, "old cluster has no writes." - await helpers.create_and_verify_backup(ops_test) - - # save old password, since after restoring we will need this password to authenticate. 
- old_password = await ha_helpers.get_password(ops_test, db_app_name) - - # deploy a new cluster with a different name - db_charm = await ops_test.build_charm(".") - await ops_test.model.deploy(db_charm, num_units=3, application_name=NEW_CLUSTER) - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), - ) - - db_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER) - action = await db_unit.run_action("set-password", **{"password": old_password}) - action = await action.wait() - assert action.status == "completed" - - # relate to s3 - s3 has the necessary configurations - await ops_test.model.add_relation(S3_APP_NAME, NEW_CLUSTER) - await ops_test.model.block_until( - lambda: helpers.is_relation_joined(ops_test, ENDPOINT, ENDPOINT) is True, - timeout=TIMEOUT, - ) - - # wait for new cluster to sync - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), - ) - - # verify that the listed backups from the old cluster are not listed as failed. - assert ( - await helpers.count_failed_backups(db_unit) == 0 - ), "Backups from old cluster are listed as failed" - - # find most recent backup id and restore - action = await db_unit.run_action(action_name="list-backups") - list_result = await action.wait() - list_result = list_result.results["backups"] - most_recent_backup = list_result.split("\n")[-1] - backup_id = most_recent_backup.split()[0] - action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) - restore = await action.wait() - assert restore.results["restore-status"] == "restore started", "restore not successful" - - # verify all writes are present - try: - for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): - with attempt: - writes_in_new_cluster = await ha_helpers.count_writes(ops_test, NEW_CLUSTER) - assert ( - writes_in_new_cluster == writes_in_old_cluster - ), "new cluster writes do not match old cluster writes after restore" - except RetryError: - assert ( - writes_in_new_cluster == writes_in_old_cluster - ), "new cluster writes do not match old cluster writes after restore" - - await helpers.destroy_cluster(ops_test, cluster_name=NEW_CLUSTER) - - -@pytest.mark.abort_on_fail -async def test_update_backup_password(ops_test: OpsTest) -> None: - """Verifies that after changing the backup password the pbm tool is updated and functional.""" - db_app_name = await helpers.app_name(ops_test) - db_unit = await helpers.get_leader_unit(ops_test) - - # wait for charm to be idle before setting password - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - parameters = {"username": "backup"} - action = await db_unit.run_action("set-password", **parameters) - action = await action.wait() - assert action.status == "completed", "failed to set backup password" - - # wait for charm to be idle after setting password - await asyncio.gather( - ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), - ) - - # verify we still have connection to pbm via creating a backup - action = await db_unit.run_action(action_name="create-backup") - backup_result = await action.wait() - assert "backup started" in backup_result.results["backup-status"], "backup didn't start" +# @pytest.mark.abort_on_fail +# async def test_ready_correct_conf(ops_test: OpsTest) -> None: +# """Verifies charm goes into active status when s3 config and creds options are correct.""" +# 
db_app_name = await helpers.app_name(ops_test) +# choices = string.ascii_letters + string.digits +# unique_path = "".join([secrets.choice(choices) for _ in range(4)]) +# configuration_parameters = { +# "bucket": "data-charms-testing", +# "path": f"mongodb-vm/test-{unique_path}", +# "endpoint": "https://s3.amazonaws.com", +# "region": "us-east-1", +# } + +# # apply new configuration options +# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) + +# # after applying correct config options and creds the applications should both be active +# await ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active", timeout=TIMEOUT) +# await ops_test.model.wait_for_idle( +# apps=[db_app_name], status="active", timeout=TIMEOUT, idle_period=60 +# ) + + +# @pytest.mark.abort_on_fail +# async def test_create_and_list_backups(ops_test: OpsTest) -> None: +# db_unit = await helpers.get_leader_unit(ops_test) + +# # verify backup list works +# action = await db_unit.run_action(action_name="list-backups") +# list_result = await action.wait() +# backups = list_result.results["backups"] +# assert backups, "backups not outputted" + +# # verify backup is started +# action = await db_unit.run_action(action_name="create-backup") +# backup_result = await action.wait() +# assert "backup started" in backup_result.results["backup-status"], "backup didn't start" + +# # verify backup is present in the list of backups +# # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a +# # backup can take a lot of time so this function returns once the command was successfully +# # sent to pbm. Therefore we should retry listing the backup several times +# try: +# for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)): +# with attempt: +# backups = await helpers.count_logical_backups(db_unit) +# assert backups == 1 +# except RetryError: +# assert backups == 1, "Backup not created." + + +# @pytest.mark.abort_on_fail +# async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: +# """With writes in the DB test creating a backup while another one is running. + +# Note that before creating the second backup we change the bucket and change the s3 storage +# from AWS to GCP. This test verifies that the first backup in AWS is made, the second backup +# in GCP is made, and that before the second backup is made that pbm correctly resyncs. +# """ +# db_app_name = await helpers.app_name(ops_test) +# db_unit = await helpers.get_leader_unit(ops_test) + +# # create first backup once ready +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# action = await db_unit.run_action(action_name="create-backup") +# first_backup = await action.wait() +# assert first_backup.status == "completed", "First backup not started." + +# # while first backup is running change access key, secret keys, and bucket name +# # for GCP +# await helpers.set_credentials(ops_test, cloud="GCP") + +# # change to GCP configs and wait for PBM to resync +# configuration_parameters = { +# "bucket": "data-charms-testing", +# "endpoint": "https://storage.googleapis.com", +# "region": "", +# } +# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) + +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # create a backup as soon as possible. 
might not be immediately possible since only one backup +# # can happen at a time. +# try: +# for attempt in Retrying(stop=stop_after_delay(40), wait=wait_fixed(5)): +# with attempt: +# action = await db_unit.run_action(action_name="create-backup") +# second_backup = await action.wait() +# assert second_backup.status == "completed" +# except RetryError: +# assert second_backup.status == "completed", "Second backup not started." + +# # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a +# # backup can take a lot of time so this function returns once the command was successfully +# # sent to pbm. Therefore before checking, wait for Charmed MongoDB to finish creating the +# # backup +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # verify that backups was made in GCP bucket +# try: +# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): +# with attempt: +# backups = await helpers.count_logical_backups(db_unit) +# assert backups == 1, "Backup not created in bucket on GCP." +# except RetryError: +# assert backups == 1, "Backup not created in first bucket on GCP." + +# # set AWS credentials, set configs for s3 storage, and wait to resync +# await helpers.set_credentials(ops_test, cloud="AWS") +# configuration_parameters = { +# "bucket": "data-charms-testing", +# "region": "us-east-1", +# "endpoint": "https://s3.amazonaws.com", +# } +# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # verify that backups was made on the AWS bucket +# try: +# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): +# with attempt: +# backups = await helpers.count_logical_backups(db_unit) +# assert backups == 2, "Backup not created in bucket on AWS." +# except RetryError: +# assert backups == 2, "Backup not created in bucket on AWS." + + +# @pytest.mark.abort_on_fail +# async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: +# """Simple backup tests that verifies that writes are correctly restored.""" +# # count total writes +# number_writes = await ha_helpers.count_writes(ops_test) +# assert number_writes > 0, "no writes to backup" + +# # create a backup in the AWS bucket +# db_app_name = await helpers.app_name(ops_test) +# db_unit = await helpers.get_leader_unit(ops_test) +# prev_backups = await helpers.count_logical_backups(db_unit) +# action = await db_unit.run_action(action_name="create-backup") +# first_backup = await action.wait() +# assert first_backup.status == "completed", "First backup not started." + +# # verify that backup was made on the bucket +# try: +# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): +# with attempt: +# backups = await helpers.count_logical_backups(db_unit) +# assert backups == prev_backups + 1, "Backup not created." +# except RetryError: +# assert backups == prev_backups + 1, "Backup not created." + +# # add writes to be cleared after restoring the backup. Note these are written to the same +# # collection that was backed up. +# await helpers.insert_unwanted_data(ops_test) +# new_number_of_writes = await ha_helpers.count_writes(ops_test) +# assert new_number_of_writes > number_writes, "No writes to be cleared after restoring." 
+ +# # find most recent backup id and restore +# action = await db_unit.run_action(action_name="list-backups") +# list_result = await action.wait() +# list_result = list_result.results["backups"] +# most_recent_backup = list_result.split("\n")[-1] +# backup_id = most_recent_backup.split()[0] +# action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) +# restore = await action.wait() +# assert restore.results["restore-status"] == "restore started", "restore not successful" + +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # verify all writes are present +# try: +# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): +# with attempt: +# number_writes_restored = await ha_helpers.count_writes(ops_test) +# assert number_writes == number_writes_restored, "writes not correctly restored" +# except RetryError: +# assert number_writes == number_writes_restored, "writes not correctly restored" + + +# @pytest.mark.parametrize("cloud_provider", ["AWS", "GCP"]) +# async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_provider): +# # configure test for the cloud provider +# db_app_name = await helpers.app_name(ops_test) +# await helpers.set_credentials(ops_test, cloud=cloud_provider) +# if cloud_provider == "AWS": +# configuration_parameters = { +# "bucket": "data-charms-testing", +# "region": "us-east-1", +# "endpoint": "https://s3.amazonaws.com", +# } +# else: +# configuration_parameters = { +# "bucket": "data-charms-testing", +# "endpoint": "https://storage.googleapis.com", +# "region": "", +# } + +# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"), +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # create a backup +# writes_in_old_cluster = await ha_helpers.count_writes(ops_test, db_app_name) +# assert writes_in_old_cluster > 0, "old cluster has no writes." +# await helpers.create_and_verify_backup(ops_test) + +# # save old password, since after restoring we will need this password to authenticate. +# old_password = await ha_helpers.get_password(ops_test, db_app_name) + +# # deploy a new cluster with a different name +# db_charm = await ops_test.build_charm(".") +# await ops_test.model.deploy(db_charm, num_units=3, application_name=NEW_CLUSTER) +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), +# ) + +# db_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER) +# action = await db_unit.run_action("set-password", **{"password": old_password}) +# action = await action.wait() +# assert action.status == "completed" + +# # relate to s3 - s3 has the necessary configurations +# await ops_test.model.add_relation(S3_APP_NAME, NEW_CLUSTER) +# await ops_test.model.block_until( +# lambda: helpers.is_relation_joined(ops_test, ENDPOINT, ENDPOINT) is True, +# timeout=TIMEOUT, +# ) + +# # wait for new cluster to sync +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), +# ) + +# # verify that the listed backups from the old cluster are not listed as failed. 
+# assert ( +# await helpers.count_failed_backups(db_unit) == 0 +# ), "Backups from old cluster are listed as failed" + +# # find most recent backup id and restore +# action = await db_unit.run_action(action_name="list-backups") +# list_result = await action.wait() +# list_result = list_result.results["backups"] +# most_recent_backup = list_result.split("\n")[-1] +# backup_id = most_recent_backup.split()[0] +# action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) +# restore = await action.wait() +# assert restore.results["restore-status"] == "restore started", "restore not successful" + +# # verify all writes are present +# try: +# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): +# with attempt: +# writes_in_new_cluster = await ha_helpers.count_writes(ops_test, NEW_CLUSTER) +# assert ( +# writes_in_new_cluster == writes_in_old_cluster +# ), "new cluster writes do not match old cluster writes after restore" +# except RetryError: +# assert ( +# writes_in_new_cluster == writes_in_old_cluster +# ), "new cluster writes do not match old cluster writes after restore" + +# await helpers.destroy_cluster(ops_test, cluster_name=NEW_CLUSTER) + + +# @pytest.mark.abort_on_fail +# async def test_update_backup_password(ops_test: OpsTest) -> None: +# """Verifies that after changing the backup password the pbm tool is updated and functional.""" +# db_app_name = await helpers.app_name(ops_test) +# db_unit = await helpers.get_leader_unit(ops_test) + +# # wait for charm to be idle before setting password +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# parameters = {"username": "backup"} +# action = await db_unit.run_action("set-password", **parameters) +# action = await action.wait() +# assert action.status == "completed", "failed to set backup password" + +# # wait for charm to be idle after setting password +# await asyncio.gather( +# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), +# ) + +# # verify we still have connection to pbm via creating a backup +# action = await db_unit.run_action(action_name="create-backup") +# backup_result = await action.wait() +# assert "backup started" in backup_result.results["backup-status"], "backup didn't start" From ac78ed38c3a58b73998a95d3d00c7fe178ab502f Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Tue, 19 Sep 2023 09:20:37 +0000 Subject: [PATCH 10/12] PR comments --- lib/charms/mongodb/v0/helpers.py | 6 +- .../integration/backup_tests/test_backups.py | 606 +++++++++--------- 2 files changed, 305 insertions(+), 307 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index c95d99393..7f9085250 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -90,13 +90,11 @@ def get_mongos_args(config: MongoDBConfiguration) -> str: # mongos running on the config server communicates through localhost config_server_uri = f"{config.replset}/localhost" - # no need to add TLS since no network calls are used, since mongos is configured to listen - # on local host + # todo follow up PR add TLS cmd = [ # mongos on config server side should run on 0.0.0.0 so it can be accessed by other units # in the sharded cluster - "--bind_ip 0.0.0.0", - # todo figure out this one + "--bind_ip", f"--configdb {config_server_uri}", # config server is already using 27017 f"--port {Config.MONGOS_PORT}", diff --git a/tests/integration/backup_tests/test_backups.py 
b/tests/integration/backup_tests/test_backups.py index cc78957a2..b7cb64d48 100644 --- a/tests/integration/backup_tests/test_backups.py +++ b/tests/integration/backup_tests/test_backups.py @@ -99,306 +99,306 @@ async def test_blocked_incorrect_conf(ops_test: OpsTest) -> None: assert db_unit.workload_status_message == "s3 configurations are incompatible." -# @pytest.mark.abort_on_fail -# async def test_ready_correct_conf(ops_test: OpsTest) -> None: -# """Verifies charm goes into active status when s3 config and creds options are correct.""" -# db_app_name = await helpers.app_name(ops_test) -# choices = string.ascii_letters + string.digits -# unique_path = "".join([secrets.choice(choices) for _ in range(4)]) -# configuration_parameters = { -# "bucket": "data-charms-testing", -# "path": f"mongodb-vm/test-{unique_path}", -# "endpoint": "https://s3.amazonaws.com", -# "region": "us-east-1", -# } - -# # apply new configuration options -# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - -# # after applying correct config options and creds the applications should both be active -# await ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active", timeout=TIMEOUT) -# await ops_test.model.wait_for_idle( -# apps=[db_app_name], status="active", timeout=TIMEOUT, idle_period=60 -# ) - - -# @pytest.mark.abort_on_fail -# async def test_create_and_list_backups(ops_test: OpsTest) -> None: -# db_unit = await helpers.get_leader_unit(ops_test) - -# # verify backup list works -# action = await db_unit.run_action(action_name="list-backups") -# list_result = await action.wait() -# backups = list_result.results["backups"] -# assert backups, "backups not outputted" - -# # verify backup is started -# action = await db_unit.run_action(action_name="create-backup") -# backup_result = await action.wait() -# assert "backup started" in backup_result.results["backup-status"], "backup didn't start" - -# # verify backup is present in the list of backups -# # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a -# # backup can take a lot of time so this function returns once the command was successfully -# # sent to pbm. Therefore we should retry listing the backup several times -# try: -# for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)): -# with attempt: -# backups = await helpers.count_logical_backups(db_unit) -# assert backups == 1 -# except RetryError: -# assert backups == 1, "Backup not created." - - -# @pytest.mark.abort_on_fail -# async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: -# """With writes in the DB test creating a backup while another one is running. - -# Note that before creating the second backup we change the bucket and change the s3 storage -# from AWS to GCP. This test verifies that the first backup in AWS is made, the second backup -# in GCP is made, and that before the second backup is made that pbm correctly resyncs. -# """ -# db_app_name = await helpers.app_name(ops_test) -# db_unit = await helpers.get_leader_unit(ops_test) - -# # create first backup once ready -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# action = await db_unit.run_action(action_name="create-backup") -# first_backup = await action.wait() -# assert first_backup.status == "completed", "First backup not started." 
- -# # while first backup is running change access key, secret keys, and bucket name -# # for GCP -# await helpers.set_credentials(ops_test, cloud="GCP") - -# # change to GCP configs and wait for PBM to resync -# configuration_parameters = { -# "bucket": "data-charms-testing", -# "endpoint": "https://storage.googleapis.com", -# "region": "", -# } -# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) - -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # create a backup as soon as possible. might not be immediately possible since only one backup -# # can happen at a time. -# try: -# for attempt in Retrying(stop=stop_after_delay(40), wait=wait_fixed(5)): -# with attempt: -# action = await db_unit.run_action(action_name="create-backup") -# second_backup = await action.wait() -# assert second_backup.status == "completed" -# except RetryError: -# assert second_backup.status == "completed", "Second backup not started." - -# # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a -# # backup can take a lot of time so this function returns once the command was successfully -# # sent to pbm. Therefore before checking, wait for Charmed MongoDB to finish creating the -# # backup -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # verify that backups was made in GCP bucket -# try: -# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): -# with attempt: -# backups = await helpers.count_logical_backups(db_unit) -# assert backups == 1, "Backup not created in bucket on GCP." -# except RetryError: -# assert backups == 1, "Backup not created in first bucket on GCP." - -# # set AWS credentials, set configs for s3 storage, and wait to resync -# await helpers.set_credentials(ops_test, cloud="AWS") -# configuration_parameters = { -# "bucket": "data-charms-testing", -# "region": "us-east-1", -# "endpoint": "https://s3.amazonaws.com", -# } -# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # verify that backups was made on the AWS bucket -# try: -# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): -# with attempt: -# backups = await helpers.count_logical_backups(db_unit) -# assert backups == 2, "Backup not created in bucket on AWS." -# except RetryError: -# assert backups == 2, "Backup not created in bucket on AWS." - - -# @pytest.mark.abort_on_fail -# async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None: -# """Simple backup tests that verifies that writes are correctly restored.""" -# # count total writes -# number_writes = await ha_helpers.count_writes(ops_test) -# assert number_writes > 0, "no writes to backup" - -# # create a backup in the AWS bucket -# db_app_name = await helpers.app_name(ops_test) -# db_unit = await helpers.get_leader_unit(ops_test) -# prev_backups = await helpers.count_logical_backups(db_unit) -# action = await db_unit.run_action(action_name="create-backup") -# first_backup = await action.wait() -# assert first_backup.status == "completed", "First backup not started." 
- -# # verify that backup was made on the bucket -# try: -# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)): -# with attempt: -# backups = await helpers.count_logical_backups(db_unit) -# assert backups == prev_backups + 1, "Backup not created." -# except RetryError: -# assert backups == prev_backups + 1, "Backup not created." - -# # add writes to be cleared after restoring the backup. Note these are written to the same -# # collection that was backed up. -# await helpers.insert_unwanted_data(ops_test) -# new_number_of_writes = await ha_helpers.count_writes(ops_test) -# assert new_number_of_writes > number_writes, "No writes to be cleared after restoring." - -# # find most recent backup id and restore -# action = await db_unit.run_action(action_name="list-backups") -# list_result = await action.wait() -# list_result = list_result.results["backups"] -# most_recent_backup = list_result.split("\n")[-1] -# backup_id = most_recent_backup.split()[0] -# action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) -# restore = await action.wait() -# assert restore.results["restore-status"] == "restore started", "restore not successful" - -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # verify all writes are present -# try: -# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): -# with attempt: -# number_writes_restored = await ha_helpers.count_writes(ops_test) -# assert number_writes == number_writes_restored, "writes not correctly restored" -# except RetryError: -# assert number_writes == number_writes_restored, "writes not correctly restored" - - -# @pytest.mark.parametrize("cloud_provider", ["AWS", "GCP"]) -# async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_provider): -# # configure test for the cloud provider -# db_app_name = await helpers.app_name(ops_test) -# await helpers.set_credentials(ops_test, cloud=cloud_provider) -# if cloud_provider == "AWS": -# configuration_parameters = { -# "bucket": "data-charms-testing", -# "region": "us-east-1", -# "endpoint": "https://s3.amazonaws.com", -# } -# else: -# configuration_parameters = { -# "bucket": "data-charms-testing", -# "endpoint": "https://storage.googleapis.com", -# "region": "", -# } - -# await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"), -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # create a backup -# writes_in_old_cluster = await ha_helpers.count_writes(ops_test, db_app_name) -# assert writes_in_old_cluster > 0, "old cluster has no writes." -# await helpers.create_and_verify_backup(ops_test) - -# # save old password, since after restoring we will need this password to authenticate. 
-# old_password = await ha_helpers.get_password(ops_test, db_app_name) - -# # deploy a new cluster with a different name -# db_charm = await ops_test.build_charm(".") -# await ops_test.model.deploy(db_charm, num_units=3, application_name=NEW_CLUSTER) -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), -# ) - -# db_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER) -# action = await db_unit.run_action("set-password", **{"password": old_password}) -# action = await action.wait() -# assert action.status == "completed" - -# # relate to s3 - s3 has the necessary configurations -# await ops_test.model.add_relation(S3_APP_NAME, NEW_CLUSTER) -# await ops_test.model.block_until( -# lambda: helpers.is_relation_joined(ops_test, ENDPOINT, ENDPOINT) is True, -# timeout=TIMEOUT, -# ) - -# # wait for new cluster to sync -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), -# ) - -# # verify that the listed backups from the old cluster are not listed as failed. -# assert ( -# await helpers.count_failed_backups(db_unit) == 0 -# ), "Backups from old cluster are listed as failed" - -# # find most recent backup id and restore -# action = await db_unit.run_action(action_name="list-backups") -# list_result = await action.wait() -# list_result = list_result.results["backups"] -# most_recent_backup = list_result.split("\n")[-1] -# backup_id = most_recent_backup.split()[0] -# action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) -# restore = await action.wait() -# assert restore.results["restore-status"] == "restore started", "restore not successful" - -# # verify all writes are present -# try: -# for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): -# with attempt: -# writes_in_new_cluster = await ha_helpers.count_writes(ops_test, NEW_CLUSTER) -# assert ( -# writes_in_new_cluster == writes_in_old_cluster -# ), "new cluster writes do not match old cluster writes after restore" -# except RetryError: -# assert ( -# writes_in_new_cluster == writes_in_old_cluster -# ), "new cluster writes do not match old cluster writes after restore" - -# await helpers.destroy_cluster(ops_test, cluster_name=NEW_CLUSTER) - - -# @pytest.mark.abort_on_fail -# async def test_update_backup_password(ops_test: OpsTest) -> None: -# """Verifies that after changing the backup password the pbm tool is updated and functional.""" -# db_app_name = await helpers.app_name(ops_test) -# db_unit = await helpers.get_leader_unit(ops_test) - -# # wait for charm to be idle before setting password -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# parameters = {"username": "backup"} -# action = await db_unit.run_action("set-password", **parameters) -# action = await action.wait() -# assert action.status == "completed", "failed to set backup password" - -# # wait for charm to be idle after setting password -# await asyncio.gather( -# ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), -# ) - -# # verify we still have connection to pbm via creating a backup -# action = await db_unit.run_action(action_name="create-backup") -# backup_result = await action.wait() -# assert "backup started" in backup_result.results["backup-status"], "backup didn't start" +@pytest.mark.abort_on_fail +async def test_ready_correct_conf(ops_test: OpsTest) -> None: + """Verifies charm goes into active 
status when s3 config and creds options are correct.""" + db_app_name = await helpers.app_name(ops_test) + choices = string.ascii_letters + string.digits + unique_path = "".join([secrets.choice(choices) for _ in range(4)]) + configuration_parameters = { + "bucket": "data-charms-testing", + "path": f"mongodb-vm/test-{unique_path}", + "endpoint": "https://s3.amazonaws.com", + "region": "us-east-1", + } + + # apply new configuration options + await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) + + # after applying correct config options and creds the applications should both be active + await ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active", timeout=TIMEOUT) + await ops_test.model.wait_for_idle( + apps=[db_app_name], status="active", timeout=TIMEOUT, idle_period=60 + ) + + +@pytest.mark.abort_on_fail +async def test_create_and_list_backups(ops_test: OpsTest) -> None: + db_unit = await helpers.get_leader_unit(ops_test) + + # verify backup list works + action = await db_unit.run_action(action_name="list-backups") + list_result = await action.wait() + backups = list_result.results["backups"] + assert backups, "backups not outputted" + + # verify backup is started + action = await db_unit.run_action(action_name="create-backup") + backup_result = await action.wait() + assert "backup started" in backup_result.results["backup-status"], "backup didn't start" + + # verify backup is present in the list of backups + # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a + # backup can take a lot of time so this function returns once the command was successfully + # sent to pbm. Therefore we should retry listing the backup several times + try: + for attempt in Retrying(stop=stop_after_delay(20), wait=wait_fixed(3)): + with attempt: + backups = await helpers.count_logical_backups(db_unit) + assert backups == 1 + except RetryError: + assert backups == 1, "Backup not created." + + +@pytest.mark.abort_on_fail +async def test_multi_backup(ops_test: OpsTest, continuous_writes_to_db) -> None: + """With writes in the DB test creating a backup while another one is running. + + Note that before creating the second backup we change the bucket and change the s3 storage + from AWS to GCP. This test verifies that the first backup in AWS is made, the second backup + in GCP is made, and that before the second backup is made that pbm correctly resyncs. + """ + db_app_name = await helpers.app_name(ops_test) + db_unit = await helpers.get_leader_unit(ops_test) + + # create first backup once ready + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + action = await db_unit.run_action(action_name="create-backup") + first_backup = await action.wait() + assert first_backup.status == "completed", "First backup not started." + + # while first backup is running change access key, secret keys, and bucket name + # for GCP + await helpers.set_credentials(ops_test, cloud="GCP") + + # change to GCP configs and wait for PBM to resync + configuration_parameters = { + "bucket": "data-charms-testing", + "endpoint": "https://storage.googleapis.com", + "region": "", + } + await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) + + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + # create a backup as soon as possible. might not be immediately possible since only one backup + # can happen at a time. 
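+    # (Retrying re-runs the action every 5 seconds for up to 40 seconds; if pbm is still busy
+    # with the first backup after that, the assert in the except branch reports the failure)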
+    try:
+        for attempt in Retrying(stop=stop_after_delay(40), wait=wait_fixed(5)):
+            with attempt:
+                action = await db_unit.run_action(action_name="create-backup")
+                second_backup = await action.wait()
+                assert second_backup.status == "completed"
+    except RetryError:
+        assert second_backup.status == "completed", "Second backup not started."
+
+    # the action `create-backup` only confirms that the command was sent to the `pbm`. Creating a
+    # backup can take a lot of time so this function returns once the command was successfully
+    # sent to pbm. Therefore before checking, wait for Charmed MongoDB to finish creating the
+    # backup
+    await asyncio.gather(
+        ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
+    )
+
+    # verify that a backup was made in the GCP bucket
+    try:
+        for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)):
+            with attempt:
+                backups = await helpers.count_logical_backups(db_unit)
+                assert backups == 1, "Backup not created in bucket on GCP."
+    except RetryError:
+        assert backups == 1, "Backup not created in first bucket on GCP."
+
+    # set AWS credentials, set configs for s3 storage, and wait to resync
+    await helpers.set_credentials(ops_test, cloud="AWS")
+    configuration_parameters = {
+        "bucket": "data-charms-testing",
+        "region": "us-east-1",
+        "endpoint": "https://s3.amazonaws.com",
+    }
+    await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters)
+    await asyncio.gather(
+        ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20),
+    )
+
+    # verify that a backup was made on the AWS bucket
+    try:
+        for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)):
+            with attempt:
+                backups = await helpers.count_logical_backups(db_unit)
+                assert backups == 2, "Backup not created in bucket on AWS."
+    except RetryError:
+        assert backups == 2, "Backup not created in bucket on AWS."
+
+
+@pytest.mark.abort_on_fail
+async def test_restore(ops_test: OpsTest, add_writes_to_db) -> None:
+    """Simple backup test that verifies that writes are correctly restored."""
+    # count total writes
+    number_writes = await ha_helpers.count_writes(ops_test)
+    assert number_writes > 0, "no writes to backup"
+
+    # create a backup in the AWS bucket
+    db_app_name = await helpers.app_name(ops_test)
+    db_unit = await helpers.get_leader_unit(ops_test)
+    prev_backups = await helpers.count_logical_backups(db_unit)
+    action = await db_unit.run_action(action_name="create-backup")
+    first_backup = await action.wait()
+    assert first_backup.status == "completed", "First backup not started."
+
+    # verify that backup was made on the bucket
+    try:
+        for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(5)):
+            with attempt:
+                backups = await helpers.count_logical_backups(db_unit)
+                assert backups == prev_backups + 1, "Backup not created."
+    except RetryError:
+        assert backups == prev_backups + 1, "Backup not created."
+
+    # add writes to be cleared after restoring the backup. Note these are written to the same
+    # collection that was backed up.
+    await helpers.insert_unwanted_data(ops_test)
+    new_number_of_writes = await ha_helpers.count_writes(ops_test)
+    assert new_number_of_writes > number_writes, "No writes to be cleared after restoring."
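+
+    # note: the restore steps below assume that `list-backups` prints one backup per line, oldest
+    # first, so the last line is the most recent backup and its id is the first field of that line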
+ + # find most recent backup id and restore + action = await db_unit.run_action(action_name="list-backups") + list_result = await action.wait() + list_result = list_result.results["backups"] + most_recent_backup = list_result.split("\n")[-1] + backup_id = most_recent_backup.split()[0] + action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) + restore = await action.wait() + assert restore.results["restore-status"] == "restore started", "restore not successful" + + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + # verify all writes are present + try: + for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): + with attempt: + number_writes_restored = await ha_helpers.count_writes(ops_test) + assert number_writes == number_writes_restored, "writes not correctly restored" + except RetryError: + assert number_writes == number_writes_restored, "writes not correctly restored" + + +@pytest.mark.parametrize("cloud_provider", ["AWS", "GCP"]) +async def test_restore_new_cluster(ops_test: OpsTest, add_writes_to_db, cloud_provider): + # configure test for the cloud provider + db_app_name = await helpers.app_name(ops_test) + await helpers.set_credentials(ops_test, cloud=cloud_provider) + if cloud_provider == "AWS": + configuration_parameters = { + "bucket": "data-charms-testing", + "region": "us-east-1", + "endpoint": "https://s3.amazonaws.com", + } + else: + configuration_parameters = { + "bucket": "data-charms-testing", + "endpoint": "https://storage.googleapis.com", + "region": "", + } + + await ops_test.model.applications[S3_APP_NAME].set_config(configuration_parameters) + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[S3_APP_NAME], status="active"), + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + # create a backup + writes_in_old_cluster = await ha_helpers.count_writes(ops_test, db_app_name) + assert writes_in_old_cluster > 0, "old cluster has no writes." + await helpers.create_and_verify_backup(ops_test) + + # save old password, since after restoring we will need this password to authenticate. + old_password = await ha_helpers.get_password(ops_test, db_app_name) + + # deploy a new cluster with a different name + db_charm = await ops_test.build_charm(".") + await ops_test.model.deploy(db_charm, num_units=3, application_name=NEW_CLUSTER) + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), + ) + + db_unit = await helpers.get_leader_unit(ops_test, db_app_name=NEW_CLUSTER) + action = await db_unit.run_action("set-password", **{"password": old_password}) + action = await action.wait() + assert action.status == "completed" + + # relate to s3 - s3 has the necessary configurations + await ops_test.model.add_relation(S3_APP_NAME, NEW_CLUSTER) + await ops_test.model.block_until( + lambda: helpers.is_relation_joined(ops_test, ENDPOINT, ENDPOINT) is True, + timeout=TIMEOUT, + ) + + # wait for new cluster to sync + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[NEW_CLUSTER], status="active", idle_period=20), + ) + + # verify that the listed backups from the old cluster are not listed as failed. 
+ assert ( + await helpers.count_failed_backups(db_unit) == 0 + ), "Backups from old cluster are listed as failed" + + # find most recent backup id and restore + action = await db_unit.run_action(action_name="list-backups") + list_result = await action.wait() + list_result = list_result.results["backups"] + most_recent_backup = list_result.split("\n")[-1] + backup_id = most_recent_backup.split()[0] + action = await db_unit.run_action(action_name="restore", **{"backup-id": backup_id}) + restore = await action.wait() + assert restore.results["restore-status"] == "restore started", "restore not successful" + + # verify all writes are present + try: + for attempt in Retrying(stop=stop_after_delay(4), wait=wait_fixed(20)): + with attempt: + writes_in_new_cluster = await ha_helpers.count_writes(ops_test, NEW_CLUSTER) + assert ( + writes_in_new_cluster == writes_in_old_cluster + ), "new cluster writes do not match old cluster writes after restore" + except RetryError: + assert ( + writes_in_new_cluster == writes_in_old_cluster + ), "new cluster writes do not match old cluster writes after restore" + + await helpers.destroy_cluster(ops_test, cluster_name=NEW_CLUSTER) + + +@pytest.mark.abort_on_fail +async def test_update_backup_password(ops_test: OpsTest) -> None: + """Verifies that after changing the backup password the pbm tool is updated and functional.""" + db_app_name = await helpers.app_name(ops_test) + db_unit = await helpers.get_leader_unit(ops_test) + + # wait for charm to be idle before setting password + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + parameters = {"username": "backup"} + action = await db_unit.run_action("set-password", **parameters) + action = await action.wait() + assert action.status == "completed", "failed to set backup password" + + # wait for charm to be idle after setting password + await asyncio.gather( + ops_test.model.wait_for_idle(apps=[db_app_name], status="active", idle_period=20), + ) + + # verify we still have connection to pbm via creating a backup + action = await db_unit.run_action(action_name="create-backup") + backup_result = await action.wait() + assert "backup started" in backup_result.results["backup-status"], "backup didn't start" From 563f0495a6288219439f2d37998d3178ac4bf731 Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Tue, 19 Sep 2023 09:26:16 +0000 Subject: [PATCH 11/12] correct ip binding --- lib/charms/mongodb/v0/helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index 7f9085250..f63366e42 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -94,7 +94,7 @@ def get_mongos_args(config: MongoDBConfiguration) -> str: cmd = [ # mongos on config server side should run on 0.0.0.0 so it can be accessed by other units # in the sharded cluster - "--bind_ip", + "--bind_ip_all", f"--configdb {config_server_uri}", # config server is already using 27017 f"--port {Config.MONGOS_PORT}", From beacd0ea84905f43a92d03729ad9112612f536fa Mon Sep 17 00:00:00 2001 From: Mia Altieri Date: Wed, 20 Sep 2023 06:54:22 +0000 Subject: [PATCH 12/12] mongosh not packaged in snap, remove it --- lib/charms/mongodb/v0/helpers.py | 2 +- tests/integration/relation_tests/legacy_relations/helpers.py | 2 +- tests/integration/test_charm.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/charms/mongodb/v0/helpers.py b/lib/charms/mongodb/v0/helpers.py index 
85187f345..f63366e42 100644 --- a/lib/charms/mongodb/v0/helpers.py +++ b/lib/charms/mongodb/v0/helpers.py @@ -50,7 +50,7 @@ # noinspection GrazieInspection def get_create_user_cmd( - config: MongoDBConfiguration, mongo_path="charmed-mongodb.mongosh" + config: MongoDBConfiguration, mongo_path="charmed-mongodb.mongo" ) -> List[str]: """Creates initial admin user for MongoDB. diff --git a/tests/integration/relation_tests/legacy_relations/helpers.py b/tests/integration/relation_tests/legacy_relations/helpers.py index ccde5c01d..9bb27b3fe 100644 --- a/tests/integration/relation_tests/legacy_relations/helpers.py +++ b/tests/integration/relation_tests/legacy_relations/helpers.py @@ -136,7 +136,7 @@ async def mongo_tls_command(ops_test: OpsTest) -> str: replica_set_uri = f"mongodb://{hosts}/admin?replicaSet={app}" return ( - f"charmed-mongodb.mongosh '{replica_set_uri}' --eval 'rs.status()'" + f"charmed-mongodb.mongo '{replica_set_uri}' --eval 'rs.status()'" f" --tls --tlsCAFile {EXTERNAL_CERT_PATH}" f" --tlsCertificateKeyFile {EXTERNAL_PEM_PATH}" ) diff --git a/tests/integration/test_charm.py b/tests/integration/test_charm.py index b4688b649..0f75d43ce 100644 --- a/tests/integration/test_charm.py +++ b/tests/integration/test_charm.py @@ -180,7 +180,7 @@ async def test_monitor_user(ops_test: OpsTest) -> None: ] hosts = ",".join(replica_set_hosts) replica_set_uri = f"mongodb://monitor:{password}@{hosts}/admin?replicaSet=mongodb" - admin_mongod_cmd = f"charmed-mongodb.mongosh '{replica_set_uri}' --eval 'rs.conf()'" + admin_mongod_cmd = f"charmed-mongodb.mongo '{replica_set_uri}' --eval 'rs.conf()'" check_monitor_cmd = f"exec --unit {unit.name} -- {admin_mongod_cmd}" return_code, _, _ = await ops_test.juju(*check_monitor_cmd.split()) assert return_code == 0, "command rs.conf() on monitor user does not work"
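
The restore tests above all parse the `list-backups` action output the same way (take the last
line, then its first field). A follow-up could factor that into a shared test helper. Below is a
minimal sketch, not part of these patches; the helper name and its suggested home
(tests/integration/backup_tests/helpers.py) are only assumptions, and it mirrors the tests'
assumption that pbm lists backups oldest to newest with the backup id as the first
whitespace-separated column:

    def most_recent_backup_id(list_backups_output: str) -> str:
        """Return the id of the newest backup in the `list-backups` action output."""
        # pbm prints one backup per line, oldest first, so the last line is the newest backup
        most_recent_row = list_backups_output.split("\n")[-1]
        # the backup id is the first whitespace-separated field of that row
        return most_recent_row.split()[0]

    # example usage inside a test:
    # action = await db_unit.run_action(action_name="list-backups")
    # list_result = await action.wait()
    # backup_id = most_recent_backup_id(list_result.results["backups"])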