Skip to content

Commit

Permalink
[DPE-4665 ] Pre upgrade check sharding (#422)
Browse files Browse the repository at this point in the history
## Issue
Sharded deployments do not support preflight checks.

## Preflight checks added
**Both Config-Server and Shards check**
1. Ensure the cluster is not running backup / TLS actions [Config-Server]
2. Health check - For shards and config servers, ensure that no node is
in the ROLLBACK or RECOVERING state.
3. Move Primary for Config Server to the unit with the lowest id 
4. make a dummy write to each shard (and read from each replica) to
ensure the MongoDB cluster works.


**Config Server only:**
1. check feature compatibility version on all nodes
2. set Feature Compatibility Version for mongos 
3. Disable the balancer 
4. Set status waiting for refresh command 

## Preflight checks NOT added
1. Check juju unit state for all units via shared interface with the
usage of “goal_state”
2. Backup the config Database

Regarding the items above:
1. The juju unit state check will be done in a future PR.
2. The config database backup will not be performed; instead we will
instruct users to perform a backup.
  • Loading branch information
MiaAltieri authored Jul 2, 2024
1 parent b653333 commit 0328543
Show file tree
Hide file tree
Showing 6 changed files with 288 additions and 47 deletions.
35 changes: 23 additions & 12 deletions lib/charms/mongodb/v1/mongos.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import logging
from dataclasses import dataclass
from typing import Dict, List, Optional, Set, Tuple
from typing import List, Optional, Set, Tuple
from urllib.parse import quote_plus

from charms.mongodb.v0.mongodb import NotReadyError
Expand All @@ -22,11 +22,13 @@

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 4
LIBPATCH = 5

# path to store mongodb keyFile
logger = logging.getLogger(__name__)

SHARD_AWARE_STATE = 1


@dataclass
class MongosConfiguration:
Expand All @@ -53,19 +55,22 @@ class MongosConfiguration:
@property
def uri(self):
"""Return URI concatenated from fields."""
self.complete_hosts = self.hosts

# mongos using Unix Domain Socket to communicate do not use port
if self.port:
self.hosts = [f"{host}:{self.port}" for host in self.hosts]
self.complete_hosts = [f"{host}:{self.port}" for host in self.hosts]

complete_hosts = ",".join(self.complete_hosts)

hosts = ",".join(self.hosts)
# Auth DB should be specified while user connects to application DB.
auth_source = ""
if self.database != "admin":
auth_source = "authSource=admin"
return (
f"mongodb://{quote_plus(self.username)}:"
f"{quote_plus(self.password)}@"
f"{hosts}/{quote_plus(self.database)}?"
f"{complete_hosts}/{quote_plus(self.database)}?"
f"{auth_source}"
)

Expand Down Expand Up @@ -120,8 +125,6 @@ def __init__(self, config: MongosConfiguration, uri=None, direct=False):
direct: force a direct connection to a specific host, avoiding
reading replica set configuration and reconnection.
"""
self.mongodb_config = config

if uri is None:
uri = config.uri

Expand Down Expand Up @@ -187,8 +190,7 @@ def pre_remove_checks(self, shard_name):

# It is necessary to call removeShard multiple times on a shard to guarantee removal.
# Allow re-removal of shards that are currently draining.
sc_status = self.client.admin.command("listShards")
if self._is_any_draining(sc_status, ignore_shard=shard_name):
if self.is_any_draining(ignore_shard=shard_name):
cannot_remove_shard = (
f"cannot remove shard {shard_name} from cluster, another shard is draining"
)
Expand Down Expand Up @@ -294,8 +296,7 @@ def _get_databases_collection(self) -> collection.Collection:

return config_db["databases"]

@staticmethod
def _is_any_draining(sc_status: Dict, ignore_shard: str = "") -> bool:
def is_any_draining(self, ignore_shard: str = "") -> bool:
"""Returns true if any shard members is draining.
Checks if any members in sharded cluster are draining data.
Expand All @@ -304,6 +305,7 @@ def _is_any_draining(sc_status: Dict, ignore_shard: str = "") -> bool:
sc_status: current state of shard cluster status as reported by mongos.
ignore_shard: shard to ignore
"""
sc_status = self.client.admin.command("listShards")
return any(
# check draining status of all shards except the one to be ignored.
shard.get("draining", False) if shard["_id"] != ignore_shard else False
Expand Down Expand Up @@ -358,12 +360,21 @@ def is_ready(self) -> bool:

return True

def are_all_shards_aware(self) -> bool:
    """Report whether every shard listed by mongos is in the shard-aware state.

    Runs the `listShards` admin command and compares the `state` field of each
    returned shard entry against SHARD_AWARE_STATE. An empty shard list yields
    True.
    """
    shard_listing = self.client.admin.command("listShards")
    return all(entry["state"] == SHARD_AWARE_STATE for entry in shard_listing["shards"])

def is_shard_aware(self, shard_name: str) -> bool:
"""Returns True if provided shard is shard aware."""
sc_status = self.client.admin.command("listShards")
for shard in sc_status["shards"]:
if shard["_id"] == shard_name:
return shard["state"] == 1
return shard["state"] == SHARD_AWARE_STATE

return False

Expand Down
6 changes: 3 additions & 3 deletions lib/charms/mongodb/v1/shards_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def add_shards(self, departed_shard_id):
failed_to_add_shard = None
with MongosConnection(self.charm.mongos_config) as mongo:
cluster_shards = mongo.get_shard_members()
relation_shards = self._get_shards_from_relations(departed_shard_id)
relation_shards = self.get_shards_from_relations(departed_shard_id)
for shard in relation_shards - cluster_shards:
try:
shard_hosts = self._get_shard_hosts(shard)
Expand Down Expand Up @@ -299,7 +299,7 @@ def remove_shards(self, departed_shard_id):
retry_removal = False
with MongosConnection(self.charm.mongos_config) as mongo:
cluster_shards = mongo.get_shard_members()
relation_shards = self._get_shards_from_relations(departed_shard_id)
relation_shards = self.get_shards_from_relations(departed_shard_id)

for shard in cluster_shards - relation_shards:
try:
Expand Down Expand Up @@ -410,7 +410,7 @@ def _update_relation_data(self, relation_id: int, data: dict) -> None:
"""
self.database_provides.update_relation_data(relation_id, data)

def _get_shards_from_relations(self, departed_shard_id: Optional[int]):
def get_shards_from_relations(self, departed_shard_id: Optional[int] = None):
"""Returns a list of the shards related to the config-server."""
relations = self.model.relations[self.relation_name]
return set(
Expand Down
16 changes: 12 additions & 4 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,12 +257,20 @@ def mongos_config(self) -> MongoDBConfiguration:
"""Generates a MongoDBConfiguration object for mongos in the deployment of MongoDB."""
return self._get_mongos_config_for_user(OperatorUser, set(self.unit_ips))

def remote_mongos_config(self, hosts) -> MongoDBConfiguration:
"""Generates a MongoDBConfiguration object for mongos in the deployment of MongoDB."""
def remote_mongos_config(self, hosts) -> MongosConfiguration:
"""Generates a MongosConfiguration object for mongos in the deployment of MongoDB."""
# mongos that are part of the cluster have the same username and password, but different
# hosts
return self._get_mongos_config_for_user(OperatorUser, hosts)

def remote_mongodb_config(self, hosts, replset=None, standalone=None) -> MongoDBConfiguration:
    """Build a MongoDBConfiguration for mongod reachable at the given hosts.

    The configuration is generated for the operator user; the caller supplies
    the hosts and, optionally, the replica set name and standalone flag, which
    are forwarded unchanged to `_get_mongodb_config_for_user`.
    """
    remote_config = self._get_mongodb_config_for_user(
        OperatorUser,
        hosts,
        standalone=standalone,
        replset=replset,
    )
    return remote_config

@property
def mongodb_config(self) -> MongoDBConfiguration:
"""Generates a MongoDBConfiguration object for this deployment of MongoDB."""
Expand Down Expand Up @@ -879,13 +887,13 @@ def _get_mongos_config_for_user(
)

def _get_mongodb_config_for_user(
self, user: MongoDBUser, hosts: Set[str], standalone: bool = False
self, user: MongoDBUser, hosts: Set[str], standalone: bool = False, replset: str = None
) -> MongoDBConfiguration:
external_ca, _ = self.tls.get_tls_files(internal=False)
internal_ca, _ = self.tls.get_tls_files(internal=True)

return MongoDBConfiguration(
replset=self.app.name,
replset=replset or self.app.name,
database=user.get_database_name(),
username=user.get_username(),
password=self.get_secret(APP_SCOPE, user.get_password_key_name()),
Expand Down
12 changes: 12 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,15 @@ class Secrets:
SECRET_INTERNAL_LABEL = "internal-secret"
SECRET_DELETED_LABEL = "None"
MAX_PASSWORD_LENGTH = 4096

class Status:
"""Status related constants.
TODO: move all status messages here.
"""

class Upgrade:
"""Upgrade related constants."""

WAITING_FOR_REFRESH_KEY = "waiting_for_refresh"
FEATURE_VERSION_6 = "6.0"
Loading

0 comments on commit 0328543

Please sign in to comment.