Skip to content

Commit

Permalink
feat: Use refresh v3 vocabulary
Browse files Browse the repository at this point in the history
  • Loading branch information
Gu1nness committed Oct 1, 2024
1 parent a24e2bb commit 37f0791
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 110 deletions.
13 changes: 8 additions & 5 deletions actions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,14 @@ set-tls-private-key:
type: string
description: The content of private key for internal communications with clients. Content will be auto-generated if this option is not specified.

pre-upgrade-check:
description: Check if charm is ready to upgrade
pre-refresh-check:
description: Check if charm is ready to refresh

resume-upgrade:
description: Upgrade remaining units (after you manually verified that upgraded units are healthy).
resume-refresh:
description: |
Refresh next unit(s) (after you have manually verified that refreshed units are healthy)
force-upgrade:
description: Force upgrade of this unit.
description: |
Force refresh of this unit.
Potential of data loss and downtime.
40 changes: 21 additions & 19 deletions lib/charms/mongodb/v0/upgrade_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@
ROLLBACK_INSTRUCTIONS = "To rollback, `juju refresh` to the previous revision"

PEER_RELATION_ENDPOINT_NAME = "upgrade-version-a"
RESUME_ACTION_NAME = "resume-upgrade"
PRECHECK_ACTION_NAME = "pre-upgrade-check"
RESUME_ACTION_NAME = "resume-refresh"
PRECHECK_ACTION_NAME = "pre-refresh-check"


# BEGIN: Helper functions
Expand Down Expand Up @@ -71,7 +71,7 @@ def __init__(self, message: str):
self.message = message
super().__init__(
BlockedStatus(
f"Rollback with `juju refresh`. Pre-upgrade check failed: {self.message}"
f"Rollback with `juju refresh`. Pre-refresh check failed: {self.message}"
)
)

Expand Down Expand Up @@ -179,7 +179,7 @@ def is_compatible(self) -> bool:
)
return False
logger.debug(
f"Versions before upgrade compatible with versions after upgrade {previous_version_strs=} {self._current_versions=}"
f"Versions before refresh compatible with versions after refresh {previous_version_strs=} {self._current_versions=}"
)
return True
except KeyError as exception:
Expand Down Expand Up @@ -227,9 +227,11 @@ def app_status(self) -> StatusBase | None:
f"Verify highest unit is healthy & run `{RESUME_ACTION_NAME}` action. "
)
return BlockedStatus(
f"Upgrading. {resume_string}To rollback, `juju refresh` to last revision"
f"Refreshing. {resume_string}To rollback, `juju refresh` to last revision"
)
return MaintenanceStatus("Upgrading. To rollback, `juju refresh` to the previous revision")
return MaintenanceStatus(
"Refreshing. To rollback, `juju refresh` to the previous revision"
)

@property
def versions_set(self) -> bool:
Expand Down Expand Up @@ -311,7 +313,7 @@ def pre_upgrade_check(self) -> None:
need to be modified).
See https://chat.canonical.com/canonical/pl/cmf6uhm1rp8b7k8gkjkdsj4mya
"""
logger.debug("Running pre-upgrade checks")
logger.debug("Running pre-refresh checks")

# TODO if shard is getting upgraded but BOTH have same revision, then fail
try:
Expand All @@ -337,7 +339,7 @@ def pre_upgrade_check(self) -> None:

if self._charm.is_role(Config.Role.CONFIG_SERVER):
if not self._charm.upgrade.are_pre_upgrade_operations_config_server_successful():
raise PrecheckFailed("Pre-upgrade operations on config-server failed.")
raise PrecheckFailed("Pre-refresh operations on config-server failed.")


# END: Useful classes
Expand Down Expand Up @@ -374,7 +376,7 @@ def move_primary_to_last_upgrade_unit(self) -> None:
unit_with_lowest_id = self._upgrade._sorted_units[-1]
if mongod.primary() == self.charm.unit_host(unit_with_lowest_id):
logger.debug(
"Not moving Primary before upgrade, primary is already on the last unit to upgrade."
"Not moving Primary before refresh, primary is already on the last unit to refresh."
)
return

Expand All @@ -401,7 +403,7 @@ def is_cluster_healthy(self) -> bool:
self.charm.mongodb_config, "localhost", direct=True
) as direct_mongo:
if not direct_mongo.is_ready:
logger.error("Cannot proceed with upgrade. Service mongod is not running")
logger.error("Cannot proceed with refresh. Service mongod is not running")
return False

# It is possible that in a previous run of post-upgrade-check, that the unit was set to
Expand All @@ -414,22 +416,22 @@ def is_cluster_healthy(self) -> bool:
unit_to_ignore=self.charm.unit.name
):
logger.error(
"Cannot proceed with upgrade. Status of charm units do not show active / waiting for upgrade."
"Cannot proceed with refresh. Status of charm units do not show active / waiting for refresh."
)
return False

if self.charm.is_role(Config.Role.CONFIG_SERVER):
if not self.charm.status.are_shards_status_ready_for_upgrade():
logger.error(
"Cannot proceed with upgrade. Status of shard units do not show active / waiting for upgrade."
"Cannot proceed with refresh. Status of shard units do not show active / waiting for refresh."
)
return False

try:
return self.are_nodes_healthy()
except (PyMongoError, OperationFailure, ServerSelectionTimeoutError) as e:
logger.error(
"Cannot proceed with upgrade. Failed to check cluster health, error: %s", e
"Cannot proceed with refresh. Failed to check cluster health, error: %s", e
)
return False

Expand All @@ -441,12 +443,12 @@ def are_nodes_healthy(self) -> bool:
mongos_config = self.get_cluster_mongos()
if not self.are_shards_healthy(mongos_config):
logger.debug(
"One or more individual shards are not healthy - do not proceed with upgrade."
"One or more individual shards are not healthy - do not proceed with refresh."
)
return False

if not self.are_replicas_in_sharded_cluster_healthy(mongos_config):
logger.debug("One or more nodes are not healthy - do not proceed with upgrade.")
logger.debug("One or more nodes are not healthy - do not proceed with refresh.")
return False

return True
Expand All @@ -465,11 +467,11 @@ def are_shards_healthy(self, mongos_config: MongoConfiguration) -> bool:
"""Returns True if all shards in the cluster are healthy."""
with MongosConnection(mongos_config) as mongos:
if mongos.is_any_draining():
logger.debug("Cluster is draining a shard, do not proceed with upgrade.")
logger.debug("Cluster is draining a shard, do not proceed with refresh.")
return False

if not mongos.are_all_shards_aware():
logger.debug("Not all shards are shard aware, do not proceed with upgrade.")
logger.debug("Not all shards are shard aware, do not proceed with refresh.")
return False

# Config-Server has access to all the related shard applications.
Expand All @@ -478,7 +480,7 @@ def are_shards_healthy(self, mongos_config: MongoConfiguration) -> bool:
cluster_shards = mongos.get_shard_members()
if len(relation_shards - cluster_shards):
logger.debug(
"Not all shards have been added/drained, do not proceed with upgrade."
"Not all shards have been added/drained, do not proceed with refresh."
)
return False

Expand Down Expand Up @@ -696,7 +698,7 @@ def are_pre_upgrade_operations_config_server_successful(self):
try:
self.turn_off_and_wait_for_balancer()
except BalancerStillRunningError:
logger.debug("Balancer is still running. Please try the pre-upgrade check later.")
logger.debug("Balancer is still running. Please try the pre-refresh check later.")
return False

return True
Expand Down
31 changes: 17 additions & 14 deletions src/upgrades/machine_upgrade.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@
import typing

import ops
from charms.mongodb.v0.upgrade_helpers import AbstractUpgrade, UnitState
from charms.mongodb.v0.upgrade_helpers import (
AbstractUpgrade,
FailedToElectNewPrimaryError,
UnitState,
)

from config import Config
from upgrades import mongodb_upgrade

logger = logging.getLogger(__name__)

Expand All @@ -32,7 +35,7 @@ def unit_state(self) -> typing.Optional[UnitState]:
self._unit_workload_container_version is not None
and self._unit_workload_container_version != self._app_workload_container_version
):
logger.debug("Unit upgrade state: outdated")
logger.debug("Unit refresh state: outdated")
return UnitState.OUTDATED
return super().unit_state

Expand All @@ -44,21 +47,21 @@ def unit_state(self, value: UnitState) -> None:
def _get_unit_healthy_status(self) -> ops.StatusBase:
if self._unit_workload_container_version == self._app_workload_container_version:
return ops.ActiveStatus(
f'MongoDB {self._unit_workload_version} running; Snap rev {self._unit_workload_container_version}; Charmed operator {self._current_versions["charm"]}'
f'MongoDB {self._unit_workload_version} running; Snap revision {self._unit_workload_container_version}; Charm revision {self._current_versions["charm"]}'
)
return ops.ActiveStatus(
f'MongoDB {self._unit_workload_version} running; Snap rev {self._unit_workload_container_version} (outdated); Charmed operator {self._current_versions["charm"]}'
f'MongoDB {self._unit_workload_version} running; Snap revision {self._unit_workload_container_version} (outdated); Charm revision {self._current_versions["charm"]}'
)

@property
def app_status(self) -> typing.Optional[ops.StatusBase]:
"""App upgrade status."""
if not self.is_compatible:
logger.info(
"Upgrade incompatible. If you accept potential *data loss* and *downtime*, you can continue by running `force-upgrade` action on each remaining unit"
"Refresh incompatible. Rollback with `juju refresh`. If you accept potential *data loss* and *downtime*, you can continue by running `force-upgrade` action on each remaining unit"
)
return ops.BlockedStatus(
"Upgrade incompatible. Rollback to previous revision with `juju refresh`"
"Refresh incompatible. Rollback to previous revision with `juju refresh`"
)
return super().app_status

Expand Down Expand Up @@ -98,9 +101,9 @@ def reconcile_partition(self, *, action_event: ops.ActionEvent = None) -> None:
"""Handle Juju action to confirm first upgraded unit is healthy and resume upgrade."""
if action_event:
self.upgrade_resumed = True
message = "Upgrade resumed."
message = "Refresh resumed."
action_event.set_results({"result": message})
logger.debug(f"Resume upgrade event succeeded: {message}")
logger.debug(f"Resume refresh succeeded: {message}")

@property
def upgrade_resumed(self) -> bool:
Expand Down Expand Up @@ -139,15 +142,15 @@ def authorized(self) -> bool:
== self._current_versions["charm"]
):
# Assumes charm version uniquely identifies charm revision
logger.debug("Rollback detected. Skipping pre-upgrade check")
logger.debug("Rollback detected. Skipping pre-refresh check")
else:
# Run pre-refresh check
# (in case user forgot to run pre-refresh-check action)
self.pre_upgrade_check()
logger.debug("Pre-upgrade check after `juju refresh` successful")
logger.debug("Pre-refresh check after `juju refresh` successful")
elif index == 1:
# User confirmation needed to resume refresh (i.e. refresh second unit)
logger.debug(f"Second unit authorized to upgrade if {self.upgrade_resumed=}")
logger.debug(f"Second unit authorized to refresh if {self.upgrade_resumed=}")
return self.upgrade_resumed
return True
state = self._peer_relation.data[unit].get("state")
Expand All @@ -173,7 +176,7 @@ def upgrade_unit(self, *, charm) -> None:
if self._unit.name == charm.primary:
logger.debug("Stepping down current primary, before upgrading service...")
charm.upgrade.step_down_primary_and_wait_reelection()
except mongodb_upgrade.FailedToElectNewPrimaryError:
except FailedToElectNewPrimaryError:
# By not setting the snap revision and returning immediately, this function will be
# called again, and another attempt to re-elect a primary will be made.
logger.error("Failed to reelect primary before upgrading unit.")
Expand All @@ -184,7 +187,7 @@ def upgrade_unit(self, *, charm) -> None:
charm.install_snap_packages(packages=Config.SNAP_PACKAGES)
self._unit_databag["snap_revision"] = _SNAP_REVISION
self._unit_workload_version = self._current_versions["workload"]
logger.debug(f"Saved {_SNAP_REVISION} in unit databag after upgrade")
logger.debug(f"Saved {_SNAP_REVISION} in unit databag after refresh")

# post upgrade check should be retried in case of failure, for this it is necessary to
# emit a separate event.
Expand Down
Loading

0 comments on commit 37f0791

Please sign in to comment.