diff --git a/lib/charms/mongodb/v0/upgrade_helpers.py b/lib/charms/mongodb/v0/upgrade_helpers.py index 407be6e86..a97bb80d8 100644 --- a/lib/charms/mongodb/v0/upgrade_helpers.py +++ b/lib/charms/mongodb/v0/upgrade_helpers.py @@ -3,18 +3,24 @@ # Copyright 2024 Canonical Ltd. # See LICENSE file for licensing details. import abc +import copy +import enum +import json import logging +import pathlib import secrets import string -from typing import List, Tuple +from typing import Dict, List, Tuple +import poetry.core.constraints.version as poetry_version from charms.mongodb.v0.mongo import MongoConfiguration -from charms.mongodb.v1.mongodb import MongoDBConnection +from charms.mongodb.v1.mongodb import FailedToMovePrimaryError, MongoDBConnection from charms.mongodb.v1.mongos import MongosConnection +from ops import ActionEvent, BlockedStatus, MaintenanceStatus, StatusBase, Unit from ops.charm import CharmBase from ops.framework import Object from pymongo.errors import OperationFailure, PyMongoError, ServerSelectionTimeoutError -from tenacity import Retrying, retry, stop_after_attempt, wait_fixed +from tenacity import RetryError, Retrying, retry, stop_after_attempt, wait_fixed from config import Config @@ -27,7 +33,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 1 +LIBPATCH = 2 logger = logging.getLogger(__name__) @@ -35,18 +41,41 @@ WRITE_KEY = "write_value" ROLLBACK_INSTRUCTIONS = "To rollback, `juju refresh` to the previous revision" +PEER_RELATION_ENDPOINT_NAME = "upgrade-version-a" +RESUME_ACTION_NAME = "resume-upgrade" +PRECHECK_ACTION_NAME = "pre-upgrade-check" -# BEGIN: Useful classes -class AbstractUpgrade(abc.ABC): - """In-place upgrades abstract class (typing).""" - pass +# BEGIN: Helper functions +def unit_number(unit_: Unit) -> int: + """Get unit number.""" + return int(unit_.name.split("/")[-1]) -# END: Useful classes +# END: Helper functions # BEGIN: Exceptions +class StatusException(Exception): + """Exception with ops status.""" + + def __init__(self, status: StatusBase) -> None: + super().__init__(status.message) + self.status = status + + +class PrecheckFailed(StatusException): + """App is not ready to upgrade.""" + + def __init__(self, message: str): + self.message = message + super().__init__( + BlockedStatus( + f"Rollback with `juju refresh`. Pre-upgrade check failed: {self.message}" + ) + ) + + class FailedToElectNewPrimaryError(Exception): """Raised when a new primary isn't elected after stepping down.""" @@ -59,9 +88,261 @@ class BalancerStillRunningError(Exception): """Raised when the balancer is still running after stopping it.""" +class PeerRelationNotReady(Exception): + """Upgrade peer relation not available (to this unit).""" + + # END: Exceptions +class UnitState(str, enum.Enum): + """Unit upgrade state.""" + + HEALTHY = "healthy" + RESTARTING = "restarting" # Kubernetes only + UPGRADING = "upgrading" # Machines only + OUTDATED = "outdated" # Machines only + + +# BEGIN: Useful classes +class AbstractUpgrade(abc.ABC): + """In-place upgrades abstract class (typing). + + Based off specification: DA058 - In-Place Upgrades - Kubernetes v2 + (https://docs.google.com/document/d/1tLjknwHudjcHs42nzPVBNkHs98XxAOT2BXGGpP7NyEU/) + """ + + def __init__(self, charm_: CharmBase) -> None: + relations = charm_.model.relations[PEER_RELATION_ENDPOINT_NAME] + if not relations: + raise PeerRelationNotReady + assert len(relations) == 1 + self._peer_relation = relations[0] + self._charm = charm_ + self._unit: Unit = charm_.unit + self._unit_databag = self._peer_relation.data[self._unit] + self._app_databag = self._peer_relation.data[charm_.app] + self._app_name = charm_.app.name + self._current_versions = {} # For this unit + for version, file_name in { + "charm": "charm_version", + "workload": "workload_version", + }.items(): + self._current_versions[version] = pathlib.Path(file_name).read_text().strip() + + @property + def unit_state(self) -> UnitState | None: + """Unit upgrade state.""" + if state := self._unit_databag.get("state"): + return UnitState(state) + + @unit_state.setter + def unit_state(self, value: UnitState) -> None: + self._unit_databag["state"] = value.value + + @property + def is_compatible(self) -> bool: + """Whether upgrade is supported from previous versions.""" + assert self.versions_set + try: + previous_version_strs: Dict[str, str] = json.loads(self._app_databag["versions"]) + except KeyError as exception: + logger.debug("`versions` missing from peer relation", exc_info=exception) + return False + # TODO charm versioning: remove `.split("+")` (which removes git hash before comparing) + previous_version_strs["charm"] = previous_version_strs["charm"].split("+")[0] + previous_versions: Dict[str, poetry_version.Version] = { + key: poetry_version.Version.parse(value) + for key, value in previous_version_strs.items() + } + current_version_strs = copy.copy(self._current_versions) + current_version_strs["charm"] = current_version_strs["charm"].split("+")[0] + current_versions = { + key: poetry_version.Version.parse(value) for key, value in current_version_strs.items() + } + try: + # TODO Future PR: change this > sign to support downgrades + if ( + previous_versions["charm"] > current_versions["charm"] + or previous_versions["charm"].major != current_versions["charm"].major + ): + logger.debug( + f'{previous_versions["charm"]=} incompatible with {current_versions["charm"]=}' + ) + return False + if ( + previous_versions["workload"] > current_versions["workload"] + or previous_versions["workload"].major != current_versions["workload"].major + ): + logger.debug( + f'{previous_versions["workload"]=} incompatible with {current_versions["workload"]=}' + ) + return False + logger.debug( + f"Versions before upgrade compatible with versions after upgrade {previous_version_strs=} {self._current_versions=}" + ) + return True + except KeyError as exception: + logger.debug(f"Version missing from {previous_versions=}", exc_info=exception) + return False + + @property + def in_progress(self) -> bool: + """Whether upgrade is in progress.""" + logger.debug( + f"{self._app_workload_container_version=} {self._unit_workload_container_versions=}" + ) + return any( + version != self._app_workload_container_version + for version in self._unit_workload_container_versions.values() + ) + + @property + def _sorted_units(self) -> List[Unit]: + """Units sorted from highest to lowest unit number.""" + return sorted((self._unit, *self._peer_relation.units), key=unit_number, reverse=True) + + @abc.abstractmethod + def _get_unit_healthy_status(self) -> StatusBase: + """Status shown during upgrade if unit is healthy.""" + raise NotImplementedError() + + def get_unit_juju_status(self) -> StatusBase | None: + """Unit upgrade status.""" + if self.in_progress: + return self._get_unit_healthy_status() + + @property + def app_status(self) -> StatusBase | None: + """App upgrade status.""" + if not self.in_progress: + return + if not self.upgrade_resumed: + # User confirmation needed to resume upgrade (i.e. upgrade second unit) + # Statuses over 120 characters are truncated in `juju status` as of juju 3.1.6 and + # 2.9.45 + resume_string = "" + if len(self._sorted_units) > 1: + resume_string = ( + f"Verify highest unit is healthy & run `{RESUME_ACTION_NAME}` action. " + ) + return BlockedStatus( + f"Upgrading. {resume_string}To rollback, `juju refresh` to last revision" + ) + return MaintenanceStatus("Upgrading. To rollback, `juju refresh` to the previous revision") + + @property + def versions_set(self) -> bool: + """Whether versions have been saved in app databag. + + Should only be `False` during first charm install. + + If a user upgrades from a charm that does not set versions, this charm will get stuck. + """ + return self._app_databag.get("versions") is not None + + def set_versions_in_app_databag(self) -> None: + """Save current versions in app databag. + + Used after next upgrade to check compatibility (i.e. whether that upgrade should be + allowed). + """ + assert not self.in_progress + logger.debug(f"Setting {self._current_versions=} in upgrade peer relation app databag") + self._app_databag["versions"] = json.dumps(self._current_versions) + logger.debug(f"Set {self._current_versions=} in upgrade peer relation app databag") + + @property + @abc.abstractmethod + def upgrade_resumed(self) -> bool: + """Whether user has resumed upgrade with Juju action.""" + raise NotImplementedError() + + @property + @abc.abstractmethod + def _unit_workload_container_versions(self) -> Dict[str, str]: + """{Unit name: unique identifier for unit's workload container version}. + + If and only if this version changes, the workload will restart (during upgrade or + rollback). + + On Kubernetes, the workload & charm are upgraded together + On machines, the charm is upgraded before the workload + + This identifier should be comparable to `_app_workload_container_version` to determine if + the unit & app are the same workload container version. + """ + raise NotImplementedError() + + @property + @abc.abstractmethod + def _app_workload_container_version(self) -> str: + """Unique identifier for the app's workload container version. + + This should match the workload version in the current Juju app charm version. + + This identifier should be comparable to `_unit_workload_container_versions` to determine if + the app & unit are the same workload container version. + """ + raise NotImplementedError() + + @abc.abstractmethod + def reconcile_partition(self, *, action_event: ActionEvent | None = None) -> None: + """If ready, allow next unit to upgrade.""" + raise NotImplementedError() + + def pre_upgrade_check(self) -> None: + """Check if this app is ready to upgrade. + + Runs before any units are upgraded + + Does *not* run during rollback + + On machines, this runs before any units are upgraded (after `juju refresh`) + On machines & Kubernetes, this also runs during pre-upgrade-check action + + Can run on leader or non-leader unit + + Raises: + PrecheckFailed: App is not ready to upgrade + + TODO Kubernetes: Run (some) checks after `juju refresh` (in case user forgets to run + pre-upgrade-check action). Note: 1 unit will upgrade before we can run checks (checks may + need to be modified). + See https://chat.canonical.com/canonical/pl/cmf6uhm1rp8b7k8gkjkdsj4mya + """ + logger.debug("Running pre-upgrade checks") + + # TODO if shard is getting upgraded but BOTH have same revision, then fail + try: + self._charm.upgrade.wait_for_cluster_healthy() + except RetryError: + logger.error("Cluster is not healthy") + raise PrecheckFailed("Cluster is not healthy") + + # On VM charms we can choose the order to upgrade, but not on K8s. In order to keep the + # two charms in sync we decided to have the VM charm have the same upgrade order as the K8s + # charm (i.e. highest to lowest.) Hence, we move the primary to the last unit to upgrade. + # This prevents the primary from jumping around from unit to unit during the upgrade + # procedure. + try: + self._charm.upgrade.move_primary_to_last_upgrade_unit() + except FailedToMovePrimaryError: + logger.error("Cluster failed to move primary before re-election.") + raise PrecheckFailed("Primary switchover failed") + + if not self._charm.upgrade.is_cluster_able_to_read_write(): + logger.error("Cluster cannot read/write to replicas") + raise PrecheckFailed("Cluster is not healthy") + + if self._charm.is_role(Config.Role.CONFIG_SERVER): + if not self._charm.upgrade.are_pre_upgrade_operations_config_server_successful(): + raise PrecheckFailed("Pre-upgrade operations on config-server failed.") + + +# END: Useful classes + + class GenericMongoDBUpgrade(Object, abc.ABC): """Substrate agnostif, abstract handler for upgrade events.""" diff --git a/src/upgrades/machine_upgrade.py b/src/upgrades/machine_upgrade.py index 0aea5f7bd..bcff4dfed 100644 --- a/src/upgrades/machine_upgrade.py +++ b/src/upgrades/machine_upgrade.py @@ -12,33 +12,34 @@ import typing import ops +from charms.mongodb.v0.upgrade_helpers import AbstractUpgrade, UnitState from config import Config -from upgrades import mongodb_upgrade, upgrade +from upgrades import mongodb_upgrade logger = logging.getLogger(__name__) _SNAP_REVISION = str(Config.SNAP_PACKAGES[0][2]) -class Upgrade(upgrade.Upgrade): +class Upgrade(AbstractUpgrade): """In-place upgrades on machines.""" @property - def unit_state(self) -> typing.Optional[upgrade.UnitState]: + def unit_state(self) -> typing.Optional[UnitState]: """Returns the unit state.""" if ( self._unit_workload_container_version is not None and self._unit_workload_container_version != self._app_workload_container_version ): logger.debug("Unit upgrade state: outdated") - return upgrade.UnitState.OUTDATED + return UnitState.OUTDATED return super().unit_state @unit_state.setter - def unit_state(self, value: upgrade.UnitState) -> None: + def unit_state(self, value: UnitState) -> None: # Super call - upgrade.Upgrade.unit_state.fset(self, value) + AbstractUpgrade.unit_state.fset(self, value) def _get_unit_healthy_status(self) -> ops.StatusBase: if self._unit_workload_container_version == self._app_workload_container_version: @@ -151,11 +152,11 @@ def authorized(self) -> bool: return True state = self._peer_relation.data[unit].get("state") if state: - state = upgrade.UnitState(state) + state = UnitState(state) if ( self._unit_workload_container_versions.get(unit.name) != self._app_workload_container_version - or state is not upgrade.UnitState.HEALTHY + or state is not UnitState.HEALTHY ): # Waiting for higher number units to upgrade return False @@ -179,7 +180,7 @@ def upgrade_unit(self, *, charm) -> None: return logger.debug(f"Upgrading {self.authorized=}") - self.unit_state = upgrade.UnitState.UPGRADING + self.unit_state = UnitState.UPGRADING charm.install_snap_packages(packages=Config.SNAP_PACKAGES) self._unit_databag["snap_revision"] = _SNAP_REVISION self._unit_workload_version = self._current_versions["workload"] diff --git a/src/upgrades/mongodb_upgrade.py b/src/upgrades/mongodb_upgrade.py index 18e4e42d7..9a6101928 100644 --- a/src/upgrades/mongodb_upgrade.py +++ b/src/upgrades/mongodb_upgrade.py @@ -4,11 +4,16 @@ """Manager for handling MongoDB in-place upgrades.""" import logging -from typing import Optional from charms.mongodb.v0.upgrade_helpers import ( + PEER_RELATION_ENDPOINT_NAME, + PRECHECK_ACTION_NAME, + RESUME_ACTION_NAME, ROLLBACK_INSTRUCTIONS, GenericMongoDBUpgrade, + PeerRelationNotReady, + PrecheckFailed, + UnitState, ) from charms.mongodb.v1.mongos import BalancerNotEnabledError, MongosConnection from ops.charm import ActionEvent, CharmBase @@ -18,7 +23,7 @@ from tenacity import RetryError from config import Config -from upgrades import machine_upgrade, upgrade +from upgrades import machine_upgrade logger = logging.getLogger(__name__) @@ -38,25 +43,23 @@ class MongoDBUpgrade(GenericMongoDBUpgrade): def __init__(self, charm: CharmBase): self.charm = charm - super().__init__(charm, upgrade.PEER_RELATION_ENDPOINT_NAME) + super().__init__(charm, PEER_RELATION_ENDPOINT_NAME) @override def _observe_events(self, charm: CharmBase) -> None: self.framework.observe( - charm.on[upgrade.PRECHECK_ACTION_NAME].action, self._on_pre_upgrade_check_action + charm.on[PRECHECK_ACTION_NAME].action, self._on_pre_upgrade_check_action ) self.framework.observe( - charm.on[upgrade.PEER_RELATION_ENDPOINT_NAME].relation_created, + charm.on[PEER_RELATION_ENDPOINT_NAME].relation_created, self._on_upgrade_peer_relation_created, ) self.framework.observe( - charm.on[upgrade.PEER_RELATION_ENDPOINT_NAME].relation_changed, self._reconcile_upgrade + charm.on[PEER_RELATION_ENDPOINT_NAME].relation_changed, self._reconcile_upgrade ) self.framework.observe(charm.on.upgrade_charm, self._on_upgrade_charm) - self.framework.observe( - charm.on[upgrade.RESUME_ACTION_NAME].action, self._on_resume_upgrade_action - ) + self.framework.observe(charm.on[RESUME_ACTION_NAME].action, self._on_resume_upgrade_action) self.framework.observe(charm.on["force-upgrade"].action, self._on_force_upgrade_action) self.framework.observe(self.post_app_upgrade_event, self.run_post_app_upgrade_task) self.framework.observe(self.post_cluster_upgrade_event, self.run_post_cluster_upgrade_task) @@ -64,10 +67,10 @@ def _observe_events(self, charm: CharmBase) -> None: # BEGIN: properties @property @override - def _upgrade(self) -> Optional[machine_upgrade.Upgrade]: + def _upgrade(self) -> machine_upgrade.Upgrade | None: try: return machine_upgrade.Upgrade(self.charm) - except upgrade.PeerRelationNotReady: + except PeerRelationNotReady: return None # END: properties @@ -95,10 +98,10 @@ def _reconcile_upgrade(self, _=None): if not self._upgrade.is_compatible: self._set_upgrade_status() return - if self._upgrade.unit_state is upgrade.UnitState.OUTDATED: + if self._upgrade.unit_state is UnitState.OUTDATED: try: authorized = self._upgrade.authorized - except upgrade.PrecheckFailed as exception: + except PrecheckFailed as exception: self._set_upgrade_status() self.charm.status.set_and_share_status(exception.status) logger.debug(f"Set unit status to {self.unit.status}") @@ -126,7 +129,7 @@ def _on_upgrade_charm(self, _): def _on_pre_upgrade_check_action(self, event: ActionEvent) -> None: if not self.charm.unit.is_leader(): - message = f"Must run action on leader unit. (e.g. `juju run {self.charm.app.name}/leader {upgrade.PRECHECK_ACTION_NAME}`)" + message = f"Must run action on leader unit. (e.g. `juju run {self.charm.app.name}/leader {PRECHECK_ACTION_NAME}`)" logger.debug(f"Pre-upgrade check event failed: {message}") event.fail(message) return @@ -137,7 +140,7 @@ def _on_pre_upgrade_check_action(self, event: ActionEvent) -> None: return try: self._upgrade.pre_upgrade_check() - except upgrade.PrecheckFailed as exception: + except PrecheckFailed as exception: message = ( f"Charm is *not* ready for upgrade. Pre-upgrade check failed: {exception.message}" ) @@ -150,7 +153,7 @@ def _on_pre_upgrade_check_action(self, event: ActionEvent) -> None: def _on_resume_upgrade_action(self, event: ActionEvent) -> None: if not self.charm.unit.is_leader(): - message = f"Must run action on leader unit. (e.g. `juju run {self.charm.app.name}/leader {upgrade.RESUME_ACTION_NAME}`)" + message = f"Must run action on leader unit. (e.g. `juju run {self.charm.app.name}/leader {RESUME_ACTION_NAME}`)" logger.debug(f"Resume upgrade event failed: {message}") event.fail(message) return @@ -193,7 +196,7 @@ def run_post_app_upgrade_task(self, event: EventBase): logger.debug("Running post upgrade checks to verify cluster is not broken after upgrade") self.run_post_upgrade_checks(event, finished_whole_cluster=False) - if self._upgrade.unit_state != upgrade.UnitState.HEALTHY: + if self._upgrade.unit_state != UnitState.HEALTHY: return logger.debug("Cluster is healthy after upgrading unit %s", self.charm.unit.name) @@ -265,7 +268,7 @@ def run_post_upgrade_checks(self, event, finished_whole_cluster: bool) -> None: if self.charm.unit.status == Config.Status.UNHEALTHY_UPGRADE: self.charm.status.set_and_share_status(ActiveStatus()) - self._upgrade.unit_state = upgrade.UnitState.HEALTHY + self._upgrade.unit_state = UnitState.HEALTHY def _set_upgrade_status(self): # In the future if we decide to support app statuses, we will need to handle this diff --git a/src/upgrades/upgrade.py b/src/upgrades/upgrade.py deleted file mode 100644 index 9aea3699f..000000000 --- a/src/upgrades/upgrade.py +++ /dev/null @@ -1,307 +0,0 @@ -# Copyright 2024 Canonical Ltd. -# See LICENSE file for licensing details. - -"""In-place upgrades. - -Based off specification: DA058 - In-Place Upgrades - Kubernetes v2 -(https://docs.google.com/document/d/1tLjknwHudjcHs42nzPVBNkHs98XxAOT2BXGGpP7NyEU/) -""" - -import abc -import copy -import enum -import json -import logging -import pathlib -import typing - -import ops -import poetry.core.constraints.version as poetry_version -from charms.mongodb.v0.upgrade_helpers import AbstractUpgrade -from charms.mongodb.v1.mongodb import FailedToMovePrimaryError -from tenacity import RetryError - -import status_exception -from config import Config - -logger = logging.getLogger(__name__) - -SHARD = "shard" -PEER_RELATION_ENDPOINT_NAME = "upgrade-version-a" -PRECHECK_ACTION_NAME = "pre-upgrade-check" -RESUME_ACTION_NAME = "resume-upgrade" - - -def unit_number(unit_: ops.Unit) -> int: - """Get unit number.""" - return int(unit_.name.split("/")[-1]) - - -class PrecheckFailed(status_exception.StatusException): - """App is not ready to upgrade.""" - - def __init__(self, message: str): - self.message = message - super().__init__( - ops.BlockedStatus( - f"Rollback with `juju refresh`. Pre-upgrade check failed: {self.message}" - ) - ) - - -class PeerRelationNotReady(Exception): - """Upgrade peer relation not available (to this unit).""" - - -class UnitState(str, enum.Enum): - """Unit upgrade state.""" - - HEALTHY = "healthy" - RESTARTING = "restarting" # Kubernetes only - UPGRADING = "upgrading" # Machines only - OUTDATED = "outdated" # Machines only - - -class Upgrade(AbstractUpgrade): - """In-place upgrades.""" - - def __init__(self, charm_: ops.CharmBase) -> None: - relations = charm_.model.relations[PEER_RELATION_ENDPOINT_NAME] - if not relations: - raise PeerRelationNotReady - assert len(relations) == 1 - self._peer_relation = relations[0] - self._charm = charm_ - self._unit: ops.Unit = charm_.unit - self._unit_databag = self._peer_relation.data[self._unit] - self._app_databag = self._peer_relation.data[charm_.app] - self._app_name = charm_.app.name - self._current_versions = {} # For this unit - for version, file_name in { - "charm": "charm_version", - "workload": "workload_version", - }.items(): - self._current_versions[version] = pathlib.Path(file_name).read_text().strip() - - @property - def unit_state(self) -> typing.Optional[UnitState]: - """Unit upgrade state.""" - if state := self._unit_databag.get("state"): - return UnitState(state) - - @unit_state.setter - def unit_state(self, value: UnitState) -> None: - self._unit_databag["state"] = value.value - - @property - def is_compatible(self) -> bool: - """Whether upgrade is supported from previous versions.""" - assert self.versions_set - try: - previous_version_strs: typing.Dict[str, str] = json.loads( - self._app_databag["versions"] - ) - except KeyError as exception: - logger.debug("`versions` missing from peer relation", exc_info=exception) - return False - # TODO charm versioning: remove `.split("+")` (which removes git hash before comparing) - previous_version_strs["charm"] = previous_version_strs["charm"].split("+")[0] - previous_versions: typing.Dict[str, poetry_version.Version] = { - key: poetry_version.Version.parse(value) - for key, value in previous_version_strs.items() - } - current_version_strs = copy.copy(self._current_versions) - current_version_strs["charm"] = current_version_strs["charm"].split("+")[0] - current_versions = { - key: poetry_version.Version.parse(value) for key, value in current_version_strs.items() - } - try: - # TODO Future PR: change this > sign to support downgrades - if ( - previous_versions["charm"] > current_versions["charm"] - or previous_versions["charm"].major != current_versions["charm"].major - ): - logger.debug( - f'{previous_versions["charm"]=} incompatible with {current_versions["charm"]=}' - ) - return False - if ( - previous_versions["workload"] > current_versions["workload"] - or previous_versions["workload"].major != current_versions["workload"].major - ): - logger.debug( - f'{previous_versions["workload"]=} incompatible with {current_versions["workload"]=}' - ) - return False - logger.debug( - f"Versions before upgrade compatible with versions after upgrade {previous_version_strs=} {self._current_versions=}" - ) - return True - except KeyError as exception: - logger.debug(f"Version missing from {previous_versions=}", exc_info=exception) - return False - - @property - def in_progress(self) -> bool: - """Whether upgrade is in progress.""" - logger.debug( - f"{self._app_workload_container_version=} {self._unit_workload_container_versions=}" - ) - return any( - version != self._app_workload_container_version - for version in self._unit_workload_container_versions.values() - ) - - @property - def _sorted_units(self) -> typing.List[ops.Unit]: - """Units sorted from highest to lowest unit number.""" - return sorted((self._unit, *self._peer_relation.units), key=unit_number, reverse=True) - - @abc.abstractmethod - def _get_unit_healthy_status(self) -> ops.StatusBase: - """Status shown during upgrade if unit is healthy.""" - - def get_unit_juju_status(self) -> typing.Optional[ops.StatusBase]: - """Unit upgrade status.""" - if self.in_progress: - return self._get_unit_healthy_status() - - @property - def app_status(self) -> typing.Optional[ops.StatusBase]: - """App upgrade status.""" - if not self.in_progress: - return - if not self.upgrade_resumed: - # User confirmation needed to resume upgrade (i.e. upgrade second unit) - # Statuses over 120 characters are truncated in `juju status` as of juju 3.1.6 and - # 2.9.45 - resume_string = "" - if len(self._sorted_units) > 1: - resume_string = ( - f"Verify highest unit is healthy & run `{RESUME_ACTION_NAME}` action. " - ) - return ops.BlockedStatus( - f"Upgrading. {resume_string}To rollback, `juju refresh` to last revision" - ) - return ops.MaintenanceStatus( - "Upgrading. To rollback, `juju refresh` to the previous revision" - ) - - @property - def versions_set(self) -> bool: - """Whether versions have been saved in app databag. - - Should only be `False` during first charm install. - - If a user upgrades from a charm that does not set versions, this charm will get stuck. - """ - return self._app_databag.get("versions") is not None - - def set_versions_in_app_databag(self) -> None: - """Save current versions in app databag. - - Used after next upgrade to check compatibility (i.e. whether that upgrade should be - allowed). - """ - assert not self.in_progress - logger.debug(f"Setting {self._current_versions=} in upgrade peer relation app databag") - self._app_databag["versions"] = json.dumps(self._current_versions) - logger.debug(f"Set {self._current_versions=} in upgrade peer relation app databag") - - @property - @abc.abstractmethod - def upgrade_resumed(self) -> bool: - """Whether user has resumed upgrade with Juju action.""" - - @property - @abc.abstractmethod - def _unit_workload_container_versions(self) -> typing.Dict[str, str]: - """{Unit name: unique identifier for unit's workload container version}. - - If and only if this version changes, the workload will restart (during upgrade or - rollback). - - On Kubernetes, the workload & charm are upgraded together - On machines, the charm is upgraded before the workload - - This identifier should be comparable to `_app_workload_container_version` to determine if - the unit & app are the same workload container version. - """ - - @property - @abc.abstractmethod - def _app_workload_container_version(self) -> str: - """Unique identifier for the app's workload container version. - - This should match the workload version in the current Juju app charm version. - - This identifier should be comparable to `_unit_workload_container_versions` to determine if - the app & unit are the same workload container version. - """ - - @abc.abstractmethod - def reconcile_partition(self, *, action_event: ops.ActionEvent = None) -> None: - """If ready, allow next unit to upgrade.""" - - @property - @abc.abstractmethod - def authorized(self) -> bool: - """Whether this unit is authorized to upgrade. - - Only applies to machine charm - """ - - @abc.abstractmethod - def upgrade_unit(self, *, charm) -> None: - """Upgrade this unit. - - Only applies to machine charm - """ - - def pre_upgrade_check(self) -> None: - """Check if this app is ready to upgrade. - - Runs before any units are upgraded - - Does *not* run during rollback - - On machines, this runs before any units are upgraded (after `juju refresh`) - On machines & Kubernetes, this also runs during pre-upgrade-check action - - Can run on leader or non-leader unit - - Raises: - PrecheckFailed: App is not ready to upgrade - - TODO Kubernetes: Run (some) checks after `juju refresh` (in case user forgets to run - pre-upgrade-check action). Note: 1 unit will upgrade before we can run checks (checks may - need to be modified). - See https://chat.canonical.com/canonical/pl/cmf6uhm1rp8b7k8gkjkdsj4mya - """ - logger.debug("Running pre-upgrade checks") - - # TODO if shard is getting upgraded but BOTH have same revision, then fail - try: - self._charm.upgrade.wait_for_cluster_healthy() - except RetryError: - logger.error("Cluster is not healthy") - raise PrecheckFailed("Cluster is not healthy") - - # On VM charms we can choose the order to upgrade, but not on K8s. In order to keep the - # two charms in sync we decided to have the VM charm have the same upgrade order as the K8s - # charm (i.e. highest to lowest.) Hence, we move the primary to the last unit to upgrade. - # This prevents the primary from jumping around from unit to unit during the upgrade - # procedure. - try: - self._charm.upgrade.move_primary_to_last_upgrade_unit() - except FailedToMovePrimaryError: - logger.error("Cluster failed to move primary before re-election.") - raise PrecheckFailed("Primary switchover failed") - - if not self._charm.upgrade.is_cluster_able_to_read_write(): - logger.error("Cluster cannot read/write to replicas") - raise PrecheckFailed("Cluster is not healthy") - - if self._charm.is_role(Config.Role.CONFIG_SERVER): - if not self._charm.upgrade.are_pre_upgrade_operations_config_server_successful(): - raise PrecheckFailed("Pre-upgrade operations on config-server failed.")