Skip to content

Commit

Permalink
Merge pull request #138 from canonical/IAM-513
Browse files Browse the repository at this point in the history
Fix database/migration logic
  • Loading branch information
nsklikas authored Sep 29, 2023
2 parents ea8b075 + 3fe4232 commit 2bd042d
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 45 deletions.
9 changes: 8 additions & 1 deletion actions.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
run-migration:
description: Run a command to create SQL schemas and apply migration plans.
description:
Run a migration, this is needed after upgrades. This is a non-reversible operation.
Run this after backing up the database.
params:
timeout:
description: Timeout after which the migration will be canceled
type: number
default: 120
create-oauth-client:
description: Register an oauth client
params:
Expand Down
108 changes: 72 additions & 36 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,20 +350,35 @@ def _set_version(self) -> None:
self.unit.set_workload_version(version)

def _get_database_relation_info(self) -> dict:
if not self.database.relations:
return None

relation_id = self.database.relations[0].id
relation_data = self.database.fetch_relation_data()[relation_id]

return {
"username": relation_data.get("username"),
"password": relation_data.get("password"),
"endpoints": relation_data.get("endpoints"),
"database_name": self._db_name,
}

@property
def _dsn(self) -> Optional[str]:
db_info = self._get_database_relation_info()
if not db_info:
return None

return "postgres://{username}:{password}@{endpoints}/{database_name}".format(
username=db_info.get("username"),
password=db_info.get("password"),
endpoints=db_info.get("endpoints"),
database_name=db_info.get("database_name"),
)

def _run_sql_migration(self, timeout: float = 60) -> bool:
"""Runs a command to create SQL schemas and apply migration plans."""
try:
self._hydra_cli.run_migration(timeout=timeout)
self._hydra_cli.run_migration(dsn=self._dsn, timeout=timeout)
except ExecError as err:
logger.error(f"Exited with code {err.exit_code}. Stderr: {err.stderr}")
return False
Expand All @@ -372,6 +387,21 @@ def _run_sql_migration(self, timeout: float = 60) -> bool:
def _oauth_relation_peer_data_key(self, relation_id: int) -> str:
return f"oauth_{relation_id}"

@property
def _migration_peer_data_key(self) -> Optional[str]:
if not self.database.relations:
return None
# We append the relation ID to the migration key in peer data, this is
# needed in order to be able to store multiple migration versions.
#
# When a database relation is departed, we can't remove the key because we
# can't be sure if the relation is actually departing or if the unit is
# dying. If a new database relation is then created we need to be able to tell
# that it is a different relation. By appending the relation ID we overcome this
# problem.
# See https://github.com/canonical/hydra-operator/pull/138#discussion_r1338409081
return f"{DB_MIGRATION_VERSION_KEY}_{self.database.relations[0].id}"

@property
def _peers(self) -> Optional[Relation]:
"""Fetch the peer relation."""
Expand Down Expand Up @@ -445,13 +475,6 @@ def _handle_status_update_config(self, event: HookEvent) -> None:
return

self.unit.status = MaintenanceStatus("Configuring resources")
self._container.add_layer(self._container_name, self._hydra_layer, combine=True)

if not self._hydra_service_is_created:
event.defer()
self.unit.status = WaitingStatus("Waiting for Hydra service")
logger.info("Hydra service is absent. Deferring the event.")
return

if not self.model.relations[self._db_relation_name]:
self.unit.status = BlockedStatus("Missing required relation with postgresql")
Expand All @@ -474,13 +497,16 @@ def _handle_status_update_config(self, event: HookEvent) -> None:
event.defer()
return

self._cleanup_peer_data()
self._container.push(self._hydra_config_path, self._render_conf_file(), make_dirs=True)
self._container.add_layer(self._container_name, self._hydra_layer, combine=True)
try:
self._container.restart(self._container_name)
except ChangeError as err:
logger.error(str(err))
self.unit.status = BlockedStatus("Failed to restart, please consult the logs")
return

self.unit.status = ActiveStatus()

def _on_leader_elected(self, event: LeaderElectedEvent) -> None:
Expand Down Expand Up @@ -527,16 +553,12 @@ def _migration_is_needed(self):
if not self._peers:
return

return self._get_peer_data(DB_MIGRATION_VERSION_KEY) != self._hydra_cli.get_version()
return self._get_peer_data(self._migration_peer_data_key) != self._hydra_cli.get_version()

def _on_database_created(self, event: DatabaseCreatedEvent) -> None:
"""Event Handler for database created event."""
logger.info("Retrieved database details")

if not self.model.relations[self._db_relation_name]:
self.unit.status = BlockedStatus("Missing required relation with postgresql")
return

if not self._container.can_connect():
event.defer()
logger.info("Cannot connect to Hydra container. Deferring the event.")
Expand All @@ -548,22 +570,13 @@ def _on_database_created(self, event: DatabaseCreatedEvent) -> None:
event.defer()
return

self.unit.status = MaintenanceStatus(
"Configuring container and resources for database connection"
)

if not self._get_secrets():
self.unit.status = WaitingStatus("Waiting for secret creation")
event.defer()
return

logger.info("Updating Hydra config and restarting service")
self._container.add_layer(self._container_name, self._hydra_layer, combine=True)
self._container.push(self._hydra_config_path, self._render_conf_file(), make_dirs=True)

if not self._migration_is_needed():
self._container.start(self._container_name)
self.unit.status = ActiveStatus()
self._handle_status_update_config(event)
return

if not self.unit.is_leader():
Expand All @@ -577,29 +590,52 @@ def _on_database_created(self, event: DatabaseCreatedEvent) -> None:
logger.error("Automigration job failed, please use the run-migration action")
return

self._set_peer_data(DB_MIGRATION_VERSION_KEY, self._hydra_cli.get_version())
self._container.start(self._container_name)
self.unit.status = ActiveStatus()
self._set_peer_data(self._migration_peer_data_key, self._hydra_cli.get_version())
self._handle_status_update_config(event)

def _on_database_changed(self, event: DatabaseEndpointsChangedEvent) -> None:
"""Event Handler for database changed event."""
self._handle_status_update_config(event)

def _on_run_migration(self, event: ActionEvent) -> None:
"""Runs the migration as an action response."""
logger.info("Executing database migration initiated by user")
if not self._run_sql_migration():
event.fail("Execution failed, please inspect the logs")
if not self._container.can_connect():
event.fail("Service is not ready. Please re-run the action when the charm is active")
return
event.log("Successfully ran migration")

timeout = float(event.params.get("timeout", 120))
event.log("Migrating database.")
try:
self._hydra_cli.run_migration(timeout=timeout, dsn=self._dsn)
except Error as e:
err_msg = e.stderr if isinstance(e, ExecError) else e
event.fail(f"Database migration action failed: {err_msg}")
return
event.log("Successfully migrated the database.")

if not self._peers:
event.fail("Peer relation not ready. Failed to store migration version")
return
self._set_peer_data(self._migration_peer_data_key, self._hydra_cli.get_version())
event.log("Updated migration version in peer data.")

def _on_database_relation_departed(self, event: RelationDepartedEvent) -> None:
"""Event Handler for database relation departed event."""
logger.error("Missing required relation with postgresql")
self.model.unit.status = BlockedStatus("Missing required relation with postgresql")
self._pop_peer_data(DB_MIGRATION_VERSION_KEY)
if self._container.can_connect():
self._container.stop(self._container_name)
self.unit.status = BlockedStatus("Missing required relation with postgresql")

def _cleanup_peer_data(self) -> None:
if not self._peers:
return
# We need to remove the migration key from peer data. We can't do that in relation
# departed as we can't tell if the event was triggered from a dying unit or if the
# relation is actually departing.
extra_keys = [
k
for k in self._peers.data[self.app].keys()
if k.startswith(DB_MIGRATION_VERSION_KEY) and k != self._migration_peer_data_key
]
for k in extra_keys:
self._pop_peer_data(k)

def _on_admin_ingress_ready(self, event: IngressPerAppReadyEvent) -> None:
if self.unit.is_leader():
Expand Down
21 changes: 15 additions & 6 deletions src/hydra_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,17 @@ def create_jwk(self, set_id: str = "hydra.openid.id-token", alg: str = "RS256")
logger.info(f"Successfully created jwk: {json_stdout['keys'][0]['kid']}")
return json_stdout

def run_migration(self, timeout: float = 60) -> Optional[str]:
def run_migration(self, dsn=None, timeout: float = 60) -> Optional[str]:
"""Run hydra migrations."""
cmd = ["hydra", "migrate", "sql", "-e", "--config", self.config_file_path, "--yes"]

_, stderr = self._run_cmd(cmd, timeout=timeout)
cmd = ["hydra", "migrate", "sql", "-e", "--yes"]
if dsn:
env = {"DSN": dsn}
else:
cmd.append("--config")
cmd.append(self.config_file_path)
env = None

_, stderr = self._run_cmd(cmd, timeout=timeout, environment=env)
return stderr

def get_version(self) -> str:
Expand All @@ -229,9 +235,12 @@ def get_version(self) -> str:
return versions[0]

def _run_cmd(
self, cmd: List[str], timeout: float = 20
self,
cmd: List[str],
timeout: float = 20,
environment: Optional[Dict] = None,
) -> Tuple[Union[str, bytes], Union[str, bytes]]:
logger.debug(f"Running cmd: {cmd}")
process = self.container.exec(cmd, timeout=timeout)
process = self.container.exec(cmd, environment=environment, timeout=timeout)
stdout, stderr = process.wait_output()
return stdout, stderr
13 changes: 13 additions & 0 deletions tests/integration/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,3 +330,16 @@ async def test_hydra_scale_up(ops_test: OpsTest) -> None:
timeout=1000,
wait_for_exact_units=3,
)


async def test_hydra_scale_down(ops_test: OpsTest) -> None:
"""Check that hydra works after it is scaled down."""
app = ops_test.model.applications[HYDRA_APP]

await app.scale(1)

await ops_test.model.wait_for_idle(
apps=[HYDRA_APP],
status="active",
timeout=1000,
)
43 changes: 41 additions & 2 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import pytest
import yaml
from ops.model import ActiveStatus, BlockedStatus, WaitingStatus
from ops.pebble import ExecError
from ops.pebble import ExecError, TimeoutError
from ops.testing import Harness
from test_oauth_requirer import CLIENT_CONFIG # type: ignore

Expand Down Expand Up @@ -199,7 +199,7 @@ def test_relation_data(harness: Harness, mocked_run_migration: MagicMock) -> Non
assert relation_data["endpoints"] == "postgresql-k8s-primary.namespace.svc.cluster.local:5432"


def test_relation_departed(harness: Harness, mocked_run_migration: MagicMock) -> None:
def test_relation_departed(harness: Harness) -> None:
db_relation_id = setup_postgres_relation(harness)

harness.remove_relation_unit(db_relation_id, "postgresql-k8s/0")
Expand Down Expand Up @@ -250,6 +250,7 @@ def test_postgres_created_when_migration_has_run(
harness.set_leader(False)
harness.set_can_connect(CONTAINER_NAME, True)
harness.charm.on.hydra_pebble_ready.emit(CONTAINER_NAME)
setup_ingress_relation(harness, "public")
setup_peer_relation(harness)

setup_postgres_relation(harness)
Expand Down Expand Up @@ -1140,3 +1141,41 @@ def test_verify_pebble_layer_tempo_k8s(harness: Harness) -> None:
}

assert harness.charm._hydra_layer.to_dict() == expected_layer


def test_run_migration_action(harness: Harness, mocked_run_migration: MagicMock) -> None:
harness.set_can_connect(CONTAINER_NAME, True)
setup_peer_relation(harness)
setup_postgres_relation(harness)
event = MagicMock()

harness.charm._on_run_migration(event)

mocked_run_migration.assert_called_once()
event.fail.assert_not_called()


def test_error_on_run_migration_action(harness: Harness, mocked_run_migration: MagicMock) -> None:
harness.set_can_connect(CONTAINER_NAME, True)
mocked_run_migration.side_effect = ExecError(
command=[], exit_code=1, stdout="", stderr="Error"
)
event = MagicMock()

harness.charm._on_run_migration(event)

mocked_run_migration.assert_called_once()
event.fail.assert_called()


def test_timeout_on_run_migration_action(
harness: Harness, mocked_run_migration: MagicMock
) -> None:
harness.set_can_connect(CONTAINER_NAME, True)
mocked_run_migration.side_effect = TimeoutError
event = MagicMock()

harness.charm._on_run_migration(event)

mocked_run_migration.assert_called_once()
event.fail.assert_called()

0 comments on commit 2bd042d

Please sign in to comment.