From b14d93b762dbc8a02e9bf93866a41cbf76f6c1d4 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 14 Nov 2024 22:57:21 +0000 Subject: [PATCH] Create one-off scheduled task to delete old OTKs To work around the fact that, pre-https://github.com/element-hq/synapse/pull/17903, our database may have old one-time-keys that the clients have long thrown away the private keys for, we want to delete OTKs that look like they came from libolm. To spread the load a bit, without holding up other background database updates, we use a scheduled task to do the work. --- changelog.d/17934.feature | 1 + synapse/handlers/e2e_keys.py | 35 +++++++++++ .../storage/databases/main/end_to_end_keys.py | 61 +++++++++++++++++++ .../delta/88/05_drop_old_otks.sql.postgres | 19 ++++++ .../main/delta/88/05_drop_old_otks.sql.sqlite | 19 ++++++ 5 files changed, 135 insertions(+) create mode 100644 changelog.d/17934.feature create mode 100644 synapse/storage/schema/main/delta/88/05_drop_old_otks.sql.postgres create mode 100644 synapse/storage/schema/main/delta/88/05_drop_old_otks.sql.sqlite diff --git a/changelog.d/17934.feature b/changelog.d/17934.feature new file mode 100644 index 00000000000..f0e138a30ff --- /dev/null +++ b/changelog.d/17934.feature @@ -0,0 +1 @@ +Add a one-off task to delete old one-time-keys, to guard against us having old OTKs in the database that the client has long forgotten about. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 315461fefb3..0246b4fb86c 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -39,6 +39,8 @@ from synapse.types import ( JsonDict, JsonMapping, + ScheduledTask, + TaskStatus, UserID, get_domain_from_id, get_verify_key_from_cross_signing_key, @@ -70,6 +72,7 @@ def __init__(self, hs: "HomeServer"): self.is_mine = hs.is_mine self.clock = hs.get_clock() self._worker_lock_handler = hs.get_worker_locks_handler() + self._task_scheduler = hs.get_task_scheduler() federation_registry = hs.get_federation_registry() @@ -116,6 +119,10 @@ def __init__(self, hs: "HomeServer"): hs.config.experimental.msc3984_appservice_key_query ) + self._task_scheduler.register_action( + self._delete_old_one_time_keys_task, "delete_old_otks" + ) + @trace @cancellable async def query_devices( @@ -1574,6 +1581,34 @@ async def has_different_keys(self, user_id: str, body: JsonDict) -> bool: return True return False + async def _delete_old_one_time_keys_task( + self, task: ScheduledTask + ) -> (TaskStatus, Optional[JsonMapping], Optional[str]): + """Scheduler task to delete old one time keys. + + Until Synapse 1.119, Synapse used to issue one-time-keys in a random order, leading to the possibility + that it could still have old OTKs that the client has dropped. This task is scheduled exactly once + by a database schema delta file, and it clears out old one-time-keys that look like they came from libolm. + """ + user = task.result.get("from_user", "") if task.result else "" + while True: + user, rowcount = await self.store.delete_old_otks_for_one_user(user) + if user is None: + # We're done! + return TaskStatus.COMPLETE, None, None + + logger.debug("Deleted %i old one-time-keys for user '%s'", rowcount, user) + + # Store our progress + await self._task_scheduler.update_task(task.id, result={"from_user": user}) + + # Sleep a little before doing the next user. + # + # matrix.org has about 15M users in the e2e_one_time_keys_json table + # (comprising 20M devices). We want this to take about a week, so we need + # to do 25 per second. + await self.clock.sleep(0.04) + def _check_cross_signing_key( key: JsonDict, user_id: str, key_type: str, signing_key: Optional[VerifyKey] = None diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 1fbc49e7c5a..542514d49fd 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -1453,6 +1453,67 @@ def impl(txn: LoggingTransaction) -> Tuple[bool, Optional[int]]: impl, ) + async def _delete_old_one_time_keys( + self, + _progress: JsonDict, + _batch_size: int, + ) -> int: + """ + A background db migration which will schedule a task to delete one-time-keys older than 1w. + + Until Synapse 1.119, Synapse used to issue one-time-keys in a random order, leading to the possibility + that it could still have old OTKs that the client has dropped. We add a migration which will drop old OTKs, + to flush them out. + """ + # We want this to run reasonably slowly, so rather than doing the work here, + # which would block all other background updates, we schedule a task to do it for us. + await self.hs.get_task_scheduler().schedule_task(DELETE_OLD_OTKS_TASK_NAME) + + await self.db_pool.updates._end_background_update( + _BackgroundUpdates.DELETE_OLD_OTKS_NAME + ) + return 0 + + async def delete_old_otks_for_one_user(self, after_user_id: str) -> (Optional[str], int): + """Deletes old OTKs belonging to one user. + + Returns: + `(user, rows)`, where: + * `user` is the user ID of the updated user, or None if we are don + * `rows` is the number of deleted rows + """ + def impl(txn: LoggingTransaction) -> (Optional[str], int): + # Find the next user + txn.execute( + """ + SELECT user_id FROM e2e_one_time_keys_json WHERE user_id > ? LIMIT 1 + """, + (after_user_id,), + ) + row = txn.fetchone() + if not row: + # We're done! + return None, 0 + (user_id,) = row + + # Delete any old OTKs belonging to that user. + # + # We only actually consider OTKs whose key ID is 6 characters long. These + # keys were likely made by libolm rather than Vodozemac; libolm only kept + # 100 private OTKs, so was far more vulnerable than Vodozemac to throwing + # away keys prematurely. + txn.execute( + """ + DELETE FROM e2e_one_time_keys_json + WHERE user_id = ? AND ts_added_ms < ? AND length(key_id) = 6 + """, + (user_id, self._clock.time_msec() - (7 * 24 * 3600 * 1000)), + ) + + return user_id, txn.rowcount + + return await self.db_pool.runInteraction("delete_old_otks_for_one_user", impl) + class EndToEndKeyStore(EndToEndKeyWorkerStore, SQLBaseStore): def __init__( diff --git a/synapse/storage/schema/main/delta/88/05_drop_old_otks.sql.postgres b/synapse/storage/schema/main/delta/88/05_drop_old_otks.sql.postgres new file mode 100644 index 00000000000..93a68836ee5 --- /dev/null +++ b/synapse/storage/schema/main/delta/88/05_drop_old_otks.sql.postgres @@ -0,0 +1,19 @@ +-- +-- This file is licensed under the Affero General Public License (AGPL) version 3. +-- +-- Copyright (C) 2024 New Vector, Ltd +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- See the GNU Affero General Public License for more details: +-- . + +-- Until Synapse 1.119, Synapse used to issue one-time-keys in a random order, leading to the possibility +-- that it could still have old OTKs that the client has dropped. +-- +-- We create a scheduled task which will drop old OTKs, to flush them out. +INSERT INTO scheduled_tasks(id, action, status, timestamp) + VALUES ('delete_old_otks_task', 'delete_old_otks', 'scheduled', extract(epoch from current_timestamp) * 1000); diff --git a/synapse/storage/schema/main/delta/88/05_drop_old_otks.sql.sqlite b/synapse/storage/schema/main/delta/88/05_drop_old_otks.sql.sqlite new file mode 100644 index 00000000000..cdc2b5d211c --- /dev/null +++ b/synapse/storage/schema/main/delta/88/05_drop_old_otks.sql.sqlite @@ -0,0 +1,19 @@ +-- +-- This file is licensed under the Affero General Public License (AGPL) version 3. +-- +-- Copyright (C) 2024 New Vector, Ltd +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU Affero General Public License as +-- published by the Free Software Foundation, either version 3 of the +-- License, or (at your option) any later version. +-- +-- See the GNU Affero General Public License for more details: +-- . + +-- Until Synapse 1.119, Synapse used to issue one-time-keys in a random order, leading to the possibility +-- that it could still have old OTKs that the client has dropped. +-- +-- We create a scheduled task which will drop old OTKs, to flush them out. +INSERT INTO scheduled_tasks(id, action, status, timestamp) + VALUES ('delete_old_otks_task', 'delete_old_otks', 'scheduled', strftime('%s', 'now') * 1000);