From 7ee8c5a4d0fd12c79ffe665d07f1d72d795fb0fc Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 25 Feb 2021 14:08:47 -0500 Subject: [PATCH 1/3] Purge chain cover tables when purging events. --- changelog.d/9498.bugfix | 1 + .../storage/databases/main/purge_events.py | 40 ++++++++++++++++--- 2 files changed, 36 insertions(+), 5 deletions(-) create mode 100644 changelog.d/9498.bugfix diff --git a/changelog.d/9498.bugfix b/changelog.d/9498.bugfix new file mode 100644 index 000000000000..dce0ad0920e2 --- /dev/null +++ b/changelog.d/9498.bugfix @@ -0,0 +1 @@ +Properly purge the event chain cover index when purging history. diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index ecfc9f20b190..816543537f5a 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -28,7 +28,10 @@ class PurgeEventsStore(StateGroupWorkerStore, SQLBaseStore): async def purge_history( self, room_id: str, token: str, delete_local_events: bool ) -> Set[int]: - """Deletes room history before a certain point + """Deletes room history before a certain point. + + Note that only a single purge can occur at once, this is guaranteed via + a higher level (in the PaginationHandler). Args: room_id: @@ -52,7 +55,9 @@ async def purge_history( delete_local_events, ) - def _purge_history_txn(self, txn, room_id, token, delete_local_events): + def _purge_history_txn( + self, txn, room_id: str, token: RoomStreamToken, delete_local_events: bool + ) -> Set[int]: # Tables that should be pruned: # event_auth # event_backward_extremities @@ -103,7 +108,7 @@ def _purge_history_txn(self, txn, room_id, token, delete_local_events): if max_depth < token.topological: # We need to ensure we don't delete all the events from the database # otherwise we wouldn't be able to send any events (due to not - # having any backwards extremeties) + # having any backwards extremities) raise SynapseError( 400, "topological_ordering is greater than forward extremeties" ) @@ -154,7 +159,7 @@ def _purge_history_txn(self, txn, room_id, token, delete_local_events): logger.info("[purge] Finding new backward extremities") - # We calculate the new entries for the backward extremeties by finding + # We calculate the new entries for the backward extremities by finding # events to be purged that are pointed to by events we're not going to # purge. txn.execute( @@ -296,7 +301,7 @@ async def purge_room(self, room_id: str) -> List[int]: "purge_room", self._purge_room_txn, room_id ) - def _purge_room_txn(self, txn, room_id): + def _purge_room_txn(self, txn, room_id: str) -> List[int]: # First we fetch all the state groups that should be deleted, before # we delete that information. txn.execute( @@ -310,6 +315,29 @@ def _purge_room_txn(self, txn, room_id): state_groups = [row[0] for row in txn] + # Get all the auth chains that are referenced by events that are to be + # deleted. + txn.execute( + """ + SELECT chain_id, sequence_number FROM events + LEFT JOIN event_auth_chains USING (event_id) + WHERE room_id = ? + """, + (room_id,), + ) + referenced_chain_id_tuples = list(txn) + + logger.info("[purge] removing events from event_auth_chain_links") + txn.executemany( + "DELETE FROM event_auth_chain_links WHERE " + "(origin_chain_id = ? AND origin_sequence_number = ?) OR " + "(target_chain_id = ? AND target_sequence_number = ?)", + ( + (chain_id, seq_num, chain_id, seq_num) + for (chain_id, seq_num) in referenced_chain_id_tuples + ), + ) + # Now we delete tables which lack an index on room_id but have one on event_id for table in ( "event_auth", @@ -319,6 +347,8 @@ def _purge_room_txn(self, txn, room_id): "event_reference_hashes", "event_relations", "event_to_state_groups", + "event_auth_chains", + "event_auth_chain_to_calculate", "redactions", "rejections", "state_events", From fb83f9a120461f9efee5c475e616a344ea74c0fe Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 25 Feb 2021 14:08:56 -0500 Subject: [PATCH 2/3] Remove an unused variable. --- synapse/storage/purge_events.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/synapse/storage/purge_events.py b/synapse/storage/purge_events.py index 3c4908865f88..4dcd848c59d9 100644 --- a/synapse/storage/purge_events.py +++ b/synapse/storage/purge_events.py @@ -73,9 +73,6 @@ async def _find_unreferenced_groups(self, state_groups: Set[int]) -> Set[int]: Returns: The set of state groups that can be deleted. """ - # Graph of state group -> previous group - graph = {} - # Set of events that we have found to be referenced by events referenced_groups = set() @@ -111,8 +108,6 @@ async def _find_unreferenced_groups(self, state_groups: Set[int]) -> Set[int]: next_to_search |= prevs state_groups_seen |= prevs - graph.update(edges) - to_delete = state_groups_seen - referenced_groups return to_delete From 206eb5c9057ed8dec1652b91558f7bab17f35031 Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 1 Mar 2021 12:28:20 -0500 Subject: [PATCH 3/3] Use multi-line strings. --- synapse/storage/databases/main/purge_events.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/purge_events.py b/synapse/storage/databases/main/purge_events.py index 816543537f5a..0836e4af4934 100644 --- a/synapse/storage/databases/main/purge_events.py +++ b/synapse/storage/databases/main/purge_events.py @@ -329,9 +329,11 @@ def _purge_room_txn(self, txn, room_id: str) -> List[int]: logger.info("[purge] removing events from event_auth_chain_links") txn.executemany( - "DELETE FROM event_auth_chain_links WHERE " - "(origin_chain_id = ? AND origin_sequence_number = ?) OR " - "(target_chain_id = ? AND target_sequence_number = ?)", + """ + DELETE FROM event_auth_chain_links WHERE + (origin_chain_id = ? AND origin_sequence_number = ?) OR + (target_chain_id = ? AND target_sequence_number = ?) + """, ( (chain_id, seq_num, chain_id, seq_num) for (chain_id, seq_num) in referenced_chain_id_tuples