From b64d312421976162a8d41246f11652b5003bb66f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 10 May 2017 17:46:41 +0100 Subject: [PATCH 1/3] add some logging to purge_history --- synapse/storage/events.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2ab44ceaa797..512828cf342d 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2033,6 +2033,8 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): for event_id, state_key in event_rows: txn.call_after(self._get_state_group_for_event.invalidate, (event_id,)) + logger.debug("[purge] Finding new backward extremities") + # We calculate the new entries for the backward extremeties by finding # all events that point to events that are to be purged txn.execute( @@ -2045,6 +2047,8 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): ) new_backwards_extrems = txn.fetchall() + logger.debug("[purge] replacing backward extremities: %r", new_backwards_extrems) + txn.execute( "DELETE FROM event_backward_extremities WHERE room_id = ?", (room_id,) @@ -2059,6 +2063,8 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): ] ) + logger.debug("[purge] finding redundant state groups") + # Get all state groups that are only referenced by events that are # to be deleted. txn.execute( @@ -2076,6 +2082,10 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): state_rows = txn.fetchall() state_groups_to_delete = [sg for sg, in state_rows] + logger.debug( + "[purge] finding state groups which depend on redundant state groups" + ) + # Now we get all the state groups that rely on these state groups new_state_edges = [] chunks = [ @@ -2096,6 +2106,8 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): # Now we turn the state groups that reference to-be-deleted state groups # to non delta versions. for new_state_edge in new_state_edges: + logger.debug("[purge] de-delta-ing remaining state group %s", + new_state_edge) curr_state = self._get_state_groups_from_groups_txn( txn, [new_state_edge], types=None ) @@ -2132,6 +2144,7 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): ], ) + logger.debug("[purge] removing redundant state groups") txn.executemany( "DELETE FROM state_groups_state WHERE state_group = ?", state_rows @@ -2140,12 +2153,15 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): "DELETE FROM state_groups WHERE id = ?", state_rows ) + # Delete all non-state + logger.debug("[purge] removing events from event_to_state_groups") txn.executemany( "DELETE FROM event_to_state_groups WHERE event_id = ?", [(event_id,) for event_id, _ in event_rows] ) + logger.debug("[purge] updating room_depth") txn.execute( "UPDATE room_depth SET min_depth = ? WHERE room_id = ?", (topological_ordering, room_id,) @@ -2171,16 +2187,15 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): "event_signatures", "rejections", ): + logger.debug("[purge] removing non-state events from %s", table) + txn.executemany( "DELETE FROM %s WHERE event_id = ?" % (table,), to_delete ) - txn.executemany( - "DELETE FROM events WHERE event_id = ?", - to_delete - ) # Mark all state and own events as outliers + logger.debug("[purge] marking events as outliers") txn.executemany( "UPDATE events SET outlier = ?" " WHERE event_id = ?", @@ -2190,6 +2205,8 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): ] ) + logger.debug("[purge] done") + @defer.inlineCallbacks def is_event_after(self, event_id1, event_id2): """Returns True if event_id1 is after event_id2 in the stream From 8e345ce46532974aac08c15cf4c90924ec4496d5 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 10 May 2017 18:17:41 +0100 Subject: [PATCH 2/3] Don't de-delta state groups we're about to delete --- synapse/storage/events.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 512828cf342d..2a37e6f1a82b 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2080,19 +2080,14 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): ) state_rows = txn.fetchall() - state_groups_to_delete = [sg for sg, in state_rows] - - logger.debug( - "[purge] finding state groups which depend on redundant state groups" - ) + state_groups_to_delete = set([sg for sg, in state_rows]) # Now we get all the state groups that rely on these state groups + logger.debug("[purge] finding state groups which depend on redundant" + " state groups") new_state_edges = [] - chunks = [ - state_groups_to_delete[i:i + 100] - for i in xrange(0, len(state_groups_to_delete), 100) - ] - for chunk in chunks: + for i in xrange(0, len(state_rows), 100): + chunk = [sg for sg, in state_rows[i:i + 100]] rows = self._simple_select_many_txn( txn, table="state_group_edges", @@ -2101,7 +2096,10 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): retcols=["state_group"], keyvalues={}, ) - new_state_edges.extend(row["state_group"] for row in rows) + new_state_edges.extend( + row["state_group"] for row in rows + if row["state_group"] not in state_groups_to_delete + ) # Now we turn the state groups that reference to-be-deleted state groups # to non delta versions. From dc026bb16ff552e9424be217ec5c64104c8b193f Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 11 May 2017 10:56:12 +0100 Subject: [PATCH 3/3] Tidy purge code and add some comments Try to make this clearer with more comments and some variable renames --- synapse/storage/events.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 2a37e6f1a82b..dbd63078c6c7 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -2080,14 +2080,19 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): ) state_rows = txn.fetchall() + + # make a set of the redundant state groups, so that we can look them up + # efficiently state_groups_to_delete = set([sg for sg, in state_rows]) # Now we get all the state groups that rely on these state groups logger.debug("[purge] finding state groups which depend on redundant" " state groups") - new_state_edges = [] + remaining_state_groups = [] for i in xrange(0, len(state_rows), 100): chunk = [sg for sg, in state_rows[i:i + 100]] + # look for state groups whose prev_state_group is one we are about + # to delete rows = self._simple_select_many_txn( txn, table="state_group_edges", @@ -2096,26 +2101,28 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): retcols=["state_group"], keyvalues={}, ) - new_state_edges.extend( + remaining_state_groups.extend( row["state_group"] for row in rows + + # exclude state groups we are about to delete: no point in + # updating them if row["state_group"] not in state_groups_to_delete ) - # Now we turn the state groups that reference to-be-deleted state groups - # to non delta versions. - for new_state_edge in new_state_edges: - logger.debug("[purge] de-delta-ing remaining state group %s", - new_state_edge) + # Now we turn the state groups that reference to-be-deleted state + # groups to non delta versions. + for sg in remaining_state_groups: + logger.debug("[purge] de-delta-ing remaining state group %s", sg) curr_state = self._get_state_groups_from_groups_txn( - txn, [new_state_edge], types=None + txn, [sg], types=None ) - curr_state = curr_state[new_state_edge] + curr_state = curr_state[sg] self._simple_delete_txn( txn, table="state_groups_state", keyvalues={ - "state_group": new_state_edge, + "state_group": sg, } ) @@ -2123,7 +2130,7 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): txn, table="state_group_edges", keyvalues={ - "state_group": new_state_edge, + "state_group": sg, } ) @@ -2132,7 +2139,7 @@ def _delete_old_state_txn(self, txn, room_id, topological_ordering): table="state_groups_state", values=[ { - "state_group": new_state_edge, + "state_group": sg, "room_id": room_id, "type": key[0], "state_key": key[1],