From e0d7fab784b31a72fe5215ababbff325782ccef2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 22 Aug 2022 20:13:28 -0500 Subject: [PATCH 01/66] Keep track when we tried to backfill an event --- .../delta/72/04event_backfill_access_time.sql | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql diff --git a/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql b/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql new file mode 100644 index 000000000000..bd936e467851 --- /dev/null +++ b/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql @@ -0,0 +1,26 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- Add a table that keeps track of when we last tried to backfill an event. This +-- allows us to be more intelligent when we decide to retry (we don't need to +-- fail over and over) and we can process that event in the background so we +-- don't block on it each time. +CREATE TABLE IF NOT EXISTS event_backfill_access_time( + event_id TEXT NOT NULL, + ts BIGINT NOT NULL +); + +CREATE UNIQUE INDEX IF NOT EXISTS event_backfill_access_time_event_id ON event_backfill_access_time(event_id); From b8d55d38d823ac040a6179091f345edaba38075e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 24 Aug 2022 22:56:50 -0500 Subject: [PATCH 02/66] Record in some fail spots --- synapse/handlers/federation_event.py | 5 +++++ .../storage/databases/main/event_federation.py | 16 ++++++++++++++++ .../delta/72/04event_backfill_access_time.sql | 7 ++++--- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 32326975a187..9309e4ae9b0b 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -852,6 +852,8 @@ async def _process_pulled_event( self._sanity_check_event(event) except SynapseError as err: logger.warning("Event %s failed sanity check: %s", event_id, err) + if backfilled: + await self._store.record_event_backfill_attempt(event_id) return try: @@ -887,6 +889,9 @@ async def _process_pulled_event( backfilled=backfilled, ) except FederationError as e: + if backfilled: + await self._store.record_event_backfill_attempt(event_id) + if e.code == 403: logger.warning("Pulled event %s failed history check.", event_id) else: diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index c836078da683..3c007a0ee9a1 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1292,6 +1292,22 @@ def _get_backfill_events( return event_id_results + @trace + async def record_event_backfill_attempt(self, event_id: str, room_id: str) -> None: + await self.db_pool.simple_upsert( + table="event_backfill_attempts", + keyvalues={"event_id": event_id}, + values={ + 
"event_id": event_id, + # TODO: This needs to increment + "num_attempts": 1, + "last_attempt_ts": self._clock.time_msec(), + }, + insertion_values={}, + desc="insert_event_backfill_attempt", + lock=False, + ) + async def get_missing_events( self, room_id: str, diff --git a/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql b/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql index bd936e467851..71224c2b48c0 100644 --- a/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql +++ b/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql @@ -18,9 +18,10 @@ -- allows us to be more intelligent when we decide to retry (we don't need to -- fail over and over) and we can process that event in the background so we -- don't block on it each time. -CREATE TABLE IF NOT EXISTS event_backfill_access_time( +CREATE TABLE IF NOT EXISTS event_backfill_attempts( event_id TEXT NOT NULL, - ts BIGINT NOT NULL + num_attempts INT NOT NULL, + last_attempt_ts BIGINT NOT NULL ); -CREATE UNIQUE INDEX IF NOT EXISTS event_backfill_access_time_event_id ON event_backfill_access_time(event_id); +CREATE UNIQUE INDEX IF NOT EXISTS event_backfill_attempts_event_id ON event_backfill_attempts(event_id); From bec26e23fe922835ac76b265a060015946008869 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 00:04:59 -0500 Subject: [PATCH 03/66] Record and clear attempts --- synapse/federation/federation_base.py | 7 +++++ .../databases/main/event_federation.py | 4 +-- synapse/storage/databases/main/events.py | 28 +++++++++++++------ 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 4269a98db2db..0ee222239939 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -129,6 +129,13 @@ async def _check_sigs_and_hash( "event_id": pdu.event_id, } ) + + # TODO: Is it okay to assume this is called from backfilling? + # + # In any case, we definately don't want to keep fetching spam over + # and over and failing it. + await self._store.record_event_backfill_attempt(pdu.event_id) + # we redact (to save disk space) as well as soft-failing (to stop # using the event in prev_events). redacted_event = prune_event(pdu) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 3c007a0ee9a1..627577dbafc5 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1293,7 +1293,7 @@ def _get_backfill_events( return event_id_results @trace - async def record_event_backfill_attempt(self, event_id: str, room_id: str) -> None: + async def record_event_backfill_attempt(self, event_id: str) -> None: await self.db_pool.simple_upsert( table="event_backfill_attempts", keyvalues={"event_id": event_id}, @@ -1304,7 +1304,7 @@ async def record_event_backfill_attempt(self, event_id: str, room_id: str) -> No "last_attempt_ts": self._clock.time_msec(), }, insertion_values={}, - desc="insert_event_backfill_attempt", + desc="record_event_backfill_attempt", lock=False, ) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index a4010ee28dca..75254106a1ee 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2435,17 +2435,27 @@ def _update_backward_extremeties( "DELETE FROM event_backward_extremities" " WHERE event_id = ? 
AND room_id = ?" ) + backward_extremities_to_remove = [ + (ev.event_id, ev.room_id) + for ev in events + if not ev.internal_metadata.is_outlier() + # If we encountered an event with no prev_events, then we might + # as well remove it now because it won't ever have anything else + # to backfill from. + or len(ev.prev_event_ids()) == 0 + ] txn.execute_batch( query, - [ - (ev.event_id, ev.room_id) - for ev in events - if not ev.internal_metadata.is_outlier() - # If we encountered an event with no prev_events, then we might - # as well remove it now because it won't ever have anything else - # to backfill from. - or len(ev.prev_event_ids()) == 0 - ], + backward_extremities_to_remove, + ) + + # Since we no longer need it as a backward extremity, it won't be + # backfilled again so we no longer need to store the backfill attempts + # around it. + query = "DELETE FROM event_backfill_attempts" " WHERE event_id = ?" + txn.execute_batch( + query, + backward_extremities_to_remove, ) From fee37c3a1c18fa8c50394916972e41e74ea12b8e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 00:13:33 -0500 Subject: [PATCH 04/66] Add changelog --- changelog.d/13589.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13589.feature diff --git a/changelog.d/13589.feature b/changelog.d/13589.feature new file mode 100644 index 000000000000..78fa1ddb5202 --- /dev/null +++ b/changelog.d/13589.feature @@ -0,0 +1 @@ +Keep track when we attempt to backfill an event but fail so we can intelligently back-off in the future. From d1290be6ade3dd095259595a46056351c01db18c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 00:26:57 -0500 Subject: [PATCH 05/66] Remove from when spam checker fails See https://github.com/matrix-org/synapse/pull/13589#discussion_r954515215 --- synapse/federation/federation_base.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 0ee222239939..6326024f2072 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -130,12 +130,6 @@ async def _check_sigs_and_hash( } ) - # TODO: Is it okay to assume this is called from backfilling? - # - # In any case, we definately don't want to keep fetching spam over - # and over and failing it. - await self._store.record_event_backfill_attempt(pdu.event_id) - # we redact (to save disk space) as well as soft-failing (to stop # using the event in prev_events). 
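         # (prune_event returns a copy of the event stripped down to the keys
         # the redaction algorithm keeps - that is the "redact" referred to above.)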
redacted_event = prune_event(pdu) From f9119d0a9ea8f011babe7f6851a4033d3294f018 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 17:05:28 -0500 Subject: [PATCH 06/66] Custom upsert to increment See https://github.com/matrix-org/synapse/pull/13589#discussion_r954506714 --- synapse/handlers/pagination.py | 3 + .../databases/main/event_federation.py | 79 ++++++++++++++++--- synapse/storage/schema/__init__.py | 2 +- .../01event_backfill_access_time.sql} | 0 4 files changed, 73 insertions(+), 11 deletions(-) rename synapse/storage/schema/main/delta/{72/04event_backfill_access_time.sql => 73/01event_backfill_access_time.sql} (100%) diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py index 74e944bce72d..b1f4db5584bc 100644 --- a/synapse/handlers/pagination.py +++ b/synapse/handlers/pagination.py @@ -589,6 +589,9 @@ async def get_messages( state, time_now, config=serialize_options ) + # TODO: Remove (just for testing) + await self.store.record_event_backfill_attempt(chunk["chunk"][0]["event_id"]) + return chunk async def _shutdown_and_purge_room( diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 627577dbafc5..ab9cbe4e5578 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1294,20 +1294,79 @@ def _get_backfill_events( @trace async def record_event_backfill_attempt(self, event_id: str) -> None: - await self.db_pool.simple_upsert( + if self.database_engine.can_native_upsert: + await self.db_pool.runInteraction( + "record_event_backfill_attempt", + self._record_event_backfill_attempt_upsert_native_txn, + event_id, + db_autocommit=True, # Safe as its a single upsert + ) + else: + await self.db_pool.runInteraction( + "record_event_backfill_attempt", + self._record_event_backfill_attempt_upsert_emulated_txn, + event_id, + ) + + def _record_event_backfill_attempt_upsert_native_txn( + self, + txn: LoggingTransaction, + event_id: str, + ) -> None: + assert self.database_engine.can_native_upsert + + sql = """ + INSERT INTO event_backfill_attempts ( + event_id, num_attempts, last_attempt_ts + ) + VALUES (?, ?, ?) 
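+            /* If a row for this event already exists, bump its attempt counter
+             * in the database itself - atomic, with no read-modify-write race
+             * across workers - rather than resetting it to the inserted values: */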
+            ON CONFLICT (event_id) DO UPDATE SET
+                event_id=EXCLUDED.event_id,
+                num_attempts=event_backfill_attempts.num_attempts + 1,
+                last_attempt_ts=EXCLUDED.last_attempt_ts;
+        """
+
+        txn.execute(
+            sql, (event_id, 1, self._clock.time_msec())  # type: ignore[attr-defined]
+        )
+
+    def _record_event_backfill_attempt_upsert_emulated_txn(
+        self,
+        txn: LoggingTransaction,
+        event_id: str,
+    ) -> None:
+        self.database_engine.lock_table(txn, "event_backfill_attempts")
+
+        prev_row = self.db_pool.simple_select_one_txn(
+            txn,
             table="event_backfill_attempts",
             keyvalues={"event_id": event_id},
-            values={
-                "event_id": event_id,
-                # TODO: This needs to increment
-                "num_attempts": 1,
-                "last_attempt_ts": self._clock.time_msec(),
-            },
-            insertion_values={},
-            desc="record_event_backfill_attempt",
-            lock=False,
+            retcols=("num_attempts",),
+            allow_none=True,
         )
 
+        if not prev_row:
+            self.db_pool.simple_insert_txn(
+                txn,
+                table="event_backfill_attempts",
+                values={
+                    "event_id": event_id,
+                    "num_attempts": 1,
+                    "last_attempt_ts": self._clock.time_msec(),
+                },
+            )
+        else:
+            self.db_pool.simple_update_one_txn(
+                txn,
+                table="event_backfill_attempts",
+                keyvalues={"event_id": event_id},
+                updatevalues={
+                    "event_id": event_id,
+                    "num_attempts": prev_row["num_attempts"] + 1,
+                    "last_attempt_ts": self._clock.time_msec(),
+                },
+            )
+
     async def get_missing_events(
         self,
         room_id: str,

diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py
index a9a88c8bfd8b..4bc8d280019f 100644
--- a/synapse/storage/schema/__init__.py
+++ b/synapse/storage/schema/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-SCHEMA_VERSION = 72  # remember to update the list below when updating
+SCHEMA_VERSION = 73  # remember to update the list below when updating
 """Represents the expectations made by the codebase about the database schema
 
 This should be incremented whenever the codebase changes its requirements on the
diff --git a/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql b/synapse/storage/schema/main/delta/73/01event_backfill_access_time.sql
similarity index 100%
rename from synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql
rename to synapse/storage/schema/main/delta/73/01event_backfill_access_time.sql

From f5c6fe7ef934ddef211afcc3b6b15fd3b4a39561 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 25 Aug 2022 17:10:59 -0500
Subject: [PATCH 07/66] Fix lints

---
 synapse/handlers/pagination.py                      | 3 ---
 synapse/storage/databases/main/event_federation.py | 4 +---
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/synapse/handlers/pagination.py b/synapse/handlers/pagination.py
index b1f4db5584bc..74e944bce72d 100644
--- a/synapse/handlers/pagination.py
+++ b/synapse/handlers/pagination.py
@@ -589,9 +589,6 @@ async def get_messages(
             state, time_now, config=serialize_options
         )
 
-        # TODO: Remove (just for testing)
-        await self.store.record_event_backfill_attempt(chunk["chunk"][0]["event_id"])
-
         return chunk
 
     async def _shutdown_and_purge_room(
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index ab9cbe4e5578..6e93a5af3921 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -1326,9 +1326,7 @@ def _record_event_backfill_attempt_upsert_native_txn(
                 last_attempt_ts=EXCLUDED.last_attempt_ts;
         """
 
-        txn.execute(
-            sql, (event_id, 1,
self._clock.time_msec()) # type: ignore[attr-defined] - ) + txn.execute(sql, (event_id, 1, self._clock.time_msec())) def _record_event_backfill_attempt_upsert_emulated_txn( self, From 16ebec01f03a6a3c701a171fb0c48cb590e23973 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 17:15:57 -0500 Subject: [PATCH 08/66] Remove extra whitespace --- synapse/federation/federation_base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index 6326024f2072..4269a98db2db 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -129,7 +129,6 @@ async def _check_sigs_and_hash( "event_id": pdu.event_id, } ) - # we redact (to save disk space) as well as soft-failing (to stop # using the event in prev_events). redacted_event = prune_event(pdu) From ce07aa1aaa9182ff2cecd774b88f7ee980f314f1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 17:48:38 -0500 Subject: [PATCH 09/66] Move to correct folder --- .../04event_backfill_access_time.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename synapse/storage/schema/main/delta/{73/01event_backfill_access_time.sql => 72/04event_backfill_access_time.sql} (100%) diff --git a/synapse/storage/schema/main/delta/73/01event_backfill_access_time.sql b/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql similarity index 100% rename from synapse/storage/schema/main/delta/73/01event_backfill_access_time.sql rename to synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql From 5811ba135e3297f99f53b919ea948084b9f75afc Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 17:48:53 -0500 Subject: [PATCH 10/66] Set the version back --- synapse/storage/schema/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 4bc8d280019f..a9a88c8bfd8b 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -SCHEMA_VERSION = 73 # remember to update the list below when updating +SCHEMA_VERSION = 72 # remember to update the list below when updating """Represents the expectations made by the codebase about the database schema This should be incremented whenever the codebase changes its requirements on the From cf2b0937fed0b9bd59124f9e110f51a5c8c96078 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 17:56:42 -0500 Subject: [PATCH 11/66] Fix `TypeError: not all arguments converted during string formatting` --- synapse/storage/databases/main/events.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 75254106a1ee..005ec1b22c00 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2435,7 +2435,7 @@ def _update_backward_extremeties( "DELETE FROM event_backward_extremities" " WHERE event_id = ? AND room_id = ?" 
) - backward_extremities_to_remove = [ + backward_extremity_tuples_to_remove = [ (ev.event_id, ev.room_id) for ev in events if not ev.internal_metadata.is_outlier() @@ -2446,16 +2446,23 @@ def _update_backward_extremeties( ] txn.execute_batch( query, - backward_extremities_to_remove, + backward_extremity_tuples_to_remove, ) # Since we no longer need it as a backward extremity, it won't be # backfilled again so we no longer need to store the backfill attempts # around it. - query = "DELETE FROM event_backfill_attempts" " WHERE event_id = ?" + query = """ + DELETE FROM event_backfill_attempts + WHERE event_id = ? + """ + backward_extremity_event_ids_to_remove = [ + (extremity_tuple[0],) + for extremity_tuple in backward_extremity_tuples_to_remove + ] txn.execute_batch( query, - backward_extremities_to_remove, + backward_extremity_event_ids_to_remove, ) From cbb4194da8fd327989b0d5b6e5bbfabe6272ad4c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 19:34:01 -0500 Subject: [PATCH 12/66] Add test to make sure failed backfill attempts are recorded --- tests/handlers/test_federation_event.py | 156 ++++++++++++++++++++++-- 1 file changed, 149 insertions(+), 7 deletions(-) diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 51c8dd649822..3da00e8ea201 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -12,8 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. from unittest import mock +from typing import ( + List, +) -from synapse.events import make_event_from_dict +from synapse.events import EventBase, make_event_from_dict from synapse.events.snapshot import EventContext from synapse.federation.transport.client import StateRequestResponse from synapse.logging.context import LoggingContext @@ -51,7 +54,16 @@ def test_process_pulled_event_with_missing_state(self) -> None: We check that the pulled event is correctly persisted, and that the state is as we expect. """ - return self._test_process_pulled_event_with_missing_state(False) + results = self._test_process_pulled_event_with_missing_state( + prev_exists_as_outlier=False, + ) + + self._assert_pulled_event(results.get("pulled_event")) + self._assert_state_at_pulled_event( + pulled_event=results.get("pulled_event"), + state_at_pulled_event=results.get("state_at_pulled_event"), + prev_exists_as_outlier=False, + ) def test_process_pulled_event_with_missing_state_where_prev_is_outlier( self, @@ -63,14 +75,127 @@ def test_process_pulled_event_with_missing_state_where_prev_is_outlier( but in this case we already have the prev_event (as an outlier, obviously - if it were a regular event, we wouldn't need to request the state). 
""" - return self._test_process_pulled_event_with_missing_state(True) + results = self._test_process_pulled_event_with_missing_state( + prev_exists_as_outlier=True, + ) + + self._assert_pulled_event(results.get("pulled_event")) + self._assert_state_at_pulled_event( + pulled_event=results.get("pulled_event"), + state_at_pulled_event=results.get("state_at_pulled_event"), + prev_exists_as_outlier=True, + ) + + def test_process_pulled_event_records_failed_backfill_attempts( + self, + ) -> None: + """ """ + OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" + main_store = self.hs.get_datastores().main + + # Create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + room_version = self.get_success(main_store.get_room_version(room_id)) + + # We expect an outbound request to /state_ids, so stub that out + self.mock_federation_transport_client.get_room_state_ids.return_value = make_awaitable( + { + # Mimic the other server not knowing about the state at all. + # We want to cause Synapse to throw an error (`Unable to get + # missing prev_event $fake_prev_event`) and fail to backfill + # the pulled event. + "pdu_ids": [], + "auth_chain_ids": [], + } + ) + # We also expect an outbound request to /state + self.mock_federation_transport_client.get_room_state.return_value = make_awaitable( + StateRequestResponse( + # Mimic the other server not knowing about the state at all. + # We want to cause Synapse to throw an error (`Unable to get + # missing prev_event $fake_prev_event`) and fail to backfill + # the pulled event. + auth_events=[], + state=[], + ) + ) + + pulled_event = make_event_from_dict( + self.add_hashes_and_signatures_from_other_server( + { + "type": "test_regular_type", + "room_id": room_id, + "sender": OTHER_USER, + "prev_events": [ + # The fake prev event will make the pulled event fail + # the history check (`Unable to get missing prev_event + # $fake_prev_event`) + "$fake_prev_event" + ], + "auth_events": [], + "origin_server_ts": 1, + "depth": 12, + "content": {"body": "pulled"}, + } + ), + room_version, + ) + + # The function under test: try to process the pulled event + with LoggingContext("test"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, pulled_event, backfilled=True + ) + ) + + # Make sure our backfill attempt was recorded + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_backfill_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + self.assertEqual(backfill_num_attempts, 1) + + # The function under test: try to process the pulled event again + with LoggingContext("test"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, pulled_event, backfilled=True + ) + ) + + # Make sure our second backfill attempt was recorded + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_backfill_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + self.assertEqual(backfill_num_attempts, 2) + + # And as a sanity check, make sure the event was not persisted through all of this. 
+ persisted = self.get_success( + main_store.get_event(pulled_event.event_id, allow_none=True) + ) + self.assertIsNone( + persisted, + "pulled event with invalid signature should not be persisted at all", + ) def _test_process_pulled_event_with_missing_state( - self, prev_exists_as_outlier: bool + self, + *, + backfilled: bool = False, + prev_exists_as_outlier: bool = False, ) -> None: OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" main_store = self.hs.get_datastores().main - state_storage_controller = self.hs.get_storage_controllers().state # create the room user_id = self.register_user("kermit", "test") @@ -205,10 +330,18 @@ async def get_event(destination: str, event_id: str, timeout=None): with LoggingContext("test"): self.get_success( self.hs.get_federation_event_handler()._process_pulled_event( - self.OTHER_SERVER_NAME, pulled_event, backfilled=False + self.OTHER_SERVER_NAME, pulled_event, backfilled=backfilled ) ) + return { + "pulled_event": pulled_event, + "state_at_pulled_event": state_at_prev_event, + } + + def _assert_pulled_event(self, pulled_event: EventBase) -> None: + main_store = self.hs.get_datastores().main + # check that the event is correctly persisted persisted = self.get_success(main_store.get_event(pulled_event.event_id)) self.assertIsNotNone(persisted, "pulled event was not persisted at all") @@ -216,12 +349,21 @@ async def get_event(destination: str, event_id: str, timeout=None): persisted.internal_metadata.is_outlier(), "pulled event was an outlier" ) + def _assert_state_at_pulled_event( + self, + *, + pulled_event: EventBase, + state_at_pulled_event: List[EventBase], + prev_exists_as_outlier: bool, + ) -> None: + state_storage_controller = self.hs.get_storage_controllers().state + # check that the state at that event is as expected state = self.get_success( state_storage_controller.get_state_ids_for_event(pulled_event.event_id) ) expected_state = { - (e.type, e.state_key): e.event_id for e in state_at_prev_event + (e.type, e.state_key): e.event_id for e in state_at_pulled_event } self.assertEqual(state, expected_state) From 621c5d39c86185829012f6d2e2b4072221c7dd4f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 19:40:44 -0500 Subject: [PATCH 13/66] Clean up test --- tests/handlers/test_federation_event.py | 272 +++++++++++------------- 1 file changed, 123 insertions(+), 149 deletions(-) diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 3da00e8ea201..beda5457dc35 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -12,11 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. from unittest import mock -from typing import ( - List, -) -from synapse.events import EventBase, make_event_from_dict +from synapse.events import make_event_from_dict from synapse.events.snapshot import EventContext from synapse.federation.transport.client import StateRequestResponse from synapse.logging.context import LoggingContext @@ -54,16 +51,7 @@ def test_process_pulled_event_with_missing_state(self) -> None: We check that the pulled event is correctly persisted, and that the state is as we expect. 
""" - results = self._test_process_pulled_event_with_missing_state( - prev_exists_as_outlier=False, - ) - - self._assert_pulled_event(results.get("pulled_event")) - self._assert_state_at_pulled_event( - pulled_event=results.get("pulled_event"), - state_at_pulled_event=results.get("state_at_pulled_event"), - prev_exists_as_outlier=False, - ) + return self._test_process_pulled_event_with_missing_state(False) def test_process_pulled_event_with_missing_state_where_prev_is_outlier( self, @@ -75,127 +63,14 @@ def test_process_pulled_event_with_missing_state_where_prev_is_outlier( but in this case we already have the prev_event (as an outlier, obviously - if it were a regular event, we wouldn't need to request the state). """ - results = self._test_process_pulled_event_with_missing_state( - prev_exists_as_outlier=True, - ) - - self._assert_pulled_event(results.get("pulled_event")) - self._assert_state_at_pulled_event( - pulled_event=results.get("pulled_event"), - state_at_pulled_event=results.get("state_at_pulled_event"), - prev_exists_as_outlier=True, - ) - - def test_process_pulled_event_records_failed_backfill_attempts( - self, - ) -> None: - """ """ - OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" - main_store = self.hs.get_datastores().main - - # Create the room - user_id = self.register_user("kermit", "test") - tok = self.login("kermit", "test") - room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) - room_version = self.get_success(main_store.get_room_version(room_id)) - - # We expect an outbound request to /state_ids, so stub that out - self.mock_federation_transport_client.get_room_state_ids.return_value = make_awaitable( - { - # Mimic the other server not knowing about the state at all. - # We want to cause Synapse to throw an error (`Unable to get - # missing prev_event $fake_prev_event`) and fail to backfill - # the pulled event. - "pdu_ids": [], - "auth_chain_ids": [], - } - ) - # We also expect an outbound request to /state - self.mock_federation_transport_client.get_room_state.return_value = make_awaitable( - StateRequestResponse( - # Mimic the other server not knowing about the state at all. - # We want to cause Synapse to throw an error (`Unable to get - # missing prev_event $fake_prev_event`) and fail to backfill - # the pulled event. 
- auth_events=[], - state=[], - ) - ) - - pulled_event = make_event_from_dict( - self.add_hashes_and_signatures_from_other_server( - { - "type": "test_regular_type", - "room_id": room_id, - "sender": OTHER_USER, - "prev_events": [ - # The fake prev event will make the pulled event fail - # the history check (`Unable to get missing prev_event - # $fake_prev_event`) - "$fake_prev_event" - ], - "auth_events": [], - "origin_server_ts": 1, - "depth": 12, - "content": {"body": "pulled"}, - } - ), - room_version, - ) - - # The function under test: try to process the pulled event - with LoggingContext("test"): - self.get_success( - self.hs.get_federation_event_handler()._process_pulled_event( - self.OTHER_SERVER_NAME, pulled_event, backfilled=True - ) - ) - - # Make sure our backfill attempt was recorded - backfill_num_attempts = self.get_success( - main_store.db_pool.simple_select_one_onecol( - table="event_backfill_attempts", - keyvalues={"event_id": pulled_event.event_id}, - retcol="num_attempts", - ) - ) - self.assertEqual(backfill_num_attempts, 1) - - # The function under test: try to process the pulled event again - with LoggingContext("test"): - self.get_success( - self.hs.get_federation_event_handler()._process_pulled_event( - self.OTHER_SERVER_NAME, pulled_event, backfilled=True - ) - ) - - # Make sure our second backfill attempt was recorded - backfill_num_attempts = self.get_success( - main_store.db_pool.simple_select_one_onecol( - table="event_backfill_attempts", - keyvalues={"event_id": pulled_event.event_id}, - retcol="num_attempts", - ) - ) - self.assertEqual(backfill_num_attempts, 2) - - # And as a sanity check, make sure the event was not persisted through all of this. - persisted = self.get_success( - main_store.get_event(pulled_event.event_id, allow_none=True) - ) - self.assertIsNone( - persisted, - "pulled event with invalid signature should not be persisted at all", - ) + return self._test_process_pulled_event_with_missing_state(True) def _test_process_pulled_event_with_missing_state( - self, - *, - backfilled: bool = False, - prev_exists_as_outlier: bool = False, + self, prev_exists_as_outlier: bool ) -> None: OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" main_store = self.hs.get_datastores().main + state_storage_controller = self.hs.get_storage_controllers().state # create the room user_id = self.register_user("kermit", "test") @@ -330,18 +205,10 @@ async def get_event(destination: str, event_id: str, timeout=None): with LoggingContext("test"): self.get_success( self.hs.get_federation_event_handler()._process_pulled_event( - self.OTHER_SERVER_NAME, pulled_event, backfilled=backfilled + self.OTHER_SERVER_NAME, pulled_event, backfilled=False ) ) - return { - "pulled_event": pulled_event, - "state_at_pulled_event": state_at_prev_event, - } - - def _assert_pulled_event(self, pulled_event: EventBase) -> None: - main_store = self.hs.get_datastores().main - # check that the event is correctly persisted persisted = self.get_success(main_store.get_event(pulled_event.event_id)) self.assertIsNotNone(persisted, "pulled event was not persisted at all") @@ -349,23 +216,130 @@ def _assert_pulled_event(self, pulled_event: EventBase) -> None: persisted.internal_metadata.is_outlier(), "pulled event was an outlier" ) - def _assert_state_at_pulled_event( - self, - *, - pulled_event: EventBase, - state_at_pulled_event: List[EventBase], - prev_exists_as_outlier: bool, - ) -> None: - state_storage_controller = self.hs.get_storage_controllers().state - # check that the state at that event is as 
expected
        state = self.get_success(
            state_storage_controller.get_state_ids_for_event(pulled_event.event_id)
        )
        expected_state = {
            (e.type, e.state_key): e.event_id for e in state_at_prev_event
        }
        self.assertEqual(state, expected_state)

        if prev_exists_as_outlier:
            self.mock_federation_transport_client.get_event.assert_not_called()
+
+    def test_process_pulled_event_records_failed_backfill_attempts(
+        self,
+    ) -> None:
+        """
+        Test to make sure that failed backfill attempts for an event are
+        recorded in the `event_backfill_attempts` table.
+
+        In this test, we pretend we are processing a "pulled" event (eg, via
+        backfill). The pulled event has a fake `prev_event` which our server has
+        obviously never seen before so it attempts to request the state at that
+        `prev_event` which expectedly fails because it's a fake event. Because
+        the server can't fetch the state at the missing `prev_event`, the
+        "pulled" event fails the history check and fails to process.
+
+        We check that we correctly record the number of failed backfill attempts
+        for the pulled event and, as a sanity check, that the "pulled" event
+        isn't persisted.
+        """
+        OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
+        main_store = self.hs.get_datastores().main
+
+        # Create the room
+        user_id = self.register_user("kermit", "test")
+        tok = self.login("kermit", "test")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+        room_version = self.get_success(main_store.get_room_version(room_id))
+
+        # We expect an outbound request to /state_ids, so stub that out
+        self.mock_federation_transport_client.get_room_state_ids.return_value = make_awaitable(
+            {
+                # Mimic the other server not knowing about the state at all.
+                # We want to cause Synapse to throw an error (`Unable to get
+                # missing prev_event $fake_prev_event`) and fail to backfill
+                # the pulled event.
+                "pdu_ids": [],
+                "auth_chain_ids": [],
+            }
+        )
+        # We also expect an outbound request to /state
+        self.mock_federation_transport_client.get_room_state.return_value = make_awaitable(
+            StateRequestResponse(
+                # Mimic the other server not knowing about the state at all.
+                # We want to cause Synapse to throw an error (`Unable to get
+                # missing prev_event $fake_prev_event`) and fail to backfill
+                # the pulled event.
+ auth_events=[], + state=[], + ) + ) + + pulled_event = make_event_from_dict( + self.add_hashes_and_signatures_from_other_server( + { + "type": "test_regular_type", + "room_id": room_id, + "sender": OTHER_USER, + "prev_events": [ + # The fake prev event will make the pulled event fail + # the history check (`Unable to get missing prev_event + # $fake_prev_event`) + "$fake_prev_event" + ], + "auth_events": [], + "origin_server_ts": 1, + "depth": 12, + "content": {"body": "pulled"}, + } + ), + room_version, + ) + + # The function under test: try to process the pulled event + with LoggingContext("test"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, pulled_event, backfilled=True + ) + ) + + # Make sure our backfill attempt was recorded + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_backfill_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + self.assertEqual(backfill_num_attempts, 1) + + # The function under test: try to process the pulled event again + with LoggingContext("test"): + self.get_success( + self.hs.get_federation_event_handler()._process_pulled_event( + self.OTHER_SERVER_NAME, pulled_event, backfilled=True + ) + ) + + # Make sure our second backfill attempt was recorded + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_backfill_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + self.assertEqual(backfill_num_attempts, 2) + + # And as a sanity check, make sure the event was not persisted through all of this. + persisted = self.get_success( + main_store.get_event(pulled_event.event_id, allow_none=True) + ) + self.assertIsNone( + persisted, + "pulled event with invalid signature should not be persisted at all", + ) From 75c07bbb5f116800fca164a9ff68bf3c1a94d000 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 19:42:23 -0500 Subject: [PATCH 14/66] Fix comments --- tests/handlers/test_federation_event.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index beda5457dc35..6e1f80a10c51 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -325,7 +325,7 @@ def test_process_pulled_event_records_failed_backfill_attempts( ) ) - # Make sure our second backfill attempt was recorded + # Make sure our second backfill attempt was recorded (`num_attempts` was incremented) backfill_num_attempts = self.get_success( main_store.db_pool.simple_select_one_onecol( table="event_backfill_attempts", @@ -341,5 +341,5 @@ def test_process_pulled_event_records_failed_backfill_attempts( ) self.assertIsNone( persisted, - "pulled event with invalid signature should not be persisted at all", + "pulled event that fails the history check should not be persisted at all", ) From 783cce5271f490f7d27255a6a09520030e653096 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 20:08:46 -0500 Subject: [PATCH 15/66] Add test for clearing backfill attempts --- tests/handlers/test_federation_event.py | 108 +++++++++++++++++++++++- 1 file changed, 106 insertions(+), 2 deletions(-) diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 6e1f80a10c51..6461efbe4536 100644 --- a/tests/handlers/test_federation_event.py +++ 
b/tests/handlers/test_federation_event.py
@@ -235,8 +235,8 @@ def test_process_pulled_event_records_failed_backfill_attempts(
         Test to make sure that failed backfill attempts for an event are
         recorded in the `event_backfill_attempts` table.
 
-        In this test, we pretend we are processing a "pulled" event (eg, via
-        backfill). The pulled event has a fake `prev_event` which our server has
+        In this test, we pretend we are processing a "pulled" event via
+        backfill. The pulled event has a fake `prev_event` which our server has
         obviously never seen before so it attempts to request the state at that
         `prev_event` which expectedly fails because it's a fake event. Because
         the server can't fetch the state at the missing `prev_event`, the
@@ -343,3 +343,107 @@ def test_process_pulled_event_records_failed_backfill_attempts(
             persisted,
             "pulled event that fails the history check should not be persisted at all",
         )
+
+    def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_persisted(
+        self,
+    ) -> None:
+        """
+        Test to make sure that failed backfill attempts
+        (`event_backfill_attempts` table) for an event are cleared after the
+        event is successfully persisted.
+
+        In this test, we pretend we are processing a "pulled" event via
+        backfill. The pulled event successfully processes and the backward
+        extremities are updated along with clearing out any backfill attempts
+        for those old extremities.
+
+        We check that we correctly clear the failed backfill attempts for the
+        pulled event.
+        """
+        OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
+        main_store = self.hs.get_datastores().main
+
+        # Create the room
+        user_id = self.register_user("kermit", "test")
+        tok = self.login("kermit", "test")
+        room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
+        room_version = self.get_success(main_store.get_room_version(room_id))
+
+        # allow the remote user to send state events
+        self.helper.send_state(
+            room_id,
+            "m.room.power_levels",
+            {"events_default": 0, "state_default": 0},
+            tok=tok,
+        )
+
+        # add the remote user to the room
+        member_event = self.get_success(
+            event_injection.inject_member_event(self.hs, room_id, OTHER_USER, "join")
+        )
+
+        initial_state_map = self.get_success(
+            main_store.get_partial_current_state_ids(room_id)
+        )
+
+        auth_event_ids = [
+            initial_state_map[("m.room.create", "")],
+            initial_state_map[("m.room.power_levels", "")],
+            member_event.event_id,
+        ]
+
+        pulled_event = make_event_from_dict(
+            self.add_hashes_and_signatures_from_other_server(
+                {
+                    "type": "test_regular_type",
+                    "room_id": room_id,
+                    "sender": OTHER_USER,
+                    "prev_events": [member_event.event_id],
+                    "auth_events": auth_event_ids,
+                    "origin_server_ts": 1,
+                    "depth": 12,
+                    "content": {"body": "pulled"},
+                }
+            ),
+            room_version,
+        )
+
+        # Fake the "pulled" event failing to backfill once so we can test
+        # if it's cleared out later on.
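+        # (We record the attempt via the storage method directly; the
+        # record-on-failure path itself is exercised by the test above.)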
+        self.get_success(
+            main_store.record_event_backfill_attempt(pulled_event.event_id)
+        )
+        # Make sure we have a backfill attempt recorded for the pulled event
+        backfill_num_attempts = self.get_success(
+            main_store.db_pool.simple_select_one_onecol(
+                table="event_backfill_attempts",
+                keyvalues={"event_id": pulled_event.event_id},
+                retcol="num_attempts",
+            )
+        )
+        self.assertEqual(backfill_num_attempts, 1)
+
+        # The function under test: try to process the pulled event
+        with LoggingContext("test"):
+            self.get_success(
+                self.hs.get_federation_event_handler()._process_pulled_event(
+                    self.OTHER_SERVER_NAME, pulled_event, backfilled=True
+                )
+            )
+
+        # Make sure the backfill attempts for the pulled event are cleared
+        backfill_num_attempts = self.get_success(
+            main_store.db_pool.simple_select_one_onecol(
+                table="event_backfill_attempts",
+                keyvalues={"event_id": pulled_event.event_id},
+                retcol="num_attempts",
+                allow_none=True,
+            )
+        )
+        self.assertIsNone(backfill_num_attempts)
+
+        # And as a sanity check, make sure the "pulled" event was persisted.
+        persisted = self.get_success(
+            main_store.get_event(pulled_event.event_id, allow_none=True)
+        )
+        self.assertIsNotNone(persisted, "pulled event was not persisted at all")

From 54ef84bd5c27ab2f36bedfd6e5413a5363ad9e40 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 25 Aug 2022 20:11:31 -0500
Subject: [PATCH 16/66] Maybe a better comment

---
 synapse/storage/databases/main/events.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py
index 005ec1b22c00..4ca326cad8ed 100644
--- a/synapse/storage/databases/main/events.py
+++ b/synapse/storage/databases/main/events.py
@@ -2449,9 +2449,9 @@ def _update_backward_extremeties(
             backward_extremity_tuples_to_remove,
         )
 
-        # Since we no longer need it as a backward extremity, it won't be
-        # backfilled again so we no longer need to store the backfill attempts
-        # around it.
+        # Since we no longer need these backward extremities, it also means that
+        # they won't be backfilled from again so we no longer need to store the
+        # backfill attempts for them.
         query = """
             DELETE FROM event_backfill_attempts
             WHERE event_id = ?

From 7bf3e7ffd0e01688c56dc2ed448e71b6b0af98c8 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Thu, 25 Aug 2022 21:18:54 -0500
Subject: [PATCH 17/66] WIP: Just working on the query

---
 synapse/handlers/federation.py                 |  9 ++-
 .../databases/main/event_federation.py         | 68 +++++++++++++++----
 2 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py
index e15196205551..d5903231144b 100644
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@@ -258,7 +258,14 @@ async def _maybe_backfill_inner(
         backwards_extremities = [
             _BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY)
             for event_id, depth in await self.store.get_oldest_event_ids_with_depth_in_room(
-                room_id
+                room_id=room_id,
+                # We don't want events that come after-in-time from our current
+                # position when we're backfilling looking backwards.
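+                # (Depth increases as the room grows, so "older than our
+                # current position" means a smaller depth value.)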
+ # + # current_depth (ignore events that come after this, ignore 2-4) + # | + # [0]<--[1]▼<--[2]<--[3]<--[4] + current_depth=current_depth, ) ] diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 6e93a5af3921..42c5c99372d8 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import datetime import itertools import logging from queue import Empty, PriorityQueue @@ -71,6 +72,12 @@ logger = logging.getLogger(__name__) +BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS: int = int( + datetime.timedelta(days=7).total_seconds() +) +BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS: int = int( + datetime.timedelta(hours=1).total_seconds() +) # All the info we need while iterating the DAG while backfilling @attr.s(frozen=True, slots=True, auto_attribs=True) @@ -715,7 +722,9 @@ def _get_auth_chain_difference_txn( @trace @tag_args async def get_oldest_event_ids_with_depth_in_room( - self, room_id: str + self, + room_id: str, + current_depth: int, ) -> List[Tuple[str, int]]: """Gets the oldest events(backwards extremities) in the room along with the aproximate depth. @@ -735,34 +744,69 @@ async def get_oldest_event_ids_with_depth_in_room( def get_oldest_event_ids_with_depth_in_room_txn( txn: LoggingTransaction, room_id: str ) -> List[Tuple[str, int]]: - # Assemble a dictionary with event_id -> depth for the oldest events + # Assemble a tuple lookup of event_id -> depth for the oldest events # we know of in the room. Backwards extremeties are the oldest # events we know of in the room but we only know of them because - # some other event referenced them by prev_event and aren't peristed - # in our database yet (meaning we don't know their depth + # some other event referenced them by prev_event and aren't + # persisted in our database yet (meaning we don't know their depth # specifically). So we need to look for the aproximate depth from # the events connected to the current backwards extremeties. sql = """ - SELECT b.event_id, MAX(e.depth) FROM events as e + SELECT backward_extrem.event_id, MAX(event.depth) FROM events as event /** * Get the edge connections from the event_edges table * so we can see whether this event's prev_events points * to a backward extremity in the next join. */ - INNER JOIN event_edges as g - ON g.event_id = e.event_id + INNER JOIN event_edges as edge + ON edge.event_id = event.event_id /** * We find the "oldest" events in the room by looking for * events connected to backwards extremeties (oldest events * in the room that we know of so far). */ - INNER JOIN event_backward_extremities as b - ON g.prev_event_id = b.event_id - WHERE b.room_id = ? AND g.is_state is ? - GROUP BY b.event_id + INNER JOIN event_backward_extremities as backward_extrem + ON edge.prev_event_id = backward_extrem.event_id + /** + * We use this info to make sure we don't retry to use a backfill point + * if we've already attempted to backfill from it recently. + */ + INNER JOIN event_backfill_attempts as backfill_attempt_info + ON backfill_attempt_info.event_id = backward_extrem.event_id + WHERE + backward_extrem.room_id = ? + /* We only care about normal events because TODO: why? */ + AND edge.is_state is ? 
/* False */ + /** + * We only want backwards extremities that are older than or at + * the same position of the given `current_depth` (where older + * means less than a given depth). + */ + AND MAX(event.depth) <= current_depth + /** + * Exponential back-off (up to the upper bound) so we don't retry the + * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc + */ + AND ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(2^backfill_attempt_info.num_attempts * ?, ? /* upper bound */) + /* TODO: Why? */ + GROUP BY backward_extrem.event_id + /** + * Sort from highest (closest to the `max_depth`) to the lowest depth + * because the closest are most relevant to backfill from first. + */ + ORDER BY MAX(event.depth) DESC """ - txn.execute(sql, (room_id, False)) + txn.execute( + sql, + ( + room_id, + False, + self._clock.time_msec(), + 1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS, + 1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS, + ), + ) return cast(List[Tuple[str, int]], txn.fetchall()) From 37ff0099c158305365bbb32b5d3f4a7301cd4df3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 25 Aug 2022 21:26:35 -0500 Subject: [PATCH 18/66] Move comment to where it matters --- synapse/handlers/federation.py | 6 ------ synapse/storage/databases/main/event_federation.py | 7 ++++++- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index d5903231144b..cf5ad5a9e4c3 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -259,12 +259,6 @@ async def _maybe_backfill_inner( _BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY) for event_id, depth in await self.store.get_oldest_event_ids_with_depth_in_room( room_id=room_id, - # We don't want events that come after-in-time from our current - # position when we're backfilling looking backwards. - # - # current_depth (ignore events that come after this, ignore 2-4) - # | - # [0]<--[1]▼<--[2]<--[3]<--[4] current_depth=current_depth, ) ] diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 42c5c99372d8..7283ff617f54 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -780,7 +780,12 @@ def get_oldest_event_ids_with_depth_in_room_txn( /** * We only want backwards extremities that are older than or at * the same position of the given `current_depth` (where older - * means less than a given depth). + * means less than a given depth) because we're looking backwards + * from the `current_depth` when backfilling. 
+ * + * current_depth (ignore events that come after this, ignore 2-4) + * | + * [0]<--[1]▼<--[2]<--[3]<--[4] */ AND MAX(event.depth) <= current_depth /** From a58d191ee21b3a416c4be53029d4e41d462468d5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 26 Aug 2022 16:52:42 -0500 Subject: [PATCH 19/66] Silly graph pt 1 --- tests/storage/test_event_federation.py | 57 ++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index d92a9ac5b798..a63b0dbdcc80 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -576,6 +576,63 @@ def prev_event_format(prev_event_id: str) -> Union[Tuple[str, dict], str]: ) self.assertEqual(event_id, "$fake_event_id_500") + def test_get_oldest_event_ids_with_depth_in_room(self): + # asdf + room_id = "@ROOM:some-host" + + # The silly graph we use to test grabbing backward extremities, + # where the top is the oldest events. + # 1 (oldest) + # | + # 2 + # / | \ + # ´ | ` + # [b1, b2, b3] | | + # | | | + # A | | + # \ | | + # ` 3 | + # | | + # | [b4, b5, b6] + # | | + # | B + # | / + # 4 ´ + # | + # 5 (newest) + + event_graph = { + "1": [], + "2": ["1"], + "3": ["2"], + "4": ["3"], + "5": ["4"], + "A": ["b1", "b2", "b3"], + "b1": ["2"], + "b2": ["2"], + "b3": ["2"], + "B": ["b4", "b5", "b6"], + "b4": ["3"], + "b5": ["3"], + "b6": ["3"], + } + + depth_map = { + "1": 1, + "2": 2, + "b1": 3, + "b2": 3, + "b3": 3, + "A": 4, + "3": 5, + "b4": 6, + "b5": 6, + "b6": 6, + "B": 7, + "4": 8, + "5": 9, + } + @attr.s class FakeEvent: From f127ad1359553569ca83555083aa73774c33aaa4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 26 Aug 2022 18:24:09 -0500 Subject: [PATCH 20/66] Silly graph pt 2 --- tests/storage/test_event_federation.py | 115 +++++++++++++++++++------ 1 file changed, 90 insertions(+), 25 deletions(-) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index a63b0dbdcc80..3ac553659822 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Tuple, Union +from typing import Dict, List, Tuple, Union import attr from parameterized import parameterized @@ -23,6 +23,8 @@ RoomVersion, ) from synapse.events import _EventInternalMetadata +from synapse.storage.database import LoggingTransaction +from synapse.types import JsonDict from synapse.util import json_encoder import tests.unittest @@ -578,34 +580,33 @@ def prev_event_format(prev_event_id: str) -> Union[Tuple[str, dict], str]: def test_get_oldest_event_ids_with_depth_in_room(self): # asdf - room_id = "@ROOM:some-host" + room_id = "!backfill-room-test:some-host" # The silly graph we use to test grabbing backward extremities, # where the top is the oldest events. 
- # 1 (oldest) - # | - # 2 - # / | \ - # ´ | ` - # [b1, b2, b3] | | - # | | | - # A | | - # \ | | - # ` 3 | - # | | - # | [b4, b5, b6] - # | | - # | B - # | / - # 4 ´ - # | - # 5 (newest) - - event_graph = { + # 1 (oldest) + # | + # 2 ⹁ + # | \ + # | [b1, b2, b3] + # | | + # | A + # | / + # 3 { + # | \ + # | [b4, b5, b6] + # | | + # | B + # | / + # 4 ´ + # | + # 5 (newest) + + event_graph: Dict[str, List[str]] = { "1": [], "2": ["1"], - "3": ["2"], - "4": ["3"], + "3": ["2", "A"], + "4": ["3", "B"], "5": ["4"], "A": ["b1", "b2", "b3"], "b1": ["2"], @@ -617,7 +618,7 @@ def test_get_oldest_event_ids_with_depth_in_room(self): "b6": ["3"], } - depth_map = { + depth_map: Dict[str, int] = { "1": 1, "2": 2, "b1": 3, @@ -633,6 +634,70 @@ def test_get_oldest_event_ids_with_depth_in_room(self): "5": 9, } + # The events we have persisted on our server. + # The rest are events in the room but not backfilled tet. + our_server_events = ["5", "4", "B", "3", "A"] + + complete_event_dict_map: Dict[str, JsonDict] = {} + stream_ordering = 0 + for (event_id, prev_events) in event_graph.items(): + depth = depth_map[event_id] + + complete_event_dict_map[event_id] = { + "event_id": event_id, + "type": "test_regular_type", + "room_id": room_id, + "sender": "@sender", + "prev_events": prev_events, + "auth_events": [], + "origin_server_ts": stream_ordering, + "depth": depth, + "stream_ordering": stream_ordering, + "content": {"body": "event" + event_id}, + } + + stream_ordering += 1 + + def insert_our_server_events(txn: LoggingTransaction): + # Insert our server events + for event_id in our_server_events: + event_dict = complete_event_dict_map.get(event_id) + + self.store.db_pool.simple_insert_txn( + txn, + table="events", + values={ + "event_id": event_dict.get("event_id"), + "type": event_dict.get("type"), + "room_id": event_dict.get("room_id"), + "depth": event_dict.get("depth"), + "topological_ordering": event_dict.get("depth"), + "stream_ordering": event_dict.get("stream_ordering"), + "processed": True, + "outlier": False, + }, + ) + + # Insert the event edges + for event_id in our_server_events: + for prev_event_id in event_graph[event_id]: + self.store.db_pool.simple_insert_txn( + txn, + table="event_edges", + values={ + "event_id": event_id, + "prev_event_id": prev_event_id, + "room_id": room_id, + }, + ) + + self.get_success( + self.store.db_pool.runInteraction( + "insert_our_server_events", + insert_our_server_events, + ) + ) + @attr.s class FakeEvent: From 18abbf4818d6a970fa2a1deb9124beef8b85b828 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 26 Aug 2022 19:02:18 -0500 Subject: [PATCH 21/66] Tests running (not working) --- .../databases/main/event_federation.py | 7 ++- tests/storage/test_event_federation.py | 55 ++++++++++++++++--- tests/utils.py | 3 + 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 7283ff617f54..aa58b58b1b99 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -775,7 +775,7 @@ def get_oldest_event_ids_with_depth_in_room_txn( ON backfill_attempt_info.event_id = backward_extrem.event_id WHERE backward_extrem.room_id = ? - /* We only care about normal events because TODO: why? */ + /* We only care about normal events because TODO: why */ AND edge.is_state is ? 
/* False */ /** * We only want backwards extremities that are older than or at @@ -787,13 +787,13 @@ def get_oldest_event_ids_with_depth_in_room_txn( * | * [0]<--[1]▼<--[2]<--[3]<--[4] */ - AND MAX(event.depth) <= current_depth + AND event.depth <= ? /* current_depth */ /** * Exponential back-off (up to the upper bound) so we don't retry the * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc */ AND ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(2^backfill_attempt_info.num_attempts * ?, ? /* upper bound */) - /* TODO: Why? */ + /* TODO: Why */ GROUP BY backward_extrem.event_id /** * Sort from highest (closest to the `max_depth`) to the lowest depth @@ -807,6 +807,7 @@ def get_oldest_event_ids_with_depth_in_room_txn( ( room_id, False, + current_depth, self._clock.time_msec(), 1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS, 1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS, diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 3ac553659822..da8ac745f899 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -30,6 +30,10 @@ import tests.unittest import tests.utils +import logging + +logger = logging.getLogger(__name__) + class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase): def prepare(self, reactor, clock, hs): @@ -578,8 +582,14 @@ def prev_event_format(prev_event_id: str) -> Union[Tuple[str, dict], str]: ) self.assertEqual(event_id, "$fake_event_id_500") - def test_get_oldest_event_ids_with_depth_in_room(self): - # asdf + def _setup_room_for_backfill_tests(self) -> str: + """ + Sets up a room with various events and backward extremities to test + backfill functions against. + + Returns: + room_id to test against + """ room_id = "!backfill-room-test:some-host" # The silly graph we use to test grabbing backward extremities, @@ -636,11 +646,11 @@ def test_get_oldest_event_ids_with_depth_in_room(self): # The events we have persisted on our server. # The rest are events in the room but not backfilled tet. 
- our_server_events = ["5", "4", "B", "3", "A"] + our_server_events = {"5", "4", "B", "3", "A"} complete_event_dict_map: Dict[str, JsonDict] = {} stream_ordering = 0 - for (event_id, prev_events) in event_graph.items(): + for (event_id, prev_event_ids) in event_graph.items(): depth = depth_map[event_id] complete_event_dict_map[event_id] = { @@ -648,7 +658,7 @@ def test_get_oldest_event_ids_with_depth_in_room(self): "type": "test_regular_type", "room_id": room_id, "sender": "@sender", - "prev_events": prev_events, + "prev_event_ids": prev_event_ids, "auth_events": [], "origin_server_ts": stream_ordering, "depth": depth, @@ -658,7 +668,7 @@ def test_get_oldest_event_ids_with_depth_in_room(self): stream_ordering += 1 - def insert_our_server_events(txn: LoggingTransaction): + def populate_db(txn: LoggingTransaction): # Insert our server events for event_id in our_server_events: event_dict = complete_event_dict_map.get(event_id) @@ -691,13 +701,42 @@ def insert_our_server_events(txn: LoggingTransaction): }, ) + # Insert the backward extremities + prev_events_of_our_events = { + prev_event_id + for our_server_event in our_server_events + for prev_event_id in complete_event_dict_map.get(our_server_event).get( + "prev_event_ids" + ) + } + backward_extremities = prev_events_of_our_events - our_server_events + for backward_extremity in backward_extremities: + self.store.db_pool.simple_insert_txn( + txn, + table="event_backward_extremities", + values={ + "event_id": backward_extremity, + "room_id": room_id, + }, + ) + self.get_success( self.store.db_pool.runInteraction( - "insert_our_server_events", - insert_our_server_events, + "populate_db", + populate_db, ) ) + return room_id + + def test_get_oldest_event_ids_with_depth_in_room(self): + room_id = self._setup_room_for_backfill_tests() + + backfill_points = self.get_success( + self.store.get_oldest_event_ids_with_depth_in_room(room_id, 7) + ) + logger.info("asdfasdf backfill_points=%s", backfill_points) + @attr.s class FakeEvent: diff --git a/tests/utils.py b/tests/utils.py index d2c6d1e85242..4c29c3f9238d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import atexit import os from typing import Any, Callable, Dict, List, Tuple, Union, overload @@ -54,6 +55,8 @@ # the dbname we will connect to in order to create the base database. POSTGRES_DBNAME_FOR_INITIAL_CREATE = "postgres" +logger = logging.getLogger(__name__) + def setupdb() -> None: # If we're using PostgreSQL, set up the db once From 23310f569e8e9575ae9f667cddfe0f94115d7618 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 26 Aug 2022 19:26:54 -0500 Subject: [PATCH 22/66] Passing test --- synapse/storage/databases/main/event_federation.py | 9 ++++++--- tests/storage/test_event_federation.py | 12 +++++++++--- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index aa58b58b1b99..56ae224231cc 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -771,7 +771,7 @@ def get_oldest_event_ids_with_depth_in_room_txn( * We use this info to make sure we don't retry to use a backfill point * if we've already attempted to backfill from it recently. 
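             * A backfill point that has never failed an attempt has no row in
             * this table at all, which is why the join below is a LEFT JOIN
             * combined with an IS NULL check.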
*/ - INNER JOIN event_backfill_attempts as backfill_attempt_info + LEFT JOIN event_backfill_attempts as backfill_attempt_info ON backfill_attempt_info.event_id = backward_extrem.event_id WHERE backward_extrem.room_id = ? @@ -792,14 +792,17 @@ def get_oldest_event_ids_with_depth_in_room_txn( * Exponential back-off (up to the upper bound) so we don't retry the * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc */ - AND ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(2^backfill_attempt_info.num_attempts * ?, ? /* upper bound */) + AND ( + backfill_attempt_info IS NULL + OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(2^backfill_attempt_info.num_attempts * ?, ? /* upper bound */) + ) /* TODO: Why */ GROUP BY backward_extrem.event_id /** * Sort from highest (closest to the `max_depth`) to the lowest depth * because the closest are most relevant to backfill from first. */ - ORDER BY MAX(event.depth) DESC + ORDER BY MAX(event.depth) DESC, backward_extrem.event_id DESC """ txn.execute( diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index da8ac745f899..4f7c788ec9ee 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -17,15 +17,18 @@ import attr from parameterized import parameterized +from twisted.test.proto_helpers import MemoryReactor + from synapse.api.room_versions import ( KNOWN_ROOM_VERSIONS, EventFormatVersions, RoomVersion, ) from synapse.events import _EventInternalMetadata +from synapse.server import HomeServer from synapse.storage.database import LoggingTransaction from synapse.types import JsonDict -from synapse.util import json_encoder +from synapse.util import Clock, json_encoder import tests.unittest import tests.utils @@ -36,7 +39,7 @@ class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase): - def prepare(self, reactor, clock, hs): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main def test_get_prev_events_for_room(self): @@ -735,7 +738,10 @@ def test_get_oldest_event_ids_with_depth_in_room(self): backfill_points = self.get_success( self.store.get_oldest_event_ids_with_depth_in_room(room_id, 7) ) - logger.info("asdfasdf backfill_points=%s", backfill_points) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + self.assertListEqual( + backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"] + ) @attr.s From 64e01d893fc1ee98ed6bcde5054f2dfd93d913b7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 26 Aug 2022 22:35:40 -0500 Subject: [PATCH 23/66] Add test for A and B --- tests/storage/test_event_federation.py | 10 ++++++++++ tests/utils.py | 3 --- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 4f7c788ec9ee..7caa97bade38 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -735,6 +735,7 @@ def populate_db(txn: LoggingTransaction): def test_get_oldest_event_ids_with_depth_in_room(self): room_id = self._setup_room_for_backfill_tests() + # Try at B backfill_points = self.get_success( self.store.get_oldest_event_ids_with_depth_in_room(room_id, 7) ) @@ -743,6 +744,15 @@ def test_get_oldest_event_ids_with_depth_in_room(self): backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"] ) + # Try at A + backfill_points = self.get_success( + 
self.store.get_oldest_event_ids_with_depth_in_room(room_id, 4) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + # Event "2" is not included here because we only know the aproximate + # depth of 5 from our event "3". + self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"]) + @attr.s class FakeEvent: diff --git a/tests/utils.py b/tests/utils.py index 4c29c3f9238d..d2c6d1e85242 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -13,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging import atexit import os from typing import Any, Callable, Dict, List, Tuple, Union, overload @@ -55,8 +54,6 @@ # the dbname we will connect to in order to create the base database. POSTGRES_DBNAME_FOR_INITIAL_CREATE = "postgres" -logger = logging.getLogger(__name__) - def setupdb() -> None: # If we're using PostgreSQL, set up the db once From 47bac256d0e296f6630ca38a683c82310cc63d46 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 26 Aug 2022 23:08:33 -0500 Subject: [PATCH 24/66] Add tests for backfill attempts --- .../databases/main/event_federation.py | 2 +- tests/storage/test_event_federation.py | 109 +++++++++++++++++- 2 files changed, 104 insertions(+), 7 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 56ae224231cc..4fba39e04441 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -780,7 +780,7 @@ def get_oldest_event_ids_with_depth_in_room_txn( /** * We only want backwards extremities that are older than or at * the same position of the given `current_depth` (where older - * means less than a given depth) because we're looking backwards + * means less than the given depth) because we're looking backwards * from the `current_depth` when backfilling. * * current_depth (ignore events that come after this, ignore 2-4) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 7caa97bade38..f8ff88708aea 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Dict, List, Tuple, Union +from typing import Any, Dict, List, Tuple, Union import attr +import datetime from parameterized import parameterized from twisted.test.proto_helpers import MemoryReactor @@ -38,6 +39,12 @@ logger = logging.getLogger(__name__) +@attr.s(auto_attribs=True, frozen=True, slots=True) +class _BackfillSetupInfo: + room_id: str + depth_map: Dict[str, int] + + class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main @@ -585,7 +592,7 @@ def prev_event_format(prev_event_id: str) -> Union[Tuple[str, dict], str]: ) self.assertEqual(event_id, "$fake_event_id_500") - def _setup_room_for_backfill_tests(self) -> str: + def _setup_room_for_backfill_tests(self) -> _BackfillSetupInfo: """ Sets up a room with various events and backward extremities to test backfill functions against. 
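The backoff arithmetic these tests exercise is the SQL predicate built up over the
last few patches. As a rough standalone model of that predicate (a minimal sketch
with illustrative names, not Synapse's actual helpers):

    def can_retry_backfill_point(
        now_ms: int,
        last_attempt_ts: int,
        num_attempts: int,
        step_ms: int,
        upper_bound_ms: int,
    ) -> bool:
        # Mirrors: current_time >= last_attempt_ts
        #          + least(2^num_attempts * step, upper_bound)
        backoff_ms = min((2**num_attempts) * step_ms, upper_bound_ms)
        return now_ms >= last_attempt_ts + backoff_ms

With a one-hour step, a point with one failed attempt becomes retryable after
2 hours and one with four failed attempts after 16 hours, which is what the
reactor advances in the tests below rely on.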
@@ -730,14 +737,22 @@ def populate_db(txn: LoggingTransaction):
             )
         )
 
-        return room_id
+        return _BackfillSetupInfo(room_id=room_id, depth_map=depth_map)
 
     def test_get_oldest_event_ids_with_depth_in_room(self):
-        room_id = self._setup_room_for_backfill_tests()
+        """
+        Test to make sure only backfill points that are older and come before
+        the `current_depth` are returned.
+        """
+        setup_info = self._setup_room_for_backfill_tests()
+        room_id = setup_info.room_id
+        depth_map = setup_info.depth_map
 
         # Try at B
         backfill_points = self.get_success(
-            self.store.get_oldest_event_ids_with_depth_in_room(room_id, 7)
+            self.store.get_oldest_event_ids_with_depth_in_room(
+                room_id, depth_map.get("B")
+            )
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         self.assertListEqual(
@@ -746,13 +761,95 @@ def test_get_oldest_event_ids_with_depth_in_room(self):
 
         # Try at A
         backfill_points = self.get_success(
-            self.store.get_oldest_event_ids_with_depth_in_room(room_id, 4)
+            self.store.get_oldest_event_ids_with_depth_in_room(
+                room_id, depth_map.get("A")
+            )
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         # Event "2" is not included here because we only know the aproximate
         # depth of 5 from our event "3".
         self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"])
 
+    def test_get_oldest_event_ids_with_depth_in_room_excludes_events_we_have_attempted(
+        self,
+    ):
+        """
+        Test to make sure that events we have attempted to backfill (and are
+        still within the backoff timeout duration) do not show up as events to
+        backfill again.
+        """
+        setup_info = self._setup_room_for_backfill_tests()
+        room_id = setup_info.room_id
+        depth_map = setup_info.depth_map
+
+        # Record some attempts to backfill these events which will make
+        # `get_oldest_event_ids_with_depth_in_room` exclude them because we
+        # haven't passed the backoff interval.
+        self.get_success(self.store.record_event_backfill_attempt("b5"))
+        self.get_success(self.store.record_event_backfill_attempt("b4"))
+        self.get_success(self.store.record_event_backfill_attempt("b3"))
+        self.get_success(self.store.record_event_backfill_attempt("b2"))
+
+        # No time has passed since we attempted to backfill ^
+
+        # Try at B
+        backfill_points = self.get_success(
+            self.store.get_oldest_event_ids_with_depth_in_room(
+                room_id, depth_map.get("B")
+            )
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        # Only the backfill points that we didn't record earlier exist here.
+        self.assertListEqual(backfill_event_ids, ["b6", "2", "b1"])
+
+    def test_get_oldest_event_ids_with_depth_in_room_attempted_event_retry_after_backoff_duration(
+        self,
+    ):
+        """
+        Test to make sure that after we make fake attempts to backfill events
+        "b3" and "b1", we can retry and see them again once their backoff
+        timeout durations have been exceeded.
+        """
+        setup_info = self._setup_room_for_backfill_tests()
+        room_id = setup_info.room_id
+        depth_map = setup_info.depth_map
+
+        # Record some attempts to backfill these events which will make
+        # `get_oldest_event_ids_with_depth_in_room` exclude them because we
+        # haven't passed the backoff interval.
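+        # (With the one-hour backoff step these tests assume, one recorded
+        # attempt means a 2^1 = 2 hour wait and four recorded attempts mean a
+        # 2^4 = 16 hour wait before the event shows up again.)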
+        self.get_success(self.store.record_event_backfill_attempt("b3"))
+        self.get_success(self.store.record_event_backfill_attempt("b1"))
+        self.get_success(self.store.record_event_backfill_attempt("b1"))
+        self.get_success(self.store.record_event_backfill_attempt("b1"))
+        self.get_success(self.store.record_event_backfill_attempt("b1"))
+
+        # Now advance time by 2 hours and we should only be able to see "b3"
+        # because we have waited long enough for the single attempt (2^1 hours)
+        # but we still shouldn't see "b1" because we haven't waited long enough
+        # for this many attempts. We didn't do anything to "b2" so it should be
+        # visible regardless.
+        self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
+
+        # Try at A and make sure that "b1" is not in the list because we've already attempted it many times
+        backfill_points = self.get_success(
+            self.store.get_oldest_event_ids_with_depth_in_room(
+                room_id, depth_map.get("A")
+            )
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        self.assertListEqual(backfill_event_ids, ["b3", "b2"])
+
+        # Now advance time by 20 hours and see if we can now backfill it
+        self.reactor.advance(datetime.timedelta(hours=20).total_seconds())
+
+        # Try at A again after we advanced the time and we should see "b1" again
+        backfill_points = self.get_success(
+            self.store.get_oldest_event_ids_with_depth_in_room(
+                room_id, depth_map.get("A")
+            )
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"])
+
 
 @attr.s
 class FakeEvent:

From 2ebed9d3930ea59be9a44176c19645a672440aa8 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Fri, 26 Aug 2022 23:12:46 -0500
Subject: [PATCH 25/66] Remove `GROUP BY backward_extrem.event_id` (seems
 unnecessary)

See https://github.com/matrix-org/synapse/pull/13635#discussion_r956534524
---
 synapse/storage/databases/main/event_federation.py |  6 ++----
 tests/storage/test_event_federation.py             | 11 ++++++-----
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 4fba39e04441..6c86d12c95f5 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -752,7 +752,7 @@ def get_oldest_event_ids_with_depth_in_room_txn(
         # specifically). So we need to look for the aproximate depth from
         # the events connected to the current backwards extremeties.
         sql = """
-            SELECT backward_extrem.event_id, MAX(event.depth) FROM events as event
+            SELECT backward_extrem.event_id, event.depth FROM events as event
             /**
              * Get the edge connections from the event_edges table
              * so we can see whether this event's prev_events points
@@ -796,13 +796,11 @@ def get_oldest_event_ids_with_depth_in_room_txn(
                 backfill_attempt_info IS NULL
                 OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(2^backfill_attempt_info.num_attempts * ?, ? /* upper bound */)
             )
-            /* TODO: Why */
-            GROUP BY backward_extrem.event_id
             /**
              * Sort from highest (closest to the `max_depth`) to the lowest depth
              * because the closest are most relevant to backfill from first.
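              * The event_id tie-break below keeps the result order
              * deterministic, which the list assertions in the tests depend on.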
*/
-            ORDER BY MAX(event.depth) DESC, backward_extrem.event_id DESC
+            ORDER BY event.depth DESC, backward_extrem.event_id DESC
         """
 
         txn.execute(
diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index f8ff88708aea..49c5a3a166b6 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -748,7 +748,7 @@ def test_get_oldest_event_ids_with_depth_in_room(self):
         room_id = setup_info.room_id
         depth_map = setup_info.depth_map
 
-        # Try at B
+        # Try at "B"
         backfill_points = self.get_success(
             self.store.get_oldest_event_ids_with_depth_in_room(
                 room_id, depth_map.get("B")
@@ -759,7 +759,7 @@ def test_get_oldest_event_ids_with_depth_in_room(self):
             backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"]
         )
 
-        # Try at A
+        # Try at "A"
         backfill_points = self.get_success(
             self.store.get_oldest_event_ids_with_depth_in_room(
                 room_id, depth_map.get("A")
@@ -791,7 +791,7 @@ def test_get_oldest_event_ids_with_depth_in_room_excludes_events_we_have_attempt
 
         # No time has passed since we attempted to backfill ^
 
-        # Try at B
+        # Try at "B"
         backfill_points = self.get_success(
             self.store.get_oldest_event_ids_with_depth_in_room(
                 room_id, depth_map.get("B")
@@ -829,7 +829,8 @@ def test_get_oldest_event_ids_with_depth_in_room_attempted_event_retry_after_bac
         # visible regardless.
         self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
 
-        # Try at A and make sure that "b1" is not in the list because we've already attempted it many times
+        # Try at "A" and make sure that "b1" is not in the list because we've
+        # already attempted it many times
         backfill_points = self.get_success(
             self.store.get_oldest_event_ids_with_depth_in_room(
                 room_id, depth_map.get("A")
@@ -841,7 +841,7 @@ def test_get_oldest_event_ids_with_depth_in_room_attempted_event_retry_after_bac
         # Now advance time by 20 hours and see if we can now backfill it
         self.reactor.advance(datetime.timedelta(hours=20).total_seconds())
 
-        # Try at A again after we advanced the time and we should see "b1" again
+        # Try at "A" again after we advanced the time and we should see "b1" again
         backfill_points = self.get_success(
             self.store.get_oldest_event_ids_with_depth_in_room(
                 room_id, depth_map.get("A")

From 60b3b9297aefece242d454c5e22e08a7f2509fa6 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Fri, 26 Aug 2022 23:21:59 -0500
Subject: [PATCH 26/66] Clarify why that much time

---
 tests/storage/test_event_federation.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 49c5a3a166b6..eca68bc9e02b 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -839,10 +839,11 @@ def test_get_oldest_event_ids_with_depth_in_room_attempted_event_retry_after_bac
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         self.assertListEqual(backfill_event_ids, ["b3", "b2"])
 
-        # Now advance time by 20 hours and see if we can now backfill it
+        # Now advance time by 20 hours (above 2^4 because we made 4 attempts) and
+        # see if we can now backfill it
         self.reactor.advance(datetime.timedelta(hours=20).total_seconds())
 
-        # Try at "A" again after we advanced the time and we should see "b1" again
+        # Try at "A" again after we advanced enough time and we should see "b1" again
         backfill_points = self.get_success(
             self.store.get_oldest_event_ids_with_depth_in_room(
                 room_id, depth_map.get("A")

From e9f603d05b100bfd474f1c430d8dc60df3315575 Mon Sep 17
00:00:00 2001 From: Eric Eastwood Date: Fri, 26 Aug 2022 23:24:18 -0500 Subject: [PATCH 27/66] Label ? slot --- synapse/storage/databases/main/event_federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 6c86d12c95f5..eb2c83314b80 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -794,7 +794,7 @@ def get_oldest_event_ids_with_depth_in_room_txn( */ AND ( backfill_attempt_info IS NULL - OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(2^backfill_attempt_info.num_attempts * ?, ? /* upper bound */) + OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(2^backfill_attempt_info.num_attempts * ? /* step */, ? /* upper bound */) ) /** * Sort from highest (closest to the `max_depth`) to the lowest depth From a8f1464735400dbd0f2a7658d16891edf296464c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 26 Aug 2022 23:27:58 -0500 Subject: [PATCH 28/66] Better explanation --- tests/storage/test_event_federation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index eca68bc9e02b..a1950532b13e 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -766,8 +766,8 @@ def test_get_oldest_event_ids_with_depth_in_room(self): ) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] - # Event "2" is not included here because we only know the aproximate - # depth of 5 from our event "3". + # Event "2" has a depth of 2 but is not included here because we only + # know the approximate depth of 5 from our event "3". self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"]) def test_get_oldest_event_ids_with_depth_in_room_excludes_events_we_have_attempted( From bbd1c94e53f02ca24375e70f769451861f92a1c0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 26 Aug 2022 23:30:53 -0500 Subject: [PATCH 29/66] Add changelog --- changelog.d/13635.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13635.feature diff --git a/changelog.d/13635.feature b/changelog.d/13635.feature new file mode 100644 index 000000000000..d86bf7ed809f --- /dev/null +++ b/changelog.d/13635.feature @@ -0,0 +1 @@ +Exponentially backoff from backfilling the same event over and over. From dd1db25a4f7c4da51f37340f14ba20d1abceb117 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 26 Aug 2022 23:52:01 -0500 Subject: [PATCH 30/66] Fix lints --- .../databases/main/event_federation.py | 10 ++++- tests/storage/test_event_federation.py | 39 +++++++------------ 2 files changed, 23 insertions(+), 26 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index eb2c83314b80..1abc39103a78 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -79,6 +79,7 @@ datetime.timedelta(hours=1).total_seconds() ) + # All the info we need while iterating the DAG while backfilling @attr.s(frozen=True, slots=True, auto_attribs=True) class BackfillQueueNavigationItem: @@ -789,7 +790,7 @@ def get_oldest_event_ids_with_depth_in_room_txn( */ AND event.depth <= ? 
/* current_depth */ /** - * Exponential back-off (up to the upper bound) so we don't retry the + * Exponential back-off (up to the upper bound) so we don't retry the * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc */ AND ( @@ -1616,7 +1617,12 @@ async def get_next_staged_event_id_for_room( self, room_id: str, ) -> Optional[Tuple[str, str]]: - """Get the next event ID in the staging area for the given room.""" + """ + Get the next event ID in the staging area for the given room. + + Returns: + Tuple of the `origin` and `event_id` + """ def _get_next_staged_event_id_for_room_txn( txn: LoggingTransaction, diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index a1950532b13e..6d116d631a78 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -12,10 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Dict, List, Tuple, Union +import datetime +import logging +from typing import Dict, List, Tuple, Union import attr -import datetime from parameterized import parameterized from twisted.test.proto_helpers import MemoryReactor @@ -34,8 +35,6 @@ import tests.unittest import tests.utils -import logging - logger = logging.getLogger(__name__) @@ -587,9 +586,11 @@ def prev_event_format(prev_event_id: str) -> Union[Tuple[str, dict], str]: ) self.assertEqual(count, 1) - _, event_id = self.get_success( + next_staged_event_info = self.get_success( self.store.get_next_staged_event_id_for_room(room_id) ) + assert next_staged_event_info + _, event_id = next_staged_event_info self.assertEqual(event_id, "$fake_event_id_500") def _setup_room_for_backfill_tests(self) -> _BackfillSetupInfo: @@ -669,7 +670,7 @@ def _setup_room_for_backfill_tests(self) -> _BackfillSetupInfo: "room_id": room_id, "sender": "@sender", "prev_event_ids": prev_event_ids, - "auth_events": [], + "auth_event_ids": [], "origin_server_ts": stream_ordering, "depth": depth, "stream_ordering": stream_ordering, @@ -681,7 +682,7 @@ def _setup_room_for_backfill_tests(self) -> _BackfillSetupInfo: def populate_db(txn: LoggingTransaction): # Insert our server events for event_id in our_server_events: - event_dict = complete_event_dict_map.get(event_id) + event_dict = complete_event_dict_map[event_id] self.store.db_pool.simple_insert_txn( txn, @@ -715,9 +716,9 @@ def populate_db(txn: LoggingTransaction): prev_events_of_our_events = { prev_event_id for our_server_event in our_server_events - for prev_event_id in complete_event_dict_map.get(our_server_event).get( + for prev_event_id in complete_event_dict_map[our_server_event][ "prev_event_ids" - ) + ] } backward_extremities = prev_events_of_our_events - our_server_events for backward_extremity in backward_extremities: @@ -750,9 +751,7 @@ def test_get_oldest_event_ids_with_depth_in_room(self): # Try at "B" backfill_points = self.get_success( - self.store.get_oldest_event_ids_with_depth_in_room( - room_id, depth_map.get("B") - ) + self.store.get_oldest_event_ids_with_depth_in_room(room_id, depth_map["B"]) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] self.assertListEqual( @@ -761,9 +760,7 @@ def test_get_oldest_event_ids_with_depth_in_room(self): # Try at "A" backfill_points = self.get_success( - self.store.get_oldest_event_ids_with_depth_in_room( - room_id, depth_map.get("A") - ) + self.store.get_oldest_event_ids_with_depth_in_room(room_id, depth_map["A"]) ) backfill_event_ids = 
[backfill_point[0] for backfill_point in backfill_points]
         # Event "2" has a depth of 2 but is not included here because we only
@@ -793,9 +790,7 @@ def test_get_oldest_event_ids_with_depth_in_room_excludes_events_we_have_attempt
 
         # Try at "B"
         backfill_points = self.get_success(
-            self.store.get_oldest_event_ids_with_depth_in_room(
-                room_id, depth_map.get("B")
-            )
+            self.store.get_oldest_event_ids_with_depth_in_room(room_id, depth_map["B"])
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         # Only the backfill points that we didn't record earlier exist here.
@@ -832,9 +827,7 @@ def test_get_oldest_event_ids_with_depth_in_room_attempted_event_retry_after_bac
         # Try at "A" and make sure that "b1" is not in the list because we've
         # already attempted it many times
         backfill_points = self.get_success(
-            self.store.get_oldest_event_ids_with_depth_in_room(
-                room_id, depth_map.get("A")
-            )
+            self.store.get_oldest_event_ids_with_depth_in_room(room_id, depth_map["A"])
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         self.assertListEqual(backfill_event_ids, ["b3", "b2"])
@@ -845,9 +838,7 @@ def test_get_oldest_event_ids_with_depth_in_room_attempted_event_retry_after_bac
 
         # Try at "A" again after we advanced enough time and we should see "b1" again
         backfill_points = self.get_success(
-            self.store.get_oldest_event_ids_with_depth_in_room(
-                room_id, depth_map.get("A")
-            )
+            self.store.get_oldest_event_ids_with_depth_in_room(room_id, depth_map["A"])
         )
         backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
         self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"])

From c583eef90551889c52d26b5644d6904fee1b5d43 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Sat, 27 Aug 2022 00:07:38 -0500
Subject: [PATCH 31/66] Update docstring

---
 synapse/storage/databases/main/event_federation.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py
index 1abc39103a78..c817a0c35c55 100644
--- a/synapse/storage/databases/main/event_federation.py
+++ b/synapse/storage/databases/main/event_federation.py
@@ -727,8 +727,9 @@ async def get_oldest_event_ids_with_depth_in_room(
         room_id: str,
         current_depth: int,
     ) -> List[Tuple[str, int]]:
-        """Gets the oldest events(backwards extremities) in the room along with the
-        aproximate depth.
+        """
+        Gets the oldest events(backwards extremities) in the room lower than the
+        given `current_depth` along with the aproximate depth.
 
         We use this function so that we can compare and see if someones current
         depth at their current scrollback is within pagination range of the
@@ -737,6 +738,9 @@ async def get_oldest_event_ids_with_depth_in_room(
 
         Args:
             room_id: Room where we want to find the oldest events
+            current_depth: The depth at the users current scrollback because
+                we only care about finding events older than the given
+                `current_depth` when scrolling and paginating backwards.
Returns: List of (event_id, depth) tuples From ea4a3ad94723efc34099f3ab0c5b0f1cd74894f9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 27 Aug 2022 01:03:28 -0500 Subject: [PATCH 32/66] Apply same changes to `get_insertion_event_backward_extremities_in_room` --- synapse/handlers/federation.py | 64 +++++----- .../databases/main/event_federation.py | 109 +++++++++++++----- tests/storage/test_event_federation.py | 20 ++-- 3 files changed, 120 insertions(+), 73 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index cf5ad5a9e4c3..66c8fc6c5ae4 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -38,7 +38,7 @@ from unpaddedbase64 import decode_base64 from synapse import event_auth -from synapse.api.constants import EventContentFields, EventTypes, Membership +from synapse.api.constants import EventContentFields, EventTypes, MAX_DEPTH, Membership from synapse.api.errors import ( AuthError, CodeMessageException, @@ -257,7 +257,7 @@ async def _maybe_backfill_inner( """ backwards_extremities = [ _BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY) - for event_id, depth in await self.store.get_oldest_event_ids_with_depth_in_room( + for event_id, depth in await self.store.get_backfill_points_in_room( room_id=room_id, current_depth=current_depth, ) @@ -268,7 +268,8 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled = [ _BackfillPoint(event_id, depth, _BackfillPointType.INSERTION_PONT) for event_id, depth in await self.store.get_insertion_event_backward_extremities_in_room( - room_id + room_id=room_id, + current_depth=current_depth, ) ] logger.debug( @@ -277,10 +278,6 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled, ) - if not backwards_extremities and not insertion_events_to_be_backfilled: - logger.debug("Not backfilling as no extremeties found.") - return False - # we now have a list of potential places to backpaginate from. We prefer to # start with the most recent (ie, max depth), so let's sort the list. sorted_backfill_points: List[_BackfillPoint] = sorted( @@ -301,6 +298,26 @@ async def _maybe_backfill_inner( sorted_backfill_points, ) + # If we have no backfill points lower than the `current_depth` then + # either we can a) bail or b) still attempt to backfill. We opt to try + # backfilling anyway just in case we do get relevant events. + if not sorted_backfill_points and current_depth != MAX_DEPTH: + logger.debug( + "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway." + ) + return self._maybe_backfill_inner( + room_id=room_id, + # We use `MAX_DEPTH` so that we find all backfill points next + # time (all events are below the `MAX_DEPTH`) + current_depth=MAX_DEPTH, + limit=limit, + processing_start_time=processing_start_time, + ) + elif not sorted_backfill_points and current_depth == MAX_DEPTH: + # Even after trying again with `MAX_DEPTH`, we didn't find any + # backward extremities to backfill from. + logger.debug("Not backfilling as no backward extremeties found.") + # If we're approaching an extremity we trigger a backfill, otherwise we # no-op. # @@ -314,43 +331,16 @@ async def _maybe_backfill_inner( # XXX: shouldn't we do this *after* the filter by depth below? Again, we don't # care about events that have happened after our current position. 
# - max_depth = sorted_backfill_points[0].depth - if current_depth - 2 * limit > max_depth: + max_depth_of_backfill_points = sorted_backfill_points[0].depth + if current_depth - 2 * limit > max_depth_of_backfill_points: logger.debug( "Not backfilling as we don't need to. %d < %d - 2 * %d", - max_depth, + max_depth_of_backfill_points, current_depth, limit, ) return False - # We ignore extremities that have a greater depth than our current depth - # as: - # 1. we don't really care about getting events that have happened - # after our current position; and - # 2. we have likely previously tried and failed to backfill from that - # extremity, so to avoid getting "stuck" requesting the same - # backfill repeatedly we drop those extremities. - # - # However, we need to check that the filtered extremities are non-empty. - # If they are empty then either we can a) bail or b) still attempt to - # backfill. We opt to try backfilling anyway just in case we do get - # relevant events. - # - filtered_sorted_backfill_points = [ - t for t in sorted_backfill_points if t.depth <= current_depth - ] - if filtered_sorted_backfill_points: - logger.debug( - "_maybe_backfill_inner: backfill points before current depth: %s", - filtered_sorted_backfill_points, - ) - sorted_backfill_points = filtered_sorted_backfill_points - else: - logger.debug( - "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway." - ) - # For performance's sake, we only want to paginate from a particular extremity # if we can actually see the events we'll get. Otherwise, we'd just spend a lot # of resources to get redacted events. We check each extremity in turn and diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index c817a0c35c55..e5cbbe4ee771 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -722,23 +722,32 @@ def _get_auth_chain_difference_txn( @trace @tag_args - async def get_oldest_event_ids_with_depth_in_room( + async def get_backfill_points_in_room( self, room_id: str, current_depth: int, ) -> List[Tuple[str, int]]: """ - Gets the oldest events(backwards extremities) in the room lower than the - given `current_depth` along with the aproximate depth. - - We use this function so that we can compare and see if someones current - depth at their current scrollback is within pagination range of the - event extremeties. If the current depth is close to the depth of given - oldest event, we can trigger a backfill. + Gets the oldest events(backwards extremities) in the room along with the + aproximate depth. + + We use this function so that we can compare and see if someones + `current_depth` at their current scrollback is within pagination range + of the event extremeties. If the `current_depth` is close to the depth + of given oldest event, we can trigger a backfill. + + We ignore extremities that have a greater depth than our `current_depth` + as: + 1. we don't really care about getting events that have happened + after our current position; and + 2. by the nature of paginating and scrolling back, we have likely + previously tried and failed to backfill from that extremity, so + to avoid getting "stuck" requesting the same backfill repeatedly + we drop those extremities. 
Args: room_id: Room where we want to find the oldest events - current_depth: The depth at the users current scrollback because + current_depth: The depth at the users current scrollback position because we only care about finding events older than the given `current_depth` when scrolling and paginating backwards. @@ -746,7 +755,7 @@ async def get_oldest_event_ids_with_depth_in_room( List of (event_id, depth) tuples """ - def get_oldest_event_ids_with_depth_in_room_txn( + def get_backfill_points_in_room_txn( txn: LoggingTransaction, room_id: str ) -> List[Tuple[str, int]]: # Assemble a tuple lookup of event_id -> depth for the oldest events @@ -802,8 +811,10 @@ def get_oldest_event_ids_with_depth_in_room_txn( OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(2^backfill_attempt_info.num_attempts * ? /* step */, ? /* upper bound */) ) /** - * Sort from highest (closest to the `max_depth`) to the lowest depth + * Sort from highest (closest to the `current_depth`) to the lowest depth * because the closest are most relevant to backfill from first. + * Then tie-break on alphabetical order of the event_ids so we get a + * consistent ordering which is nice when asserting things in tests. */ ORDER BY event.depth DESC, backward_extrem.event_id DESC """ @@ -823,24 +834,40 @@ def get_oldest_event_ids_with_depth_in_room_txn( return cast(List[Tuple[str, int]], txn.fetchall()) return await self.db_pool.runInteraction( - "get_oldest_event_ids_with_depth_in_room", - get_oldest_event_ids_with_depth_in_room_txn, + "get_backfill_points_in_room", + get_backfill_points_in_room_txn, room_id, ) @trace async def get_insertion_event_backward_extremities_in_room( - self, room_id: str + self, + room_id: str, + current_depth: int, ) -> List[Tuple[str, int]]: - """Get the insertion events we know about that we haven't backfilled yet. - - We use this function so that we can compare and see if someones current - depth at their current scrollback is within pagination range of the - insertion event. If the current depth is close to the depth of given - insertion event, we can trigger a backfill. + """ + Get the insertion events we know about that we haven't backfilled yet + along with the aproximate depth. + + We use this function so that we can compare and see if someones + `current_depth` at their current scrollback is within pagination range + of the insertion event. If the `current_depth` is close to the depth + of the given insertion event, we can trigger a backfill. + + We ignore insertion events that have a greater depth than our `current_depth` + as: + 1. we don't really care about getting events that have happened + after our current position; and + 2. by the nature of paginating and scrolling back, we have likely + previously tried and failed to backfill from that insertion event, so + to avoid getting "stuck" requesting the same backfill repeatedly + we drop those insertion event. Args: room_id: Room where we want to find the oldest events + current_depth: The depth at the users current scrollback position because + we only care about finding events older than the given + `current_depth` when scrolling and paginating backwards. 
Returns: List of (event_id, depth) tuples @@ -850,16 +877,46 @@ def get_insertion_event_backward_extremities_in_room_txn( txn: LoggingTransaction, room_id: str ) -> List[Tuple[str, int]]: sql = """ - SELECT b.event_id, MAX(e.depth) FROM insertion_events as i + SELECT insertion_event_extremity.event_id, event.depth FROM insertion_events as insertion_event /* We only want insertion events that are also marked as backwards extremities */ - INNER JOIN insertion_event_extremities as b USING (event_id) + INNER JOIN insertion_event_extremities as insertion_event_extremity USING (event_id) /* Get the depth of the insertion event from the events table */ - INNER JOIN events AS e USING (event_id) - WHERE b.room_id = ? - GROUP BY b.event_id + INNER JOIN events AS event USING (event_id) + /** + * We use this info to make sure we don't retry to use a backfill point + * if we've already attempted to backfill from it recently. + */ + LEFT JOIN event_backfill_attempts as backfill_attempt_info USING (event_id) + WHERE + insertion_event_extremity.room_id = ? + AND event.depth <= ? /* current_depth */ + /** + * Exponential back-off (up to the upper bound) so we don't retry the + * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc + */ + AND ( + backfill_attempt_info IS NULL + OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(2^backfill_attempt_info.num_attempts * ? /* step */, ? /* upper bound */) + ) + /** + * Sort from highest (closest to the `current_depth`) to the lowest depth + * because the closest are most relevant to backfill from first. + * Then tie-break on alphabetical order of the event_ids so we get a + * consistent ordering which is nice when asserting things in tests. + */ + ORDER BY event.depth DESC, insertion_event_extremity.event_id DESC """ - txn.execute(sql, (room_id,)) + txn.execute( + sql, + ( + room_id, + current_depth, + self._clock.time_msec(), + 1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS, + 1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS, + ), + ) return cast(List[Tuple[str, int]], txn.fetchall()) return await self.db_pool.runInteraction( diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 6d116d631a78..53437fbb4391 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -740,7 +740,7 @@ def populate_db(txn: LoggingTransaction): return _BackfillSetupInfo(room_id=room_id, depth_map=depth_map) - def test_get_oldest_event_ids_with_depth_in_room(self): + def test_get_backfill_points_in_room(self): """ Test to make sure only backfill points that are older and come before the `current_depth` are returned. @@ -751,7 +751,7 @@ def test_get_oldest_event_ids_with_depth_in_room(self): # Try at "B" backfill_points = self.get_success( - self.store.get_oldest_event_ids_with_depth_in_room(room_id, depth_map["B"]) + self.store.get_backfill_points_in_room(room_id, depth_map["B"]) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] self.assertListEqual( @@ -760,14 +760,14 @@ def test_get_oldest_event_ids_with_depth_in_room(self): # Try at "A" backfill_points = self.get_success( - self.store.get_oldest_event_ids_with_depth_in_room(room_id, depth_map["A"]) + self.store.get_backfill_points_in_room(room_id, depth_map["A"]) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] # Event "2" has a depth of 2 but is not included here because we only # know the approximate depth of 5 from our event "3". 
self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"]) - def test_get_oldest_event_ids_with_depth_in_room_excludes_events_we_have_attempted( + def test_get_backfill_points_in_room_excludes_events_we_have_attempted( self, ): """ @@ -779,7 +779,7 @@ def test_get_oldest_event_ids_with_depth_in_room_excludes_events_we_have_attempt depth_map = setup_info.depth_map # Record some attempts to backfill these events which will make - # `get_oldest_event_ids_with_depth_in_room` exclude them because we + # `get_backfill_points_in_room` exclude them because we # haven't passed the backoff interval. self.get_success(self.store.record_event_backfill_attempt("b5")) self.get_success(self.store.record_event_backfill_attempt("b4")) @@ -790,13 +790,13 @@ def test_get_oldest_event_ids_with_depth_in_room_excludes_events_we_have_attempt # Try at "B" backfill_points = self.get_success( - self.store.get_oldest_event_ids_with_depth_in_room(room_id, depth_map["B"]) + self.store.get_backfill_points_in_room(room_id, depth_map["B"]) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] # Only the backfill points that we didn't record earlier exist here. self.assertListEqual(backfill_event_ids, ["b6", "2", "b1"]) - def test_get_oldest_event_ids_with_depth_in_room_attempted_event_retry_after_backoff_duration( + def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duration( self, ): """ @@ -809,7 +809,7 @@ def test_get_oldest_event_ids_with_depth_in_room_attempted_event_retry_after_bac depth_map = setup_info.depth_map # Record some attempts to backfill these events which will make - # `get_oldest_event_ids_with_depth_in_room` exclude them because we + # `get_backfill_points_in_room` exclude them because we # haven't passed the backoff interval. 
self.get_success(self.store.record_event_backfill_attempt("b3")) self.get_success(self.store.record_event_backfill_attempt("b1")) @@ -827,7 +827,7 @@ def test_get_oldest_event_ids_with_depth_in_room_attempted_event_retry_after_bac # Try at "A" and make sure that "b3" is not in the list because we've # already attemted many times backfill_points = self.get_success( - self.store.get_oldest_event_ids_with_depth_in_room(room_id, depth_map["A"]) + self.store.get_backfill_points_in_room(room_id, depth_map["A"]) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] self.assertListEqual(backfill_event_ids, ["b3", "b2"]) @@ -838,7 +838,7 @@ def test_get_oldest_event_ids_with_depth_in_room_attempted_event_retry_after_bac # Try at "A" again after we advanced enough time and we should see "b3" again backfill_points = self.get_success( - self.store.get_oldest_event_ids_with_depth_in_room(room_id, depth_map["A"]) + self.store.get_backfill_points_in_room(room_id, depth_map["A"]) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"]) From f495752ac01ec661edb7f707f95fe3f55e9e0b7c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 27 Aug 2022 02:25:51 -0500 Subject: [PATCH 33/66] Use power and capitalize AS --- synapse/storage/databases/main/event_federation.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index e5cbbe4ee771..3cbb243970b7 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -766,26 +766,26 @@ def get_backfill_points_in_room_txn( # specifically). So we need to look for the aproximate depth from # the events connected to the current backwards extremeties. sql = """ - SELECT backward_extrem.event_id, event.depth FROM events as event + SELECT backward_extrem.event_id, event.depth FROM events AS event /** * Get the edge connections from the event_edges table * so we can see whether this event's prev_events points * to a backward extremity in the next join. */ - INNER JOIN event_edges as edge + INNER JOIN event_edges AS edge ON edge.event_id = event.event_id /** * We find the "oldest" events in the room by looking for * events connected to backwards extremeties (oldest events * in the room that we know of so far). */ - INNER JOIN event_backward_extremities as backward_extrem + INNER JOIN event_backward_extremities AS backward_extrem ON edge.prev_event_id = backward_extrem.event_id /** * We use this info to make sure we don't retry to use a backfill point * if we've already attempted to backfill from it recently. */ - LEFT JOIN event_backfill_attempts as backfill_attempt_info + LEFT JOIN event_backfill_attempts AS backfill_attempt_info ON backfill_attempt_info.event_id = backward_extrem.event_id WHERE backward_extrem.room_id = ? @@ -808,7 +808,7 @@ def get_backfill_points_in_room_txn( */ AND ( backfill_attempt_info IS NULL - OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(2^backfill_attempt_info.num_attempts * ? /* step */, ? /* upper bound */) + OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(power(2, backfill_attempt_info.num_attempts) * ? /* step */, ? 
/* upper bound */) ) /** * Sort from highest (closest to the `current_depth`) to the lowest depth @@ -886,7 +886,7 @@ def get_insertion_event_backward_extremities_in_room_txn( * We use this info to make sure we don't retry to use a backfill point * if we've already attempted to backfill from it recently. */ - LEFT JOIN event_backfill_attempts as backfill_attempt_info USING (event_id) + LEFT JOIN event_backfill_attempts AS backfill_attempt_info USING (event_id) WHERE insertion_event_extremity.room_id = ? AND event.depth <= ? /* current_depth */ @@ -896,7 +896,7 @@ def get_insertion_event_backward_extremities_in_room_txn( */ AND ( backfill_attempt_info IS NULL - OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(2^backfill_attempt_info.num_attempts * ? /* step */, ? /* upper bound */) + OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(power(2, backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */) ) /** * Sort from highest (closest to the `current_depth`) to the lowest depth From f2061b9ecac5c762e4b0974c9ac1d4cb6adeb2c0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 13:57:40 -0500 Subject: [PATCH 34/66] Use SQLite compatible power of 2 (left shift) See https://github.com/matrix-org/synapse/pull/13635#discussion_r956543645 Shout out to @reivilibre --- .../storage/databases/main/event_federation.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 3cbb243970b7..b1046dd6a38a 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -804,11 +804,16 @@ def get_backfill_points_in_room_txn( AND event.depth <= ? /* current_depth */ /** * Exponential back-off (up to the upper bound) so we don't retry the - * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc + * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc. + * + * We use `2 << (n - 1)` as a power of 2 equivalent for compatibility + * with SQLite. The left shift equivalent only works with powers of 2 + * because left shift is a binary operation (base-2). Otherwise, we + * would use `power(2, n)` or the power operator, `2^n`. */ AND ( backfill_attempt_info IS NULL - OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(power(2, backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */) + OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least((2 << (backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) ) /** * Sort from highest (closest to the `current_depth`) to the lowest depth @@ -893,10 +898,15 @@ def get_insertion_event_backward_extremities_in_room_txn( /** * Exponential back-off (up to the upper bound) so we don't retry the * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc + * + * We use `2 << (n - 1)` as a power of 2 equivalent for compatibility + * with SQLite. The left shift equivalent only works with powers of 2 + * because left shift is a binary operation (base-2). Otherwise, we + * would use `power(2, n)` or the power operator, `2^n`. */ AND ( backfill_attempt_info IS NULL - OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least(power(2, backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */) + OR ? 
/* current_time */ >= backfill_attempt_info.last_attempt_ts + least((2 << (backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) ) /** * Sort from highest (closest to the `current_depth`) to the lowest depth From e4192d73e794380b08744922d55ac2d114c01d71 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 14:35:28 -0500 Subject: [PATCH 35/66] Update table name with "failed" and include room_id in the primary key See: - https://github.com/matrix-org/synapse/pull/13589#discussion_r956531173 - https://github.com/matrix-org/synapse/pull/13589#discussion_r959836863 - https://github.com/matrix-org/synapse/pull/13589#discussion_r959836134 - https://github.com/matrix-org/synapse/pull/13589#discussion_r959838812 --- .../databases/main/event_federation.py | 33 ++++++++++--------- synapse/storage/databases/main/events.py | 17 ++++------ .../delta/72/04event_backfill_access_time.sql | 10 +++--- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 6e93a5af3921..b314bf557b4f 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1293,11 +1293,12 @@ def _get_backfill_events( return event_id_results @trace - async def record_event_backfill_attempt(self, event_id: str) -> None: + async def record_event_backfill_attempt(self, room_id: str, event_id: str) -> None: if self.database_engine.can_native_upsert: await self.db_pool.runInteraction( "record_event_backfill_attempt", self._record_event_backfill_attempt_upsert_native_txn, + room_id, event_id, db_autocommit=True, # Safe as its a single upsert ) @@ -1305,40 +1306,42 @@ async def record_event_backfill_attempt(self, event_id: str) -> None: await self.db_pool.runInteraction( "record_event_backfill_attempt", self._record_event_backfill_attempt_upsert_emulated_txn, + room_id, event_id, ) def _record_event_backfill_attempt_upsert_native_txn( self, txn: LoggingTransaction, + room_id: str, event_id: str, ) -> None: assert self.database_engine.can_native_upsert sql = """ - INSERT INTO event_backfill_attempts ( - event_id, num_attempts, last_attempt_ts + INSERT INTO event_failed_backfill_attempts ( + room_id, event_id, num_attempts, last_attempt_ts ) - VALUES (?, ?, ?) - ON CONFLICT (event_id) DO UPDATE SET - event_id=EXCLUDED.event_id, - num_attempts=event_backfill_attempts.num_attempts + 1, + VALUES (?, ?, ?, ?) 
+ ON CONFLICT (room_id, event_id) DO UPDATE SET + num_attempts=event_failed_backfill_attempts.num_attempts + 1, last_attempt_ts=EXCLUDED.last_attempt_ts; """ - txn.execute(sql, (event_id, 1, self._clock.time_msec())) + txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec())) def _record_event_backfill_attempt_upsert_emulated_txn( self, txn: LoggingTransaction, + room_id: str, event_id: str, ) -> None: - self.database_engine.lock_table(txn, "event_backfill_attempts") + self.database_engine.lock_table(txn, "event_failed_backfill_attempts") prev_row = self.db_pool.simple_select_one_txn( txn, - table="event_backfill_attempts", - keyvalues={"event_id": event_id}, + table="event_failed_backfill_attempts", + keyvalues={"room_id": room_id, "event_id": event_id}, retcols=("num_attempts"), allow_none=True, ) @@ -1346,8 +1349,9 @@ def _record_event_backfill_attempt_upsert_emulated_txn( if not prev_row: self.db_pool.simple_insert_txn( txn, - table="event_backfill_attempts", + table="event_failed_backfill_attempts", values={ + "room_id": room_id, "event_id": event_id, "num_attempts": 1, "last_attempt_ts": self._clock.time_msec(), @@ -1356,10 +1360,9 @@ def _record_event_backfill_attempt_upsert_emulated_txn( else: self.db_pool.simple_update_one_txn( txn, - table="event_backfill_attempts", - keyvalues={"event_id": event_id}, + table="event_failed_backfill_attempts", + keyvalues={"room_id": room_id, "event_id": event_id}, updatevalues={ - "event_id": event_id, "num_attempts": prev_row["num_attempts"] + 1, "last_attempt_ts": self._clock.time_msec(), }, diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 4ca326cad8ed..881819fc5774 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2449,20 +2449,17 @@ def _update_backward_extremeties( backward_extremity_tuples_to_remove, ) - # Since we no longer need these backward extremities, it also means that - # they won't be backfilled from again so we no longer need to store the - # backfill attempts around it. + # Clear out the failed backfill attempts after we successfully pulled + # the event. Since we no longer need these events as backward + # extremities, it also means that they won't be backfilled from again so + # we no longer need to store the backfill attempts around it. query = """ - DELETE FROM event_backfill_attempts - WHERE event_id = ? + DELETE FROM event_failed_backfill_attempts + WHERE event_id = ? and room_id = ? """ - backward_extremity_event_ids_to_remove = [ - (extremity_tuple[0],) - for extremity_tuple in backward_extremity_tuples_to_remove - ] txn.execute_batch( query, - backward_extremity_event_ids_to_remove, + backward_extremity_tuples_to_remove, ) diff --git a/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql b/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql index 71224c2b48c0..fbd366809e7e 100644 --- a/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql +++ b/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql @@ -14,14 +14,14 @@ */ --- Add a table that keeps track of when we last tried to backfill an event. This +-- Add a table that keeps track of when we failed to backfill an event. This -- allows us to be more intelligent when we decide to retry (we don't need to -- fail over and over) and we can process that event in the background so we -- don't block on it each time. 
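As a standalone sanity check of the native upsert above, here is a minimal sketch, assuming SQLite 3.24+ for `ON CONFLICT` support. The connection bootstrap and values are illustrative, and the unqualified `num_attempts` on the right-hand side of the update refers to the existing row:

```
import sqlite3
import time

conn = sqlite3.connect(":memory:")
conn.execute(
    """
    CREATE TABLE event_failed_backfill_attempts(
        room_id TEXT NOT NULL,
        event_id TEXT NOT NULL,
        num_attempts INT NOT NULL,
        last_attempt_ts BIGINT NOT NULL,
        PRIMARY KEY (room_id, event_id)
    )
    """
)

def record_attempt(room_id: str, event_id: str) -> None:
    # The first call inserts the row with num_attempts=1; later calls hit
    # the (room_id, event_id) conflict and bump num_attempts instead.
    conn.execute(
        """
        INSERT INTO event_failed_backfill_attempts (
            room_id, event_id, num_attempts, last_attempt_ts
        )
        VALUES (?, ?, ?, ?)
        ON CONFLICT (room_id, event_id) DO UPDATE SET
            num_attempts = num_attempts + 1,
            last_attempt_ts = excluded.last_attempt_ts
        """,
        (room_id, event_id, 1, int(time.time() * 1000)),
    )

record_attempt("!room:example.com", "$failed_event")
record_attempt("!room:example.com", "$failed_event")
(attempts,) = conn.execute(
    "SELECT num_attempts FROM event_failed_backfill_attempts"
).fetchone()
assert attempts == 2
```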
-CREATE TABLE IF NOT EXISTS event_backfill_attempts( +CREATE TABLE IF NOT EXISTS event_failed_backfill_attempts( + room_id TEXT NOT NULL, event_id TEXT NOT NULL, num_attempts INT NOT NULL, - last_attempt_ts BIGINT NOT NULL + last_attempt_ts BIGINT NOT NULL, + PRIMARY KEY (room_id, event_id) ); - -CREATE UNIQUE INDEX IF NOT EXISTS event_backfill_attempts_event_id ON event_backfill_attempts(event_id); From 7a449326afd72a639dc122bddb5e4d042d6b5158 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 14:45:51 -0500 Subject: [PATCH 36/66] Rename to record_event_failed_backfill_attempt See https://github.com/matrix-org/synapse/pull/13589#discussion_r959838055 --- synapse/handlers/federation_event.py | 8 ++++-- .../databases/main/event_federation.py | 27 ++++++++++++++----- ...l => 04event_failed_backfill_attempts.sql} | 0 tests/handlers/test_federation_event.py | 16 ++++++----- 4 files changed, 35 insertions(+), 16 deletions(-) rename synapse/storage/schema/main/delta/72/{04event_backfill_access_time.sql => 04event_failed_backfill_attempts.sql} (100%) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 695dde417dfc..60dd024cb154 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -863,7 +863,9 @@ async def _process_pulled_event( except SynapseError as err: logger.warning("Event %s failed sanity check: %s", event_id, err) if backfilled: - await self._store.record_event_backfill_attempt(event_id) + await self._store.record_event_failed_backfill_attempt( + event.room_id, event_id + ) return try: @@ -900,7 +902,9 @@ async def _process_pulled_event( ) except FederationError as e: if backfilled: - await self._store.record_event_backfill_attempt(event_id) + await self._store.record_event_failed_backfill_attempt( + event.room_id, event_id + ) if e.code == 403: logger.warning("Pulled event %s failed history check.", event_id) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index b314bf557b4f..e4fd9b1a0c56 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1293,24 +1293,37 @@ def _get_backfill_events( return event_id_results @trace - async def record_event_backfill_attempt(self, room_id: str, event_id: str) -> None: + async def record_event_failed_backfill_attempt( + self, room_id: str, event_id: str + ) -> None: + """ + Record when we fail to backfill an event. + + This information allows us to be more intelligent when we decide to + retry (we don't need to fail over and over) and we can process that + event in the background so we don't block on it each time. 
+ + Args: + room_id: The room where the event failed to backfill from + event_id: The event that failed to be backfilled + """ if self.database_engine.can_native_upsert: await self.db_pool.runInteraction( - "record_event_backfill_attempt", - self._record_event_backfill_attempt_upsert_native_txn, + "record_event_failed_backfill_attempt", + self._record_event_failed_backfill_attempt_upsert_native_txn, room_id, event_id, db_autocommit=True, # Safe as its a single upsert ) else: await self.db_pool.runInteraction( - "record_event_backfill_attempt", - self._record_event_backfill_attempt_upsert_emulated_txn, + "record_event_failed_backfill_attempt", + self._record_event_failed_backfill_attempt_upsert_emulated_txn, room_id, event_id, ) - def _record_event_backfill_attempt_upsert_native_txn( + def _record_event_failed_backfill_attempt_upsert_native_txn( self, txn: LoggingTransaction, room_id: str, @@ -1330,7 +1343,7 @@ def _record_event_backfill_attempt_upsert_native_txn( txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec())) - def _record_event_backfill_attempt_upsert_emulated_txn( + def _record_event_failed_backfill_attempt_upsert_emulated_txn( self, txn: LoggingTransaction, room_id: str, diff --git a/synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql b/synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql similarity index 100% rename from synapse/storage/schema/main/delta/72/04event_backfill_access_time.sql rename to synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 6461efbe4536..102129025b2f 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -233,7 +233,7 @@ def test_process_pulled_event_records_failed_backfill_attempts( ) -> None: """ Test to make sure that failed backfill attempts for an event are - recorded in the `event_backfill_attempts` table. + recorded in the `event_failed_backfill_attempts` table. In this test, we pretend we are processing a "pulled" event via backfill. The pulled event has a fake `prev_event` which our server has @@ -310,7 +310,7 @@ def test_process_pulled_event_records_failed_backfill_attempts( # Make sure our backfill attempt was recorded backfill_num_attempts = self.get_success( main_store.db_pool.simple_select_one_onecol( - table="event_backfill_attempts", + table="event_failed_backfill_attempts", keyvalues={"event_id": pulled_event.event_id}, retcol="num_attempts", ) @@ -328,7 +328,7 @@ def test_process_pulled_event_records_failed_backfill_attempts( # Make sure our second backfill attempt was recorded (`num_attempts` was incremented) backfill_num_attempts = self.get_success( main_store.db_pool.simple_select_one_onecol( - table="event_backfill_attempts", + table="event_failed_backfill_attempts", keyvalues={"event_id": pulled_event.event_id}, retcol="num_attempts", ) @@ -349,7 +349,7 @@ def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_ ) -> None: """ Test to make sure that failed backfill attempts - (`event_backfill_attempts` table) for an event are cleared after the + (`event_failed_backfill_attempts` table) for an event are cleared after the event is successfully persisted. 
In this test, we pretend we are processing a "pulled" event via @@ -411,12 +411,14 @@ def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_ # Fake the "pulled" event failing to backfill once so we can test # if it's cleared out later on. self.get_success( - main_store.record_event_backfill_attempt(pulled_event.event_id) + main_store.record_event_failed_backfill_attempt( + pulled_event.room_id, pulled_event.event_id + ) ) # Make sure we have a backfill attempt recorded for the pulled event backfill_num_attempts = self.get_success( main_store.db_pool.simple_select_one_onecol( - table="event_backfill_attempts", + table="event_failed_backfill_attempts", keyvalues={"event_id": pulled_event.event_id}, retcol="num_attempts", ) @@ -434,7 +436,7 @@ def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_ # Make sure the backfill attempt for the pulled event are cleared backfill_num_attempts = self.get_success( main_store.db_pool.simple_select_one_onecol( - table="event_backfill_attempts", + table="event_failed_backfill_attempts", keyvalues={"event_id": pulled_event.event_id}, retcol="num_attempts", allow_none=True, From 506a8ddbdd131b995ca222f12a0251858df9b52e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 15:19:36 -0500 Subject: [PATCH 37/66] Changes after merging madlittlemods/keep-track-when-we-tried-to-backfill-an-event --- .../storage/databases/main/event_federation.py | 16 +++++++++------- tests/storage/test_event_federation.py | 18 +++++++++--------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 485940c46302..b1d991c04116 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -785,8 +785,10 @@ def get_backfill_points_in_room_txn( * We use this info to make sure we don't retry to use a backfill point * if we've already attempted to backfill from it recently. */ - LEFT JOIN event_backfill_attempts AS backfill_attempt_info - ON backfill_attempt_info.event_id = backward_extrem.event_id + LEFT JOIN event_failed_backfill_attempts AS failed_backfill_attempt_info + ON + failed_backfill_attempt_info.room_id = backward_extrem.room_id + AND failed_backfill_attempt_info.event_id = backward_extrem.event_id WHERE backward_extrem.room_id = ? /* We only care about normal events because TODO: why */ @@ -812,8 +814,8 @@ def get_backfill_points_in_room_txn( * would use `power(2, n)` or the power operator, `2^n`. */ AND ( - backfill_attempt_info IS NULL - OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least((2 << (backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) + failed_backfill_attempt_info IS NULL + OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + least((2 << (failed_backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) ) /** * Sort from highest (closest to the `current_depth`) to the lowest depth @@ -891,7 +893,7 @@ def get_insertion_event_backward_extremities_in_room_txn( * We use this info to make sure we don't retry to use a backfill point * if we've already attempted to backfill from it recently. */ - LEFT JOIN event_backfill_attempts AS backfill_attempt_info USING (event_id) + LEFT JOIN event_failed_backfill_attempts AS failed_backfill_attempt_info USING (room_id, event_id) WHERE insertion_event_extremity.room_id = ? AND event.depth <= ? 
/* current_depth */ @@ -905,8 +907,8 @@ def get_insertion_event_backward_extremities_in_room_txn( * would use `power(2, n)` or the power operator, `2^n`. */ AND ( - backfill_attempt_info IS NULL - OR ? /* current_time */ >= backfill_attempt_info.last_attempt_ts + least((2 << (backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) + failed_backfill_attempt_info IS NULL + OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + least((2 << (failed_backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) ) /** * Sort from highest (closest to the `current_depth`) to the lowest depth diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 53437fbb4391..dd1ea704c4c5 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -781,10 +781,10 @@ def test_get_backfill_points_in_room_excludes_events_we_have_attempted( # Record some attempts to backfill these events which will make # `get_backfill_points_in_room` exclude them because we # haven't passed the backoff interval. - self.get_success(self.store.record_event_backfill_attempt("b5")) - self.get_success(self.store.record_event_backfill_attempt("b4")) - self.get_success(self.store.record_event_backfill_attempt("b3")) - self.get_success(self.store.record_event_backfill_attempt("b2")) + self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b5")) + self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b4")) + self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b3")) + self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b2")) # No time has passed since we attempted to backfill ^ @@ -811,11 +811,11 @@ def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duratio # Record some attempts to backfill these events which will make # `get_backfill_points_in_room` exclude them because we # haven't passed the backoff interval. 
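The retry condition in the SQL above can be read as a pure function; a hedged Python equivalent follows (the real step and upper bound are supplied as query parameters, the hour values here are illustrative). Note that for `n >= 1`, `2 << (n - 1)` equals `2**n`, the same value `power(2, n)` produced before the SQLite-compatibility change:

```
def backfill_point_is_retryable(
    current_time_ms: int,
    last_attempt_ts_ms: int,
    num_attempts: int,
    step_ms: int,
    upper_bound_ms: int,
) -> bool:
    # Mirror of the SQL condition: back off exponentially per failed
    # attempt, but never wait longer than the upper bound.
    assert num_attempts >= 1
    backoff_ms = min((2 << (num_attempts - 1)) * step_ms, upper_bound_ms)
    return current_time_ms >= last_attempt_ts_ms + backoff_ms


HOUR_MS = 60 * 60 * 1000
# One failed attempt with a 1-hour step: retryable only after 2 hours.
assert not backfill_point_is_retryable(1 * HOUR_MS, 0, 1, HOUR_MS, 24 * HOUR_MS)
assert backfill_point_is_retryable(2 * HOUR_MS, 0, 1, HOUR_MS, 24 * HOUR_MS)
```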
- self.get_success(self.store.record_event_backfill_attempt("b3")) - self.get_success(self.store.record_event_backfill_attempt("b1")) - self.get_success(self.store.record_event_backfill_attempt("b1")) - self.get_success(self.store.record_event_backfill_attempt("b1")) - self.get_success(self.store.record_event_backfill_attempt("b1")) + self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b3")) + self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b1")) + self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b1")) + self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b1")) + self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b1")) # Now advance time by 2 hours and we should only be able to see "b3" # because we have waited long enough for the single attempt (2^1 hours) From 361ce5cd0627c2a11701305e921ad410f778711d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 15:37:39 -0500 Subject: [PATCH 38/66] Use compatible least/min on each db platform --- synapse/storage/databases/main/event_federation.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index b1d991c04116..30f3e16c8d67 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -44,7 +44,7 @@ ) from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.signatures import SignatureWorkerStore -from synapse.storage.engines import PostgresEngine +from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.descriptors import cached @@ -815,7 +815,7 @@ def get_backfill_points_in_room_txn( */ AND ( failed_backfill_attempt_info IS NULL - OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + least((2 << (failed_backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) + OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((2 << (failed_backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? 
/* upper bound */) ) /** * Sort from highest (closest to the `current_depth`) to the lowest depth @@ -826,8 +826,15 @@ def get_backfill_points_in_room_txn( ORDER BY event.depth DESC, backward_extrem.event_id DESC """ + if isinstance(self.database_engine, PostgresEngine): + least_function = "least" + elif isinstance(self.database_engine, Sqlite3Engine): + least_function = "min" + else: + raise RuntimeError("Unknown database engine") + txn.execute( - sql, + sql % (least_function,), ( room_id, False, From b09d8a20077437035cdee0bdb15eee9f5fa40db8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 15:57:36 -0500 Subject: [PATCH 39/66] Fix SQLite no such column error when comparing table to null See https://github.com/matrix-org/synapse/pull/13635#discussion_r959929330 Fix the following error: ``` sqlite3.OperationalError: no such column: failed_backfill_attempt_info ``` --- synapse/storage/databases/main/event_federation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 30f3e16c8d67..b241918cdc5b 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -814,7 +814,7 @@ def get_backfill_points_in_room_txn( * would use `power(2, n)` or the power operator, `2^n`. */ AND ( - failed_backfill_attempt_info IS NULL + failed_backfill_attempt_info.event_id IS NULL OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((2 << (failed_backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) ) /** @@ -914,7 +914,7 @@ def get_insertion_event_backward_extremities_in_room_txn( * would use `power(2, n)` or the power operator, `2^n`. */ AND ( - failed_backfill_attempt_info IS NULL + failed_backfill_attempt_info.event_id IS NULL OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + least((2 << (failed_backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) ) /** From 965d1424b85ddce29c703b0a29100e09d3286f48 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 16:03:59 -0500 Subject: [PATCH 40/66] Add comment about how these are sorted by depth now See https://github.com/matrix-org/synapse/pull/13635#discussion_r959929765 --- synapse/storage/databases/main/event_federation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index b241918cdc5b..520cc31aa5d4 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -729,7 +729,7 @@ async def get_backfill_points_in_room( ) -> List[Tuple[str, int]]: """ Gets the oldest events(backwards extremities) in the room along with the - aproximate depth. + aproximate depth (sorted by depth descending). We use this function so that we can compare and see if someones `current_depth` at their current scrollback is within pagination range @@ -752,7 +752,7 @@ async def get_backfill_points_in_room( `current_depth` when scrolling and paginating backwards. Returns: - List of (event_id, depth) tuples + List of (event_id, depth) tuples (sorted by depth descending). 
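The "no such column" error quoted in the commit message above comes from SQLite parsing `failed_backfill_attempt_info IS NULL` as a column reference, whereas PostgreSQL accepts a row-wise NULL test on a table alias. A small sketch of both behaviours on SQLite (table names are illustrative): checking a non-nullable column from the joined table detects the missing row portably.

```
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript(
    """
    CREATE TABLE extrem(event_id TEXT PRIMARY KEY);
    CREATE TABLE attempts(event_id TEXT PRIMARY KEY, num_attempts INT);
    INSERT INTO extrem VALUES ('$a');
    INSERT INTO extrem VALUES ('$b');
    INSERT INTO attempts VALUES ('$a', 3);
    """
)

try:
    # Comparing a whole table alias to NULL is not portable:
    conn.execute(
        "SELECT extrem.event_id FROM extrem "
        "LEFT JOIN attempts ON attempts.event_id = extrem.event_id "
        "WHERE attempts IS NULL"
    )
except sqlite3.OperationalError as err:
    print(err)  # no such column: attempts

# A column from the joined table is NULL exactly when the LEFT JOIN
# found no match, i.e. the extremity has never failed before.
rows = conn.execute(
    "SELECT extrem.event_id FROM extrem "
    "LEFT JOIN attempts ON attempts.event_id = extrem.event_id "
    "WHERE attempts.event_id IS NULL"
).fetchall()
assert rows == [("$b",)]
```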
""" def get_backfill_points_in_room_txn( @@ -861,7 +861,7 @@ async def get_insertion_event_backward_extremities_in_room( ) -> List[Tuple[str, int]]: """ Get the insertion events we know about that we haven't backfilled yet - along with the aproximate depth. + along with the aproximate depth (sorted by depth descending). We use this function so that we can compare and see if someones `current_depth` at their current scrollback is within pagination range @@ -884,7 +884,7 @@ async def get_insertion_event_backward_extremities_in_room( `current_depth` when scrolling and paginating backwards. Returns: - List of (event_id, depth) tuples + List of (event_id, depth) tuples (sorted by depth descending) """ def get_insertion_event_backward_extremities_in_room_txn( From 267777fb05e919882a97df044d0fc228bf2b4cf3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 16:07:16 -0500 Subject: [PATCH 41/66] Apply same least compatiblity to insertion event extremities --- synapse/storage/databases/main/event_federation.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 520cc31aa5d4..da93dd5fc20b 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -915,7 +915,7 @@ def get_insertion_event_backward_extremities_in_room_txn( */ AND ( failed_backfill_attempt_info.event_id IS NULL - OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + least((2 << (failed_backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) + OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((2 << (failed_backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) ) /** * Sort from highest (closest to the `current_depth`) to the lowest depth @@ -926,8 +926,15 @@ def get_insertion_event_backward_extremities_in_room_txn( ORDER BY event.depth DESC, insertion_event_extremity.event_id DESC """ + if isinstance(self.database_engine, PostgresEngine): + least_function = "least" + elif isinstance(self.database_engine, Sqlite3Engine): + least_function = "min" + else: + raise RuntimeError("Unknown database engine") + txn.execute( - sql, + sql % (least_function,), ( room_id, current_depth, From d0cd42aa6ac93ff27b8f92ef5e40100df2760d06 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 16:12:56 -0500 Subject: [PATCH 42/66] Fix lints --- synapse/handlers/federation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 6a99796405b8..cbdb6ce45a47 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -38,7 +38,7 @@ from unpaddedbase64 import decode_base64 from synapse import event_auth -from synapse.api.constants import EventContentFields, EventTypes, MAX_DEPTH, Membership +from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership from synapse.api.errors import ( AuthError, CodeMessageException, @@ -274,7 +274,7 @@ async def _maybe_backfill_inner( logger.debug( "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway." 
) - return self._maybe_backfill_inner( + return await self._maybe_backfill_inner( room_id=room_id, # We use `MAX_DEPTH` so that we find all backfill points next # time (all events are below the `MAX_DEPTH`) @@ -285,7 +285,10 @@ async def _maybe_backfill_inner( elif not sorted_backfill_points and current_depth == MAX_DEPTH: # Even after trying again with `MAX_DEPTH`, we didn't find any # backward extremities to backfill from. - logger.debug("Not backfilling as no backward extremeties found.") + logger.debug( + "_maybe_backfill_inner: Not backfilling as no backward extremeties found." + ) + return False # If we're approaching an extremity we trigger a backfill, otherwise we # no-op. From 3d9f625574f7346ba1c232de535be886b60f0df5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 21:27:53 -0500 Subject: [PATCH 43/66] Try fix ambiguous column (remove unsued table) See https://github.com/matrix-org/synapse/pull/13635#discussion_r960162968 --- synapse/storage/databases/main/event_federation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index da93dd5fc20b..7cc316ea921f 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -891,9 +891,10 @@ def get_insertion_event_backward_extremities_in_room_txn( txn: LoggingTransaction, room_id: str ) -> List[Tuple[str, int]]: sql = """ - SELECT insertion_event_extremity.event_id, event.depth FROM insertion_events as insertion_event + SELECT + insertion_event_extremity.event_id, event.depth /* We only want insertion events that are also marked as backwards extremities */ - INNER JOIN insertion_event_extremities as insertion_event_extremity USING (event_id) + FROM insertion_event_extremities AS insertion_event_extremity /* Get the depth of the insertion event from the events table */ INNER JOIN events AS event USING (event_id) /** From 33a3c64afb0073258403326e4015b27f3ca7bd6d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 21:35:16 -0500 Subject: [PATCH 44/66] Fix ambiguous column See https://github.com/matrix-org/synapse/pull/13635#discussion_r960162968 ``` psycopg2.errors.AmbiguousColumn: common column name "room_id" appears more than once in left table ``` --- synapse/storage/databases/main/event_federation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 7cc316ea921f..cbb643297e10 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -901,7 +901,10 @@ def get_insertion_event_backward_extremities_in_room_txn( * We use this info to make sure we don't retry to use a backfill point * if we've already attempted to backfill from it recently. */ - LEFT JOIN event_failed_backfill_attempts AS failed_backfill_attempt_info USING (room_id, event_id) + LEFT JOIN event_failed_backfill_attempts AS failed_backfill_attempt_info + ON + failed_backfill_attempt_info.room_id = insertion_event_extremity.room_id + AND failed_backfill_attempt_info.event_id = insertion_event_extremity.event_id WHERE insertion_event_extremity.room_id = ? AND event.depth <= ? 
/* current_depth */ From 6736d10b0997434fa73f66a743789189210b162e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Aug 2022 23:16:26 -0500 Subject: [PATCH 45/66] Add tests for get_insertion_event_backward_extremities_in_room --- tests/storage/test_event_federation.py | 191 ++++++++++++++++++++++++- 1 file changed, 189 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index dd1ea704c4c5..6a3ca158a406 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -21,6 +21,7 @@ from twisted.test.proto_helpers import MemoryReactor +from synapse.api.constants import EventTypes from synapse.api.room_versions import ( KNOWN_ROOM_VERSIONS, EventFormatVersions, @@ -599,7 +600,8 @@ def _setup_room_for_backfill_tests(self) -> _BackfillSetupInfo: backfill functions against. Returns: - room_id to test against + _BackfillSetupInfo including the `room_id` to test against and + `depth_map` of events in the room """ room_id = "!backfill-room-test:some-host" @@ -733,7 +735,7 @@ def populate_db(txn: LoggingTransaction): self.get_success( self.store.db_pool.runInteraction( - "populate_db", + "_setup_room_for_backfill_tests_populate_db", populate_db, ) ) @@ -843,6 +845,191 @@ def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duratio backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"]) + def _setup_room_for_insertion_backfill_tests(self) -> _BackfillSetupInfo: + """ + Sets up a room with various insertion event backward extremities to test + backfill functions against. + + Returns: + _BackfillSetupInfo including the `room_id` to test against and + `depth_map` of events in the room + """ + room_id = "!backfill-room-test:some-host" + + depth_map: Dict[str, int] = { + "1": 1, + "2": 2, + "insertion_eventA": 3, + "3": 4, + "insertion_eventB": 5, + "4": 6, + "5": 7, + } + + def populate_db(txn: LoggingTransaction): + # Insert our server events + stream_ordering = 0 + for event_id, depth in depth_map.items(): + self.store.db_pool.simple_insert_txn( + txn, + table="events", + values={ + "event_id": event_id, + "type": EventTypes.MSC2716_INSERTION + if event_id.startswith("insertion_event") + else "test_regular_type", + "room_id": room_id, + "depth": depth, + "topological_ordering": depth, + "stream_ordering": stream_ordering, + "processed": True, + "outlier": False, + }, + ) + + if event_id.startswith("insertion_event"): + self.store.db_pool.simple_insert_txn( + txn, + table="insertion_event_extremities", + values={ + "event_id": event_id, + "room_id": room_id, + }, + ) + + stream_ordering += 1 + + self.get_success( + self.store.db_pool.runInteraction( + "_setup_room_for_insertion_backfill_tests_populate_db", + populate_db, + ) + ) + + return _BackfillSetupInfo(room_id=room_id, depth_map=depth_map) + + def test_get_insertion_event_backward_extremities_in_room(self): + """ + Test to make sure only insertion event backward extremities that are + older and come before the `current_depth` are returned. 
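A toy model of what the query under test computes, using the same shape as the `depth_map` above (an illustration only, not the store implementation):

```
from typing import Dict, List, Tuple

def pick_backfill_points(
    extremity_depths: Dict[str, int], current_depth: int
) -> List[Tuple[str, int]]:
    # Keep extremities at or below the reader's current depth and sort by
    # depth (then event_id) descending, closest points first, mirroring
    # `ORDER BY event.depth DESC, event_id DESC` in the real query.
    candidates = [(e, d) for e, d in extremity_depths.items() if d <= current_depth]
    return sorted(candidates, key=lambda point: (point[1], point[0]), reverse=True)

# Same shape as the `depth_map` in the test setup:
assert pick_backfill_points(
    {"insertion_eventA": 3, "insertion_eventB": 5}, current_depth=5
) == [("insertion_eventB", 5), ("insertion_eventA", 3)]
```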
+ """ + setup_info = self._setup_room_for_insertion_backfill_tests() + room_id = setup_info.room_id + depth_map = setup_info.depth_map + + # Try at "insertion_eventB" + backfill_points = self.get_success( + self.store.get_insertion_event_backward_extremities_in_room( + room_id, depth_map["insertion_eventB"] + ) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + self.assertListEqual( + backfill_event_ids, ["insertion_eventB", "insertion_eventA"] + ) + + # Try at "insertion_eventA" + backfill_points = self.get_success( + self.store.get_insertion_event_backward_extremities_in_room( + room_id, depth_map["insertion_eventA"] + ) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + # Event "2" has a depth of 2 but is not included here because we only + # know the approximate depth of 5 from our event "3". + self.assertListEqual(backfill_event_ids, ["insertion_eventA"]) + + def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_have_attempted( + self, + ): + """ + Test to make sure that events we have attempted to backfill (and within + backoff timeout duration) do not show up as an event to backfill again. + """ + setup_info = self._setup_room_for_insertion_backfill_tests() + room_id = setup_info.room_id + depth_map = setup_info.depth_map + + logger.info("depth_map=%s", depth_map) + + # Record some attempts to backfill these events which will make + # `get_insertion_event_backward_extremities_in_room` exclude them + # because we haven't passed the backoff interval. + self.get_success( + self.store.record_event_failed_backfill_attempt(room_id, "insertion_eventA") + ) + + # No time has passed since we attempted to backfill ^ + + # Try at "B" + backfill_points = self.get_success( + self.store.get_insertion_event_backward_extremities_in_room( + room_id, depth_map["insertion_eventB"] + ) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + # Only the backfill points that we didn't record earlier exist here. + self.assertListEqual(backfill_event_ids, ["insertion_eventB"]) + + def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_after_backoff_duration( + self, + ): + """ + Test to make sure after we fake attempt to backfill event "b3" many times, + we can see retry and see the "b3" again after the backoff timeout duration + has exceeded. + """ + setup_info = self._setup_room_for_insertion_backfill_tests() + room_id = setup_info.room_id + depth_map = setup_info.depth_map + + # Record some attempts to backfill these events which will make + # `get_backfill_points_in_room` exclude them because we + # haven't passed the backoff interval. + self.get_success( + self.store.record_event_failed_backfill_attempt(room_id, "insertion_eventA") + ) + self.get_success( + self.store.record_event_failed_backfill_attempt(room_id, "insertion_eventA") + ) + self.get_success( + self.store.record_event_failed_backfill_attempt(room_id, "insertion_eventA") + ) + self.get_success( + self.store.record_event_failed_backfill_attempt(room_id, "insertion_eventA") + ) + + # Now advance time by 2 hours and we should only be able to see "b3" + # because we have waited long enough for the single attempt (2^1 hours) + # but we still shouldn't see "b1" because we haven't waited long enough + # for this many attempts. We didn't do anything to "b2" so it should be + # visible regardless. 
+        self.reactor.advance(datetime.timedelta(hours=2).total_seconds())
+
+        # Try at "insertion_eventA" and make sure that "insertion_eventA" is not
+        # in the list because we've already attempted many times
+        backfill_points = self.get_success(
+            self.store.get_insertion_event_backward_extremities_in_room(
+                room_id, depth_map["insertion_eventA"]
+            )
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        self.assertListEqual(backfill_event_ids, [])
+
+        # Now advance time by 20 hours (above 2^4 because we made 4 attempts) and
+        # see if we can now backfill it
+        self.reactor.advance(datetime.timedelta(hours=20).total_seconds())
+
+        # Try at "insertion_eventA" again after we advanced enough time and we
+        # should see "insertion_eventA" again
+        backfill_points = self.get_success(
+            self.store.get_insertion_event_backward_extremities_in_room(
+                room_id, depth_map["insertion_eventA"]
+            )
+        )
+        backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points]
+        self.assertListEqual(backfill_event_ids, ["insertion_eventA"])
+

 @attr.s
 class FakeEvent:

From 6eba1d4133db0effc42b797465294b848c6c1078 Mon Sep 17 00:00:00 2001
From: Eric Eastwood
Date: Wed, 31 Aug 2022 23:32:08 -0500
Subject: [PATCH 46/66] Fix up test descriptions

---
 tests/storage/test_event_federation.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py
index 6a3ca158a406..037dd854beb8 100644
--- a/tests/storage/test_event_federation.py
+++ b/tests/storage/test_event_federation.py
@@ -943,8 +943,9 @@ def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_hav
         self,
     ):
         """
-        Test to make sure that events we have attempted to backfill (and within
-        backoff timeout duration) do not show up as an event to backfill again.
+        Test to make sure that insertion events we have attempted to backfill
+        (and within backoff timeout duration) do not show up as an event to
+        backfill again.
         """
         setup_info = self._setup_room_for_insertion_backfill_tests()
         room_id = setup_info.room_id
@@ -975,9 +976,10 @@ def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_
         self,
     ):
         """
-        Test to make sure after we fake attempt to backfill event "b3" many times,
-        we can see retry and see the "b3" again after the backoff timeout duration
-        has exceeded.
+        Test to make sure after we fake attempt to backfill event
+        "insertion_eventA" many times, we can retry and see
+        "insertion_eventA" again after the backoff timeout duration has
+        exceeded.
""" setup_info = self._setup_room_for_insertion_backfill_tests() room_id = setup_info.room_id From 1464101c468484cee27882698a5eb32e1d4a5fc0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 1 Sep 2022 14:56:58 -0500 Subject: [PATCH 47/66] Add _unsafe_to_upsert_tables check See https://github.com/matrix-org/synapse/pull/13589#discussion_r959840527 --- synapse/storage/databases/main/event_federation.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index e4fd9b1a0c56..95e58480e4fd 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1307,7 +1307,11 @@ async def record_event_failed_backfill_attempt( room_id: The room where the event failed to backfill from event_id: The event that failed to be backfilled """ - if self.database_engine.can_native_upsert: + if ( + self.database_engine.can_native_upsert + and "event_failed_backfill_attempts" + not in self.db_pool._unsafe_to_upsert_tables + ): await self.db_pool.runInteraction( "record_event_failed_backfill_attempt", self._record_event_failed_backfill_attempt_upsert_native_txn, From 71c77382f7181e989e1bc51ab1134979884cb9e1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 1 Sep 2022 15:20:09 -0500 Subject: [PATCH 48/66] Add foreign key references See https://github.com/matrix-org/synapse/pull/13589#discussion_r960568864 --- .../schema/main/delta/72/04event_failed_backfill_attempts.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql b/synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql index fbd366809e7e..f39fff0d60c7 100644 --- a/synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql +++ b/synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql @@ -19,8 +19,8 @@ -- fail over and over) and we can process that event in the background so we -- don't block on it each time. CREATE TABLE IF NOT EXISTS event_failed_backfill_attempts( - room_id TEXT NOT NULL, - event_id TEXT NOT NULL, + room_id TEXT NOT NULL REFERENCES rooms (room_id), + event_id TEXT NOT NULL REFERENCES events (event_id), num_attempts INT NOT NULL, last_attempt_ts BIGINT NOT NULL, PRIMARY KEY (room_id, event_id) From d45b0783557eb3b0f60393cbb6b9d9fd6769f1e3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 1 Sep 2022 16:11:49 -0500 Subject: [PATCH 49/66] Remove reference to event that won't be in the events table See https://github.com/matrix-org/synapse/pull/13589#discussion_r961104120 --- .../schema/main/delta/72/04event_failed_backfill_attempts.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql b/synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql index f39fff0d60c7..e3c557540cdc 100644 --- a/synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql +++ b/synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql @@ -20,7 +20,7 @@ -- don't block on it each time. 
CREATE TABLE IF NOT EXISTS event_failed_backfill_attempts( room_id TEXT NOT NULL REFERENCES rooms (room_id), - event_id TEXT NOT NULL REFERENCES events (event_id), + event_id TEXT NOT NULL, num_attempts INT NOT NULL, last_attempt_ts BIGINT NOT NULL, PRIMARY KEY (room_id, event_id) From 599e212f87dffd6ee921a208dd21f2b163f6d419 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 1 Sep 2022 18:11:32 -0500 Subject: [PATCH 50/66] Fix approximate typo --- synapse/storage/databases/main/event_federation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 41c8234ae82f..ad5b68f0210b 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -729,7 +729,7 @@ async def get_backfill_points_in_room( ) -> List[Tuple[str, int]]: """ Gets the oldest events(backwards extremities) in the room along with the - aproximate depth (sorted by depth descending). + approximate depth (sorted by depth descending). We use this function so that we can compare and see if someones `current_depth` at their current scrollback is within pagination range @@ -763,7 +763,7 @@ def get_backfill_points_in_room_txn( # events we know of in the room but we only know of them because # some other event referenced them by prev_event and aren't # persisted in our database yet (meaning we don't know their depth - # specifically). So we need to look for the aproximate depth from + # specifically). So we need to look for the approximate depth from # the events connected to the current backwards extremeties. sql = """ SELECT backward_extrem.event_id, event.depth FROM events AS event @@ -861,7 +861,7 @@ async def get_insertion_event_backward_extremities_in_room( ) -> List[Tuple[str, int]]: """ Get the insertion events we know about that we haven't backfilled yet - along with the aproximate depth (sorted by depth descending). + along with the approximate depth (sorted by depth descending). We use this function so that we can compare and see if someones `current_depth` at their current scrollback is within pagination range From bc8046b66ae3228c897a2a47f6ca8df2d26669fd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 1 Sep 2022 18:17:49 -0500 Subject: [PATCH 51/66] Clarify what depth sort See https://github.com/matrix-org/synapse/pull/13635#discussion_r960483583 --- .../databases/main/event_federation.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index ad5b68f0210b..89781ce57669 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -729,7 +729,8 @@ async def get_backfill_points_in_room( ) -> List[Tuple[str, int]]: """ Gets the oldest events(backwards extremities) in the room along with the - approximate depth (sorted by depth descending). + approximate depth. Sorted by depth, highest to lowest (descending) so the closest + events to the `current_depth` are first in the list. 
We use this function so that we can compare and see if someones `current_depth` at their current scrollback is within pagination range @@ -747,12 +748,14 @@ async def get_backfill_points_in_room( Args: room_id: Room where we want to find the oldest events - current_depth: The depth at the users current scrollback position because - we only care about finding events older than the given + current_depth: The depth at the users current scrollback position + because we only care about finding events older than the given `current_depth` when scrolling and paginating backwards. Returns: - List of (event_id, depth) tuples (sorted by depth descending). + List of (event_id, depth) tuples. Sorted by depth, highest to lowest + (descending) so the closest events to the `current_depth` are first + in the list. """ def get_backfill_points_in_room_txn( @@ -861,7 +864,9 @@ async def get_insertion_event_backward_extremities_in_room( ) -> List[Tuple[str, int]]: """ Get the insertion events we know about that we haven't backfilled yet - along with the approximate depth (sorted by depth descending). + along with the approximate depth. Sorted by depth, highest to lowest + (descending) so the closest events to the `current_depth` are first + in the list. We use this function so that we can compare and see if someones `current_depth` at their current scrollback is within pagination range @@ -884,7 +889,9 @@ async def get_insertion_event_backward_extremities_in_room( `current_depth` when scrolling and paginating backwards. Returns: - List of (event_id, depth) tuples (sorted by depth descending) + List of (event_id, depth) tuples. Sorted by depth, highest to lowest + (descending) so the closest events to the `current_depth` are first + in the list. """ def get_insertion_event_backward_extremities_in_room_txn( From ea080064d96d2d56414613208554afad4e828623 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 1 Sep 2022 18:20:33 -0500 Subject: [PATCH 52/66] Fix typos See https://github.com/matrix-org/synapse/pull/13635#discussion_r960484875 --- synapse/storage/databases/main/event_federation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 89781ce57669..532ce0e8698e 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -732,9 +732,9 @@ async def get_backfill_points_in_room( approximate depth. Sorted by depth, highest to lowest (descending) so the closest events to the `current_depth` are first in the list. - We use this function so that we can compare and see if someones + We use this function so that we can compare and see if a client's `current_depth` at their current scrollback is within pagination range - of the event extremeties. If the `current_depth` is close to the depth + of the event extremities. If the `current_depth` is close to the depth of given oldest event, we can trigger a backfill. 
We ignore extremities that have a greater depth than our `current_depth` From 9a85bb4e81ff4fbfe2ac5b5ffc3928b12532d1c5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 1 Sep 2022 18:21:08 -0500 Subject: [PATCH 53/66] Normal is not obvious See https://github.com/matrix-org/synapse/pull/13635#discussion_r960487479 --- synapse/storage/databases/main/event_federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 532ce0e8698e..455cf744aade 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -794,7 +794,7 @@ def get_backfill_points_in_room_txn( AND failed_backfill_attempt_info.event_id = backward_extrem.event_id WHERE backward_extrem.room_id = ? - /* We only care about normal events because TODO: why */ + /* We only care about non-state events because TODO: why */ AND edge.is_state is ? /* False */ /** * We only want backwards extremities that are older than or at From 7204cceec41082b8c6558e61343c3850467ec353 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 1 Sep 2022 18:31:36 -0500 Subject: [PATCH 54/66] Fix left-shift math See https://github.com/matrix-org/synapse/pull/13635#discussion_r960492041 --- .../databases/main/event_federation.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 455cf744aade..a2bd3ed6f759 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -811,14 +811,14 @@ def get_backfill_points_in_room_txn( * Exponential back-off (up to the upper bound) so we don't retry the * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc. * - * We use `2 << (n - 1)` as a power of 2 equivalent for compatibility - * with SQLite. The left shift equivalent only works with powers of 2 - * because left shift is a binary operation (base-2). Otherwise, we - * would use `power(2, n)` or the power operator, `2^n`. + * We use `1 << n` as a power of 2 equivalent for compatibility + * with older SQLites. The left shift equivalent only works with + * powers of 2 because left shift is a binary operation (base-2). + * Otherwise, we would use `power(2, n)` or the power operator, `2^n`. */ AND ( failed_backfill_attempt_info.event_id IS NULL - OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((2 << (failed_backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) + OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */) ) /** * Sort from highest (closest to the `current_depth`) to the lowest depth @@ -919,14 +919,14 @@ def get_insertion_event_backward_extremities_in_room_txn( * Exponential back-off (up to the upper bound) so we don't retry the * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc * - * We use `2 << (n - 1)` as a power of 2 equivalent for compatibility - * with SQLite. The left shift equivalent only works with powers of 2 - * because left shift is a binary operation (base-2). Otherwise, we - * would use `power(2, n)` or the power operator, `2^n`. + * We use `1 << n` as a power of 2 equivalent for compatibility + * with older SQLites. 
The left shift equivalent only works with + * powers of 2 because left shift is a binary operation (base-2). + * Otherwise, we would use `power(2, n)` or the power operator, `2^n`. */ AND ( failed_backfill_attempt_info.event_id IS NULL - OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((2 << (failed_backfill_attempt_info.num_attempts - 1)) * ? /* step */, ? /* upper bound */) + OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */) ) /** * Sort from highest (closest to the `current_depth`) to the lowest depth From 8f214b1499524937b11f56aaf00958f1352b5f92 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 1 Sep 2022 19:49:14 -0500 Subject: [PATCH 55/66] Fix foreign key constraint --- tests/storage/test_event_federation.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 037dd854beb8..3e3668300a44 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -682,6 +682,19 @@ def _setup_room_for_backfill_tests(self) -> _BackfillSetupInfo: stream_ordering += 1 def populate_db(txn: LoggingTransaction): + # Insert the room to satisfy the foreign key constraint of + # `event_failed_backfill_attempts` + self.store.db_pool.simple_insert_txn( + txn, + "rooms", + { + "room_id": room_id, + "creator": "room_creator_user_id", + "is_public": True, + "room_version": "6", + }, + ) + # Insert our server events for event_id in our_server_events: event_dict = complete_event_dict_map[event_id] @@ -867,6 +880,19 @@ def _setup_room_for_insertion_backfill_tests(self) -> _BackfillSetupInfo: } def populate_db(txn: LoggingTransaction): + # Insert the room to satisfy the foreign key constraint of + # `event_failed_backfill_attempts` + self.store.db_pool.simple_insert_txn( + txn, + "rooms", + { + "room_id": room_id, + "creator": "room_creator_user_id", + "is_public": True, + "room_version": "6", + }, + ) + # Insert our server events stream_ordering = 0 for event_id, depth in depth_map.items(): From 63bec99dff683e68ae38837ef4bee837a90e1b22 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 9 Sep 2022 15:32:41 -0500 Subject: [PATCH 56/66] Remove emulated upsert code (all of our dbs no support it) We now specify a SQLite version that supports upserts natively so we no longer need the emulated version, https://github.com/matrix-org/synapse/pull/13760 See: - https://github.com/matrix-org/synapse/pull/13589#discussion_r961101746 - https://github.com/matrix-org/synapse/pull/13589#discussion_r961102072 --- .../databases/main/event_federation.py | 66 +++---------------- 1 file changed, 8 insertions(+), 58 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 3324a894e2c8..885ff189bcad 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1309,27 +1309,15 @@ async def record_event_failed_backfill_attempt( room_id: The room where the event failed to backfill from event_id: The event that failed to be backfilled """ - if ( - self.database_engine.can_native_upsert - and "event_failed_backfill_attempts" - not in self.db_pool._unsafe_to_upsert_tables - ): - await self.db_pool.runInteraction( - "record_event_failed_backfill_attempt", - 
self._record_event_failed_backfill_attempt_upsert_native_txn, - room_id, - event_id, - db_autocommit=True, # Safe as its a single upsert - ) - else: - await self.db_pool.runInteraction( - "record_event_failed_backfill_attempt", - self._record_event_failed_backfill_attempt_upsert_emulated_txn, - room_id, - event_id, - ) + await self.db_pool.runInteraction( + "record_event_failed_backfill_attempt", + self._record_event_failed_backfill_attempt_upsert_txn, + room_id, + event_id, + db_autocommit=True, # Safe as it's a single upsert + ) - def _record_event_failed_backfill_attempt_upsert_native_txn( + def _record_event_failed_backfill_attempt_upsert_txn( self, txn: LoggingTransaction, room_id: str, @@ -1349,44 +1337,6 @@ def _record_event_failed_backfill_attempt_upsert_native_txn( txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec())) - def _record_event_failed_backfill_attempt_upsert_emulated_txn( - self, - txn: LoggingTransaction, - room_id: str, - event_id: str, - ) -> None: - self.database_engine.lock_table(txn, "event_failed_backfill_attempts") - - prev_row = self.db_pool.simple_select_one_txn( - txn, - table="event_failed_backfill_attempts", - keyvalues={"room_id": room_id, "event_id": event_id}, - retcols=("num_attempts"), - allow_none=True, - ) - - if not prev_row: - self.db_pool.simple_insert_txn( - txn, - table="event_failed_backfill_attempts", - values={ - "room_id": room_id, - "event_id": event_id, - "num_attempts": 1, - "last_attempt_ts": self._clock.time_msec(), - }, - ) - else: - self.db_pool.simple_update_one_txn( - txn, - table="event_failed_backfill_attempts", - keyvalues={"room_id": room_id, "event_id": event_id}, - updatevalues={ - "num_attempts": prev_row["num_attempts"] + 1, - "last_attempt_ts": self._clock.time_msec(), - }, - ) - async def get_missing_events( self, room_id: str, From 31d7502dba2f2365557c7a0b7647ace6e926a004 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 9 Sep 2022 15:51:51 -0500 Subject: [PATCH 57/66] Table rename `event_failed_pull_attempts` See https://github.com/matrix-org/synapse/pull/13589#discussion_r967238724 --- synapse/handlers/federation_event.py | 4 +-- .../databases/main/event_federation.py | 20 +++++------ synapse/storage/databases/main/events.py | 2 +- ...s.sql => 07event_failed_pull_attempts.sql} | 11 +++--- tests/handlers/test_federation_event.py | 34 +++++++++---------- 5 files changed, 35 insertions(+), 36 deletions(-) rename synapse/storage/schema/main/delta/72/{04event_failed_backfill_attempts.sql => 07event_failed_pull_attempts.sql} (67%) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 60dd024cb154..cffbf68432c1 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -863,7 +863,7 @@ async def _process_pulled_event( except SynapseError as err: logger.warning("Event %s failed sanity check: %s", event_id, err) if backfilled: - await self._store.record_event_failed_backfill_attempt( + await self._store.record_event_failed_pull_attempt( event.room_id, event_id ) return @@ -902,7 +902,7 @@ async def _process_pulled_event( ) except FederationError as e: if backfilled: - await self._store.record_event_failed_backfill_attempt( + await self._store.record_event_failed_pull_attempt( event.room_id, event_id ) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 885ff189bcad..7459f5d9841f 100644 --- a/synapse/storage/databases/main/event_federation.py +++ 
b/synapse/storage/databases/main/event_federation.py @@ -1295,43 +1295,41 @@ def _get_backfill_events( return event_id_results @trace - async def record_event_failed_backfill_attempt( + async def record_event_failed_pull_attempt( self, room_id: str, event_id: str ) -> None: """ - Record when we fail to backfill an event. + Record when we fail to pull an event over federation. This information allows us to be more intelligent when we decide to retry (we don't need to fail over and over) and we can process that event in the background so we don't block on it each time. Args: - room_id: The room where the event failed to backfill from - event_id: The event that failed to be backfilled + room_id: The room where the event failed to pull from + event_id: The event that failed to be fetched or processed """ await self.db_pool.runInteraction( - "record_event_failed_backfill_attempt", - self._record_event_failed_backfill_attempt_upsert_txn, + "record_event_failed_pull_attempt", + self._record_event_failed_pull_attempt_upsert_txn, room_id, event_id, db_autocommit=True, # Safe as it's a single upsert ) - def _record_event_failed_backfill_attempt_upsert_txn( + def _record_event_failed_pull_attempt_upsert_txn( self, txn: LoggingTransaction, room_id: str, event_id: str, ) -> None: - assert self.database_engine.can_native_upsert - sql = """ - INSERT INTO event_failed_backfill_attempts ( + INSERT INTO event_failed_pull_attempts ( room_id, event_id, num_attempts, last_attempt_ts ) VALUES (?, ?, ?, ?) ON CONFLICT (room_id, event_id) DO UPDATE SET - num_attempts=event_failed_backfill_attempts.num_attempts + 1, + num_attempts=event_failed_pull_attempts.num_attempts + 1, last_attempt_ts=EXCLUDED.last_attempt_ts; """ diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 881819fc5774..6fc2fbcc53fe 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -2454,7 +2454,7 @@ def _update_backward_extremeties( # extremities, it also means that they won't be backfilled from again so # we no longer need to store the backfill attempts around it. query = """ - DELETE FROM event_failed_backfill_attempts + DELETE FROM event_failed_pull_attempts WHERE event_id = ? and room_id = ? """ txn.execute_batch( diff --git a/synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql b/synapse/storage/schema/main/delta/72/07event_failed_pull_attempts.sql similarity index 67% rename from synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql rename to synapse/storage/schema/main/delta/72/07event_failed_pull_attempts.sql index e3c557540cdc..9ed9e38e6dca 100644 --- a/synapse/storage/schema/main/delta/72/04event_failed_backfill_attempts.sql +++ b/synapse/storage/schema/main/delta/72/07event_failed_pull_attempts.sql @@ -14,11 +14,12 @@ */ --- Add a table that keeps track of when we failed to backfill an event. This --- allows us to be more intelligent when we decide to retry (we don't need to --- fail over and over) and we can process that event in the background so we --- don't block on it each time. -CREATE TABLE IF NOT EXISTS event_failed_backfill_attempts( +-- Add a table that keeps track of when we failed to pull an event over +-- federation (via /backfill, `/event`, `/get_missing_events`, etc). This allows +-- us to be more intelligent when we decide to retry (we don't need to fail over +-- and over) and we can process that event in the background so we don't block +-- on it each time. 
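For the cleanup path above, here is a standalone sketch of the batched delete, using `executemany` as a stand-in for Synapse's `execute_batch` and assuming the extremity tuples are ordered `(event_id, room_id)` to match the placeholders:

```
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript(
    """
    CREATE TABLE event_failed_pull_attempts(
        room_id TEXT NOT NULL,
        event_id TEXT NOT NULL,
        num_attempts INT NOT NULL,
        PRIMARY KEY (room_id, event_id)
    );
    INSERT INTO event_failed_pull_attempts VALUES ('!room:example.com', '$a', 2);
    INSERT INTO event_failed_pull_attempts VALUES ('!room:example.com', '$b', 5);
    """
)

# Once an event is persisted it stops being a backward extremity, so its
# failure history can be dropped.
resolved_extremities = [("$a", "!room:example.com")]
conn.executemany(
    "DELETE FROM event_failed_pull_attempts WHERE event_id = ? AND room_id = ?",
    resolved_extremities,
)
assert conn.execute(
    "SELECT event_id FROM event_failed_pull_attempts"
).fetchall() == [("$b",)]
```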
+CREATE TABLE IF NOT EXISTS event_failed_pull_attempts( room_id TEXT NOT NULL REFERENCES rooms (room_id), event_id TEXT NOT NULL, num_attempts INT NOT NULL, diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index 102129025b2f..ad33144ade34 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -233,7 +233,7 @@ def test_process_pulled_event_records_failed_backfill_attempts( ) -> None: """ Test to make sure that failed backfill attempts for an event are - recorded in the `event_failed_backfill_attempts` table. + recorded in the `event_failed_pull_attempts` table. In this test, we pretend we are processing a "pulled" event via backfill. The pulled event has a fake `prev_event` which our server has @@ -242,9 +242,9 @@ def test_process_pulled_event_records_failed_backfill_attempts( the server can't fetch the state at the missing `prev_event`, the "pulled" event fails the history check and is fails to process. - We check that we correctly record the number of failed backfill attempts - to the pulled event and as a sanity check, that the "pulled" event isn't - persisted. expect. + We check that we correctly record the number of failed pull attempts + of the pulled event and as a sanity check, that the "pulled" event isn't + persisted. """ OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" main_store = self.hs.get_datastores().main @@ -307,10 +307,10 @@ def test_process_pulled_event_records_failed_backfill_attempts( ) ) - # Make sure our backfill attempt was recorded + # Make sure our failed pull attempt was recorded backfill_num_attempts = self.get_success( main_store.db_pool.simple_select_one_onecol( - table="event_failed_backfill_attempts", + table="event_failed_pull_attempts", keyvalues={"event_id": pulled_event.event_id}, retcol="num_attempts", ) @@ -325,10 +325,10 @@ def test_process_pulled_event_records_failed_backfill_attempts( ) ) - # Make sure our second backfill attempt was recorded (`num_attempts` was incremented) + # Make sure our second failed pull attempt was recorded (`num_attempts` was incremented) backfill_num_attempts = self.get_success( main_store.db_pool.simple_select_one_onecol( - table="event_failed_backfill_attempts", + table="event_failed_pull_attempts", keyvalues={"event_id": pulled_event.event_id}, retcol="num_attempts", ) @@ -348,16 +348,16 @@ def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_ self, ) -> None: """ - Test to make sure that failed backfill attempts - (`event_failed_backfill_attempts` table) for an event are cleared after the + Test to make sure that failed pull attempts + (`event_failed_pull_attempts` table) for an event are cleared after the event is successfully persisted. In this test, we pretend we are processing a "pulled" event via backfill. The pulled event succesfully processes and the backward - extremeties are updated along with clearing out any backfill attempts + extremeties are updated along with clearing out any failed pull attempts for those old extremities. - We check that we correctly cleared failed backfill attempts of the + We check that we correctly cleared failed pull attempts of the pulled event. """ OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" @@ -411,14 +411,14 @@ def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_ # Fake the "pulled" event failing to backfill once so we can test # if it's cleared out later on. 
self.get_success( - main_store.record_event_failed_backfill_attempt( + main_store.record_event_failed_pull_attempt( pulled_event.room_id, pulled_event.event_id ) ) - # Make sure we have a backfill attempt recorded for the pulled event + # Make sure we have a failed pull attempt recorded for the pulled event backfill_num_attempts = self.get_success( main_store.db_pool.simple_select_one_onecol( - table="event_failed_backfill_attempts", + table="event_failed_pull_attempts", keyvalues={"event_id": pulled_event.event_id}, retcol="num_attempts", ) @@ -433,10 +433,10 @@ def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_ ) ) - # Make sure the backfill attempt for the pulled event are cleared + # Make sure the failed pull attempts for the pulled event are cleared backfill_num_attempts = self.get_success( main_store.db_pool.simple_select_one_onecol( - table="event_failed_backfill_attempts", + table="event_failed_pull_attempts", keyvalues={"event_id": pulled_event.event_id}, retcol="num_attempts", allow_none=True, From 0b5f1db174287c7a09f1d632d2f71aec49b9fd91 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 9 Sep 2022 16:03:15 -0500 Subject: [PATCH 58/66] Add `last_cause` column See https://github.com/matrix-org/synapse/pull/13589#discussion_r967235752 --- synapse/handlers/federation_event.py | 4 ++-- synapse/storage/databases/main/event_federation.py | 14 +++++++++----- .../main/delta/72/07event_failed_pull_attempts.sql | 1 + tests/handlers/test_federation_event.py | 2 +- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index cffbf68432c1..3f5530963693 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -864,7 +864,7 @@ async def _process_pulled_event( logger.warning("Event %s failed sanity check: %s", event_id, err) if backfilled: await self._store.record_event_failed_pull_attempt( - event.room_id, event_id + event.room_id, event_id, str(err) ) return @@ -903,7 +903,7 @@ async def _process_pulled_event( except FederationError as e: if backfilled: await self._store.record_event_failed_pull_attempt( - event.room_id, event_id + event.room_id, event_id, str(e) ) if e.code == 403: diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 7459f5d9841f..ef477978ed63 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -1296,7 +1296,7 @@ def _get_backfill_events( @trace async def record_event_failed_pull_attempt( - self, room_id: str, event_id: str + self, room_id: str, event_id: str, cause: str ) -> None: """ Record when we fail to pull an event over federation. 
@@ -1308,12 +1308,14 @@ async def record_event_failed_pull_attempt( Args: room_id: The room where the event failed to pull from event_id: The event that failed to be fetched or processed + cause: The error message or reason that we failed to pull the event """ await self.db_pool.runInteraction( "record_event_failed_pull_attempt", self._record_event_failed_pull_attempt_upsert_txn, room_id, event_id, + cause, db_autocommit=True, # Safe as it's a single upsert ) @@ -1322,18 +1324,20 @@ def _record_event_failed_pull_attempt_upsert_txn( txn: LoggingTransaction, room_id: str, event_id: str, + cause: str, ) -> None: sql = """ INSERT INTO event_failed_pull_attempts ( - room_id, event_id, num_attempts, last_attempt_ts + room_id, event_id, num_attempts, last_attempt_ts, last_cause ) - VALUES (?, ?, ?, ?) + VALUES (?, ?, ?, ?, ?) ON CONFLICT (room_id, event_id) DO UPDATE SET num_attempts=event_failed_pull_attempts.num_attempts + 1, - last_attempt_ts=EXCLUDED.last_attempt_ts; + last_attempt_ts=EXCLUDED.last_attempt_ts, + last_cause=EXCLUDED.last_cause; """ - txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec())) + txn.execute(sql, (room_id, event_id, 1, self._clock.time_msec(), cause)) async def get_missing_events( self, diff --git a/synapse/storage/schema/main/delta/72/07event_failed_pull_attempts.sql b/synapse/storage/schema/main/delta/72/07event_failed_pull_attempts.sql index 9ed9e38e6dca..d397ee10826d 100644 --- a/synapse/storage/schema/main/delta/72/07event_failed_pull_attempts.sql +++ b/synapse/storage/schema/main/delta/72/07event_failed_pull_attempts.sql @@ -24,5 +24,6 @@ CREATE TABLE IF NOT EXISTS event_failed_pull_attempts( event_id TEXT NOT NULL, num_attempts INT NOT NULL, last_attempt_ts BIGINT NOT NULL, + last_cause TEXT NOT NULL, PRIMARY KEY (room_id, event_id) ); diff --git a/tests/handlers/test_federation_event.py b/tests/handlers/test_federation_event.py index ad33144ade34..b5b89405a4f2 100644 --- a/tests/handlers/test_federation_event.py +++ b/tests/handlers/test_federation_event.py @@ -412,7 +412,7 @@ def test_process_pulled_event_clears_backfill_attempts_after_being_successfully_ # if it's cleared out later on. self.get_success( main_store.record_event_failed_pull_attempt( - pulled_event.room_id, pulled_event.event_id + pulled_event.room_id, pulled_event.event_id, "fake cause" ) ) # Make sure we have a failed pull attempt recorded for the pulled event From 1347686828b019e4e704d8d4deba107acec6c07d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 13 Sep 2022 16:06:20 -0500 Subject: [PATCH 59/66] Update schema version summary --- synapse/storage/schema/__init__.py | 2 ++ ...ailed_pull_attempts.sql => 09event_failed_pull_attempts.sql} | 0 2 files changed, 2 insertions(+) rename synapse/storage/schema/main/delta/72/{07event_failed_pull_attempts.sql => 09event_failed_pull_attempts.sql} (100%) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index 32cda5e3ba2b..76ec65731c12 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -77,6 +77,8 @@ - Tables related to groups are dropped. - Unused column application_services_state.last_txn is dropped - Cache invalidation stream id sequence now begins at 2 to match code expectation. + - Add table `event_failed_pull_attempts` to keep track when we fail to pull + events over federation. 
""" diff --git a/synapse/storage/schema/main/delta/72/07event_failed_pull_attempts.sql b/synapse/storage/schema/main/delta/72/09event_failed_pull_attempts.sql similarity index 100% rename from synapse/storage/schema/main/delta/72/07event_failed_pull_attempts.sql rename to synapse/storage/schema/main/delta/72/09event_failed_pull_attempts.sql From 57182dce9487eefb928acf2a25fd91b1ce9e83c7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 12:45:24 -0500 Subject: [PATCH 60/66] Remove backfilled check since we plan to go general anyway See https://github.com/matrix-org/synapse/pull/13589#discussion_r969250906 --- synapse/handlers/federation_event.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index 3f5530963693..9e065e1116b5 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -862,10 +862,9 @@ async def _process_pulled_event( self._sanity_check_event(event) except SynapseError as err: logger.warning("Event %s failed sanity check: %s", event_id, err) - if backfilled: - await self._store.record_event_failed_pull_attempt( - event.room_id, event_id, str(err) - ) + await self._store.record_event_failed_pull_attempt( + event.room_id, event_id, str(err) + ) return try: @@ -901,10 +900,9 @@ async def _process_pulled_event( backfilled=backfilled, ) except FederationError as e: - if backfilled: - await self._store.record_event_failed_pull_attempt( - event.room_id, event_id, str(e) - ) + await self._store.record_event_failed_pull_attempt( + event.room_id, event_id, str(e) + ) if e.code == 403: logger.warning("Pulled event %s failed history check.", event_id) From 70019d2cffdc14ca5f486474019b22f8f479ee12 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 12:53:44 -0500 Subject: [PATCH 61/66] Move change to latest delta 73 --- synapse/storage/schema/__init__.py | 4 ++-- .../01event_failed_pull_attempts.sql} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename synapse/storage/schema/main/delta/{72/09event_failed_pull_attempts.sql => 73/01event_failed_pull_attempts.sql} (100%) diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index a1951c4ec164..68e055c66471 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -77,12 +77,12 @@ - Tables related to groups are dropped. - Unused column application_services_state.last_txn is dropped - Cache invalidation stream id sequence now begins at 2 to match code expectation. - - Add table `event_failed_pull_attempts` to keep track when we fail to pull - events over federation. Changes in SCHEMA_VERSION = 73; - thread_id column is added to event_push_actions, event_push_actions_staging event_push_summary, receipts_linearized, and receipts_graph. + - Add table `event_failed_pull_attempts` to keep track when we fail to pull + events over federation. 
""" diff --git a/synapse/storage/schema/main/delta/72/09event_failed_pull_attempts.sql b/synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql similarity index 100% rename from synapse/storage/schema/main/delta/72/09event_failed_pull_attempts.sql rename to synapse/storage/schema/main/delta/73/01event_failed_pull_attempts.sql From 7ea40b1585f6f5d8762092b185a54f298231686d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 15:23:14 -0500 Subject: [PATCH 62/66] Updates after schema changes in the other PR --- .../databases/main/event_federation.py | 4 +- tests/storage/test_event_federation.py | 60 ++++++++++++++----- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index dfee4bf1246e..db5a74cdfa0d 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -789,7 +789,7 @@ def get_backfill_points_in_room_txn( * We use this info to make sure we don't retry to use a backfill point * if we've already attempted to backfill from it recently. */ - LEFT JOIN event_failed_backfill_attempts AS failed_backfill_attempt_info + LEFT JOIN event_failed_pull_attempts AS failed_backfill_attempt_info ON failed_backfill_attempt_info.room_id = backward_extrem.room_id AND failed_backfill_attempt_info.event_id = backward_extrem.event_id @@ -909,7 +909,7 @@ def get_insertion_event_backward_extremities_in_room_txn( * We use this info to make sure we don't retry to use a backfill point * if we've already attempted to backfill from it recently. */ - LEFT JOIN event_failed_backfill_attempts AS failed_backfill_attempt_info + LEFT JOIN event_failed_pull_attempts AS failed_backfill_attempt_info ON failed_backfill_attempt_info.room_id = insertion_event_extremity.room_id AND failed_backfill_attempt_info.event_id = insertion_event_extremity.event_id diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index c7928d141ecc..5235a8947e34 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -683,7 +683,7 @@ def _setup_room_for_backfill_tests(self) -> _BackfillSetupInfo: def populate_db(txn: LoggingTransaction): # Insert the room to satisfy the foreign key constraint of - # `event_failed_backfill_attempts` + # `event_failed_pull_attempts` self.store.db_pool.simple_insert_txn( txn, "rooms", @@ -796,10 +796,18 @@ def test_get_backfill_points_in_room_excludes_events_we_have_attempted( # Record some attempts to backfill these events which will make # `get_backfill_points_in_room` exclude them because we # haven't passed the backoff interval. 
- self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b5")) - self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b4")) - self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b3")) - self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b2")) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b5", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b4", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b3", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b2", "fake cause") + ) # No time has passed since we attempted to backfill ^ @@ -826,11 +834,21 @@ def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duratio # Record some attempts to backfill these events which will make # `get_backfill_points_in_room` exclude them because we # haven't passed the backoff interval. - self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b3")) - self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b1")) - self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b1")) - self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b1")) - self.get_success(self.store.record_event_failed_backfill_attempt(room_id, "b1")) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b3", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause") + ) # Now advance time by 2 hours and we should only be able to see "b3" # because we have waited long enough for the single attempt (2^1 hours) @@ -881,7 +899,7 @@ def _setup_room_for_insertion_backfill_tests(self) -> _BackfillSetupInfo: def populate_db(txn: LoggingTransaction): # Insert the room to satisfy the foreign key constraint of - # `event_failed_backfill_attempts` + # `event_failed_pull_attempts` self.store.db_pool.simple_insert_txn( txn, "rooms", @@ -983,7 +1001,9 @@ def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_hav # `get_insertion_event_backward_extremities_in_room` exclude them # because we haven't passed the backoff interval. self.get_success( - self.store.record_event_failed_backfill_attempt(room_id, "insertion_eventA") + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventA", "fake cause" + ) ) # No time has passed since we attempted to backfill ^ @@ -1015,16 +1035,24 @@ def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_ # `get_backfill_points_in_room` exclude them because we # haven't passed the backoff interval. 
self.get_success( - self.store.record_event_failed_backfill_attempt(room_id, "insertion_eventA") + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventA", "fake cause" + ) ) self.get_success( - self.store.record_event_failed_backfill_attempt(room_id, "insertion_eventA") + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventA", "fake cause" + ) ) self.get_success( - self.store.record_event_failed_backfill_attempt(room_id, "insertion_eventA") + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventA", "fake cause" + ) ) self.get_success( - self.store.record_event_failed_backfill_attempt(room_id, "insertion_eventA") + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventA", "fake cause" + ) ) # Now advance time by 2 hours and we should only be able to see "b3" From 40ec8d89002e1578c5cf4c19a2a1c50073fa17b9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Sep 2022 15:27:56 -0500 Subject: [PATCH 63/66] Remove debug logging --- tests/storage/test_event_federation.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 5235a8947e34..6b686251eccd 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -13,7 +13,6 @@ # limitations under the License. import datetime -import logging from typing import Dict, List, Tuple, Union import attr @@ -36,8 +35,6 @@ import tests.unittest import tests.utils -logger = logging.getLogger(__name__) - @attr.s(auto_attribs=True, frozen=True, slots=True) class _BackfillSetupInfo: @@ -995,8 +992,6 @@ def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_hav room_id = setup_info.room_id depth_map = setup_info.depth_map - logger.info("depth_map=%s", depth_map) - # Record some attempts to backfill these events which will make # `get_insertion_event_backward_extremities_in_room` exclude them # because we haven't passed the backoff interval. 
From 1208540c9b42052206453bedd9dc70486ac04fdc Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Sep 2022 15:01:35 -0500 Subject: [PATCH 64/66] Remove orthogonal `current_depth` changes See https://github.com/matrix-org/synapse/pull/13635#discussion_r977866614 We can re-introduce these in a future PR --- synapse/handlers/federation.py | 70 ++++++++------- .../databases/main/event_federation.py | 76 +++------------- tests/storage/test_event_federation.py | 90 +++++++------------ 3 files changed, 79 insertions(+), 157 deletions(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index cbdb6ce45a47..72f4cc4e3e38 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -38,7 +38,7 @@ from unpaddedbase64 import decode_base64 from synapse import event_auth -from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership +from synapse.api.constants import EventContentFields, EventTypes, Membership from synapse.api.errors import ( AuthError, CodeMessageException, @@ -226,10 +226,7 @@ async def _maybe_backfill_inner( """ backwards_extremities = [ _BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY) - for event_id, depth in await self.store.get_backfill_points_in_room( - room_id=room_id, - current_depth=current_depth, - ) + for event_id, depth in await self.store.get_backfill_points_in_room(room_id) ] insertion_events_to_be_backfilled: List[_BackfillPoint] = [] @@ -237,8 +234,7 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled = [ _BackfillPoint(event_id, depth, _BackfillPointType.INSERTION_PONT) for event_id, depth in await self.store.get_insertion_event_backward_extremities_in_room( - room_id=room_id, - current_depth=current_depth, + room_id ) ] logger.debug( @@ -247,6 +243,10 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled, ) + if not backwards_extremities and not insertion_events_to_be_backfilled: + logger.debug("Not backfilling as no extremeties found.") + return False + # we now have a list of potential places to backpaginate from. We prefer to # start with the most recent (ie, max depth), so let's sort the list. sorted_backfill_points: List[_BackfillPoint] = sorted( @@ -267,29 +267,6 @@ async def _maybe_backfill_inner( sorted_backfill_points, ) - # If we have no backfill points lower than the `current_depth` then - # either we can a) bail or b) still attempt to backfill. We opt to try - # backfilling anyway just in case we do get relevant events. - if not sorted_backfill_points and current_depth != MAX_DEPTH: - logger.debug( - "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway." - ) - return await self._maybe_backfill_inner( - room_id=room_id, - # We use `MAX_DEPTH` so that we find all backfill points next - # time (all events are below the `MAX_DEPTH`) - current_depth=MAX_DEPTH, - limit=limit, - processing_start_time=processing_start_time, - ) - elif not sorted_backfill_points and current_depth == MAX_DEPTH: - # Even after trying again with `MAX_DEPTH`, we didn't find any - # backward extremities to backfill from. - logger.debug( - "_maybe_backfill_inner: Not backfilling as no backward extremeties found." - ) - return False - # If we're approaching an extremity we trigger a backfill, otherwise we # no-op. # @@ -303,16 +280,43 @@ async def _maybe_backfill_inner( # XXX: shouldn't we do this *after* the filter by depth below? Again, we don't # care about events that have happened after our current position. 
# - max_depth_of_backfill_points = sorted_backfill_points[0].depth - if current_depth - 2 * limit > max_depth_of_backfill_points: + max_depth = sorted_backfill_points[0].depth + if current_depth - 2 * limit > max_depth: logger.debug( "Not backfilling as we don't need to. %d < %d - 2 * %d", - max_depth_of_backfill_points, + max_depth, current_depth, limit, ) return False + # We ignore extremities that have a greater depth than our current depth + # as: + # 1. we don't really care about getting events that have happened + # after our current position; and + # 2. we have likely previously tried and failed to backfill from that + # extremity, so to avoid getting "stuck" requesting the same + # backfill repeatedly we drop those extremities. + # + # However, we need to check that the filtered extremities are non-empty. + # If they are empty then either we can a) bail or b) still attempt to + # backfill. We opt to try backfilling anyway just in case we do get + # relevant events. + # + filtered_sorted_backfill_points = [ + t for t in sorted_backfill_points if t.depth <= current_depth + ] + if filtered_sorted_backfill_points: + logger.debug( + "_maybe_backfill_inner: backfill points before current depth: %s", + filtered_sorted_backfill_points, + ) + sorted_backfill_points = filtered_sorted_backfill_points + else: + logger.debug( + "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway." + ) + # For performance's sake, we only want to paginate from a particular extremity # if we can actually see the events we'll get. Otherwise, we'd just spend a lot # of resources to get redacted events. We check each extremity in turn and diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index db5a74cdfa0d..90bd6979f95f 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -726,37 +726,17 @@ def _get_auth_chain_difference_txn( async def get_backfill_points_in_room( self, room_id: str, - current_depth: int, ) -> List[Tuple[str, int]]: """ Gets the oldest events(backwards extremities) in the room along with the - approximate depth. Sorted by depth, highest to lowest (descending) so the closest - events to the `current_depth` are first in the list. - - We use this function so that we can compare and see if a client's - `current_depth` at their current scrollback is within pagination range - of the event extremities. If the `current_depth` is close to the depth - of given oldest event, we can trigger a backfill. - - We ignore extremities that have a greater depth than our `current_depth` - as: - 1. we don't really care about getting events that have happened - after our current position; and - 2. by the nature of paginating and scrolling back, we have likely - previously tried and failed to backfill from that extremity, so - to avoid getting "stuck" requesting the same backfill repeatedly - we drop those extremities. + approximate depth. Sorted by depth, highest to lowest (descending). Args: room_id: Room where we want to find the oldest events - current_depth: The depth at the users current scrollback position - because we only care about finding events older than the given - `current_depth` when scrolling and paginating backwards. Returns: List of (event_id, depth) tuples. Sorted by depth, highest to lowest - (descending) so the closest events to the `current_depth` are first - in the list. 
+ (descending) """ def get_backfill_points_in_room_txn( @@ -797,17 +777,6 @@ def get_backfill_points_in_room_txn( backward_extrem.room_id = ? /* We only care about non-state events because TODO: why */ AND edge.is_state is ? /* False */ - /** - * We only want backwards extremities that are older than or at - * the same position of the given `current_depth` (where older - * means less than the given depth) because we're looking backwards - * from the `current_depth` when backfilling. - * - * current_depth (ignore events that come after this, ignore 2-4) - * | - * [0]<--[1]▼<--[2]<--[3]<--[4] - */ - AND event.depth <= ? /* current_depth */ /** * Exponential back-off (up to the upper bound) so we don't retry the * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc. @@ -822,10 +791,9 @@ def get_backfill_points_in_room_txn( OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */) ) /** - * Sort from highest (closest to the `current_depth`) to the lowest depth - * because the closest are most relevant to backfill from first. - * Then tie-break on alphabetical order of the event_ids so we get a - * consistent ordering which is nice when asserting things in tests. + * Sort from highest to the lowest depth. Then tie-break on + * alphabetical order of the event_ids so we get a consistent + * ordering which is nice when asserting things in tests. */ ORDER BY event.depth DESC, backward_extrem.event_id DESC """ @@ -842,7 +810,6 @@ def get_backfill_points_in_room_txn( ( room_id, False, - current_depth, self._clock.time_msec(), 1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS, 1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS, @@ -861,38 +828,18 @@ def get_backfill_points_in_room_txn( async def get_insertion_event_backward_extremities_in_room( self, room_id: str, - current_depth: int, ) -> List[Tuple[str, int]]: """ Get the insertion events we know about that we haven't backfilled yet along with the approximate depth. Sorted by depth, highest to lowest - (descending) so the closest events to the `current_depth` are first - in the list. - - We use this function so that we can compare and see if someones - `current_depth` at their current scrollback is within pagination range - of the insertion event. If the `current_depth` is close to the depth - of the given insertion event, we can trigger a backfill. - - We ignore insertion events that have a greater depth than our `current_depth` - as: - 1. we don't really care about getting events that have happened - after our current position; and - 2. by the nature of paginating and scrolling back, we have likely - previously tried and failed to backfill from that insertion event, so - to avoid getting "stuck" requesting the same backfill repeatedly - we drop those insertion event. + (descending). Args: room_id: Room where we want to find the oldest events - current_depth: The depth at the users current scrollback position because - we only care about finding events older than the given - `current_depth` when scrolling and paginating backwards. Returns: List of (event_id, depth) tuples. Sorted by depth, highest to lowest - (descending) so the closest events to the `current_depth` are first - in the list. 
+ (descending) """ def get_insertion_event_backward_extremities_in_room_txn( @@ -915,7 +862,6 @@ def get_insertion_event_backward_extremities_in_room_txn( AND failed_backfill_attempt_info.event_id = insertion_event_extremity.event_id WHERE insertion_event_extremity.room_id = ? - AND event.depth <= ? /* current_depth */ /** * Exponential back-off (up to the upper bound) so we don't retry the * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc @@ -930,10 +876,9 @@ def get_insertion_event_backward_extremities_in_room_txn( OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + /*least*/%s((1 << failed_backfill_attempt_info.num_attempts) * ? /* step */, ? /* upper bound */) ) /** - * Sort from highest (closest to the `current_depth`) to the lowest depth - * because the closest are most relevant to backfill from first. - * Then tie-break on alphabetical order of the event_ids so we get a - * consistent ordering which is nice when asserting things in tests. + * Sort from highest to the lowest depth. Then tie-break on + * alphabetical order of the event_ids so we get a consistent + * ordering which is nice when asserting things in tests. */ ORDER BY event.depth DESC, insertion_event_extremity.event_id DESC """ @@ -949,7 +894,6 @@ def get_insertion_event_backward_extremities_in_room_txn( sql % (least_function,), ( room_id, - current_depth, self._clock.time_msec(), 1000 * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_SECONDS, 1000 * BACKFILL_EVENT_BACKOFF_UPPER_BOUND_SECONDS, diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 6b686251eccd..80fe718e9ddf 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -754,31 +754,20 @@ def populate_db(txn: LoggingTransaction): def test_get_backfill_points_in_room(self): """ - Test to make sure only backfill points that are older and come before - the `current_depth` are returned. + Test to make sure we get some backfill points """ setup_info = self._setup_room_for_backfill_tests() room_id = setup_info.room_id depth_map = setup_info.depth_map - # Try at "B" backfill_points = self.get_success( - self.store.get_backfill_points_in_room(room_id, depth_map["B"]) + self.store.get_backfill_points_in_room(room_id) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] self.assertListEqual( backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"] ) - # Try at "A" - backfill_points = self.get_success( - self.store.get_backfill_points_in_room(room_id, depth_map["A"]) - ) - backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] - # Event "2" has a depth of 2 but is not included here because we only - # know the approximate depth of 5 from our event "3". - self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"]) - def test_get_backfill_points_in_room_excludes_events_we_have_attempted( self, ): @@ -808,9 +797,8 @@ def test_get_backfill_points_in_room_excludes_events_we_have_attempted( # No time has passed since we attempted to backfill ^ - # Try at "B" backfill_points = self.get_success( - self.store.get_backfill_points_in_room(room_id, depth_map["B"]) + self.store.get_backfill_points_in_room(room_id) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] # Only the backfill points that we didn't record earlier exist here. @@ -854,24 +842,26 @@ def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duratio # visible regardless. 
self.reactor.advance(datetime.timedelta(hours=2).total_seconds()) - # Try at "A" and make sure that "b3" is not in the list because we've - # already attemted many times + # Make sure that "b1" is not in the list because we've + # already attempted many times backfill_points = self.get_success( - self.store.get_backfill_points_in_room(room_id, depth_map["A"]) + self.store.get_backfill_points_in_room(room_id) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] - self.assertListEqual(backfill_event_ids, ["b3", "b2"]) + self.assertListEqual(backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2"]) # Now advance time by 20 hours (above 2^4 because we made 4 attemps) and # see if we can now backfill it self.reactor.advance(datetime.timedelta(hours=20).total_seconds()) - # Try at "A" again after we advanced enough time and we should see "b3" again + # Try again after we advanced enough time and we should see "b3" again backfill_points = self.get_success( - self.store.get_backfill_points_in_room(room_id, depth_map["A"]) + self.store.get_backfill_points_in_room(room_id) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] - self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"]) + self.assertListEqual( + backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"] + ) def _setup_room_for_insertion_backfill_tests(self) -> _BackfillSetupInfo: """ @@ -951,35 +941,20 @@ def populate_db(txn: LoggingTransaction): def test_get_insertion_event_backward_extremities_in_room(self): """ - Test to make sure only insertion event backward extremities that are - older and come before the `current_depth` are returned. + Test to make sure insertion event backward extremities are returned. """ setup_info = self._setup_room_for_insertion_backfill_tests() room_id = setup_info.room_id depth_map = setup_info.depth_map - # Try at "insertion_eventB" backfill_points = self.get_success( - self.store.get_insertion_event_backward_extremities_in_room( - room_id, depth_map["insertion_eventB"] - ) + self.store.get_insertion_event_backward_extremities_in_room(room_id) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] self.assertListEqual( backfill_event_ids, ["insertion_eventB", "insertion_eventA"] ) - # Try at "insertion_eventA" - backfill_points = self.get_success( - self.store.get_insertion_event_backward_extremities_in_room( - room_id, depth_map["insertion_eventA"] - ) - ) - backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] - # Event "2" has a depth of 2 but is not included here because we only - # know the approximate depth of 5 from our event "3". - self.assertListEqual(backfill_event_ids, ["insertion_eventA"]) - def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_have_attempted( self, ): @@ -1003,11 +978,8 @@ def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_hav # No time has passed since we attempted to backfill ^ - # Try at "B" backfill_points = self.get_success( - self.store.get_insertion_event_backward_extremities_in_room( - room_id, depth_map["insertion_eventB"] - ) + self.store.get_insertion_event_backward_extremities_in_room(room_id) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] # Only the backfill points that we didn't record earlier exist here. 
@@ -1029,6 +1001,11 @@ def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_ # Record some attempts to backfill these events which will make # `get_backfill_points_in_room` exclude them because we # haven't passed the backoff interval. + self.get_success( + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventB", "fake cause" + ) + ) self.get_success( self.store.record_event_failed_pull_attempt( room_id, "insertion_eventA", "fake cause" @@ -1050,22 +1027,19 @@ def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_ ) ) - # Now advance time by 2 hours and we should only be able to see "b3" - # because we have waited long enough for the single attempt (2^1 hours) - # but we still shouldn't see "b1" because we haven't waited long enough - # for this many attempts. We didn't do anything to "b2" so it should be - # visible regardless. + # Now advance time by 2 hours and we should only be able to see + # "insertion_eventB" because we have waited long enough for the single + # attempt (2^1 hours) but we still shouldn't see "insertion_eventA" + # because we haven't waited long enough for this many attempts. self.reactor.advance(datetime.timedelta(hours=2).total_seconds()) - # Try at "insertion_eventA" and make sure that "insertion_eventA" is not - # in the list because we've already attemted many times + # Make sure that "insertion_eventA" is not in the list because we've + # already attempted many times backfill_points = self.get_success( - self.store.get_insertion_event_backward_extremities_in_room( - room_id, depth_map["insertion_eventA"] - ) + self.store.get_insertion_event_backward_extremities_in_room(room_id) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] - self.assertListEqual(backfill_event_ids, []) + self.assertListEqual(backfill_event_ids, ["insertion_eventB"]) # Now advance time by 20 hours (above 2^4 because we made 4 attemps) and # see if we can now backfill it @@ -1074,12 +1048,12 @@ def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_ # Try at "insertion_eventA" again after we advanced enough time and we # should see "insertion_eventA" again backfill_points = self.get_success( - self.store.get_insertion_event_backward_extremities_in_room( - room_id, depth_map["insertion_eventA"] - ) + self.store.get_insertion_event_backward_extremities_in_room(room_id) ) backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] - self.assertListEqual(backfill_event_ids, ["insertion_eventA"]) + self.assertListEqual( + backfill_event_ids, ["insertion_eventB", "insertion_eventA"] + ) @attr.s From a121bc3c412baf000e0744d5dae3b424e7ea8360 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Sep 2022 15:12:56 -0500 Subject: [PATCH 65/66] Fix lints --- tests/storage/test_event_federation.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index 80fe718e9ddf..85739c464ec7 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -758,7 +758,6 @@ def test_get_backfill_points_in_room(self): """ setup_info = self._setup_room_for_backfill_tests() room_id = setup_info.room_id - depth_map = setup_info.depth_map backfill_points = self.get_success( self.store.get_backfill_points_in_room(room_id) @@ -777,7 +776,6 @@ def test_get_backfill_points_in_room_excludes_events_we_have_attempted( """ setup_info = 
self._setup_room_for_backfill_tests() room_id = setup_info.room_id - depth_map = setup_info.depth_map # Record some attempts to backfill these events which will make # `get_backfill_points_in_room` exclude them because we @@ -814,7 +812,6 @@ def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duratio """ setup_info = self._setup_room_for_backfill_tests() room_id = setup_info.room_id - depth_map = setup_info.depth_map # Record some attempts to backfill these events which will make # `get_backfill_points_in_room` exclude them because we @@ -945,7 +942,6 @@ def test_get_insertion_event_backward_extremities_in_room(self): """ setup_info = self._setup_room_for_insertion_backfill_tests() room_id = setup_info.room_id - depth_map = setup_info.depth_map backfill_points = self.get_success( self.store.get_insertion_event_backward_extremities_in_room(room_id) @@ -965,7 +961,6 @@ def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_hav """ setup_info = self._setup_room_for_insertion_backfill_tests() room_id = setup_info.room_id - depth_map = setup_info.depth_map # Record some attempts to backfill these events which will make # `get_insertion_event_backward_extremities_in_room` exclude them @@ -996,7 +991,6 @@ def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_ """ setup_info = self._setup_room_for_insertion_backfill_tests() room_id = setup_info.room_id - depth_map = setup_info.depth_map # Record some attempts to backfill these events which will make # `get_backfill_points_in_room` exclude them because we From 491aac69971f87f75db1c5cac4bb18b08afa707a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 22 Sep 2022 16:55:43 -0500 Subject: [PATCH 66/66] Add context for why we have the is_state condition See https://github.com/matrix-org/synapse/pull/13635#discussion_r978067310 --- synapse/storage/databases/main/event_federation.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 90bd6979f95f..3251fca6fbf2 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -775,7 +775,14 @@ def get_backfill_points_in_room_txn( AND failed_backfill_attempt_info.event_id = backward_extrem.event_id WHERE backward_extrem.room_id = ? - /* We only care about non-state events because TODO: why */ + /* We only care about non-state edges because we used to use + * `event_edges` for two different sorts of "edges" (the current + * event DAG, but also a link to the previous state, for state + * events). These legacy state event edges can be distinguished by + * `is_state` and are removed from the codebase and schema but + * because the schema change is in a background update, it's not + * necessarily safe to assume that it will have been completed. + */ AND edge.is_state is ? /* False */ /** * Exponential back-off (up to the upper bound) so we don't retry the