Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Faster joins: filter out non local events when a room doesn't have its full state #14404

Merged
merged 9 commits into from
Nov 21, 2022
1 change: 1 addition & 0 deletions changelog.d/14404.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Faster joins: filter out non local events when a room doesn't have its full state.
1 change: 1 addition & 0 deletions synapse/federation/sender/per_destination_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,7 @@ async def _catch_up_transmission_loop(self) -> None:
new_pdus = await filter_events_for_server(
self._storage_controllers,
self._destination,
self._server_name,
new_pdus,
redact=False,
)
Expand Down
15 changes: 10 additions & 5 deletions synapse/handlers/federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ async def _maybe_backfill_inner(
filtered_extremities = await filter_events_for_server(
self._storage_controllers,
self.server_name,
self.server_name,
events_to_check,
redact=False,
check_history_visibility_only=True,
Expand Down Expand Up @@ -1231,7 +1232,9 @@ async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]:
async def on_backfill_request(
self, origin: str, room_id: str, pdu_list: List[str], limit: int
) -> List[EventBase]:
await self._event_auth_handler.assert_host_in_room(room_id, origin)
# We allow partially joined rooms since in this case we are filtering out
# non-local events in `filter_events_for_server`.
await self._event_auth_handler.assert_host_in_room(room_id, origin, True)

# Synapse asks for 100 events per backfill request. Do not allow more.
limit = min(limit, 100)
Expand All @@ -1252,7 +1255,7 @@ async def on_backfill_request(
)

events = await filter_events_for_server(
self._storage_controllers, origin, events
self._storage_controllers, origin, self.server_name, events
)

return events
Expand Down Expand Up @@ -1283,7 +1286,7 @@ async def get_persisted_pdu(
await self._event_auth_handler.assert_host_in_room(event.room_id, origin)

events = await filter_events_for_server(
self._storage_controllers, origin, [event]
self._storage_controllers, origin, self.server_name, [event]
)
event = events[0]
return event
Expand All @@ -1296,7 +1299,9 @@ async def on_get_missing_events(
latest_events: List[str],
limit: int,
) -> List[EventBase]:
await self._event_auth_handler.assert_host_in_room(room_id, origin)
# We allow partially joined rooms since in this case we are filtering out
# non-local events in `filter_events_for_server`.
await self._event_auth_handler.assert_host_in_room(room_id, origin, True)

# Only allow up to 20 events to be retrieved per request.
limit = min(limit, 20)
Expand All @@ -1309,7 +1314,7 @@ async def on_get_missing_events(
)

missing_events = await filter_events_for_server(
self._storage_controllers, origin, missing_events
self._storage_controllers, origin, self.server_name, missing_events
)

return missing_events
Expand Down
29 changes: 26 additions & 3 deletions synapse/visibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,8 @@ def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str:

async def filter_events_for_server(
storage: StorageControllers,
server_name: str,
target_server_name: str,
local_server_name: str,
events: List[EventBase],
redact: bool = True,
check_history_visibility_only: bool = False,
Expand Down Expand Up @@ -603,7 +604,7 @@ def check_event_is_visible(
# if the server is either in the room or has been invited
# into the room.
for ev in memberships.values():
assert get_domain_from_id(ev.state_key) == server_name
assert get_domain_from_id(ev.state_key) == target_server_name

memtype = ev.membership
if memtype == Membership.JOIN:
Expand All @@ -622,6 +623,24 @@ def check_event_is_visible(
# to no users having been erased.
erased_senders = {}

# Filter out non-local events when we are in the middle of a partial join, since our servers
# list can be out of date and we could leak events to servers not in the room anymore.
# This can also be true for local events but we consider it to be an acceptable risk.

# We do this check as a first step and before retrieving membership events because
# otherwise a room could be fully joined after we retrieve those, which would then bypass
# this check but would base the filtering on an outdated view of the membership events.

partial_state_invisible_events = set()
if not check_history_visibility_only:
for e in events:
sender_domain = get_domain_from_id(e.sender)
if (
sender_domain != local_server_name
and await storage.main.is_partial_state_room(e.room_id)
):
partial_state_invisible_events.add(e)

# Let's check to see if all the events have a history visibility
# of "shared" or "world_readable". If that's the case then we don't
# need to check membership (as we know the server is in the room).
Expand All @@ -636,7 +655,7 @@ def check_event_is_visible(
if event_to_history_vis[e.event_id]
not in (HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE)
],
server_name,
target_server_name,
)

to_return = []
Expand All @@ -645,6 +664,10 @@ def check_event_is_visible(
visible = check_event_is_visible(
event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {})
)

if e in partial_state_invisible_events:
visible = False

if visible and not erased:
to_return.append(e)
elif redact:
Expand Down
10 changes: 5 additions & 5 deletions tests/test_visibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_filtering(self) -> None:

filtered = self.get_success(
filter_events_for_server(
self._storage_controllers, "test_server", events_to_filter
self._storage_controllers, "test_server", "hs", events_to_filter
)
)

Expand All @@ -83,7 +83,7 @@ def test_filter_outlier(self) -> None:
self.assertEqual(
self.get_success(
filter_events_for_server(
self._storage_controllers, "remote_hs", [outlier]
self._storage_controllers, "remote_hs", "hs", [outlier]
)
),
[outlier],
Expand All @@ -94,7 +94,7 @@ def test_filter_outlier(self) -> None:

filtered = self.get_success(
filter_events_for_server(
self._storage_controllers, "remote_hs", [outlier, evt]
self._storage_controllers, "remote_hs", "local_hs", [outlier, evt]
)
)
self.assertEqual(len(filtered), 2, f"expected 2 results, got: {filtered}")
Expand All @@ -106,7 +106,7 @@ def test_filter_outlier(self) -> None:
# be redacted)
filtered = self.get_success(
filter_events_for_server(
self._storage_controllers, "other_server", [outlier, evt]
self._storage_controllers, "other_server", "local_hs", [outlier, evt]
)
)
self.assertEqual(filtered[0], outlier)
Expand Down Expand Up @@ -141,7 +141,7 @@ def test_erased_user(self) -> None:
# ... and the filtering happens.
filtered = self.get_success(
filter_events_for_server(
self._storage_controllers, "test_server", events_to_filter
self._storage_controllers, "test_server", "local_hs", events_to_filter
)
)

Expand Down