
Fix bug in device list caching when remote users leave rooms #13749

Merged: 8 commits, merged on Sep 14, 2022.
1 change: 1 addition & 0 deletions changelog.d/13749.bugfix
@@ -0,0 +1 @@
+Fix a long-standing bug where device lists would remain cached when remote users left and rejoined the last room shared with the local homeserver.
11 changes: 0 additions & 11 deletions synapse/handlers/device.py
@@ -45,7 +45,6 @@
     JsonDict,
     StreamKeyType,
     StreamToken,
-    UserID,
     get_domain_from_id,
     get_verify_key_from_cross_signing_key,
 )
@@ -324,8 +323,6 @@ def __init__(self, hs: "HomeServer"):
             self.device_list_updater.incoming_device_list_update,
         )
 
-        hs.get_distributor().observe("user_left_room", self.user_left_room)
-
         # Whether `_handle_new_device_update_async` is currently processing.
         self._handle_new_device_update_is_processing = False
 
@@ -569,14 +566,6 @@ async def notify_user_signature_update(
             StreamKeyType.DEVICE_LIST, position, users=[from_user_id]
         )
 
-    async def user_left_room(self, user: UserID, room_id: str) -> None:
-        user_id = user.to_string()
-        room_ids = await self.store.get_rooms_for_user(user_id)
-        if not room_ids:
-            # We no longer share rooms with this user, so we'll no longer
-            # receive device updates. Mark this in DB.
-            await self.store.mark_remote_user_device_list_as_unsubscribed(user_id)
-
     async def store_dehydrated_device(
         self,
         user_id: str,
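The deleted `user_left_room` hook did a per-user check at leave time. The PR moves that responsibility to the event-persistence path (see the `persist_events.py` changes below), which collects `potentially_left_users` and checks them in bulk. A minimal sketch of that bulk check, built only from the two store methods visible in this PR's diffs; the free-standing function and its `store` parameter are illustrative, not Synapse's actual structure:

```python
from typing import Set


async def check_potentially_left_users(store, user_ids: Set[str]) -> None:
    """Sketch: given remote users who may no longer share a room with us,
    mark their cached device lists stale so we re-query over federation."""
    if not user_ids:
        return
    # Both store methods appear in this PR (one in the added e2e_keys check,
    # one in the removed user_left_room hook).
    still_shared = await store.get_users_server_still_shares_room_with(user_ids)
    for user_id in user_ids - still_shared:
        await store.mark_remote_user_device_list_as_unsubscribed(user_id)
```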
26 changes: 26 additions & 0 deletions synapse/handlers/e2e_keys.py
@@ -175,6 +175,32 @@ async def query_devices(
                 user_ids_not_in_cache,
                 remote_results,
             ) = await self.store.get_user_devices_from_cache(query_list)
+
+            # Check that the homeserver still shares a room with all cached users.
+            # Note that this check may be slightly racy when a remote user leaves a
+            # room after we have fetched their cached device list. In the worst case
+            # we will do extra federation queries for devices that we had cached.
+            cached_users = set(remote_results.keys())
+            valid_cached_users = (
+                await self.store.get_users_server_still_shares_room_with(
+                    remote_results.keys()
+                )
+            )
+            invalid_cached_users = cached_users - valid_cached_users
+            if invalid_cached_users:
+                # Fix up results. If we get here, there is either a bug in device
+                # list tracking, or we hit the race mentioned above.
+                user_ids_not_in_cache.update(invalid_cached_users)
+                for invalid_user_id in invalid_cached_users:
+                    remote_results.pop(invalid_user_id)
+                # This log message may be removed if it turns out it's almost
+                # entirely triggered by races.
+                logger.error(
+                    "Devices for %s were cached, but the server no longer shares "
+                    "any rooms with them. The cached device lists are stale.",
+                    invalid_cached_users,
+                )
+
             for user_id, devices in remote_results.items():
                 user_devices = results.setdefault(user_id, {})
                 for device_id, device in devices.items():
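The added block boils down to a set-difference filter over the cached results. A self-contained sketch of that step, with an in-memory stand-in for the store call:

```python
def get_users_server_still_shares_room_with(user_ids):
    # Stand-in for the store method: pretend we only still share a room
    # with @alice.
    return {user_id for user_id in user_ids if user_id == "@alice:remote"}


remote_results = {"@alice:remote": {"DEV1": {}}, "@bob:remote": {"DEV2": {}}}
user_ids_not_in_cache = set()

cached_users = set(remote_results)
valid_cached_users = get_users_server_still_shares_room_with(cached_users)
invalid_cached_users = cached_users - valid_cached_users

# Stale entries are dropped from the cached results and re-queried instead.
user_ids_not_in_cache.update(invalid_cached_users)
for user_id in invalid_cached_users:
    remote_results.pop(user_id)

assert set(remote_results) == {"@alice:remote"}
assert user_ids_not_in_cache == {"@bob:remote"}
```

As the diff's comment notes, the race only errs in the safe direction: a user who leaves after the cache read is at worst re-queried over federation, never served stale data.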
20 changes: 17 additions & 3 deletions synapse/storage/controllers/persist_events.py
@@ -598,9 +598,9 @@ async def _persist_event_batch(
         # room
         state_delta_for_room: Dict[str, DeltaState] = {}
 
-        # Set of remote users which were in rooms the server has left. We
-        # should check if we still share any rooms and if not we mark their
-        # device lists as stale.
+        # Set of remote users which were in rooms the server has left or who may
+        # have left rooms the server is in. We should check if we still share any
+        # rooms and if not we mark their device lists as stale.
         potentially_left_users: Set[str] = set()
 
         if not backfilled:
@@ -725,6 +725,20 @@
                     current_state = {}
                     delta.no_longer_in_room = True
 
+                # Add all remote users that might have left rooms.
+                potentially_left_users.update(
+                    user_id
+                    for event_type, user_id in delta.to_delete
+                    if event_type == EventTypes.Member
+                    and not self.is_mine_id(user_id)
+                )
+                potentially_left_users.update(
+                    user_id
+                    for event_type, user_id in delta.to_insert.keys()
+                    if event_type == EventTypes.Member
+                    and not self.is_mine_id(user_id)
+                )
+
                 state_delta_for_room[room_id] = delta
 
         await self.persist_events_store._persist_events_and_state_updates(

Author's inline comment on the `is_mine_id` checks: "Needs #13746, otherwise we can raise errors when there are malformed user IDs in the room state."
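For clarity, a runnable sketch of what the two generator expressions collect. The `DeltaState` shape here is inferred from the diff (`to_delete` as (event_type, state_key) pairs, `to_insert` keyed the same way); the local server name and event IDs are invented for the example:

```python
from dataclasses import dataclass, field
from typing import Dict, List, Set, Tuple

MEMBER = "m.room.member"  # the value behind EventTypes.Member


@dataclass
class DeltaState:
    to_delete: List[Tuple[str, str]] = field(default_factory=list)
    to_insert: Dict[Tuple[str, str], str] = field(default_factory=dict)


def is_mine_id(user_id: str) -> bool:
    return user_id.endswith(":localserver")  # hypothetical local server name


delta = DeltaState(
    to_delete=[(MEMBER, "@alice:remote")],
    to_insert={
        (MEMBER, "@bob:remote"): "$event1",
        (MEMBER, "@carol:localserver"): "$event2",
    },
)

# Remote users whose membership state changed, whether removed or replaced,
# might no longer share a room with us and get re-checked in bulk later.
potentially_left_users: Set[str] = set()
potentially_left_users.update(
    user_id
    for event_type, user_id in delta.to_delete
    if event_type == MEMBER and not is_mine_id(user_id)
)
potentially_left_users.update(
    user_id
    for event_type, user_id in delta.to_insert.keys()
    if event_type == MEMBER and not is_mine_id(user_id)
)
assert potentially_left_users == {"@alice:remote", "@bob:remote"}
```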
8 changes: 7 additions & 1 deletion tests/handlers/test_e2e_keys.py
@@ -891,14 +891,20 @@ def test_query_all_devices_caches_result(self, device_ids: Iterable[str]) -> None:
             new_callable=mock.MagicMock,
             return_value=make_awaitable(["some_room_id"]),
         )
+        mock_get_users = mock.patch.object(
+            self.store,
+            "get_users_server_still_shares_room_with",
+            new_callable=mock.MagicMock,
+            return_value=make_awaitable({remote_user_id}),
+        )
         mock_request = mock.patch.object(
             self.hs.get_federation_client(),
             "query_user_devices",
             new_callable=mock.MagicMock,
             return_value=make_awaitable(response_body),
         )
 
-        with mock_get_rooms, mock_request as mocked_federation_request:
+        with mock_get_rooms, mock_get_users, mock_request as mocked_federation_request:
             # Make the first query and sanity check it succeeds.
             response_1 = self.get_success(
                 e2e_handler.query_devices(
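The new `mock_get_users` patch follows the same pattern as the existing ones: the async store method is replaced by a `MagicMock` whose return value is an already-resolved awaitable. A self-contained sketch of that pattern; `FakeStore` is invented for the example and `make_awaitable` is a simplified version of Synapse's test helper of the same name:

```python
import asyncio
from unittest import mock


class FakeStore:
    async def get_users_server_still_shares_room_with(self, user_ids):
        raise RuntimeError("the test should never hit the real implementation")


def make_awaitable(result):
    # Simplified helper: an already-resolved future the caller can await once.
    future: asyncio.Future = asyncio.Future()
    future.set_result(result)
    return future


async def main() -> None:
    store = FakeStore()
    patch = mock.patch.object(
        store,
        "get_users_server_still_shares_room_with",
        new_callable=mock.MagicMock,
        return_value=make_awaitable({"@user:remote"}),
    )
    with patch as mocked:
        result = await store.get_users_server_still_shares_room_with({"@user:remote"})
        assert result == {"@user:remote"}
        mocked.assert_called_once()


asyncio.run(main())
```

Using a plain `MagicMock` with a pre-resolved future (rather than `AsyncMock`) matches the style of the surrounding test and keeps the patched method's call count inspectable in the usual way.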