diff --git a/changelog.d/13826.bugfix b/changelog.d/13826.bugfix new file mode 100644 index 000000000000..8ffafec07b33 --- /dev/null +++ b/changelog.d/13826.bugfix @@ -0,0 +1 @@ +Fix a long standing bug where device lists would remain cached when remote users left and rejoined the last room shared with the local homeserver. diff --git a/synapse/handlers/e2e_keys.py b/synapse/handlers/e2e_keys.py index 8eed63ccf3ac..09a2492afc9e 100644 --- a/synapse/handlers/e2e_keys.py +++ b/synapse/handlers/e2e_keys.py @@ -188,18 +188,21 @@ async def query_devices( ) invalid_cached_users = cached_users - valid_cached_users if invalid_cached_users: - # Fix up results. If we get here, there is either a bug in device - # list tracking, or we hit the race mentioned above. + # Fix up results. If we get here, it means there was either a bug in + # device list tracking, or we hit the race mentioned above. + # TODO: In practice, this path is hit fairly often in existing + # deployments when clients query the keys of departed remote + # users. A background update to mark the appropriate device + # lists as unsubscribed is needed. + # https://github.com/matrix-org/synapse/issues/13651 + # Note that this currently introduces a failure mode when clients + # are trying to decrypt old messages from a remote user whose + # homeserver is no longer available. We may want to consider falling + # back to the cached data when we fail to retrieve a device list + # over federation for such remote users. user_ids_not_in_cache.update(invalid_cached_users) for invalid_user_id in invalid_cached_users: remote_results.pop(invalid_user_id) - # This log message may be removed if it turns out it's almost - # entirely triggered by races. - logger.error( - "Devices for %s were cached, but the server no longer shares " - "any rooms with them. The cached device lists are stale.", - invalid_cached_users, - ) for user_id, devices in remote_results.items(): user_devices = results.setdefault(user_id, {})