-
Notifications
You must be signed in to change notification settings - Fork 221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists. #16759
Changes from 10 commits
5413007
7c0354b
efc6966
f7a2078
a937f08
f969202
c04dff6
d2ad5b9
b2cab24
db6a030
f260a7e
a51b959
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,7 @@ | |
# | ||
|
||
import logging | ||
from typing import TYPE_CHECKING, Any, Optional, Tuple | ||
from typing import TYPE_CHECKING, Any, List, Optional, Tuple | ||
|
||
import attr | ||
import msgpack | ||
|
@@ -54,6 +54,9 @@ | |
# This is used to indicate we should only return rooms published to the main list. | ||
EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None) | ||
|
||
# Maximum number of local public rooms returned over the CS or SS API | ||
MAX_PUBLIC_ROOMS_IN_RESPONSE = 100 | ||
|
||
|
||
class RoomListHandler: | ||
def __init__(self, hs: "HomeServer"): | ||
|
@@ -74,7 +77,7 @@ async def get_local_public_room_list( | |
since_token: Optional[str] = None, | ||
search_filter: Optional[dict] = None, | ||
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID, | ||
from_federation: bool = False, | ||
from_federation_origin: Optional[str] = None, | ||
) -> JsonDict: | ||
"""Generate a local public room list. | ||
|
||
|
@@ -89,7 +92,8 @@ async def get_local_public_room_list( | |
This can be (None, None) to indicate the main list, or a particular | ||
appservice and network id to use an appservice specific one. | ||
Setting to None returns all public rooms across all lists. | ||
from_federation: true iff the request comes from the federation API | ||
from_federation_origin: the server name of the requester, or None | ||
if the request is not from federation. | ||
""" | ||
if not self.enable_room_list_search: | ||
return {"chunk": [], "total_room_count_estimate": 0} | ||
|
@@ -102,36 +106,43 @@ async def get_local_public_room_list( | |
network_tuple, | ||
) | ||
|
||
if search_filter: | ||
capped_limit: int = ( | ||
MAX_PUBLIC_ROOMS_IN_RESPONSE | ||
if limit is None or limit > MAX_PUBLIC_ROOMS_IN_RESPONSE | ||
else limit | ||
) | ||
|
||
if search_filter or from_federation_origin is not None: | ||
# We explicitly don't bother caching searches or requests for | ||
# appservice specific lists. | ||
# We also don't bother caching requests from federated homeservers. | ||
logger.info("Bypassing cache as search request.") | ||
|
||
return await self._get_public_room_list( | ||
limit, | ||
capped_limit, | ||
since_token, | ||
search_filter, | ||
network_tuple=network_tuple, | ||
from_federation=from_federation, | ||
from_federation_origin=from_federation_origin, | ||
) | ||
|
||
key = (limit, since_token, network_tuple) | ||
key = (capped_limit, since_token, network_tuple) | ||
return await self.response_cache.wrap( | ||
key, | ||
self._get_public_room_list, | ||
limit, | ||
capped_limit, | ||
since_token, | ||
network_tuple=network_tuple, | ||
from_federation=from_federation, | ||
from_federation_origin=from_federation_origin, | ||
) | ||
|
||
async def _get_public_room_list( | ||
self, | ||
limit: Optional[int] = None, | ||
limit: int, | ||
since_token: Optional[str] = None, | ||
search_filter: Optional[dict] = None, | ||
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID, | ||
from_federation: bool = False, | ||
from_federation_origin: Optional[str] = None, | ||
) -> JsonDict: | ||
"""Generate a public room list. | ||
Args: | ||
|
@@ -142,8 +153,8 @@ async def _get_public_room_list( | |
This can be (None, None) to indicate the main list, or a particular | ||
appservice and network id to use an appservice specific one. | ||
Setting to None returns all public rooms across all lists. | ||
from_federation: Whether this request originated from a | ||
federating server or a client. Used for room filtering. | ||
from_federation_origin: the server name of the requester, or None | ||
if the request is not from federation. | ||
""" | ||
|
||
# Pagination tokens work by storing the room ID sent in the last batch, | ||
|
@@ -165,16 +176,24 @@ async def _get_public_room_list( | |
forwards = True | ||
has_batch_token = False | ||
|
||
# we request one more than wanted to see if there are more pages to come | ||
probing_limit = limit + 1 if limit is not None else None | ||
if from_federation_origin is None: | ||
# Client-Server API: | ||
# we request one more than wanted to see if there are more pages to come | ||
probing_limit = limit + 1 | ||
else: | ||
# Federation API: | ||
# we request a handful more in case any get filtered out by ACLs | ||
# as a best easy effort attempt to return the full number of entries | ||
# specified by `limit`. | ||
probing_limit = limit + 10 | ||
|
||
results = await self.store.get_largest_public_rooms( | ||
network_tuple, | ||
search_filter, | ||
probing_limit, | ||
bounds=bounds, | ||
forwards=forwards, | ||
ignore_non_federatable=from_federation, | ||
ignore_non_federatable=from_federation_origin is not None, | ||
) | ||
|
||
def build_room_entry(room: LargestRoomStats) -> JsonDict: | ||
|
@@ -195,59 +214,118 @@ def build_room_entry(room: LargestRoomStats) -> JsonDict: | |
# Filter out Nones – rather omit the field altogether | ||
return {k: v for k, v in entry.items() if v is not None} | ||
|
||
response: JsonDict = {} | ||
# Build a list of up to `limit` entries. | ||
room_entries: List[JsonDict] = [] | ||
rooms_iterator = results if forwards else reversed(results) | ||
|
||
# Track the first and last 'considered' rooms so that we can provide correct | ||
# next_batch/prev_batch tokens. | ||
# This is needed because the loop might finish early when | ||
# `len(room_entries) >= limit` and we might be left with rooms we didn't | ||
# 'consider' (iterate over) and we should save those rooms for the next | ||
# batch. | ||
first_considered_room: Optional[LargestRoomStats] = None | ||
last_considered_room: Optional[LargestRoomStats] = None | ||
cut_off_due_to_limit: bool = False | ||
|
||
for room_result in rooms_iterator: | ||
if len(room_entries) >= limit: | ||
cut_off_due_to_limit = True | ||
break | ||
|
||
if first_considered_room is None: | ||
first_considered_room = room_result | ||
last_considered_room = room_result | ||
|
||
if from_federation_origin is not None: | ||
# If this is a federated request, apply server ACLs if the room has any set | ||
acl_evaluator = ( | ||
await self._storage_controllers.state.get_server_acl_for_room( | ||
room_result.room_id | ||
) | ||
) | ||
|
||
if acl_evaluator is not None: | ||
if not acl_evaluator.server_matches_acl_event( | ||
from_federation_origin | ||
): | ||
# the requesting server is ACL blocked by the room, | ||
# don't show in directory | ||
continue | ||
|
||
room_entries.append(build_room_entry(room_result)) | ||
|
||
if not forwards: | ||
# If we are paginating backwards, we still return the chunk in | ||
# biggest-first order, so reverse again. | ||
room_entries.reverse() | ||
# Swap the order of first/last considered rooms. | ||
first_considered_room, last_considered_room = ( | ||
last_considered_room, | ||
first_considered_room, | ||
) | ||
|
||
response: JsonDict = { | ||
"chunk": room_entries, | ||
} | ||
num_results = len(results) | ||
if limit is not None: | ||
more_to_come = num_results == probing_limit | ||
|
||
# Depending on direction we trim either the front or back. | ||
if forwards: | ||
results = results[:limit] | ||
more_to_come_from_database = num_results == probing_limit | ||
|
||
if forwards and has_batch_token: | ||
# If there was a token given then we assume that there | ||
# must be previous results, even if there were no results in this batch. | ||
if first_considered_room is not None: | ||
response["prev_batch"] = RoomListNextBatch( | ||
last_joined_members=first_considered_room.joined_members, | ||
last_room_id=first_considered_room.room_id, | ||
direction_is_forward=False, | ||
).to_token() | ||
else: | ||
results = results[-limit:] | ||
else: | ||
more_to_come = False | ||
# If we didn't find any results this time, | ||
# we don't have an actual room ID to put in the token. | ||
# So instead we re-use the room ID from last time but make the | ||
# bound inclusive, by tacking on a 0x01 byte at the end | ||
# (s+"\x00" is the first string following s, but we can't use "\x00" | ||
# in Postgres, so use "\x01" anyway.) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this an existing bug or is this somehow caused by the new filtering? I'm somewhat failing to see why we need to add this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is an existing bug. I hit it during testing and thought it was suspicious enough it was worth fixing There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, right. If There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the previous chunk had rooms To go back to the prior chunk again, you need the query to have a bound of If we give the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh right, yes I see. Has to be said that I'm not a huge fan of just chucking a byte on the end, but I suppose it works. Can we use the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. those are a bit cleverer, yeah. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup fair. I think your solution would have worked fine, but might just cause confusion having invalid room IDs in it, I dunno 🤷 Thanks for changing |
||
assert batch_token is not None | ||
response["prev_batch"] = RoomListNextBatch( | ||
last_joined_members=batch_token.last_joined_members, | ||
last_room_id=batch_token.last_room_id + "\x01", | ||
direction_is_forward=False, | ||
).to_token() | ||
|
||
if num_results > 0: | ||
final_entry = results[-1] | ||
initial_entry = results[0] | ||
|
||
assert first_considered_room is not None | ||
assert last_considered_room is not None | ||
if forwards: | ||
if has_batch_token: | ||
# If there was a token given then we assume that there | ||
# must be previous results. | ||
response["prev_batch"] = RoomListNextBatch( | ||
last_joined_members=initial_entry.joined_members, | ||
last_room_id=initial_entry.room_id, | ||
direction_is_forward=False, | ||
).to_token() | ||
|
||
if more_to_come: | ||
if more_to_come_from_database or cut_off_due_to_limit: | ||
response["next_batch"] = RoomListNextBatch( | ||
last_joined_members=final_entry.joined_members, | ||
last_room_id=final_entry.room_id, | ||
last_joined_members=last_considered_room.joined_members, | ||
last_room_id=last_considered_room.room_id, | ||
direction_is_forward=True, | ||
).to_token() | ||
else: | ||
else: # backwards | ||
if has_batch_token: | ||
response["next_batch"] = RoomListNextBatch( | ||
last_joined_members=final_entry.joined_members, | ||
last_room_id=final_entry.room_id, | ||
last_joined_members=last_considered_room.joined_members, | ||
last_room_id=last_considered_room.room_id, | ||
direction_is_forward=True, | ||
).to_token() | ||
|
||
if more_to_come: | ||
if more_to_come_from_database or cut_off_due_to_limit: | ||
response["prev_batch"] = RoomListNextBatch( | ||
last_joined_members=initial_entry.joined_members, | ||
last_room_id=initial_entry.room_id, | ||
last_joined_members=first_considered_room.joined_members, | ||
last_room_id=first_considered_room.room_id, | ||
direction_is_forward=False, | ||
).to_token() | ||
|
||
response["chunk"] = [build_room_entry(r) for r in results] | ||
|
||
# We can't efficiently count the total number of rooms that are not | ||
# blocked by ACLs, but this is just an estimate so that should be | ||
# good enough. | ||
response["total_room_count_estimate"] = await self.store.count_public_rooms( | ||
network_tuple, | ||
ignore_non_federatable=from_federation, | ||
ignore_non_federatable=from_federation_origin is not None, | ||
search_filter=search_filter, | ||
) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
from http import HTTPStatus | ||
from typing import Optional, Set | ||
|
||
from synapse.rest import admin | ||
from synapse.rest.client import directory, login, room | ||
from synapse.types import JsonDict | ||
|
||
from tests import unittest | ||
|
||
|
||
class RoomListHandlerTestCase(unittest.HomeserverTestCase): | ||
servlets = [ | ||
admin.register_servlets, | ||
login.register_servlets, | ||
room.register_servlets, | ||
directory.register_servlets, | ||
] | ||
|
||
def _create_published_room( | ||
self, tok: str, extra_content: Optional[JsonDict] = None | ||
) -> str: | ||
room_id = self.helper.create_room_as(tok=tok, extra_content=extra_content) | ||
channel = self.make_request( | ||
"PUT", | ||
f"/_matrix/client/v3/directory/list/room/{room_id}?access_token={tok}", | ||
content={ | ||
"visibility": "public", | ||
}, | ||
) | ||
assert channel.code == HTTPStatus.OK, f"couldn't publish room: {channel.result}" | ||
return room_id | ||
|
||
def test_acls_applied_to_room_directory_results(self) -> None: | ||
""" | ||
Creates 3 rooms. Room 2 has an ACL that only permits the homeservers | ||
`test` and `test2` to access it. | ||
|
||
We then simulate `test2` and `test3` requesting the room directory and assert | ||
that `test3` does not see room 2, but `test2` sees all 3. | ||
""" | ||
self.register_user("u1", "p1") | ||
u1tok = self.login("u1", "p1") | ||
room1 = self._create_published_room(u1tok) | ||
|
||
room2 = self._create_published_room( | ||
u1tok, | ||
extra_content={ | ||
"initial_state": [ | ||
{ | ||
"type": "m.room.server_acl", | ||
"content": { | ||
"allow": ["test", "test2"], | ||
}, | ||
} | ||
] | ||
}, | ||
) | ||
|
||
room3 = self._create_published_room(u1tok) | ||
|
||
room_list = self.get_success( | ||
self.hs.get_room_list_handler().get_local_public_room_list( | ||
limit=50, from_federation_origin="test2" | ||
) | ||
) | ||
room_ids_in_test2_list: Set[str] = { | ||
entry["room_id"] for entry in room_list["chunk"] | ||
} | ||
|
||
room_list = self.get_success( | ||
self.hs.get_room_list_handler().get_local_public_room_list( | ||
limit=50, from_federation_origin="test3" | ||
) | ||
) | ||
room_ids_in_test3_list: Set[str] = { | ||
entry["room_id"] for entry in room_list["chunk"] | ||
} | ||
|
||
self.assertEqual( | ||
room_ids_in_test2_list, | ||
{room1, room2, room3}, | ||
"test2 should be able to see all 3 rooms", | ||
) | ||
self.assertEqual( | ||
room_ids_in_test3_list, | ||
{room1, room3}, | ||
"test3 should be able to see only 2 rooms", | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's update this log line too
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Have also demoted to debug since it seems a bit inappropriate for info, but let me know if you disagree