Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists. #16759

Merged · 12 commits · Jan 8, 2024

Changes from 10 commits

1 change: 1 addition & 0 deletions changelog.d/16759.feature
@@ -0,0 +1 @@
Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists.
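
To make the changelog entry concrete, here is a minimal, illustrative sketch of the per-room check this feature performs before including a room in a directory response served over federation. It is not Synapse's actual ACL evaluator (which also handles IP literals and port stripping); the helper name and the plain glob matching are assumptions made for the example.

import fnmatch
from typing import Iterable


def server_allowed_by_acl(
    server_name: str, allow: Iterable[str], deny: Iterable[str]
) -> bool:
    # Simplified m.room.server_acl semantics: any deny match rejects the
    # requesting server; otherwise at least one allow pattern must match.
    if any(fnmatch.fnmatchcase(server_name, pattern) for pattern in deny):
        return False
    return any(fnmatch.fnmatchcase(server_name, pattern) for pattern in allow)


# With the ACL used by the test added in this PR ({"allow": ["test", "test2"]}),
# "test2" is admitted and "test3" is filtered out of the directory listing.
assert server_allowed_by_acl("test2", allow=["test", "test2"], deny=[])
assert not server_allowed_by_acl("test3", allow=["test", "test2"], deny=[])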
7 changes: 5 additions & 2 deletions synapse/federation/transport/server/__init__.py
@@ -154,7 +154,10 @@ async def on_GET(
limit = None

data = await self.handler.get_local_public_room_list(
limit, since_token, network_tuple=network_tuple, from_federation=True
limit,
since_token,
network_tuple=network_tuple,
from_federation_origin=origin,
)
return 200, data

@@ -195,7 +198,7 @@ async def on_POST(
since_token=since_token,
search_filter=search_filter,
network_tuple=network_tuple,
from_federation=True,
from_federation_origin=origin,
)

return 200, data
176 changes: 127 additions & 49 deletions synapse/handlers/room_list.py
@@ -19,7 +19,7 @@
#

import logging
from typing import TYPE_CHECKING, Any, Optional, Tuple
from typing import TYPE_CHECKING, Any, List, Optional, Tuple

import attr
import msgpack
@@ -54,6 +54,9 @@
# This is used to indicate we should only return rooms published to the main list.
EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)

# Maximum number of local public rooms returned over the CS or SS API
MAX_PUBLIC_ROOMS_IN_RESPONSE = 100


class RoomListHandler:
def __init__(self, hs: "HomeServer"):
@@ -74,7 +77,7 @@ async def get_local_public_room_list(
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation: bool = False,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a local public room list.

@@ -89,7 +92,8 @@ async def get_local_public_room_list(
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation: true iff the request comes from the federation API
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""
if not self.enable_room_list_search:
return {"chunk": [], "total_room_count_estimate": 0}
@@ -102,36 +106,43 @@ async def get_local_public_room_list(
network_tuple,
)

if search_filter:
capped_limit: int = (
MAX_PUBLIC_ROOMS_IN_RESPONSE
if limit is None or limit > MAX_PUBLIC_ROOMS_IN_RESPONSE
else limit
)

if search_filter or from_federation_origin is not None:
# We explicitly don't bother caching searches or requests for
# appservice specific lists.
# We also don't bother caching requests from federated homeservers.
logger.info("Bypassing cache as search request.")

Member: Let's update this log line too.

Contributor Author: Done. Have also demoted to debug since it seems a bit inappropriate for info, but let me know if you disagree.
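
For reference, the post-review version of that log line presumably ends up as something along these lines; the exact wording is a guess, not the committed change:

# Assumed shape of the updated line: demoted to debug and covering
# federation requests as well as searches.
logger.debug("Bypassing cache for search or federation room list request")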


return await self._get_public_room_list(
limit,
capped_limit,
since_token,
search_filter,
network_tuple=network_tuple,
from_federation=from_federation,
from_federation_origin=from_federation_origin,
)

key = (limit, since_token, network_tuple)
key = (capped_limit, since_token, network_tuple)
return await self.response_cache.wrap(
key,
self._get_public_room_list,
limit,
capped_limit,
since_token,
network_tuple=network_tuple,
from_federation=from_federation,
from_federation_origin=from_federation_origin,
)

async def _get_public_room_list(
self,
limit: Optional[int] = None,
limit: int,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation: bool = False,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a public room list.
Args:
@@ -142,8 +153,8 @@ async def _get_public_room_list(
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation: Whether this request originated from a
federating server or a client. Used for room filtering.
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""

# Pagination tokens work by storing the room ID sent in the last batch,
@@ -165,16 +176,24 @@ async def _get_public_room_list(
forwards = True
has_batch_token = False

# we request one more than wanted to see if there are more pages to come
probing_limit = limit + 1 if limit is not None else None
if from_federation_origin is None:
# Client-Server API:
# we request one more than wanted to see if there are more pages to come
probing_limit = limit + 1
else:
# Federation API:
# we request a handful more in case any get filtered out by ACLs
# as a best-effort attempt to return the full number of entries
# specified by `limit`.
probing_limit = limit + 10

results = await self.store.get_largest_public_rooms(
network_tuple,
search_filter,
probing_limit,
bounds=bounds,
forwards=forwards,
ignore_non_federatable=from_federation,
ignore_non_federatable=from_federation_origin is not None,
)

def build_room_entry(room: LargestRoomStats) -> JsonDict:
@@ -195,59 +214,118 @@ def build_room_entry(room: LargestRoomStats) -> JsonDict:
# Filter out Nones – rather omit the field altogether
return {k: v for k, v in entry.items() if v is not None}

response: JsonDict = {}
# Build a list of up to `limit` entries.
room_entries: List[JsonDict] = []
rooms_iterator = results if forwards else reversed(results)

# Track the first and last 'considered' rooms so that we can provide correct
# next_batch/prev_batch tokens.
# This is needed because the loop might finish early when
# `len(room_entries) >= limit` and we might be left with rooms we didn't
# 'consider' (iterate over) and we should save those rooms for the next
# batch.
first_considered_room: Optional[LargestRoomStats] = None
last_considered_room: Optional[LargestRoomStats] = None
cut_off_due_to_limit: bool = False

for room_result in rooms_iterator:
if len(room_entries) >= limit:
cut_off_due_to_limit = True
break

if first_considered_room is None:
first_considered_room = room_result
last_considered_room = room_result

if from_federation_origin is not None:
# If this is a federated request, apply server ACLs if the room has any set
acl_evaluator = (
await self._storage_controllers.state.get_server_acl_for_room(
room_result.room_id
)
)

if acl_evaluator is not None:
if not acl_evaluator.server_matches_acl_event(
from_federation_origin
):
# the requesting server is ACL blocked by the room,
# don't show in directory
continue

room_entries.append(build_room_entry(room_result))

if not forwards:
# If we are paginating backwards, we still return the chunk in
# biggest-first order, so reverse again.
room_entries.reverse()
# Swap the order of first/last considered rooms.
first_considered_room, last_considered_room = (
last_considered_room,
first_considered_room,
)

response: JsonDict = {
"chunk": room_entries,
}
num_results = len(results)
if limit is not None:
more_to_come = num_results == probing_limit

# Depending on direction we trim either the front or back.
if forwards:
results = results[:limit]
more_to_come_from_database = num_results == probing_limit

if forwards and has_batch_token:
# If there was a token given then we assume that there
# must be previous results, even if there were no results in this batch.
if first_considered_room is not None:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()
else:
results = results[-limit:]
else:
more_to_come = False
# If we didn't find any results this time,
# we don't have an actual room ID to put in the token.
# So instead we re-use the room ID from last time but make the
# bound inclusive, by tacking on a 0x01 byte at the end
# (s+"\x00" is the first string following s, but we can't use "\x00"
# in Postgres, so use "\x01" anyway.)

Member: Is this an existing bug or is this somehow caused by the new filtering? I'm somewhat failing to see why we need to add this.

Contributor Author: I think this is an existing bug. I hit it during testing and thought it was suspicious enough that it was worth fixing.

Member: Ah, right. If first_considered_room is None, then doesn't that mean we have reached the end of the list and there are no more rooms? And if so, can we not reuse since_token for the bound?

Contributor Author: If the previous chunk had rooms !A, !B, !C then (from memory) you get a next_batch token saying !C. The query that follows for the next request has a bound of > !C.

To go back to the prior chunk again, you need the query to have a bound of < !D. But if we have an empty chunk, we don't have any known room ID to stand in for !D, hence the idea of cheekily constructing a fake one, "!C\x01".

If we give the since_token of !C back, then going back to the prior chunk will only return !A and !B as rooms.

Member: Oh right, yes, I see. It has to be said that I'm not a huge fan of just chucking a byte on the end, but I suppose it works.

Can we use RoomListNextBatch(0, "") since we know we're at the end? Or maybe we should always use the since_token for the prev batch and change the equality bounds on the DB query to avoid all this?

Contributor Author: Those are a bit cleverer, yeah. RoomListNextBatch(0, "") sounds like a reasonable idea to me. I think flipping the bounds would also have been fine, but I know there is scope to mess that up fairly easily...

Member: Yup, fair. I think your solution would have worked fine, but it might just cause confusion having invalid room IDs in it, I dunno 🤷 Thanks for changing.
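
The string-ordering trick being debated can be checked in isolation. The snippet below is purely illustrative (made-up room IDs, plain lexicographic comparison) and is not part of the diff:

# Appending "\x01" builds a bound that sorts after the last room returned
# ("!C...") but before any realistic successor ("!D..."), so a query bounded
# by it effectively includes "!C..." itself.
returned = ["!A:example.org", "!B:example.org", "!C:example.org"]
fake_bound = "!C:example.org" + "\x01"

assert all(room_id < fake_bound for room_id in returned)
assert "!C:example.org" < fake_bound < "!D:example.org"
# "\x00" would give the true "first string after s", but Postgres text
# columns cannot contain NUL bytes, hence "\x01".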

assert batch_token is not None
response["prev_batch"] = RoomListNextBatch(
last_joined_members=batch_token.last_joined_members,
last_room_id=batch_token.last_room_id + "\x01",
direction_is_forward=False,
).to_token()

if num_results > 0:
final_entry = results[-1]
initial_entry = results[0]

assert first_considered_room is not None
assert last_considered_room is not None
if forwards:
if has_batch_token:
# If there was a token given then we assume that there
# must be previous results.
response["prev_batch"] = RoomListNextBatch(
last_joined_members=initial_entry.joined_members,
last_room_id=initial_entry.room_id,
direction_is_forward=False,
).to_token()

if more_to_come:
if more_to_come_from_database or cut_off_due_to_limit:
response["next_batch"] = RoomListNextBatch(
last_joined_members=final_entry.joined_members,
last_room_id=final_entry.room_id,
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()
else:
else: # backwards
if has_batch_token:
response["next_batch"] = RoomListNextBatch(
last_joined_members=final_entry.joined_members,
last_room_id=final_entry.room_id,
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()

if more_to_come:
if more_to_come_from_database or cut_off_due_to_limit:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=initial_entry.joined_members,
last_room_id=initial_entry.room_id,
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()

response["chunk"] = [build_room_entry(r) for r in results]

# We can't efficiently count the total number of rooms that are not
# blocked by ACLs, but this is just an estimate so that should be
# good enough.
response["total_room_count_estimate"] = await self.store.count_public_rooms(
network_tuple,
ignore_non_federatable=from_federation,
ignore_non_federatable=from_federation_origin is not None,
search_filter=search_filter,
)

88 changes: 88 additions & 0 deletions tests/handlers/test_room_list.py
@@ -0,0 +1,88 @@
from http import HTTPStatus
from typing import Optional, Set

from synapse.rest import admin
from synapse.rest.client import directory, login, room
from synapse.types import JsonDict

from tests import unittest


class RoomListHandlerTestCase(unittest.HomeserverTestCase):
servlets = [
admin.register_servlets,
login.register_servlets,
room.register_servlets,
directory.register_servlets,
]

def _create_published_room(
self, tok: str, extra_content: Optional[JsonDict] = None
) -> str:
room_id = self.helper.create_room_as(tok=tok, extra_content=extra_content)
channel = self.make_request(
"PUT",
f"/_matrix/client/v3/directory/list/room/{room_id}?access_token={tok}",
content={
"visibility": "public",
},
)
assert channel.code == HTTPStatus.OK, f"couldn't publish room: {channel.result}"
return room_id

def test_acls_applied_to_room_directory_results(self) -> None:
"""
Creates 3 rooms. Room 2 has an ACL that only permits the homeservers
`test` and `test2` to access it.

We then simulate `test2` and `test3` requesting the room directory and assert
that `test3` does not see room 2, but `test2` sees all 3.
"""
self.register_user("u1", "p1")
u1tok = self.login("u1", "p1")
room1 = self._create_published_room(u1tok)

room2 = self._create_published_room(
u1tok,
extra_content={
"initial_state": [
{
"type": "m.room.server_acl",
"content": {
"allow": ["test", "test2"],
},
}
]
},
)

room3 = self._create_published_room(u1tok)

room_list = self.get_success(
self.hs.get_room_list_handler().get_local_public_room_list(
limit=50, from_federation_origin="test2"
)
)
room_ids_in_test2_list: Set[str] = {
entry["room_id"] for entry in room_list["chunk"]
}

room_list = self.get_success(
self.hs.get_room_list_handler().get_local_public_room_list(
limit=50, from_federation_origin="test3"
)
)
room_ids_in_test3_list: Set[str] = {
entry["room_id"] for entry in room_list["chunk"]
}

self.assertEqual(
room_ids_in_test2_list,
{room1, room2, room3},
"test2 should be able to see all 3 rooms",
)
self.assertEqual(
room_ids_in_test3_list,
{room1, room3},
"test3 should be able to see only 2 rooms",
)