Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists. #16759

Merged · 12 commits · Jan 8, 2024

Changes from 10 commits

1 change: 1 addition & 0 deletions changelog.d/16759.feature
@@ -0,0 +1 @@
Filter out rooms from the room directory being served to other homeservers when those rooms block that homeserver by their Access Control Lists.
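
To make the changelog entry concrete, here is a minimal, illustrative sketch of the per-room check this feature performs before including a room in a directory response served over federation. It is not Synapse's actual ACL evaluator (which also handles IP literals and port stripping); the helper name and the plain glob matching are assumptions made for the example.

import fnmatch
from typing import Iterable


def server_allowed_by_acl(
    server_name: str, allow: Iterable[str], deny: Iterable[str]
) -> bool:
    # Simplified m.room.server_acl semantics: any deny match rejects the
    # requesting server; otherwise at least one allow pattern must match.
    if any(fnmatch.fnmatchcase(server_name, pattern) for pattern in deny):
        return False
    return any(fnmatch.fnmatchcase(server_name, pattern) for pattern in allow)


# With the ACL used by the test added in this PR ({"allow": ["test", "test2"]}),
# "test2" is admitted and "test3" is filtered out of the directory listing.
assert server_allowed_by_acl("test2", allow=["test", "test2"], deny=[])
assert not server_allowed_by_acl("test3", allow=["test", "test2"], deny=[])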
7 changes: 5 additions & 2 deletions synapse/federation/transport/server/__init__.py
@@ -154,7 +154,10 @@ async def on_GET(
limit = None

data = await self.handler.get_local_public_room_list(
limit, since_token, network_tuple=network_tuple, from_federation=True
limit,
since_token,
network_tuple=network_tuple,
from_federation_origin=origin,
)
return 200, data

@@ -195,7 +198,7 @@ async def on_POST(
since_token=since_token,
search_filter=search_filter,
network_tuple=network_tuple,
from_federation=True,
from_federation_origin=origin,
)

return 200, data
176 changes: 127 additions & 49 deletions synapse/handlers/room_list.py
@@ -19,7 +19,7 @@
#

import logging
from typing import TYPE_CHECKING, Any, Optional, Tuple
from typing import TYPE_CHECKING, Any, List, Optional, Tuple

import attr
import msgpack
@@ -54,6 +54,9 @@
# This is used to indicate we should only return rooms published to the main list.
EMPTY_THIRD_PARTY_ID = ThirdPartyInstanceID(None, None)

# Maximum number of local public rooms returned over the CS or SS API
MAX_PUBLIC_ROOMS_IN_RESPONSE = 100


class RoomListHandler:
def __init__(self, hs: "HomeServer"):
@@ -74,7 +77,7 @@ async def get_local_public_room_list(
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation: bool = False,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a local public room list.

@@ -89,7 +92,8 @@ async def get_local_public_room_list(
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation: true iff the request comes from the federation API
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""
if not self.enable_room_list_search:
return {"chunk": [], "total_room_count_estimate": 0}
@@ -102,36 +106,43 @@ async def get_local_public_room_list(
network_tuple,
)

if search_filter:
capped_limit: int = (
MAX_PUBLIC_ROOMS_IN_RESPONSE
if limit is None or limit > MAX_PUBLIC_ROOMS_IN_RESPONSE
else limit
)

if search_filter or from_federation_origin is not None:
# We explicitly don't bother caching searches or requests for
# appservice specific lists.
# We also don't bother caching requests from federated homeservers.
logger.info("Bypassing cache as search request.")

Member: Let's update this log line too.

Contributor Author: Done. Have also demoted to debug since it seems a bit inappropriate for info, but let me know if you disagree.
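
For reference, the post-review version of that log line presumably ends up as something along these lines; the exact wording is a guess, not the committed change:

# Assumed shape of the updated line: demoted to debug and covering
# federation requests as well as searches.
logger.debug("Bypassing cache for search or federation room list request")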


return await self._get_public_room_list(
limit,
capped_limit,
since_token,
search_filter,
network_tuple=network_tuple,
from_federation=from_federation,
from_federation_origin=from_federation_origin,
)

key = (limit, since_token, network_tuple)
key = (capped_limit, since_token, network_tuple)
return await self.response_cache.wrap(
key,
self._get_public_room_list,
limit,
capped_limit,
since_token,
network_tuple=network_tuple,
from_federation=from_federation,
from_federation_origin=from_federation_origin,
)

async def _get_public_room_list(
self,
limit: Optional[int] = None,
limit: int,
since_token: Optional[str] = None,
search_filter: Optional[dict] = None,
network_tuple: Optional[ThirdPartyInstanceID] = EMPTY_THIRD_PARTY_ID,
from_federation: bool = False,
from_federation_origin: Optional[str] = None,
) -> JsonDict:
"""Generate a public room list.
Args:
@@ -142,8 +153,8 @@ async def _get_public_room_list(
This can be (None, None) to indicate the main list, or a particular
appservice and network id to use an appservice specific one.
Setting to None returns all public rooms across all lists.
from_federation: Whether this request originated from a
federating server or a client. Used for room filtering.
from_federation_origin: the server name of the requester, or None
if the request is not from federation.
"""

# Pagination tokens work by storing the room ID sent in the last batch,
@@ -165,16 +176,24 @@ async def _get_public_room_list(
forwards = True
has_batch_token = False

# we request one more than wanted to see if there are more pages to come
probing_limit = limit + 1 if limit is not None else None
if from_federation_origin is None:
# Client-Server API:
# we request one more than wanted to see if there are more pages to come
probing_limit = limit + 1
else:
# Federation API:
# we request a handful more in case any get filtered out by ACLs
# as a best-effort attempt to return the full number of entries
# specified by `limit`.
probing_limit = limit + 10

results = await self.store.get_largest_public_rooms(
network_tuple,
search_filter,
probing_limit,
bounds=bounds,
forwards=forwards,
ignore_non_federatable=from_federation,
ignore_non_federatable=from_federation_origin is not None,
)

def build_room_entry(room: LargestRoomStats) -> JsonDict:
@@ -195,59 +214,118 @@ def build_room_entry(room: LargestRoomStats) -> JsonDict:
# Filter out Nones – rather omit the field altogether
return {k: v for k, v in entry.items() if v is not None}

response: JsonDict = {}
# Build a list of up to `limit` entries.
room_entries: List[JsonDict] = []
rooms_iterator = results if forwards else reversed(results)

# Track the first and last 'considered' rooms so that we can provide correct
# next_batch/prev_batch tokens.
# This is needed because the loop might finish early when
# `len(room_entries) >= limit` and we might be left with rooms we didn't
# 'consider' (iterate over) and we should save those rooms for the next
# batch.
first_considered_room: Optional[LargestRoomStats] = None
last_considered_room: Optional[LargestRoomStats] = None
cut_off_due_to_limit: bool = False

for room_result in rooms_iterator:
if len(room_entries) >= limit:
cut_off_due_to_limit = True
break

if first_considered_room is None:
first_considered_room = room_result
last_considered_room = room_result

if from_federation_origin is not None:
# If this is a federated request, apply server ACLs if the room has any set
acl_evaluator = (
await self._storage_controllers.state.get_server_acl_for_room(
room_result.room_id
)
)

if acl_evaluator is not None:
if not acl_evaluator.server_matches_acl_event(
from_federation_origin
):
# the requesting server is ACL blocked by the room,
# don't show in directory
continue

room_entries.append(build_room_entry(room_result))

if not forwards:
# If we are paginating backwards, we still return the chunk in
# biggest-first order, so reverse again.
room_entries.reverse()
# Swap the order of first/last considered rooms.
first_considered_room, last_considered_room = (
last_considered_room,
first_considered_room,
)

response: JsonDict = {
"chunk": room_entries,
}
num_results = len(results)
if limit is not None:
more_to_come = num_results == probing_limit

# Depending on direction we trim either the front or back.
if forwards:
results = results[:limit]
more_to_come_from_database = num_results == probing_limit

if forwards and has_batch_token:
# If there was a token given then we assume that there
# must be previous results, even if there were no results in this batch.
if first_considered_room is not None:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()
else:
results = results[-limit:]
else:
more_to_come = False
# If we didn't find any results this time,
# we don't have an actual room ID to put in the token.
# So instead we re-use the room ID from last time but make the
# bound inclusive, by tacking on a 0x01 byte at the end
# (s+"\x00" is the first string following s, but we can't use "\x00"
# in Postgres, so use "\x01" anyway.)

Member: Is this an existing bug or is this somehow caused by the new filtering? I'm somewhat failing to see why we need to add this.

Contributor Author: I think this is an existing bug. I hit it during testing and thought it was suspicious enough that it was worth fixing.

Member: Ah, right. If first_considered_room is None, then doesn't that mean we have reached the end of the list and there are no more rooms? And if so, can we not reuse since_token for the bound?

Contributor Author: If the previous chunk had rooms !A, !B, !C then (from memory) you get a next_batch token saying !C. The query that follows for the next request has a bound of > !C.

To go back to the prior chunk again, you need the query to have a bound of < !D. But if we have an empty chunk, we don't have any known room ID to stand in for !D, hence the idea of cheekily constructing a fake one, "!C\x01".

If we give the since_token of !C back, then going back to the prior chunk will only return !A and !B as rooms.

Member: Oh right, yes, I see. It has to be said that I'm not a huge fan of just chucking a byte on the end, but I suppose it works.

Can we use RoomListNextBatch(0, "") since we know we're at the end? Or maybe we should always use the since_token for the prev batch and change the equality bounds on the DB query to avoid all this?

Contributor Author: Those are a bit cleverer, yeah. RoomListNextBatch(0, "") sounds like a reasonable idea to me. I think flipping the bounds would also have been fine, but I know there is scope to mess that up fairly easily...

Member: Yup, fair. I think your solution would have worked fine, but it might just cause confusion having invalid room IDs in it, I dunno 🤷 Thanks for changing.
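
The string-ordering trick being debated can be checked in isolation. The snippet below is purely illustrative (made-up room IDs, plain lexicographic comparison) and is not part of the diff:

# Appending "\x01" builds a bound that sorts after the last room returned
# ("!C...") but before any realistic successor ("!D..."), so a query bounded
# by it effectively includes "!C..." itself.
returned = ["!A:example.org", "!B:example.org", "!C:example.org"]
fake_bound = "!C:example.org" + "\x01"

assert all(room_id < fake_bound for room_id in returned)
assert "!C:example.org" < fake_bound < "!D:example.org"
# "\x00" would give the true "first string after s", but Postgres text
# columns cannot contain NUL bytes, hence "\x01".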

assert batch_token is not None
response["prev_batch"] = RoomListNextBatch(
last_joined_members=batch_token.last_joined_members,
last_room_id=batch_token.last_room_id + "\x01",
direction_is_forward=False,
).to_token()

if num_results > 0:
final_entry = results[-1]
initial_entry = results[0]

assert first_considered_room is not None
assert last_considered_room is not None
if forwards:
if has_batch_token:
# If there was a token given then we assume that there
# must be previous results.
response["prev_batch"] = RoomListNextBatch(
last_joined_members=initial_entry.joined_members,
last_room_id=initial_entry.room_id,
direction_is_forward=False,
).to_token()

if more_to_come:
if more_to_come_from_database or cut_off_due_to_limit:
response["next_batch"] = RoomListNextBatch(
last_joined_members=final_entry.joined_members,
last_room_id=final_entry.room_id,
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()
else:
else: # backwards
if has_batch_token:
response["next_batch"] = RoomListNextBatch(
last_joined_members=final_entry.joined_members,
last_room_id=final_entry.room_id,
last_joined_members=last_considered_room.joined_members,
last_room_id=last_considered_room.room_id,
direction_is_forward=True,
).to_token()

if more_to_come:
if more_to_come_from_database or cut_off_due_to_limit:
response["prev_batch"] = RoomListNextBatch(
last_joined_members=initial_entry.joined_members,
last_room_id=initial_entry.room_id,
last_joined_members=first_considered_room.joined_members,
last_room_id=first_considered_room.room_id,
direction_is_forward=False,
).to_token()

response["chunk"] = [build_room_entry(r) for r in results]

# We can't efficiently count the total number of rooms that are not
# blocked by ACLs, but this is just an estimate so that should be
# good enough.
response["total_room_count_estimate"] = await self.store.count_public_rooms(
network_tuple,
ignore_non_federatable=from_federation,
ignore_non_federatable=from_federation_origin is not None,
search_filter=search_filter,
)

88 changes: 88 additions & 0 deletions tests/handlers/test_room_list.py
@@ -0,0 +1,88 @@
from http import HTTPStatus
from typing import Optional, Set

from synapse.rest import admin
from synapse.rest.client import directory, login, room
from synapse.types import JsonDict

from tests import unittest


class RoomListHandlerTestCase(unittest.HomeserverTestCase):
servlets = [
admin.register_servlets,
login.register_servlets,
room.register_servlets,
directory.register_servlets,
]

def _create_published_room(
self, tok: str, extra_content: Optional[JsonDict] = None
) -> str:
room_id = self.helper.create_room_as(tok=tok, extra_content=extra_content)
channel = self.make_request(
"PUT",
f"/_matrix/client/v3/directory/list/room/{room_id}?access_token={tok}",
content={
"visibility": "public",
},
)
assert channel.code == HTTPStatus.OK, f"couldn't publish room: {channel.result}"
return room_id

def test_acls_applied_to_room_directory_results(self) -> None:
"""
Creates 3 rooms. Room 2 has an ACL that only permits the homeservers
`test` and `test2` to access it.

We then simulate `test2` and `test3` requesting the room directory and assert
that `test3` does not see room 2, but `test2` sees all 3.
"""
self.register_user("u1", "p1")
u1tok = self.login("u1", "p1")
room1 = self._create_published_room(u1tok)

room2 = self._create_published_room(
u1tok,
extra_content={
"initial_state": [
{
"type": "m.room.server_acl",
"content": {
"allow": ["test", "test2"],
},
}
]
},
)

room3 = self._create_published_room(u1tok)

room_list = self.get_success(
self.hs.get_room_list_handler().get_local_public_room_list(
limit=50, from_federation_origin="test2"
)
)
room_ids_in_test2_list: Set[str] = {
entry["room_id"] for entry in room_list["chunk"]
}

room_list = self.get_success(
self.hs.get_room_list_handler().get_local_public_room_list(
limit=50, from_federation_origin="test3"
)
)
room_ids_in_test3_list: Set[str] = {
entry["room_id"] for entry in room_list["chunk"]
}

self.assertEqual(
room_ids_in_test2_list,
{room1, room2, room3},
"test2 should be able to see all 3 rooms",
)
self.assertEqual(
room_ids_in_test3_list,
{room1, room3},
"test3 should be able to see only 2 rooms",
)