From 9f3afd29dfe03ec5bb5f569d09136c93a6a24126 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 20 May 2022 18:50:59 +0100 Subject: [PATCH 01/50] Don't pull out stuff for push --- synapse/push/bulk_push_rule_evaluator.py | 143 +++---------------- synapse/storage/databases/main/cache.py | 1 + synapse/storage/databases/main/events.py | 4 + synapse/storage/databases/main/roommember.py | 9 ++ 4 files changed, 30 insertions(+), 127 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 7791b289e259..1d32760a7bcf 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -452,77 +452,27 @@ async def get_rules( self.room_push_rule_cache_metrics.inc_hits() return self.data.rules_by_user - self.room_push_rule_cache_metrics.inc_misses() - - ret_rules_by_user = {} - missing_member_event_ids = {} - if state_group and self.data.state_group == context.prev_group: - # If we have a simple delta then we can reuse most of the previous - # results. - ret_rules_by_user = self.data.rules_by_user - current_state_ids = context.delta_ids - - push_rules_delta_state_cache_metric.inc_hits() - else: - current_state_ids = await context.get_current_state_ids() - push_rules_delta_state_cache_metric.inc_misses() - # Ensure the state IDs exist. - assert current_state_ids is not None - - push_rules_state_size_counter.inc(len(current_state_ids)) - - logger.debug( - "Looking for member changes in %r %r", state_group, current_state_ids + local_users = await self.store.get_local_users_in_room( + self.room_id, on_invalidate=self.invalidate_all_cb ) - # Loop through to see which member events we've seen and have rules - # for and which we need to fetch - for key in current_state_ids: - typ, user_id = key - if typ != EventTypes.Member: - continue - - if user_id in self.data.uninteresting_user_set: - continue - - if not self.is_mine_id(user_id): - self.data.uninteresting_user_set.add(user_id) - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - self.data.uninteresting_user_set.add(user_id) - continue + if event.type == EventTypes.Member and event.membership == Membership.JOIN: + if self.is_mine_id(event.state_key): + local_users = list(local_users) + local_users.append(event.state_key) - event_id = current_state_ids[key] + ret_rules_by_user = await self.store.bulk_get_push_rules( + local_users, on_invalidate=self.invalidate_all_cb + ) - res = self.data.member_map.get(event_id, None) - if res: - if res.membership == Membership.JOIN: - rules = self.data.rules_by_user.get(res.user_id, None) - if rules: - ret_rules_by_user[res.user_id] = rules - continue + logger.info("Users in room: %s", local_users) - # If a user has left a room we remove their push rule. If they - # joined then we re-add it later in _update_rules_with_member_event_ids - ret_rules_by_user.pop(user_id, None) - missing_member_event_ids[user_id] = event_id - - if missing_member_event_ids: - # If we have some member events we haven't seen, look them up - # and fetch push rules for them if appropriate. - logger.debug("Found new member events %r", missing_member_event_ids) - await self._update_rules_with_member_event_ids( - ret_rules_by_user, missing_member_event_ids, state_group, event - ) - else: - # The push rules didn't change but lets update the cache anyway - self.update_cache( - self.data.sequence, - members={}, # There were no membership changes - rules_by_user=ret_rules_by_user, - state_group=state_group, - ) + self.update_cache( + self.data.sequence, + members={}, # There were no membership changes + rules_by_user=ret_rules_by_user, + state_group=state_group, + ) if logger.isEnabledFor(logging.DEBUG): logger.debug( @@ -530,67 +480,6 @@ async def get_rules( ) return ret_rules_by_user - async def _update_rules_with_member_event_ids( - self, - ret_rules_by_user: Dict[str, list], - member_event_ids: Dict[str, str], - state_group: Optional[int], - event: EventBase, - ) -> None: - """Update the partially filled rules_by_user dict by fetching rules for - any newly joined users in the `member_event_ids` list. - - Args: - ret_rules_by_user: Partially filled dict of push rules. Gets - updated with any new rules. - member_event_ids: Dict of user id to event id for membership events - that have happened since the last time we filled rules_by_user - state_group: The state group we are currently computing push rules - for. Used when updating the cache. - event: The event we are currently computing push rules for. - """ - sequence = self.data.sequence - - members = await self.store.get_membership_from_event_ids( - member_event_ids.values() - ) - - # If the event is a join event then it will be in current state events - # map but not in the DB, so we have to explicitly insert it. - if event.type == EventTypes.Member: - for event_id in member_event_ids.values(): - if event_id == event.event_id: - members[event_id] = EventIdMembership( - user_id=event.state_key, membership=event.membership - ) - - if logger.isEnabledFor(logging.DEBUG): - logger.debug("Found members %r: %r", self.room_id, members.values()) - - joined_user_ids = { - entry.user_id - for entry in members.values() - if entry and entry.membership == Membership.JOIN - } - - logger.debug("Joined: %r", joined_user_ids) - - # Previously we only considered users with pushers or read receipts in that - # room. We can't do this anymore because we use push actions to calculate unread - # counts, which don't rely on the user having pushers or sent a read receipt into - # the room. Therefore we just need to filter for local users here. - user_ids = list(filter(self.is_mine_id, joined_user_ids)) - - rules_by_user = await self.store.bulk_get_push_rules( - user_ids, on_invalidate=self.invalidate_all_cb - ) - - ret_rules_by_user.update( - item for item in rules_by_user.items() if item[0] is not None - ) - - self.update_cache(sequence, members, ret_rules_by_user, state_group) - def update_cache( self, sequence: int, diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 1653a6a9b694..a07d48f66c95 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -217,6 +217,7 @@ def _invalidate_caches_for_event( if etype == EventTypes.Member: self._membership_stream_cache.entity_has_changed(state_key, stream_ordering) self.get_invited_rooms_for_local_user.invalidate((state_key,)) + self.get_local_users_in_room.invalidate((room_id,)) if relates_to: self.get_relations_for_event.invalidate((relates_to,)) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 17e35cf63e68..46a004ce7985 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1766,6 +1766,10 @@ def _store_room_members_txn( self.store.get_invited_rooms_for_local_user.invalidate, (event.state_key,), ) + txn.call_after( + self.store.get_local_users_in_room.invalidate, + (event.room_id,), + ) # The `_get_membership_from_event_id` is immutable, except for the # case where we look up an event *before* persisting it. diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 31bc8c56011a..ac6f568e6852 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -444,6 +444,15 @@ def _get_rooms_for_local_user_where_membership_is_txn( return results + @cached() + async def get_local_users_in_room(self, room_id: str) -> List[str]: + return await self.db_pool.simple_select_onecol( + table="local_current_membership", + keyvalues={"room_id": room_id, "membership": Membership.JOIN}, + retcol="user_id", + desc="get_local_users_in_room", + ) + async def get_local_current_membership_for_user_in_room( self, user_id: str, room_id: str ) -> Tuple[Optional[str], Optional[str]]: From fe6175aaa2fa6ec7279cb43169125e68a1c0bc39 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 10 May 2022 11:09:57 +0100 Subject: [PATCH 02/50] Reduce state that push rules pull from DB --- synapse/push/bulk_push_rule_evaluator.py | 6 +++++- synapse/storage/_base.py | 3 +++ synapse/storage/databases/main/cache.py | 1 + synapse/storage/databases/main/events.py | 4 ++++ synapse/storage/databases/main/roommember.py | 9 +++++++++ 5 files changed, 22 insertions(+), 1 deletion(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 1d32760a7bcf..2409c6f5ba7f 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -267,6 +267,10 @@ async def action_for_event_by_user( room_members = await self.store.get_joined_users_from_context(event, context) + room_member_count = await self.store.get_number_joined_users_in_room( + event.room_id + ) + ( power_levels, sender_power_level, @@ -278,7 +282,7 @@ async def action_for_event_by_user( evaluator = PushRuleEvaluatorForEvent( event, - len(room_members), + room_member_count, sender_power_level, power_levels, relations, diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index abfc56b0616c..177b9a90c5aa 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -75,6 +75,9 @@ def _invalidate_state_caches( self._attempt_to_invalidate_cache( "get_users_in_room_with_profiles", (room_id,) ) + self._attempt_to_invalidate_cache( + "get_number_joined_users_in_room.invalidate", (room_id,) + ) # Purge other caches based on room state. self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index a07d48f66c95..17f7bf66bf1b 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -218,6 +218,7 @@ def _invalidate_caches_for_event( self._membership_stream_cache.entity_has_changed(state_key, stream_ordering) self.get_invited_rooms_for_local_user.invalidate((state_key,)) self.get_local_users_in_room.invalidate((room_id,)) + self.get_number_joined_users_in_room((room_id,)) if relates_to: self.get_relations_for_event.invalidate((relates_to,)) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 46a004ce7985..5787f4fc559d 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1770,6 +1770,10 @@ def _store_room_members_txn( self.store.get_local_users_in_room.invalidate, (event.room_id,), ) + txn.call_after( + self.store.get_number_joined_users_in_room.invalidate, + (event.room_id,), + ) # The `_get_membership_from_event_id` is immutable, except for the # case where we look up an event *before* persisting it. diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index ac6f568e6852..9bb83ac8ac75 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -337,6 +337,15 @@ def _get_room_summary_txn( "get_room_summary", _get_room_summary_txn ) + @cached() + async def get_number_joined_users_in_room(self, room_id: str) -> int: + return await self.db_pool.simple_select_one_onecol( + table="current_state_events", + keyvalues={"room_id": room_id, "membership": Membership.JOIN}, + retcol="COUNT(*)", + desc="get_number_joined_users_in_room", + ) + @cached() async def get_invited_rooms_for_local_user( self, user_id: str From 24c3974f01ae9078f69add01c071df49cd6fbe8e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 20 May 2022 18:57:26 +0100 Subject: [PATCH 03/50] Ignore display name push --- synapse/push/bulk_push_rule_evaluator.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 2409c6f5ba7f..4d3a1a10fa5f 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -265,7 +265,8 @@ async def action_for_event_by_user( rules_by_user = await self._get_rules_for_event(event, context) actions_by_user: Dict[str, List[Union[dict, str]]] = {} - room_members = await self.store.get_joined_users_from_context(event, context) + # FIXME!!! + # room_members = await self.store.get_joined_users_from_context(event, context) room_member_count = await self.store.get_number_joined_users_in_room( event.room_id @@ -303,9 +304,10 @@ async def action_for_event_by_user( continue display_name = None - profile_info = room_members.get(uid) - if profile_info: - display_name = profile_info.display_name + # FIXME!!! + # profile_info = room_members.get(uid) + # if profile_info: + # display_name = profile_info.display_name if not display_name: # Handle the case where we are pushing a membership event to From 944f62759e25f7df49bdee8c15bf048ffc7aa5ad Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Wed, 8 Jun 2022 11:00:14 -0700 Subject: [PATCH 04/50] fetch display name --- synapse/push/bulk_push_rule_evaluator.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 4d3a1a10fa5f..1cd615206c24 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -34,6 +34,7 @@ from ..storage.state import StateFilter from .push_rule_evaluator import PushRuleEvaluatorForEvent +from ..types import get_localpart_from_id if TYPE_CHECKING: from synapse.server import HomeServer @@ -265,9 +266,6 @@ async def action_for_event_by_user( rules_by_user = await self._get_rules_for_event(event, context) actions_by_user: Dict[str, List[Union[dict, str]]] = {} - # FIXME!!! - # room_members = await self.store.get_joined_users_from_context(event, context) - room_member_count = await self.store.get_number_joined_users_in_room( event.room_id ) @@ -303,11 +301,9 @@ async def action_for_event_by_user( if uid in ignorers: continue - display_name = None - # FIXME!!! - # profile_info = room_members.get(uid) - # if profile_info: - # display_name = profile_info.display_name + localpart = get_localpart_from_id(uid) + profile_info = await self.store.get_profileinfo(localpart) + display_name = profile_info.display_name if not display_name: # Handle the case where we are pushing a membership event to From 05005c4ca304437166aa8cec97cf0a24dcb83620 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Sun, 12 Jun 2022 22:23:30 -0700 Subject: [PATCH 05/50] filter visibility of push event --- synapse/push/bulk_push_rule_evaluator.py | 132 ++++++++++++++++++++++- 1 file changed, 129 insertions(+), 3 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 1cd615206c24..d4efd6039d23 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -20,21 +20,31 @@ import attr from prometheus_client import Counter -from synapse.api.constants import EventTypes, Membership, RelationTypes +from synapse.api.constants import ( + EventTypes, + HistoryVisibility, + Membership, + RelationTypes, +) from synapse.event_auth import auth_types_for_event, get_user_power_level from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership +from synapse.storage.state import StateFilter +from synapse.types import get_localpart_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import CacheMetric, register_cache from synapse.util.caches.descriptors import lru_cache from synapse.util.caches.lrucache import LruCache from synapse.util.metrics import measure_func +from synapse.visibility import ( + MEMBERSHIP_PRIORITY, + VISIBILITY_PRIORITY, + get_effective_room_visibility_from_state, +) -from ..storage.state import StateFilter from .push_rule_evaluator import PushRuleEvaluatorForEvent -from ..types import get_localpart_from_id if TYPE_CHECKING: from synapse.server import HomeServer @@ -301,6 +311,12 @@ async def action_for_event_by_user( if uid in ignorers: continue + # check for the case where user joins a room without being allowed to see history, and then the server + # receives a delayed event from before the user joined, which they should not be pushed for + visible = await self._check_visibility(uid, event, context) + if not visible: + continue + localpart = get_localpart_from_id(uid) profile_info = await self.store.get_profileinfo(localpart) display_name = profile_info.display_name @@ -343,6 +359,116 @@ async def action_for_event_by_user( count_as_unread, ) + async def _check_visibility( + self, user_id: str, event: EventBase, context: EventContext + ) -> Optional[EventBase]: + """ + Checks whether the room history should be visible to the user at the time of this event. + Args: + user_id: user_id to be checked + event: the event to be checked + context: EventContext for the event to be checked + Returns: the event if the room history is visible to the user at the time of the event, None if not + """ + filter = StateFilter.from_types( + [(EventTypes.Member, user_id), (EventTypes.RoomHistoryVisibility, "")] + ) + state_map = await context.get_prev_state_ids(filter) + + # Use events rather than event ids as content from the events are needed below + updated_state_map = {} + for state_key, event_id in state_map.items(): + updated_state_map[state_key] = await self.store.get_event(event_id) + + if event.is_state(): + current_state_key = (event.type, event.state_key) + # Add current event to updated_state_map, we need to do this here as it has not been persisted to the db yet + updated_state_map[current_state_key] = event + + # Get the room_visibility at the time of the event. + visibility = get_effective_room_visibility_from_state(updated_state_map) + + # Always allow history visibility events on boundaries. This is done + # by setting the effective visibility to the least restrictive + # of the old vs new. + if event.type == EventTypes.RoomHistoryVisibility: + prev_content = event.unsigned.get("prev_content", {}) + prev_visibility = prev_content.get("history_visibility", None) + + if prev_visibility not in VISIBILITY_PRIORITY: + prev_visibility = HistoryVisibility.SHARED + + new_priority = VISIBILITY_PRIORITY.index(visibility) + old_priority = VISIBILITY_PRIORITY.index(prev_visibility) + if old_priority < new_priority: + visibility = prev_visibility + + # likewise, if the event is the user's own membership event, use + # the 'most joined' membership + membership = None + if event.type == EventTypes.Member and event.state_key == user_id: + membership = event.content.get("membership", None) + if membership not in MEMBERSHIP_PRIORITY: + membership = "leave" + # members can see their own membership invite + if membership == Membership.INVITE: + return event + + prev_content = event.unsigned.get("prev_content", {}) + prev_membership = prev_content.get("membership", None) + if prev_membership not in MEMBERSHIP_PRIORITY: + prev_membership = "leave" + + # Always allow the user to see their own leave events, otherwise + # they won't see the room disappear if they reject the invite + # + # (Note this doesn't work for out-of-band invite rejections, which don't + # have prev_state populated. They are handled above in the outlier code.) + if membership == "leave" and ( + prev_membership == "join" or prev_membership == "invite" + ): + return event + + new_priority = MEMBERSHIP_PRIORITY.index(membership) + old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) + if old_priority < new_priority: + membership = prev_membership + + # otherwise, get the user's membership at the time of the event. + if membership is None: + membership_event = updated_state_map.get((EventTypes.Member, user_id), None) + if membership_event: + membership = membership_event.membership + + # if the user was a member of the room at the time of the event, + # they can see it. + if membership == Membership.JOIN: + return event + + # otherwise, it depends on the room visibility. + if visibility == HistoryVisibility.JOINED: + # we weren't a member at the time of the event, so we can't + # see this event. + return None + + elif visibility == HistoryVisibility.INVITED: + # user can also see the event if they were *invited* at the time + # of the event. + return event if membership == Membership.INVITE else None + + elif visibility == HistoryVisibility.SHARED: + # if the visibility is shared, users cannot see the event unless + # they have *subsequently* joined the room (or were members at the + # time, of course) + # + # XXX: if the user has subsequently joined and then left again, + # ideally we would share history up to the point they left. But + # we don't know when they left. We just treat it as though they + # never joined, and restrict access. + return None + + return event + MemberMap = Dict[str, Optional[EventIdMembership]] Rule = Dict[str, dict] From c16260eb42c0885337009c769d808e9132f91698 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Mon, 13 Jun 2022 16:49:57 -0700 Subject: [PATCH 06/50] add filter_events_for_client_with_context and move _check_visibility to visibility.py --- synapse/push/bulk_push_rule_evaluator.py | 129 ++-------------------- synapse/visibility.py | 131 ++++++++++++++++++++++- 2 files changed, 134 insertions(+), 126 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index d4efd6039d23..71449831800f 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -20,12 +20,7 @@ import attr from prometheus_client import Counter -from synapse.api.constants import ( - EventTypes, - HistoryVisibility, - Membership, - RelationTypes, -) +from synapse.api.constants import EventTypes, Membership, RelationTypes from synapse.event_auth import auth_types_for_event, get_user_power_level from synapse.events import EventBase, relation_from_event from synapse.events.snapshot import EventContext @@ -38,11 +33,7 @@ from synapse.util.caches.descriptors import lru_cache from synapse.util.caches.lrucache import LruCache from synapse.util.metrics import measure_func -from synapse.visibility import ( - MEMBERSHIP_PRIORITY, - VISIBILITY_PRIORITY, - get_effective_room_visibility_from_state, -) +from synapse.visibility import filter_events_for_client_with_state from .push_rule_evaluator import PushRuleEvaluatorForEvent @@ -311,9 +302,11 @@ async def action_for_event_by_user( if uid in ignorers: continue - # check for the case where user joins a room without being allowed to see history, and then the server + # This is a check for the case where user joins a room without being allowed to see history, and then the server # receives a delayed event from before the user joined, which they should not be pushed for - visible = await self._check_visibility(uid, event, context) + visible = await filter_events_for_client_with_state( + self.store, uid, event, context + ) if not visible: continue @@ -359,116 +352,6 @@ async def action_for_event_by_user( count_as_unread, ) - async def _check_visibility( - self, user_id: str, event: EventBase, context: EventContext - ) -> Optional[EventBase]: - """ - Checks whether the room history should be visible to the user at the time of this event. - Args: - user_id: user_id to be checked - event: the event to be checked - context: EventContext for the event to be checked - Returns: the event if the room history is visible to the user at the time of the event, None if not - """ - filter = StateFilter.from_types( - [(EventTypes.Member, user_id), (EventTypes.RoomHistoryVisibility, "")] - ) - state_map = await context.get_prev_state_ids(filter) - - # Use events rather than event ids as content from the events are needed below - updated_state_map = {} - for state_key, event_id in state_map.items(): - updated_state_map[state_key] = await self.store.get_event(event_id) - - if event.is_state(): - current_state_key = (event.type, event.state_key) - # Add current event to updated_state_map, we need to do this here as it has not been persisted to the db yet - updated_state_map[current_state_key] = event - - # Get the room_visibility at the time of the event. - visibility = get_effective_room_visibility_from_state(updated_state_map) - - # Always allow history visibility events on boundaries. This is done - # by setting the effective visibility to the least restrictive - # of the old vs new. - if event.type == EventTypes.RoomHistoryVisibility: - prev_content = event.unsigned.get("prev_content", {}) - prev_visibility = prev_content.get("history_visibility", None) - - if prev_visibility not in VISIBILITY_PRIORITY: - prev_visibility = HistoryVisibility.SHARED - - new_priority = VISIBILITY_PRIORITY.index(visibility) - old_priority = VISIBILITY_PRIORITY.index(prev_visibility) - if old_priority < new_priority: - visibility = prev_visibility - - # likewise, if the event is the user's own membership event, use - # the 'most joined' membership - membership = None - if event.type == EventTypes.Member and event.state_key == user_id: - membership = event.content.get("membership", None) - if membership not in MEMBERSHIP_PRIORITY: - membership = "leave" - # members can see their own membership invite - if membership == Membership.INVITE: - return event - - prev_content = event.unsigned.get("prev_content", {}) - prev_membership = prev_content.get("membership", None) - if prev_membership not in MEMBERSHIP_PRIORITY: - prev_membership = "leave" - - # Always allow the user to see their own leave events, otherwise - # they won't see the room disappear if they reject the invite - # - # (Note this doesn't work for out-of-band invite rejections, which don't - # have prev_state populated. They are handled above in the outlier code.) - if membership == "leave" and ( - prev_membership == "join" or prev_membership == "invite" - ): - return event - - new_priority = MEMBERSHIP_PRIORITY.index(membership) - old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) - if old_priority < new_priority: - membership = prev_membership - - # otherwise, get the user's membership at the time of the event. - if membership is None: - membership_event = updated_state_map.get((EventTypes.Member, user_id), None) - if membership_event: - membership = membership_event.membership - - # if the user was a member of the room at the time of the event, - # they can see it. - if membership == Membership.JOIN: - return event - - # otherwise, it depends on the room visibility. - if visibility == HistoryVisibility.JOINED: - # we weren't a member at the time of the event, so we can't - # see this event. - return None - - elif visibility == HistoryVisibility.INVITED: - # user can also see the event if they were *invited* at the time - # of the event. - return event if membership == Membership.INVITE else None - - elif visibility == HistoryVisibility.SHARED: - # if the visibility is shared, users cannot see the event unless - # they have *subsequently* joined the room (or were members at the - # time, of course) - # - # XXX: if the user has subsequently joined and then left again, - # ideally we would share history up to the point they left. But - # we don't know when they left. We just treat it as though they - # never joined, and restrict access. - return None - - return event - MemberMap = Dict[str, Optional[EventIdMembership]] Rule = Dict[str, dict] diff --git a/synapse/visibility.py b/synapse/visibility.py index 8aaa8c709fad..ed1c5533e323 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -19,7 +19,9 @@ from synapse.api.constants import EventTypes, HistoryVisibility, Membership from synapse.events import EventBase +from synapse.events.snapshot import EventContext from synapse.events.utils import prune_event +from synapse.storage import DataStore from synapse.storage.controllers import StorageControllers from synapse.storage.state import StateFilter from synapse.types import RetentionPolicy, StateMap, get_domain_from_id @@ -105,13 +107,10 @@ def allowed(event: EventBase) -> Optional[EventBase]: """ Args: event: event to check - Returns: None if the user cannot see this event at all - a redacted copy of the event if they can only see a redacted version - the original event if they can see it as normal. """ # Only run some checks if these events aren't about to be sent to clients. This is @@ -258,6 +257,132 @@ def allowed(event: EventBase) -> Optional[EventBase]: return [ev for ev in filtered_events if ev] +async def filter_events_for_client_with_state( + store: DataStore, user_id: str, event: EventBase, context: EventContext +) -> Optional[EventBase]: + """ + Checks to see if an event is visible to the user at the time of the event + Args: + store: databases + user_id: user_id to be checked + event: the event to be checked + context: EventContext for the event to be checked + Returns: the event if the room history is visible to the user at the time of the event, None if not + """ + filter = StateFilter.from_types( + [(EventTypes.Member, user_id), (EventTypes.RoomHistoryVisibility, "")] + ) + state_map = await context.get_prev_state_ids(filter) + + # Use events rather than event ids as content from the events are needed in _check_visibility + updated_state_map = {} + for state_key, event_id in state_map.items(): + updated_state_map[state_key] = await store.get_event(event_id) + + if event.is_state(): + current_state_key = (event.type, event.state_key) + # Add current event to updated_state_map, we need to do this here as it may not have been persisted to the db yet + updated_state_map[current_state_key] = event + + filtered_event = await _check_visibility(user_id, event, updated_state_map) + return filtered_event + + +async def _check_visibility( + user_id: str, event: EventBase, state_map: StateMap, is_peeking=False +) -> Optional[EventBase]: + """ + Checks whether the room history should be visible to the user at the time of this event. + Args: + + Returns: the event if the room history is visible to the user at the time of the event, None if not + """ + # Get the room_visibility at the time of the event. + visibility = get_effective_room_visibility_from_state(state_map) + + # Always allow history visibility events on boundaries. This is done + # by setting the effective visibility to the least restrictive + # of the old vs new. + if event.type == EventTypes.RoomHistoryVisibility: + prev_content = event.unsigned.get("prev_content", {}) + prev_visibility = prev_content.get("history_visibility", None) + + if prev_visibility not in VISIBILITY_PRIORITY: + prev_visibility = HistoryVisibility.SHARED + + new_priority = VISIBILITY_PRIORITY.index(visibility) + old_priority = VISIBILITY_PRIORITY.index(prev_visibility) + if old_priority < new_priority: + visibility = prev_visibility + + # likewise, if the event is the user's own membership event, use + # the 'most joined' membership + membership = None + if event.type == EventTypes.Member and event.state_key == user_id: + membership = event.content.get("membership", None) + if membership not in MEMBERSHIP_PRIORITY: + membership = "leave" + # members can see their own membership invite + if membership == Membership.INVITE: + return event + + prev_content = event.unsigned.get("prev_content", {}) + prev_membership = prev_content.get("membership", None) + if prev_membership not in MEMBERSHIP_PRIORITY: + prev_membership = "leave" + + # Always allow the user to see their own leave events, otherwise + # they won't see the room disappear if they reject the invite + # + # (Note this doesn't work for out-of-band invite rejections, which don't + # have prev_state populated. They are handled above in the outlier code.) + if membership == "leave" and ( + prev_membership == "join" or prev_membership == "invite" + ): + return event + + new_priority = MEMBERSHIP_PRIORITY.index(membership) + old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) + if old_priority < new_priority: + membership = prev_membership + + # otherwise, get the user's membership at the time of the event. + if membership is None: + membership_event = state_map.get((EventTypes.Member, user_id), None) + if membership_event: + membership = membership_event.membership + + # if the user was a member of the room at the time of the event, + # they can see it. + if membership == Membership.JOIN: + return event + + # otherwise, it depends on the room visibility. + + if visibility == HistoryVisibility.JOINED: + # we weren't a member at the time of the event, so we can't + # see this event. + return None + + elif visibility == HistoryVisibility.INVITED: + # user can also see the event if they were *invited* at the time + # of the event. + return event if membership == Membership.INVITE else None + + elif visibility == HistoryVisibility.SHARED and is_peeking: + # if the visibility is shared, users cannot see the event unless + # they have *subsequently* joined the room (or were members at the + # time, of course) + # + # XXX: if the user has subsequently joined and then left again, + # ideally we would share history up to the point they left. But + # we don't know when they left. We just treat it as though they + # never joined, and restrict access. + return None + + return event + + def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str: """Get the actual history vis, from a state map including the history_visibility event From d1378f50aeb923d27089433ba09b5d63604fd7a1 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Wed, 15 Jun 2022 10:27:39 -0700 Subject: [PATCH 07/50] refactor `filter_events_for_client` to use _check_visibility --- synapse/visibility.py | 117 +++++++++--------------------------------- 1 file changed, 24 insertions(+), 93 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index ed1c5533e323..8985c422fd8c 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -159,95 +159,10 @@ def allowed(event: EventBase) -> Optional[EventBase]: return None state = event_id_to_state[event.event_id] - - # get the room_visibility at the time of the event. - visibility = get_effective_room_visibility_from_state(state) - - # Always allow history visibility events on boundaries. This is done - # by setting the effective visibility to the least restrictive - # of the old vs new. - if event.type == EventTypes.RoomHistoryVisibility: - prev_content = event.unsigned.get("prev_content", {}) - prev_visibility = prev_content.get("history_visibility", None) - - if prev_visibility not in VISIBILITY_PRIORITY: - prev_visibility = HistoryVisibility.SHARED - - new_priority = VISIBILITY_PRIORITY.index(visibility) - old_priority = VISIBILITY_PRIORITY.index(prev_visibility) - if old_priority < new_priority: - visibility = prev_visibility - - # likewise, if the event is the user's own membership event, use - # the 'most joined' membership - membership = None - if event.type == EventTypes.Member and event.state_key == user_id: - membership = event.content.get("membership", None) - if membership not in MEMBERSHIP_PRIORITY: - membership = "leave" - - prev_content = event.unsigned.get("prev_content", {}) - prev_membership = prev_content.get("membership", None) - if prev_membership not in MEMBERSHIP_PRIORITY: - prev_membership = "leave" - - # Always allow the user to see their own leave events, otherwise - # they won't see the room disappear if they reject the invite - # - # (Note this doesn't work for out-of-band invite rejections, which don't - # have prev_state populated. They are handled above in the outlier code.) - if membership == "leave" and ( - prev_membership == "join" or prev_membership == "invite" - ): - return event - - new_priority = MEMBERSHIP_PRIORITY.index(membership) - old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) - if old_priority < new_priority: - membership = prev_membership - - # otherwise, get the user's membership at the time of the event. - if membership is None: - membership_event = state.get((EventTypes.Member, user_id), None) - if membership_event: - membership = membership_event.membership - - # if the user was a member of the room at the time of the event, - # they can see it. - if membership == Membership.JOIN: - return event - - # otherwise, it depends on the room visibility. - - if visibility == HistoryVisibility.JOINED: - # we weren't a member at the time of the event, so we can't - # see this event. - return None - - elif visibility == HistoryVisibility.INVITED: - # user can also see the event if they were *invited* at the time - # of the event. - return event if membership == Membership.INVITE else None - - elif visibility == HistoryVisibility.SHARED and is_peeking: - # if the visibility is shared, users cannot see the event unless - # they have *subsequently* joined the room (or were members at the - # time, of course) - # - # XXX: if the user has subsequently joined and then left again, - # ideally we would share history up to the point they left. But - # we don't know when they left. We just treat it as though they - # never joined, and restrict access. - return None - - # the visibility is either shared or world_readable, and the user was - # not a member at the time. We allow it, provided the original sender - # has not requested their data to be erased, in which case, we return - # a redacted version. - if erased_senders[event.sender]: - return prune_event(event) - - return event + visible_event = _check_visibility( + user_id, event, state, is_peeking, erased_senders + ) + return visible_event # Check each event: gives an iterable of None or (a potentially modified) # EventBase. @@ -284,17 +199,25 @@ async def filter_events_for_client_with_state( # Add current event to updated_state_map, we need to do this here as it may not have been persisted to the db yet updated_state_map[current_state_key] = event - filtered_event = await _check_visibility(user_id, event, updated_state_map) + filtered_event = _check_visibility(user_id, event, updated_state_map) return filtered_event -async def _check_visibility( - user_id: str, event: EventBase, state_map: StateMap, is_peeking=False +def _check_visibility( + user_id: str, + event: EventBase, + state_map: StateMap, + is_peeking: bool = False, + erased_senders: Optional[dict] = None, ) -> Optional[EventBase]: """ Checks whether the room history should be visible to the user at the time of this event. Args: - + user_id: user_id for the user to be checked + event: the event to be checked + state_map: state at the event to be checked + is_peeking: whether the user in question is peeking + erased_senders: optional dict matching a user id to a boolean representing whether the user has requested erasure Returns: the event if the room history is visible to the user at the time of the event, None if not """ # Get the room_visibility at the time of the event. @@ -380,6 +303,14 @@ async def _check_visibility( # never joined, and restrict access. return None + # the visibility is either shared or world_readable, and the user was + # not a member at the time. We allow it, provided the original sender + # has not requested their data to be erased, in which case, we return + # a redacted version. + if erased_senders: + if erased_senders[event.sender]: + return prune_event(event) + return event From 6c837819ff3166d14a9648b1b5f512a61a0588f6 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Wed, 15 Jun 2022 10:27:52 -0700 Subject: [PATCH 08/50] fix typo in cache.py --- synapse/storage/databases/main/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 17f7bf66bf1b..296d2d8f74e6 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -218,7 +218,7 @@ def _invalidate_caches_for_event( self._membership_stream_cache.entity_has_changed(state_key, stream_ordering) self.get_invited_rooms_for_local_user.invalidate((state_key,)) self.get_local_users_in_room.invalidate((room_id,)) - self.get_number_joined_users_in_room((room_id,)) + self.get_number_joined_users_in_room.invalidate((room_id,)) if relates_to: self.get_relations_for_event.invalidate((relates_to,)) From fb0a78c60a71d6cdc1dad86c763a00f12accd014 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 20 May 2022 18:50:59 +0100 Subject: [PATCH 09/50] Don't pull out stuff for push --- synapse/push/bulk_push_rule_evaluator.py | 143 +++---------------- synapse/storage/databases/main/cache.py | 1 + synapse/storage/databases/main/events.py | 4 + synapse/storage/databases/main/roommember.py | 9 ++ 4 files changed, 30 insertions(+), 127 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 7791b289e259..f5b9738b8c0f 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -452,77 +452,27 @@ async def get_rules( self.room_push_rule_cache_metrics.inc_hits() return self.data.rules_by_user - self.room_push_rule_cache_metrics.inc_misses() - - ret_rules_by_user = {} - missing_member_event_ids = {} - if state_group and self.data.state_group == context.prev_group: - # If we have a simple delta then we can reuse most of the previous - # results. - ret_rules_by_user = self.data.rules_by_user - current_state_ids = context.delta_ids - - push_rules_delta_state_cache_metric.inc_hits() - else: - current_state_ids = await context.get_current_state_ids() - push_rules_delta_state_cache_metric.inc_misses() - # Ensure the state IDs exist. - assert current_state_ids is not None - - push_rules_state_size_counter.inc(len(current_state_ids)) - - logger.debug( - "Looking for member changes in %r %r", state_group, current_state_ids + local_users = await self.store.get_local_users_in_room( + self.room_id, on_invalidate=self.invalidate_all_cb ) - # Loop through to see which member events we've seen and have rules - # for and which we need to fetch - for key in current_state_ids: - typ, user_id = key - if typ != EventTypes.Member: - continue - - if user_id in self.data.uninteresting_user_set: - continue - - if not self.is_mine_id(user_id): - self.data.uninteresting_user_set.add(user_id) - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - self.data.uninteresting_user_set.add(user_id) - continue + if event.type == EventTypes.Member and event.membership == Membership.JOIN: + if self.is_mine_id(event.state_key): + local_users = list(local_users) + local_users.append(event.state_key) - event_id = current_state_ids[key] + ret_rules_by_user = await self.store.bulk_get_push_rules( + local_users, on_invalidate=self.invalidate_all_cb + ) - res = self.data.member_map.get(event_id, None) - if res: - if res.membership == Membership.JOIN: - rules = self.data.rules_by_user.get(res.user_id, None) - if rules: - ret_rules_by_user[res.user_id] = rules - continue + logger.debug("Users in room: %s", local_users) - # If a user has left a room we remove their push rule. If they - # joined then we re-add it later in _update_rules_with_member_event_ids - ret_rules_by_user.pop(user_id, None) - missing_member_event_ids[user_id] = event_id - - if missing_member_event_ids: - # If we have some member events we haven't seen, look them up - # and fetch push rules for them if appropriate. - logger.debug("Found new member events %r", missing_member_event_ids) - await self._update_rules_with_member_event_ids( - ret_rules_by_user, missing_member_event_ids, state_group, event - ) - else: - # The push rules didn't change but lets update the cache anyway - self.update_cache( - self.data.sequence, - members={}, # There were no membership changes - rules_by_user=ret_rules_by_user, - state_group=state_group, - ) + self.update_cache( + self.data.sequence, + members={}, # There were no membership changes + rules_by_user=ret_rules_by_user, + state_group=state_group, + ) if logger.isEnabledFor(logging.DEBUG): logger.debug( @@ -530,67 +480,6 @@ async def get_rules( ) return ret_rules_by_user - async def _update_rules_with_member_event_ids( - self, - ret_rules_by_user: Dict[str, list], - member_event_ids: Dict[str, str], - state_group: Optional[int], - event: EventBase, - ) -> None: - """Update the partially filled rules_by_user dict by fetching rules for - any newly joined users in the `member_event_ids` list. - - Args: - ret_rules_by_user: Partially filled dict of push rules. Gets - updated with any new rules. - member_event_ids: Dict of user id to event id for membership events - that have happened since the last time we filled rules_by_user - state_group: The state group we are currently computing push rules - for. Used when updating the cache. - event: The event we are currently computing push rules for. - """ - sequence = self.data.sequence - - members = await self.store.get_membership_from_event_ids( - member_event_ids.values() - ) - - # If the event is a join event then it will be in current state events - # map but not in the DB, so we have to explicitly insert it. - if event.type == EventTypes.Member: - for event_id in member_event_ids.values(): - if event_id == event.event_id: - members[event_id] = EventIdMembership( - user_id=event.state_key, membership=event.membership - ) - - if logger.isEnabledFor(logging.DEBUG): - logger.debug("Found members %r: %r", self.room_id, members.values()) - - joined_user_ids = { - entry.user_id - for entry in members.values() - if entry and entry.membership == Membership.JOIN - } - - logger.debug("Joined: %r", joined_user_ids) - - # Previously we only considered users with pushers or read receipts in that - # room. We can't do this anymore because we use push actions to calculate unread - # counts, which don't rely on the user having pushers or sent a read receipt into - # the room. Therefore we just need to filter for local users here. - user_ids = list(filter(self.is_mine_id, joined_user_ids)) - - rules_by_user = await self.store.bulk_get_push_rules( - user_ids, on_invalidate=self.invalidate_all_cb - ) - - ret_rules_by_user.update( - item for item in rules_by_user.items() if item[0] is not None - ) - - self.update_cache(sequence, members, ret_rules_by_user, state_group) - def update_cache( self, sequence: int, diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 1653a6a9b694..a07d48f66c95 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -217,6 +217,7 @@ def _invalidate_caches_for_event( if etype == EventTypes.Member: self._membership_stream_cache.entity_has_changed(state_key, stream_ordering) self.get_invited_rooms_for_local_user.invalidate((state_key,)) + self.get_local_users_in_room.invalidate((room_id,)) if relates_to: self.get_relations_for_event.invalidate((relates_to,)) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index a3e12f1e9be4..18a16e23b3d6 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1766,6 +1766,10 @@ def _store_room_members_txn( self.store.get_invited_rooms_for_local_user.invalidate, (event.state_key,), ) + txn.call_after( + self.store.get_local_users_in_room.invalidate, + (event.room_id,), + ) # The `_get_membership_from_event_id` is immutable, except for the # case where we look up an event *before* persisting it. diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 31bc8c56011a..ac6f568e6852 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -444,6 +444,15 @@ def _get_rooms_for_local_user_where_membership_is_txn( return results + @cached() + async def get_local_users_in_room(self, room_id: str) -> List[str]: + return await self.db_pool.simple_select_onecol( + table="local_current_membership", + keyvalues={"room_id": room_id, "membership": Membership.JOIN}, + retcol="user_id", + desc="get_local_users_in_room", + ) + async def get_local_current_membership_for_user_in_room( self, user_id: str, room_id: str ) -> Tuple[Optional[str], Optional[str]]: From 83ff92d76330cf46aaadedbaf2f871c1fe85ec14 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 10 May 2022 11:09:57 +0100 Subject: [PATCH 10/50] Reduce state that push rules pull from DB --- synapse/push/bulk_push_rule_evaluator.py | 6 +++++- synapse/storage/_base.py | 3 +++ synapse/storage/databases/main/cache.py | 1 + synapse/storage/databases/main/events.py | 4 ++++ synapse/storage/databases/main/roommember.py | 9 +++++++++ 5 files changed, 22 insertions(+), 1 deletion(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index f5b9738b8c0f..92e5e4a30ba2 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -267,6 +267,10 @@ async def action_for_event_by_user( room_members = await self.store.get_joined_users_from_context(event, context) + room_member_count = await self.store.get_number_joined_users_in_room( + event.room_id + ) + ( power_levels, sender_power_level, @@ -278,7 +282,7 @@ async def action_for_event_by_user( evaluator = PushRuleEvaluatorForEvent( event, - len(room_members), + room_member_count, sender_power_level, power_levels, relations, diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index abfc56b0616c..177b9a90c5aa 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -75,6 +75,9 @@ def _invalidate_state_caches( self._attempt_to_invalidate_cache( "get_users_in_room_with_profiles", (room_id,) ) + self._attempt_to_invalidate_cache( + "get_number_joined_users_in_room.invalidate", (room_id,) + ) # Purge other caches based on room state. self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index a07d48f66c95..296d2d8f74e6 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -218,6 +218,7 @@ def _invalidate_caches_for_event( self._membership_stream_cache.entity_has_changed(state_key, stream_ordering) self.get_invited_rooms_for_local_user.invalidate((state_key,)) self.get_local_users_in_room.invalidate((room_id,)) + self.get_number_joined_users_in_room.invalidate((room_id,)) if relates_to: self.get_relations_for_event.invalidate((relates_to,)) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 18a16e23b3d6..dceb3d6c3597 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1770,6 +1770,10 @@ def _store_room_members_txn( self.store.get_local_users_in_room.invalidate, (event.room_id,), ) + txn.call_after( + self.store.get_number_joined_users_in_room.invalidate, + (event.room_id,), + ) # The `_get_membership_from_event_id` is immutable, except for the # case where we look up an event *before* persisting it. diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index ac6f568e6852..9bb83ac8ac75 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -337,6 +337,15 @@ def _get_room_summary_txn( "get_room_summary", _get_room_summary_txn ) + @cached() + async def get_number_joined_users_in_room(self, room_id: str) -> int: + return await self.db_pool.simple_select_one_onecol( + table="current_state_events", + keyvalues={"room_id": room_id, "membership": Membership.JOIN}, + retcol="COUNT(*)", + desc="get_number_joined_users_in_room", + ) + @cached() async def get_invited_rooms_for_local_user( self, user_id: str From 4966b03e6531ae8c07dab5e144c4d902a4e1b97f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 20 May 2022 18:57:26 +0100 Subject: [PATCH 11/50] Fetch display names from DB. --- synapse/push/bulk_push_rule_evaluator.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 92e5e4a30ba2..49a1db36daa9 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -34,6 +34,7 @@ from ..storage.state import StateFilter from .push_rule_evaluator import PushRuleEvaluatorForEvent +from ..types import get_localpart_from_id if TYPE_CHECKING: from synapse.server import HomeServer @@ -265,8 +266,6 @@ async def action_for_event_by_user( rules_by_user = await self._get_rules_for_event(event, context) actions_by_user: Dict[str, List[Union[dict, str]]] = {} - room_members = await self.store.get_joined_users_from_context(event, context) - room_member_count = await self.store.get_number_joined_users_in_room( event.room_id ) @@ -302,10 +301,9 @@ async def action_for_event_by_user( if uid in ignorers: continue - display_name = None - profile_info = room_members.get(uid) - if profile_info: - display_name = profile_info.display_name + localpart = get_localpart_from_id(uid) + profile_info = await self.store.get_profileinfo(localpart) + display_name = profile_info.display_name if not display_name: # Handle the case where we are pushing a membership event to From 21951c33b0800d09375483e1fd5710650b152ce1 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Sun, 12 Jun 2022 22:23:30 -0700 Subject: [PATCH 12/50] filter visibility of push event --- synapse/push/bulk_push_rule_evaluator.py | 15 +- synapse/visibility.py | 230 ++++++++++++++--------- 2 files changed, 156 insertions(+), 89 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 49a1db36daa9..2471fae964ec 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -26,15 +26,16 @@ from synapse.events.snapshot import EventContext from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership +from synapse.storage.state import StateFilter +from synapse.types import get_localpart_from_id from synapse.util.async_helpers import Linearizer from synapse.util.caches import CacheMetric, register_cache from synapse.util.caches.descriptors import lru_cache from synapse.util.caches.lrucache import LruCache from synapse.util.metrics import measure_func +from synapse.visibility import filter_events_for_client_with_state -from ..storage.state import StateFilter from .push_rule_evaluator import PushRuleEvaluatorForEvent -from ..types import get_localpart_from_id if TYPE_CHECKING: from synapse.server import HomeServer @@ -301,6 +302,16 @@ async def action_for_event_by_user( if uid in ignorers: continue + # This is a check for the case where user joins a room without being + # allowed to see history, and then the server receives a delayed + # event from before the user joined, which they should not be pushed + # for + visible = await filter_events_for_client_with_state( + self.store, uid, event, context + ) + if not visible: + continue + localpart = get_localpart_from_id(uid) profile_info = await self.store.get_profileinfo(localpart) display_name = profile_info.display_name diff --git a/synapse/visibility.py b/synapse/visibility.py index 8aaa8c709fad..8985c422fd8c 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -19,7 +19,9 @@ from synapse.api.constants import EventTypes, HistoryVisibility, Membership from synapse.events import EventBase +from synapse.events.snapshot import EventContext from synapse.events.utils import prune_event +from synapse.storage import DataStore from synapse.storage.controllers import StorageControllers from synapse.storage.state import StateFilter from synapse.types import RetentionPolicy, StateMap, get_domain_from_id @@ -105,13 +107,10 @@ def allowed(event: EventBase) -> Optional[EventBase]: """ Args: event: event to check - Returns: None if the user cannot see this event at all - a redacted copy of the event if they can only see a redacted version - the original event if they can see it as normal. """ # Only run some checks if these events aren't about to be sent to clients. This is @@ -160,102 +159,159 @@ def allowed(event: EventBase) -> Optional[EventBase]: return None state = event_id_to_state[event.event_id] + visible_event = _check_visibility( + user_id, event, state, is_peeking, erased_senders + ) + return visible_event - # get the room_visibility at the time of the event. - visibility = get_effective_room_visibility_from_state(state) - - # Always allow history visibility events on boundaries. This is done - # by setting the effective visibility to the least restrictive - # of the old vs new. - if event.type == EventTypes.RoomHistoryVisibility: - prev_content = event.unsigned.get("prev_content", {}) - prev_visibility = prev_content.get("history_visibility", None) - - if prev_visibility not in VISIBILITY_PRIORITY: - prev_visibility = HistoryVisibility.SHARED - - new_priority = VISIBILITY_PRIORITY.index(visibility) - old_priority = VISIBILITY_PRIORITY.index(prev_visibility) - if old_priority < new_priority: - visibility = prev_visibility - - # likewise, if the event is the user's own membership event, use - # the 'most joined' membership - membership = None - if event.type == EventTypes.Member and event.state_key == user_id: - membership = event.content.get("membership", None) - if membership not in MEMBERSHIP_PRIORITY: - membership = "leave" - - prev_content = event.unsigned.get("prev_content", {}) - prev_membership = prev_content.get("membership", None) - if prev_membership not in MEMBERSHIP_PRIORITY: - prev_membership = "leave" - - # Always allow the user to see their own leave events, otherwise - # they won't see the room disappear if they reject the invite - # - # (Note this doesn't work for out-of-band invite rejections, which don't - # have prev_state populated. They are handled above in the outlier code.) - if membership == "leave" and ( - prev_membership == "join" or prev_membership == "invite" - ): - return event + # Check each event: gives an iterable of None or (a potentially modified) + # EventBase. + filtered_events = map(allowed, events) - new_priority = MEMBERSHIP_PRIORITY.index(membership) - old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) - if old_priority < new_priority: - membership = prev_membership + # Turn it into a list and remove None entries before returning. + return [ev for ev in filtered_events if ev] - # otherwise, get the user's membership at the time of the event. - if membership is None: - membership_event = state.get((EventTypes.Member, user_id), None) - if membership_event: - membership = membership_event.membership - # if the user was a member of the room at the time of the event, - # they can see it. - if membership == Membership.JOIN: - return event +async def filter_events_for_client_with_state( + store: DataStore, user_id: str, event: EventBase, context: EventContext +) -> Optional[EventBase]: + """ + Checks to see if an event is visible to the user at the time of the event + Args: + store: databases + user_id: user_id to be checked + event: the event to be checked + context: EventContext for the event to be checked + Returns: the event if the room history is visible to the user at the time of the event, None if not + """ + filter = StateFilter.from_types( + [(EventTypes.Member, user_id), (EventTypes.RoomHistoryVisibility, "")] + ) + state_map = await context.get_prev_state_ids(filter) - # otherwise, it depends on the room visibility. + # Use events rather than event ids as content from the events are needed in _check_visibility + updated_state_map = {} + for state_key, event_id in state_map.items(): + updated_state_map[state_key] = await store.get_event(event_id) - if visibility == HistoryVisibility.JOINED: - # we weren't a member at the time of the event, so we can't - # see this event. - return None + if event.is_state(): + current_state_key = (event.type, event.state_key) + # Add current event to updated_state_map, we need to do this here as it may not have been persisted to the db yet + updated_state_map[current_state_key] = event - elif visibility == HistoryVisibility.INVITED: - # user can also see the event if they were *invited* at the time - # of the event. - return event if membership == Membership.INVITE else None - - elif visibility == HistoryVisibility.SHARED and is_peeking: - # if the visibility is shared, users cannot see the event unless - # they have *subsequently* joined the room (or were members at the - # time, of course) - # - # XXX: if the user has subsequently joined and then left again, - # ideally we would share history up to the point they left. But - # we don't know when they left. We just treat it as though they - # never joined, and restrict access. - return None + filtered_event = _check_visibility(user_id, event, updated_state_map) + return filtered_event - # the visibility is either shared or world_readable, and the user was - # not a member at the time. We allow it, provided the original sender - # has not requested their data to be erased, in which case, we return - # a redacted version. - if erased_senders[event.sender]: - return prune_event(event) +def _check_visibility( + user_id: str, + event: EventBase, + state_map: StateMap, + is_peeking: bool = False, + erased_senders: Optional[dict] = None, +) -> Optional[EventBase]: + """ + Checks whether the room history should be visible to the user at the time of this event. + Args: + user_id: user_id for the user to be checked + event: the event to be checked + state_map: state at the event to be checked + is_peeking: whether the user in question is peeking + erased_senders: optional dict matching a user id to a boolean representing whether the user has requested erasure + Returns: the event if the room history is visible to the user at the time of the event, None if not + """ + # Get the room_visibility at the time of the event. + visibility = get_effective_room_visibility_from_state(state_map) + + # Always allow history visibility events on boundaries. This is done + # by setting the effective visibility to the least restrictive + # of the old vs new. + if event.type == EventTypes.RoomHistoryVisibility: + prev_content = event.unsigned.get("prev_content", {}) + prev_visibility = prev_content.get("history_visibility", None) + + if prev_visibility not in VISIBILITY_PRIORITY: + prev_visibility = HistoryVisibility.SHARED + + new_priority = VISIBILITY_PRIORITY.index(visibility) + old_priority = VISIBILITY_PRIORITY.index(prev_visibility) + if old_priority < new_priority: + visibility = prev_visibility + + # likewise, if the event is the user's own membership event, use + # the 'most joined' membership + membership = None + if event.type == EventTypes.Member and event.state_key == user_id: + membership = event.content.get("membership", None) + if membership not in MEMBERSHIP_PRIORITY: + membership = "leave" + # members can see their own membership invite + if membership == Membership.INVITE: + return event + + prev_content = event.unsigned.get("prev_content", {}) + prev_membership = prev_content.get("membership", None) + if prev_membership not in MEMBERSHIP_PRIORITY: + prev_membership = "leave" + + # Always allow the user to see their own leave events, otherwise + # they won't see the room disappear if they reject the invite + # + # (Note this doesn't work for out-of-band invite rejections, which don't + # have prev_state populated. They are handled above in the outlier code.) + if membership == "leave" and ( + prev_membership == "join" or prev_membership == "invite" + ): + return event + + new_priority = MEMBERSHIP_PRIORITY.index(membership) + old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) + if old_priority < new_priority: + membership = prev_membership + + # otherwise, get the user's membership at the time of the event. + if membership is None: + membership_event = state_map.get((EventTypes.Member, user_id), None) + if membership_event: + membership = membership_event.membership + + # if the user was a member of the room at the time of the event, + # they can see it. + if membership == Membership.JOIN: return event - # Check each event: gives an iterable of None or (a potentially modified) - # EventBase. - filtered_events = map(allowed, events) + # otherwise, it depends on the room visibility. + + if visibility == HistoryVisibility.JOINED: + # we weren't a member at the time of the event, so we can't + # see this event. + return None + + elif visibility == HistoryVisibility.INVITED: + # user can also see the event if they were *invited* at the time + # of the event. + return event if membership == Membership.INVITE else None + + elif visibility == HistoryVisibility.SHARED and is_peeking: + # if the visibility is shared, users cannot see the event unless + # they have *subsequently* joined the room (or were members at the + # time, of course) + # + # XXX: if the user has subsequently joined and then left again, + # ideally we would share history up to the point they left. But + # we don't know when they left. We just treat it as though they + # never joined, and restrict access. + return None + + # the visibility is either shared or world_readable, and the user was + # not a member at the time. We allow it, provided the original sender + # has not requested their data to be erased, in which case, we return + # a redacted version. + if erased_senders: + if erased_senders[event.sender]: + return prune_event(event) - # Turn it into a list and remove None entries before returning. - return [ev for ev in filtered_events if ev] + return event def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str: From f7ace023cb3de8d638eea69cb48ca9af0e8f000a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 16 Jun 2022 09:51:25 +0100 Subject: [PATCH 13/50] Newsfile --- changelog.d/13078.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13078.misc diff --git a/changelog.d/13078.misc b/changelog.d/13078.misc new file mode 100644 index 000000000000..3835e97ad9c7 --- /dev/null +++ b/changelog.d/13078.misc @@ -0,0 +1 @@ +Reduce memory consumption when processing incoming events in large rooms. From 4a0836e4828836ca56f2ac92bd5fec5863ed0cfe Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Sat, 2 Jul 2022 09:16:59 -0700 Subject: [PATCH 14/50] remove rulesforroom and use room-specific profiles --- synapse/push/bulk_push_rule_evaluator.py | 252 ++++------------------- 1 file changed, 39 insertions(+), 213 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 3181ea8e0ae5..1be5c5e754f9 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -17,7 +17,6 @@ import logging from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Union -import attr from prometheus_client import Counter from synapse.api.constants import EventTypes, Membership, RelationTypes @@ -27,11 +26,8 @@ from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership from synapse.storage.state import StateFilter -from synapse.types import get_localpart_from_id -from synapse.util.async_helpers import Linearizer -from synapse.util.caches import CacheMetric, register_cache +from synapse.util.caches import register_cache from synapse.util.caches.descriptors import lru_cache -from synapse.util.caches.lrucache import LruCache from synapse.util.metrics import measure_func from synapse.visibility import filter_events_for_client_with_state @@ -50,15 +46,6 @@ "synapse_push_bulk_push_rule_evaluator_push_rules_state_size_counter", "" ) -# Measures whether we use the fast path of using state deltas, or if we have to -# recalculate from scratch -push_rules_delta_state_cache_metric = register_cache( - "cache", - "push_rules_delta_state_cache_metric", - cache=[], # Meaningless size, as this isn't a cache that stores values - resizable=False, -) - STATE_EVENT_TYPES_TO_MARK_UNREAD = { EventTypes.Topic, @@ -113,10 +100,6 @@ def __init__(self, hs: "HomeServer"): self.clock = hs.get_clock() self._event_auth_handler = hs.get_event_auth_handler() - # Used by `RulesForRoom` to ensure only one thing mutates the cache at a - # time. Keyed off room_id. - self._rules_linearizer = Linearizer(name="rules_for_room") - self.room_push_rule_cache_metrics = register_cache( "cache", "room_push_rule_cache", @@ -127,8 +110,34 @@ def __init__(self, hs: "HomeServer"): # Whether to support MSC3772 is supported. self._relations_match_enabled = self.hs.config.experimental.msc3772_enabled + @lru_cache() + async def get_rules(self, event: EventBase) -> Dict[str, List[Dict[str, dict]]]: + """Given an event return the rules for all users who are + currently in the room. + """ + + local_users = await self.store.get_local_users_in_room(event.room_id) + + if event.type == EventTypes.Member and event.membership == Membership.JOIN: + if self.hs.is_mine_id(event.state_key): + local_users = list(local_users) + local_users.append(event.state_key) + + ret_rules_by_user = await self.store.bulk_get_push_rules(local_users) + + logger.debug("Users in room: %s", local_users) + + if logger.isEnabledFor(logging.DEBUG): + logger.debug( + "Returning push rules for %r %r", + event.room_id, + ret_rules_by_user.keys(), + ) + return ret_rules_by_user + async def _get_rules_for_event( - self, event: EventBase, context: EventContext + self, + event: EventBase, ) -> Dict[str, List[Dict[str, Any]]]: """This gets the rules for all users in the room at the time of the event, as well as the push rules for the invitee if the event is an invite. @@ -136,19 +145,7 @@ async def _get_rules_for_event( Returns: dict of user_id -> push_rules """ - room_id = event.room_id - - rules_for_room_data = self._get_rules_for_room(room_id) - rules_for_room = RulesForRoom( - hs=self.hs, - room_id=room_id, - rules_for_room_cache=self._get_rules_for_room.cache, - room_push_rule_cache_metrics=self.room_push_rule_cache_metrics, - linearizer=self._rules_linearizer, - cached_data=rules_for_room_data, - ) - - rules_by_user = await rules_for_room.get_rules(event, context) + rules_by_user = await self.get_rules(event) # if this event is an invite event, we may need to run rules for the user # who's been invited, otherwise they won't get told they've been invited @@ -162,14 +159,6 @@ async def _get_rules_for_event( return rules_by_user - @lru_cache() - def _get_rules_for_room(self, room_id: str) -> "RulesForRoomData": - """Get the current RulesForRoomData object for the given room id""" - # It's important that the RulesForRoomData object gets added to self._get_rules_for_room.cache - # before any lookup methods get called on it as otherwise there may be - # a race if invalidate_all gets called (which assumes its in the cache) - return RulesForRoomData() - async def _get_power_levels_and_sender_level( self, event: EventBase, context: EventContext ) -> Tuple[dict, int]: @@ -264,7 +253,7 @@ async def action_for_event_by_user( count_as_unread = _should_count_as_unread(event, context) - rules_by_user = await self._get_rules_for_event(event, context) + rules_by_user = await self._get_rules_for_event(event) actions_by_user: Dict[str, List[Union[dict, str]]] = {} room_member_count = await self.store.get_number_joined_users_in_room( @@ -295,6 +284,11 @@ async def action_for_event_by_user( else: ignorers = frozenset() + users = list(rules_by_user.keys()) + profiles = await self.store.get_users_in_room_with_profiles( + event.room_id, users + ) + for uid, rules in rules_by_user.items(): if event.sender == uid: continue @@ -310,9 +304,10 @@ async def action_for_event_by_user( if not visible: continue - localpart = get_localpart_from_id(uid) - profile_info = await self.store.get_profileinfo(localpart) - display_name = profile_info.display_name + display_name = None + profile = profiles.get(uid) + if profile: + display_name = profile.display_name if not display_name: # Handle the case where we are pushing a membership event to @@ -357,172 +352,3 @@ async def action_for_event_by_user( Rule = Dict[str, dict] RulesByUser = Dict[str, List[Rule]] StateGroup = Union[object, int] - - -@attr.s(slots=True, auto_attribs=True) -class RulesForRoomData: - """The data stored in the cache by `RulesForRoom`. - - We don't store `RulesForRoom` directly in the cache as we want our caches to - *only* include data, and not references to e.g. the data stores. - """ - - # event_id -> EventIdMembership - member_map: MemberMap = attr.Factory(dict) - # user_id -> rules - rules_by_user: RulesByUser = attr.Factory(dict) - - # The last state group we updated the caches for. If the state_group of - # a new event comes along, we know that we can just return the cached - # result. - # On invalidation of the rules themselves (if the user changes them), - # we invalidate everything and set state_group to `object()` - state_group: StateGroup = attr.Factory(object) - - # A sequence number to keep track of when we're allowed to update the - # cache. We bump the sequence number when we invalidate the cache. If - # the sequence number changes while we're calculating stuff we should - # not update the cache with it. - sequence: int = 0 - - # A cache of user_ids that we *know* aren't interesting, e.g. user_ids - # owned by AS's, or remote users, etc. (I.e. users we will never need to - # calculate push for) - # These never need to be invalidated as we will never set up push for - # them. - uninteresting_user_set: Set[str] = attr.Factory(set) - - -class RulesForRoom: - """Caches push rules for users in a room. - - This efficiently handles users joining/leaving the room by not invalidating - the entire cache for the room. - - A new instance is constructed for each call to - `BulkPushRuleEvaluator._get_rules_for_event`, with the cached data from - previous calls passed in. - """ - - def __init__( - self, - hs: "HomeServer", - room_id: str, - rules_for_room_cache: LruCache, - room_push_rule_cache_metrics: CacheMetric, - linearizer: Linearizer, - cached_data: RulesForRoomData, - ): - """ - Args: - hs: The HomeServer object. - room_id: The room ID. - rules_for_room_cache: The cache object that caches these - RoomsForUser objects. - room_push_rule_cache_metrics: The metrics object - linearizer: The linearizer used to ensure only one thing mutates - the cache at a time. Keyed off room_id - cached_data: Cached data from previous calls to `self.get_rules`, - can be mutated. - """ - self.room_id = room_id - self.is_mine_id = hs.is_mine_id - self.store = hs.get_datastores().main - self.room_push_rule_cache_metrics = room_push_rule_cache_metrics - - # Used to ensure only one thing mutates the cache at a time. Keyed off - # room_id. - self.linearizer = linearizer - - self.data = cached_data - - # We need to be clever on the invalidating caches callbacks, as - # otherwise the invalidation callback holds a reference to the object, - # potentially causing it to leak. - # To get around this we pass a function that on invalidations looks ups - # the RoomsForUser entry in the cache, rather than keeping a reference - # to self around in the callback. - self.invalidate_all_cb = _Invalidation(rules_for_room_cache, room_id) - - async def get_rules( - self, event: EventBase, context: EventContext - ) -> Dict[str, List[Dict[str, dict]]]: - """Given an event context return the rules for all users who are - currently in the room. - """ - state_group = context.state_group - - if state_group and self.data.state_group == state_group: - logger.debug("Using cached rules for %r", self.room_id) - self.room_push_rule_cache_metrics.inc_hits() - return self.data.rules_by_user - - async with self.linearizer.queue(self.room_id): - if state_group and self.data.state_group == state_group: - logger.debug("Using cached rules for %r", self.room_id) - self.room_push_rule_cache_metrics.inc_hits() - return self.data.rules_by_user - - local_users = await self.store.get_local_users_in_room( - self.room_id, on_invalidate=self.invalidate_all_cb - ) - - if event.type == EventTypes.Member and event.membership == Membership.JOIN: - if self.is_mine_id(event.state_key): - local_users = list(local_users) - local_users.append(event.state_key) - - ret_rules_by_user = await self.store.bulk_get_push_rules( - local_users, on_invalidate=self.invalidate_all_cb - ) - - logger.debug("Users in room: %s", local_users) - - self.update_cache( - self.data.sequence, - members={}, # There were no membership changes - rules_by_user=ret_rules_by_user, - state_group=state_group, - ) - - if logger.isEnabledFor(logging.DEBUG): - logger.debug( - "Returning push rules for %r %r", self.room_id, ret_rules_by_user.keys() - ) - return ret_rules_by_user - - def update_cache( - self, - sequence: int, - members: MemberMap, - rules_by_user: RulesByUser, - state_group: StateGroup, - ) -> None: - if sequence == self.data.sequence: - self.data.member_map.update(members) - self.data.rules_by_user = rules_by_user - self.data.state_group = state_group - - -@attr.attrs(slots=True, frozen=True, auto_attribs=True) -class _Invalidation: - # _Invalidation is passed as an `on_invalidate` callback to bulk_get_push_rules, - # which means that it it is stored on the bulk_get_push_rules cache entry. In order - # to ensure that we don't accumulate lots of redundant callbacks on the cache entry, - # we need to ensure that two _Invalidation objects are "equal" if they refer to the - # same `cache` and `room_id`. - # - # attrs provides suitable __hash__ and __eq__ methods, provided we remember to - # set `frozen=True`. - - cache: LruCache - room_id: str - - def __call__(self) -> None: - rules_data = self.cache.get(self.room_id, None, update_metrics=False) - if rules_data: - rules_data.sequence += 1 - rules_data.state_group = object() - rules_data.member_map = {} - rules_data.rules_by_user = {} - push_rules_invalidation_counter.inc() From 6f9dd6f3c2244a1e94939168596e80fb78765da9 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Sat, 2 Jul 2022 09:17:33 -0700 Subject: [PATCH 15/50] alter _get_users_in_room_with_profiles to take an optional list of user ids --- synapse/storage/databases/main/roommember.py | 40 ++++++++++++++------ 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 9bb83ac8ac75..8253495509ac 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -212,9 +212,9 @@ def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[s txn.execute(sql, (room_id, Membership.JOIN)) return [r[0] for r in txn] - @cached(max_entries=100000, iterable=True) + @cached(max_entries=100000, iterable=True, uncached_args=["user_ids"]) async def get_users_in_room_with_profiles( - self, room_id: str + self, room_id: str, user_ids: Optional[List[str]] = None ) -> Dict[str, ProfileInfo]: """Get a mapping from user ID to profile information for all users in a given room. @@ -233,15 +233,33 @@ async def get_users_in_room_with_profiles( def _get_users_in_room_with_profiles( txn: LoggingTransaction, ) -> Dict[str, ProfileInfo]: - sql = """ - SELECT state_key, display_name, avatar_url FROM room_memberships as m - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? - """ - txn.execute(sql, (room_id, Membership.JOIN)) + if user_ids: + clause, ids = make_in_list_sql_clause( + self.database_engine, "m.user_id", user_ids + ) + + sql = """ + SELECT state_key, display_name, avatar_url FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? AND %s + """ % ( + clause, + ) + txn.execute(sql, (room_id, Membership.JOIN, *ids)) + + else: + sql = """ + SELECT state_key, display_name, avatar_url FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? + """ + txn.execute(sql, (room_id, Membership.JOIN)) return {r[0]: ProfileInfo(display_name=r[1], avatar_url=r[2]) for r in txn} From 696dc02b36ec956f91e6ab1f6ec4415eb40fee95 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Tue, 5 Jul 2022 11:08:44 -0700 Subject: [PATCH 16/50] remove .invalidate --- synapse/storage/_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 177b9a90c5aa..741eaa95f63b 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -76,7 +76,7 @@ def _invalidate_state_caches( "get_users_in_room_with_profiles", (room_id,) ) self._attempt_to_invalidate_cache( - "get_number_joined_users_in_room.invalidate", (room_id,) + "get_number_joined_users_in_room", (room_id,) ) # Purge other caches based on room state. From caf6222ed8cc5e30c5c0cc5c9e2d2b3f7fdc1321 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Tue, 5 Jul 2022 11:09:13 -0700 Subject: [PATCH 17/50] add docstrings --- synapse/storage/databases/main/roommember.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 8253495509ac..70749c23fd53 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -443,6 +443,17 @@ def _get_rooms_for_local_user_where_membership_is_txn( user_id: str, membership_list: List[str], ) -> List[RoomsForUser]: + """Get all the rooms for this *local* user where the membership for this user + matches one in the membership list. + + Args: + user_id: The user ID. + membership_list: A list of synapse.api.constants.Membership + values which the user must be in. + + Returns: + The RoomsForUser that the user matches the membership types. + """ # Paranoia check. if not self.hs.is_mine_id(user_id): raise Exception( @@ -473,6 +484,9 @@ def _get_rooms_for_local_user_where_membership_is_txn( @cached() async def get_local_users_in_room(self, room_id: str) -> List[str]: + """ + Retrieves a list of the current roommembers who are local to the server. + """ return await self.db_pool.simple_select_onecol( table="local_current_membership", keyvalues={"room_id": room_id, "membership": Membership.JOIN}, From 39dfbf4012d0d31fb1b84f2c89f56aa7b570bdc8 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Tue, 5 Jul 2022 13:45:29 -0700 Subject: [PATCH 18/50] use @cached instead of lrucache --- synapse/push/bulk_push_rule_evaluator.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 1be5c5e754f9..08b4a3990c6a 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -27,7 +27,7 @@ from synapse.storage.databases.main.roommember import EventIdMembership from synapse.storage.state import StateFilter from synapse.util.caches import register_cache -from synapse.util.caches.descriptors import lru_cache +from synapse.util.caches.descriptors import _CacheContext, cached from synapse.util.metrics import measure_func from synapse.visibility import filter_events_for_client_with_state @@ -110,27 +110,33 @@ def __init__(self, hs: "HomeServer"): # Whether to support MSC3772 is supported. self._relations_match_enabled = self.hs.config.experimental.msc3772_enabled - @lru_cache() - async def get_rules(self, event: EventBase) -> Dict[str, List[Dict[str, dict]]]: + @cached(cache_context=True, uncached_args=("event",)) + async def get_rules( + self, room_id: str, event: EventBase, cache_context: _CacheContext + ) -> Dict[str, List[Dict[str, dict]]]: """Given an event return the rules for all users who are currently in the room. """ - local_users = await self.store.get_local_users_in_room(event.room_id) + local_users = await self.store.get_local_users_in_room( + room_id, on_invalidate=cache_context.invalidate + ) if event.type == EventTypes.Member and event.membership == Membership.JOIN: if self.hs.is_mine_id(event.state_key): local_users = list(local_users) local_users.append(event.state_key) - ret_rules_by_user = await self.store.bulk_get_push_rules(local_users) + ret_rules_by_user = await self.store.bulk_get_push_rules( + local_users, on_invalidate=cache_context.invalidate + ) logger.debug("Users in room: %s", local_users) if logger.isEnabledFor(logging.DEBUG): logger.debug( "Returning push rules for %r %r", - event.room_id, + room_id, ret_rules_by_user.keys(), ) return ret_rules_by_user @@ -145,7 +151,7 @@ async def _get_rules_for_event( Returns: dict of user_id -> push_rules """ - rules_by_user = await self.get_rules(event) + rules_by_user = await self.get_rules(event.room_id, event) # if this event is an invite event, we may need to run rules for the user # who's been invited, otherwise they won't get told they've been invited From 9f770f56cc0a8039d7fd82d278967c54a9a4342c Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Tue, 5 Jul 2022 14:23:29 -0700 Subject: [PATCH 19/50] move visibility check out of loop --- synapse/push/bulk_push_rule_evaluator.py | 13 +++++++------ synapse/visibility.py | 21 +++++++++++++-------- 2 files changed, 20 insertions(+), 14 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 08b4a3990c6a..bae2a185a1d3 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -295,6 +295,12 @@ async def action_for_event_by_user( event.room_id, users ) + # This is a check for the case where user joins a room without being allowed to see history, and then the server + # receives a delayed event from before the user joined, which they should not be pushed for + uids_with_visibility = await filter_events_for_client_with_state( + self.store, users, event, context + ) + for uid, rules in rules_by_user.items(): if event.sender == uid: continue @@ -302,12 +308,7 @@ async def action_for_event_by_user( if uid in ignorers: continue - # This is a check for the case where user joins a room without being allowed to see history, and then the server - # receives a delayed event from before the user joined, which they should not be pushed for - visible = await filter_events_for_client_with_state( - self.store, uid, event, context - ) - if not visible: + if uid not in uids_with_visibility: continue display_name = None diff --git a/synapse/visibility.py b/synapse/visibility.py index 8985c422fd8c..8d7a70617feb 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -173,19 +173,19 @@ def allowed(event: EventBase) -> Optional[EventBase]: async def filter_events_for_client_with_state( - store: DataStore, user_id: str, event: EventBase, context: EventContext -) -> Optional[EventBase]: + store: DataStore, user_ids: List[str], event: EventBase, context: EventContext +) -> List[str]: """ - Checks to see if an event is visible to the user at the time of the event + Checks to see if an event is visible to the users in the list at the time of the event Args: store: databases - user_id: user_id to be checked + user_ids: user_ids to be checked event: the event to be checked context: EventContext for the event to be checked - Returns: the event if the room history is visible to the user at the time of the event, None if not + Returns: list of uids for whom the event is visible """ filter = StateFilter.from_types( - [(EventTypes.Member, user_id), (EventTypes.RoomHistoryVisibility, "")] + [(EventTypes.Member, None), (EventTypes.RoomHistoryVisibility, "")] ) state_map = await context.get_prev_state_ids(filter) @@ -199,8 +199,13 @@ async def filter_events_for_client_with_state( # Add current event to updated_state_map, we need to do this here as it may not have been persisted to the db yet updated_state_map[current_state_key] = event - filtered_event = _check_visibility(user_id, event, updated_state_map) - return filtered_event + uids_with_visibility = [] + for id in user_ids: + can_see = _check_visibility(id, event, updated_state_map) + if can_see: + uids_with_visibility.append(id) + + return uids_with_visibility def _check_visibility( From 3f8a515e7d648922cc4758d5acb721fcba2f62cc Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Tue, 5 Jul 2022 16:24:03 -0700 Subject: [PATCH 20/50] add soft-fail check --- synapse/visibility.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/visibility.py b/synapse/visibility.py index 8d7a70617feb..e6d89fab3492 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -184,6 +184,10 @@ async def filter_events_for_client_with_state( context: EventContext for the event to be checked Returns: list of uids for whom the event is visible """ + # None of the users should see the event if it is soft_failed + if event.internal_metadata.is_soft_failed(): + return [] + filter = StateFilter.from_types( [(EventTypes.Member, None), (EventTypes.RoomHistoryVisibility, "")] ) From df1105d3147205844391924cb73d080e27073bc3 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Wed, 6 Jul 2022 09:18:46 -0700 Subject: [PATCH 21/50] lint --- synapse/storage/databases/main/roommember.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 70749c23fd53..a49bfa9146b6 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -444,15 +444,15 @@ def _get_rooms_for_local_user_where_membership_is_txn( membership_list: List[str], ) -> List[RoomsForUser]: """Get all the rooms for this *local* user where the membership for this user - matches one in the membership list. + matches one in the membership list. - Args: - user_id: The user ID. - membership_list: A list of synapse.api.constants.Membership - values which the user must be in. + Args: + user_id: The user ID. + membership_list: A list of synapse.api.constants.Membership + values which the user must be in. - Returns: - The RoomsForUser that the user matches the membership types. + Returns: + The RoomsForUser that the user matches the membership types. """ # Paranoia check. if not self.hs.is_mine_id(user_id): From 0452aef53859461c12f0d6adeb70eefb0481fc32 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Thu, 7 Jul 2022 10:29:15 -0700 Subject: [PATCH 22/50] separate out functions and add caches --- synapse/push/bulk_push_rule_evaluator.py | 2 +- synapse/storage/databases/main/roommember.py | 93 ++++++++++++++------ 2 files changed, 65 insertions(+), 30 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index bae2a185a1d3..ee62b804e289 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -291,7 +291,7 @@ async def action_for_event_by_user( ignorers = frozenset() users = list(rules_by_user.keys()) - profiles = await self.store.get_users_in_room_with_profiles( + profiles = await self.store.get_subset_users_in_room_with_profiles( event.room_id, users ) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index a49bfa9146b6..0fb420a4f57b 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -212,9 +212,62 @@ def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[s txn.execute(sql, (room_id, Membership.JOIN)) return [r[0] for r in txn] - @cached(max_entries=100000, iterable=True, uncached_args=["user_ids"]) + @cached() + def get_user_in_room_with_profile( + self, room_id: str, user_id: str + ) -> Dict[str, ProfileInfo]: + raise NotImplementedError() + + @cachedList( + cached_method_name="get_user_in_room_with_profile", list_name="user_ids" + ) + async def get_subset_users_in_room_with_profiles( + self, room_id: str, user_ids: List[str] + ) -> Dict[str, ProfileInfo]: + """Get a mapping from user ID to profile information for a list of users in a given room. + + The profile information comes directly from this room's `m.room.member` + events, and so may be specific to this room rather than part of a user's + global profile. To avoid privacy leaks, the profile data should only be + revealed to users who are already in this room. + + Args: + room_id: The ID of the room to retrieve the users of. + user_ids: a list of users in the room to run the query for + + Returns: + A mapping from user ID to ProfileInfo. + """ + + def _get_subset_users_in_room_with_profiles( + txn: LoggingTransaction, + ) -> Dict[str, ProfileInfo]: + clause, ids = make_in_list_sql_clause( + self.database_engine, "m.user_id", user_ids + ) + + sql = """ + SELECT state_key, display_name, avatar_url FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? AND %s + """ % ( + clause, + ) + txn.execute(sql, (room_id, Membership.JOIN, *ids)) + + return {r[0]: ProfileInfo(display_name=r[1], avatar_url=r[2]) for r in txn} + + return await self.db_pool.runInteraction( + "get_subset_users_in_room_with_profiles", + _get_subset_users_in_room_with_profiles, + ) + + @cached(max_entries=100000, iterable=True) async def get_users_in_room_with_profiles( - self, room_id: str, user_ids: Optional[List[str]] = None + self, room_id: str ) -> Dict[str, ProfileInfo]: """Get a mapping from user ID to profile information for all users in a given room. @@ -233,33 +286,15 @@ async def get_users_in_room_with_profiles( def _get_users_in_room_with_profiles( txn: LoggingTransaction, ) -> Dict[str, ProfileInfo]: - if user_ids: - clause, ids = make_in_list_sql_clause( - self.database_engine, "m.user_id", user_ids - ) - - sql = """ - SELECT state_key, display_name, avatar_url FROM room_memberships as m - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? AND %s - """ % ( - clause, - ) - txn.execute(sql, (room_id, Membership.JOIN, *ids)) - - else: - sql = """ - SELECT state_key, display_name, avatar_url FROM room_memberships as m - INNER JOIN current_state_events as c - ON m.event_id = c.event_id - AND m.room_id = c.room_id - AND m.user_id = c.state_key - WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? - """ - txn.execute(sql, (room_id, Membership.JOIN)) + sql = """ + SELECT state_key, display_name, avatar_url FROM room_memberships as m + INNER JOIN current_state_events as c + ON m.event_id = c.event_id + AND m.room_id = c.room_id + AND m.user_id = c.state_key + WHERE c.type = 'm.room.member' AND c.room_id = ? AND m.membership = ? + """ + txn.execute(sql, (room_id, Membership.JOIN)) return {r[0]: ProfileInfo(display_name=r[1], avatar_url=r[2]) for r in txn} From 0d9282d255b3632d4db912f1b8c9cfa88a76fb2b Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Thu, 7 Jul 2022 10:34:35 -0700 Subject: [PATCH 23/50] ditch cache on get_rules --- synapse/push/bulk_push_rule_evaluator.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index ee62b804e289..caad0e8830ba 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -27,7 +27,6 @@ from synapse.storage.databases.main.roommember import EventIdMembership from synapse.storage.state import StateFilter from synapse.util.caches import register_cache -from synapse.util.caches.descriptors import _CacheContext, cached from synapse.util.metrics import measure_func from synapse.visibility import filter_events_for_client_with_state @@ -110,26 +109,21 @@ def __init__(self, hs: "HomeServer"): # Whether to support MSC3772 is supported. self._relations_match_enabled = self.hs.config.experimental.msc3772_enabled - @cached(cache_context=True, uncached_args=("event",)) async def get_rules( - self, room_id: str, event: EventBase, cache_context: _CacheContext + self, room_id: str, event: EventBase ) -> Dict[str, List[Dict[str, dict]]]: """Given an event return the rules for all users who are currently in the room. """ - local_users = await self.store.get_local_users_in_room( - room_id, on_invalidate=cache_context.invalidate - ) + local_users = await self.store.get_local_users_in_room(room_id) if event.type == EventTypes.Member and event.membership == Membership.JOIN: if self.hs.is_mine_id(event.state_key): local_users = list(local_users) local_users.append(event.state_key) - ret_rules_by_user = await self.store.bulk_get_push_rules( - local_users, on_invalidate=cache_context.invalidate - ) + ret_rules_by_user = await self.store.bulk_get_push_rules(local_users) logger.debug("Users in room: %s", local_users) From d318c2f98f024d8ee316792936110c4c3b9c8e23 Mon Sep 17 00:00:00 2001 From: "H. Shay" Date: Thu, 7 Jul 2022 11:10:55 -0700 Subject: [PATCH 24/50] build list of user member events rather than pull all membership out of db --- synapse/visibility.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index e6d89fab3492..8eb49738cedc 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -188,10 +188,13 @@ async def filter_events_for_client_with_state( if event.internal_metadata.is_soft_failed(): return [] - filter = StateFilter.from_types( - [(EventTypes.Member, None), (EventTypes.RoomHistoryVisibility, "")] - ) - state_map = await context.get_prev_state_ids(filter) + filter_list = [] + for user_id in user_ids: + filter_list.append((EventTypes.Member, user_id)) + filter_list.append((EventTypes.RoomHistoryVisibility, "")) + + state_filter = StateFilter.from_types(filter_list) + state_map = await context.get_prev_state_ids(state_filter) # Use events rather than event ids as content from the events are needed in _check_visibility updated_state_map = {} From 6f74c5157ce73d295ce032e0aca6ec5adab57b09 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 10:26:30 +0100 Subject: [PATCH 25/50] Invalidate cache --- synapse/storage/_base.py | 5 +++++ synapse/storage/databases/main/cache.py | 1 + synapse/storage/databases/main/events.py | 4 ++++ synapse/storage/databases/main/roommember.py | 7 ++++--- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 741eaa95f63b..66f566579844 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -79,6 +79,11 @@ def _invalidate_state_caches( "get_number_joined_users_in_room", (room_id,) ) + for user_id in members_changed: + self._attempt_to_invalidate_cache( + "get_user_in_room_with_profile", (room_id, user_id) + ) + # Purge other caches based on room state. self._attempt_to_invalidate_cache("get_room_summary", (room_id,)) self._attempt_to_invalidate_cache("get_partial_current_state_ids", (room_id,)) diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 296d2d8f74e6..445617d0206c 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -219,6 +219,7 @@ def _invalidate_caches_for_event( self.get_invited_rooms_for_local_user.invalidate((state_key,)) self.get_local_users_in_room.invalidate((room_id,)) self.get_number_joined_users_in_room.invalidate((room_id,)) + self.get_user_in_room_with_profile.invalidate((room_id, state_key)) if relates_to: self.get_relations_for_event.invalidate((relates_to,)) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index dceb3d6c3597..1c63a3c43c54 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1774,6 +1774,10 @@ def _store_room_members_txn( self.store.get_number_joined_users_in_room.invalidate, (event.room_id,), ) + txn.call_after( + self.store.get_user_in_room_with_profile.invalidate, + (event.room_id, event.state_key), + ) # The `_get_membership_from_event_id` is immutable, except for the # case where we look up an event *before* persisting it. diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 0fb420a4f57b..d2632945936c 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -224,7 +224,8 @@ def get_user_in_room_with_profile( async def get_subset_users_in_room_with_profiles( self, room_id: str, user_ids: List[str] ) -> Dict[str, ProfileInfo]: - """Get a mapping from user ID to profile information for a list of users in a given room. + """Get a mapping from user ID to profile information for a list of users + in a given room. The profile information comes directly from this room's `m.room.member` events, and so may be specific to this room rather than part of a user's @@ -232,8 +233,8 @@ async def get_subset_users_in_room_with_profiles( revealed to users who are already in this room. Args: - room_id: The ID of the room to retrieve the users of. - user_ids: a list of users in the room to run the query for + room_id: The ID of the room to retrieve the users of. user_ids: a + list of users in the room to run the query for Returns: A mapping from user ID to ProfileInfo. From a1274bfb2e34a3a1290e90d6cc9064bca0828c24 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 10:28:51 +0100 Subject: [PATCH 26/50] Rewrap comments --- synapse/visibility.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 8eb49738cedc..908356ba0580 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -176,13 +176,17 @@ async def filter_events_for_client_with_state( store: DataStore, user_ids: List[str], event: EventBase, context: EventContext ) -> List[str]: """ - Checks to see if an event is visible to the users in the list at the time of the event + Checks to see if an event is visible to the users in the list at the time of + the event + Args: store: databases user_ids: user_ids to be checked event: the event to be checked context: EventContext for the event to be checked - Returns: list of uids for whom the event is visible + + Returns: + list of uids for whom the event is visible """ # None of the users should see the event if it is soft_failed if event.internal_metadata.is_soft_failed(): @@ -196,14 +200,16 @@ async def filter_events_for_client_with_state( state_filter = StateFilter.from_types(filter_list) state_map = await context.get_prev_state_ids(state_filter) - # Use events rather than event ids as content from the events are needed in _check_visibility + # Use events rather than event ids as content from the events are needed in + # _check_visibility updated_state_map = {} for state_key, event_id in state_map.items(): updated_state_map[state_key] = await store.get_event(event_id) if event.is_state(): current_state_key = (event.type, event.state_key) - # Add current event to updated_state_map, we need to do this here as it may not have been persisted to the db yet + # Add current event to updated_state_map, we need to do this here as it + # may not have been persisted to the db yet updated_state_map[current_state_key] = event uids_with_visibility = [] @@ -223,14 +229,20 @@ def _check_visibility( erased_senders: Optional[dict] = None, ) -> Optional[EventBase]: """ - Checks whether the room history should be visible to the user at the time of this event. + Checks whether the room history should be visible to the user at the time + of this event. + Args: user_id: user_id for the user to be checked event: the event to be checked state_map: state at the event to be checked is_peeking: whether the user in question is peeking - erased_senders: optional dict matching a user id to a boolean representing whether the user has requested erasure - Returns: the event if the room history is visible to the user at the time of the event, None if not + erased_senders: optional dict matching a user id to a boolean + representing whether the user has requested erasure + + Returns: + the event if the room history is visible to the user at the time + of the event, None if not """ # Get the room_visibility at the time of the event. visibility = get_effective_room_visibility_from_state(state_map) From 3a29f863f1d38bf4843d2f8c2a44a06ec6fd9d71 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 10:32:34 +0100 Subject: [PATCH 27/50] Batch fetch events --- synapse/visibility.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 908356ba0580..479b0635dcd0 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -202,9 +202,13 @@ async def filter_events_for_client_with_state( # Use events rather than event ids as content from the events are needed in # _check_visibility + event_map = await store.get_events(state_map.values(), get_prev_content=False) + updated_state_map = {} for state_key, event_id in state_map.items(): - updated_state_map[state_key] = await store.get_event(event_id) + state_event = event_map.get(event_id) + if state_event: + updated_state_map[state_key] = state_event if event.is_state(): current_state_key = (event.type, event.state_key) From e7811498e94144300167671400f99fcdad6db72a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 10:49:27 +0100 Subject: [PATCH 28/50] Split out 'allowed' check in visibility --- synapse/visibility.py | 313 ++++++++++++++++++++++++------------------ 1 file changed, 176 insertions(+), 137 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 8aaa8c709fad..f15eedf48d1b 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -23,6 +23,7 @@ from synapse.storage.controllers import StorageControllers from synapse.storage.state import StateFilter from synapse.types import RetentionPolicy, StateMap, get_domain_from_id +from synapse.util import Clock logger = logging.getLogger(__name__) @@ -102,160 +103,198 @@ async def filter_events_for_client( ] = await storage.main.get_retention_policy_for_room(room_id) def allowed(event: EventBase) -> Optional[EventBase]: - """ - Args: - event: event to check - - Returns: - None if the user cannot see this event at all - - a redacted copy of the event if they can only see a redacted - version - - the original event if they can see it as normal. - """ - # Only run some checks if these events aren't about to be sent to clients. This is - # because, if this is not the case, we're probably only checking if the users can - # see events in the room at that point in the DAG, and that shouldn't be decided - # on those checks. - if filter_send_to_client: - if event.type == EventTypes.Dummy: - return None - - if not event.is_state() and event.sender in ignore_list: - return None - - # Until MSC2261 has landed we can't redact malicious alias events, so for - # now we temporarily filter out m.room.aliases entirely to mitigate - # abuse, while we spec a better solution to advertising aliases - # on rooms. - if event.type == EventTypes.Aliases: - return None - - # Don't try to apply the room's retention policy if the event is a state - # event, as MSC1763 states that retention is only considered for non-state - # events. - if not event.is_state(): - retention_policy = retention_policies[event.room_id] - max_lifetime = retention_policy.max_lifetime - - if max_lifetime is not None: - oldest_allowed_ts = storage.main.clock.time_msec() - max_lifetime - - if event.origin_server_ts < oldest_allowed_ts: - return None - - if event.event_id in always_include_ids: - return event - - # we need to handle outliers separately, since we don't have the room state. - if event.internal_metadata.outlier: - # Normally these can't be seen by clients, but we make an exception for - # for out-of-band membership events (eg, incoming invites, or rejections of - # said invite) for the user themselves. - if event.type == EventTypes.Member and event.state_key == user_id: - logger.debug("Returning out-of-band-membership event %s", event) - return event + return _check_client_allowed_to_see_event( + user_id=user_id, + event=event, + clock=storage.main.clock, + filter_send_to_client=filter_send_to_client, + sender_ignored=event.sender in ignore_list, + always_include_ids=always_include_ids, + retention_policy=retention_policies[room_id], + state=event_id_to_state.get(event.event_id), + is_peeking=is_peeking, + sender_erased=event.sender in erased_senders, + ) - return None + # Check each event: gives an iterable of None or (a potentially modified) + # EventBase. + filtered_events = map(allowed, events) - state = event_id_to_state[event.event_id] + # Turn it into a list and remove None entries before returning. + return [ev for ev in filtered_events if ev] - # get the room_visibility at the time of the event. - visibility = get_effective_room_visibility_from_state(state) - # Always allow history visibility events on boundaries. This is done - # by setting the effective visibility to the least restrictive - # of the old vs new. - if event.type == EventTypes.RoomHistoryVisibility: - prev_content = event.unsigned.get("prev_content", {}) - prev_visibility = prev_content.get("history_visibility", None) +def _check_client_allowed_to_see_event( + user_id: str, + event: EventBase, + clock: Clock, + filter_send_to_client: bool, + is_peeking: bool, + always_include_ids: FrozenSet[str], + sender_ignored: bool, + retention_policy: RetentionPolicy, + state: Optional[StateMap[EventBase]], + sender_erased: bool, +) -> Optional[EventBase]: + """Check with the given user is allowed to see the given event + + See `filter_events_for_client` for details about args - if prev_visibility not in VISIBILITY_PRIORITY: - prev_visibility = HistoryVisibility.SHARED + Args: + user_id + event + clock + filter_send_to_client + is_peeking + always_include_ids + sender_ignored: Whether the user is ignoring the event sender + retention_policy: The retention policy of the room + state: The state at the event, unless its an outlier + sender_erased: Whether the event sender has been marked as "erased" - new_priority = VISIBILITY_PRIORITY.index(visibility) - old_priority = VISIBILITY_PRIORITY.index(prev_visibility) - if old_priority < new_priority: - visibility = prev_visibility + Returns: + None if the user cannot see this event at all - # likewise, if the event is the user's own membership event, use - # the 'most joined' membership - membership = None - if event.type == EventTypes.Member and event.state_key == user_id: - membership = event.content.get("membership", None) - if membership not in MEMBERSHIP_PRIORITY: - membership = "leave" - - prev_content = event.unsigned.get("prev_content", {}) - prev_membership = prev_content.get("membership", None) - if prev_membership not in MEMBERSHIP_PRIORITY: - prev_membership = "leave" - - # Always allow the user to see their own leave events, otherwise - # they won't see the room disappear if they reject the invite - # - # (Note this doesn't work for out-of-band invite rejections, which don't - # have prev_state populated. They are handled above in the outlier code.) - if membership == "leave" and ( - prev_membership == "join" or prev_membership == "invite" - ): - return event - - new_priority = MEMBERSHIP_PRIORITY.index(membership) - old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) - if old_priority < new_priority: - membership = prev_membership - - # otherwise, get the user's membership at the time of the event. - if membership is None: - membership_event = state.get((EventTypes.Member, user_id), None) - if membership_event: - membership = membership_event.membership - - # if the user was a member of the room at the time of the event, - # they can see it. - if membership == Membership.JOIN: - return event + a redacted copy of the event if they can only see a redacted + version - # otherwise, it depends on the room visibility. + the original event if they can see it as normal. + """ + # Only run some checks if these events aren't about to be sent to clients. This is + # because, if this is not the case, we're probably only checking if the users can + # see events in the room at that point in the DAG, and that shouldn't be decided + # on those checks. + if filter_send_to_client: + if event.type == EventTypes.Dummy: + return None - if visibility == HistoryVisibility.JOINED: - # we weren't a member at the time of the event, so we can't - # see this event. + if not event.is_state() and sender_ignored: return None - elif visibility == HistoryVisibility.INVITED: - # user can also see the event if they were *invited* at the time - # of the event. - return event if membership == Membership.INVITE else None - - elif visibility == HistoryVisibility.SHARED and is_peeking: - # if the visibility is shared, users cannot see the event unless - # they have *subsequently* joined the room (or were members at the - # time, of course) - # - # XXX: if the user has subsequently joined and then left again, - # ideally we would share history up to the point they left. But - # we don't know when they left. We just treat it as though they - # never joined, and restrict access. + # Until MSC2261 has landed we can't redact malicious alias events, so for + # now we temporarily filter out m.room.aliases entirely to mitigate + # abuse, while we spec a better solution to advertising aliases + # on rooms. + if event.type == EventTypes.Aliases: return None - # the visibility is either shared or world_readable, and the user was - # not a member at the time. We allow it, provided the original sender - # has not requested their data to be erased, in which case, we return - # a redacted version. - if erased_senders[event.sender]: - return prune_event(event) + # Don't try to apply the room's retention policy if the event is a state + # event, as MSC1763 states that retention is only considered for non-state + # events. + if not event.is_state(): + max_lifetime = retention_policy.max_lifetime + + if max_lifetime is not None: + oldest_allowed_ts = clock.time_msec() - max_lifetime + if event.origin_server_ts < oldest_allowed_ts: + return None + + if event.event_id in always_include_ids: return event - # Check each event: gives an iterable of None or (a potentially modified) - # EventBase. - filtered_events = map(allowed, events) + # we need to handle outliers separately, since we don't have the room state. + if event.internal_metadata.outlier: + # Normally these can't be seen by clients, but we make an exception for + # for out-of-band membership events (eg, incoming invites, or rejections of + # said invite) for the user themselves. + if event.type == EventTypes.Member and event.state_key == user_id: + logger.debug("Returning out-of-band-membership event %s", event) + return event - # Turn it into a list and remove None entries before returning. - return [ev for ev in filtered_events if ev] + return None + + if state is None: + raise Exception("Missing state for non-outlier event") + + # get the room_visibility at the time of the event. + visibility = get_effective_room_visibility_from_state(state) + + # Always allow history visibility events on boundaries. This is done + # by setting the effective visibility to the least restrictive + # of the old vs new. + if event.type == EventTypes.RoomHistoryVisibility: + prev_content = event.unsigned.get("prev_content", {}) + prev_visibility = prev_content.get("history_visibility", None) + + if prev_visibility not in VISIBILITY_PRIORITY: + prev_visibility = HistoryVisibility.SHARED + + new_priority = VISIBILITY_PRIORITY.index(visibility) + old_priority = VISIBILITY_PRIORITY.index(prev_visibility) + if old_priority < new_priority: + visibility = prev_visibility + + # likewise, if the event is the user's own membership event, use + # the 'most joined' membership + membership = None + if event.type == EventTypes.Member and event.state_key == user_id: + membership = event.content.get("membership", None) + if membership not in MEMBERSHIP_PRIORITY: + membership = "leave" + + prev_content = event.unsigned.get("prev_content", {}) + prev_membership = prev_content.get("membership", None) + if prev_membership not in MEMBERSHIP_PRIORITY: + prev_membership = "leave" + + # Always allow the user to see their own leave events, otherwise + # they won't see the room disappear if they reject the invite + # + # (Note this doesn't work for out-of-band invite rejections, which don't + # have prev_state populated. They are handled above in the outlier code.) + if membership == "leave" and ( + prev_membership == "join" or prev_membership == "invite" + ): + return event + + new_priority = MEMBERSHIP_PRIORITY.index(membership) + old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) + if old_priority < new_priority: + membership = prev_membership + + # otherwise, get the user's membership at the time of the event. + if membership is None: + membership_event = state.get((EventTypes.Member, user_id), None) + if membership_event: + membership = membership_event.membership + + # if the user was a member of the room at the time of the event, + # they can see it. + if membership == Membership.JOIN: + return event + + # otherwise, it depends on the room visibility. + + if visibility == HistoryVisibility.JOINED: + # we weren't a member at the time of the event, so we can't + # see this event. + return None + + elif visibility == HistoryVisibility.INVITED: + # user can also see the event if they were *invited* at the time + # of the event. + return event if membership == Membership.INVITE else None + + elif visibility == HistoryVisibility.SHARED and is_peeking: + # if the visibility is shared, users cannot see the event unless + # they have *subsequently* joined the room (or were members at the + # time, of course) + # + # XXX: if the user has subsequently joined and then left again, + # ideally we would share history up to the point they left. But + # we don't know when they left. We just treat it as though they + # never joined, and restrict access. + return None + + # the visibility is either shared or world_readable, and the user was + # not a member at the time. We allow it, provided the original sender + # has not requested their data to be erased, in which case, we return + # a redacted version. + if sender_erased: + return prune_event(event) + + return event def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str: From 1760a40198cff7d22f2f2a5fccab8aaa73926875 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 10:53:01 +0100 Subject: [PATCH 29/50] Split out _check_filter_send_to_client --- synapse/visibility.py | 66 ++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 23 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index f15eedf48d1b..0e74768b9120 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -165,31 +165,11 @@ def _check_client_allowed_to_see_event( # see events in the room at that point in the DAG, and that shouldn't be decided # on those checks. if filter_send_to_client: - if event.type == EventTypes.Dummy: - return None - - if not event.is_state() and sender_ignored: - return None - - # Until MSC2261 has landed we can't redact malicious alias events, so for - # now we temporarily filter out m.room.aliases entirely to mitigate - # abuse, while we spec a better solution to advertising aliases - # on rooms. - if event.type == EventTypes.Aliases: + if not _check_filter_send_to_client( + event, clock, retention_policy, sender_ignored + ): return None - # Don't try to apply the room's retention policy if the event is a state - # event, as MSC1763 states that retention is only considered for non-state - # events. - if not event.is_state(): - max_lifetime = retention_policy.max_lifetime - - if max_lifetime is not None: - oldest_allowed_ts = clock.time_msec() - max_lifetime - - if event.origin_server_ts < oldest_allowed_ts: - return None - if event.event_id in always_include_ids: return event @@ -297,6 +277,46 @@ def _check_client_allowed_to_see_event( return event +def _check_filter_send_to_client( + event: EventBase, + clock: Clock, + retention_policy: RetentionPolicy, + sender_ignored: bool, +) -> bool: + """Apply checks for sending events to client + + Returns: + True if might be allowed to be sent to clients, False if definitely not. + """ + + if event.type == EventTypes.Dummy: + return False + + if not event.is_state() and sender_ignored: + return False + + # Until MSC2261 has landed we can't redact malicious alias events, so for + # now we temporarily filter out m.room.aliases entirely to mitigate + # abuse, while we spec a better solution to advertising aliases + # on rooms. + if event.type == EventTypes.Aliases: + return False + + # Don't try to apply the room's retention policy if the event is a state + # event, as MSC1763 states that retention is only considered for non-state + # events. + if not event.is_state(): + max_lifetime = retention_policy.max_lifetime + + if max_lifetime is not None: + oldest_allowed_ts = clock.time_msec() - max_lifetime + + if event.origin_server_ts < oldest_allowed_ts: + return False + + return True + + def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str: """Get the actual history vis, from a state map including the history_visibility event From b470cab8e5fef4273d497bcc3fb6dc5cb2280d86 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 11:11:19 +0100 Subject: [PATCH 30/50] Split out _check_membership --- synapse/visibility.py | 93 ++++++++++++++++++++++++++++++------------- 1 file changed, 65 insertions(+), 28 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 0e74768b9120..0bb8fcb62c8b 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -187,26 +187,38 @@ def _check_client_allowed_to_see_event( if state is None: raise Exception("Missing state for non-outlier event") + # If the sender has been erased we must only return the redacted form. + if sender_erased: + event = prune_event(event) + # get the room_visibility at the time of the event. visibility = get_effective_room_visibility_from_state(state) - # Always allow history visibility events on boundaries. This is done - # by setting the effective visibility to the least restrictive - # of the old vs new. - if event.type == EventTypes.RoomHistoryVisibility: - prev_content = event.unsigned.get("prev_content", {}) - prev_visibility = prev_content.get("history_visibility", None) + # Check if the room has lax history visibility, allowing us to skip + # membership checks. + if _check_history_visibility(event, visibility, is_peeking): + return event - if prev_visibility not in VISIBILITY_PRIORITY: - prev_visibility = HistoryVisibility.SHARED + if _check_membership(user_id, event, visibility, state, is_peeking): + return event - new_priority = VISIBILITY_PRIORITY.index(visibility) - old_priority = VISIBILITY_PRIORITY.index(prev_visibility) - if old_priority < new_priority: - visibility = prev_visibility + return None + + +def _check_membership( + user_id: str, + event: EventBase, + visibility: str, + state: StateMap[EventBase], + is_peeking: bool, +) -> bool: + """Check whether the user can see the event due to their membership - # likewise, if the event is the user's own membership event, use - # the 'most joined' membership + Returns: + True if they can, False if they can't + """ + # If the event is the user's own membership event, use the 'most joined' + # membership membership = None if event.type == EventTypes.Member and event.state_key == user_id: membership = event.content.get("membership", None) @@ -226,7 +238,7 @@ def _check_client_allowed_to_see_event( if membership == "leave" and ( prev_membership == "join" or prev_membership == "invite" ): - return event + return True new_priority = MEMBERSHIP_PRIORITY.index(membership) old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) @@ -242,19 +254,19 @@ def _check_client_allowed_to_see_event( # if the user was a member of the room at the time of the event, # they can see it. if membership == Membership.JOIN: - return event + return True # otherwise, it depends on the room visibility. if visibility == HistoryVisibility.JOINED: # we weren't a member at the time of the event, so we can't # see this event. - return None + return False elif visibility == HistoryVisibility.INVITED: # user can also see the event if they were *invited* at the time # of the event. - return event if membership == Membership.INVITE else None + return membership == Membership.INVITE elif visibility == HistoryVisibility.SHARED and is_peeking: # if the visibility is shared, users cannot see the event unless @@ -265,16 +277,11 @@ def _check_client_allowed_to_see_event( # ideally we would share history up to the point they left. But # we don't know when they left. We just treat it as though they # never joined, and restrict access. - return None - - # the visibility is either shared or world_readable, and the user was - # not a member at the time. We allow it, provided the original sender - # has not requested their data to be erased, in which case, we return - # a redacted version. - if sender_erased: - return prune_event(event) + return False - return event + # The visibility is either shared or world_readable, and the user was + # not a member at the time. We allow it. + return True def _check_filter_send_to_client( @@ -317,9 +324,39 @@ def _check_filter_send_to_client( return True +def _check_history_visibility( + event: EventBase, visibility: str, is_peeking: bool +) -> bool: + """Check if event is allowed to be seen due to lax history visibility. + + Returns: + True if user can definitely see the event, False if maybe not. + """ + # Always allow history visibility events on boundaries. This is done + # by setting the effective visibility to the least restrictive + # of the old vs new. + if event.type == EventTypes.RoomHistoryVisibility: + prev_content = event.unsigned.get("prev_content", {}) + prev_visibility = prev_content.get("history_visibility", None) + + if prev_visibility not in VISIBILITY_PRIORITY: + prev_visibility = HistoryVisibility.SHARED + + new_priority = VISIBILITY_PRIORITY.index(visibility) + old_priority = VISIBILITY_PRIORITY.index(prev_visibility) + if old_priority < new_priority: + visibility = prev_visibility + + if visibility == HistoryVisibility.SHARED and not is_peeking: + return True + elif visibility == HistoryVisibility.WORLD_READABLE: + return True + + return False + + def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str: """Get the actual history vis, from a state map including the history_visibility event - Handles missing and invalid history visibility events. """ visibility_event = state.get(_HISTORY_VIS_KEY, None) From d5bc155663be2be38037cfe3a18176547ae93bf0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 11:39:44 +0100 Subject: [PATCH 31/50] Add filter_event_for_clients_with_state --- synapse/visibility.py | 111 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/synapse/visibility.py b/synapse/visibility.py index 0bb8fcb62c8b..8cd54135380d 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -19,8 +19,10 @@ from synapse.api.constants import EventTypes, HistoryVisibility, Membership from synapse.events import EventBase +from synapse.events.snapshot import EventContext from synapse.events.utils import prune_event from synapse.storage.controllers import StorageControllers +from synapse.storage.databases.main import DataStore from synapse.storage.state import StateFilter from synapse.types import RetentionPolicy, StateMap, get_domain_from_id from synapse.util import Clock @@ -124,6 +126,115 @@ def allowed(event: EventBase) -> Optional[EventBase]: return [ev for ev in filtered_events if ev] +async def filter_event_for_clients_with_state( + store: DataStore, + user_ids: List[str], + event: EventBase, + context: EventContext, + is_peeking: bool = False, + filter_send_to_client: bool = True, +) -> Collection[str]: + """ + Checks to see if an event is visible to the users in the list at the time of + the event + + Args: + store: databases + user_ids: user_ids to be checked + event: the event to be checked + context: EventContext for the event to be checked + + Returns: + list of uids for whom the event is visible + """ + # None of the users should see the event if it is soft_failed + if event.internal_metadata.is_soft_failed(): + return [] + + # Make a set for all user IDs that haven't been filtered out by a check. + allowed_user_ids = set(user_ids) + + # Only run some checks if these events aren't about to be sent to clients. This is + # because, if this is not the case, we're probably only checking if the users can + # see events in the room at that point in the DAG, and that shouldn't be decided + # on those checks. + if filter_send_to_client: + ignored_by = await store.ignored_by(event.sender) + retention_policy = await store.get_retention_policy_for_room(event.room_id) + + for user_id in user_ids: + if not _check_filter_send_to_client( + event, + store.clock, + retention_policy, + sender_ignored=user_id in ignored_by, + ): + allowed_user_ids.discard(user_id) + + if event.internal_metadata.outlier: + # Normally these can't be seen by clients, but we make an exception for + # for out-of-band membership events (eg, incoming invites, or rejections of + # said invite) for the user themselves. + if event.type == EventTypes.Member and event.state_key in allowed_user_ids: + logger.debug("Returning out-of-band-membership event %s", event) + return {event.state_key} + + return set() + + # First we get just the history visibility in case its shared/world-readable + # room. + visibility_state_map = await _get_state_map( + store, event, context, StateFilter.from_types([_HISTORY_VIS_KEY]) + ) + + visibility = get_effective_room_visibility_from_state(visibility_state_map) + if _check_history_visibility(event, visibility, is_peeking=is_peeking): + return allowed_user_ids + + # The history visibility isn't lax, so we now need to fetch the membership + # events of all the users. + + filter_list = [] + for user_id in allowed_user_ids: + filter_list.append((EventTypes.Member, user_id)) + filter_list.append((EventTypes.RoomHistoryVisibility, "")) + + state_filter = StateFilter.from_types(filter_list) + state_map = await _get_state_map(store, event, context, state_filter) + + # Now we check whether the membership allows each user to see the event. + return { + user_id + for user_id in allowed_user_ids + if _check_membership(user_id, event, visibility, state_map, is_peeking) + } + + +async def _get_state_map( + store: DataStore, event: EventBase, context: EventContext, state_filter: StateFilter +) -> StateMap[EventBase]: + """Helper function for getting a `StateMap[EventBase]` from an `EventContext`""" + state_map = await context.get_prev_state_ids(state_filter) + + # Use events rather than event ids as content from the events are needed in + # _check_visibility + event_map = await store.get_events(state_map.values(), get_prev_content=False) + + updated_state_map = {} + for state_key, event_id in state_map.items(): + state_event = event_map.get(event_id) + if state_event: + updated_state_map[state_key] = state_event + + if event.is_state(): + current_state_key = (event.type, event.state_key) + # Add current event to updated_state_map, we need to do this here as it + # may not have been persisted to the db yet + updated_state_map[current_state_key] = event + + return updated_state_map + + def _check_client_allowed_to_see_event( user_id: str, event: EventBase, From 3f908a8d1ba2c827ca5f19f9eeb068388d6407cf Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 11:41:32 +0100 Subject: [PATCH 32/50] Newfile --- changelog.d/13222.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/13222.misc diff --git a/changelog.d/13222.misc b/changelog.d/13222.misc new file mode 100644 index 000000000000..0bab1aed7042 --- /dev/null +++ b/changelog.d/13222.misc @@ -0,0 +1 @@ +Improve memory usage of calculating push actions for events in large rooms. From 99ce011fbac4c5e3dbea0039b3ef18d0105f9fe1 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 11:47:04 +0100 Subject: [PATCH 33/50] Fix newsfile --- changelog.d/13222.misc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/13222.misc b/changelog.d/13222.misc index 0bab1aed7042..3835e97ad9c7 100644 --- a/changelog.d/13222.misc +++ b/changelog.d/13222.misc @@ -1 +1 @@ -Improve memory usage of calculating push actions for events in large rooms. +Reduce memory consumption when processing incoming events in large rooms. From f562735872b8ac6c88d04fd944393f5a0f28c8d7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 12:20:31 +0100 Subject: [PATCH 34/50] Fix erased senders --- synapse/visibility.py | 44 ++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 8cd54135380d..9388e94a089b 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -136,7 +136,9 @@ async def filter_event_for_clients_with_state( ) -> Collection[str]: """ Checks to see if an event is visible to the users in the list at the time of - the event + the event. + + Note: This does *not* check if the sender of the event was erased. Args: store: databases @@ -206,7 +208,7 @@ async def filter_event_for_clients_with_state( return { user_id for user_id in allowed_user_ids - if _check_membership(user_id, event, visibility, state_map, is_peeking) + if _check_membership(user_id, event, visibility, state_map, is_peeking)[0] } @@ -298,22 +300,29 @@ def _check_client_allowed_to_see_event( if state is None: raise Exception("Missing state for non-outlier event") - # If the sender has been erased we must only return the redacted form. - if sender_erased: - event = prune_event(event) - # get the room_visibility at the time of the event. visibility = get_effective_room_visibility_from_state(state) # Check if the room has lax history visibility, allowing us to skip # membership checks. - if _check_history_visibility(event, visibility, is_peeking): + # + # We can only do this check if the sender has *not* been erased, as if they + # have we need to check the user's membership. + if not sender_erased and _check_history_visibility(event, visibility, is_peeking): return event - if _check_membership(user_id, event, visibility, state, is_peeking): - return event + allowed, membership = _check_membership( + user_id, event, visibility, state, is_peeking + ) + if not allowed: + return None - return None + # If the sender has been erased and the user was not joined at the time, we + # must only return the redacted form. + if sender_erased and membership != Membership.JOIN: + event = prune_event(event) + + return event def _check_membership( @@ -322,11 +331,12 @@ def _check_membership( visibility: str, state: StateMap[EventBase], is_peeking: bool, -) -> bool: +) -> Tuple[bool, Membership]: """Check whether the user can see the event due to their membership Returns: - True if they can, False if they can't + True if they can, False if they can't, plus the membership of the user + at the event. """ # If the event is the user's own membership event, use the 'most joined' # membership @@ -349,7 +359,7 @@ def _check_membership( if membership == "leave" and ( prev_membership == "join" or prev_membership == "invite" ): - return True + return True, membership new_priority = MEMBERSHIP_PRIORITY.index(membership) old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) @@ -365,14 +375,14 @@ def _check_membership( # if the user was a member of the room at the time of the event, # they can see it. if membership == Membership.JOIN: - return True + return True, membership # otherwise, it depends on the room visibility. if visibility == HistoryVisibility.JOINED: # we weren't a member at the time of the event, so we can't # see this event. - return False + return False, membership elif visibility == HistoryVisibility.INVITED: # user can also see the event if they were *invited* at the time @@ -388,11 +398,11 @@ def _check_membership( # ideally we would share history up to the point they left. But # we don't know when they left. We just treat it as though they # never joined, and restrict access. - return False + return False, membership # The visibility is either shared or world_readable, and the user was # not a member at the time. We allow it. - return True + return True, membership def _check_filter_send_to_client( From 3d0fdc4fbc49418dfe43542a28457db50fde4166 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 12:22:22 +0100 Subject: [PATCH 35/50] Review comments --- synapse/visibility.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 9388e94a089b..5b8d46fed704 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -128,7 +128,7 @@ def allowed(event: EventBase) -> Optional[EventBase]: async def filter_event_for_clients_with_state( store: DataStore, - user_ids: List[str], + user_ids: Collection[str], event: EventBase, context: EventContext, is_peeking: bool = False, @@ -145,9 +145,14 @@ async def filter_event_for_clients_with_state( user_ids: user_ids to be checked event: the event to be checked context: EventContext for the event to be checked + is_peeking: Whether the users are peeking into the room, ie not + currently joined + filter_send_to_client: Whether we're checking an event that's going to be + sent to a client. This might not always be the case since this function can + also be called to check whether a user can see the state at a given point. Returns: - list of uids for whom the event is visible + Collection of user IDs for whom the event is visible """ # None of the users should see the event if it is soft_failed if event.internal_metadata.is_soft_failed(): From 9f5ed7546501d2c8341b41ec995d0e649e670eca Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 12:42:08 +0100 Subject: [PATCH 36/50] Use typed return values --- synapse/visibility.py | 92 ++++++++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 32 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 5b8d46fed704..ee37835639f9 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -13,8 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +from enum import Enum, auto from typing import Collection, Dict, FrozenSet, List, Optional, Tuple +import attr from typing_extensions import Final from synapse.api.constants import EventTypes, HistoryVisibility, Membership @@ -170,11 +172,14 @@ async def filter_event_for_clients_with_state( retention_policy = await store.get_retention_policy_for_room(event.room_id) for user_id in user_ids: - if not _check_filter_send_to_client( - event, - store.clock, - retention_policy, - sender_ignored=user_id in ignored_by, + if ( + _check_filter_send_to_client( + event, + store.clock, + retention_policy, + sender_ignored=user_id in ignored_by, + ) + == _CheckFilter.DENIED ): allowed_user_ids.discard(user_id) @@ -195,7 +200,10 @@ async def filter_event_for_clients_with_state( ) visibility = get_effective_room_visibility_from_state(visibility_state_map) - if _check_history_visibility(event, visibility, is_peeking=is_peeking): + if ( + _check_history_visibility(event, visibility, is_peeking=is_peeking) + == _CheckVisibility.ALLOWED + ): return allowed_user_ids # The history visibility isn't lax, so we now need to fetch the membership @@ -213,7 +221,7 @@ async def filter_event_for_clients_with_state( return { user_id for user_id in allowed_user_ids - if _check_membership(user_id, event, visibility, state_map, is_peeking)[0] + if _check_membership(user_id, event, visibility, state_map, is_peeking).allowed } @@ -283,8 +291,9 @@ def _check_client_allowed_to_see_event( # see events in the room at that point in the DAG, and that shouldn't be decided # on those checks. if filter_send_to_client: - if not _check_filter_send_to_client( - event, clock, retention_policy, sender_ignored + if ( + _check_filter_send_to_client(event, clock, retention_policy, sender_ignored) + == _CheckFilter.DENIED ): return None @@ -313,30 +322,39 @@ def _check_client_allowed_to_see_event( # # We can only do this check if the sender has *not* been erased, as if they # have we need to check the user's membership. - if not sender_erased and _check_history_visibility(event, visibility, is_peeking): + if ( + not sender_erased + and _check_history_visibility(event, visibility, is_peeking) + == _CheckVisibility.ALLOWED + ): return event - allowed, membership = _check_membership( - user_id, event, visibility, state, is_peeking - ) - if not allowed: + membership_result = _check_membership(user_id, event, visibility, state, is_peeking) + if not membership_result.allowed: return None # If the sender has been erased and the user was not joined at the time, we # must only return the redacted form. - if sender_erased and membership != Membership.JOIN: + if sender_erased and not membership_result.joined: event = prune_event(event) return event +@attr.s(frozen=True, auto_attribs=True) +class _CheckMembershipReturn: + "Return value of _check_membership" + allowed: bool + joined: bool + + def _check_membership( user_id: str, event: EventBase, visibility: str, state: StateMap[EventBase], is_peeking: bool, -) -> Tuple[bool, Membership]: +) -> _CheckMembershipReturn: """Check whether the user can see the event due to their membership Returns: @@ -364,7 +382,7 @@ def _check_membership( if membership == "leave" and ( prev_membership == "join" or prev_membership == "invite" ): - return True, membership + return _CheckMembershipReturn(True, membership == Membership.JOIN) new_priority = MEMBERSHIP_PRIORITY.index(membership) old_priority = MEMBERSHIP_PRIORITY.index(prev_membership) @@ -380,19 +398,19 @@ def _check_membership( # if the user was a member of the room at the time of the event, # they can see it. if membership == Membership.JOIN: - return True, membership + return _CheckMembershipReturn(True, True) # otherwise, it depends on the room visibility. if visibility == HistoryVisibility.JOINED: # we weren't a member at the time of the event, so we can't # see this event. - return False, membership + return _CheckMembershipReturn(False, False) elif visibility == HistoryVisibility.INVITED: # user can also see the event if they were *invited* at the time # of the event. - return membership == Membership.INVITE + return _CheckMembershipReturn(membership == Membership.INVITE, False) elif visibility == HistoryVisibility.SHARED and is_peeking: # if the visibility is shared, users cannot see the event unless @@ -403,11 +421,16 @@ def _check_membership( # ideally we would share history up to the point they left. But # we don't know when they left. We just treat it as though they # never joined, and restrict access. - return False, membership + return _CheckMembershipReturn(False, False) # The visibility is either shared or world_readable, and the user was # not a member at the time. We allow it. - return True, membership + return _CheckMembershipReturn(True, False) + + +class _CheckFilter(Enum): + MAYBE_ALLOWED = auto() + DENIED = auto() def _check_filter_send_to_client( @@ -415,7 +438,7 @@ def _check_filter_send_to_client( clock: Clock, retention_policy: RetentionPolicy, sender_ignored: bool, -) -> bool: +) -> _CheckFilter: """Apply checks for sending events to client Returns: @@ -423,17 +446,17 @@ def _check_filter_send_to_client( """ if event.type == EventTypes.Dummy: - return False + return _CheckFilter.DENIED if not event.is_state() and sender_ignored: - return False + return _CheckFilter.DENIED # Until MSC2261 has landed we can't redact malicious alias events, so for # now we temporarily filter out m.room.aliases entirely to mitigate # abuse, while we spec a better solution to advertising aliases # on rooms. if event.type == EventTypes.Aliases: - return False + return _CheckFilter.DENIED # Don't try to apply the room's retention policy if the event is a state # event, as MSC1763 states that retention is only considered for non-state @@ -445,14 +468,19 @@ def _check_filter_send_to_client( oldest_allowed_ts = clock.time_msec() - max_lifetime if event.origin_server_ts < oldest_allowed_ts: - return False + return _CheckFilter.DENIED + + return _CheckFilter.MAYBE_ALLOWED + - return True +class _CheckVisibility(Enum): + ALLOWED = auto() + MAYBE_DENIED = auto() def _check_history_visibility( event: EventBase, visibility: str, is_peeking: bool -) -> bool: +) -> _CheckVisibility: """Check if event is allowed to be seen due to lax history visibility. Returns: @@ -474,11 +502,11 @@ def _check_history_visibility( visibility = prev_visibility if visibility == HistoryVisibility.SHARED and not is_peeking: - return True + return _CheckVisibility.ALLOWED elif visibility == HistoryVisibility.WORLD_READABLE: - return True + return _CheckVisibility.ALLOWED - return False + return _CheckVisibility.MAYBE_DENIED def get_effective_room_visibility_from_state(state: StateMap[EventBase]) -> str: From 4419b5865408b8fcaa04537f07fac2389d970304 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 13:08:42 +0100 Subject: [PATCH 37/50] Fix erased senders mk2 --- synapse/visibility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index ee37835639f9..2a1a4395703f 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -117,7 +117,7 @@ def allowed(event: EventBase) -> Optional[EventBase]: retention_policy=retention_policies[room_id], state=event_id_to_state.get(event.event_id), is_peeking=is_peeking, - sender_erased=event.sender in erased_senders, + sender_erased=erased_senders.get(event.sender, False), ) # Check each event: gives an iterable of None or (a potentially modified) From 43e21334cfc387b0e7268c24e8ef7866cc4f9a1a Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 8 Jul 2022 14:12:00 +0100 Subject: [PATCH 38/50] Fix admin cmd --- synapse/app/admin_cmd.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 561621a28540..87f82bd9a5c7 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -39,6 +39,7 @@ from synapse.replication.slave.storage.receipts import SlavedReceiptsStore from synapse.replication.slave.storage.registration import SlavedRegistrationStore from synapse.server import HomeServer +from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.storage.databases.main.room import RoomWorkerStore from synapse.types import StateMap from synapse.util import SYNAPSE_VERSION @@ -60,7 +61,17 @@ class AdminCmdSlavedStore( BaseSlavedStore, RoomWorkerStore, ): - pass + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + # Annoyingly `filter_events_for_client` assumes that this exists. We + # should refactor it to take a `Clock` directly. + self.clock = hs.get_clock() class AdminCmdServer(HomeServer): From 0b59e93a5c1486eb70d7d03b79f5a311c2559dfd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 11:15:45 +0100 Subject: [PATCH 39/50] Add slots --- synapse/visibility.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 2a1a4395703f..9abbaa5a6464 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -341,7 +341,7 @@ def _check_client_allowed_to_see_event( return event -@attr.s(frozen=True, auto_attribs=True) +@attr.s(frozen=True, slots=True, auto_attribs=True) class _CheckMembershipReturn: "Return value of _check_membership" allowed: bool From d370472c989cd43755cf90b42e1997add6ed67b4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 18:53:09 +0100 Subject: [PATCH 40/50] Fix invalidation --- synapse/storage/_base.py | 1 + synapse/storage/databases/main/cache.py | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 66f566579844..b8c8dcd76bfc 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -78,6 +78,7 @@ def _invalidate_state_caches( self._attempt_to_invalidate_cache( "get_number_joined_users_in_room", (room_id,) ) + self._attempt_to_invalidate_cache("get_local_users_in_room", (room_id,)) for user_id in members_changed: self._attempt_to_invalidate_cache( diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index 445617d0206c..1653a6a9b694 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -217,9 +217,6 @@ def _invalidate_caches_for_event( if etype == EventTypes.Member: self._membership_stream_cache.entity_has_changed(state_key, stream_ordering) self.get_invited_rooms_for_local_user.invalidate((state_key,)) - self.get_local_users_in_room.invalidate((room_id,)) - self.get_number_joined_users_in_room.invalidate((room_id,)) - self.get_user_in_room_with_profile.invalidate((room_id, state_key)) if relates_to: self.get_relations_for_event.invalidate((relates_to,)) From 94c5419c730895c5b60f06186b6f3586983e13e0 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 18:53:58 +0100 Subject: [PATCH 41/50] Rewrap lines --- synapse/push/bulk_push_rule_evaluator.py | 5 +++-- synapse/storage/databases/main/roommember.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 9544494d5cc3..e30ae6f75b6d 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -283,8 +283,9 @@ async def action_for_event_by_user( event.room_id, users ) - # This is a check for the case where user joins a room without being allowed to see history, and then the server - # receives a delayed event from before the user joined, which they should not be pushed for + # This is a check for the case where user joins a room without being + # allowed to see history, and then the server receives a delayed event + # from before the user joined, which they should not be pushed for uids_with_visibility = await filter_event_for_clients_with_state( self.store, users, event, context ) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index d2632945936c..f228310a5f5a 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -233,8 +233,8 @@ async def get_subset_users_in_room_with_profiles( revealed to users who are already in this room. Args: - room_id: The ID of the room to retrieve the users of. user_ids: a - list of users in the room to run the query for + room_id: The ID of the room to retrieve the users of. + user_ids: a list of users in the room to run the query for Returns: A mapping from user ID to ProfileInfo. From ebf01b94064797c9da1be45941ca3fea1e919be4 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 18:54:28 +0100 Subject: [PATCH 42/50] Use Collection --- synapse/storage/databases/main/roommember.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index f228310a5f5a..3316e12a7d06 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -222,7 +222,7 @@ def get_user_in_room_with_profile( cached_method_name="get_user_in_room_with_profile", list_name="user_ids" ) async def get_subset_users_in_room_with_profiles( - self, room_id: str, user_ids: List[str] + self, room_id: str, user_ids: Collection[str] ) -> Dict[str, ProfileInfo]: """Get a mapping from user ID to profile information for a list of users in a given room. From 47cfc15c7ea24bd9bdb7baa68b4db1fd6bcf4484 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 18:55:12 +0100 Subject: [PATCH 43/50] Update docstring --- synapse/push/bulk_push_rule_evaluator.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index e30ae6f75b6d..69062e202881 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -112,8 +112,11 @@ def __init__(self, hs: "HomeServer"): async def get_rules( self, room_id: str, event: EventBase ) -> Dict[str, List[Dict[str, dict]]]: - """Given an event return the rules for all users who are - currently in the room. + """Given an event return the rules for all users currently in the room + who should be notified of this event. + + Returns: + Mapping from user ID to their push rules. """ local_users = await self.store.get_local_users_in_room(room_id) From 615d053c20f3120e93ff239ad69cabfc8ab8112c Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 18:59:31 +0100 Subject: [PATCH 44/50] Remove pointless inclusion of join event --- synapse/push/bulk_push_rule_evaluator.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 69062e202881..0cd18003f16e 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -121,11 +121,6 @@ async def get_rules( local_users = await self.store.get_local_users_in_room(room_id) - if event.type == EventTypes.Member and event.membership == Membership.JOIN: - if self.hs.is_mine_id(event.state_key): - local_users = list(local_users) - local_users.append(event.state_key) - ret_rules_by_user = await self.store.bulk_get_push_rules(local_users) logger.debug("Users in room: %s", local_users) From 1e26f18bbd9ede5d49f1cf1b1d2d01541fb3acb8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 19:01:36 +0100 Subject: [PATCH 45/50] Merge get_rules functions --- synapse/push/bulk_push_rule_evaluator.py | 52 +++++++++--------------- 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 0cd18003f16e..af92bfe2e18d 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -109,51 +109,37 @@ def __init__(self, hs: "HomeServer"): # Whether to support MSC3772 is supported. self._relations_match_enabled = self.hs.config.experimental.msc3772_enabled - async def get_rules( - self, room_id: str, event: EventBase - ) -> Dict[str, List[Dict[str, dict]]]: - """Given an event return the rules for all users currently in the room - who should be notified of this event. + async def _get_rules_for_event( + self, + event: EventBase, + ) -> Dict[str, List[Dict[str, Any]]]: + """Get the push rules for all users who should be notified about the + event. Returns: - Mapping from user ID to their push rules. + Mapping of user ID to their push rules. """ - local_users = await self.store.get_local_users_in_room(room_id) + local_users = await self.store.get_local_users_in_room(event.room_id) - ret_rules_by_user = await self.store.bulk_get_push_rules(local_users) + # if this event is an invite event, we may need to run rules for the user + # who's been invited, otherwise they won't get told they've been invited + if event.type == EventTypes.Member and event.membership == Membership.INVITE: + invited = event.state_key + if invited and self.hs.is_mine_id(invited) and invited not in local_users: + local_users = list(local_users) + local_users.append(invited) + + rules_by_user = await self.store.bulk_get_push_rules(local_users) logger.debug("Users in room: %s", local_users) if logger.isEnabledFor(logging.DEBUG): logger.debug( "Returning push rules for %r %r", - room_id, - ret_rules_by_user.keys(), + event.room_id, + rules_by_user.keys(), ) - return ret_rules_by_user - - async def _get_rules_for_event( - self, - event: EventBase, - ) -> Dict[str, List[Dict[str, Any]]]: - """This gets the rules for all users in the room at the time of the event, - as well as the push rules for the invitee if the event is an invite. - - Returns: - dict of user_id -> push_rules - """ - rules_by_user = await self.get_rules(event.room_id, event) - - # if this event is an invite event, we may need to run rules for the user - # who's been invited, otherwise they won't get told they've been invited - if event.type == "m.room.member" and event.content["membership"] == "invite": - invited = event.state_key - if invited and self.hs.is_mine_id(invited): - rules_by_user = dict(rules_by_user) - rules_by_user[invited] = await self.store.get_push_rules_for_user( - invited - ) return rules_by_user From 763dd3bfc58e2114d790cc2aeccd65c0b10bd758 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 19:09:38 +0100 Subject: [PATCH 46/50] Remove pointless copy --- synapse/push/bulk_push_rule_evaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index af92bfe2e18d..322393211ff9 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -262,7 +262,7 @@ async def action_for_event_by_user( self._relations_match_enabled, ) - users = list(rules_by_user.keys()) + users = rules_by_user.keys() profiles = await self.store.get_subset_users_in_room_with_profiles( event.room_id, users ) From 858ebe823b593b8266b01aca15ef57052a2afab8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 19:10:22 +0100 Subject: [PATCH 47/50] Fix logging --- synapse/push/bulk_push_rule_evaluator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 322393211ff9..580173d5130a 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -138,7 +138,7 @@ async def _get_rules_for_event( logger.debug( "Returning push rules for %r %r", event.room_id, - rules_by_user.keys(), + list(rules_by_user.keys()), ) return rules_by_user From fadc16a0e9baca15fa5f371a478abbb5c77502eb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 19:12:52 +0100 Subject: [PATCH 48/50] Add note about efficiency --- synapse/push/bulk_push_rule_evaluator.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 580173d5130a..e581af9a9af3 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -113,12 +113,21 @@ async def _get_rules_for_event( self, event: EventBase, ) -> Dict[str, List[Dict[str, Any]]]: - """Get the push rules for all users who should be notified about the - event. + """Get the push rules for all users who may need to be notified about + the event. + + Note: this does not check if the user is allowed to see the event. Returns: Mapping of user ID to their push rules. """ + # We get the users who may need to be notified by first fetching the + # local users currently in the room, finding those that have push rules, + # and *then* checking which users are actually allowed to see the event. + # + # The alternative is to first fetch all users that were joined at the + # event, but that requires fetching the full state at the event, which + # may be expensive for large rooms with few local users. local_users = await self.store.get_local_users_in_room(event.room_id) From f9be434a510eeca609cbd6fab64a01671df3f6e9 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 19:16:47 +0100 Subject: [PATCH 49/50] Mark `get_local_users_in_room` as iterable cache --- synapse/storage/databases/main/roommember.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 3316e12a7d06..0b5e4e425412 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -518,7 +518,7 @@ def _get_rooms_for_local_user_where_membership_is_txn( return results - @cached() + @cached(iterable=True) async def get_local_users_in_room(self, room_id: str) -> List[str]: """ Retrieves a list of the current roommembers who are local to the server. From 6429b27983350b481402e245763b1ffc5e4f1236 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 11 Jul 2022 19:40:36 +0100 Subject: [PATCH 50/50] Up the query counts in tests --- tests/rest/client/test_rooms.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index d19b1bb85860..df7ffbe54538 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -709,7 +709,7 @@ def test_post_room_no_keys(self) -> None: self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(33, channel.resource_usage.db_txn_count) + self.assertEqual(37, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -722,7 +722,7 @@ def test_post_room_initial_state(self) -> None: self.assertEqual(200, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(37, channel.resource_usage.db_txn_count) + self.assertEqual(41, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id