From bcc4f4a53c313d11908af8667a0a0bc8b8264c65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0imon=20Brandner?= Date: Wed, 11 May 2022 17:19:24 +0200 Subject: [PATCH 01/10] Fix expectation for `test_handles_missing_content_of_m_read` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Šimon Brandner --- tests/handlers/test_receipts.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py index 0482a1ea34fb..dc47b5004e60 100644 --- a/tests/handlers/test_receipts.py +++ b/tests/handlers/test_receipts.py @@ -146,7 +146,6 @@ def test_handles_missing_content_of_m_read(self): [ { "content": { - "$14356419ggffg114394fHBLK:matrix.org": {ReceiptTypes.READ: {}}, "$1435641916114394fHBLK:matrix.org": { ReceiptTypes.READ: { "@user:jki.re": { From 96fd8b336709c7e6dbeba2fe27b264734c2d5fc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0imon=20Brandner?= Date: Wed, 11 May 2022 17:22:19 +0200 Subject: [PATCH 02/10] Mutate rather than copy in `filter_out_private()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Šimon Brandner --- synapse/handlers/receipts.py | 42 +++++++++++++----------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 43d615357b3c..412bcc60eba9 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -165,7 +165,7 @@ def __init__(self, hs: "HomeServer"): self.config = hs.config @staticmethod - def filter_out_private(events: List[JsonDict], user_id: str) -> List[JsonDict]: + def filter_out_private(events: List[JsonDict], our_user_id: str) -> List[JsonDict]: """ This method takes in what is returned by get_linearized_receipts_for_rooms() and goes through read receipts @@ -173,35 +173,23 @@ def filter_out_private(events: List[JsonDict], user_id: str) -> List[JsonDict]: current user. """ - visible_events = [] - # filter out private receipts the user shouldn't see - for event in events: + for index, event in enumerate(events): content = event.get("content", {}) - new_event = event.copy() - new_event["content"] = {} - - for event_id, event_content in content.items(): - receipt_event = {} - for receipt_type, receipt_content in event_content.items(): + for event_id, event_content in list(content.items()): + for receipt_type, receipt_content in list(event_content.items()): if receipt_type == ReceiptTypes.READ_PRIVATE: - user_rr = receipt_content.get(user_id, None) - if user_rr: - receipt_event[ReceiptTypes.READ_PRIVATE] = { - user_id: user_rr.copy() - } - else: - receipt_event[receipt_type] = receipt_content.copy() - - # Only include the receipt event if it is non-empty. - if receipt_event: - new_event["content"][event_id] = receipt_event - - # Append new_event to visible_events unless empty - if len(new_event["content"].keys()) > 0: - visible_events.append(new_event) - - return visible_events + for user_id in list(receipt_content.keys()): + if user_id != our_user_id: + del receipt_content[user_id] + + if len(receipt_content) == 0: + del event_content[receipt_type] + if len(event_content) == 0: + del content[event_id] + if len(content) == 0: + del events[index] + return events async def get_new_events( self, From d0013b1638f12b3a18b7087ce941e866c0288413 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0imon=20Brandner?= Date: Wed, 11 May 2022 17:25:27 +0200 Subject: [PATCH 03/10] Add changelog MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Šimon Brandner --- changelog.d/12711.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/12711.misc diff --git a/changelog.d/12711.misc b/changelog.d/12711.misc new file mode 100644 index 000000000000..0831ce045268 --- /dev/null +++ b/changelog.d/12711.misc @@ -0,0 +1 @@ +Optimize private read receipt filtering. From a5059b9896e9ec173c3a8dab92ee5ae59c64ee23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0imon=20Brandner?= Date: Wed, 11 May 2022 19:54:19 +0200 Subject: [PATCH 04/10] Copy `events` to not mutate cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Šimon Brandner --- synapse/handlers/receipts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 412bcc60eba9..022281902fe4 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -173,6 +173,7 @@ def filter_out_private(events: List[JsonDict], our_user_id: str) -> List[JsonDic current user. """ + events = events.copy() # filter out private receipts the user shouldn't see for index, event in enumerate(events): content = event.get("content", {}) From f6da697fb9e48aa4404a3222408a16705b2a396d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0imon=20Brandner?= Date: Wed, 11 May 2022 19:54:40 +0200 Subject: [PATCH 05/10] `test_we_do_not_mutate` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Šimon Brandner --- tests/handlers/test_receipts.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py index dc47b5004e60..8e0855a81b36 100644 --- a/tests/handlers/test_receipts.py +++ b/tests/handlers/test_receipts.py @@ -331,6 +331,27 @@ def test_leaves_our_private_and_their_public(self): ], ) + def test_we_do_not_mutate(self): + events = [ + { + "content": { + "$1435641916114394fHBLK:matrix.org": { + ReceiptTypes.READ_PRIVATE: { + "@rikj:jki.re": { + "ts": 1436451550453, + } + } + } + }, + "room_id": "!jEsUZKDJdhlrceRyVU:example.org", + "type": "m.receipt", + } + ] + events_copy = events.copy() + + self.event_source.filter_out_private(events, "@me:server.org") + self.assertEqual(events, events_copy) + def _test_filters_private( self, events: List[JsonDict], expected_output: List[JsonDict] ): From 67313884f86e91e95a8d85e186cff4e4f4c75d9c Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 12 May 2022 14:43:09 -0400 Subject: [PATCH 06/10] Rename method / improve docstring. --- synapse/handlers/initial_sync.py | 6 ++++-- synapse/handlers/receipts.py | 19 +++++++++++++------ tests/handlers/test_receipts.py | 7 ++++--- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/synapse/handlers/initial_sync.py b/synapse/handlers/initial_sync.py index 7b94770f9722..de09aed3a356 100644 --- a/synapse/handlers/initial_sync.py +++ b/synapse/handlers/initial_sync.py @@ -143,7 +143,7 @@ async def _snapshot_all_rooms( to_key=int(now_token.receipt_key), ) if self.hs.config.experimental.msc2285_enabled: - receipt = ReceiptEventSource.filter_out_private(receipt, user_id) + receipt = ReceiptEventSource.filter_out_private_receipts(receipt, user_id) tags_by_room = await self.store.get_tags_for_user(user_id) @@ -449,7 +449,9 @@ async def get_receipts() -> List[JsonDict]: if not receipts: return [] if self.hs.config.experimental.msc2285_enabled: - receipts = ReceiptEventSource.filter_out_private(receipts, user_id) + receipts = ReceiptEventSource.filter_out_private_receipts( + receipts, user_id + ) return receipts presence, receipts, (messages, token) = await make_deferred_yieldable( diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 022281902fe4..42c4da1e7ee5 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -165,12 +165,17 @@ def __init__(self, hs: "HomeServer"): self.config = hs.config @staticmethod - def filter_out_private(events: List[JsonDict], our_user_id: str) -> List[JsonDict]: + def filter_out_private_receipts( + events: List[JsonDict], our_user_id: str + ) -> List[JsonDict]: """ - This method takes in what is returned by - get_linearized_receipts_for_rooms() and goes through read receipts - filtering out m.read.private receipts if they were not sent by the - current user. + Filters a list of serialized receipts (as returned by /sync and /initialSync) + and removes private read receipts of other users. + + This operates on the return value of get_linearized_receipts_for_rooms(): + + A list of mappings, each mapping has a `content` field, which is a map + of event ID -> receipt type -> user ID -> receipt information. """ events = events.copy() @@ -212,7 +217,9 @@ async def get_new_events( ) if self.config.experimental.msc2285_enabled: - events = ReceiptEventSource.filter_out_private(events, user.to_string()) + events = ReceiptEventSource.filter_out_private_receipts( + events, user.to_string() + ) return events, to_key diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py index 8e0855a81b36..fa2eb756add6 100644 --- a/tests/handlers/test_receipts.py +++ b/tests/handlers/test_receipts.py @@ -349,12 +349,13 @@ def test_we_do_not_mutate(self): ] events_copy = events.copy() - self.event_source.filter_out_private(events, "@me:server.org") - self.assertEqual(events, events_copy) + self._test_filters_private(events, events_copy) def _test_filters_private( self, events: List[JsonDict], expected_output: List[JsonDict] ): """Tests that the _filter_out_private returns the expected output""" - filtered_events = self.event_source.filter_out_private(events, "@me:server.org") + filtered_events = self.event_source.filter_out_private_receipts( + events, "@me:server.org" + ) self.assertEqual(filtered_events, expected_output) From 0afd9d2327da7aebed485068497a3b8c3b4e024e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 12 May 2022 15:07:26 -0400 Subject: [PATCH 07/10] Fix test. --- tests/handlers/test_receipts.py | 55 +++++++++++---------------------- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py index fa2eb756add6..e9feb4f02aa3 100644 --- a/tests/handlers/test_receipts.py +++ b/tests/handlers/test_receipts.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. - +from copy import deepcopy from typing import List from synapse.api.constants import ReceiptTypes @@ -26,24 +26,25 @@ def prepare(self, reactor, clock, hs): self.event_source = hs.get_event_sources().sources.receipt def test_filters_out_private_receipt(self): - self._test_filters_private( - [ - { - "content": { - "$1435641916114394fHBLK:matrix.org": { - ReceiptTypes.READ_PRIVATE: { - "@rikj:jki.re": { - "ts": 1436451550453, - } + events = [ + { + "content": { + "$1435641916114394fHBLK:matrix.org": { + ReceiptTypes.READ_PRIVATE: { + "@rikj:jki.re": { + "ts": 1436451550453, } } - }, - "room_id": "!jEsUZKDJdhlrceRyVU:example.org", - "type": "m.receipt", - } - ], - [], - ) + } + }, + "room_id": "!jEsUZKDJdhlrceRyVU:example.org", + "type": "m.receipt", + } + ] + original_events = deepcopy(events) + self._test_filters_private(events, []) + # Since the events are fed in from a cache they should not be modified. + self.assertEqual(events, original_events) def test_filters_out_private_receipt_and_ignores_rest(self): self._test_filters_private( @@ -331,26 +332,6 @@ def test_leaves_our_private_and_their_public(self): ], ) - def test_we_do_not_mutate(self): - events = [ - { - "content": { - "$1435641916114394fHBLK:matrix.org": { - ReceiptTypes.READ_PRIVATE: { - "@rikj:jki.re": { - "ts": 1436451550453, - } - } - } - }, - "room_id": "!jEsUZKDJdhlrceRyVU:example.org", - "type": "m.receipt", - } - ] - events_copy = events.copy() - - self._test_filters_private(events, events_copy) - def _test_filters_private( self, events: List[JsonDict], expected_output: List[JsonDict] ): From 9cc0bead400ed33b51cffd8c26964018fdbd3bfa Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 12 May 2022 15:13:50 -0400 Subject: [PATCH 08/10] Properly avoid mutating the cache. --- synapse/handlers/receipts.py | 59 +++++++++++++++++++++++---------- tests/handlers/test_receipts.py | 1 - 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index 42c4da1e7ee5..f083f4585be4 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -166,7 +166,7 @@ def __init__(self, hs: "HomeServer"): @staticmethod def filter_out_private_receipts( - events: List[JsonDict], our_user_id: str + events: List[JsonDict], user_id: str ) -> List[JsonDict]: """ Filters a list of serialized receipts (as returned by /sync and /initialSync) @@ -178,24 +178,47 @@ def filter_out_private_receipts( of event ID -> receipt type -> user ID -> receipt information. """ - events = events.copy() + visible_events = [] + # filter out private receipts the user shouldn't see - for index, event in enumerate(events): - content = event.get("content", {}) - for event_id, event_content in list(content.items()): - for receipt_type, receipt_content in list(event_content.items()): - if receipt_type == ReceiptTypes.READ_PRIVATE: - for user_id in list(receipt_content.keys()): - if user_id != our_user_id: - del receipt_content[user_id] - - if len(receipt_content) == 0: - del event_content[receipt_type] - if len(event_content) == 0: - del content[event_id] - if len(content) == 0: - del events[index] - return events + for event in events: + # The event content with other user's private read receipts removed. + content = {} + for event_id, event_content in event.get("content", {}).items(): + receipt_event = event_content + # If there are no private read receipts, no additional logic is + # needed. + if ReceiptTypes.READ_PRIVATE in receipt_event: + # Make a copy without private read receipts. + receipt_event = { + k: v + for k, v in receipt_event.items() + if k != ReceiptTypes.READ_PRIVATE + } + + # Check the original content for the current user's private + # read receipt. If it exists, add it to the new content. + user_private_read_receipt = event_content[ + ReceiptTypes.READ_PRIVATE + ].get(user_id, None) + if user_private_read_receipt: + receipt_event[ReceiptTypes.READ_PRIVATE] = { + user_id: user_private_read_receipt + } + + # Only include the receipt event if it is non-empty. + if receipt_event: + content[event_id] = receipt_event + + # Include the event if there is at least one non-private read receipt + # or the current user has a private read receipt. + if content: + # Build a new event to avoid mutating the cache. + new_event = {k: v for k, v in event.items() if k != "content"} + new_event["content"] = content + visible_events.append(new_event) + + return visible_events async def get_new_events( self, diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py index e9feb4f02aa3..6c34a3804839 100644 --- a/tests/handlers/test_receipts.py +++ b/tests/handlers/test_receipts.py @@ -131,7 +131,6 @@ def test_handles_missing_content_of_m_read(self): [ { "content": { - "$14356419ggffg114394fHBLK:matrix.org": {ReceiptTypes.READ: {}}, "$1435641916114394fHBLK:matrix.org": { ReceiptTypes.READ: { "@user:jki.re": { From b696139d21043b99cae21fa7935eb4cd3ab8551d Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Thu, 12 May 2022 15:24:59 -0400 Subject: [PATCH 09/10] Expand comments and rename variables for clarity. --- synapse/handlers/receipts.py | 63 ++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index f083f4585be4..f467911694ae 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -166,7 +166,7 @@ def __init__(self, hs: "HomeServer"): @staticmethod def filter_out_private_receipts( - events: List[JsonDict], user_id: str + rooms: List[JsonDict], user_id: str ) -> List[JsonDict]: """ Filters a list of serialized receipts (as returned by /sync and /initialSync) @@ -174,51 +174,58 @@ def filter_out_private_receipts( This operates on the return value of get_linearized_receipts_for_rooms(): - A list of mappings, each mapping has a `content` field, which is a map - of event ID -> receipt type -> user ID -> receipt information. + Args: + rooms: A list of mappings, each mapping has a `content` field, which + is a map of event ID -> receipt type -> user ID -> receipt information. + + Returns: + The same as rooms, but filtered. """ - visible_events = [] + result = [] - # filter out private receipts the user shouldn't see - for event in events: - # The event content with other user's private read receipts removed. + # Iterate through each room's receipt content. + for room in rooms: + # The receipt content with other user's private read receipts removed. content = {} - for event_id, event_content in event.get("content", {}).items(): - receipt_event = event_content - # If there are no private read receipts, no additional logic is - # needed. - if ReceiptTypes.READ_PRIVATE in receipt_event: - # Make a copy without private read receipts. - receipt_event = { - k: v - for k, v in receipt_event.items() - if k != ReceiptTypes.READ_PRIVATE + + # Iterate over each event ID / receipts for that event. + for event_id, orig_event_content in room.get("content", {}).items(): + event_content = orig_event_content + # If there are private read receipts, additional logic is necessary. + if ReceiptTypes.READ_PRIVATE in event_content: + # Make a copy without private read receipts to avoid leaking + # other user's private read receipts.. + event_content = { + receipt_type: receipt_value + for receipt_type, receipt_value in event_content.items() + if receipt_type != ReceiptTypes.READ_PRIVATE } - # Check the original content for the current user's private - # read receipt. If it exists, add it to the new content. - user_private_read_receipt = event_content[ + # Copy the current user's private read receipt from the + # original content, if it exists. + user_private_read_receipt = orig_event_content[ ReceiptTypes.READ_PRIVATE ].get(user_id, None) if user_private_read_receipt: - receipt_event[ReceiptTypes.READ_PRIVATE] = { + event_content[ReceiptTypes.READ_PRIVATE] = { user_id: user_private_read_receipt } - # Only include the receipt event if it is non-empty. - if receipt_event: - content[event_id] = receipt_event + # Include the event if there is at least one non-private read + # receipt or the current user has a private read receipt. + if event_content: + content[event_id] = event_content # Include the event if there is at least one non-private read receipt # or the current user has a private read receipt. if content: # Build a new event to avoid mutating the cache. - new_event = {k: v for k, v in event.items() if k != "content"} - new_event["content"] = content - visible_events.append(new_event) + new_room = {k: v for k, v in room.items() if k != "content"} + new_room["content"] = content + result.append(new_room) - return visible_events + return result async def get_new_events( self, From a873009a9ef9c1c1e3c2e11afa3ea2dd0483283e Mon Sep 17 00:00:00 2001 From: Patrick Cloke Date: Mon, 16 May 2022 10:36:19 -0400 Subject: [PATCH 10/10] Update tests based on feedback. --- synapse/handlers/receipts.py | 4 +- tests/handlers/test_receipts.py | 89 ++++++++++++++------------------- 2 files changed, 41 insertions(+), 52 deletions(-) diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index f467911694ae..550d58b0e1c9 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -172,7 +172,9 @@ def filter_out_private_receipts( Filters a list of serialized receipts (as returned by /sync and /initialSync) and removes private read receipts of other users. - This operates on the return value of get_linearized_receipts_for_rooms(): + This operates on the return value of get_linearized_receipts_for_rooms(), + which is wrapped in a cache. Care must be taken to ensure that the input + values are not modified. Args: rooms: A list of mappings, each mapping has a `content` field, which diff --git a/tests/handlers/test_receipts.py b/tests/handlers/test_receipts.py index 6c34a3804839..78807cdcfcdc 100644 --- a/tests/handlers/test_receipts.py +++ b/tests/handlers/test_receipts.py @@ -26,25 +26,24 @@ def prepare(self, reactor, clock, hs): self.event_source = hs.get_event_sources().sources.receipt def test_filters_out_private_receipt(self): - events = [ - { - "content": { - "$1435641916114394fHBLK:matrix.org": { - ReceiptTypes.READ_PRIVATE: { - "@rikj:jki.re": { - "ts": 1436451550453, + self._test_filters_private( + [ + { + "content": { + "$1435641916114394fHBLK:matrix.org": { + ReceiptTypes.READ_PRIVATE: { + "@rikj:jki.re": { + "ts": 1436451550453, + } } } - } - }, - "room_id": "!jEsUZKDJdhlrceRyVU:example.org", - "type": "m.receipt", - } - ] - original_events = deepcopy(events) - self._test_filters_private(events, []) - # Since the events are fed in from a cache they should not be modified. - self.assertEqual(events, original_events) + }, + "room_id": "!jEsUZKDJdhlrceRyVU:example.org", + "type": "m.receipt", + } + ], + [], + ) def test_filters_out_private_receipt_and_ignores_rest(self): self._test_filters_private( @@ -126,40 +125,6 @@ def test_filters_out_event_with_only_private_receipts_and_ignores_the_rest(self) ], ) - def test_handles_missing_content_of_m_read(self): - self._test_filters_private( - [ - { - "content": { - "$1435641916114394fHBLK:matrix.org": { - ReceiptTypes.READ: { - "@user:jki.re": { - "ts": 1436451550453, - } - } - }, - }, - "room_id": "!jEsUZKDJdhlrceRyVU:example.org", - "type": "m.receipt", - } - ], - [ - { - "content": { - "$1435641916114394fHBLK:matrix.org": { - ReceiptTypes.READ: { - "@user:jki.re": { - "ts": 1436451550453, - } - } - }, - }, - "room_id": "!jEsUZKDJdhlrceRyVU:example.org", - "type": "m.receipt", - } - ], - ) - def test_handles_empty_event(self): self._test_filters_private( [ @@ -331,6 +296,28 @@ def test_leaves_our_private_and_their_public(self): ], ) + def test_we_do_not_mutate(self): + """Ensure the input values are not modified.""" + events = [ + { + "content": { + "$1435641916114394fHBLK:matrix.org": { + ReceiptTypes.READ_PRIVATE: { + "@rikj:jki.re": { + "ts": 1436451550453, + } + } + } + }, + "room_id": "!jEsUZKDJdhlrceRyVU:example.org", + "type": "m.receipt", + } + ] + original_events = deepcopy(events) + self._test_filters_private(events, []) + # Since the events are fed in from a cache they should not be modified. + self.assertEqual(events, original_events) + def _test_filters_private( self, events: List[JsonDict], expected_output: List[JsonDict] ):