From 22c038e74e3fdb8557cbe5ca7dff6ec63f739476 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 28 Jan 2021 01:48:42 -0600 Subject: [PATCH 01/83] Add endpoints for backfilling history (MSC2716) Work on https://github.com/matrix-org/matrix-doc/pull/2716 Complement MR: https://github.com/matrix-org/complement/pull/68 --- scripts-dev/complement.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 3cde53f5c051..d0cdf1e43dfe 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -11,12 +11,10 @@ cd "$(dirname $0)/.." docker build -t matrixdotorg/synapse:latest -f docker/Dockerfile . # Download Complement -wget -N https://github.com/matrix-org/complement/archive/master.tar.gz -tar -xzf master.tar.gz -cd complement-master +cd ../complement # Build the Synapse image from Complement, based on the above image we just built docker build -t complement-synapse -f dockerfiles/Synapse.Dockerfile ./dockerfiles # Run the tests on the resulting image! 
-COMPLEMENT_BASE_IMAGE=complement-synapse go test -v -count=1 ./tests +COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go From 48369547c0a0004eb1cf47c9b506aaf2f61a3dc2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 28 Jan 2021 19:53:14 -0600 Subject: [PATCH 02/83] Add querystring prev_event to pass into message send API --- scripts-dev/complement.sh | 2 +- synapse/handlers/message.py | 6 ++- synapse/http/servlet.py | 74 ++++++++++++++++++++++++---------- synapse/rest/client/v1/room.py | 5 +++ 4 files changed, 64 insertions(+), 23 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index d0cdf1e43dfe..c0c4061aa116 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -17,4 +17,4 @@ cd ../complement docker build -t complement-synapse -f dockerfiles/Synapse.Dockerfile ./dockerfiles # Run the tests on the resulting image! -COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go +COMPLEMENT_DEBUG=1 COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 9dfeab09cd92..382472fd817c 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -736,8 +736,12 @@ async def create_and_send_nonmember_event( assert event.internal_metadata.stream_ordering return event, event.internal_metadata.stream_ordering + prev_events = None + if "prev_events" in event_dict: + prev_events = event_dict["prev_events"] + event, context = await self.create_event( - requester, event_dict, txn_id=txn_id + requester, event_dict, txn_id=txn_id, prev_event_ids=prev_events ) assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index b361b7cbaf43..d288a5fe5d49 100644 --- 
a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -16,6 +16,7 @@ """ This module contains base REST classes for constructing REST servlets. """ import logging +from typing import List from synapse.api.errors import Codes, SynapseError from synapse.util import json_decoder @@ -147,7 +148,29 @@ def parse_string( ) -def parse_string_from_args( +def parse_string_value( + value, + allowed_values, + encoding="ascii" +) -> str: + if encoding: + try: + value = value.decode(encoding) + except ValueError: + raise SynapseError( + 400, "Query parameter %r must be %s" % (name, encoding) + ) + + if allowed_values is not None and value not in allowed_values: + message = "Query parameter %r must be one of [%s]" % ( + name, + ", ".join(repr(v) for v in allowed_values), + ) + raise SynapseError(400, message) + else: + return value + +def parse_strings_from_args( args, name, default=None, @@ -155,30 +178,15 @@ def parse_string_from_args( allowed_values=None, param_type="string", encoding="ascii", -): +) -> List[str]: if not isinstance(name, bytes): name = name.encode("ascii") if name in args: - value = args[name][0] - - if encoding: - try: - value = value.decode(encoding) - except ValueError: - raise SynapseError( - 400, "Query parameter %r must be %s" % (name, encoding) - ) - - if allowed_values is not None and value not in allowed_values: - message = "Query parameter %r must be one of [%s]" % ( - name, - ", ".join(repr(v) for v in allowed_values), - ) - raise SynapseError(400, message) - else: - return value + values = args[name] + + return [parse_string_value(value, allowed_values, encoding=encoding) for value in values] else: if required: message = "Missing %s query parameter %r" % (param_type, name) @@ -191,6 +199,31 @@ def parse_string_from_args( return default +def parse_string_from_args( + args, + name, + default=None, + required=False, + allowed_values=None, + param_type="string", + encoding="ascii", +): + strings = parse_strings_from_args( + args, + name, + 
default=default, + required=required, + allowed_values=allowed_values, + param_type=param_type, + encoding=encoding, + ) + + if isinstance(strings, list) and len(strings): + return strings[0] + + # Return the default + return strings + def parse_json_value_from_request(request, allow_empty_body=False): """Parse a JSON value from the body of a twisted HTTP request. @@ -245,7 +278,6 @@ def parse_json_object_from_request(request, allow_empty_body=False): return content - def assert_params_in_dict(body, required): absent = [] for k in required: diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index f95627ee615a..e04d068552a8 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -37,6 +37,7 @@ assert_params_in_dict, parse_integer, parse_json_object_from_request, + parse_strings_from_args, parse_string, ) from synapse.logging.opentracing import set_tag @@ -222,12 +223,16 @@ def register(self, http_server): async def on_POST(self, request, room_id, event_type, txn_id=None): requester = await self.auth.get_user_by_req(request, allow_guest=True) content = parse_json_object_from_request(request) + prev_events = parse_strings_from_args(request.args, "prev_event") + + logger.info("prev_events %s", prev_events) event_dict = { "type": event_type, "content": content, "room_id": room_id, "sender": requester.user.to_string(), + "prev_events": prev_events } if b"ts" in request.args and requester.app_service: From c0b0936335c9a1f6acafa7f75ac53804227a1fae Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 29 Jan 2021 22:16:09 -0600 Subject: [PATCH 03/83] Allow override origin_server_ts --- scripts-dev/complement.sh | 2 +- synapse/http/servlet.py | 3 ++- synapse/rest/client/v1/room.py | 17 +++++++++++++---- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index c0c4061aa116..d0cdf1e43dfe 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh 
@@ -17,4 +17,4 @@ cd ../complement docker build -t complement-synapse -f dockerfiles/Synapse.Dockerfile ./dockerfiles # Run the tests on the resulting image! -COMPLEMENT_DEBUG=1 COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go +COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index d288a5fe5d49..fd1c98ad317d 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -151,6 +151,7 @@ def parse_string( def parse_string_value( value, allowed_values, + name="", encoding="ascii" ) -> str: if encoding: @@ -186,7 +187,7 @@ def parse_strings_from_args( if name in args: values = args[name] - return [parse_string_value(value, allowed_values, encoding=encoding) for value in values] + return [parse_string_value(value, allowed_values, name=name, encoding=encoding) for value in values] else: if required: message = "Missing %s query parameter %r" % (param_type, name) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index e04d068552a8..0116dcd3f6b6 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -38,6 +38,7 @@ parse_integer, parse_json_object_from_request, parse_strings_from_args, + parse_integer_from_args, parse_string, ) from synapse.logging.opentracing import set_tag @@ -224,17 +225,25 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): requester = await self.auth.get_user_by_req(request, allow_guest=True) content = parse_json_object_from_request(request) prev_events = parse_strings_from_args(request.args, "prev_event") - - logger.info("prev_events %s", prev_events) + origin_server_ts = parse_integer(request, "origin_server_ts") event_dict = { "type": event_type, "content": content, "room_id": room_id, - "sender": requester.user.to_string(), - "prev_events": prev_events + "sender": 
requester.user.to_string() } + if prev_events: + event_dict["prev_events"] = prev_events + + # TODO: Add `and requester.app_service` + if origin_server_ts: + event_dict["origin_server_ts"] = origin_server_ts + + # TODO: I noticed in the Synapse code that we already accept a `ts` query parameter to override + # the `origin_server_ts` if the request is coming from an app service. + # Do we want to remove in favor of the spec'ed code above if b"ts" in request.args and requester.app_service: event_dict["origin_server_ts"] = parse_integer(request, "ts", 0) From bf900538b8835a97af066cbfa084dbcf566762cd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 2 Feb 2021 02:44:01 -0600 Subject: [PATCH 04/83] Remove origin_server_ts in favor of spec'ed ts query param Also edits for TARDIS visualation: https://github.com/matrix-org/tardis/pull/1 --- synapse/events/utils.py | 14 +++++++------- synapse/rest/client/v1/room.py | 10 +--------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 9c22e3381378..2b238f2d75fb 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -250,13 +250,13 @@ def format_event_for_client_v1(d): def format_event_for_client_v2(d): drop_keys = ( - "auth_events", - "prev_events", - "hashes", - "signatures", - "depth", - "origin", - "prev_state", + # "auth_events", + # "prev_events", + # "hashes", + # "signatures", + # "depth", + # "origin", + # "prev_state", ) for key in drop_keys: d.pop(key, None) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 0116dcd3f6b6..2770a73b26a5 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -225,7 +225,6 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): requester = await self.auth.get_user_by_req(request, allow_guest=True) content = parse_json_object_from_request(request) prev_events = parse_strings_from_args(request.args, "prev_event") - 
origin_server_ts = parse_integer(request, "origin_server_ts") event_dict = { "type": event_type, @@ -237,14 +236,7 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): if prev_events: event_dict["prev_events"] = prev_events - # TODO: Add `and requester.app_service` - if origin_server_ts: - event_dict["origin_server_ts"] = origin_server_ts - - # TODO: I noticed in the Synapse code that we already accept a `ts` query parameter to override - # the `origin_server_ts` if the request is coming from an app service. - # Do we want to remove in favor of the spec'ed code above - if b"ts" in request.args and requester.app_service: + if b"ts" in request.args: # and requester.app_service: event_dict["origin_server_ts"] = parse_integer(request, "ts", 0) try: From bcc69438cc6a6fca265ec6adf55af847e3fb8fba Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 2 Feb 2021 04:48:25 -0600 Subject: [PATCH 05/83] Use previous depth if overriding prev_events to insert into history TODO: Is the assumption of anytime we pass in prev_event_ids, we use same depth good enough? What corner cases are there? 
I see that we also pass in prev_event_ids this in synapse/handlers/room_member.py so need to make sure that still work as expected --- synapse/events/builder.py | 10 ++++++++-- synapse/handlers/message.py | 5 ++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 07df258e6eed..e9bc1524559b 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -98,7 +98,7 @@ def is_state(self): return self._state_key is not None async def build( - self, prev_event_ids: List[str], auth_event_ids: Optional[List[str]], + self, prev_event_ids: List[str], overriding_prev_events: False, auth_event_ids: Optional[List[str]], ) -> EventBase: """Transform into a fully signed and hashed event @@ -131,7 +131,13 @@ async def build( prev_events = prev_event_ids old_depth = await self._store.get_max_depth_of(prev_event_ids) - depth = old_depth + 1 + # awfewafeafew depth stuff + # If backfilling old message, let's just use the same depth of what we're inserting next to + if overriding_prev_events: + depth = old_depth + # Otherwise, progress the depth as normal + else: + depth = old_depth + 1 # we cap depth of generated events, to ensure that they are not # rejected by other servers (and so that they can be persisted in diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 382472fd817c..5e19e630cacd 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -794,7 +794,10 @@ async def create_new_client_event( Tuple of created event, context """ + overriding_prev_events = False if prev_event_ids is not None: + overriding_prev_events = True + assert len(prev_event_ids) <= 10, ( "Attempting to create an event with %i prev_events" % (len(prev_event_ids),) @@ -812,7 +815,7 @@ async def create_new_client_event( ), "Attempting to create an event with no prev_events" event = await builder.build( - prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids + 
prev_event_ids=prev_event_ids, overriding_prev_events=overriding_prev_events, auth_event_ids=auth_event_ids ) context = await self.state.compute_event_context(event) if requester: From 7f4c3a645b3a74422b379e4280dc3fd6b15b5a0a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 4 Feb 2021 22:24:32 -0600 Subject: [PATCH 06/83] Remove m.historical messages from /sync --- synapse/handlers/sync.py | 12 +++++++++++- synapse/rest/client/v2_alpha/sync.py | 2 ++ synapse/visibility.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 5c7590f38e4d..0f3ab782fcb0 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -42,7 +42,10 @@ from synapse.util.caches.lrucache import LruCache from synapse.util.caches.response_cache import ResponseCache from synapse.util.metrics import Measure, measure_func -from synapse.visibility import filter_events_for_client +from synapse.visibility import ( + filter_events_for_client, + filter_historical_events +) if TYPE_CHECKING: from synapse.server import HomeServer @@ -423,6 +426,7 @@ async def _load_filtered_recents( potential_recents: Optional[List[EventBase]] = None, newly_joined_room: bool = False, ) -> TimelineBatch: + logger.info("_load_filtered_recents") with Measure(self.clock, "load_filtered_recents"): timeline_limit = sync_config.filter_collection.timeline_limit() block_all_timeline = ( @@ -462,6 +466,8 @@ async def _load_filtered_recents( else: recents = [] + logger.info("recents1 %s", recents) + if not limited or block_all_timeline: prev_batch_token = now_token if recents: @@ -534,6 +540,10 @@ async def _load_filtered_recents( prev_batch_token = now_token.copy_and_replace("room_key", room_key) + # `m.historical` events should not come down /sync + recents = await filter_historical_events(recents) + + logger.info("recents2 %s", recents) return TimelineBatch( events=recents, prev_batch=prev_batch_token, diff --git 
a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index 8e52e4cca4b2..5546783a6453 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -86,6 +86,7 @@ def __init__(self, hs): self._event_serializer = hs.get_event_client_serializer() async def on_GET(self, request): + logger.info("/sync on_GET") if b"from" in request.args: # /events used to use 'from', but /sync uses 'since'. # Lets be helpful and whine if we see a 'from'. @@ -184,6 +185,7 @@ async def on_GET(self, request): logger.info("Client has disconnected; not serializing response.") return 200, {} + #logger.info("sync response %s", sync_result) time_now = self.clock.time_msec() response_content = await self.encode_response( time_now, sync_result, requester.access_token_id, filter_collection diff --git a/synapse/visibility.py b/synapse/visibility.py index ec50e7e977dc..efb933f88a6a 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -45,6 +45,17 @@ Membership.BAN, ) +async def filter_historical_events( + events +): + logger.info("filter_historical_events %s", ' '.join(map(str, events))) + filtered_events = [e for e in events if not e.content.get("m.historical", None)] + + # remove the None entries + filtered_events = filter(operator.truth, filtered_events) + + # we turn it into a list before returning it. 
+ return list(filtered_events) async def filter_events_for_client( storage: Storage, From 800f3a33854355d8eaf20d057c5243fdcd491d66 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 4 Feb 2021 22:29:24 -0600 Subject: [PATCH 07/83] Remove debug logs --- synapse/handlers/sync.py | 4 ---- synapse/rest/client/v2_alpha/sync.py | 2 -- synapse/visibility.py | 1 - 3 files changed, 7 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0f3ab782fcb0..6d7c00285c0c 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -426,7 +426,6 @@ async def _load_filtered_recents( potential_recents: Optional[List[EventBase]] = None, newly_joined_room: bool = False, ) -> TimelineBatch: - logger.info("_load_filtered_recents") with Measure(self.clock, "load_filtered_recents"): timeline_limit = sync_config.filter_collection.timeline_limit() block_all_timeline = ( @@ -466,8 +465,6 @@ async def _load_filtered_recents( else: recents = [] - logger.info("recents1 %s", recents) - if not limited or block_all_timeline: prev_batch_token = now_token if recents: @@ -543,7 +540,6 @@ async def _load_filtered_recents( # `m.historical` events should not come down /sync recents = await filter_historical_events(recents) - logger.info("recents2 %s", recents) return TimelineBatch( events=recents, prev_batch=prev_batch_token, diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index 5546783a6453..8e52e4cca4b2 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -86,7 +86,6 @@ def __init__(self, hs): self._event_serializer = hs.get_event_client_serializer() async def on_GET(self, request): - logger.info("/sync on_GET") if b"from" in request.args: # /events used to use 'from', but /sync uses 'since'. # Lets be helpful and whine if we see a 'from'. 
@@ -185,7 +184,6 @@ async def on_GET(self, request): logger.info("Client has disconnected; not serializing response.") return 200, {} - #logger.info("sync response %s", sync_result) time_now = self.clock.time_msec() response_content = await self.encode_response( time_now, sync_result, requester.access_token_id, filter_collection diff --git a/synapse/visibility.py b/synapse/visibility.py index efb933f88a6a..3b49676e3e11 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -48,7 +48,6 @@ async def filter_historical_events( events ): - logger.info("filter_historical_events %s", ' '.join(map(str, events))) filtered_events = [e for e in events if not e.content.get("m.historical", None)] # remove the None entries From 9b5e057af135bf1c5d16a687a100d3ab62a5abff Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 4 Feb 2021 22:37:31 -0600 Subject: [PATCH 08/83] Fix some lint --- synapse/events/builder.py | 5 ++++- synapse/handlers/message.py | 4 +++- synapse/handlers/sync.py | 5 +---- synapse/http/servlet.py | 21 ++++++++++----------- synapse/rest/client/v1/room.py | 4 +++- synapse/visibility.py | 6 +++--- 6 files changed, 24 insertions(+), 21 deletions(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index e9bc1524559b..ea98e6312c6a 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -98,7 +98,10 @@ def is_state(self): return self._state_key is not None async def build( - self, prev_event_ids: List[str], overriding_prev_events: False, auth_event_ids: Optional[List[str]], + self, + prev_event_ids: List[str], + overriding_prev_events: False, + auth_event_ids: Optional[List[str]], ) -> EventBase: """Transform into a fully signed and hashed event diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 5e19e630cacd..a53f71f64969 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -815,7 +815,9 @@ async def create_new_client_event( ), "Attempting to create an event with no 
prev_events" event = await builder.build( - prev_event_ids=prev_event_ids, overriding_prev_events=overriding_prev_events, auth_event_ids=auth_event_ids + prev_event_ids=prev_event_ids, + overriding_prev_events=overriding_prev_events, + auth_event_ids=auth_event_ids, ) context = await self.state.compute_event_context(event) if requester: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 6d7c00285c0c..afbea5e58af2 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -42,10 +42,7 @@ from synapse.util.caches.lrucache import LruCache from synapse.util.caches.response_cache import ResponseCache from synapse.util.metrics import Measure, measure_func -from synapse.visibility import ( - filter_events_for_client, - filter_historical_events -) +from synapse.visibility import filter_events_for_client, filter_historical_events if TYPE_CHECKING: from synapse.server import HomeServer diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index fd1c98ad317d..ed8ea7be3ed2 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -148,19 +148,12 @@ def parse_string( ) -def parse_string_value( - value, - allowed_values, - name="", - encoding="ascii" -) -> str: +def parse_string_value(value, allowed_values, name="", encoding="ascii") -> str: if encoding: try: value = value.decode(encoding) except ValueError: - raise SynapseError( - 400, "Query parameter %r must be %s" % (name, encoding) - ) + raise SynapseError(400, "Query parameter %r must be %s" % (name, encoding)) if allowed_values is not None and value not in allowed_values: message = "Query parameter %r must be one of [%s]" % ( @@ -171,6 +164,7 @@ def parse_string_value( else: return value + def parse_strings_from_args( args, name, @@ -187,7 +181,10 @@ def parse_strings_from_args( if name in args: values = args[name] - return [parse_string_value(value, allowed_values, name=name, encoding=encoding) for value in values] + return [ + parse_string_value(value, 
allowed_values, name=name, encoding=encoding) + for value in values + ] else: if required: message = "Missing %s query parameter %r" % (param_type, name) @@ -218,13 +215,14 @@ def parse_string_from_args( param_type=param_type, encoding=encoding, ) - + if isinstance(strings, list) and len(strings): return strings[0] # Return the default return strings + def parse_json_value_from_request(request, allow_empty_body=False): """Parse a JSON value from the body of a twisted HTTP request. @@ -279,6 +277,7 @@ def parse_json_object_from_request(request, allow_empty_body=False): return content + def assert_params_in_dict(body, required): absent = [] for k in required: diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 2770a73b26a5..884e59bf01db 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -230,12 +230,14 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): "type": event_type, "content": content, "room_id": room_id, - "sender": requester.user.to_string() + "sender": requester.user.to_string(), } if prev_events: event_dict["prev_events"] = prev_events + # TODO: Put app_service logic back in place once we figure out how to make the Complement tests + # run as an app service if b"ts" in request.args: # and requester.app_service: event_dict["origin_server_ts"] = parse_integer(request, "ts", 0) diff --git a/synapse/visibility.py b/synapse/visibility.py index 3b49676e3e11..cee6739b1b75 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -45,9 +45,8 @@ Membership.BAN, ) -async def filter_historical_events( - events -): + +async def filter_historical_events(events): filtered_events = [e for e in events if not e.content.get("m.historical", None)] # remove the None entries @@ -56,6 +55,7 @@ async def filter_historical_events( # we turn it into a list before returning it. 
return list(filtered_events) + async def filter_events_for_client( storage: Storage, user_id, From 447eaa86fd247ff5c7116ed8a86e8163cebfa87c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 4 Feb 2021 22:50:23 -0600 Subject: [PATCH 09/83] Add changelog --- changelog.d/9247.feature | 1 + synapse/rest/client/v1/room.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/9247.feature diff --git a/changelog.d/9247.feature b/changelog.d/9247.feature new file mode 100644 index 000000000000..be04bd3fc19b --- /dev/null +++ b/changelog.d/9247.feature @@ -0,0 +1 @@ +Implement MSC2716 to support backfilling history into rooms diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 884e59bf01db..37ad901c867c 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -36,9 +36,9 @@ RestServlet, assert_params_in_dict, parse_integer, + parse_integer_from_args, parse_json_object_from_request, parse_strings_from_args, - parse_integer_from_args, parse_string, ) from synapse.logging.opentracing import set_tag From 7ec22b568d852bbfbc68bababa918d15c074616e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 4 Feb 2021 22:51:05 -0600 Subject: [PATCH 10/83] Fix tox mypy check --- synapse/events/builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index ea98e6312c6a..96ab351dc5c6 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -100,7 +100,7 @@ def is_state(self): async def build( self, prev_event_ids: List[str], - overriding_prev_events: False, + overriding_prev_events: Literal[False] = False, auth_event_ids: Optional[List[str]], ) -> EventBase: """Transform into a fully signed and hashed event From afa5e5df215f89e1d0e91df753002cb251c0bdcb Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 4 Feb 2021 22:54:13 -0600 Subject: [PATCH 11/83] Remove debugging fields for TARDIS visualization 
Related https://github.com/matrix-org/tardis/pull/1 --- synapse/events/builder.py | 1 - synapse/events/utils.py | 14 +++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 96ab351dc5c6..72ec95b8a032 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -134,7 +134,6 @@ async def build( prev_events = prev_event_ids old_depth = await self._store.get_max_depth_of(prev_event_ids) - # awfewafeafew depth stuff # If backfilling old message, let's just use the same depth of what we're inserting next to if overriding_prev_events: depth = old_depth diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 2b238f2d75fb..9c22e3381378 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -250,13 +250,13 @@ def format_event_for_client_v1(d): def format_event_for_client_v2(d): drop_keys = ( - # "auth_events", - # "prev_events", - # "hashes", - # "signatures", - # "depth", - # "origin", - # "prev_state", + "auth_events", + "prev_events", + "hashes", + "signatures", + "depth", + "origin", + "prev_state", ) for key in drop_keys: d.pop(key, None) From f1f3fb0a4c427d4bbd0651b7cd4fa658f360cfc4 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 5 Feb 2021 00:05:57 -0600 Subject: [PATCH 12/83] Pass inherit_depth all the way down the line which we define only when prev_event query param is used --- synapse/events/builder.py | 4 ++-- synapse/handlers/message.py | 11 ++++++----- synapse/rest/client/v1/room.py | 5 ++++- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 72ec95b8a032..2c78d4637047 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -100,8 +100,8 @@ def is_state(self): async def build( self, prev_event_ids: List[str], - overriding_prev_events: Literal[False] = False, auth_event_ids: Optional[List[str]], + inherit_depth: bool = False, ) -> EventBase: 
"""Transform into a fully signed and hashed event @@ -135,7 +135,7 @@ async def build( old_depth = await self._store.get_max_depth_of(prev_event_ids) # If backfilling old message, let's just use the same depth of what we're inserting next to - if overriding_prev_events: + if inherit_depth: depth = old_depth # Otherwise, progress the depth as normal else: diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index a53f71f64969..0b66e4215885 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -438,6 +438,7 @@ async def create_event( event_dict: dict, txn_id: Optional[str] = None, prev_event_ids: Optional[List[str]] = None, + inherit_depth: bool = False, auth_event_ids: Optional[List[str]] = None, require_consent: bool = True, ) -> Tuple[EventBase, EventContext]: @@ -525,6 +526,7 @@ async def create_event( builder=builder, requester=requester, prev_event_ids=prev_event_ids, + inherit_depth=inherit_depth, auth_event_ids=auth_event_ids, ) @@ -682,6 +684,7 @@ async def create_and_send_nonmember_event( self, requester: Requester, event_dict: dict, + inherit_depth: bool = False, ratelimit: bool = True, txn_id: Optional[str] = None, ignore_shadow_ban: bool = False, @@ -741,7 +744,7 @@ async def create_and_send_nonmember_event( prev_events = event_dict["prev_events"] event, context = await self.create_event( - requester, event_dict, txn_id=txn_id, prev_event_ids=prev_events + requester, event_dict, txn_id=txn_id, prev_event_ids=prev_events, inherit_depth=inherit_depth ) assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( @@ -772,6 +775,7 @@ async def create_new_client_event( builder: EventBuilder, requester: Optional[Requester] = None, prev_event_ids: Optional[List[str]] = None, + inherit_depth: bool = False, auth_event_ids: Optional[List[str]] = None, ) -> Tuple[EventBase, EventContext]: """Create a new event for a local client @@ -794,10 +798,7 @@ async def create_new_client_event( Tuple of created event, 
context """ - overriding_prev_events = False if prev_event_ids is not None: - overriding_prev_events = True - assert len(prev_event_ids) <= 10, ( "Attempting to create an event with %i prev_events" % (len(prev_event_ids),) @@ -816,7 +817,7 @@ async def create_new_client_event( event = await builder.build( prev_event_ids=prev_event_ids, - overriding_prev_events=overriding_prev_events, + inherit_depth=inherit_depth, auth_event_ids=auth_event_ids, ) context = await self.state.compute_event_context(event) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 37ad901c867c..a70933cfb73d 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -233,8 +233,11 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): "sender": requester.user.to_string(), } + inherit_depth = False if prev_events: event_dict["prev_events"] = prev_events + # If backfilling old messages, let's just use the same depth of what we're inserting next to + inherit_depth = True # TODO: Put app_service logic back in place once we figure out how to make the Complement tests # run as an app service @@ -246,7 +249,7 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): event, _, ) = await self.event_creation_handler.create_and_send_nonmember_event( - requester, event_dict, txn_id=txn_id + requester, event_dict, txn_id=txn_id, inherit_depth=inherit_depth ) event_id = event.event_id except ShadowBanError: From e7d7f9251cf3f1305c5b03d228fd2da1e5234db1 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 5 Feb 2021 16:40:59 -0600 Subject: [PATCH 13/83] Fix some lints --- changelog.d/9247.feature | 2 +- synapse/handlers/message.py | 6 +++++- synapse/rest/client/v1/room.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/changelog.d/9247.feature b/changelog.d/9247.feature index be04bd3fc19b..6d9b7c012e15 100644 --- a/changelog.d/9247.feature +++ b/changelog.d/9247.feature @@ -1 +1 @@ -Implement MSC2716 
to support backfilling history into rooms +Implement MSC2716 to support backfilling history into rooms. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 0b66e4215885..ef515c24c180 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -744,7 +744,11 @@ async def create_and_send_nonmember_event( prev_events = event_dict["prev_events"] event, context = await self.create_event( - requester, event_dict, txn_id=txn_id, prev_event_ids=prev_events, inherit_depth=inherit_depth + requester, + event_dict, + txn_id=txn_id, + prev_event_ids=prev_events, + inherit_depth=inherit_depth, ) assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index a70933cfb73d..3e7cbe90f9b6 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -38,8 +38,8 @@ parse_integer, parse_integer_from_args, parse_json_object_from_request, - parse_strings_from_args, parse_string, + parse_strings_from_args, ) from synapse.logging.opentracing import set_tag from synapse.rest.client.transactions import HttpTransactionCache From b9024f76daaf08b3e4c4190c4a77e638d9ae6a30 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 9 Feb 2021 13:59:13 -0600 Subject: [PATCH 14/83] Type hinting and docstrings --- changelog.d/9247.feature | 2 +- synapse/events/builder.py | 2 + synapse/handlers/message.py | 8 +++ synapse/handlers/sync.py | 2 +- synapse/http/servlet.py | 104 +++++++++++++++++++++++++++--------- synapse/visibility.py | 13 ++--- 6 files changed, 97 insertions(+), 34 deletions(-) diff --git a/changelog.d/9247.feature b/changelog.d/9247.feature index 6d9b7c012e15..3eeaab0246d0 100644 --- a/changelog.d/9247.feature +++ b/changelog.d/9247.feature @@ -1 +1 @@ -Implement MSC2716 to support backfilling history into rooms. +Implement [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) to support backfilling history into rooms. 
diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 2c78d4637047..0df341102bd1 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -110,6 +110,8 @@ async def build( auth_event_ids: The event IDs to use as the auth events. Should normally be set to None, which will cause them to be calculated based on the room state at the prev_events. + inherit_depth: True if you want to inherit the oldest depth from the prev_event_ids + and hang an event off the prev_events Returns: The signed and hashed event. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index ef515c24c180..d9c9ae0ba837 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -460,6 +460,9 @@ async def create_event( If None, they will be requested from the database. + inherit_depth: True if you want to inherit the oldest depth from the + prev_event_ids and hang an event off the prev_events + auth_event_ids: The event ids to use as the auth_events for the new event. Should normally be left as None, which will cause them to be calculated @@ -697,6 +700,8 @@ async def create_and_send_nonmember_event( Args: requester: The requester sending the event. event_dict: An entire event. + inherit_depth: True if you want to inherit the oldest depth from the + event_dict["prev_events"] and hang the event off of the prev_events ratelimit: Whether to rate limit this send. txn_id: The transaction ID. ignore_shadow_ban: True if shadow-banned users should be allowed to @@ -793,6 +798,9 @@ async def create_new_client_event( If None, they will be requested from the database. + inherit_depth: True if you want to inherit the oldest depth from the + prev_event_ids and hang the event off of the prev_events + auth_event_ids: The event ids to use as the auth_events for the new event. 
Should normally be left as None, which will cause them to be calculated diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index afbea5e58af2..ce9370b5a72e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -535,7 +535,7 @@ async def _load_filtered_recents( prev_batch_token = now_token.copy_and_replace("room_key", room_key) # `m.historical` events should not come down /sync - recents = await filter_historical_events(recents) + recents = filter_historical_events(recents) return TimelineBatch( events=recents, diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index ed8ea7be3ed2..991b33c5e630 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -16,7 +16,7 @@ """ This module contains base REST classes for constructing REST servlets. """ import logging -from typing import List +from typing import List, Optional, Union from synapse.api.errors import Codes, SynapseError from synapse.util import json_decoder @@ -109,12 +109,11 @@ def parse_boolean_from_args(args, name, default=None, required=False): def parse_string( request, - name, - default=None, - required=False, - allowed_values=None, - param_type="string", - encoding="ascii", + name: str, + default: Optional[str] = None, + required: bool = False, + allowed_values: Optional[List[str]] = None, + encoding: str="ascii", ): """ Parse a string parameter from the request query string. @@ -144,7 +143,7 @@ def parse_string( is not one of those allowed values. 
""" return parse_string_from_args( - request.args, name, default, required, allowed_values, param_type, encoding + request.args, name, default, required, allowed_values, encoding ) @@ -166,14 +165,40 @@ def parse_string_value(value, allowed_values, name="", encoding="ascii") -> str: def parse_strings_from_args( - args, - name, - default=None, - required=False, - allowed_values=None, - param_type="string", - encoding="ascii", -) -> List[str]: + args: List[str], + name: str, + default: Optional[str] = None, + required: bool = False, + allowed_values: Optional[List[str]] = None, + encoding: str="ascii", +) -> Optional[List[Union[bytes,str]]]: + """ + Parse a string parameter from the request query string list. + + If encoding is not None, the content of the query param will be + decoded to Unicode using the encoding, otherwise it will be encoded + + Args: + args (List[str]): the twisted HTTP request.args list. + name (bytes|unicode): the name of the query parameter. + default (bytes|unicode|None): value to use if the parameter is absent, + defaults to None. Must be bytes if encoding is None. + required (bool): whether to raise a 400 SynapseError if the + parameter is absent, defaults to False. + allowed_values (list[bytes|unicode]): List of allowed values for the + string, or None if any value is allowed, defaults to None. Must be + the same type as name, if given. + encoding (str|None): The encoding to decode the string content with. + + Returns: + bytes/unicode|None: A string value or the default. Unicode if encoding + was given, bytes otherwise. + + Raises: + SynapseError if the parameter is absent and required, or if the + parameter is present, must be one of a list of allowed values and + is not one of those allowed values. 
+ """ if not isinstance(name, bytes): name = name.encode("ascii") @@ -187,7 +212,7 @@ def parse_strings_from_args( ] else: if required: - message = "Missing %s query parameter %r" % (param_type, name) + message = "Missing string query parameter %r" % (name) raise SynapseError(400, message, errcode=Codes.MISSING_PARAM) else: @@ -198,21 +223,48 @@ def parse_strings_from_args( def parse_string_from_args( - args, - name, - default=None, - required=False, - allowed_values=None, - param_type="string", - encoding="ascii", -): + args: List[str], + name: str, + default: Optional[str] = None, + required: bool = False, + allowed_values: Optional[List[str]] = None, + encoding: Optional[str]="ascii", +) -> Optional[Union[bytes,str]]: + """ + Parse the string parameter from the request query string list + and return the first result. + + If encoding is not None, the content of the query param will be + decoded to Unicode using the encoding, otherwise it will be encoded + + Args: + args (List[str]): the twisted HTTP request.args list. + name (bytes|unicode): the name of the query parameter. + default (bytes|unicode|None): value to use if the parameter is absent, + defaults to None. Must be bytes if encoding is None. + required (bool): whether to raise a 400 SynapseError if the + parameter is absent, defaults to False. + allowed_values (list[bytes|unicode]): List of allowed values for the + string, or None if any value is allowed, defaults to None. Must be + the same type as name, if given. + encoding (str|None): The encoding to decode the string content with. + + Returns: + bytes/unicode|None: A string value or the default. Unicode if encoding + was given, bytes otherwise. + + Raises: + SynapseError if the parameter is absent and required, or if the + parameter is present, must be one of a list of allowed values and + is not one of those allowed values. 
+ """ + strings = parse_strings_from_args( args, name, default=default, required=required, allowed_values=allowed_values, - param_type=param_type, encoding=encoding, ) diff --git a/synapse/visibility.py b/synapse/visibility.py index cee6739b1b75..6f72da7d8865 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -14,6 +14,10 @@ # limitations under the License. import logging import operator +from typing import ( + Iterable, + List, +) from synapse.api.constants import ( AccountDataTypes, @@ -21,6 +25,7 @@ HistoryVisibility, Membership, ) +from synapse.events import EventBase from synapse.events.utils import prune_event from synapse.storage import Storage from synapse.storage.state import StateFilter @@ -46,14 +51,10 @@ ) -async def filter_historical_events(events): +def filter_historical_events(events: Iterable[EventBase]) -> List[EventBase]: filtered_events = [e for e in events if not e.content.get("m.historical", None)] - # remove the None entries - filtered_events = filter(operator.truth, filtered_events) - - # we turn it into a list before returning it. 
- return list(filtered_events) + return filtered_events async def filter_events_for_client( From dbba361cb638935dbcfc2972a2babf04bfdf4f33 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 9 Feb 2021 15:53:19 -0600 Subject: [PATCH 15/83] Add experimental feature flag for MSC2716 --- synapse/config/experimental.py | 3 +++ synapse/handlers/sync.py | 8 ++++++-- synapse/http/servlet.py | 8 ++++---- synapse/rest/client/v1/room.py | 13 ++++++++----- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index b1c1c51e4dcc..f823fcade4a9 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -27,3 +27,6 @@ def read_config(self, config: JsonDict, **kwargs): # MSC2858 (multiple SSO identity providers) self.msc2858_enabled = experimental.get("msc2858_enabled", False) # type: bool + + # MSC2716 (backfill existing history) + self.msc2716_enabled = experimental.get("msc2716_enabled", False) # type: bool diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index ce9370b5a72e..233b324b7b1b 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -259,6 +259,8 @@ def __init__(self, hs: "HomeServer"): expiry_ms=LAZY_LOADED_MEMBERS_CACHE_MAX_AGE, ) + self._msc2716_enabled = hs.config.experimental._msc2716_enabled + async def wait_for_sync_for_user( self, requester: Requester, @@ -534,8 +536,10 @@ async def _load_filtered_recents( prev_batch_token = now_token.copy_and_replace("room_key", room_key) - # `m.historical` events should not come down /sync - recents = filter_historical_events(recents) + + if(self._msc2716_enabled): + # `m.historical` events should not come down /sync + recents = filter_historical_events(recents) return TimelineBatch( events=recents, diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 991b33c5e630..bd57d9135d9a 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -170,8 +170,8 @@ def 
parse_strings_from_args( default: Optional[str] = None, required: bool = False, allowed_values: Optional[List[str]] = None, - encoding: str="ascii", -) -> Optional[List[Union[bytes,str]]]: + encoding: str = "ascii", +) -> Optional[List[Union[bytes, str]]]: """ Parse a string parameter from the request query string list. @@ -228,8 +228,8 @@ def parse_string_from_args( default: Optional[str] = None, required: bool = False, allowed_values: Optional[List[str]] = None, - encoding: Optional[str]="ascii", -) -> Optional[Union[bytes,str]]: + encoding: Optional[str] = "ascii", +) -> Optional[Union[bytes, str]]: """ Parse the string parameter from the request query string list and return the first result. diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 4768162383cb..d1fe99c4ae15 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -215,6 +215,8 @@ def __init__(self, hs): super().__init__(hs) self.event_creation_handler = hs.get_event_creation_handler() self.auth = hs.get_auth() + + self._msc2716_enabled = hs.config.experimental._msc2716_enabled def register(self, http_server): # /rooms/$roomid/send/$event_type[/$txn_id] @@ -224,7 +226,6 @@ def register(self, http_server): async def on_POST(self, request, room_id, event_type, txn_id=None): requester = await self.auth.get_user_by_req(request, allow_guest=True) content = parse_json_object_from_request(request) - prev_events = parse_strings_from_args(request.args, "prev_event") event_dict = { "type": event_type, @@ -234,10 +235,12 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): } inherit_depth = False - if prev_events: - event_dict["prev_events"] = prev_events - # If backfilling old messages, let's just use the same depth of what we're inserting next to - inherit_depth = True + prev_events = parse_strings_from_args(request.args, "prev_event") + if(self._msc2716_enabled): + if prev_events: + event_dict["prev_events"] = prev_events + # If 
backfilling old messages, let's just use the same depth of what we're inserting next to + inherit_depth = True # TODO: Put app_service logic back in place once we figure out how to make the Complement tests # run as an app service From f1c31f1054ea6431474c8ff99bd9f6642c77ece9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 9 Feb 2021 15:54:00 -0600 Subject: [PATCH 16/83] Fix isort linting --- synapse/visibility.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index b69a96e7f586..0590645c9fba 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -14,10 +14,7 @@ # limitations under the License. import logging import operator -from typing import ( - Iterable, - List, -) +from typing import Iterable, List from synapse.api.constants import ( AccountDataTypes, From 19aa93c20c27b0c53ecbc230b72abc24598b27ea Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 9 Feb 2021 16:01:09 -0600 Subject: [PATCH 17/83] Fix experimental msc2716 feature flag --- synapse/handlers/sync.py | 2 +- synapse/rest/client/v1/room.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 233b324b7b1b..b162c3feafb0 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -259,7 +259,7 @@ def __init__(self, hs: "HomeServer"): expiry_ms=LAZY_LOADED_MEMBERS_CACHE_MAX_AGE, ) - self._msc2716_enabled = hs.config.experimental._msc2716_enabled + self._msc2716_enabled = hs.config.experimental.msc2716_enabled async def wait_for_sync_for_user( self, diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index d1fe99c4ae15..cc984fdc357d 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -215,8 +215,8 @@ def __init__(self, hs): super().__init__(hs) self.event_creation_handler = hs.get_event_creation_handler() self.auth = hs.get_auth() - - self._msc2716_enabled = 
hs.config.experimental._msc2716_enabled + + self._msc2716_enabled = hs.config.experimental.msc2716_enabled def register(self, http_server): # /rooms/$roomid/send/$event_type[/$txn_id] From c07458407b36e2bce35540dce79eca125e767cd0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 9 Feb 2021 16:58:32 -0600 Subject: [PATCH 18/83] Try fix type hints and tox errors --- synapse/handlers/sync.py | 2 +- synapse/http/servlet.py | 18 +++++++++--------- synapse/rest/client/v1/room.py | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b162c3feafb0..46fea95d3978 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -537,7 +537,7 @@ async def _load_filtered_recents( prev_batch_token = now_token.copy_and_replace("room_key", room_key) - if(self._msc2716_enabled): + if self._msc2716_enabled: # `m.historical` events should not come down /sync recents = filter_historical_events(recents) diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index bd57d9135d9a..11700b8ba4c2 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -16,7 +16,7 @@ """ This module contains base REST classes for constructing REST servlets. """ import logging -from typing import List, Optional, Union +from typing import Iterable, List, Optional, Union from synapse.api.errors import Codes, SynapseError from synapse.util import json_decoder @@ -109,11 +109,11 @@ def parse_boolean_from_args(args, name, default=None, required=False): def parse_string( request, - name: str, + name: Union[bytes, str], default: Optional[str] = None, required: bool = False, - allowed_values: Optional[List[str]] = None, - encoding: str="ascii", + allowed_values: Optional[Iterable[str]] = None, + encoding: Optional[str] = "ascii", ): """ Parse a string parameter from the request query string. 
@@ -166,11 +166,11 @@ def parse_string_value(value, allowed_values, name="", encoding="ascii") -> str: def parse_strings_from_args( args: List[str], - name: str, + name: Union[bytes, str], default: Optional[str] = None, required: bool = False, - allowed_values: Optional[List[str]] = None, - encoding: str = "ascii", + allowed_values: Optional[Iterable[str]] = None, + encoding: Optional[str] = "ascii", ) -> Optional[List[Union[bytes, str]]]: """ Parse a string parameter from the request query string list. @@ -224,10 +224,10 @@ def parse_strings_from_args( def parse_string_from_args( args: List[str], - name: str, + name: Union[bytes, str], default: Optional[str] = None, required: bool = False, - allowed_values: Optional[List[str]] = None, + allowed_values: Optional[Iterable[str]] = None, encoding: Optional[str] = "ascii", ) -> Optional[Union[bytes, str]]: """ diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index cc984fdc357d..727e125332fd 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -236,7 +236,7 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): inherit_depth = False prev_events = parse_strings_from_args(request.args, "prev_event") - if(self._msc2716_enabled): + if self._msc2716_enabled: if prev_events: event_dict["prev_events"] = prev_events # If backfilling old messages, let's just use the same depth of what we're inserting next to From c02079de642c5fc9d103bc01fa0c161dc8a6ce90 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 9 Feb 2021 17:05:44 -0600 Subject: [PATCH 19/83] Fix extra new line lint --- synapse/handlers/sync.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 46fea95d3978..5ddc3c7f4cdf 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -536,7 +536,6 @@ async def _load_filtered_recents( prev_batch_token = now_token.copy_and_replace("room_key", room_key) - if self._msc2716_enabled: 
# `m.historical` events should not come down /sync recents = filter_historical_events(recents) From 412ffc342026995182ececdb831b0ca56d13384a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Feb 2021 18:42:35 -0600 Subject: [PATCH 20/83] Update changelog.d/9247.feature Co-authored-by: Patrick Cloke --- changelog.d/9247.feature | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/9247.feature b/changelog.d/9247.feature index 3eeaab0246d0..c687acf102d4 100644 --- a/changelog.d/9247.feature +++ b/changelog.d/9247.feature @@ -1 +1 @@ -Implement [MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716) to support backfilling history into rooms. +Add experimental support for backfilling history into rooms ([MSC2716](https://github.com/matrix-org/matrix-doc/pull/2716)). From 7160f3ba2eaf5140885da77c3863f9aa2b568593 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Feb 2021 18:42:45 -0600 Subject: [PATCH 21/83] Update synapse/events/builder.py Co-authored-by: Patrick Cloke --- synapse/events/builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 0df341102bd1..0353f73fc78d 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -136,7 +136,7 @@ async def build( prev_events = prev_event_ids old_depth = await self._store.get_max_depth_of(prev_event_ids) - # If backfilling old message, let's just use the same depth of what we're inserting next to + # If backfilling old message, use the same depth as what we're inserting next to. 
if inherit_depth: depth = old_depth # Otherwise, progress the depth as normal From 5ff398df8a81423be12c3e5758c4aca647d0a147 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Feb 2021 18:47:50 -0600 Subject: [PATCH 22/83] Update synapse/visibility.py Co-authored-by: Patrick Cloke --- synapse/visibility.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/synapse/visibility.py b/synapse/visibility.py index 0590645c9fba..79890d11f5a7 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -49,9 +49,8 @@ def filter_historical_events(events: Iterable[EventBase]) -> List[EventBase]: - filtered_events = [e for e in events if not e.content.get("m.historical", None)] - - return filtered_events + """Return a new list with historical events removed from the input.""" + return [e for e in events if not e.content.get("m.historical", None)] async def filter_events_for_client( From 864b98f22ba99384121530904b798194d1359155 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Feb 2021 19:06:07 -0600 Subject: [PATCH 23/83] Clean up docstrings and address review --- synapse/events/builder.py | 4 +-- synapse/handlers/message.py | 12 ++++----- synapse/http/servlet.py | 46 +++++++++++++++++----------------- synapse/rest/client/v1/room.py | 7 ++---- 4 files changed, 33 insertions(+), 36 deletions(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 0df341102bd1..4e449312a5c8 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -110,8 +110,8 @@ async def build( auth_event_ids: The event IDs to use as the auth events. Should normally be set to None, which will cause them to be calculated based on the room state at the prev_events. - inherit_depth: True if you want to inherit the oldest depth from the prev_event_ids - and hang an event off the prev_events + inherit_depth: True to use the oldest depth from the prev_event_ids + (instead of calculating a new depth). Returns: The signed and hashed event. 
diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 131288d8bd2e..4246a7a20fe6 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -462,8 +462,8 @@ async def create_event( If None, they will be requested from the database. - inherit_depth: True if you want to inherit the oldest depth from the - prev_event_ids and hang an event off the prev_events + inherit_depth: True to use the oldest depth from the prev_event_ids + (instead of calculating a new depth). auth_event_ids: The event ids to use as the auth_events for the new event. @@ -702,8 +702,8 @@ async def create_and_send_nonmember_event( Args: requester: The requester sending the event. event_dict: An entire event. - inherit_depth: True if you want to inherit the oldest depth from the - event_dict["prev_events"] and hang the event off of the prev_events + inherit_depth: True to use the oldest depth from the event_dict["prev_events"] + (instead of calculating a new depth). ratelimit: Whether to rate limit this send. txn_id: The transaction ID. ignore_shadow_ban: True if shadow-banned users should be allowed to @@ -800,8 +800,8 @@ async def create_new_client_event( If None, they will be requested from the database. - inherit_depth: True if you want to inherit the oldest depth from the - prev_event_ids and hang the event off of the prev_events + inherit_depth: True to use the oldest depth from the prev_event_ids + (instead of calculating a new depth). auth_event_ids: The event ids to use as the auth_events for the new event. diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 11700b8ba4c2..6656de24d97d 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -114,7 +114,7 @@ def parse_string( required: bool = False, allowed_values: Optional[Iterable[str]] = None, encoding: Optional[str] = "ascii", -): +) -> Optional[Union[bytes, str]]: """ Parse a string parameter from the request query string. 
@@ -123,18 +123,18 @@ def parse_string( Args: request: the twisted HTTP request. - name (bytes|unicode): the name of the query parameter. - default (bytes|unicode|None): value to use if the parameter is absent, + name: the name of the query parameter. + default: value to use if the parameter is absent, defaults to None. Must be bytes if encoding is None. - required (bool): whether to raise a 400 SynapseError if the + required: whether to raise a 400 SynapseError if the parameter is absent, defaults to False. - allowed_values (list[bytes|unicode]): List of allowed values for the + allowed_values: List of allowed values for the string, or None if any value is allowed, defaults to None. Must be the same type as name, if given. - encoding (str|None): The encoding to decode the string content with. + encoding: The encoding to decode the string content with. Returns: - bytes/unicode|None: A string value or the default. Unicode if encoding + A string value or the default. Unicode if encoding was given, bytes otherwise. Raises: @@ -147,7 +147,7 @@ def parse_string( ) -def parse_string_value(value, allowed_values, name="", encoding="ascii") -> str: +def _parse_string_value(value: Union[str, bytes], allowed_values: Optional[Iterable[str]], name: str, encoding: Optional[str]) -> str: if encoding: try: value = value.decode(encoding) @@ -179,19 +179,19 @@ def parse_strings_from_args( decoded to Unicode using the encoding, otherwise it will be encoded Args: - args (List[str]): the twisted HTTP request.args list. - name (bytes|unicode): the name of the query parameter. - default (bytes|unicode|None): value to use if the parameter is absent, + args: the twisted HTTP request.args list. + name: the name of the query parameter. + default: value to use if the parameter is absent, defaults to None. Must be bytes if encoding is None. 
- required (bool): whether to raise a 400 SynapseError if the + required : whether to raise a 400 SynapseError if the parameter is absent, defaults to False. allowed_values (list[bytes|unicode]): List of allowed values for the string, or None if any value is allowed, defaults to None. Must be the same type as name, if given. - encoding (str|None): The encoding to decode the string content with. + encoding: The encoding to decode the string content with. Returns: - bytes/unicode|None: A string value or the default. Unicode if encoding + A string value or the default. Unicode if encoding was given, bytes otherwise. Raises: @@ -207,7 +207,7 @@ def parse_strings_from_args( values = args[name] return [ - parse_string_value(value, allowed_values, name=name, encoding=encoding) + _parse_string_value(value, allowed_values, name=name, encoding=encoding) for value in values ] else: @@ -238,19 +238,19 @@ def parse_string_from_args( decoded to Unicode using the encoding, otherwise it will be encoded Args: - args (List[str]): the twisted HTTP request.args list. - name (bytes|unicode): the name of the query parameter. - default (bytes|unicode|None): value to use if the parameter is absent, + args: the twisted HTTP request.args list. + name: the name of the query parameter. + default: value to use if the parameter is absent, defaults to None. Must be bytes if encoding is None. - required (bool): whether to raise a 400 SynapseError if the + required: whether to raise a 400 SynapseError if the parameter is absent, defaults to False. - allowed_values (list[bytes|unicode]): List of allowed values for the + allowed_values: List of allowed values for the string, or None if any value is allowed, defaults to None. Must be the same type as name, if given. - encoding (str|None): The encoding to decode the string content with. + encoding: The encoding to decode the string content with. Returns: - bytes/unicode|None: A string value or the default. 
Unicode if encoding + A string value or the default. Unicode if encoding was given, bytes otherwise. Raises: @@ -268,7 +268,7 @@ def parse_string_from_args( encoding=encoding, ) - if isinstance(strings, list) and len(strings): + if isinstance(strings, list): return strings[0] # Return the default diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 727e125332fd..e6f6e15b61dc 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -36,7 +36,6 @@ RestServlet, assert_params_in_dict, parse_integer, - parse_integer_from_args, parse_json_object_from_request, parse_string, parse_strings_from_args, @@ -237,14 +236,12 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): inherit_depth = False prev_events = parse_strings_from_args(request.args, "prev_event") if self._msc2716_enabled: - if prev_events: + if prev_events and requester.app_service: event_dict["prev_events"] = prev_events # If backfilling old messages, let's just use the same depth of what we're inserting next to inherit_depth = True - # TODO: Put app_service logic back in place once we figure out how to make the Complement tests - # run as an app service - if b"ts" in request.args: # and requester.app_service: + if b"ts" in request.args and requester.app_service: event_dict["origin_server_ts"] = parse_integer(request, "ts", 0) try: From ba1eb395f2accd74c457a2474cefbf6d7a4bd4d7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Feb 2021 19:16:01 -0600 Subject: [PATCH 24/83] Simplify prev_event fetching off the event dictionary --- synapse/handlers/message.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 4246a7a20fe6..047816a5788e 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -746,9 +746,7 @@ async def create_and_send_nonmember_event( assert event.internal_metadata.stream_ordering return event, 
event.internal_metadata.stream_ordering - prev_events = None - if "prev_events" in event_dict: - prev_events = event_dict["prev_events"] + prev_events = event_dict.get("prev_events") event, context = await self.create_event( requester, From 6d4fcb677dd11efd30d1150547d5e41df00ccf6c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Feb 2021 19:39:55 -0600 Subject: [PATCH 25/83] Filter recent events in both places that we fetch them This way we handle the early return case as well Address https://github.com/matrix-org/synapse/pull/9247/files#r573711617 --- synapse/handlers/sync.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 5ddc3c7f4cdf..860a616d3b7d 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -461,9 +461,14 @@ async def _load_filtered_recents( recents, always_include_ids=current_state_ids, ) + + if self._msc2716_enabled: + # `m.historical` events should not come down /sync + recents = filter_historical_events(recents) else: recents = [] + if not limited or block_all_timeline: prev_batch_token = now_token if recents: @@ -524,6 +529,10 @@ async def _load_filtered_recents( loaded_recents.extend(recents) recents = loaded_recents + if self._msc2716_enabled: + # `m.historical` events should not come down /sync + recents = filter_historical_events(recents) + if len(events) <= load_limit: limited = False break @@ -536,10 +545,6 @@ async def _load_filtered_recents( prev_batch_token = now_token.copy_and_replace("room_key", room_key) - if self._msc2716_enabled: - # `m.historical` events should not come down /sync - recents = filter_historical_events(recents) - return TimelineBatch( events=recents, prev_batch=prev_batch_token, From 5d5fb8be0613e32758d1741886a6472688064d54 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Feb 2021 19:42:40 -0600 Subject: [PATCH 26/83] Fix lint --- synapse/http/servlet.py | 7 ++++++- 1 file changed, 6 
insertions(+), 1 deletion(-) diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 6656de24d97d..2fabd8fa21ef 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -147,7 +147,12 @@ def parse_string( ) -def _parse_string_value(value: Union[str, bytes], allowed_values: Optional[Iterable[str]], name: str, encoding: Optional[str]) -> str: +def _parse_string_value( + value: Union[str, bytes], + allowed_values: Optional[Iterable[str]], + name: str, + encoding: Optional[str], +) -> str: if encoding: try: value = value.decode(encoding) From 1aa3af976810d1975c51040b4f3801a51a5d94b9 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Feb 2021 20:02:29 -0600 Subject: [PATCH 27/83] Fix newline lint --- synapse/handlers/sync.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 860a616d3b7d..01db015eafe1 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -468,7 +468,6 @@ async def _load_filtered_recents( else: recents = [] - if not limited or block_all_timeline: prev_batch_token = now_token if recents: From e63ef8eb9d3aee295533d22aabb240e2db2359fb Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Feb 2021 20:20:50 -0600 Subject: [PATCH 28/83] Remove return strict type to avoid downstream lint problems See https://github.com/matrix-org/synapse/pull/9247#discussion_r574212632 --- synapse/http/servlet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 2fabd8fa21ef..9da5da0361bf 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -114,7 +114,7 @@ def parse_string( required: bool = False, allowed_values: Optional[Iterable[str]] = None, encoding: Optional[str] = "ascii", -) -> Optional[Union[bytes, str]]: +): """ Parse a string parameter from the request query string. 
From c90af9eecd7460d2e07b19e374189345221df59f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Feb 2021 21:34:01 -0600 Subject: [PATCH 29/83] Fix test failure with mocked build function --- synapse/handlers/message.py | 2 +- tests/storage/test_redaction.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 047816a5788e..6f03e5d932ea 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -829,8 +829,8 @@ async def create_new_client_event( event = await builder.build( prev_event_ids=prev_event_ids, - inherit_depth=inherit_depth, auth_event_ids=auth_event_ids, + inherit_depth=inherit_depth, ) context = await self.state.compute_event_context(event) if requester: diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index a6303bf0ee2e..49bc0ac39eff 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -231,9 +231,11 @@ def __init__(self, base_builder, event_id): self._event_id = event_id @defer.inlineCallbacks - def build(self, prev_event_ids, auth_event_ids): + def build(self, prev_event_ids, auth_event_ids, inherit_depth: bool = False,): built_event = yield defer.ensureDeferred( - self._base_builder.build(prev_event_ids, auth_event_ids) + self._base_builder.build( + prev_event_ids, auth_event_ids, inherit_depth + ) ) built_event._event_id = self._event_id From 270e5eeef13fb9ebe5c13e0e429396d1dcb59f41 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 10 Feb 2021 23:44:23 -0600 Subject: [PATCH 30/83] Fix lint --- synapse/http/servlet.py | 2 +- tests/storage/test_redaction.py | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 9da5da0361bf..84b383e312c6 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -347,7 +347,7 @@ def assert_params_in_dict(body, required): class RestServlet: - """ 
A Synapse REST Servlet. + """A Synapse REST Servlet. An implementing class can either provide its own custom 'register' method, or use the automatic pattern handling provided by the base class. diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 49bc0ac39eff..4bb1a7b1c5e3 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -231,7 +231,12 @@ def __init__(self, base_builder, event_id): self._event_id = event_id @defer.inlineCallbacks - def build(self, prev_event_ids, auth_event_ids, inherit_depth: bool = False,): + def build( + self, + prev_event_ids, + auth_event_ids, + inherit_depth: bool = False, + ): built_event = yield defer.ensureDeferred( self._base_builder.build( prev_event_ids, auth_event_ids, inherit_depth @@ -301,8 +306,7 @@ def type(self): ) def test_redact_censor(self): - """Test that a redacted event gets censored in the DB after a month - """ + """Test that a redacted event gets censored in the DB after a month""" self.get_success( self.inject_room_member(self.room1, self.u_alice, Membership.JOIN) @@ -372,8 +376,7 @@ def test_redact_censor(self): self.assert_dict({"content": {}}, json.loads(event_json)) def test_redact_redaction(self): - """Tests that we can redact a redaction and can fetch it again. - """ + """Tests that we can redact a redaction and can fetch it again.""" self.get_success( self.inject_room_member(self.room1, self.u_alice, Membership.JOIN) @@ -406,8 +409,7 @@ def test_redact_redaction(self): ) def test_store_redacted_redaction(self): - """Tests that we can store a redacted redaction. - """ + """Tests that we can store a redacted redaction.""" self.get_success( self.inject_room_member(self.room1, self.u_alice, Membership.JOIN) From 4359ab868aa336c8f8e8b9fbc6abc55bc536d010 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 11 Feb 2021 00:03:53 -0600 Subject: [PATCH 31/83] Fix lint again wanting to reverse what black wants... 
--- tests/storage/test_redaction.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 4bb1a7b1c5e3..282f48d7b46d 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -232,10 +232,7 @@ def __init__(self, base_builder, event_id): @defer.inlineCallbacks def build( - self, - prev_event_ids, - auth_event_ids, - inherit_depth: bool = False, + self, prev_event_ids, auth_event_ids, inherit_depth: bool = False, ): built_event = yield defer.ensureDeferred( self._base_builder.build( From 6d8514f882db3a417e4daf7223272de1daebe8bd Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 17 Feb 2021 11:28:31 -0600 Subject: [PATCH 32/83] Run lint.sh --- tests/storage/test_redaction.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 282f48d7b46d..4bb1a7b1c5e3 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -232,7 +232,10 @@ def __init__(self, base_builder, event_id): @defer.inlineCallbacks def build( - self, prev_event_ids, auth_event_ids, inherit_depth: bool = False, + self, + prev_event_ids, + auth_event_ids, + inherit_depth: bool = False, ): built_event = yield defer.ensureDeferred( self._base_builder.build( From 5dacc860a37754158bffa3b375a85ff8e9eba303 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 17 Feb 2021 11:35:47 -0600 Subject: [PATCH 33/83] Remove default for inherit_depth See, https://github.com/matrix-org/synapse/pull/9247#discussion_r575408405 --- synapse/events/builder.py | 2 +- synapse/handlers/message.py | 1 + tests/replication/test_federation_sender_shard.py | 8 +++++++- tests/storage/test_redaction.py | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 084478fe9355..387551e4a9b0 100644 --- a/synapse/events/builder.py +++ 
b/synapse/events/builder.py @@ -101,7 +101,7 @@ async def build( self, prev_event_ids: List[str], auth_event_ids: Optional[List[str]], - inherit_depth: bool = False, + inherit_depth: bool, ) -> EventBase: """Transform into a fully signed and hashed event diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index dee9b734c023..ae3ee29c1512 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1423,6 +1423,7 @@ async def _rebuild_event_after_third_party_rules( event = await builder.build( prev_event_ids=original_event.prev_event_ids(), auth_event_ids=original_event.auth_event_ids(), + inherit_depth=False, ) # we rebuild the event context, to be on the safe side. If nothing else, diff --git a/tests/replication/test_federation_sender_shard.py b/tests/replication/test_federation_sender_shard.py index fffdb742c8ea..8fd838bac95b 100644 --- a/tests/replication/test_federation_sender_shard.py +++ b/tests/replication/test_federation_sender_shard.py @@ -226,7 +226,13 @@ def create_room_with_remote_server(self, user, token, remote_server="other_serve } builder = factory.for_room_version(room_version, event_dict) - join_event = self.get_success(builder.build(prev_event_ids, None)) + join_event = self.get_success( + builder.build( + prev_event_ids, + None, + inherit_depth=False, + ) + ) self.get_success(federation.on_send_join_request(remote_server, join_event)) self.replicate() diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 4bb1a7b1c5e3..1555ed2164c5 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -235,7 +235,7 @@ def build( self, prev_event_ids, auth_event_ids, - inherit_depth: bool = False, + inherit_depth: bool, ): built_event = yield defer.ensureDeferred( self._base_builder.build( From 61dc89f669b7b6169a7a25f34dea26ebb5393f3e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 11 Mar 2021 00:16:06 -0600 Subject: [PATCH 34/83] WIP: Use depth from 
successor event Implementing https://github.com/matrix-org/synapse/pull/9247#discussion_r588479201 --- scripts-dev/complement.sh | 2 +- synapse/events/builder.py | 51 ++++++++++++++++-- synapse/events/utils.py | 14 ++--- synapse/handlers/message.py | 10 +++- .../databases/main/event_federation.py | 52 ++++++++++++++++--- synapse/visibility.py | 3 +- 6 files changed, 112 insertions(+), 20 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index d0cdf1e43dfe..b28446e399d1 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -17,4 +17,4 @@ cd ../complement docker build -t complement-synapse -f dockerfiles/Synapse.Dockerfile ./dockerfiles # Run the tests on the resulting image! -COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go +COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go -run TestBackfillingHistory/parallel/Backfilled_messages_come_back_in_correct_order diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 387551e4a9b0..e520e2a9b289 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import logging from typing import Any, Dict, List, Optional, Tuple, Union import attr @@ -34,6 +35,8 @@ from synapse.util import Clock from synapse.util.stringutils import random_string +logger = logging.getLogger(__name__) + @attr.s(slots=True, cmp=False, frozen=True) class EventBuilder: @@ -135,13 +138,53 @@ async def build( auth_events = auth_event_ids prev_events = prev_event_ids - old_depth = await self._store.get_max_depth_of(prev_event_ids) - # If backfilling old message, use the same depth as what we're inserting next to. 
+ ( + most_recent_prev_event_id, + most_recent_prev_event_depth, + ) = await self._store.get_max_depth_of(prev_event_ids) + logger.info( + "event_id_with_max_depth %s %s", + most_recent_prev_event_id, + most_recent_prev_event_depth, + ) + + # We want to insert the historical event after the `prev_event` but before the successor event + # + # We inherit depth from the successor event instead of the `prev_event` + # because events returned from `/messages` are first sorted by `topological_ordering` + # which is just the `depth` and then tie-break with `stream_ordering`. + # + # We mark these inserted historical events as "backfilled" which gives them a + # negative `stream_ordering`. If we use the same depth as the `prev_event`, + # then our historical event will tie-break and be sorted before the `prev_event` + # when it should come after. + # + # We want to use the successor event depth so they appear after `prev_event` because + # it has a larger `depth` but before the successor event because the `stream_ordering` + # is negative before the successor event. if inherit_depth: - depth = old_depth + sucessor_event_ids = await self._store.get_successor_events( + [most_recent_prev_event_id] + ) + logger.info("sucessor_event_ids %s", sucessor_event_ids) + + # If we can't find any successor events, this should only happen when + # you're inserting onto a forward extremity so I guess we should just + # progress the depth as normal. 
+ if not sucessor_event_ids: + depth = most_recent_prev_event_depth + 1 + else: + ( + _, + oldest_successor_depth, + ) = await self._store.get_min_depth_of(sucessor_event_ids) + + logger.info("oldest_successor_depth %s", oldest_successor_depth) + + depth = oldest_successor_depth # Otherwise, progress the depth as normal else: - depth = old_depth + 1 + depth = most_recent_prev_event_depth + 1 # we cap depth of generated events, to ensure that they are not # rejected by other servers (and so that they can be persisted in diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 7ca5c9940a3a..5cf91c71fea1 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -250,13 +250,13 @@ def format_event_for_client_v1(d): def format_event_for_client_v2(d): drop_keys = ( - "auth_events", - "prev_events", - "hashes", - "signatures", - "depth", - "origin", - "prev_state", + # "auth_events", + # "prev_events", + # "hashes", + # "signatures", + # "depth", + # "origin", + # "prev_state", ) for key in drop_keys: d.pop(key, None) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index ae3ee29c1512..d3eff8c3f11b 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1245,13 +1245,21 @@ async def persist_and_notify_client_event( if prev_state_ids: raise AuthError(403, "Changing the room create event is forbidden") + backfilled = False + if event.content.get("m.historical", None): + backfilled = True + + logger.info("backfilled %s", backfilled) + # Note that this returns the event that was persisted, which may not be # the same as we passed in if it was deduplicated due transaction IDs. ( event, event_pos, max_stream_token, - ) = await self.storage.persistence.persist_event(event, context=context) + ) = await self.storage.persistence.persist_event( + event, context=context, backfilled=backfilled + ) if self._ephemeral_events_enabled: # If there's an expiry timestamp on the event, schedule its expiry. 
diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 18ddb92fcca5..190883b53c3f 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -15,8 +15,9 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import Dict, Iterable, List, Set, Tuple +from typing import Optional, Dict, Iterable, List, Set, Tuple +from synapse.api.constants import MAX_DEPTH from synapse.api.errors import StoreError from synapse.events import EventBase from synapse.metrics.background_process_metrics import wrap_as_background_process @@ -530,8 +531,8 @@ def get_oldest_events_with_depth_in_room_txn(self, txn, room_id): return dict(txn) - async def get_max_depth_of(self, event_ids: List[str]) -> int: - """Returns the max depth of a set of event IDs + async def get_max_depth_of(self, event_ids: List[str]) -> Tuple[str, int]: + """Returns the event ID and depth for the event that has the max depth from a set of event IDs Args: event_ids: The event IDs to calculate the max depth of. @@ -540,14 +541,53 @@ async def get_max_depth_of(self, event_ids: List[str]) -> int: table="events", column="event_id", iterable=event_ids, - retcols=("depth",), + retcols=( + "event_id", + "depth", + ), desc="get_max_depth_of", ) if not rows: - return 0 + return None, 0 else: - return max(row["depth"] for row in rows) + max_depth_event_id = "" + current_max_depth = 0 + for row in rows: + if row["depth"] > current_max_depth: + max_depth_event_id = row["event_id"] + current_max_depth = row["depth"] + + return max_depth_event_id, current_max_depth + + async def get_min_depth_of(self, event_ids: List[str]) -> Tuple[str, int]: + """Returns the event ID and depth for the event that has the min depth from a set of event IDs + + Args: + event_ids: The event IDs to calculate the max depth of. 
+ """ + rows = await self.db_pool.simple_select_many_batch( + table="events", + column="event_id", + iterable=event_ids, + retcols=( + "event_id", + "depth", + ), + desc="get_min_depth_of", + ) + + if not rows: + return None, 0 + else: + min_depth_event_id = "" + current_min_depth = MAX_DEPTH + for row in rows: + if row["depth"] < current_min_depth: + min_depth_event_id = row["event_id"] + current_min_depth = row["depth"] + + return min_depth_event_id, current_min_depth async def get_prev_events_for_room(self, room_id: str) -> List[str]: """ diff --git a/synapse/visibility.py b/synapse/visibility.py index 79890d11f5a7..24acf9892001 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -50,7 +50,8 @@ def filter_historical_events(events: Iterable[EventBase]) -> List[EventBase]: """Return a new list with historical events removed from the input.""" - return [e for e in events if not e.content.get("m.historical", None)] + # return [e for e in events if not e.content.get("m.historical", None)] + return events async def filter_events_for_client( From e12a77d288d60879bcacc2000dcd7ae4158b9d32 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 11 Mar 2021 00:48:22 -0600 Subject: [PATCH 35/83] Fix depth on historical forward extremeties --- synapse/events/builder.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index e520e2a9b289..5a158df2d3a5 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -168,11 +168,12 @@ async def build( ) logger.info("sucessor_event_ids %s", sucessor_event_ids) - # If we can't find any successor events, this should only happen when - # you're inserting onto a forward extremity so I guess we should just - # progress the depth as normal. 
+ # If we can't find any successor events, then it's a forward extremity of + # historical messages nd we can just inherit from the previous historical + # event which we can already assume has the correct depth where we want + # to insert into. if not sucessor_event_ids: - depth = most_recent_prev_event_depth + 1 + depth = most_recent_prev_event_depth else: ( _, @@ -186,6 +187,8 @@ async def build( else: depth = most_recent_prev_event_depth + 1 + logger.info("depth %s", depth) + # we cap depth of generated events, to ensure that they are not # rejected by other servers (and so that they can be persisted in # the db) From 8549219b53a610bb47cbbca83f6b7492bb1c2c00 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 12 Mar 2021 21:14:54 -0600 Subject: [PATCH 36/83] 403 when non appservice trying to use prev_event --- synapse/rest/client/v1/room.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index f7429b90796f..e09d8033c4ca 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -236,7 +236,13 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): inherit_depth = False prev_events = parse_strings_from_args(request.args, "prev_event") if self._msc2716_enabled: - if prev_events and requester.app_service: + if prev_events: + if not requester.app_service: + raise AuthError( + 403, + "Only application services can use the ?prev_event query paramtera", + ) + event_dict["prev_events"] = prev_events # If backfilling old messages, let's just use the same depth of what we're inserting next to inherit_depth = True From 23d037942fae8edf5e1581def4753e3cf0d5be8e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 30 Mar 2021 01:55:11 -0500 Subject: [PATCH 37/83] Bulk send endpoint for backfilling history (MSC2716) Spawned from discussion in https://github.com/matrix-org/matrix-doc/pull/2716#discussion_r598896168 Other PR with MSC2716 
attempt: https://github.com/matrix-org/synapse/pull/9247 --- synapse/handlers/message.py | 3 ++ synapse/rest/client/v1/room.py | 68 ++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 254ada01072e..fc7ab588d717 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -455,6 +455,7 @@ async def create_event( inherit_depth: bool = False, auth_event_ids: Optional[List[str]] = None, require_consent: bool = True, + outlier: bool = False, ) -> Tuple[EventBase, EventContext]: """ Given a dict from a client, create a new event. @@ -539,6 +540,8 @@ async def create_event( if txn_id is not None: builder.internal_metadata.txn_id = txn_id + builder.internal_metadata.outlier = outlier + event, context = await self.create_new_client_event( builder=builder, requester=requester, diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 8bd86a5bd746..ce744f8115ac 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -284,6 +284,74 @@ def on_PUT(self, request, room_id, event_type, txn_id): ) +class RoomBulkSendEventRestServlet(TransactionRestServlet): + def __init__(self, hs): + super().__init__(hs) + self.event_creation_handler = hs.get_event_creation_handler() + self.auth = hs.get_auth() + + self._msc2716_enabled = hs.config.experimental.msc2716_enabled + + def register(self, http_server): + # /rooms/$roomid/bulksend + PATTERNS = "/rooms/(?P[^/]*)/bulksend" + register_txn_path(self, PATTERNS, http_server, with_get=True) + + async def on_POST(self, request, room_id): + requester = await self.auth.get_user_by_req(request, allow_guest=False) + body = parse_json_object_from_request(request) + prev_events = parse_strings_from_args(request.args, "prev_event") + + auth_event_ids = [] + for stateEv in body.state_events_at_start: + event_dict = { + "type": stateEv.type, + "content": stateEv.content, + "room_id": room_id, + 
"sender": stateEv.sender, # requester.user.to_string(), + } + + # Make the events float off in their own + fake_prev_event_id = "$" + random_string(43) + + # Also mark the event as an outlier outside of the normal DAG + (event, _,) = await self.event_creation_handler.create_event( + requester, event_dict, prev_event_ids=[fake_prev_event_id], outlier=True + ) + event_id = event.event_id + auth_event_ids.append(event_id) + + prev_event_ids = prev_events + for ev in body.events: + event_dict = { + "type": ev.type, + "content": ev.content, + "room_id": room_id, + "sender": ev.sender, # requester.user.to_string(), + } + + (event, _,) = await self.event_creation_handler.create_event( + requester, + event_dict, + # We are allowed to post these messages because we are referencing the + # floating auth state events that we just created above + auth_event_ids=auth_event_ids, + prev_event_ids=prev_event_ids, + inherit_depth=True, + ) + event_id = event.event_id + + prev_event_ids = [event_id] + + return 200, {"foo": "bar"} + + def on_GET(self, request, room_id, event_type, txn_id): + return 501, "Not implemented" + + def on_PUT(self, request, room_id, event_type, txn_id): + return 501, "Not implemented" + + # TODO: Needs unit testing for room ID + alias joins class JoinRoomAliasServlet(TransactionRestServlet): def __init__(self, hs): From ea37564acce840c438427d627dbaaa166d6931a0 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Mar 2021 02:01:01 -0500 Subject: [PATCH 38/83] Add bulk send endpoint As described from https://github.com/matrix-org/matrix-doc/pull/2716#discussion_r598896168 --- scripts-dev/complement.sh | 2 +- synapse/events/builder.py | 5 +++ synapse/handlers/message.py | 2 ++ synapse/rest/client/v1/room.py | 63 ++++++++++++++++++++++++++-------- 4 files changed, 57 insertions(+), 15 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index b28446e399d1..ea9ca9a4a07d 100755 --- a/scripts-dev/complement.sh +++ 
b/scripts-dev/complement.sh @@ -17,4 +17,4 @@ cd ../complement docker build -t complement-synapse -f dockerfiles/Synapse.Dockerfile ./dockerfiles # Run the tests on the resulting image! -COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go -run TestBackfillingHistory/parallel/Backfilled_messages_come_back_in_correct_order +COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go -run TestBackfillingHistory/parallel/Backfilled_historical_events_resolve_with_proper_state diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 5a158df2d3a5..82f0ff3c1857 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -138,6 +138,11 @@ async def build( auth_events = auth_event_ids prev_events = prev_event_ids + logger.info( + "prev_events %s", + prev_events, + ) + ( most_recent_prev_event_id, most_recent_prev_event_depth, diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index fc7ab588d717..55b3d8d0e6a7 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -705,6 +705,7 @@ async def create_and_send_nonmember_event( requester: Requester, event_dict: dict, inherit_depth: bool = False, + auth_event_ids: Optional[List[str]] = None, ratelimit: bool = True, txn_id: Optional[str] = None, ignore_shadow_ban: bool = False, @@ -768,6 +769,7 @@ async def create_and_send_nonmember_event( event_dict, txn_id=txn_id, prev_event_ids=prev_events, + auth_event_ids=auth_event_ids, inherit_depth=inherit_depth, ) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index ce744f8115ac..a0087550a9bf 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -300,15 +300,23 @@ def register(self, http_server): async def on_POST(self, request, room_id): requester = await self.auth.get_user_by_req(request, allow_guest=False) body = 
parse_json_object_from_request(request) - prev_events = parse_strings_from_args(request.args, "prev_event") + assert_params_in_dict(body, ["state_events_at_start", "events"]) + + prev_events_from_query = parse_strings_from_args(request.args, "prev_event") + + logger.info("body waewefaew %s", body) auth_event_ids = [] - for stateEv in body.state_events_at_start: + for stateEv in body["state_events_at_start"]: + logger.info("stateEv %s", stateEv) + assert_params_in_dict(stateEv, ["type", "content", "sender"]) + event_dict = { - "type": stateEv.type, - "content": stateEv.content, + "type": stateEv["type"], + "content": stateEv["content"], "room_id": room_id, - "sender": stateEv.sender, # requester.user.to_string(), + "sender": stateEv["sender"], # requester.user.to_string(), + "state_key": stateEv["state_key"], } # Make the events float off in their own @@ -321,15 +329,34 @@ async def on_POST(self, request, room_id): event_id = event.event_id auth_event_ids.append(event_id) - prev_event_ids = prev_events - for ev in body.events: + logger.info("Done with state events grrtrsrdhh %s", auth_event_ids) + + event_ids = [] + prev_event_ids = prev_events_from_query + for ev in body["events"]: + logger.info("ev %s", ev) + assert_params_in_dict(ev, ["type", "content", "sender"]) + event_dict = { - "type": ev.type, - "content": ev.content, + "type": ev["type"], + "content": ev["content"], "room_id": room_id, - "sender": ev.sender, # requester.user.to_string(), + "sender": ev["sender"], # requester.user.to_string(), + "prev_events": prev_event_ids, } + # ( + # event, + # _, + # ) = await self.event_creation_handler.create_and_send_nonmember_event( + # requester, + # event_dict, + # inherit_depth=True, + # # We are allowed to post these messages because we are referencing the + # # floating auth state events that we just created above + # auth_event_ids=auth_event_ids, + # ) + (event, _,) = await self.event_creation_handler.create_event( requester, event_dict, @@ -340,17 +367,24 
@@ async def on_POST(self, request, room_id): inherit_depth=True, ) event_id = event.event_id + event_ids.append(event_id) + + logger.info("event esrgegrerg %s", event) prev_event_ids = [event_id] - return 200, {"foo": "bar"} + logger.info("Done with events afeefwaefw") - def on_GET(self, request, room_id, event_type, txn_id): - return 501, "Not implemented" + return 200, {"state_events": auth_event_ids, "events": event_ids} - def on_PUT(self, request, room_id, event_type, txn_id): + def on_GET(self, request, room_id): return 501, "Not implemented" + def on_PUT(self, request, room_id): + return self.txns.fetch_or_execute_request( + request, self.on_POST, request, room_id + ) + # TODO: Needs unit testing for room ID + alias joins class JoinRoomAliasServlet(TransactionRestServlet): @@ -1140,6 +1174,7 @@ def register_servlets(hs: "HomeServer", http_server, is_worker=False): JoinRoomAliasServlet(hs).register(http_server) RoomMembershipRestServlet(hs).register(http_server) RoomSendEventRestServlet(hs).register(http_server) + RoomBulkSendEventRestServlet(hs).register(http_server) PublicRoomListRestServlet(hs).register(http_server) RoomStateRestServlet(hs).register(http_server) RoomRedactEventRestServlet(hs).register(http_server) From 7008ee0d574ba75791ea32e7661a568e6de9422f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 31 Mar 2021 02:32:57 -0500 Subject: [PATCH 39/83] Skip everything and just persist the event --- synapse/rest/client/v1/room.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index a0087550a9bf..3b8e38644af9 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -357,7 +357,7 @@ async def on_POST(self, request, room_id): # auth_event_ids=auth_event_ids, # ) - (event, _,) = await self.event_creation_handler.create_event( + event, context = await self.event_creation_handler.create_event( requester, event_dict, # We are 
allowed to post these messages because we are referencing the @@ -366,6 +366,18 @@ async def on_POST(self, request, room_id): prev_event_ids=prev_event_ids, inherit_depth=True, ) + # await self.event_creation_handler.persist_and_notify_client_event( + # requester, event, context, ratelimit=False + # ) + + ( + event, + _, + _, + ) = await self.event_creation_handler.storage.persistence.persist_event( + event, context=context, backfilled=True + ) + event_id = event.event_id event_ids.append(event_id) From 0d4736f1ab759398aa0cdbc5c7916e66e038f766 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 6 Apr 2021 01:59:06 -0500 Subject: [PATCH 40/83] Scratch commit trying to use existing functions --- synapse/api/auth.py | 2 + synapse/handlers/message.py | 9 +++- synapse/handlers/room_member.py | 6 +++ synapse/rest/client/v1/room.py | 89 ++++++++++++++++++++++----------- synapse/state/__init__.py | 5 ++ 5 files changed, 79 insertions(+), 32 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 7d9930ae7b7c..1ce598663cdd 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -88,9 +88,11 @@ async def check_from_context( self, room_version: str, event, context, do_sig_check=True ): prev_state_ids = await context.get_prev_state_ids() + logger.info("check_from_context prev_state_ids %s", prev_state_ids) auth_events_ids = self.compute_auth_events( event, prev_state_ids, for_verification=True ) + logger.info("check_from_context auth_events_ids %s", auth_events_ids) auth_events = await self.store.get_events(auth_events_ids) auth_events = {(e.type, e.state_key): e for e in auth_events.values()} diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 55b3d8d0e6a7..7c5001eacf1a 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -16,7 +16,7 @@ # limitations under the License. 
import logging import random -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple from canonicaljson import encode_canonical_json @@ -454,6 +454,7 @@ async def create_event( prev_event_ids: Optional[List[str]] = None, inherit_depth: bool = False, auth_event_ids: Optional[List[str]] = None, + state_for_events: Optional[Iterable[EventBase]] = None, require_consent: bool = True, outlier: bool = False, ) -> Tuple[EventBase, EventContext]: @@ -548,6 +549,7 @@ async def create_event( prev_event_ids=prev_event_ids, inherit_depth=inherit_depth, auth_event_ids=auth_event_ids, + state_for_events=state_for_events, ) # In an ideal world we wouldn't need the second part of this condition. However, @@ -706,9 +708,11 @@ async def create_and_send_nonmember_event( event_dict: dict, inherit_depth: bool = False, auth_event_ids: Optional[List[str]] = None, + state_for_events: Optional[Iterable[EventBase]] = None, ratelimit: bool = True, txn_id: Optional[str] = None, ignore_shadow_ban: bool = False, + outlier: bool = False, ) -> Tuple[EventBase, int]: """ Creates an event, then sends it. 
@@ -803,6 +807,7 @@ async def create_new_client_event( prev_event_ids: Optional[List[str]] = None, inherit_depth: bool = False, auth_event_ids: Optional[List[str]] = None, + state_for_events: Optional[Iterable[EventBase]] = None, ) -> Tuple[EventBase, EventContext]: """Create a new event for a local client @@ -850,7 +855,7 @@ async def create_new_client_event( auth_event_ids=auth_event_ids, inherit_depth=inherit_depth, ) - context = await self.state.compute_event_context(event) + context = await self.state.compute_event_context(event, state_for_events) if requester: context.app_service = requester.app_service diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 4d20ed835764..1fe478d555c6 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -180,6 +180,7 @@ async def _local_membership_update( ratelimit: bool = True, content: Optional[dict] = None, require_consent: bool = True, + outlier: bool = False, ) -> Tuple[str, int]: user_id = target.to_string() @@ -218,6 +219,7 @@ async def _local_membership_update( txn_id=txn_id, prev_event_ids=prev_event_ids, require_consent=require_consent, + outlier=outlier ) prev_state_ids = await context.get_prev_state_ids() @@ -312,6 +314,7 @@ async def update_membership( ratelimit: bool = True, content: Optional[dict] = None, require_consent: bool = True, + outlier: bool = False, ) -> Tuple[str, int]: """Update a user's membership in a room. @@ -352,6 +355,7 @@ async def update_membership( ratelimit=ratelimit, content=content, require_consent=require_consent, + outlier=outlier, ) return result @@ -368,6 +372,7 @@ async def update_membership_locked( ratelimit: bool = True, content: Optional[dict] = None, require_consent: bool = True, + outlier: bool = False, ) -> Tuple[str, int]: """Helper for update_membership. 
@@ -623,6 +628,7 @@ async def update_membership_locked( prev_event_ids=latest_event_ids, content=content, require_consent=require_consent, + outlier=outlier ) async def transfer_room_state_on_room_upgrade( diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 3b8e38644af9..bc00bb7cf649 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -287,7 +287,9 @@ def on_PUT(self, request, room_id, event_type, txn_id): class RoomBulkSendEventRestServlet(TransactionRestServlet): def __init__(self, hs): super().__init__(hs) + self.store = hs.get_datastore() self.event_creation_handler = hs.get_event_creation_handler() + self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() self._msc2716_enabled = hs.config.experimental.msc2716_enabled @@ -306,6 +308,7 @@ async def on_POST(self, request, room_id): logger.info("body waewefaew %s", body) + state_for_events = [] auth_event_ids = [] for stateEv in body["state_events_at_start"]: logger.info("stateEv %s", stateEv) @@ -322,12 +325,36 @@ async def on_POST(self, request, room_id): # Make the events float off in their own fake_prev_event_id = "$" + random_string(43) - # Also mark the event as an outlier outside of the normal DAG - (event, _,) = await self.event_creation_handler.create_event( - requester, event_dict, prev_event_ids=[fake_prev_event_id], outlier=True - ) - event_id = event.event_id - auth_event_ids.append(event_id) + # # Also mark the event as an outlier outside of the normal DAG + # (event, _,) = await self.event_creation_handler.create_event( + # requester, event_dict, prev_event_ids=[fake_prev_event_id], outlier=True + # ) + + if stateEv["type"] == EventTypes.Member: + membership = stateEv["content"].get("membership", None) + event_id, _ = await self.room_member_handler.update_membership( + requester, + target=UserID.from_string(stateEv["state_key"]), + room_id=room_id, + action=membership, + content=stateEv["content"], + 
outlier=True + ) + auth_event = await self.store.get_event(event_id) + state_for_events.append(auth_event) + auth_event_ids.append(event_id) + else: + ( + event, + _, + ) = await self.event_creation_handler.create_and_send_nonmember_event( + requester, + event_dict, + outlier=True, + ) + state_for_events.append(event) + event_id = event.event_id + auth_event_ids.append(event_id) logger.info("Done with state events grrtrsrdhh %s", auth_event_ids) @@ -345,38 +372,40 @@ async def on_POST(self, request, room_id): "prev_events": prev_event_ids, } - # ( - # event, - # _, - # ) = await self.event_creation_handler.create_and_send_nonmember_event( - # requester, - # event_dict, - # inherit_depth=True, - # # We are allowed to post these messages because we are referencing the - # # floating auth state events that we just created above - # auth_event_ids=auth_event_ids, - # ) - - event, context = await self.event_creation_handler.create_event( + ( + event, + _, + ) = await self.event_creation_handler.create_and_send_nonmember_event( requester, event_dict, + inherit_depth=True, # We are allowed to post these messages because we are referencing the # floating auth state events that we just created above auth_event_ids=auth_event_ids, - prev_event_ids=prev_event_ids, - inherit_depth=True, + state_for_events=state_for_events, + outlier=True, ) - # await self.event_creation_handler.persist_and_notify_client_event( - # requester, event, context, ratelimit=False + + # event, context = await self.event_creation_handler.create_event( + # requester, + # event_dict, + # # We are allowed to post these messages because we are referencing the + # # floating auth state events that we just created above + # auth_event_ids=auth_event_ids, + # prev_event_ids=prev_event_ids, + # inherit_depth=True, # ) + # # await self.event_creation_handler.persist_and_notify_client_event( + # # requester, event, context, ratelimit=False + # # ) - ( - event, - _, - _, - ) = await 
self.event_creation_handler.storage.persistence.persist_event( - event, context=context, backfilled=True - ) + # ( + # event, + # _, + # _, + # ) = await self.event_creation_handler.storage.persistence.persist_event( + # event, context=context, backfilled=True + # ) event_id = event.event_id event_ids.append(event_id) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index c3d6e80c49f7..c8a7512f8d91 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -268,6 +268,11 @@ async def compute_event_context( The event context. """ + logger.info( + "compute_event_context is_outlier=%s event_id=%s", + event.internal_metadata.is_outlier(), + event.event_id, + ) if event.internal_metadata.is_outlier(): # If this is an outlier, then we know it shouldn't have any current # state. Certainly store.get_current_state won't return any, and From 49631e5c3298319aaa1d472daaf6bd14a533937c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 6 Apr 2021 03:16:58 -0500 Subject: [PATCH 41/83] Scratch commit 2 --- synapse/handlers/message.py | 11 +++++++++++ synapse/handlers/room_member.py | 10 ++++++++-- synapse/rest/client/v1/room.py | 24 ++++++++++++++++++++---- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 7c5001eacf1a..85af8a9b23f1 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -541,6 +541,11 @@ async def create_event( if txn_id is not None: builder.internal_metadata.txn_id = txn_id + logger.info("setting internal_metadata.outlier %s", outlier) + # Setting the outlier on the builder does not work to propogate it over to the event + # because `outlier` is not part of the `interal_metadata_dict` that is serialized later. + # Not sure why it is separate. 
+ # See https://github.com/matrix-org/synapse/pull/9247#r607595779 builder.internal_metadata.outlier = outlier event, context = await self.create_new_client_event( @@ -775,6 +780,7 @@ async def create_and_send_nonmember_event( prev_event_ids=prev_events, auth_event_ids=auth_event_ids, inherit_depth=inherit_depth, + outlier=outlier, ) assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( @@ -855,6 +861,11 @@ async def create_new_client_event( auth_event_ids=auth_event_ids, inherit_depth=inherit_depth, ) + + # Pass on the outlier property from the builder to the event + if builder.internal_metadata.outlier: + event.internal_metadata.outlier = builder.internal_metadata.outlier + context = await self.state.compute_event_context(event, state_for_events) if requester: context.app_service = requester.app_service diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 1fe478d555c6..aae6f203007c 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -181,6 +181,7 @@ async def _local_membership_update( content: Optional[dict] = None, require_consent: bool = True, outlier: bool = False, + state_for_events: Optional[Iterable[EventBase]] = None, ) -> Tuple[str, int]: user_id = target.to_string() @@ -219,7 +220,8 @@ async def _local_membership_update( txn_id=txn_id, prev_event_ids=prev_event_ids, require_consent=require_consent, - outlier=outlier + outlier=outlier, + state_for_events=state_for_events, ) prev_state_ids = await context.get_prev_state_ids() @@ -315,6 +317,7 @@ async def update_membership( content: Optional[dict] = None, require_consent: bool = True, outlier: bool = False, + state_for_events: Optional[Iterable[EventBase]] = None, ) -> Tuple[str, int]: """Update a user's membership in a room. 
@@ -356,6 +359,7 @@ async def update_membership( content=content, require_consent=require_consent, outlier=outlier, + state_for_events=state_for_events, ) return result @@ -373,6 +377,7 @@ async def update_membership_locked( content: Optional[dict] = None, require_consent: bool = True, outlier: bool = False, + state_for_events: Optional[Iterable[EventBase]] = None, ) -> Tuple[str, int]: """Helper for update_membership. @@ -628,7 +633,8 @@ async def update_membership_locked( prev_event_ids=latest_event_ids, content=content, require_consent=require_consent, - outlier=outlier + outlier=outlier, + state_for_events=state_for_events, ) async def transfer_room_state_on_room_upgrade( diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index bc00bb7cf649..02d72e03d20e 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -308,8 +308,22 @@ async def on_POST(self, request, room_id): logger.info("body waewefaew %s", body) + # current_state_ids = await self.store.get_current_state_ids( + # room_id, + # ) + # current_auth_event_ids = self.auth.compute_auth_events( + # # TODO, + # asdf, + # current_state_ids + # ) + state_for_events = [] auth_event_ids = [] + + create_event = await self.store.get_create_event_for_room(room_id) + state_for_events.append(create_event) + auth_event_ids.append(create_event.event_id) + for stateEv in body["state_events_at_start"]: logger.info("stateEv %s", stateEv) assert_params_in_dict(stateEv, ["type", "content", "sender"]) @@ -322,7 +336,7 @@ async def on_POST(self, request, room_id): "state_key": stateEv["state_key"], } - # Make the events float off in their own + # Make the events float off on their own fake_prev_event_id = "$" + random_string(43) # # Also mark the event as an outlier outside of the normal DAG @@ -338,7 +352,9 @@ async def on_POST(self, request, room_id): room_id=room_id, action=membership, content=stateEv["content"], - outlier=True + outlier=True, + # TODO: + # 
state_for_events=current_auth_event_ids, ) auth_event = await self.store.get_event(event_id) state_for_events.append(auth_event) @@ -382,8 +398,8 @@ async def on_POST(self, request, room_id): # We are allowed to post these messages because we are referencing the # floating auth state events that we just created above auth_event_ids=auth_event_ids, - state_for_events=state_for_events, - outlier=True, + # state_for_events=state_for_events, + # outlier=True, ) # event, context = await self.event_creation_handler.create_event( From 06dccec2ddebdcc11a0ea932746080550342e511 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 7 Apr 2021 01:28:53 -0500 Subject: [PATCH 42/83] Scratch commit 3 --- synapse/rest/client/v1/room.py | 35 +++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 02d72e03d20e..da702e4f0c16 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -27,10 +27,15 @@ Codes, HttpResponseException, InvalidClientCredentialsError, + NotFoundError, ShadowBanError, SynapseError, ) +from synapse.api.room_versions import ( + KNOWN_ROOM_VERSIONS, +) from synapse.api.filtering import Filter +from synapse.events import make_event_from_dict from synapse.events.utils import format_event_for_client_v2 from synapse.http.servlet import ( RestServlet, @@ -308,15 +313,6 @@ async def on_POST(self, request, room_id): logger.info("body waewefaew %s", body) - # current_state_ids = await self.store.get_current_state_ids( - # room_id, - # ) - # current_auth_event_ids = self.auth.compute_auth_events( - # # TODO, - # asdf, - # current_state_ids - # ) - state_for_events = [] auth_event_ids = [] @@ -344,6 +340,24 @@ async def on_POST(self, request, room_id): # requester, event_dict, prev_event_ids=[fake_prev_event_id], outlier=True # ) + try: + room_version_id = await self.store.get_room_version_id( + event_dict["room_id"] + ) + room_version 
= KNOWN_ROOM_VERSIONS.get(room_version_id) + except NotFoundError: + raise AuthError(403, "Unknown room") + + current_state_ids = await self.store.get_current_state_ids( + event_dict["room_id"], + ) + current_auth_event_ids = self.auth.compute_auth_events( + make_event_from_dict(event_dict, room_version), + current_state_ids, + ) + current_auth_event_map = await self.store.get_events(current_auth_event_ids) + current_auth_events = current_auth_event_map.values() + if stateEv["type"] == EventTypes.Member: membership = stateEv["content"].get("membership", None) event_id, _ = await self.room_member_handler.update_membership( @@ -353,8 +367,7 @@ async def on_POST(self, request, room_id): action=membership, content=stateEv["content"], outlier=True, - # TODO: - # state_for_events=current_auth_event_ids, + state_for_events=current_auth_events, ) auth_event = await self.store.get_event(event_id) state_for_events.append(auth_event) From cbd16b880253dae67282d48e9606f2572c2b1084 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 9 Apr 2021 23:12:10 -0500 Subject: [PATCH 43/83] Working bulksend endpoint for sending state and historical messages (authed) Copy repo state from https://github.com/matrix-org/synapse/pull/9759 --- synapse/api/auth.py | 14 +- synapse/config/experimental.py | 5 +- synapse/events/builder.py | 17 -- synapse/events/utils.py | 14 +- synapse/handlers/message.py | 43 ++--- synapse/handlers/room_member.py | 283 +++++++++++++++++--------------- synapse/handlers/sync.py | 12 +- synapse/rest/client/v1/room.py | 134 ++++----------- synapse/state/__init__.py | 5 - synapse/storage/state.py | 2 +- synapse/visibility.py | 8 +- 11 files changed, 218 insertions(+), 319 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index 1ce598663cdd..b64b14e736ad 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -87,13 +87,13 @@ def __init__(self, hs): async def check_from_context( self, room_version: str, event, context, do_sig_check=True ): - 
prev_state_ids = await context.get_prev_state_ids() - logger.info("check_from_context prev_state_ids %s", prev_state_ids) - auth_events_ids = self.compute_auth_events( - event, prev_state_ids, for_verification=True - ) - logger.info("check_from_context auth_events_ids %s", auth_events_ids) - auth_events = await self.store.get_events(auth_events_ids) + auth_event_ids = event.auth_event_ids() + if auth_event_ids == None: + prev_state_ids = await context.get_prev_state_ids() + auth_event_ids = self.compute_auth_events( + event, prev_state_ids, for_verification=True + ) + auth_events = await self.store.get_events(auth_event_ids) auth_events = {(e.type, e.state_key): e for e in auth_events.values()} room_version_obj = KNOWN_ROOM_VERSIONS[room_version] diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index 6283917bf678..47d67eb8f6f3 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -28,8 +28,7 @@ def read_config(self, config: JsonDict, **kwargs): # MSC2858 (multiple SSO identity providers) self.msc2858_enabled = experimental.get("msc2858_enabled", False) # type: bool - # MSC2716 (backfill existing history) - self.msc2716_enabled = experimental.get("msc2716_enabled", False) # type: bool + # Spaces (MSC1772, MSC2946, MSC3083, etc) self.spaces_enabled = experimental.get("spaces_enabled", False) # type: bool if self.spaces_enabled: @@ -37,3 +36,5 @@ def read_config(self, config: JsonDict, **kwargs): # MSC3026 (busy presence state) self.msc3026_enabled = experimental.get("msc3026_enabled", False) # type: bool + # MSC2716 (backfill existing history) + self.msc2716_enabled = experimental.get("msc2716_enabled", False) # type: bool diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 82f0ff3c1857..f9924a524eed 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -113,8 +113,6 @@ async def build( auth_event_ids: The event IDs to use as the auth events. 
Should normally be set to None, which will cause them to be calculated based on the room state at the prev_events. - inherit_depth: True to use the oldest depth from the prev_event_ids - (instead of calculating a new depth). Returns: The signed and hashed event. @@ -138,20 +136,10 @@ async def build( auth_events = auth_event_ids prev_events = prev_event_ids - logger.info( - "prev_events %s", - prev_events, - ) - ( most_recent_prev_event_id, most_recent_prev_event_depth, ) = await self._store.get_max_depth_of(prev_event_ids) - logger.info( - "event_id_with_max_depth %s %s", - most_recent_prev_event_id, - most_recent_prev_event_depth, - ) # We want to insert the historical event after the `prev_event` but before the successor event # @@ -171,7 +159,6 @@ async def build( sucessor_event_ids = await self._store.get_successor_events( [most_recent_prev_event_id] ) - logger.info("sucessor_event_ids %s", sucessor_event_ids) # If we can't find any successor events, then it's a forward extremity of # historical messages nd we can just inherit from the previous historical @@ -185,15 +172,11 @@ async def build( oldest_successor_depth, ) = await self._store.get_min_depth_of(sucessor_event_ids) - logger.info("oldest_successor_depth %s", oldest_successor_depth) - depth = oldest_successor_depth # Otherwise, progress the depth as normal else: depth = most_recent_prev_event_depth + 1 - logger.info("depth %s", depth) - # we cap depth of generated events, to ensure that they are not # rejected by other servers (and so that they can be persisted in # the db) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 023bf74a6301..0f8a3b5ad839 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -253,13 +253,13 @@ def format_event_for_client_v1(d): def format_event_for_client_v2(d): drop_keys = ( - # "auth_events", - # "prev_events", - # "hashes", - # "signatures", - # "depth", - # "origin", - # "prev_state", + "auth_events", + "prev_events", + "hashes", + 
"signatures", + "depth", + "origin", + "prev_state", ) for key in drop_keys: d.pop(key, None) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index e08b1a3551b5..20b177a37928 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -16,7 +16,7 @@ # limitations under the License. import logging import random -from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple +from typing import TYPE_CHECKING, Dict, List, Optional, Tuple from canonicaljson import encode_canonical_json @@ -454,11 +454,10 @@ async def create_event( event_dict: dict, txn_id: Optional[str] = None, prev_event_ids: Optional[List[str]] = None, - inherit_depth: bool = False, auth_event_ids: Optional[List[str]] = None, - state_for_events: Optional[Iterable[EventBase]] = None, require_consent: bool = True, outlier: bool = False, + inherit_depth: bool = False, ) -> Tuple[EventBase, EventContext]: """ Given a dict from a client, create a new event. @@ -478,9 +477,6 @@ async def create_event( If None, they will be requested from the database. - inherit_depth: True to use the oldest depth from the prev_event_ids - (instead of calculating a new depth). - auth_event_ids: The event ids to use as the auth_events for the new event. Should normally be left as None, which will cause them to be calculated @@ -543,20 +539,14 @@ async def create_event( if txn_id is not None: builder.internal_metadata.txn_id = txn_id - logger.info("setting internal_metadata.outlier %s", outlier) - # Setting the outlier on the builder does not work to propogate it over to the event - # because `outlier` is not part of the `interal_metadata_dict` that is serialized later. - # Not sure why it is separate. 
- # See https://github.com/matrix-org/synapse/pull/9247#r607595779 builder.internal_metadata.outlier = outlier event, context = await self.create_new_client_event( builder=builder, requester=requester, prev_event_ids=prev_event_ids, - inherit_depth=inherit_depth, auth_event_ids=auth_event_ids, - state_for_events=state_for_events, + inherit_depth=inherit_depth, ) # In an ideal world we wouldn't need the second part of this condition. However, @@ -713,13 +703,12 @@ async def create_and_send_nonmember_event( self, requester: Requester, event_dict: dict, - inherit_depth: bool = False, auth_event_ids: Optional[List[str]] = None, - state_for_events: Optional[Iterable[EventBase]] = None, ratelimit: bool = True, txn_id: Optional[str] = None, ignore_shadow_ban: bool = False, outlier: bool = False, + inherit_depth: bool = False, ) -> Tuple[EventBase, int]: """ Creates an event, then sends it. @@ -729,8 +718,6 @@ async def create_and_send_nonmember_event( Args: requester: The requester sending the event. event_dict: An entire event. - inherit_depth: True to use the oldest depth from the event_dict["prev_events"] - (instead of calculating a new depth). ratelimit: Whether to rate limit this send. txn_id: The transaction ID. 
ignore_shadow_ban: True if shadow-banned users should be allowed to @@ -781,8 +768,8 @@ async def create_and_send_nonmember_event( txn_id=txn_id, prev_event_ids=prev_events, auth_event_ids=auth_event_ids, - inherit_depth=inherit_depth, outlier=outlier, + inherit_depth=inherit_depth, ) assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( @@ -813,9 +800,8 @@ async def create_new_client_event( builder: EventBuilder, requester: Optional[Requester] = None, prev_event_ids: Optional[List[str]] = None, - inherit_depth: bool = False, auth_event_ids: Optional[List[str]] = None, - state_for_events: Optional[Iterable[EventBase]] = None, + inherit_depth: bool = False, ) -> Tuple[EventBase, EventContext]: """Create a new event for a local client @@ -828,9 +814,6 @@ async def create_new_client_event( If None, they will be requested from the database. - inherit_depth: True to use the oldest depth from the prev_event_ids - (instead of calculating a new depth). - auth_event_ids: The event ids to use as the auth_events for the new event. Should normally be left as None, which will cause them to be calculated @@ -864,11 +847,18 @@ async def create_new_client_event( inherit_depth=inherit_depth, ) + old_state = None + # Pass on the outlier property from the builder to the event + # after it is created if builder.internal_metadata.outlier: event.internal_metadata.outlier = builder.internal_metadata.outlier - context = await self.state.compute_event_context(event, state_for_events) + # For outliers that pass in their own auth_event_ids, let's calculate the state for them + if auth_event_ids: + old_state = await self.store.get_events_as_list(auth_event_ids) + + context = await self.state.compute_event_context(event, old_state=old_state) if requester: context.app_service = requester.app_service @@ -982,7 +972,7 @@ async def handle_new_client_event( # are invite rejections we have generated ourselves. 
assert event.type == EventTypes.Member assert event.content["membership"] == Membership.LEAVE - else: + else: # if not event.internal_metadata.is_outlier(): try: await self.auth.check_from_context(room_version, event, context) except AuthError as err: @@ -1281,8 +1271,6 @@ async def persist_and_notify_client_event( if event.content.get("m.historical", None): backfilled = True - logger.info("backfilled %s", backfilled) - # Note that this returns the event that was persisted, which may not be # the same as we passed in if it was deduplicated due transaction IDs. ( @@ -1463,7 +1451,6 @@ async def _rebuild_event_after_third_party_rules( event = await builder.build( prev_event_ids=original_event.prev_event_ids(), auth_event_ids=original_event.auth_event_ids(), - inherit_depth=False, ) # we rebuild the event context, to be on the safe side. If nothing else, diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 0f82affd34c0..f77a5f764c8f 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -242,12 +242,12 @@ async def _local_membership_update( room_id: str, membership: str, prev_event_ids: List[str], + auth_event_ids: Optional[List[str]] = None, txn_id: Optional[str] = None, ratelimit: bool = True, content: Optional[dict] = None, require_consent: bool = True, outlier: bool = False, - state_for_events: Optional[Iterable[EventBase]] = None, ) -> Tuple[str, int]: user_id = target.to_string() @@ -285,9 +285,9 @@ async def _local_membership_update( }, txn_id=txn_id, prev_event_ids=prev_event_ids, + auth_event_ids=auth_event_ids, require_consent=require_consent, outlier=outlier, - state_for_events=state_for_events, ) prev_state_ids = await context.get_prev_state_ids() @@ -399,7 +399,8 @@ async def update_membership( content: Optional[dict] = None, require_consent: bool = True, outlier: bool = False, - state_for_events: Optional[Iterable[EventBase]] = None, + prev_event_ids: Optional[List[str]] = None, + 
auth_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: """Update a user's membership in a room. @@ -441,7 +442,8 @@ async def update_membership( content=content, require_consent=require_consent, outlier=outlier, - state_for_events=state_for_events, + prev_event_ids=prev_event_ids, + auth_event_ids=auth_event_ids, ) return result @@ -459,7 +461,8 @@ async def update_membership_locked( content: Optional[dict] = None, require_consent: bool = True, outlier: bool = False, - state_for_events: Optional[Iterable[EventBase]] = None, + prev_event_ids: Optional[List[str]] = None, + auth_event_ids: Optional[List[str]] = None, ) -> Tuple[str, int]: """Helper for update_membership. @@ -548,160 +551,170 @@ async def update_membership_locked( if block_invite: raise SynapseError(403, "Invites have been disabled on this server") - latest_event_ids = await self.store.get_prev_events_for_room(room_id) + if prev_event_ids: + latest_event_ids = prev_event_ids + else: + latest_event_ids = await self.store.get_prev_events_for_room(room_id) - current_state_ids = await self.state_handler.get_current_state_ids( - room_id, latest_event_ids=latest_event_ids - ) + current_state_ids = await self.state_handler.get_current_state_ids( + room_id, latest_event_ids=latest_event_ids + ) - # TODO: Refactor into dictionary of explicitly allowed transitions - # between old and new state, with specific error messages for some - # transitions and generic otherwise - old_state_id = current_state_ids.get((EventTypes.Member, target.to_string())) - if old_state_id: - old_state = await self.store.get_event(old_state_id, allow_none=True) - old_membership = old_state.content.get("membership") if old_state else None - if action == "unban" and old_membership != "ban": - raise SynapseError( - 403, - "Cannot unban user who was not banned" - " (membership=%s)" % old_membership, - errcode=Codes.BAD_STATE, - ) - if old_membership == "ban" and action != "unban": - raise SynapseError( - 403, - "Cannot %s user 
who was banned" % (action,), - errcode=Codes.BAD_STATE, + # TODO: Refactor into dictionary of explicitly allowed transitions + # between old and new state, with specific error messages for some + # transitions and generic otherwise + old_state_id = current_state_ids.get( + (EventTypes.Member, target.to_string()) + ) + if old_state_id: + old_state = await self.store.get_event(old_state_id, allow_none=True) + old_membership = ( + old_state.content.get("membership") if old_state else None ) - - if old_state: - same_content = content == old_state.content - same_membership = old_membership == effective_membership_state - same_sender = requester.user.to_string() == old_state.sender - if same_sender and same_membership and same_content: - # duplicate event. - # we know it was persisted, so must have a stream ordering. - assert old_state.internal_metadata.stream_ordering - return ( - old_state.event_id, - old_state.internal_metadata.stream_ordering, + if action == "unban" and old_membership != "ban": + raise SynapseError( + 403, + "Cannot unban user who was not banned" + " (membership=%s)" % old_membership, + errcode=Codes.BAD_STATE, ) - - if old_membership in ["ban", "leave"] and action == "kick": - raise AuthError(403, "The target user is not in the room") - - # we don't allow people to reject invites to the server notice - # room, but they can leave it once they are joined. 
- if ( - old_membership == Membership.INVITE - and effective_membership_state == Membership.LEAVE - ): - is_blocked = await self._is_server_notice_room(room_id) - if is_blocked: + if old_membership == "ban" and action != "unban": raise SynapseError( - HTTPStatus.FORBIDDEN, - "You cannot reject this invite", - errcode=Codes.CANNOT_LEAVE_SERVER_NOTICE_ROOM, + 403, + "Cannot %s user who was banned" % (action,), + errcode=Codes.BAD_STATE, ) - else: - if action == "kick": - raise AuthError(403, "The target user is not in the room") - is_host_in_room = await self._is_host_in_room(current_state_ids) + if old_state: + same_content = content == old_state.content + same_membership = old_membership == effective_membership_state + same_sender = requester.user.to_string() == old_state.sender + if same_sender and same_membership and same_content: + # duplicate event. + # we know it was persisted, so must have a stream ordering. + assert old_state.internal_metadata.stream_ordering + return ( + old_state.event_id, + old_state.internal_metadata.stream_ordering, + ) - if effective_membership_state == Membership.JOIN: - if requester.is_guest: - guest_can_join = await self._can_guest_join(current_state_ids) - if not guest_can_join: - # This should be an auth check, but guests are a local concept, - # so don't really fit into the general auth process. - raise AuthError(403, "Guest access not allowed") + if old_membership in ["ban", "leave"] and action == "kick": + raise AuthError(403, "The target user is not in the room") - if not is_host_in_room: - if ratelimit: - time_now_s = self.clock.time() - ( - allowed, - time_allowed, - ) = await self._join_rate_limiter_remote.can_do_action( - requester, - ) + # we don't allow people to reject invites to the server notice + # room, but they can leave it once they are joined. 
+ if ( + old_membership == Membership.INVITE + and effective_membership_state == Membership.LEAVE + ): + is_blocked = await self._is_server_notice_room(room_id) + if is_blocked: + raise SynapseError( + HTTPStatus.FORBIDDEN, + "You cannot reject this invite", + errcode=Codes.CANNOT_LEAVE_SERVER_NOTICE_ROOM, + ) + else: + if action == "kick": + raise AuthError(403, "The target user is not in the room") - if not allowed: - raise LimitExceededError( - retry_after_ms=int(1000 * (time_allowed - time_now_s)) + is_host_in_room = await self._is_host_in_room(current_state_ids) + + if effective_membership_state == Membership.JOIN: + if requester.is_guest: + guest_can_join = await self._can_guest_join(current_state_ids) + if not guest_can_join: + # This should be an auth check, but guests are a local concept, + # so don't really fit into the general auth process. + raise AuthError(403, "Guest access not allowed") + + if not is_host_in_room: + if ratelimit: + time_now_s = self.clock.time() + ( + allowed, + time_allowed, + ) = await self._join_rate_limiter_remote.can_do_action( + requester, ) - inviter = await self._get_inviter(target.to_string(), room_id) - if inviter and not self.hs.is_mine(inviter): - remote_room_hosts.append(inviter.domain) + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now_s)) + ) - content["membership"] = Membership.JOIN + inviter = await self._get_inviter(target.to_string(), room_id) + if inviter and not self.hs.is_mine(inviter): + remote_room_hosts.append(inviter.domain) - profile = self.profile_handler - if not content_specified: - content["displayname"] = await profile.get_displayname(target) - content["avatar_url"] = await profile.get_avatar_url(target) + content["membership"] = Membership.JOIN - if requester.is_guest: - content["kind"] = "guest" + profile = self.profile_handler + if not content_specified: + content["displayname"] = await profile.get_displayname(target) + content["avatar_url"] = await 
profile.get_avatar_url(target) - remote_join_response = await self._remote_join( - requester, remote_room_hosts, room_id, target, content - ) + if requester.is_guest: + content["kind"] = "guest" - return remote_join_response + remote_join_response = await self._remote_join( + requester, remote_room_hosts, room_id, target, content + ) - elif effective_membership_state == Membership.LEAVE: - if not is_host_in_room: - # perhaps we've been invited - ( - current_membership_type, - current_membership_event_id, - ) = await self.store.get_local_current_membership_for_user_in_room( - target.to_string(), room_id - ) - if ( - current_membership_type != Membership.INVITE - or not current_membership_event_id - ): + return remote_join_response + + elif effective_membership_state == Membership.LEAVE: + if not is_host_in_room: + # perhaps we've been invited + ( + current_membership_type, + current_membership_event_id, + ) = await self.store.get_local_current_membership_for_user_in_room( + target.to_string(), room_id + ) + if ( + current_membership_type != Membership.INVITE + or not current_membership_event_id + ): + logger.info( + "%s sent a leave request to %s, but that is not an active room " + "on this server, and there is no pending invite", + target, + room_id, + ) + + raise SynapseError(404, "Not a known room") + + invite = await self.store.get_event(current_membership_event_id) logger.info( - "%s sent a leave request to %s, but that is not an active room " - "on this server, and there is no pending invite", + "%s rejects invite to %s from %s", target, room_id, + invite.sender, ) - raise SynapseError(404, "Not a known room") - - invite = await self.store.get_event(current_membership_event_id) - logger.info( - "%s rejects invite to %s from %s", target, room_id, invite.sender - ) - - if not self.hs.is_mine_id(invite.sender): - # send the rejection to the inviter's HS (with fallback to - # local event) - return await self.remote_reject_invite( - invite.event_id, - txn_id, - 
requester, - content, - ) + if not self.hs.is_mine_id(invite.sender): + # send the rejection to the inviter's HS (with fallback to + # local event) + return await self.remote_reject_invite( + invite.event_id, + txn_id, + requester, + content, + ) - # the inviter was on our server, but has now left. Carry on - # with the normal rejection codepath, which will also send the - # rejection out to any other servers we believe are still in the room. + # the inviter was on our server, but has now left. Carry on + # with the normal rejection codepath, which will also send the + # rejection out to any other servers we believe are still in the room. - # thanks to overzealous cleaning up of event_forward_extremities in - # `delete_old_current_state_events`, it's possible to end up with no - # forward extremities here. If that happens, let's just hang the - # rejection off the invite event. - # - # see: https://github.com/matrix-org/synapse/issues/7139 - if len(latest_event_ids) == 0: - latest_event_ids = [invite.event_id] + # thanks to overzealous cleaning up of event_forward_extremities in + # `delete_old_current_state_events`, it's possible to end up with no + # forward extremities here. If that happens, let's just hang the + # rejection off the invite event. 
+ # + # see: https://github.com/matrix-org/synapse/issues/7139 + if len(latest_event_ids) == 0: + latest_event_ids = [invite.event_id] return await self._local_membership_update( requester=requester, @@ -711,10 +724,10 @@ async def update_membership_locked( txn_id=txn_id, ratelimit=ratelimit, prev_event_ids=latest_event_ids, + auth_event_ids=auth_event_ids, content=content, require_consent=require_consent, outlier=outlier, - state_for_events=state_for_events, ) async def transfer_room_state_on_room_upgrade( diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 794557d4eb59..f8d88ef77be9 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -43,7 +43,7 @@ from synapse.util.caches.lrucache import LruCache from synapse.util.caches.response_cache import ResponseCache from synapse.util.metrics import Measure, measure_func -from synapse.visibility import filter_events_for_client, filter_historical_events +from synapse.visibility import filter_events_for_client if TYPE_CHECKING: from synapse.server import HomeServer @@ -260,8 +260,6 @@ def __init__(self, hs: "HomeServer"): expiry_ms=LAZY_LOADED_MEMBERS_CACHE_MAX_AGE, ) # type: ExpiringCache[Tuple[str, Optional[str]], LruCache[str, str]] - self._msc2716_enabled = hs.config.experimental.msc2716_enabled - async def wait_for_sync_for_user( self, requester: Requester, @@ -468,10 +466,6 @@ async def _load_filtered_recents( recents, always_include_ids=current_state_ids, ) - - if self._msc2716_enabled: - # `m.historical` events should not come down /sync - recents = filter_historical_events(recents) else: recents = [] @@ -535,10 +529,6 @@ async def _load_filtered_recents( loaded_recents.extend(recents) recents = loaded_recents - if self._msc2716_enabled: - # `m.historical` events should not come down /sync - recents = filter_historical_events(recents) - if len(events) <= load_limit: limited = False break diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 
da702e4f0c16..e6fc943bfefd 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -27,15 +27,10 @@ Codes, HttpResponseException, InvalidClientCredentialsError, - NotFoundError, ShadowBanError, SynapseError, ) -from synapse.api.room_versions import ( - KNOWN_ROOM_VERSIONS, -) from synapse.api.filtering import Filter -from synapse.events import make_event_from_dict from synapse.events.utils import format_event_for_client_v2 from synapse.http.servlet import ( RestServlet, @@ -229,8 +224,6 @@ def __init__(self, hs): self.event_creation_handler = hs.get_event_creation_handler() self.auth = hs.get_auth() - self._msc2716_enabled = hs.config.experimental.msc2716_enabled - def register(self, http_server): # /rooms/$roomid/send/$event_type[/$txn_id] PATTERNS = "/rooms/(?P[^/]*)/send/(?P[^/]*)" @@ -247,20 +240,6 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): "sender": requester.user.to_string(), } - inherit_depth = False - prev_events = parse_strings_from_args(request.args, "prev_event") - if self._msc2716_enabled: - if prev_events: - if not requester.app_service: - raise AuthError( - 403, - "Only application services can use the ?prev_event query paramtera", - ) - - event_dict["prev_events"] = prev_events - # If backfilling old messages, let's just use the same depth of what we're inserting next to - inherit_depth = True - if b"ts" in request.args and requester.app_service: event_dict["origin_server_ts"] = parse_integer(request, "ts", 0) @@ -269,7 +248,7 @@ async def on_POST(self, request, room_id, event_type, txn_id=None): event, _, ) = await self.event_creation_handler.create_and_send_nonmember_event( - requester, event_dict, txn_id=txn_id, inherit_depth=inherit_depth + requester, event_dict, txn_id=txn_id ) event_id = event.event_id except ShadowBanError: @@ -293,6 +272,7 @@ class RoomBulkSendEventRestServlet(TransactionRestServlet): def __init__(self, hs): super().__init__(hs) self.store = hs.get_datastore() + 
self.state_store = hs.get_storage().state self.event_creation_handler = hs.get_event_creation_handler() self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() @@ -311,68 +291,52 @@ async def on_POST(self, request, room_id): prev_events_from_query = parse_strings_from_args(request.args, "prev_event") - logger.info("body waewefaew %s", body) - - state_for_events = [] - auth_event_ids = [] - - create_event = await self.store.get_create_event_for_room(room_id) - state_for_events.append(create_event) - auth_event_ids.append(create_event.event_id) + # For the event we are inserting next to (`prev_events_from_query`), + # find the most recent auth events (derived from state events) that + # allowed that message to be sent. We will use that as a base + # to auth our historical messages against. + ( + most_recent_prev_event_id, + _, + ) = await self.store.get_max_depth_of(prev_events_from_query) + # mapping from (type, state_key) -> state_event_id + prev_state_map = await self.state_store.get_state_ids_for_event( + most_recent_prev_event_id + ) + # List of state event ID's + prev_state_ids = list(prev_state_map.values()) + auth_event_ids = prev_state_ids for stateEv in body["state_events_at_start"]: - logger.info("stateEv %s", stateEv) assert_params_in_dict(stateEv, ["type", "content", "sender"]) event_dict = { "type": stateEv["type"], "content": stateEv["content"], "room_id": room_id, - "sender": stateEv["sender"], # requester.user.to_string(), + "sender": stateEv["sender"], "state_key": stateEv["state_key"], } - # Make the events float off on their own + # Make the state events float off on their own fake_prev_event_id = "$" + random_string(43) - # # Also mark the event as an outlier outside of the normal DAG - # (event, _,) = await self.event_creation_handler.create_event( - # requester, event_dict, prev_event_ids=[fake_prev_event_id], outlier=True - # ) - - try: - room_version_id = await self.store.get_room_version_id( - event_dict["room_id"] 
- ) - room_version = KNOWN_ROOM_VERSIONS.get(room_version_id) - except NotFoundError: - raise AuthError(403, "Unknown room") - - current_state_ids = await self.store.get_current_state_ids( - event_dict["room_id"], - ) - current_auth_event_ids = self.auth.compute_auth_events( - make_event_from_dict(event_dict, room_version), - current_state_ids, - ) - current_auth_event_map = await self.store.get_events(current_auth_event_ids) - current_auth_events = current_auth_event_map.values() - - if stateEv["type"] == EventTypes.Member: - membership = stateEv["content"].get("membership", None) + # TODO: This is pretty much the same as some other code to handle inserting state in this file + if event_dict["type"] == EventTypes.Member: + membership = event_dict["content"].get("membership", None) event_id, _ = await self.room_member_handler.update_membership( requester, - target=UserID.from_string(stateEv["state_key"]), + target=UserID.from_string(event_dict["state_key"]), room_id=room_id, action=membership, - content=stateEv["content"], + content=event_dict["content"], outlier=True, - state_for_events=current_auth_events, + prev_event_ids=[fake_prev_event_id], + auth_event_ids=auth_event_ids, ) - auth_event = await self.store.get_event(event_id) - state_for_events.append(auth_event) - auth_event_ids.append(event_id) else: + # TODO: Add some complement tests that adds state that is not member joins + # and will use this code path ( event, _, @@ -380,17 +344,16 @@ async def on_POST(self, request, room_id): requester, event_dict, outlier=True, + prev_event_ids=[fake_prev_event_id], + auth_event_ids=auth_event_ids, ) - state_for_events.append(event) event_id = event.event_id - auth_event_ids.append(event_id) - logger.info("Done with state events grrtrsrdhh %s", auth_event_ids) + auth_event_ids.append(event_id) event_ids = [] prev_event_ids = prev_events_from_query for ev in body["events"]: - logger.info("ev %s", ev) assert_params_in_dict(ev, ["type", "content", "sender"]) event_dict 
= { @@ -407,44 +370,17 @@ async def on_POST(self, request, room_id): ) = await self.event_creation_handler.create_and_send_nonmember_event( requester, event_dict, + # TODO: Should these be an outlier? + # outlier=True, inherit_depth=True, - # We are allowed to post these messages because we are referencing the - # floating auth state events that we just created above + # TODO: Do we need to use `self.auth.compute_auth_events(...)` to filter the `auth_event_ids`? auth_event_ids=auth_event_ids, - # state_for_events=state_for_events, - # outlier=True, ) - - # event, context = await self.event_creation_handler.create_event( - # requester, - # event_dict, - # # We are allowed to post these messages because we are referencing the - # # floating auth state events that we just created above - # auth_event_ids=auth_event_ids, - # prev_event_ids=prev_event_ids, - # inherit_depth=True, - # ) - # # await self.event_creation_handler.persist_and_notify_client_event( - # # requester, event, context, ratelimit=False - # # ) - - # ( - # event, - # _, - # _, - # ) = await self.event_creation_handler.storage.persistence.persist_event( - # event, context=context, backfilled=True - # ) - event_id = event.event_id - event_ids.append(event_id) - - logger.info("event esrgegrerg %s", event) + event_ids.append(event_id) prev_event_ids = [event_id] - logger.info("Done with events afeefwaefw") - return 200, {"state_events": auth_event_ids, "events": event_ids} def on_GET(self, request, room_id): diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 278d1f83138c..c0f79ffdc8d1 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -269,11 +269,6 @@ async def compute_event_context( The event context. """ - logger.info( - "compute_event_context is_outlier=%s event_id=%s", - event.internal_metadata.is_outlier(), - event.event_id, - ) if event.internal_metadata.is_outlier(): # If this is an outlier, then we know it shouldn't have any current # state. 
Certainly store.get_current_state won't return any, and diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c1c147c62ac2..9552eba67d6c 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -541,7 +541,7 @@ async def get_state_ids_for_event( state_filter: The state filter used to fetch state from the database. Returns: - A dict from (type, state_key) -> state_event + A dict from (type, state_key) -> state_event_id """ state_map = await self.get_state_ids_for_events( [event_id], state_filter or StateFilter.all() diff --git a/synapse/visibility.py b/synapse/visibility.py index 8387832a54b5..ff53a49b3a70 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import Dict, FrozenSet, Iterable, List, Optional +from typing import Dict, FrozenSet, List, Optional from synapse.api.constants import ( AccountDataTypes, @@ -47,12 +47,6 @@ ) -def filter_historical_events(events: Iterable[EventBase]) -> List[EventBase]: - """Return a new list with historical events removed from the input.""" - # return [e for e in events if not e.content.get("m.historical", None)] - return events - - async def filter_events_for_client( storage: Storage, user_id: str, From 044a761f57029d1deda861a337384eeec161bdc6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 12 Apr 2021 01:34:11 -0500 Subject: [PATCH 44/83] Fix up some CI lints --- synapse/api/auth.py | 2 +- synapse/events/builder.py | 2 +- synapse/handlers/message.py | 6 ++++-- synapse/storage/databases/main/event_federation.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index b64b14e736ad..1ac473bb24e7 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -88,7 +88,7 @@ async def check_from_context( self, room_version: str, event, context, do_sig_check=True ): 
auth_event_ids = event.auth_event_ids() - if auth_event_ids == None: + if auth_event_ids is None: prev_state_ids = await context.get_prev_state_ids() auth_event_ids = self.compute_auth_events( event, prev_state_ids, for_verification=True diff --git a/synapse/events/builder.py b/synapse/events/builder.py index f9924a524eed..e60720698169 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -161,7 +161,7 @@ async def build( ) # If we can't find any successor events, then it's a forward extremity of - # historical messages nd we can just inherit from the previous historical + # historical messages and we can just inherit from the previous historical # event which we can already assume has the correct depth where we want # to insert into. if not sucessor_event_ids: diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 20b177a37928..792d83935b21 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -854,7 +854,7 @@ async def create_new_client_event( if builder.internal_metadata.outlier: event.internal_metadata.outlier = builder.internal_metadata.outlier - # For outliers that pass in their own auth_event_ids, let's calculate the state for them + # Calculate the state for outliers that pass in their own `auth_event_ids` if auth_event_ids: old_state = await self.store.get_events_as_list(auth_event_ids) @@ -972,7 +972,7 @@ async def handle_new_client_event( # are invite rejections we have generated ourselves. 
assert event.type == EventTypes.Member assert event.content["membership"] == Membership.LEAVE - else: # if not event.internal_metadata.is_outlier(): + else: try: await self.auth.check_from_context(room_version, event, context) except AuthError as err: @@ -1267,6 +1267,8 @@ async def persist_and_notify_client_event( if prev_state_ids: raise AuthError(403, "Changing the room create event is forbidden") + # Mark any `m.historical` messages as backfilled so they don't appear + # in `/sync` and have the proper decrementing `stream_ordering` as we import backfilled = False if event.content.get("m.historical", None): backfilled = True diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index bc3f635453ee..f6303c043cd7 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -15,7 +15,7 @@ import itertools import logging from queue import Empty, PriorityQueue -from typing import Optional, Dict, Iterable, List, Set, Tuple +from typing import Dict, Iterable, List, Set, Tuple from synapse.api.constants import MAX_DEPTH from synapse.api.errors import StoreError From 86ec9150ccae55fe02f0ac760a347c109a85b2f7 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Apr 2021 00:38:29 -0500 Subject: [PATCH 45/83] Fix some more CI lint and tests --- synapse/events/builder.py | 2 +- tests/handlers/test_presence.py | 2 +- tests/storage/test_redaction.py | 6 +++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index e60720698169..f925b7e7371f 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -104,7 +104,7 @@ async def build( self, prev_event_ids: List[str], auth_event_ids: Optional[List[str]], - inherit_depth: bool, + inherit_depth: bool = False, ) -> EventBase: """Transform into a fully signed and hashed event diff --git a/tests/handlers/test_presence.py 
b/tests/handlers/test_presence.py index 9f16cc65fc2e..abf4bcdce26d 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -640,7 +640,7 @@ def _add_new_user(self, room_id, user_id): self.store.get_latest_event_ids_in_room(room_id) ) - event = self.get_success(builder.build(prev_event_ids, None)) + event = self.get_success(builder.build(prev_event_ids, None, False)) self.get_success(self.federation_handler.on_receive_pdu(hostname, event)) diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index ea8703f2d1e5..250bbe0fcacb 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -237,7 +237,7 @@ async def build( self, prev_event_ids, auth_event_ids, - inherit_depth: bool, + inherit_depth: bool = False, ): built_event = await self._base_builder.build( prev_event_ids, auth_event_ids, inherit_depth @@ -257,6 +257,10 @@ def room_id(self): def type(self): return self._base_builder.type + @property + def internal_metadata(self): + return self._base_builder.internal_metadata + event_1, context_1 = self.get_success( self.event_creation_handler.create_new_client_event( EventIdManglingBuilder( From 81b45b594a89d3eabeff8255af7a3d2a0a9a0061 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Apr 2021 14:43:33 -0500 Subject: [PATCH 46/83] Copy over origin_server_ts when bulksend'ing --- scripts-dev/complement.sh | 3 +-- synapse/rest/client/v1/room.py | 8 ++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 31df64c7120e..3c7235d0748b 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -37,8 +37,7 @@ docker build -t matrixdotorg/synapse -f docker/Dockerfile . 
# Build the Synapse monolith image from Complement, based on the above image we just built docker build -t complement-synapse -f "$COMPLEMENT_DIR/dockerfiles/Synapse.Dockerfile" "$COMPLEMENT_DIR/dockerfiles" -# Download Complement -cd ../complement +cd "$COMPLEMENT_DIR" EXTRA_COMPLEMENT_ARGS="" if [[ -n "$1" ]]; then diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index e6fc943bfefd..7013fd75eb81 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -308,10 +308,13 @@ async def on_POST(self, request, room_id): auth_event_ids = prev_state_ids for stateEv in body["state_events_at_start"]: - assert_params_in_dict(stateEv, ["type", "content", "sender"]) + assert_params_in_dict( + stateEv, ["type", "origin_server_ts", "content", "sender"] + ) event_dict = { "type": stateEv["type"], + "origin_server_ts": stateEv["origin_server_ts"], "content": stateEv["content"], "room_id": room_id, "sender": stateEv["sender"], @@ -354,10 +357,11 @@ async def on_POST(self, request, room_id): event_ids = [] prev_event_ids = prev_events_from_query for ev in body["events"]: - assert_params_in_dict(ev, ["type", "content", "sender"]) + assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"]) event_dict = { "type": ev["type"], + "origin_server_ts": ev["origin_server_ts"], "content": ev["content"], "room_id": room_id, "sender": ev["sender"], # requester.user.to_string(), From 02b7335b08ff3e973f4113127c36a2257d0d60c2 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Apr 2021 15:24:13 -0500 Subject: [PATCH 47/83] Wrap bulksend endpoint around experimental feature flag and only appservice can use --- synapse/rest/client/v1/room.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 7013fd75eb81..6040ebd0469d 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -277,8 +277,6 @@ def 
__init__(self, hs): self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() - self._msc2716_enabled = hs.config.experimental.msc2716_enabled - def register(self, http_server): # /rooms/$roomid/bulksend PATTERNS = "/rooms/(?P[^/]*)/bulksend" @@ -286,6 +284,13 @@ def register(self, http_server): async def on_POST(self, request, room_id): requester = await self.auth.get_user_by_req(request, allow_guest=False) + + if not requester.app_service: + raise AuthError( + 403, + "Only application services can use the /bulksend endpoint", + ) + body = parse_json_object_from_request(request) assert_params_in_dict(body, ["state_events_at_start", "events"]) @@ -1177,6 +1182,8 @@ async def on_POST( def register_servlets(hs: "HomeServer", http_server, is_worker=False): + msc2716_enabled = hs.config.experimental.msc2716_enabled + RoomStateEventRestServlet(hs).register(http_server) RoomMemberListRestServlet(hs).register(http_server) JoinedRoomMemberListRestServlet(hs).register(http_server) @@ -1184,7 +1191,8 @@ def register_servlets(hs: "HomeServer", http_server, is_worker=False): JoinRoomAliasServlet(hs).register(http_server) RoomMembershipRestServlet(hs).register(http_server) RoomSendEventRestServlet(hs).register(http_server) - RoomBulkSendEventRestServlet(hs).register(http_server) + if msc2716_enabled: + RoomBulkSendEventRestServlet(hs).register(http_server) PublicRoomListRestServlet(hs).register(http_server) RoomStateRestServlet(hs).register(http_server) RoomRedactEventRestServlet(hs).register(http_server) From 82708cb1df9edad6678feade540d7e3e9512b674 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 14 Apr 2021 15:58:05 -0500 Subject: [PATCH 48/83] Add comment docs for new parameters --- synapse/events/builder.py | 2 ++ synapse/handlers/message.py | 19 +++++++++++ synapse/handlers/room_member.py | 60 +++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 
f925b7e7371f..003c958e40c5 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -113,6 +113,8 @@ async def build( auth_event_ids: The event IDs to use as the auth events. Should normally be set to None, which will cause them to be calculated based on the room state at the prev_events. + inherit_depth: True to inherit the depth from the successor of the most + recent event from prev_event_ids. False to progress the depth as normal. Returns: The signed and hashed event. diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 792d83935b21..a93c84d7c7bb 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -484,6 +484,13 @@ async def create_event( require_consent: Whether to check if the requester has consented to the privacy policy. + + outlier: Indicates whether the event is an `outlier`, i.e. if + it's from an arbitrary point and floating in the DAG as + opposed to being inline with the current DAG. + + inherit_depth: True to inherit the depth from the successor of the most + recent event from prev_event_ids. False to progress the depth as normal. Raises: ResourceLimitError if server is blocked to some resource being exceeded @@ -718,10 +725,19 @@ async def create_and_send_nonmember_event( Args: requester: The requester sending the event. event_dict: An entire event. + auth_event_ids: + The event ids to use as the auth_events for the new event. + Should normally be left as None, which will cause them to be calculated + based on the room state at the prev_events. ratelimit: Whether to rate limit this send. txn_id: The transaction ID. ignore_shadow_ban: True if shadow-banned users should be allowed to send this event. + outlier: Indicates whether the event is an `outlier`, i.e. if + it's from an arbitrary point and floating in the DAG as + opposed to being inline with the current DAG. + inherit_depth: True to inherit the depth from the successor of the most + recent event from prev_event_ids. 
False to progress the depth as normal. Returns: The event, and its stream ordering (if deduplication happened, @@ -819,6 +835,9 @@ async def create_new_client_event( Should normally be left as None, which will cause them to be calculated based on the room state at the prev_events. + inherit_depth: True to inherit the depth from the successor of the most + recent event from prev_event_ids. False to progress the depth as normal. + Returns: Tuple of created event, context """ diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index f77a5f764c8f..a4dff3ab737c 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -249,6 +249,35 @@ async def _local_membership_update( require_consent: bool = True, outlier: bool = False, ) -> Tuple[str, int]: + """ + Internal membership update function to get an existing event or create + and persist a new event for the new membership change. + + Args: + requester: + target: + room_id: + membership: + prev_event_ids: The event IDs to use as the prev events + + auth_event_ids: + The event ids to use as the auth_events for the new event. + Should normally be left as None, which will cause them to be calculated + based on the room state at the prev_events. + + txn_id: + ratelimit: + content: + require_consent: + + outlier: Indicates whether the event is an `outlier`, i.e. if + it's from an arbitrary point and floating in the DAG as + opposed to being inline with the current DAG. + + Returns: + Tuple of event ID and stream ordering position + """ + user_id = target.to_string() if content is None: @@ -415,6 +444,14 @@ async def update_membership( ratelimit: Whether to rate limit the request. content: The content of the created event. require_consent: Whether consent is required. + outlier: Indicates whether the event is an `outlier`, i.e. if + it's from an arbitrary point and floating in the DAG as + opposed to being inline with the current DAG. 
+ prev_event_ids: The event IDs to use as the prev events + auth_event_ids: + The event ids to use as the auth_events for the new event. + Should normally be left as None, which will cause them to be calculated + based on the room state at the prev_events. Returns: A tuple of the new event ID and stream ID. @@ -467,6 +504,29 @@ async def update_membership_locked( """Helper for update_membership. Assumes that the membership linearizer is already held for the room. + + Args: + requester: + target: + room_id: + action: + txn_id: + remote_room_hosts: + third_party_signed: + ratelimit: + content: + require_consent: + outlier: Indicates whether the event is an `outlier`, i.e. if + it's from an arbitrary point and floating in the DAG as + opposed to being inline with the current DAG. + prev_event_ids: The event IDs to use as the prev events + auth_event_ids: + The event ids to use as the auth_events for the new event. + Should normally be left as None, which will cause them to be calculated + based on the room state at the prev_events. + + Returns: + A tuple of the new event ID and stream ID. 
""" content_specified = bool(content) if content is None: From f0fb7328063b41bf0d0bef684c1d213e2893045a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 30 Apr 2021 21:21:21 -0500 Subject: [PATCH 49/83] Add historical messages to /backfill response They do not auth correctly though ``` 2021-05-01 02:08:20,888 - synapse.handlers.federation - 2509 - INFO - GET-4 - auth_events refers to events which are not in our calculated auth chain: {'$tLNZXWvubBTZyo8VQwG77xni_lMw67AseQRxQ9hJblM', '$iwYNb_ZF4IHRB-4x1OCjtCT206clBlUjDlTZFtxPbiY', '$79GFmHJKGSpQTYmxL6gpteK7CgaVtAV_KKjTxXKSKtY', '$6U3Dw80m6PwtUqp4hzGdo6LL3Q3CeRyL3CpY2j9NOKo', '$8qUw8T4lslR38TUmeIB2kXwF2eMvOLbAGj-PiIdVAM8', '$bhniSE3Vv71c3Eo1G_YCig3VNflaAbDIyIm51d_6nsU', '$gl7q_qh3siy4Gx_iCwhT8cGJSdnH38_o97kGHR0HJYM'} 2021-05-01 02:08:20,890 - synapse.state - 452 - INFO - GET-4 - Resolving state for !pIRfotsTwFNpPshIIV:hs1 with 2 groups 2021-05-01 02:08:20,893 - synapse.handlers.federation - 2548 - INFO - GET-4 - After state res: updating auth_events with new state {('m.room.create', ''): '$6U3Dw80m6PwtUqp4hzGdo6LL3Q3CeRyL3CpY2j9NOKo', ('m.room.member', '@the-bridge-user:hs1'): '$tLNZXWvubBTZyo8VQwG77xni_lMw67AseQRxQ9hJblM', ('m.room.power_levels', ''): '$iwYNb_ZF4IHRB-4x1OCjtCT206clBlUjDlTZFtxPbiY', ('m.room.join_rules', ''): '$bhniSE3Vv71c3Eo1G_YCig3VNflaAbDIyIm51d_6nsU', ('m.room.history_visibility', ''): '$gl7q_qh3siy4Gx_iCwhT8cGJSdnH38_o97kGHR0HJYM', ('m.room.member', '@alice:hs1'): '$8qUw8T4lslR38TUmeIB2kXwF2eMvOLbAGj-PiIdVAM8'} 2021-05-01 02:08:20,897 - synapse.handlers.federation - 2389 - WARNING - GET-4 - Failed auth resolution for because 403: User @maria:hs1 not in room !pIRfotsTwFNpPshIIV:hs1 (None) ``` --- scripts-dev/complement.sh | 2 +- synapse/events/builder.py | 6 +-- synapse/events/utils.py | 14 +++--- synapse/federation/federation_server.py | 5 +++ synapse/federation/transport/server.py | 9 ++++ synapse/handlers/federation.py | 9 ++++ synapse/http/servlet.py | 2 +- 
.../databases/main/event_federation.py | 44 ++++++++++++++++++- synapse/storage/databases/main/events.py | 23 +++++----- 9 files changed, 91 insertions(+), 23 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 3c7235d0748b..70085e2baa0f 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -46,4 +46,4 @@ if [[ -n "$1" ]]; then fi # Run the tests on the resulting image! -COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go -run TestBackfillingHistory/parallel/Backfilled_historical_events_resolve_with_proper_state +COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go -run TestBackfillingHistory/parallel/Historical_messages_are_visible_on_federated_server diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 003c958e40c5..bc8fee46adaa 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -158,7 +158,7 @@ async def build( # it has a larger `depth` but before the successor event because the `stream_ordering` # is negative before the successor event. if inherit_depth: - sucessor_event_ids = await self._store.get_successor_events( + successor_event_ids = await self._store.get_successor_events( [most_recent_prev_event_id] ) @@ -166,13 +166,13 @@ async def build( # historical messages and we can just inherit from the previous historical # event which we can already assume has the correct depth where we want # to insert into. 
- if not sucessor_event_ids: + if not successor_event_ids: depth = most_recent_prev_event_depth else: ( _, oldest_successor_depth, - ) = await self._store.get_min_depth_of(sucessor_event_ids) + ) = await self._store.get_min_depth_of(successor_event_ids) depth = oldest_successor_depth # Otherwise, progress the depth as normal diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 0f8a3b5ad839..023bf74a6301 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -253,13 +253,13 @@ def format_event_for_client_v1(d): def format_event_for_client_v2(d): drop_keys = ( - "auth_events", - "prev_events", - "hashes", - "signatures", - "depth", - "origin", - "prev_state", + # "auth_events", + # "prev_events", + # "hashes", + # "signatures", + # "depth", + # "origin", + # "prev_state", ) for key in drop_keys: d.pop(key, None) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index b9f8d966a621..a2dbf5a15a17 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -147,6 +147,11 @@ async def on_backfill_request( origin_host, _ = parse_server_name(origin) await self.check_server_matches_acl(origin_host, room_id) + logger.info( + "federation_server.on_backfill_request versions=%d -> %s", + len(versions), + versions, + ) pdus = await self.handler.on_backfill_request( origin, room_id, versions, limit ) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index a9c1391d27ef..50bc1783495b 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -467,12 +467,21 @@ class FederationBackfillServlet(BaseFederationServlet): PATH = "/backfill/(?P[^/]*)/?" 
async def on_GET(self, origin, content, query, room_id): + logger.info( + "FederationBackfillServlet.on_GET query=%s", + query, + ) versions = [x.decode("ascii") for x in query[b"v"]] limit = parse_integer_from_args(query, "limit", None) if not limit: return 400, {"error": "Did not include limit param"} + logger.info( + "FederationBackfillServlet.on_GET versions=%d -> %s", + len(versions), + versions, + ) return await self.handler.on_backfill_request(origin, room_id, versions, limit) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 67888898ffb5..1b68f16c45c0 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -923,6 +923,7 @@ async def backfill( events = await self.federation_client.backfill( dest, room_id, limit=limit, extremities=extremities ) + logger.info("backfill response returned %d events", len(events)) if not events: return [] @@ -1949,10 +1950,18 @@ async def on_backfill_request( # Synapse asks for 100 events per backfill request. Do not allow more. 
limit = min(limit, 100) + logger.info( + "handlers.on_backfill_request pdu_list %d %s", len(pdu_list), pdu_list + ) + events = await self.store.get_backfill_events(room_id, pdu_list, limit) + logger.info("handlers.on_backfill_request %d", len(events)) + events = await filter_events_for_server(self.storage, origin, events) + logger.info("handlers.on_backfill_request after filter %d", len(events)) + return events @log_function diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index 84b383e312c6..ab5523c64c19 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -305,7 +305,7 @@ def parse_json_value_from_request(request, allow_empty_body=False): try: content = json_decoder.decode(content_bytes.decode("utf-8")) except Exception as e: - logger.warning("Unable to parse JSON: %s", e) + logger.warning("Unable to parse JSON: %s (%s)", e, content_bytes) raise SynapseError(400, "Content not JSON.", errcode=Codes.NOT_JSON) return content diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index f6303c043cd7..b285e8b2b045 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -915,7 +915,7 @@ async def get_backfill_events(self, room_id: str, event_list: list, limit: int): return sorted(events, key=lambda e: -e.depth) def _get_backfill_events(self, txn, room_id, event_list, limit): - logger.debug("_get_backfill_events: %s, %r, %s", room_id, event_list, limit) + logger.info("_get_backfill_events: %s, %r, %s", room_id, event_list, limit) event_results = set() @@ -956,12 +956,36 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): event_results.add(event_id) + logger.info("_get_backfill_events query: %s", event_id) txn.execute(query, (event_id, False, limit - len(event_results))) for row in txn: + logger.info("_get_backfill_events query row: %s", row) if row[1] not in event_results: queue.put((-row[0], 
row[1])) + # From each event, go forwards through sucesors to find chains of historical backfilled floating outliers + successorQueue = PriorityQueue() + successorQueue.put(event_id) + while not successorQueue.empty() and len(event_results) < limit: + try: + event_id = successorQueue.get_nowait() + except Empty: + break + + successor_event_ids = self.get_successor_events_txn(txn, [event_id]) + logger.info( + "_get_backfill_events successor_event_ids: %s", successor_event_ids + ) + for successor_event_id in successor_event_ids: + # Skip any branches we have already gone down + if successor_event_id in event_results: + continue + + event_results.add(successor_event_id) + + successorQueue.put(successor_event_id) + return event_results async def get_missing_events(self, room_id, earliest_events, latest_events, limit): @@ -1007,6 +1031,24 @@ def _get_missing_events(self, txn, room_id, earliest_events, latest_events, limi event_results.reverse() return event_results + def get_successor_events_txn(self, txn, event_ids: Iterable[str]) -> List[str]: + """Fetch all events that have the given events as a prev event + + Args: + txn: Transaction object + event_ids: The events to use as the previous events. + """ + rows = self.db_pool.simple_select_many_txn( + txn, + table="event_edges", + column="prev_event_id", + iterable=event_ids, + retcols=("event_id",), + keyvalues={}, + ) + + return [row["event_id"] for row in rows] + async def get_successor_events(self, event_ids: Iterable[str]) -> List[str]: """Fetch all events that have the given events as a prev event diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ad17123915b4..b12db0bc3c2c 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1975,19 +1975,22 @@ def _handle_mult_prev_events(self, txn, events): For the given event, update the event edges table and forward and backward extremities tables. 
""" + asdf = [ + { + "event_id": ev.event_id, + "prev_event_id": e_id, + "room_id": ev.room_id, + "is_state": False, + } + for ev in events + for e_id in ev.prev_event_ids() + ] + + logger.info("inserting event_edges=%s", asdf) self.db_pool.simple_insert_many_txn( txn, table="event_edges", - values=[ - { - "event_id": ev.event_id, - "prev_event_id": e_id, - "room_id": ev.room_id, - "is_state": False, - } - for ev in events - for e_id in ev.prev_event_ids() - ], + values=asdf, ) self._update_backward_extremeties(txn, events) From c525c5d3733adca2a17decd56bd91bc64f5bc36d Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 1 May 2021 01:54:31 -0500 Subject: [PATCH 50/83] Logging to better debug federated event not authing See https://github.com/matrix-org/synapse/pull/9247#discussion_r624432973 Wrong member event coming down /event_auth --- synapse/federation/federation_client.py | 7 +++++ synapse/handlers/federation.py | 9 +++++- synapse/rest/client/v1/room.py | 28 +++++++++++++++++++ .../databases/main/event_federation.py | 1 + synapse/storage/databases/main/events.py | 23 +++++++-------- 5 files changed, 54 insertions(+), 14 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 55533d75014c..f297d2c40a2e 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -447,6 +447,13 @@ async def get_event_auth( signed_auth = await self._check_sigs_and_hash_and_fetch( destination, auth_chain, outlier=True, room_version=room_version ) + logger.info( + "get_event_auth auth_chain(%d)=%s signed_auth(%d)=%s", + len(auth_chain), + auth_chain, + len(signed_auth), + signed_auth, + ) signed_auth.sort(key=lambda e: e.depth) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 1b68f16c45c0..695b45bc41a9 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2428,6 +2428,9 @@ async def 
_update_auth_events_and_context_for_auth( updated context """ event_auth_events = set(event.auth_event_ids()) + logger.info( + "event.event_id=%s event_auth_events=%s", event.event_id, event_auth_events + ) # missing_auth is the set of the event's auth_events which we don't yet have # in auth_events. @@ -2451,6 +2454,9 @@ async def _update_auth_events_and_context_for_auth( remote_auth_chain = await self.federation_client.get_event_auth( origin, event.room_id, event.event_id ) + logger.info( + "auth_events retrieved remote_auth_chain=%s", remote_auth_chain + ) except RequestSendFailed as e1: # The other side isn't around or doesn't implement the # endpoint, so lets just bail out. @@ -2460,6 +2466,7 @@ async def _update_auth_events_and_context_for_auth( seen_remotes = await self.store.have_seen_events( [e.event_id for e in remote_auth_chain] ) + logger.info("auth_events seen_remotes=%s", seen_remotes) for e in remote_auth_chain: if e.event_id in seen_remotes: @@ -2477,7 +2484,7 @@ async def _update_auth_events_and_context_for_auth( } e.internal_metadata.outlier = True - logger.debug( + logger.info( "do_auth %s missing_auth: %s", event.event_id, e.event_id ) await self._handle_new_event(origin, e, auth_events=auth) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 6040ebd0469d..bd67053917bc 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -342,6 +342,12 @@ async def on_POST(self, request, room_id): prev_event_ids=[fake_prev_event_id], auth_event_ids=auth_event_ids, ) + logger.info( + "/bulksend member state event_id=%s state_key=%s, content=%s", + event_id, + event_dict["state_key"], + event_dict["content"], + ) else: # TODO: Add some complement tests that adds state that is not member joins # and will use this code path @@ -356,6 +362,12 @@ async def on_POST(self, request, room_id): auth_event_ids=auth_event_ids, ) event_id = event.event_id + logger.info( + "/bulksend other state event_id=%s 
state_key=%s, content=%s", + event_id, + event_dict["state_key"], + event_dict["content"], + ) auth_event_ids.append(event_id) @@ -386,6 +398,22 @@ async def on_POST(self, request, room_id): auth_event_ids=auth_event_ids, ) event_id = event.event_id + event_auth_ids = list(event.auth_event_ids()) + logger.info( + "/bulksend event_id=%s auth_event_ids(%d)=%s", + event_id, + len(event_auth_ids), + event_auth_ids, + ) + + double_check_event = await self.store.get_event(event_id) + double_check_event_auth_ids = list(double_check_event.auth_event_ids()) + logger.info( + "/bulksend double_check_event_id=%s auth_event_ids(%d)=%s", + double_check_event.event_id, + len(double_check_event_auth_ids), + double_check_event_auth_ids, + ) event_ids.append(event_id) prev_event_ids = [event_id] diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index b285e8b2b045..2f857f0f22ae 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -982,6 +982,7 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): if successor_event_id in event_results: continue + # TODO: Do we need to filter out state events? event_results.add(successor_event_id) successorQueue.put(successor_event_id) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b12db0bc3c2c..ad17123915b4 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -1975,22 +1975,19 @@ def _handle_mult_prev_events(self, txn, events): For the given event, update the event edges table and forward and backward extremities tables. 
""" - asdf = [ - { - "event_id": ev.event_id, - "prev_event_id": e_id, - "room_id": ev.room_id, - "is_state": False, - } - for ev in events - for e_id in ev.prev_event_ids() - ] - - logger.info("inserting event_edges=%s", asdf) self.db_pool.simple_insert_many_txn( txn, table="event_edges", - values=asdf, + values=[ + { + "event_id": ev.event_id, + "prev_event_id": e_id, + "room_id": ev.room_id, + "is_state": False, + } + for ev in events + for e_id in ev.prev_event_ids() + ], ) self._update_backward_extremeties(txn, events) From 5deee7ccb2fc3e46ae3aa2a8c41b6419dbedef16 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 5 May 2021 00:28:30 -0500 Subject: [PATCH 51/83] Add local copy of signedjson to debug sign/verify steps See https://github.com/matrix-org/synapse/pull/9247#discussion_r626259383 --- .dockerignore | 1 + .gitignore | 2 ++ docker/Dockerfile | 10 ++++++++++ scripts-dev/complement.sh | 2 +- synapse/crypto/event_signing.py | 8 ++++++++ synapse/crypto/keyring.py | 7 +++++++ synapse/events/builder.py | 4 ++++ synapse/federation/federation_server.py | 6 ++++++ synapse/handlers/federation.py | 6 +++--- synapse/storage/databases/main/events_worker.py | 8 ++++++++ 10 files changed, 50 insertions(+), 4 deletions(-) diff --git a/.dockerignore b/.dockerignore index f6c638b0a221..1cf61005be17 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,6 +5,7 @@ !docker !scripts !synapse +!python-signedjson !MANIFEST.in !README.rst !setup.py diff --git a/.gitignore b/.gitignore index 295a18b5399a..12eb5eb880f8 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,5 @@ __pycache__/ /docs/build/ /htmlcov /pip-wheel-metadata/ + +python-signedjson/ diff --git a/docker/Dockerfile b/docker/Dockerfile index 4f5cd06d7294..1eb39d055abe 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -31,6 +31,7 @@ RUN apt-get update && apt-get install -y \ openssl \ rustc \ zlib1g-dev \ + git \ && rm -rf /var/lib/apt/lists/* # Copy just what we need to pip install @@ -55,6 +56,11 @@ 
COPY synapse /synapse/synapse/ # This is aiming at installing only the `packages=find_packages(...)` from `setup.py RUN pip install --prefix="/install" --no-deps --no-warn-script-location /synapse +# Install our local dev copy of signedjson +COPY python-signedjson /python-signedjson/ +RUN cd /python-signedjson && pip install --prefix="/install" . && cd .. + + ### ### Stage 1: runtime ### @@ -82,6 +88,10 @@ COPY --from=builder /install /usr/local COPY ./docker/start.py /start.py COPY ./docker/conf /conf +RUN python -c "import signedjson; print(signedjson.__file__)" +RUN python -c "import signedjson; print(signedjson.__version__)" +RUN cat $(python -c "import signedjson; print(signedjson.__file__)") + VOLUME ["/data"] EXPOSE 8008/tcp 8009/tcp 8448/tcp diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 70085e2baa0f..31d4edbad539 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -33,7 +33,7 @@ if [[ -z "$COMPLEMENT_DIR" ]]; then fi # Build the base Synapse image from the local checkout -docker build -t matrixdotorg/synapse -f docker/Dockerfile . +docker build --progress=plain -t matrixdotorg/synapse -f docker/Dockerfile . 
# Build the Synapse monolith image from Complement, based on the above image we just built docker build -t complement-synapse -f "$COMPLEMENT_DIR/dockerfiles/Synapse.Dockerfile" "$COMPLEMENT_DIR/dockerfiles" diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py index 8fb116ae182c..9e338dbe6334 100644 --- a/synapse/crypto/event_signing.py +++ b/synapse/crypto/event_signing.py @@ -21,6 +21,7 @@ from typing import Any, Callable, Dict, Tuple from canonicaljson import encode_canonical_json +from signedjson.key import encode_signing_key_base64 from signedjson.sign import sign_json from signedjson.types import SigningKey from unpaddedbase64 import decode_base64, encode_base64 @@ -152,6 +153,13 @@ def compute_event_signature( redact_json.pop("unsigned", None) if logger.isEnabledFor(logging.DEBUG): logger.debug("Signing event: %s", encode_canonical_json(redact_json)) + logger.info( + "sign_json signature_name=%s signing_key=%s encode_signing_key_base64=%s, json=%s", + signature_name, + signing_key, + encode_signing_key_base64(signing_key), + redact_json, + ) redact_json = sign_json(redact_json, signature_name, signing_key) if logger.isEnabledFor(logging.DEBUG): logger.debug("Signed event: %s", encode_canonical_json(redact_json)) diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index d5fb51513b59..a4fc0ba9d8b4 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -895,6 +895,13 @@ async def _handle_key_deferred(verify_request: VerifyJsonRequest) -> None: json_object = verify_request.json_object + logger.info( + "keyring _handle_key_deferred verify_request=%s, verify_key.version=%s, verify_key=%s, encode_verify_key_base64=%s", + verify_request, + verify_key.version, + verify_key, + encode_verify_key_base64(verify_key), + ) try: verify_signed_json(json_object, server_name, verify_key) except SignatureVerifyException as e: diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 
bc8fee46adaa..df800059574d 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -309,6 +309,10 @@ def create_local_event_from_event_dict( event_dict.setdefault("signatures", {}) add_hashes_and_signatures(room_version, event_dict, hostname, signing_key) + logger.info( + "create_local_event_from_event_dict after add_hashes_and_signatures event_dict=%s", + event_dict, + ) return make_event_from_dict( event_dict, room_version, internal_metadata_dict=internal_metadata_dict ) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index a2dbf5a15a17..7b4a24d9c99c 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -602,7 +602,13 @@ async def on_event_auth( time_now = self._clock.time_msec() auth_pdus = await self.handler.on_event_auth(event_id) + logger.info("on_event_auth auth_pdus=%s", auth_pdus) + logger.info( + "on_event_auth signatures=%s", [a.signatures for a in auth_pdus] + ) res = {"auth_chain": [a.get_pdu_json(time_now) for a in auth_pdus]} + logger.info("on_event_auth res=%s", res) + return 200, res @log_function diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 695b45bc41a9..2b16708f63aa 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2428,9 +2428,9 @@ async def _update_auth_events_and_context_for_auth( updated context """ event_auth_events = set(event.auth_event_ids()) - logger.info( - "event.event_id=%s event_auth_events=%s", event.event_id, event_auth_events - ) + # logger.info( + # "event.event_id=%s event_auth_events=%s", event.event_id, event_auth_events + # ) # missing_auth is the set of the event's auth_events which we don't yet have # in auth_events. 
diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index c00780969f6e..7438e05a08c9 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -359,12 +359,20 @@ async def get_events_as_list( set(event_ids), allow_rejected=allow_rejected ) + # logger.info("get_events_as_list event_entry_map=%s", event_entry_map) + events = [] for event_id in event_ids: entry = event_entry_map.get(event_id, None) if not entry: continue + # logger.info( + # "get_events_as_list entry=%s sig=%s", + # entry.event, + # entry.event.signatures, + # ) + if not allow_rejected: assert not entry.event.rejected_reason, ( "rejected event returned from _get_events_from_cache_or_db despite " From 960ec217eb8c9bba5025a1365131003f2d31f038 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 5 May 2021 22:53:44 -0500 Subject: [PATCH 52/83] More log debugging --- docker/Dockerfile | 4 +- scripts-dev/complement.sh | 2 +- synapse/crypto/event_signing.py | 14 ++--- synapse/crypto/keyring.py | 14 ++--- synapse/events/builder.py | 8 +-- synapse/federation/federation_server.py | 24 +++++---- synapse/federation/transport/server.py | 18 +++---- synapse/handlers/federation.py | 12 ++--- synapse/rest/client/v1/room.py | 34 +++++++++---- synapse/storage/databases/main/events.py | 14 +++++ .../storage/databases/main/events_worker.py | 51 ++++++++++++++++--- synapse/storage/persist_events.py | 9 ++++ 12 files changed, 143 insertions(+), 61 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 1eb39d055abe..c965c10dd0e7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -57,8 +57,8 @@ COPY synapse /synapse/synapse/ RUN pip install --prefix="/install" --no-deps --no-warn-script-location /synapse # Install our local dev copy of signedjson -COPY python-signedjson /python-signedjson/ -RUN cd /python-signedjson && pip install --prefix="/install" . && cd .. 
+# COPY python-signedjson /python-signedjson/ +# RUN cd /python-signedjson && pip install --prefix="/install" . && cd .. ### diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 31d4edbad539..70085e2baa0f 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -33,7 +33,7 @@ if [[ -z "$COMPLEMENT_DIR" ]]; then fi # Build the base Synapse image from the local checkout -docker build --progress=plain -t matrixdotorg/synapse -f docker/Dockerfile . +docker build -t matrixdotorg/synapse -f docker/Dockerfile . # Build the Synapse monolith image from Complement, based on the above image we just built docker build -t complement-synapse -f "$COMPLEMENT_DIR/dockerfiles/Synapse.Dockerfile" "$COMPLEMENT_DIR/dockerfiles" diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py index 9e338dbe6334..fad9e94f037f 100644 --- a/synapse/crypto/event_signing.py +++ b/synapse/crypto/event_signing.py @@ -153,13 +153,13 @@ def compute_event_signature( redact_json.pop("unsigned", None) if logger.isEnabledFor(logging.DEBUG): logger.debug("Signing event: %s", encode_canonical_json(redact_json)) - logger.info( - "sign_json signature_name=%s signing_key=%s encode_signing_key_base64=%s, json=%s", - signature_name, - signing_key, - encode_signing_key_base64(signing_key), - redact_json, - ) + # logger.info( + # "sign_json signature_name=%s signing_key=%s encode_signing_key_base64=%s, json=%s", + # signature_name, + # signing_key, + # encode_signing_key_base64(signing_key), + # redact_json, + # ) redact_json = sign_json(redact_json, signature_name, signing_key) if logger.isEnabledFor(logging.DEBUG): logger.debug("Signed event: %s", encode_canonical_json(redact_json)) diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index a4fc0ba9d8b4..e8f12f3dde75 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -895,13 +895,13 @@ async def _handle_key_deferred(verify_request: VerifyJsonRequest) -> None: 
json_object = verify_request.json_object - logger.info( - "keyring _handle_key_deferred verify_request=%s, verify_key.version=%s, verify_key=%s, encode_verify_key_base64=%s", - verify_request, - verify_key.version, - verify_key, - encode_verify_key_base64(verify_key), - ) + # logger.info( + # "keyring _handle_key_deferred verify_request=%s, verify_key.version=%s, verify_key=%s, encode_verify_key_base64=%s", + # verify_request, + # verify_key.version, + # verify_key, + # encode_verify_key_base64(verify_key), + # ) try: verify_signed_json(json_object, server_name, verify_key) except SignatureVerifyException as e: diff --git a/synapse/events/builder.py b/synapse/events/builder.py index df800059574d..f68ddf67ad71 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -309,10 +309,10 @@ def create_local_event_from_event_dict( event_dict.setdefault("signatures", {}) add_hashes_and_signatures(room_version, event_dict, hostname, signing_key) - logger.info( - "create_local_event_from_event_dict after add_hashes_and_signatures event_dict=%s", - event_dict, - ) + # logger.info( + # "create_local_event_from_event_dict after add_hashes_and_signatures event_dict=%s", + # event_dict, + # ) return make_event_from_dict( event_dict, room_version, internal_metadata_dict=internal_metadata_dict ) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 7b4a24d9c99c..1a2f959d3070 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -147,11 +147,11 @@ async def on_backfill_request( origin_host, _ = parse_server_name(origin) await self.check_server_matches_acl(origin_host, room_id) - logger.info( - "federation_server.on_backfill_request versions=%d -> %s", - len(versions), - versions, - ) + # logger.info( + # "federation_server.on_backfill_request versions=%d -> %s", + # len(versions), + # versions, + # ) pdus = await self.handler.on_backfill_request( origin, room_id, versions, 
limit ) @@ -602,10 +602,16 @@ async def on_event_auth( time_now = self._clock.time_msec() auth_pdus = await self.handler.on_event_auth(event_id) - logger.info("on_event_auth auth_pdus=%s", auth_pdus) - logger.info( - "on_event_auth signatures=%s", [a.signatures for a in auth_pdus] - ) + + for pdu in auth_pdus: + if pdu.type == "m.room.member" and pdu.state_key == "@maria:hs1": + logger.info( + "on_event_auth pdu=%s auth_events(%d)=%s", + pdu, + len(pdu.auth_event_ids()), + pdu.auth_event_ids(), + ) + res = {"auth_chain": [a.get_pdu_json(time_now) for a in auth_pdus]} logger.info("on_event_auth res=%s", res) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 50bc1783495b..0eb2f1b89367 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -467,21 +467,21 @@ class FederationBackfillServlet(BaseFederationServlet): PATH = "/backfill/(?P[^/]*)/?" async def on_GET(self, origin, content, query, room_id): - logger.info( - "FederationBackfillServlet.on_GET query=%s", - query, - ) + # logger.info( + # "FederationBackfillServlet.on_GET query=%s", + # query, + # ) versions = [x.decode("ascii") for x in query[b"v"]] limit = parse_integer_from_args(query, "limit", None) if not limit: return 400, {"error": "Did not include limit param"} - logger.info( - "FederationBackfillServlet.on_GET versions=%d -> %s", - len(versions), - versions, - ) + # logger.info( + # "FederationBackfillServlet.on_GET versions=%d -> %s", + # len(versions), + # versions, + # ) return await self.handler.on_backfill_request(origin, room_id, versions, limit) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 2b16708f63aa..b47bd3ee2814 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2448,15 +2448,15 @@ async def _update_auth_events_and_context_for_auth( if missing_auth: # If we don't have all the auth events, we need to get them. 
- logger.info("auth_events contains unknown events: %s", missing_auth) + # logger.info("auth_events contains unknown events: %s", missing_auth) try: try: remote_auth_chain = await self.federation_client.get_event_auth( origin, event.room_id, event.event_id ) - logger.info( - "auth_events retrieved remote_auth_chain=%s", remote_auth_chain - ) + # logger.info( + # "auth_events retrieved remote_auth_chain=%s", remote_auth_chain + # ) except RequestSendFailed as e1: # The other side isn't around or doesn't implement the # endpoint, so lets just bail out. @@ -2466,7 +2466,7 @@ async def _update_auth_events_and_context_for_auth( seen_remotes = await self.store.have_seen_events( [e.event_id for e in remote_auth_chain] ) - logger.info("auth_events seen_remotes=%s", seen_remotes) + # logger.info("auth_events seen_remotes=%s", seen_remotes) for e in remote_auth_chain: if e.event_id in seen_remotes: @@ -2484,7 +2484,7 @@ async def _update_auth_events_and_context_for_auth( } e.internal_metadata.outlier = True - logger.info( + logger.debug( "do_auth %s missing_auth: %s", event.event_id, e.event_id ) await self._handle_new_event(origin, e, auth_events=auth) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index bd67053917bc..2dc392f56e6b 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -343,11 +343,23 @@ async def on_POST(self, request, room_id): auth_event_ids=auth_event_ids, ) logger.info( - "/bulksend member state event_id=%s state_key=%s, content=%s", + "/bulksend member state event_id=%s state_key=%s, content=%s auth_event_ids(%d)=%s", event_id, event_dict["state_key"], event_dict["content"], + len(auth_event_ids), + auth_event_ids, ) + + double_check_event = await self.store.get_event(event_id) + double_check_event_auth_ids = list(double_check_event.auth_event_ids()) + logger.info( + "/bulksend double_check_event_id=%s auth_event_ids(%d)=%s", + double_check_event.event_id, + len(double_check_event_auth_ids), + 
double_check_event_auth_ids, + ) + else: # TODO: Add some complement tests that adds state that is not member joins # and will use this code path @@ -406,18 +418,20 @@ async def on_POST(self, request, room_id): event_auth_ids, ) - double_check_event = await self.store.get_event(event_id) - double_check_event_auth_ids = list(double_check_event.auth_event_ids()) - logger.info( - "/bulksend double_check_event_id=%s auth_event_ids(%d)=%s", - double_check_event.event_id, - len(double_check_event_auth_ids), - double_check_event_auth_ids, - ) - event_ids.append(event_id) prev_event_ids = [event_id] + triple_check_event = await self.store.get_event( + auth_event_ids[len(auth_event_ids) - 1] + ) + triple_check_event_auth_ids = list(triple_check_event.auth_event_ids()) + logger.info( + "/bulksend triple_check_event_id=%s auth_event_ids(%d)=%s", + triple_check_event.event_id, + len(triple_check_event_auth_ids), + triple_check_event_auth_ids, + ) + return 200, {"state_events": auth_event_ids, "events": event_ids} def on_GET(self, request, room_id): diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index ad17123915b4..d0f76c9fc310 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -385,6 +385,7 @@ def _persist_events_txn( # Insert into event_to_state_groups. self._store_event_state_mappings_txn(txn, events_and_contexts) + # TODO: REMOVE marker ERIC!!! 
WHERE event auth chain is stored self._persist_event_auth_chain_txn(txn, [e for e, _ in events_and_contexts]) # _store_rejected_events_txn filters out any events which were @@ -1337,6 +1338,9 @@ def get_internal_metadata(event): return im + # if event.type == "m.room.member" and event.state_key == "@maria:hs1": + # logger.info("_store_event_txn event_dict=%s", event_dict(event)) + self.db_pool.simple_insert_many_txn( txn, table="event_json", @@ -1569,6 +1573,16 @@ def _add_to_cache(self, txn, events_and_contexts): def prefill(): for cache_entry in to_prefill: + if ( + cache_entry[0].type == "m.room.member" + and cache_entry[0].state_key == "@maria:hs1" + ): + logger.info( + "_get_event_cache.set (from events.py) event_id=%s auth_events(%d)=%s", + cache_entry[0].event_id, + len(cache_entry[0].auth_event_ids()), + cache_entry[0].auth_event_ids(), + ) self.store._get_event_cache.set((cache_entry[0].event_id,), cache_entry) txn.call_after(prefill) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 7438e05a08c9..28d8d7930417 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -367,11 +367,16 @@ async def get_events_as_list( if not entry: continue - # logger.info( - # "get_events_as_list entry=%s sig=%s", - # entry.event, - # entry.event.signatures, - # ) + if ( + entry.event.type == "m.room.member" + and entry.event.state_key == "@maria:hs1" + ): + logger.info( + "get_events_as_list entry=%s auth_event_ids(%d)=%s", + entry.event, + len(entry.event.auth_event_ids()), + entry.event.auth_event_ids(), + ) if not allow_rejected: assert not entry.event.rejected_reason, ( @@ -497,10 +502,22 @@ async def _get_events_from_cache_or_db(self, event_ids, allow_rejected=False): event_entry_map = self._get_events_from_cache( event_ids, allow_rejected=allow_rejected ) + for key in event_entry_map: + event = event_entry_map[key].event + if event.type == 
"m.room.member" and event.state_key == "@maria:hs1": + logger.info( + "_get_events_from_cache_or_db event=%s auth_events(%d)=%s", + event, + len(event.auth_event_ids()), + event.auth_event_ids(), + ) missing_events_ids = [e for e in event_ids if e not in event_entry_map] if missing_events_ids: + logger.info( + "_get_events_from_cache_or_db missing_events_ids=%s", missing_events_ids + ) log_ctx = current_context() log_ctx.record_event_fetch(len(missing_events_ids)) @@ -542,6 +559,17 @@ def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): if not ret: continue + if ( + ret.event.type == "m.room.member" + and ret.event.state_key == "@maria:hs1" + ): + logger.info( + "_get_event_cache.get event_id=%s auth_events(%d)=%s", + event_id, + len(ret.event.auth_event_ids()), + ret.event.auth_event_ids(), + ) + if allow_rejected or not ret.event.rejected_reason: event_map[event_id] = ret else: @@ -695,7 +723,6 @@ async def _get_events_from_db(self, event_ids, allow_rejected=False): while events_to_fetch: row_map = await self._enqueue_events(events_to_fetch) - # we need to recursively fetch any redactions of those events redaction_ids = set() for event_id in events_to_fetch: @@ -824,6 +851,16 @@ async def _get_events_from_db(self, event_ids, allow_rejected=False): event=original_ev, redacted_event=redacted_event ) + if ( + original_ev.type == "m.room.member" + and original_ev.state_key == "@maria:hs1" + ): + logger.info( + "_get_event_cache.set (from events_workers) event_id=%s auth_events(%d)=%s", + event_id, + len(original_ev.auth_event_ids()), + original_ev.auth_event_ids(), + ) self._get_event_cache.set((event_id,), cache_entry) result_map[event_id] = cache_entry @@ -906,6 +943,7 @@ def _fetch_event_rows(self, txn, event_ids): """ event_dict = {} for evs in batch_iter(event_ids, 200): + # TODO: REMOVE marker ERIC!!! 
WHERE EVENTS ARE FETCHED sql = """\ SELECT e.event_id, @@ -930,6 +968,7 @@ def _fetch_event_rows(self, txn, event_ids): for row in txn: event_id = row[0] + logger.info("_fetch_event_rows event_id=%s json=%s", event_id, row[3]) event_dict[event_id] = { "event_id": event_id, "stream_ordering": row[1], diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 3a0d6fb32e84..1729d72904f2 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -289,6 +289,15 @@ async def persist_event( event if it was deduplicated due to an existing event matching the transaction ID. """ + + if event.type == "m.room.member" and event.state_key == "@maria:hs1": + logger.info( + "persist_event event=%s auth_events(%d)=%s", + event, + len(event.auth_event_ids()), + event.auth_event_ids(), + ) + deferred = self._event_persist_queue.add_to_queue( event.room_id, [(event, context)], backfilled=backfilled ) From 779ef25aa6b0c09bd6f4f6c37c6446fb87fe3e8f Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 5 May 2021 23:01:13 -0500 Subject: [PATCH 53/83] Revert debugging commits --- .dockerignore | 1 - .gitignore | 2 - docker/Dockerfile | 10 ---- synapse/crypto/event_signing.py | 8 --- synapse/crypto/keyring.py | 7 --- synapse/events/builder.py | 4 -- synapse/federation/federation_client.py | 7 --- synapse/federation/federation_server.py | 22 ++------- synapse/federation/transport/server.py | 18 +++---- synapse/handlers/federation.py | 9 +--- synapse/rest/client/v1/room.py | 42 ---------------- .../databases/main/event_federation.py | 1 - synapse/storage/databases/main/events.py | 37 +++++--------- .../storage/databases/main/events_worker.py | 49 +------------------ synapse/storage/persist_events.py | 9 ---- 15 files changed, 29 insertions(+), 197 deletions(-) diff --git a/.dockerignore b/.dockerignore index 1cf61005be17..f6c638b0a221 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,7 +5,6 @@ !docker !scripts !synapse 
-!python-signedjson !MANIFEST.in !README.rst !setup.py diff --git a/.gitignore b/.gitignore index 12eb5eb880f8..295a18b5399a 100644 --- a/.gitignore +++ b/.gitignore @@ -46,5 +46,3 @@ __pycache__/ /docs/build/ /htmlcov /pip-wheel-metadata/ - -python-signedjson/ diff --git a/docker/Dockerfile b/docker/Dockerfile index c965c10dd0e7..4f5cd06d7294 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -31,7 +31,6 @@ RUN apt-get update && apt-get install -y \ openssl \ rustc \ zlib1g-dev \ - git \ && rm -rf /var/lib/apt/lists/* # Copy just what we need to pip install @@ -56,11 +55,6 @@ COPY synapse /synapse/synapse/ # This is aiming at installing only the `packages=find_packages(...)` from `setup.py RUN pip install --prefix="/install" --no-deps --no-warn-script-location /synapse -# Install our local dev copy of signedjson -# COPY python-signedjson /python-signedjson/ -# RUN cd /python-signedjson && pip install --prefix="/install" . && cd .. - - ### ### Stage 1: runtime ### @@ -88,10 +82,6 @@ COPY --from=builder /install /usr/local COPY ./docker/start.py /start.py COPY ./docker/conf /conf -RUN python -c "import signedjson; print(signedjson.__file__)" -RUN python -c "import signedjson; print(signedjson.__version__)" -RUN cat $(python -c "import signedjson; print(signedjson.__file__)") - VOLUME ["/data"] EXPOSE 8008/tcp 8009/tcp 8448/tcp diff --git a/synapse/crypto/event_signing.py b/synapse/crypto/event_signing.py index fad9e94f037f..8fb116ae182c 100644 --- a/synapse/crypto/event_signing.py +++ b/synapse/crypto/event_signing.py @@ -21,7 +21,6 @@ from typing import Any, Callable, Dict, Tuple from canonicaljson import encode_canonical_json -from signedjson.key import encode_signing_key_base64 from signedjson.sign import sign_json from signedjson.types import SigningKey from unpaddedbase64 import decode_base64, encode_base64 @@ -153,13 +152,6 @@ def compute_event_signature( redact_json.pop("unsigned", None) if logger.isEnabledFor(logging.DEBUG): logger.debug("Signing 
event: %s", encode_canonical_json(redact_json)) - # logger.info( - # "sign_json signature_name=%s signing_key=%s encode_signing_key_base64=%s, json=%s", - # signature_name, - # signing_key, - # encode_signing_key_base64(signing_key), - # redact_json, - # ) redact_json = sign_json(redact_json, signature_name, signing_key) if logger.isEnabledFor(logging.DEBUG): logger.debug("Signed event: %s", encode_canonical_json(redact_json)) diff --git a/synapse/crypto/keyring.py b/synapse/crypto/keyring.py index e8f12f3dde75..d5fb51513b59 100644 --- a/synapse/crypto/keyring.py +++ b/synapse/crypto/keyring.py @@ -895,13 +895,6 @@ async def _handle_key_deferred(verify_request: VerifyJsonRequest) -> None: json_object = verify_request.json_object - # logger.info( - # "keyring _handle_key_deferred verify_request=%s, verify_key.version=%s, verify_key=%s, encode_verify_key_base64=%s", - # verify_request, - # verify_key.version, - # verify_key, - # encode_verify_key_base64(verify_key), - # ) try: verify_signed_json(json_object, server_name, verify_key) except SignatureVerifyException as e: diff --git a/synapse/events/builder.py b/synapse/events/builder.py index f68ddf67ad71..bc8fee46adaa 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -309,10 +309,6 @@ def create_local_event_from_event_dict( event_dict.setdefault("signatures", {}) add_hashes_and_signatures(room_version, event_dict, hostname, signing_key) - # logger.info( - # "create_local_event_from_event_dict after add_hashes_and_signatures event_dict=%s", - # event_dict, - # ) return make_event_from_dict( event_dict, room_version, internal_metadata_dict=internal_metadata_dict ) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index f297d2c40a2e..55533d75014c 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -447,13 +447,6 @@ async def get_event_auth( signed_auth = await self._check_sigs_and_hash_and_fetch( 
destination, auth_chain, outlier=True, room_version=room_version ) - logger.info( - "get_event_auth auth_chain(%d)=%s signed_auth(%d)=%s", - len(auth_chain), - auth_chain, - len(signed_auth), - signed_auth, - ) signed_auth.sort(key=lambda e: e.depth) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 1a2f959d3070..a2dbf5a15a17 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -147,11 +147,11 @@ async def on_backfill_request( origin_host, _ = parse_server_name(origin) await self.check_server_matches_acl(origin_host, room_id) - # logger.info( - # "federation_server.on_backfill_request versions=%d -> %s", - # len(versions), - # versions, - # ) + logger.info( + "federation_server.on_backfill_request versions=%d -> %s", + len(versions), + versions, + ) pdus = await self.handler.on_backfill_request( origin, room_id, versions, limit ) @@ -602,19 +602,7 @@ async def on_event_auth( time_now = self._clock.time_msec() auth_pdus = await self.handler.on_event_auth(event_id) - - for pdu in auth_pdus: - if pdu.type == "m.room.member" and pdu.state_key == "@maria:hs1": - logger.info( - "on_event_auth pdu=%s auth_events(%d)=%s", - pdu, - len(pdu.auth_event_ids()), - pdu.auth_event_ids(), - ) - res = {"auth_chain": [a.get_pdu_json(time_now) for a in auth_pdus]} - logger.info("on_event_auth res=%s", res) - return 200, res @log_function diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 0eb2f1b89367..50bc1783495b 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -467,21 +467,21 @@ class FederationBackfillServlet(BaseFederationServlet): PATH = "/backfill/(?P[^/]*)/?" 
async def on_GET(self, origin, content, query, room_id): - # logger.info( - # "FederationBackfillServlet.on_GET query=%s", - # query, - # ) + logger.info( + "FederationBackfillServlet.on_GET query=%s", + query, + ) versions = [x.decode("ascii") for x in query[b"v"]] limit = parse_integer_from_args(query, "limit", None) if not limit: return 400, {"error": "Did not include limit param"} - # logger.info( - # "FederationBackfillServlet.on_GET versions=%d -> %s", - # len(versions), - # versions, - # ) + logger.info( + "FederationBackfillServlet.on_GET versions=%d -> %s", + len(versions), + versions, + ) return await self.handler.on_backfill_request(origin, room_id, versions, limit) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index b47bd3ee2814..1b68f16c45c0 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -2428,9 +2428,6 @@ async def _update_auth_events_and_context_for_auth( updated context """ event_auth_events = set(event.auth_event_ids()) - # logger.info( - # "event.event_id=%s event_auth_events=%s", event.event_id, event_auth_events - # ) # missing_auth is the set of the event's auth_events which we don't yet have # in auth_events. @@ -2448,15 +2445,12 @@ async def _update_auth_events_and_context_for_auth( if missing_auth: # If we don't have all the auth events, we need to get them. - # logger.info("auth_events contains unknown events: %s", missing_auth) + logger.info("auth_events contains unknown events: %s", missing_auth) try: try: remote_auth_chain = await self.federation_client.get_event_auth( origin, event.room_id, event.event_id ) - # logger.info( - # "auth_events retrieved remote_auth_chain=%s", remote_auth_chain - # ) except RequestSendFailed as e1: # The other side isn't around or doesn't implement the # endpoint, so lets just bail out. 
@@ -2466,7 +2460,6 @@ async def _update_auth_events_and_context_for_auth( seen_remotes = await self.store.have_seen_events( [e.event_id for e in remote_auth_chain] ) - # logger.info("auth_events seen_remotes=%s", seen_remotes) for e in remote_auth_chain: if e.event_id in seen_remotes: diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 2dc392f56e6b..6040ebd0469d 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -342,24 +342,6 @@ async def on_POST(self, request, room_id): prev_event_ids=[fake_prev_event_id], auth_event_ids=auth_event_ids, ) - logger.info( - "/bulksend member state event_id=%s state_key=%s, content=%s auth_event_ids(%d)=%s", - event_id, - event_dict["state_key"], - event_dict["content"], - len(auth_event_ids), - auth_event_ids, - ) - - double_check_event = await self.store.get_event(event_id) - double_check_event_auth_ids = list(double_check_event.auth_event_ids()) - logger.info( - "/bulksend double_check_event_id=%s auth_event_ids(%d)=%s", - double_check_event.event_id, - len(double_check_event_auth_ids), - double_check_event_auth_ids, - ) - else: # TODO: Add some complement tests that adds state that is not member joins # and will use this code path @@ -374,12 +356,6 @@ async def on_POST(self, request, room_id): auth_event_ids=auth_event_ids, ) event_id = event.event_id - logger.info( - "/bulksend other state event_id=%s state_key=%s, content=%s", - event_id, - event_dict["state_key"], - event_dict["content"], - ) auth_event_ids.append(event_id) @@ -410,28 +386,10 @@ async def on_POST(self, request, room_id): auth_event_ids=auth_event_ids, ) event_id = event.event_id - event_auth_ids = list(event.auth_event_ids()) - logger.info( - "/bulksend event_id=%s auth_event_ids(%d)=%s", - event_id, - len(event_auth_ids), - event_auth_ids, - ) event_ids.append(event_id) prev_event_ids = [event_id] - triple_check_event = await self.store.get_event( - auth_event_ids[len(auth_event_ids) - 1] - ) - 
triple_check_event_auth_ids = list(triple_check_event.auth_event_ids()) - logger.info( - "/bulksend triple_check_event_id=%s auth_event_ids(%d)=%s", - triple_check_event.event_id, - len(triple_check_event_auth_ids), - triple_check_event_auth_ids, - ) - return 200, {"state_events": auth_event_ids, "events": event_ids} def on_GET(self, request, room_id): diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 2f857f0f22ae..b285e8b2b045 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -982,7 +982,6 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): if successor_event_id in event_results: continue - # TODO: Do we need to filter out state events? event_results.add(successor_event_id) successorQueue.put(successor_event_id) diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index d0f76c9fc310..b12db0bc3c2c 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -385,7 +385,6 @@ def _persist_events_txn( # Insert into event_to_state_groups. self._store_event_state_mappings_txn(txn, events_and_contexts) - # TODO: REMOVE marker ERIC!!! 
WHERE event auth chain is stored self._persist_event_auth_chain_txn(txn, [e for e, _ in events_and_contexts]) # _store_rejected_events_txn filters out any events which were @@ -1338,9 +1337,6 @@ def get_internal_metadata(event): return im - # if event.type == "m.room.member" and event.state_key == "@maria:hs1": - # logger.info("_store_event_txn event_dict=%s", event_dict(event)) - self.db_pool.simple_insert_many_txn( txn, table="event_json", @@ -1573,16 +1569,6 @@ def _add_to_cache(self, txn, events_and_contexts): def prefill(): for cache_entry in to_prefill: - if ( - cache_entry[0].type == "m.room.member" - and cache_entry[0].state_key == "@maria:hs1" - ): - logger.info( - "_get_event_cache.set (from events.py) event_id=%s auth_events(%d)=%s", - cache_entry[0].event_id, - len(cache_entry[0].auth_event_ids()), - cache_entry[0].auth_event_ids(), - ) self.store._get_event_cache.set((cache_entry[0].event_id,), cache_entry) txn.call_after(prefill) @@ -1989,19 +1975,22 @@ def _handle_mult_prev_events(self, txn, events): For the given event, update the event edges table and forward and backward extremities tables. 
""" + asdf = [ + { + "event_id": ev.event_id, + "prev_event_id": e_id, + "room_id": ev.room_id, + "is_state": False, + } + for ev in events + for e_id in ev.prev_event_ids() + ] + + logger.info("inserting event_edges=%s", asdf) self.db_pool.simple_insert_many_txn( txn, table="event_edges", - values=[ - { - "event_id": ev.event_id, - "prev_event_id": e_id, - "room_id": ev.room_id, - "is_state": False, - } - for ev in events - for e_id in ev.prev_event_ids() - ], + values=asdf, ) self._update_backward_extremeties(txn, events) diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index 28d8d7930417..c00780969f6e 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -359,25 +359,12 @@ async def get_events_as_list( set(event_ids), allow_rejected=allow_rejected ) - # logger.info("get_events_as_list event_entry_map=%s", event_entry_map) - events = [] for event_id in event_ids: entry = event_entry_map.get(event_id, None) if not entry: continue - if ( - entry.event.type == "m.room.member" - and entry.event.state_key == "@maria:hs1" - ): - logger.info( - "get_events_as_list entry=%s auth_event_ids(%d)=%s", - entry.event, - len(entry.event.auth_event_ids()), - entry.event.auth_event_ids(), - ) - if not allow_rejected: assert not entry.event.rejected_reason, ( "rejected event returned from _get_events_from_cache_or_db despite " @@ -502,22 +489,10 @@ async def _get_events_from_cache_or_db(self, event_ids, allow_rejected=False): event_entry_map = self._get_events_from_cache( event_ids, allow_rejected=allow_rejected ) - for key in event_entry_map: - event = event_entry_map[key].event - if event.type == "m.room.member" and event.state_key == "@maria:hs1": - logger.info( - "_get_events_from_cache_or_db event=%s auth_events(%d)=%s", - event, - len(event.auth_event_ids()), - event.auth_event_ids(), - ) missing_events_ids = [e for e in event_ids if e not in 
event_entry_map] if missing_events_ids: - logger.info( - "_get_events_from_cache_or_db missing_events_ids=%s", missing_events_ids - ) log_ctx = current_context() log_ctx.record_event_fetch(len(missing_events_ids)) @@ -559,17 +534,6 @@ def _get_events_from_cache(self, events, allow_rejected, update_metrics=True): if not ret: continue - if ( - ret.event.type == "m.room.member" - and ret.event.state_key == "@maria:hs1" - ): - logger.info( - "_get_event_cache.get event_id=%s auth_events(%d)=%s", - event_id, - len(ret.event.auth_event_ids()), - ret.event.auth_event_ids(), - ) - if allow_rejected or not ret.event.rejected_reason: event_map[event_id] = ret else: @@ -723,6 +687,7 @@ async def _get_events_from_db(self, event_ids, allow_rejected=False): while events_to_fetch: row_map = await self._enqueue_events(events_to_fetch) + # we need to recursively fetch any redactions of those events redaction_ids = set() for event_id in events_to_fetch: @@ -851,16 +816,6 @@ async def _get_events_from_db(self, event_ids, allow_rejected=False): event=original_ev, redacted_event=redacted_event ) - if ( - original_ev.type == "m.room.member" - and original_ev.state_key == "@maria:hs1" - ): - logger.info( - "_get_event_cache.set (from events_workers) event_id=%s auth_events(%d)=%s", - event_id, - len(original_ev.auth_event_ids()), - original_ev.auth_event_ids(), - ) self._get_event_cache.set((event_id,), cache_entry) result_map[event_id] = cache_entry @@ -943,7 +898,6 @@ def _fetch_event_rows(self, txn, event_ids): """ event_dict = {} for evs in batch_iter(event_ids, 200): - # TODO: REMOVE marker ERIC!!! 
WHERE EVENTS ARE FETCHED sql = """\ SELECT e.event_id, @@ -968,7 +922,6 @@ def _fetch_event_rows(self, txn, event_ids): for row in txn: event_id = row[0] - logger.info("_fetch_event_rows event_id=%s json=%s", event_id, row[3]) event_dict[event_id] = { "event_id": event_id, "stream_ordering": row[1], diff --git a/synapse/storage/persist_events.py b/synapse/storage/persist_events.py index 1729d72904f2..3a0d6fb32e84 100644 --- a/synapse/storage/persist_events.py +++ b/synapse/storage/persist_events.py @@ -289,15 +289,6 @@ async def persist_event( event if it was deduplicated due to an existing event matching the transaction ID. """ - - if event.type == "m.room.member" and event.state_key == "@maria:hs1": - logger.info( - "persist_event event=%s auth_events(%d)=%s", - event, - len(event.auth_event_ids()), - event.auth_event_ids(), - ) - deferred = self._event_persist_queue.add_to_queue( event.room_id, [(event, context)], backfilled=backfilled ) From 7dcc0fa50f66a0ce2bf408915a66c8e78d0181a5 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 5 May 2021 23:15:38 -0500 Subject: [PATCH 54/83] Fix signature check failing for historical state events Make sure to use a copy of this list as we add to it and loop here. Otherwise it will be the same reference and update when we append here later. For context, see https://github.com/matrix-org/synapse/pull/9247#discussion_r627057557 --- synapse/rest/client/v1/room.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 6040ebd0469d..c76ee7e6630a 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -340,7 +340,9 @@ async def on_POST(self, request, room_id): content=event_dict["content"], outlier=True, prev_event_ids=[fake_prev_event_id], - auth_event_ids=auth_event_ids, + # Make sure to use a copy of this list as we add to it and loop here. 
+ # Otherwise it will be the same reference and update when we append here later. + auth_event_ids=auth_event_ids.copy(), ) else: # TODO: Add some complement tests that adds state that is not member joins @@ -353,7 +355,9 @@ async def on_POST(self, request, room_id): event_dict, outlier=True, prev_event_ids=[fake_prev_event_id], - auth_event_ids=auth_event_ids, + # Make sure to use a copy of this list as we add to it and loop here. + # Otherwise it will be the same reference and update when we append here later. + auth_event_ids=auth_event_ids.copy(), ) event_id = event.event_id From 52b1e7ba4f8218a4a468f54c47adac39ce7cf977 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 5 May 2021 23:29:42 -0500 Subject: [PATCH 55/83] Clean up remaining debug logs --- scripts-dev/complement.sh | 4 ++-- synapse/federation/federation_server.py | 5 ---- synapse/federation/transport/server.py | 9 -------- synapse/handlers/federation.py | 9 -------- .../databases/main/event_federation.py | 8 ++----- synapse/storage/databases/main/events.py | 23 ++++++++----------- 6 files changed, 14 insertions(+), 44 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 70085e2baa0f..1d11202c1192 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -45,5 +45,5 @@ if [[ -n "$1" ]]; then EXTRA_COMPLEMENT_ARGS+="-run $1 " fi -# Run the tests on the resulting image! -COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go -run TestBackfillingHistory/parallel/Historical_messages_are_visible_on_federated_server +# Run the tests! 
+COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index a2dbf5a15a17..b9f8d966a621 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -147,11 +147,6 @@ async def on_backfill_request( origin_host, _ = parse_server_name(origin) await self.check_server_matches_acl(origin_host, room_id) - logger.info( - "federation_server.on_backfill_request versions=%d -> %s", - len(versions), - versions, - ) pdus = await self.handler.on_backfill_request( origin, room_id, versions, limit ) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 50bc1783495b..a9c1391d27ef 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -467,21 +467,12 @@ class FederationBackfillServlet(BaseFederationServlet): PATH = "/backfill/(?P[^/]*)/?" 
async def on_GET(self, origin, content, query, room_id): - logger.info( - "FederationBackfillServlet.on_GET query=%s", - query, - ) versions = [x.decode("ascii") for x in query[b"v"]] limit = parse_integer_from_args(query, "limit", None) if not limit: return 400, {"error": "Did not include limit param"} - logger.info( - "FederationBackfillServlet.on_GET versions=%d -> %s", - len(versions), - versions, - ) return await self.handler.on_backfill_request(origin, room_id, versions, limit) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 1b68f16c45c0..67888898ffb5 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -923,7 +923,6 @@ async def backfill( events = await self.federation_client.backfill( dest, room_id, limit=limit, extremities=extremities ) - logger.info("backfill response returned %d events", len(events)) if not events: return [] @@ -1950,18 +1949,10 @@ async def on_backfill_request( # Synapse asks for 100 events per backfill request. Do not allow more. 
limit = min(limit, 100) - logger.info( - "handlers.on_backfill_request pdu_list %d %s", len(pdu_list), pdu_list - ) - events = await self.store.get_backfill_events(room_id, pdu_list, limit) - logger.info("handlers.on_backfill_request %d", len(events)) - events = await filter_events_for_server(self.storage, origin, events) - logger.info("handlers.on_backfill_request after filter %d", len(events)) - return events @log_function diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index b285e8b2b045..51fbd04a430c 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -956,15 +956,14 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): event_results.add(event_id) - logger.info("_get_backfill_events query: %s", event_id) txn.execute(query, (event_id, False, limit - len(event_results))) for row in txn: - logger.info("_get_backfill_events query row: %s", row) if row[1] not in event_results: queue.put((-row[0], row[1])) - # From each event, go forwards through sucesors to find chains of historical backfilled floating outliers + # From each event, go forwards through successors to find chains of + # historical backfilled floating outliers successorQueue = PriorityQueue() successorQueue.put(event_id) while not successorQueue.empty() and len(event_results) < limit: @@ -974,9 +973,6 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): break successor_event_ids = self.get_successor_events_txn(txn, [event_id]) - logger.info( - "_get_backfill_events successor_event_ids: %s", successor_event_ids - ) for successor_event_id in successor_event_ids: # Skip any branches we have already gone down if successor_event_id in event_results: diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index b12db0bc3c2c..ad17123915b4 100644 --- a/synapse/storage/databases/main/events.py +++ 
b/synapse/storage/databases/main/events.py @@ -1975,22 +1975,19 @@ def _handle_mult_prev_events(self, txn, events): For the given event, update the event edges table and forward and backward extremities tables. """ - asdf = [ - { - "event_id": ev.event_id, - "prev_event_id": e_id, - "room_id": ev.room_id, - "is_state": False, - } - for ev in events - for e_id in ev.prev_event_ids() - ] - - logger.info("inserting event_edges=%s", asdf) self.db_pool.simple_insert_many_txn( txn, table="event_edges", - values=asdf, + values=[ + { + "event_id": ev.event_id, + "prev_event_id": e_id, + "room_id": ev.room_id, + "is_state": False, + } + for ev in events + for e_id in ev.prev_event_ids() + ], ) self._update_backward_extremeties(txn, events) From 1d6cf78b012b16232a08a7a52ce2527f6e65738e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 14 May 2021 01:57:36 -0500 Subject: [PATCH 56/83] Add insertion events to the end of chunks --- scripts-dev/complement.sh | 2 +- synapse/api/constants.py | 3 +++ synapse/rest/client/v1/room.py | 21 ++++++++++++++++++++- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 1d11202c1192..18e92725df96 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -46,4 +46,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! 
-COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go +COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go -run TestBackfillingHistory/parallel/Backfilled_historical_events_resolve_with_proper_state_in_correct_order diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 6856dab06c1b..1f4b056160b1 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -106,6 +106,9 @@ class EventTypes: MSC1772_SPACE_CHILD = "org.matrix.msc1772.space.child" MSC1772_SPACE_PARENT = "org.matrix.msc1772.space.parent" + MSC2716_INSERTION = "org.matrix.msc2716.insertion" + MSC2716_MARKER = "org.matrix.msc2716.marker" + class EduTypes: Presence = "m.presence" diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index c76ee7e6630a..821514abf93a 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -363,9 +363,28 @@ async def on_POST(self, request, room_id): auth_event_ids.append(event_id) + events_to_insert = body["events"] + events_to_insert.append( + { + "type": EventTypes.MSC2716_INSERTION, + # requester.user.to_string() + "sender": events_to_insert[len(events_to_insert) - 1]["sender"], + "content": { + "next_chunk_id": random_string(64), + "m.historical": True, + # TODO: Why is `body` necessary for this to show up in /messages + "body": "TODO_REMOVE", + }, + # Copy the origin_server_ts from the last event we're inserting + "origin_server_ts": events_to_insert[len(events_to_insert) - 1][ + "origin_server_ts" + ], + } + ) + event_ids = [] prev_event_ids = prev_events_from_query - for ev in body["events"]: + for ev in events_to_insert: assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"]) event_dict = { From 13b18a8ec5cf03e3f46cc730464cfbfd10c82664 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 14 May 2021 03:37:18 -0500 Subject: [PATCH 
57/83] Add insertion initially, end of chunk, and add live markers --- scripts-dev/complement.sh | 2 +- synapse/api/constants.py | 7 +++ synapse/rest/client/v1/room.py | 78 +++++++++++++++++++++++++++++++--- 3 files changed, 79 insertions(+), 8 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 18e92725df96..34472473bc58 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -46,4 +46,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! -COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go -run TestBackfillingHistory/parallel/Backfilled_historical_events_resolve_with_proper_state_in_correct_order +COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 1f4b056160b1..dba2e43f7c48 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -172,6 +172,13 @@ class EventContentFields: # cf https://github.com/matrix-org/matrix-doc/pull/1772 MSC1772_ROOM_TYPE = "org.matrix.msc1772.type" + # For "insertion" events + MSC2716_NEXT_CHUNK_ID = "org.matrix.msc2716.next_chunk_id" + # Used on normal message events to indicate where the chunk connects to + MSC2716_CHUNK_ID = "org.matrix.msc2716.chunk_id" + # For "marker" events + MSC2716_PREV_INSERTION = "org.matrix.msc2716.prev_insertion" + class RoomEncryptionAlgorithms: MEGOLM_V1_AES_SHA2 = "m.megolm.v1.aes-sha2" diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 821514abf93a..7797c691785b 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -21,7 +21,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple from urllib import parse as urlparse -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventTypes, EventContentFields, Membership from synapse.api.errors 
import ( AuthError, Codes, @@ -277,6 +277,25 @@ def __init__(self, hs): self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() + async def _send_marker_event_for_insertion( + self, requester, room_id, insertion_event_id, auth_event_ids + ): + marker_event_dict = { + "type": EventTypes.MSC2716_MARKER, + "sender": requester.user.to_string(), + "content": { + EventContentFields.MSC2716_PREV_INSERTION: insertion_event_id, + }, + "room_id": room_id, + } + + (event, _,) = await self.event_creation_handler.create_and_send_nonmember_event( + requester, + marker_event_dict, + # TODO: Do we need to use `self.auth.compute_auth_events(...)` to filter the `auth_event_ids`? + auth_event_ids=auth_event_ids, + ) + def register(self, http_server): # /rooms/$roomid/bulksend PATTERNS = "/rooms/(?P[^/]*)/bulksend" @@ -364,18 +383,51 @@ async def on_POST(self, request, room_id): auth_event_ids.append(event_id) events_to_insert = body["events"] + + # Since we can only add backfilled events from an "insertion" point, + # If they did not provide a `chunk_id` to branch from, add an "insertion" point + # to start from. 
+ if EventContentFields.MSC2716_CHUNK_ID not in events_to_insert[0]["content"]: + first_event = events_to_insert[0] + initial_chunk_id = random_string(64) + + # Prepend to the list + events_to_insert.insert( + 0, + { + "type": EventTypes.MSC2716_INSERTION, + "sender": requester.user.to_string(), + "content": { + EventContentFields.MSC2716_NEXT_CHUNK_ID: initial_chunk_id, + "m.historical": True, + # TODO: Why is `body` necessary for this to show up in /messages + "body": "TODO_REMOVE - INITIAL", + }, + # Since this initial insertion event is put at the start of the chunk, + # copy the origin_server_ts from the first event we're inserting + "origin_server_ts": first_event["origin_server_ts"], + }, + ) + + # Then copy the chunk_id onto the event we're trying to insert first + first_event["content"][ + EventContentFields.MSC2716_CHUNK_ID + ] = initial_chunk_id + + # Add an "insertion" event to the end of each chunk so the next chunk can be connected to this one + next_chunk_id = random_string(64) events_to_insert.append( { "type": EventTypes.MSC2716_INSERTION, - # requester.user.to_string() - "sender": events_to_insert[len(events_to_insert) - 1]["sender"], + "sender": requester.user.to_string(), "content": { - "next_chunk_id": random_string(64), + EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id, "m.historical": True, # TODO: Why is `body` necessary for this to show up in /messages - "body": "TODO_REMOVE", + "body": "TODO_REMOVE - END_CHUNK", }, - # Copy the origin_server_ts from the last event we're inserting + # Since the insertion event is put at the end of the chunk, + # copy the origin_server_ts from the last event we're inserting "origin_server_ts": events_to_insert[len(events_to_insert) - 1][ "origin_server_ts" ], @@ -413,7 +465,19 @@ async def on_POST(self, request, room_id): event_ids.append(event_id) prev_event_ids = [event_id] - return 200, {"state_events": auth_event_ids, "events": event_ids} + # Add "marker" events in the normal "live" timeline for 
each "insertion" + # event to signal homeservers to paginate over to the historical messages + # when they scrollback + if event.type == EventTypes.MSC2716_INSERTION: + await self._send_marker_event_for_insertion( + requester, room_id, event_id, auth_event_ids + ) + + return 200, { + "state_events": auth_event_ids, + "events": event_ids, + "next_chunk_id": next_chunk_id, + } def on_GET(self, request, room_id): return 501, "Not implemented" From 3d513bfd2ef7d82f48710ccb7e8843dcf632d855 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 May 2021 17:56:28 -0500 Subject: [PATCH 58/83] Remove partial federation code in favor of future insertion/marker logic See https://github.com/matrix-org/synapse/pull/9247/files#r634269224 --- .../databases/main/event_federation.py | 40 +------------------ 1 file changed, 1 insertion(+), 39 deletions(-) diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index 51fbd04a430c..f6303c043cd7 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -915,7 +915,7 @@ async def get_backfill_events(self, room_id: str, event_list: list, limit: int): return sorted(events, key=lambda e: -e.depth) def _get_backfill_events(self, txn, room_id, event_list, limit): - logger.info("_get_backfill_events: %s, %r, %s", room_id, event_list, limit) + logger.debug("_get_backfill_events: %s, %r, %s", room_id, event_list, limit) event_results = set() @@ -962,26 +962,6 @@ def _get_backfill_events(self, txn, room_id, event_list, limit): if row[1] not in event_results: queue.put((-row[0], row[1])) - # From each event, go forwards through successors to find chains of - # historical backfilled floating outliers - successorQueue = PriorityQueue() - successorQueue.put(event_id) - while not successorQueue.empty() and len(event_results) < limit: - try: - event_id = successorQueue.get_nowait() - except Empty: - break - - 
successor_event_ids = self.get_successor_events_txn(txn, [event_id]) - for successor_event_id in successor_event_ids: - # Skip any branches we have already gone down - if successor_event_id in event_results: - continue - - event_results.add(successor_event_id) - - successorQueue.put(successor_event_id) - return event_results async def get_missing_events(self, room_id, earliest_events, latest_events, limit): @@ -1027,24 +1007,6 @@ def _get_missing_events(self, txn, room_id, earliest_events, latest_events, limi event_results.reverse() return event_results - def get_successor_events_txn(self, txn, event_ids: Iterable[str]) -> List[str]: - """Fetch all events that have the given events as a prev event - - Args: - txn: Transaction object - event_ids: The events to use as the previous events. - """ - rows = self.db_pool.simple_select_many_txn( - txn, - table="event_edges", - column="prev_event_id", - iterable=event_ids, - retcols=("event_id",), - keyvalues={}, - ) - - return [row["event_id"] for row in rows] - async def get_successor_events(self, event_ids: Iterable[str]) -> List[str]: """Fetch all events that have the given events as a prev event From e92a9e95928c1e456f8827af97dd0d6e6dfbb9cf Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 20 May 2021 23:29:55 -0500 Subject: [PATCH 59/83] Start of new approach for chronological events in chunk See https://github.com/matrix-org/synapse/pull/9247#discussion_r636587140 Start of refactor to order events in each chunk in chronological order but persist events in reverse-chronological order to get the correct (topological_ordering, stream_ordering).
No marker events --- scripts-dev/complement.sh | 2 +- synapse/api/constants.py | 5 +- synapse/rest/client/v1/room.py | 113 ++++++++++----------------------- 3 files changed, 40 insertions(+), 80 deletions(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 34472473bc58..18e92725df96 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -46,4 +46,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! -COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go +COMPLEMENT_BASE_IMAGE=complement-synapse go test -tags msc2716 -v -count=1 ./tests/main_test.go ./tests/msc2716_test.go -run TestBackfillingHistory/parallel/Backfilled_historical_events_resolve_with_proper_state_in_correct_order diff --git a/synapse/api/constants.py b/synapse/api/constants.py index dba2e43f7c48..5841b6164263 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -177,7 +177,10 @@ class EventContentFields: # Used on normal message events to indicate where the chunk connects to MSC2716_CHUNK_ID = "org.matrix.msc2716.chunk_id" # For "marker" events - MSC2716_PREV_INSERTION = "org.matrix.msc2716.prev_insertion" + MSC2716_MARKER_INSERTION = "org.matrix.msc2716.marker.insertion" + MSC2716_MARKER_INSERTION_PREV_EVENTS = ( + "org.matrix.msc2716.marker.insertion_prev_events" + ) class RoomEncryptionAlgorithms: diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 7797c691785b..dcbdfcaf8304 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -277,25 +277,6 @@ def __init__(self, hs): self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() - async def _send_marker_event_for_insertion( - self, requester, room_id, insertion_event_id, auth_event_ids - ): - marker_event_dict = { - "type": EventTypes.MSC2716_MARKER, - "sender": requester.user.to_string(), - "content": { - 
EventContentFields.MSC2716_PREV_INSERTION: insertion_event_id, - }, - "room_id": room_id, - } - - (event, _,) = await self.event_creation_handler.create_and_send_nonmember_event( - requester, - marker_event_dict, - # TODO: Do we need to use `self.auth.compute_auth_events(...)` to filter the `auth_event_ids`? - auth_event_ids=auth_event_ids, - ) - def register(self, http_server): # /rooms/$roomid/bulksend PATTERNS = "/rooms/(?P[^/]*)/bulksend" @@ -314,6 +295,7 @@ async def on_POST(self, request, room_id): assert_params_in_dict(body, ["state_events_at_start", "events"]) prev_events_from_query = parse_strings_from_args(request.args, "prev_event") + chunk_id_from_query = parse_string(request, "chunk_id", default=None) # For the event we are inserting next to (`prev_events_from_query`), # find the most recent auth events (derived from state events) that @@ -359,13 +341,15 @@ async def on_POST(self, request, room_id): content=event_dict["content"], outlier=True, prev_event_ids=[fake_prev_event_id], - # Make sure to use a copy of this list as we add to it and loop here. - # Otherwise it will be the same reference and update when we append here later. + # Make sure to use a copy of this list because we modify it + # later in the loop here. Otherwise it will be the same + # reference and also update in the event when we append later. auth_event_ids=auth_event_ids.copy(), ) else: # TODO: Add some complement tests that adds state that is not member joins - # and will use this code path + # and will use this code path. Maybe we only want to support join state events + # and can get rid of this `else`? ( event, _, @@ -374,65 +358,45 @@ async def on_POST(self, request, room_id): event_dict, outlier=True, prev_event_ids=[fake_prev_event_id], - # Make sure to use a copy of this list as we add to it and loop here. - # Otherwise it will be the same reference and update when we append here later. 
+ # Make sure to use a copy of this list because we modify it + # later in the loop here. Otherwise it will be the same + # reference and also update in the event when we append later. auth_event_ids=auth_event_ids.copy(), ) event_id = event.event_id auth_event_ids.append(event_id) + logger.info("bulk insert events %s", body["events"]) events_to_insert = body["events"] - # Since we can only add backfilled events from an "insertion" point, - # If they did not provide a `chunk_id` to branch from, add an "insertion" point - # to start from. - if EventContentFields.MSC2716_CHUNK_ID not in events_to_insert[0]["content"]: - first_event = events_to_insert[0] - initial_chunk_id = random_string(64) - - # Prepend to the list - events_to_insert.insert( - 0, - { - "type": EventTypes.MSC2716_INSERTION, - "sender": requester.user.to_string(), - "content": { - EventContentFields.MSC2716_NEXT_CHUNK_ID: initial_chunk_id, - "m.historical": True, - # TODO: Why is `body` necessary for this to show up in /messages - "body": "TODO_REMOVE - INITIAL", - }, - # Since this initial insertion event is put at the start of the chunk, - # copy the origin_server_ts from the first event we're inserting - "origin_server_ts": first_event["origin_server_ts"], - }, - ) - - # Then copy the chunk_id onto the event we're trying to insert first - first_event["content"][ + # If provided, connect the chunk to the last insertion point + # The chunk ID passed in comes from the chunk_id in the + # "insertion" event from the previous chunk. + if chunk_id_from_query: + last_event_in_chunk = events_to_insert[len(events_to_insert) - 1] + last_event_in_chunk["content"][ EventContentFields.MSC2716_CHUNK_ID - ] = initial_chunk_id + ] = chunk_id_from_query - # Add an "insertion" event to the end of each chunk so the next chunk can be connected to this one + # Add an "insertion" event to the start of each chunk (next to the oldest + # event in the chunk) so the next chunk can be connected to this one. 
next_chunk_id = random_string(64) - events_to_insert.append( - { - "type": EventTypes.MSC2716_INSERTION, - "sender": requester.user.to_string(), - "content": { - EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id, - "m.historical": True, - # TODO: Why is `body` necessary for this to show up in /messages - "body": "TODO_REMOVE - END_CHUNK", - }, - # Since the insertion event is put at the end of the chunk, - # copy the origin_server_ts from the last event we're inserting - "origin_server_ts": events_to_insert[len(events_to_insert) - 1][ - "origin_server_ts" - ], - } - ) + insertion_event = { + "type": EventTypes.MSC2716_INSERTION, + "sender": requester.user.to_string(), + "content": { + EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id, + "m.historical": True, + # TODO: Why is `body` necessary for this to show up in /messages + "body": "TODO_REMOVE - INSERTION", + }, + # Since the insertion event is put at the end of the chunk, + # copy the origin_server_ts from the last event we're inserting + "origin_server_ts": events_to_insert[0]["origin_server_ts"], + } + # Prepend the insertion event to the start of the chunk + events_to_insert = [insertion_event] + events_to_insert event_ids = [] prev_event_ids = prev_events_from_query @@ -448,6 +412,7 @@ async def on_POST(self, request, room_id): "prev_events": prev_event_ids, } + # TODO: persist in reverse chronological order ( event, _, @@ -465,14 +430,6 @@ async def on_POST(self, request, room_id): event_ids.append(event_id) prev_event_ids = [event_id] - # Add "marker" events in the normal "live" timeline for each "insertion" - # event to signal homeservers to paginate over to the historical messages - # when they scrollback - if event.type == EventTypes.MSC2716_INSERTION: - await self._send_marker_event_for_insertion( - requester, room_id, event_id, auth_event_ids - ) - return 200, { "state_events": auth_event_ids, "events": event_ids, From e9ae5e119eb1a37a1181a0428ec43ff40ddd6e27 Mon Sep 17 00:00:00 2001 
From: Eric Eastwood Date: Fri, 21 May 2021 04:22:27 -0500 Subject: [PATCH 60/83] Chronological events but persist in reverse-chronological See https://github.com/matrix-org/synapse/pull/9247#discussion_r636587140 --- synapse/api/constants.py | 2 ++ synapse/handlers/message.py | 11 +++++-- synapse/rest/client/v1/room.py | 58 ++++++++++++++++++++++++---------- 3 files changed, 53 insertions(+), 18 deletions(-) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index 5841b6164263..73d16b638f5c 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -172,6 +172,8 @@ class EventContentFields: # cf https://github.com/matrix-org/matrix-doc/pull/1772 MSC1772_ROOM_TYPE = "org.matrix.msc1772.type" + # Used on normal messages to indicate they were historically imported after the fact + MSC2716_HISTORICAL = "org.matrix.msc2716.historical" # For "insertion" events MSC2716_NEXT_CHUNK_ID = "org.matrix.msc2716.next_chunk_id" # Used on normal message events to indicate where the chunk connects to diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index a93c84d7c7bb..cdc79395819f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -788,6 +788,8 @@ async def create_and_send_nonmember_event( inherit_depth=inherit_depth, ) + current_state_ids = await context.get_current_state_ids() + assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( event.sender, ) @@ -878,6 +880,7 @@ async def create_new_client_event( old_state = await self.store.get_events_as_list(auth_event_ids) context = await self.state.compute_event_context(event, old_state=old_state) + if requester: context.app_service = requester.app_service @@ -1006,7 +1009,11 @@ async def handle_new_client_event( logger.exception("Failed to encode content: %r", event.content) raise - await self.action_generator.handle_push_actions_for_event(event, context) + # TODO: Skip actions for historical messages or figure out how to + # generate proper 
context.current_state_ids and state_groups for our historical events + # which have prev_events that reference non-persisted events because + # we are persisting in reverse-chronolical + # await self.action_generator.handle_push_actions_for_event(event, context) await self.cache_joined_hosts_for_event(event) @@ -1289,7 +1296,7 @@ async def persist_and_notify_client_event( # Mark any `m.historical` messages as backfilled so they don't appear # in `/sync` and have the proper decrementing `stream_ordering` as we import backfilled = False - if event.content.get("m.historical", None): + if event.content.get(EventContentFields.MSC2716_HISTORICAL, None): backfilled = True # Note that this returns the event that was persisted, which may not be diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index dcbdfcaf8304..973047211636 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -368,13 +368,13 @@ async def on_POST(self, request, room_id): auth_event_ids.append(event_id) logger.info("bulk insert events %s", body["events"]) - events_to_insert = body["events"] + events_to_create = body["events"] # If provided, connect the chunk to the last insertion point # The chunk ID passed in comes from the chunk_id in the # "insertion" event from the previous chunk. 
if chunk_id_from_query: - last_event_in_chunk = events_to_insert[len(events_to_insert) - 1] + last_event_in_chunk = events_to_create[len(events_to_create) - 1] last_event_in_chunk["content"][ EventContentFields.MSC2716_CHUNK_ID ] = chunk_id_from_query @@ -387,48 +387,74 @@ async def on_POST(self, request, room_id): "sender": requester.user.to_string(), "content": { EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id, - "m.historical": True, + EventContentFields.MSC2716_HISTORICAL: True, # TODO: Why is `body` necessary for this to show up in /messages "body": "TODO_REMOVE - INSERTION", }, # Since the insertion event is put at the end of the chunk, # copy the origin_server_ts from the last event we're inserting - "origin_server_ts": events_to_insert[0]["origin_server_ts"], + "origin_server_ts": events_to_create[0]["origin_server_ts"], } # Prepend the insertion event to the start of the chunk - events_to_insert = [insertion_event] + events_to_insert + events_to_create = [insertion_event] + events_to_create event_ids = [] prev_event_ids = prev_events_from_query - for ev in events_to_insert: + events_to_persist = [] + for ev in events_to_create: assert_params_in_dict(ev, ["type", "origin_server_ts", "content", "sender"]) + # Mark all events as historical + # This has important semantics within the Synapse internals to backfill properly + ev["content"][EventContentFields.MSC2716_HISTORICAL] = True + event_dict = { "type": ev["type"], "origin_server_ts": ev["origin_server_ts"], "content": ev["content"], "room_id": room_id, "sender": ev["sender"], # requester.user.to_string(), - "prev_events": prev_event_ids, + "prev_events": prev_event_ids.copy(), } - # TODO: persist in reverse chronological order - ( - event, - _, - ) = await self.event_creation_handler.create_and_send_nonmember_event( + event, context = await self.event_creation_handler.create_event( requester, event_dict, - # TODO: Should these be an outlier? 
- # outlier=True, - inherit_depth=True, + prev_event_ids=event_dict.get("prev_events"), # TODO: Do we need to use `self.auth.compute_auth_events(...)` to filter the `auth_event_ids`? auth_event_ids=auth_event_ids, + inherit_depth=True, ) + current_state_ids = await context.get_current_state_ids() + logger.info( + "bulksend event=%s current_state_ids=%s", + event, + current_state_ids, + ) + + # TODO: Should we add the same `hs.is_mine_id(event.sender)` assert check that `create_and_send_nonmember_event` has? + + events_to_persist.append((event, context)) event_id = event.event_id event_ids.append(event_id) - prev_event_ids = [event_id] + # We add `event_id` so it references the last message. + # We add `prev_events_from_query` so it can find the proper depth + # while persisting. I wish we could rely on just `event_id` but + # since we are persisting in reverse-chronolical order below, + # that event isn't persisted yet. + prev_event_ids = [event_id] + prev_events_from_query + + # Persist events in reverse-chronological order so they have the + # correct stream_ordering as they are backfilled (which decrements). + # Events are sorted by (topological_ordering, stream_ordering) + # where topological_ordering is just depth. 
+ for (event, context) in reversed(events_to_persist): + ev = await self.event_creation_handler.handle_new_client_event( + requester=requester, + event=event, + context=context, + ) return 200, { "state_events": auth_event_ids, From a978f38a251ce085fc31d7dbb22636a551438200 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 May 2021 18:40:12 -0500 Subject: [PATCH 61/83] Skip push notification actions for historical messages --- synapse/handlers/message.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index cdc79395819f..88e033590eb6 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -788,8 +788,6 @@ async def create_and_send_nonmember_event( inherit_depth=inherit_depth, ) - current_state_ids = await context.get_current_state_ids() - assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( event.sender, ) @@ -1009,11 +1007,13 @@ async def handle_new_client_event( logger.exception("Failed to encode content: %r", event.content) raise - # TODO: Skip actions for historical messages or figure out how to - # generate proper context.current_state_ids and state_groups for our historical events - # which have prev_events that reference non-persisted events because - # we are persisting in reverse-chronolical - # await self.action_generator.handle_push_actions_for_event(event, context) + # Skip push notification actions for historical messages + # because we don't want to notify people about old history back in time. + # The historical messages also do not have the proper `context.current_state_ids` + # and `state_groups` because they have `prev_events` that aren't persisted yet + # (historical messages persisted in reverse-chronological order). 
+ if event.content.get(EventContentFields.MSC2716_HISTORICAL, None) is None: + await self.action_generator.handle_push_actions_for_event(event, context) await self.cache_joined_hosts_for_event(event) From 2c229294113346f9f1538eff7e5ef2088baa6570 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 May 2021 18:44:33 -0500 Subject: [PATCH 62/83] Use more explicit comparison --- synapse/handlers/message.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 88e033590eb6..661dcb82fb06 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1296,7 +1296,7 @@ async def persist_and_notify_client_event( # Mark any `m.historical` messages as backfilled so they don't appear # in `/sync` and have the proper decrementing `stream_ordering` as we import backfilled = False - if event.content.get(EventContentFields.MSC2716_HISTORICAL, None): + if event.content.get(EventContentFields.MSC2716_HISTORICAL, None) is not None: backfilled = True # Note that this returns the event that was persisted, which may not be From 0501a112d046bdf3a75e51d5804fb38b155ce950 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 May 2021 19:46:42 -0500 Subject: [PATCH 63/83] Fix lint --- synapse/http/servlet.py | 2 +- synapse/rest/client/v1/room.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/synapse/http/servlet.py b/synapse/http/servlet.py index ab5523c64c19..71f0752e0a52 100644 --- a/synapse/http/servlet.py +++ b/synapse/http/servlet.py @@ -172,7 +172,7 @@ def _parse_string_value( def parse_strings_from_args( args: List[str], name: Union[bytes, str], - default: Optional[str] = None, + default: Optional[List[str]] = None, required: bool = False, allowed_values: Optional[Iterable[str]] = None, encoding: Optional[str] = "ascii", diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 973047211636..e4393e70176a 100644 --- 
a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -21,7 +21,7 @@ from typing import TYPE_CHECKING, List, Optional, Tuple from urllib import parse as urlparse -from synapse.api.constants import EventTypes, EventContentFields, Membership +from synapse.api.constants import EventContentFields, EventTypes, Membership from synapse.api.errors import ( AuthError, Codes, @@ -297,6 +297,13 @@ async def on_POST(self, request, room_id): prev_events_from_query = parse_strings_from_args(request.args, "prev_event") chunk_id_from_query = parse_string(request, "chunk_id", default=None) + if prev_events_from_query is None: + raise SynapseError( + 400, + "prev_event query parameter is required when inserting historical messages back in time", + errcode=Codes.MISSING_PARAM, + ) + # For the event we are inserting next to (`prev_events_from_query`), # find the most recent auth events (derived from state events) that # allowed that message to be sent. We will use that as a base From 176a85465316c5eb75a2f26082e656df821e69e3 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 26 May 2021 22:00:10 -0500 Subject: [PATCH 64/83] Uncomment dropped fields to see if tests pass --- synapse/events/utils.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/events/utils.py b/synapse/events/utils.py index 4c08bd069a42..7d7cd9aaee5a 100644 --- a/synapse/events/utils.py +++ b/synapse/events/utils.py @@ -252,13 +252,13 @@ def format_event_for_client_v1(d): def format_event_for_client_v2(d): drop_keys = ( - # "auth_events", - # "prev_events", - # "hashes", - # "signatures", - # "depth", - # "origin", - # "prev_state", + "auth_events", + "prev_events", + "hashes", + "signatures", + "depth", + "origin", + "prev_state", ) for key in drop_keys: d.pop(key, None) From 25aef56f43a13ab6832b656eb829fd34e57fc2f6 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 27 May 2021 10:43:39 -0500 Subject: [PATCH 65/83] Return early instead of big if 
nesting See https://github.com/matrix-org/synapse/pull/10049#discussion_r640448108 --- synapse/handlers/room_member.py | 286 ++++++++++++++++---------------- 1 file changed, 147 insertions(+), 139 deletions(-) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 28ae07ea073b..2c4131aa3fa2 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -570,169 +570,177 @@ async def update_membership_locked( raise SynapseError(403, "Invites have been disabled on this server") if prev_event_ids: - latest_event_ids = prev_event_ids - else: - latest_event_ids = await self.store.get_prev_events_for_room(room_id) - - current_state_ids = await self.state_handler.get_current_state_ids( - room_id, latest_event_ids=latest_event_ids + return await self._local_membership_update( + requester=requester, + target=target, + room_id=room_id, + membership=effective_membership_state, + txn_id=txn_id, + ratelimit=ratelimit, + prev_event_ids=prev_event_ids, + auth_event_ids=auth_event_ids, + content=content, + require_consent=require_consent, + outlier=outlier, ) - # TODO: Refactor into dictionary of explicitly allowed transitions - # between old and new state, with specific error messages for some - # transitions and generic otherwise - old_state_id = current_state_ids.get( - (EventTypes.Member, target.to_string()) - ) - if old_state_id: - old_state = await self.store.get_event(old_state_id, allow_none=True) - old_membership = ( - old_state.content.get("membership") if old_state else None + latest_event_ids = await self.store.get_prev_events_for_room(room_id) + + current_state_ids = await self.state_handler.get_current_state_ids( + room_id, latest_event_ids=latest_event_ids + ) + + # TODO: Refactor into dictionary of explicitly allowed transitions + # between old and new state, with specific error messages for some + # transitions and generic otherwise + old_state_id = current_state_ids.get((EventTypes.Member, target.to_string())) 
+ if old_state_id: + old_state = await self.store.get_event(old_state_id, allow_none=True) + old_membership = old_state.content.get("membership") if old_state else None + if action == "unban" and old_membership != "ban": + raise SynapseError( + 403, + "Cannot unban user who was not banned" + " (membership=%s)" % old_membership, + errcode=Codes.BAD_STATE, ) - if action == "unban" and old_membership != "ban": - raise SynapseError( - 403, - "Cannot unban user who was not banned" - " (membership=%s)" % old_membership, - errcode=Codes.BAD_STATE, + if old_membership == "ban" and action != "unban": + raise SynapseError( + 403, + "Cannot %s user who was banned" % (action,), + errcode=Codes.BAD_STATE, + ) + + if old_state: + same_content = content == old_state.content + same_membership = old_membership == effective_membership_state + same_sender = requester.user.to_string() == old_state.sender + if same_sender and same_membership and same_content: + # duplicate event. + # we know it was persisted, so must have a stream ordering. + assert old_state.internal_metadata.stream_ordering + return ( + old_state.event_id, + old_state.internal_metadata.stream_ordering, ) - if old_membership == "ban" and action != "unban": + + if old_membership in ["ban", "leave"] and action == "kick": + raise AuthError(403, "The target user is not in the room") + + # we don't allow people to reject invites to the server notice + # room, but they can leave it once they are joined. 
+ if ( + old_membership == Membership.INVITE + and effective_membership_state == Membership.LEAVE + ): + is_blocked = await self._is_server_notice_room(room_id) + if is_blocked: raise SynapseError( - 403, - "Cannot %s user who was banned" % (action,), - errcode=Codes.BAD_STATE, + HTTPStatus.FORBIDDEN, + "You cannot reject this invite", + errcode=Codes.CANNOT_LEAVE_SERVER_NOTICE_ROOM, ) + else: + if action == "kick": + raise AuthError(403, "The target user is not in the room") - if old_state: - same_content = content == old_state.content - same_membership = old_membership == effective_membership_state - same_sender = requester.user.to_string() == old_state.sender - if same_sender and same_membership and same_content: - # duplicate event. - # we know it was persisted, so must have a stream ordering. - assert old_state.internal_metadata.stream_ordering - return ( - old_state.event_id, - old_state.internal_metadata.stream_ordering, - ) - - if old_membership in ["ban", "leave"] and action == "kick": - raise AuthError(403, "The target user is not in the room") + is_host_in_room = await self._is_host_in_room(current_state_ids) - # we don't allow people to reject invites to the server notice - # room, but they can leave it once they are joined. - if ( - old_membership == Membership.INVITE - and effective_membership_state == Membership.LEAVE - ): - is_blocked = await self._is_server_notice_room(room_id) - if is_blocked: - raise SynapseError( - HTTPStatus.FORBIDDEN, - "You cannot reject this invite", - errcode=Codes.CANNOT_LEAVE_SERVER_NOTICE_ROOM, - ) - else: - if action == "kick": - raise AuthError(403, "The target user is not in the room") + if effective_membership_state == Membership.JOIN: + if requester.is_guest: + guest_can_join = await self._can_guest_join(current_state_ids) + if not guest_can_join: + # This should be an auth check, but guests are a local concept, + # so don't really fit into the general auth process. 
+ raise AuthError(403, "Guest access not allowed") - is_host_in_room = await self._is_host_in_room(current_state_ids) + if not is_host_in_room: + if ratelimit: + time_now_s = self.clock.time() + ( + allowed, + time_allowed, + ) = await self._join_rate_limiter_remote.can_do_action( + requester, + ) - if effective_membership_state == Membership.JOIN: - if requester.is_guest: - guest_can_join = await self._can_guest_join(current_state_ids) - if not guest_can_join: - # This should be an auth check, but guests are a local concept, - # so don't really fit into the general auth process. - raise AuthError(403, "Guest access not allowed") - - if not is_host_in_room: - if ratelimit: - time_now_s = self.clock.time() - ( - allowed, - time_allowed, - ) = await self._join_rate_limiter_remote.can_do_action( - requester, + if not allowed: + raise LimitExceededError( + retry_after_ms=int(1000 * (time_allowed - time_now_s)) ) - if not allowed: - raise LimitExceededError( - retry_after_ms=int(1000 * (time_allowed - time_now_s)) - ) - - inviter = await self._get_inviter(target.to_string(), room_id) - if inviter and not self.hs.is_mine(inviter): - remote_room_hosts.append(inviter.domain) - - content["membership"] = Membership.JOIN + inviter = await self._get_inviter(target.to_string(), room_id) + if inviter and not self.hs.is_mine(inviter): + remote_room_hosts.append(inviter.domain) - profile = self.profile_handler - if not content_specified: - content["displayname"] = await profile.get_displayname(target) - content["avatar_url"] = await profile.get_avatar_url(target) + content["membership"] = Membership.JOIN - if requester.is_guest: - content["kind"] = "guest" + profile = self.profile_handler + if not content_specified: + content["displayname"] = await profile.get_displayname(target) + content["avatar_url"] = await profile.get_avatar_url(target) - remote_join_response = await self._remote_join( - requester, remote_room_hosts, room_id, target, content - ) - - return 
remote_join_response + if requester.is_guest: + content["kind"] = "guest" - elif effective_membership_state == Membership.LEAVE: - if not is_host_in_room: - # perhaps we've been invited - ( - current_membership_type, - current_membership_event_id, - ) = await self.store.get_local_current_membership_for_user_in_room( - target.to_string(), room_id - ) - if ( - current_membership_type != Membership.INVITE - or not current_membership_event_id - ): - logger.info( - "%s sent a leave request to %s, but that is not an active room " - "on this server, and there is no pending invite", - target, - room_id, - ) + remote_join_response = await self._remote_join( + requester, remote_room_hosts, room_id, target, content + ) - raise SynapseError(404, "Not a known room") + return remote_join_response - invite = await self.store.get_event(current_membership_event_id) + elif effective_membership_state == Membership.LEAVE: + if not is_host_in_room: + # perhaps we've been invited + ( + current_membership_type, + current_membership_event_id, + ) = await self.store.get_local_current_membership_for_user_in_room( + target.to_string(), room_id + ) + if ( + current_membership_type != Membership.INVITE + or not current_membership_event_id + ): logger.info( - "%s rejects invite to %s from %s", + "%s sent a leave request to %s, but that is not an active room " + "on this server, and there is no pending invite", target, room_id, - invite.sender, ) - if not self.hs.is_mine_id(invite.sender): - # send the rejection to the inviter's HS (with fallback to - # local event) - return await self.remote_reject_invite( - invite.event_id, - txn_id, - requester, - content, - ) + raise SynapseError(404, "Not a known room") + + invite = await self.store.get_event(current_membership_event_id) + logger.info( + "%s rejects invite to %s from %s", + target, + room_id, + invite.sender, + ) + + if not self.hs.is_mine_id(invite.sender): + # send the rejection to the inviter's HS (with fallback to + # local event) + 
return await self.remote_reject_invite( + invite.event_id, + txn_id, + requester, + content, + ) - # the inviter was on our server, but has now left. Carry on - # with the normal rejection codepath, which will also send the - # rejection out to any other servers we believe are still in the room. - - # thanks to overzealous cleaning up of event_forward_extremities in - # `delete_old_current_state_events`, it's possible to end up with no - # forward extremities here. If that happens, let's just hang the - # rejection off the invite event. - # - # see: https://github.com/matrix-org/synapse/issues/7139 - if len(latest_event_ids) == 0: - latest_event_ids = [invite.event_id] + # the inviter was on our server, but has now left. Carry on + # with the normal rejection codepath, which will also send the + # rejection out to any other servers we believe are still in the room. + + # thanks to overzealous cleaning up of event_forward_extremities in + # `delete_old_current_state_events`, it's possible to end up with no + # forward extremities here. If that happens, let's just hang the + # rejection off the invite event. 
+ # + # see: https://github.com/matrix-org/synapse/issues/7139 + if len(latest_event_ids) == 0: + latest_event_ids = [invite.event_id] return await self._local_membership_update( requester=requester, From 0580d09ef9484e3ccca2fb87f18d80668bd6deab Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 2 Jun 2021 13:26:58 -0500 Subject: [PATCH 66/83] Pass in prev_events as function parameter so clients can't set it in the event dictionary See https://github.com/matrix-org/synapse/pull/10049#discussion_r643837726 --- synapse/handlers/message.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index d1876678b71e..43669c64e348 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -734,6 +734,7 @@ async def create_and_send_nonmember_event( self, requester: Requester, event_dict: dict, + prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, ratelimit: bool = True, txn_id: Optional[str] = None, @@ -749,6 +750,10 @@ async def create_and_send_nonmember_event( Args: requester: The requester sending the event. event_dict: An entire event. + prev_event_ids: + The event IDs to use as the prev events. + Should normally be left as None to automatically request them + from the database. auth_event_ids: The event ids to use as the auth_events for the new event. 
Should normally be left as None, which will cause them to be calculated @@ -800,13 +805,11 @@ async def create_and_send_nonmember_event( assert event.internal_metadata.stream_ordering return event, event.internal_metadata.stream_ordering - prev_events = event_dict.get("prev_events") - event, context = await self.create_event( requester, event_dict, txn_id=txn_id, - prev_event_ids=prev_events, + prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, outlier=outlier, inherit_depth=inherit_depth, From ef68832a78dae296c2fc09b0a85de7f74a42c8aa Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sat, 5 Jun 2021 02:14:10 -0500 Subject: [PATCH 67/83] Switch to passing depth directly instead of inherit_depth See https://github.com/matrix-org/synapse/pull/9247/files#r636778295 Conflicts: changelog.d/10049.misc synapse/handlers/message.py --- synapse/events/builder.py | 43 +++---------------- synapse/handlers/message.py | 27 ++++++------ .../test_federation_sender_shard.py | 8 +--- tests/storage/test_redaction.py | 4 +- 4 files changed, 24 insertions(+), 58 deletions(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 59221efec84a..44ceac48ec65 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -103,7 +103,7 @@ async def build( self, prev_event_ids: List[str], auth_event_ids: Optional[List[str]], - inherit_depth: bool = False, + depth: Optional[int] = None, ) -> EventBase: """Transform into a fully signed and hashed event @@ -112,8 +112,9 @@ async def build( auth_event_ids: The event IDs to use as the auth events. Should normally be set to None, which will cause them to be calculated based on the room state at the prev_events. - inherit_depth: True to inherit the depth from the successor of the most - recent event from prev_event_ids. False to progress the depth as normal. + depth: Override the depth used to order the event in the DAG. 
+ Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. Returns: The signed and hashed event. @@ -138,44 +139,12 @@ async def build( prev_events = prev_event_ids ( - most_recent_prev_event_id, + _, most_recent_prev_event_depth, ) = await self._store.get_max_depth_of(prev_event_ids) - # We want to insert the historical event after the `prev_event` but before the successor event - # - # We inherit depth from the successor event instead of the `prev_event` - # because events returned from `/messages` are first sorted by `topological_ordering` - # which is just the `depth` and then tie-break with `stream_ordering`. - # - # We mark these inserted historical events as "backfilled" which gives them a - # negative `stream_ordering`. If we use the same depth as the `prev_event`, - # then our historical event will tie-break and be sorted before the `prev_event` - # when it should come after. - # - # We want to use the successor event depth so they appear after `prev_event` because - # it has a larger `depth` but before the successor event because the `stream_ordering` - # is negative before the successor event. - if inherit_depth: - successor_event_ids = await self._store.get_successor_events( - [most_recent_prev_event_id] - ) - - # If we can't find any successor events, then it's a forward extremity of - # historical messages and we can just inherit from the previous historical - # event which we can already assume has the correct depth where we want - # to insert into. 
- if not successor_event_ids: - depth = most_recent_prev_event_depth - else: - ( - _, - oldest_successor_depth, - ) = await self._store.get_min_depth_of(successor_event_ids) - - depth = oldest_successor_depth # Otherwise, progress the depth as normal - else: + if depth is None: depth = most_recent_prev_event_depth + 1 # we cap depth of generated events, to ensure that they are not diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index 43669c64e348..a09aa2fdafdc 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -481,7 +481,7 @@ async def create_event( auth_event_ids: Optional[List[str]] = None, require_consent: bool = True, outlier: bool = False, - inherit_depth: bool = False, + depth: Optional[int] = None, ) -> Tuple[EventBase, EventContext]: """ Given a dict from a client, create a new event. @@ -512,9 +512,10 @@ async def create_event( outlier: Indicates whether the event is an `outlier`, i.e. if it's from an arbitrary point and floating in the DAG as opposed to being inline with the current DAG. + depth: Override the depth used to order the event in the DAG. + Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. - inherit_depth: True to inherit the depth from the successor of the most - recent event from prev_event_ids. False to progress the depth as normal. Raises: ResourceLimitError if server is blocked to some resource being exceeded @@ -577,7 +578,7 @@ async def create_event( requester=requester, prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, - inherit_depth=inherit_depth, + depth=depth, ) # In an ideal world we wouldn't need the second part of this condition. However, @@ -740,7 +741,7 @@ async def create_and_send_nonmember_event( txn_id: Optional[str] = None, ignore_shadow_ban: bool = False, outlier: bool = False, - inherit_depth: bool = False, + depth: Optional[int] = None, ) -> Tuple[EventBase, int]: """ Creates an event, then sends it. 
@@ -765,8 +766,9 @@ async def create_and_send_nonmember_event( outlier: Indicates whether the event is an `outlier`, i.e. if it's from an arbitrary point and floating in the DAG as opposed to being inline with the current DAG. - inherit_depth: True to inherit the depth from the successor of the most - recent event from prev_event_ids. False to progress the depth as normal. + depth: Override the depth used to order the event in the DAG. + Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. Returns: The event, and its stream ordering (if deduplication happened, @@ -812,7 +814,7 @@ async def create_and_send_nonmember_event( prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, outlier=outlier, - inherit_depth=inherit_depth, + depth=depth, ) assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( @@ -844,7 +846,7 @@ async def create_new_client_event( requester: Optional[Requester] = None, prev_event_ids: Optional[List[str]] = None, auth_event_ids: Optional[List[str]] = None, - inherit_depth: bool = False, + depth: Optional[int] = None, ) -> Tuple[EventBase, EventContext]: """Create a new event for a local client @@ -862,8 +864,9 @@ async def create_new_client_event( Should normally be left as None, which will cause them to be calculated based on the room state at the prev_events. - inherit_depth: True to inherit the depth from the successor of the most - recent event from prev_event_ids. False to progress the depth as normal. + depth: Override the depth used to order the event in the DAG. + Should normally be set to None, which will cause the depth to be calculated + based on the prev_events. 
Returns: Tuple of created event, context @@ -890,7 +893,7 @@ async def create_new_client_event( event = await builder.build( prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids, - inherit_depth=inherit_depth, + depth=depth, ) old_state = None diff --git a/tests/replication/test_federation_sender_shard.py b/tests/replication/test_federation_sender_shard.py index 20324e6b5354..48ab3aa4e36b 100644 --- a/tests/replication/test_federation_sender_shard.py +++ b/tests/replication/test_federation_sender_shard.py @@ -224,13 +224,7 @@ def create_room_with_remote_server(self, user, token, remote_server="other_serve } builder = factory.for_room_version(room_version, event_dict) - join_event = self.get_success( - builder.build( - prev_event_ids, - None, - inherit_depth=False, - ) - ) + join_event = self.get_success(builder.build(prev_event_ids, None)) self.get_success(federation.on_send_join_request(remote_server, join_event)) self.replicate() diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index df8ee4b291cf..61757e86e3b0 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -236,10 +236,10 @@ async def build( self, prev_event_ids, auth_event_ids, - inherit_depth: bool = False, + depth: Optional[int] = None, ): built_event = await self._base_builder.build( - prev_event_ids, auth_event_ids, inherit_depth + prev_event_ids, auth_event_ids ) built_event._event_id = self._event_id From d8316d6091b9bfa81774ac12d7ca3e8f5b2f842e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Sun, 6 Jun 2021 16:23:33 -0500 Subject: [PATCH 68/83] Calculate and pass in depth directly --- synapse/rest/client/v1/room.py | 52 +++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 566554622fb4..a12d886aa540 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -281,6 +281,46 @@ def 
register(self, http_server): PATTERNS = "/rooms/(?P[^/]*)/bulksend" register_txn_path(self, PATTERNS, http_server, with_get=True) + async def inherit_depth_from_prev_ids(self, prev_event_ids) -> int: + ( + most_recent_prev_event_id, + most_recent_prev_event_depth, + ) = await self.store.get_max_depth_of(prev_event_ids) + + # We want to insert the historical event after the `prev_event` but before the successor event + # + # We inherit depth from the successor event instead of the `prev_event` + # because events returned from `/messages` are first sorted by `topological_ordering` + # which is just the `depth` and then tie-break with `stream_ordering`. + # + # We mark these inserted historical events as "backfilled" which gives them a + # negative `stream_ordering`. If we use the same depth as the `prev_event`, + # then our historical event will tie-break and be sorted before the `prev_event` + # when it should come after. + # + # We want to use the successor event depth so they appear after `prev_event` because + # it has a larger `depth` but before the successor event because the `stream_ordering` + # is negative before the successor event. + successor_event_ids = await self.store.get_successor_events( + [most_recent_prev_event_id] + ) + + # If we can't find any successor events, then it's a forward extremity of + # historical messages and we can just inherit from the previous historical + # event which we can already assume has the correct depth where we want + # to insert into. 
+ if not successor_event_ids: + depth = most_recent_prev_event_depth + else: + ( + _, + oldest_successor_depth, + ) = await self.store.get_min_depth_of(successor_event_ids) + + depth = oldest_successor_depth + + return depth + async def on_POST(self, request, room_id): requester = await self.auth.get_user_by_req(request, allow_guest=False) @@ -404,6 +444,9 @@ async def on_POST(self, request, room_id): # Prepend the insertion event to the start of the chunk events_to_create = [insertion_event] + events_to_create + inherited_depth = await self.inherit_depth_from_prev_ids(prev_events_from_query) + logger.info("inherited_depth %s", inherited_depth) + event_ids = [] prev_event_ids = prev_events_from_query events_to_persist = [] @@ -429,7 +472,7 @@ async def on_POST(self, request, room_id): prev_event_ids=event_dict.get("prev_events"), # TODO: Do we need to use `self.auth.compute_auth_events(...)` to filter the `auth_event_ids`? auth_event_ids=auth_event_ids, - inherit_depth=True, + depth=inherited_depth, ) current_state_ids = await context.get_current_state_ids() logger.info( @@ -444,12 +487,7 @@ async def on_POST(self, request, room_id): event_id = event.event_id event_ids.append(event_id) - # We add `event_id` so it references the last message. - # We add `prev_events_from_query` so it can find the proper depth - # while persisting. I wish we could rely on just `event_id` but - # since we are persisting in reverse-chronolical order below, - # that event isn't persisted yet. - prev_event_ids = [event_id] + prev_events_from_query + prev_event_ids = [event_id] # Persist events in reverse-chronological order so they have the # correct stream_ordering as they are backfilled (which decrements). 
From 513d7a2ac7a39fab70804b794b2905bfb6ae795e Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 7 Jun 2021 08:44:14 -0500 Subject: [PATCH 69/83] Only get_max_depth_of where we use it --- synapse/events/builder.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/events/builder.py b/synapse/events/builder.py index 44ceac48ec65..81bf8615b712 100644 --- a/synapse/events/builder.py +++ b/synapse/events/builder.py @@ -138,13 +138,13 @@ async def build( auth_events = auth_event_ids prev_events = prev_event_ids - ( - _, - most_recent_prev_event_depth, - ) = await self._store.get_max_depth_of(prev_event_ids) - # Otherwise, progress the depth as normal if depth is None: + ( + _, + most_recent_prev_event_depth, + ) = await self._store.get_max_depth_of(prev_event_ids) + depth = most_recent_prev_event_depth + 1 # we cap depth of generated events, to ensure that they are not From f36bdde4db9811b1cfa476f65ebc10b1d82434bc Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 7 Jun 2021 09:29:54 -0500 Subject: [PATCH 70/83] Remove random logs used while developing --- synapse/rest/client/v1/room.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index a12d886aa540..a6f31f4a86ae 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -100,7 +100,7 @@ def get_room_config(self, request): # TODO: Needs unit testing for generic events -class RoomStateEventRestServlet(TransactionRestServlet): +class Roomstate_evententRestServlet(TransactionRestServlet): def __init__(self, hs): super().__init__(hs) self.event_creation_handler = hs.get_event_creation_handler() @@ -359,18 +359,24 @@ async def on_POST(self, request, room_id): prev_state_ids = list(prev_state_map.values()) auth_event_ids = prev_state_ids - for stateEv in body["state_events_at_start"]: + for state_event in body["state_events_at_start"]: 
assert_params_in_dict( - stateEv, ["type", "origin_server_ts", "content", "sender"] + state_event, ["type", "origin_server_ts", "content", "sender"] + ) + + logger.debug( + "RoomBulkSendEventRestServlet inserting state_event=%s, auth_event_ids=%s", + state_event, + auth_event_ids, ) event_dict = { - "type": stateEv["type"], - "origin_server_ts": stateEv["origin_server_ts"], - "content": stateEv["content"], + "type": state_event["type"], + "origin_server_ts": state_event["origin_server_ts"], + "content": state_event["content"], "room_id": room_id, - "sender": stateEv["sender"], - "state_key": stateEv["state_key"], + "sender": state_event["sender"], + "state_key": state_event["state_key"], } # Make the state events float off on their own @@ -413,7 +419,6 @@ async def on_POST(self, request, room_id): auth_event_ids.append(event_id) - logger.info("bulk insert events %s", body["events"]) events_to_create = body["events"] # If provided, connect the chunk to the last insertion point @@ -445,7 +450,6 @@ async def on_POST(self, request, room_id): events_to_create = [insertion_event] + events_to_create inherited_depth = await self.inherit_depth_from_prev_ids(prev_events_from_query) - logger.info("inherited_depth %s", inherited_depth) event_ids = [] prev_event_ids = prev_events_from_query @@ -474,11 +478,11 @@ async def on_POST(self, request, room_id): auth_event_ids=auth_event_ids, depth=inherited_depth, ) - current_state_ids = await context.get_current_state_ids() - logger.info( - "bulksend event=%s current_state_ids=%s", + logger.debug( + "RoomBulkSendEventRestServlet inserting event=%s, prev_event_ids=%s, auth_event_ids=%s", event, - current_state_ids, + prev_event_ids, + auth_event_ids, ) # TODO: Should we add the same `hs.is_mine_id(event.sender)` assert check that `create_and_send_nonmember_event` has? 
@@ -1299,7 +1303,7 @@ async def on_POST( def register_servlets(hs: "HomeServer", http_server, is_worker=False): msc2716_enabled = hs.config.experimental.msc2716_enabled - RoomStateEventRestServlet(hs).register(http_server) + Roomstate_evententRestServlet(hs).register(http_server) RoomMemberListRestServlet(hs).register(http_server) JoinedRoomMemberListRestServlet(hs).register(http_server) RoomMessageListRestServlet(hs).register(http_server) From c2e1924ae410d0962b3c09f5f64938e1903c302a Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 7 Jun 2021 09:32:35 -0500 Subject: [PATCH 71/83] Use nice negative list index to grab last item --- synapse/rest/client/v1/room.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index a6f31f4a86ae..7e2f67fbc3a3 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -425,7 +425,7 @@ async def on_POST(self, request, room_id): # The chunk ID passed in comes from the chunk_id in the # "insertion" event from the previous chunk. if chunk_id_from_query: - last_event_in_chunk = events_to_create[len(events_to_create) - 1] + last_event_in_chunk = events_to_create[-1] last_event_in_chunk["content"][ EventContentFields.MSC2716_CHUNK_ID ] = chunk_id_from_query From 88327fba7f6c5cc025c66e752441950e2dc83663 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 7 Jun 2021 09:50:52 -0500 Subject: [PATCH 72/83] Remove unneeded body from insertion events It was a problem with how my Complement tests were only iterating over events with body fields. 
--- synapse/rest/client/v1/room.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 7e2f67fbc3a3..1123157d2632 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -439,8 +439,6 @@ async def on_POST(self, request, room_id): "content": { EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id, EventContentFields.MSC2716_HISTORICAL: True, - # TODO: Why is `body` necessary for this to show up in /messages - "body": "TODO_REMOVE - INSERTION", }, # Since the insertion event is put at the end of the chunk, # copy the origin_server_ts from the last event we're inserting From 34f130c91ef4d82c17d2926ae07447455a158d61 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 7 Jun 2021 15:19:55 -0500 Subject: [PATCH 73/83] Switch wording from bulk to batch See https://github.com/matrix-org/synapse/pull/9247#discussion_r632856155 --- synapse/rest/client/v1/room.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 1123157d2632..1c28ea345811 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -267,7 +267,7 @@ def on_PUT(self, request, room_id, event_type, txn_id): ) -class RoomBulkSendEventRestServlet(TransactionRestServlet): +class RoomBatchSendEventRestServlet(TransactionRestServlet): def __init__(self, hs): super().__init__(hs) self.store = hs.get_datastore() @@ -277,8 +277,8 @@ def __init__(self, hs): self.auth = hs.get_auth() def register(self, http_server): - # /rooms/$roomid/bulksend - PATTERNS = "/rooms/(?P[^/]*)/bulksend" + # /rooms/$roomid/batchsend + PATTERNS = "/rooms/(?P[^/]*)/batchsend" register_txn_path(self, PATTERNS, http_server, with_get=True) async def inherit_depth_from_prev_ids(self, prev_event_ids) -> int: @@ -327,7 +327,7 @@ async def on_POST(self, request, room_id): if not requester.app_service: raise AuthError( 403, - "Only 
application services can use the /bulksend endpoint", + "Only application services can use the /batchsend endpoint", ) body = parse_json_object_from_request(request) @@ -365,7 +365,7 @@ async def on_POST(self, request, room_id): ) logger.debug( - "RoomBulkSendEventRestServlet inserting state_event=%s, auth_event_ids=%s", + "RoomBatchSendEventRestServlet inserting state_event=%s, auth_event_ids=%s", state_event, auth_event_ids, ) @@ -477,7 +477,7 @@ async def on_POST(self, request, room_id): depth=inherited_depth, ) logger.debug( - "RoomBulkSendEventRestServlet inserting event=%s, prev_event_ids=%s, auth_event_ids=%s", + "RoomBatchSendEventRestServlet inserting event=%s, prev_event_ids=%s, auth_event_ids=%s", event, prev_event_ids, auth_event_ids, @@ -1309,7 +1309,7 @@ def register_servlets(hs: "HomeServer", http_server, is_worker=False): RoomMembershipRestServlet(hs).register(http_server) RoomSendEventRestServlet(hs).register(http_server) if msc2716_enabled: - RoomBulkSendEventRestServlet(hs).register(http_server) + RoomBatchSendEventRestServlet(hs).register(http_server) PublicRoomListRestServlet(hs).register(http_server) RoomStateRestServlet(hs).register(http_server) RoomRedactEventRestServlet(hs).register(http_server) From 2f35954d4f4a6515a03da31fa77785f42fa790ce Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 7 Jun 2021 15:30:06 -0500 Subject: [PATCH 74/83] Always use auth_event_ids from the event itself See https://github.com/matrix-org/synapse/pull/9247#discussion_r610992859 --- synapse/api/auth.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/synapse/api/auth.py b/synapse/api/auth.py index eb4e87792ec8..d5a7eaad6c3b 100644 --- a/synapse/api/auth.py +++ b/synapse/api/auth.py @@ -93,11 +93,6 @@ async def check_from_context( self, room_version: str, event, context, do_sig_check=True ) -> None: auth_event_ids = event.auth_event_ids() - if auth_event_ids is None: - prev_state_ids = await context.get_prev_state_ids() - auth_event_ids = 
self.compute_auth_events( - event, prev_state_ids, for_verification=True - ) auth_events_by_id = await self.store.get_events(auth_event_ids) auth_events = {(e.type, e.state_key): e for e in auth_events_by_id.values()} From 6ce47d31ed50806aafe6dc0aa323450ed9820534 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 7 Jun 2021 16:15:17 -0500 Subject: [PATCH 75/83] Protect from clients from using the historical logic (only /batchsend can use it) See https://github.com/matrix-org/synapse/pull/9247#discussion_r643839724 --- synapse/events/__init__.py | 9 +++++++++ synapse/handlers/message.py | 7 +++++-- synapse/rest/client/v1/room.py | 6 +++++- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index c8b52cbc7a09..0cb9c1cc1e72 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -119,6 +119,7 @@ def __init__(self, internal_metadata_dict: JsonDict): redacted = DictProperty("redacted") # type: bool txn_id = DictProperty("txn_id") # type: str token_id = DictProperty("token_id") # type: str + historical = DictProperty("historical") # type: bool # XXX: These are set by StreamWorkerStore._set_before_and_after. # I'm pretty sure that these are never persisted to the database, so shouldn't @@ -204,6 +205,14 @@ def is_redacted(self): """ return self._dict.get("redacted", False) + def is_historical(self) -> bool: + """Whether this is a historical message. + This is used by the batchsend historical message endpoint and + is needed to and mark the event as backfilled and skip some checks + like push notifications. 
+ """ + return self._dict.get("historical", False) + class EventBase(metaclass=abc.ABCMeta): @property diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index a09aa2fdafdc..d03e9b10cc92 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -481,6 +481,7 @@ async def create_event( auth_event_ids: Optional[List[str]] = None, require_consent: bool = True, outlier: bool = False, + historical: bool = False, depth: Optional[int] = None, ) -> Tuple[EventBase, EventContext]: """ @@ -573,6 +574,8 @@ async def create_event( builder.internal_metadata.outlier = outlier + builder.internal_metadata.historical = historical + event, context = await self.create_new_client_event( builder=builder, requester=requester, @@ -1078,7 +1081,7 @@ async def _persist_event( # The historical messages also do not have the proper `context.current_state_ids` # and `state_groups` because they have `prev_events` that aren't persisted yet # (historical messages persisted in reverse-chronological order). 
- if event.content.get(EventContentFields.MSC2716_HISTORICAL, None) is None: + if event.internal_metadata.is_historical(): await self.action_generator.handle_push_actions_for_event(event, context) try: @@ -1373,7 +1376,7 @@ async def persist_and_notify_client_event( # Mark any `m.historical` messages as backfilled so they don't appear # in `/sync` and have the proper decrementing `stream_ordering` as we import backfilled = False - if event.content.get(EventContentFields.MSC2716_HISTORICAL, None) is not None: + if event.internal_metadata.is_historical(): backfilled = True # Note that this returns the event that was persisted, which may not be diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 1c28ea345811..c64c625bbad3 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -270,6 +270,7 @@ def on_PUT(self, request, room_id, event_type, txn_id): class RoomBatchSendEventRestServlet(TransactionRestServlet): def __init__(self, hs): super().__init__(hs) + self.hs = hs self.store = hs.get_datastore() self.state_store = hs.get_storage().state self.event_creation_handler = hs.get_event_creation_handler() @@ -474,6 +475,7 @@ async def on_POST(self, request, room_id): prev_event_ids=event_dict.get("prev_events"), # TODO: Do we need to use `self.auth.compute_auth_events(...)` to filter the `auth_event_ids`? auth_event_ids=auth_event_ids, + historical=True, depth=inherited_depth, ) logger.debug( @@ -483,7 +485,9 @@ async def on_POST(self, request, room_id): auth_event_ids, ) - # TODO: Should we add the same `hs.is_mine_id(event.sender)` assert check that `create_and_send_nonmember_event` has? 
+ assert self.hs.is_mine_id(event.sender), "User must be our own: %s" % ( + event.sender, + ) events_to_persist.append((event, context)) event_id = event.event_id From 22881e2ad777a25e55e3f7713b05ea1ee8d4a2ef Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 7 Jun 2021 22:48:10 -0500 Subject: [PATCH 76/83] Fix incorrect logic when refactoring to is_historical (failed tests as well) --- synapse/handlers/message.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index d03e9b10cc92..727ee5f7c624 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1081,7 +1081,7 @@ async def _persist_event( # The historical messages also do not have the proper `context.current_state_ids` # and `state_groups` because they have `prev_events` that aren't persisted yet # (historical messages persisted in reverse-chronological order). - if event.internal_metadata.is_historical(): + if not event.internal_metadata.is_historical(): await self.action_generator.handle_push_actions_for_event(event, context) try: From ae85719c5873e41886bbe7e8e8d8e05b23b68ca8 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Wed, 9 Jun 2021 01:58:46 -0500 Subject: [PATCH 77/83] Fix tests using False as a depth value (should be None) See https://github.com/matrix-org/synapse/pull/9247#discussion_r648019790 --- tests/handlers/test_presence.py | 4 +++- tests/replication/test_federation_sender_shard.py | 4 +++- tests/storage/test_redaction.py | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py index ecc411db44c7..dfb9b3a0fa4b 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -863,7 +863,9 @@ def _add_new_user(self, room_id, user_id): self.store.get_latest_event_ids_in_room(room_id) ) - event = self.get_success(builder.build(prev_event_ids, None, False)) + event = self.get_success( + 
builder.build(prev_event_ids=prev_event_ids, auth_event_ids=None) + ) self.get_success(self.federation_handler.on_receive_pdu(hostname, event)) diff --git a/tests/replication/test_federation_sender_shard.py b/tests/replication/test_federation_sender_shard.py index 48ab3aa4e36b..584da5837179 100644 --- a/tests/replication/test_federation_sender_shard.py +++ b/tests/replication/test_federation_sender_shard.py @@ -224,7 +224,9 @@ def create_room_with_remote_server(self, user, token, remote_server="other_serve } builder = factory.for_room_version(room_version, event_dict) - join_event = self.get_success(builder.build(prev_event_ids, None)) + join_event = self.get_success( + builder.build(prev_event_ids=prev_event_ids, auth_event_ids=None) + ) self.get_success(federation.on_send_join_request(remote_server, join_event)) self.replicate() diff --git a/tests/storage/test_redaction.py b/tests/storage/test_redaction.py index 61757e86e3b0..dbacce4380f2 100644 --- a/tests/storage/test_redaction.py +++ b/tests/storage/test_redaction.py @@ -239,7 +239,7 @@ async def build( depth: Optional[int] = None, ): built_event = await self._base_builder.build( - prev_event_ids, auth_event_ids + prev_event_ids=prev_event_ids, auth_event_ids=auth_event_ids ) built_event._event_id = self._event_id From c236a3c09e5455a92527ac2f2d9773b5c294cc5c Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Fri, 11 Jun 2021 01:16:46 -0500 Subject: [PATCH 78/83] Fix greedy find/replace --- synapse/rest/client/v1/room.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index 4a755fb28d9a..929793d3380d 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -99,7 +99,7 @@ def get_room_config(self, request): # TODO: Needs unit testing for generic events -class Roomstate_evententRestServlet(TransactionRestServlet): +class RoomStateEventRestServlet(TransactionRestServlet): def __init__(self, hs): 
super().__init__(hs)
         self.event_creation_handler = hs.get_event_creation_handler()
@@ -1310,7 +1310,7 @@ async def on_POST(
 
 def register_servlets(hs: "HomeServer", http_server, is_worker=False):
     msc2716_enabled = hs.config.experimental.msc2716_enabled
 
-    Roomstate_evententRestServlet(hs).register(http_server)
+    RoomStateEventRestServlet(hs).register(http_server)
     RoomMemberListRestServlet(hs).register(http_server)
     JoinedRoomMemberListRestServlet(hs).register(http_server)
     RoomMessageListRestServlet(hs).register(http_server)

From 429e130fcbd35cb2f5ed4679da963049a9d372bc Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Tue, 15 Jun 2021 19:31:37 -0500
Subject: [PATCH 79/83] Add docstring to explain batchsend endpoint

---
 synapse/rest/client/v1/room.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 929793d3380d..5bc8e264f914 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -267,6 +267,32 @@ def on_PUT(self, request, room_id, event_type, txn_id):
 
 
 class RoomBatchSendEventRestServlet(TransactionRestServlet):
+    """
+    API endpoint which can insert a chunk of events historically back in time
+    next to the given `prev_event`.
+
+    `chunk_id` comes from `next_chunk_id` in the response of the batch send
+    endpoint and is derived from the "insertion" events added to each chunk.
+    It's not required for the first batch send.
+
+    `state_events_at_start` is used to define the historical state events
+    needed to auth the events like join events. These events will float
+    outside of the normal DAG as outliers and won't be visible in the chat
+    history which also allows us to insert multiple chunks without having a bunch
+    of `@mxid joined the room` noise between each chunk.
+
+    `events` is chronological chunk/list of events you want to insert.
+ There is a reverse-chronological constraint on chunks so once you insert + some messages, you can only insert older ones after that. + tldr; Insert chunks from your most recent history -> oldest history. + + POST /_matrix/client/r0/rooms//batchsend?prev_event=&chunk_id= + { + "events": [ ... ], + "state_events_at_start": [ ... ] + } + """ + def __init__(self, hs): super().__init__(hs) self.hs = hs From 29c370892db7376f8fcbb0914ab7d8afe21a8101 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Tue, 15 Jun 2021 21:25:58 -0500 Subject: [PATCH 80/83] Only use necessary auth_events See https://github.com/matrix-org/synapse/pull/9247#discussion_r651803946 --- synapse/handlers/message.py | 20 ++++++++++++++++++++ synapse/rest/client/v1/room.py | 1 - 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index a82784dd3cf4..db12abd59d5f 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -578,6 +578,26 @@ async def create_event( builder.internal_metadata.historical = historical + # Strip down the auth_event_ids to only what we need to auth the event. 
+        # For example, we don't need extra m.room.member events that don't match event.sender
+        if auth_event_ids is not None:
+            temp_event = await builder.build(
+                prev_event_ids=prev_event_ids,
+                auth_event_ids=auth_event_ids,
+                depth=depth,
+            )
+            auth_events = await self.store.get_events_as_list(auth_event_ids)
+            # Create a StateMap[str]
+            auth_event_state_map = {
+                (e.type, e.state_key): e.event_id for e in auth_events
+            }
+            # Actually strip down and use the necessary auth events
+            auth_event_ids = self.auth.compute_auth_events(
+                event=temp_event,
+                current_state_ids=auth_event_state_map,
+                for_verification=False,
+            )
+
         event, context = await self.create_new_client_event(
             builder=builder,
             requester=requester,
diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 5bc8e264f914..1169c1565ab7 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -498,7 +498,6 @@ async def on_POST(self, request, room_id):
                 requester,
                 event_dict,
                 prev_event_ids=event_dict.get("prev_events"),
-                # TODO: Do we need to use `self.auth.compute_auth_events(...)` to filter the `auth_event_ids`?
                 auth_event_ids=auth_event_ids,
                 historical=True,
                 depth=inherited_depth,

From bfe458c0c4ce5075867246595d3ff0140cb89180 Mon Sep 17 00:00:00 2001
From: Eric Eastwood <erice@element.io>
Date: Wed, 16 Jun 2021 22:22:00 -0500
Subject: [PATCH 81/83] Use unstable endpoint for MSC2716 batch send

---
 synapse/rest/client/v1/room.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py
index 1169c1565ab7..b389c693ce96 100644
--- a/synapse/rest/client/v1/room.py
+++ b/synapse/rest/client/v1/room.py
@@ -286,13 +286,20 @@ class RoomBatchSendEventRestServlet(TransactionRestServlet):
     some messages, you can only insert older ones after that.
     tldr; Insert chunks from your most recent history -> oldest history.
- POST /_matrix/client/r0/rooms//batchsend?prev_event=&chunk_id= + POST /_matrix/client/unstable/org.matrix.msc2716/rooms//batch_send?prev_event=&chunk_id= { "events": [ ... ], "state_events_at_start": [ ... ] } """ + PATTERNS = ( + re.compile( + "^/_matrix/client/unstable/org.matrix.msc2716" + "/rooms/(?P[^/]*)/batch_send$" + ), + ) + def __init__(self, hs): super().__init__(hs) self.hs = hs @@ -302,11 +309,6 @@ def __init__(self, hs): self.room_member_handler = hs.get_room_member_handler() self.auth = hs.get_auth() - def register(self, http_server): - # /rooms/$roomid/batchsend - PATTERNS = "/rooms/(?P[^/]*)/batchsend" - register_txn_path(self, PATTERNS, http_server, with_get=True) - async def inherit_depth_from_prev_ids(self, prev_event_ids) -> int: ( most_recent_prev_event_id, From e851dacc4b1c9895f936b182584505d3c8b623ff Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Thu, 17 Jun 2021 01:08:54 -0500 Subject: [PATCH 82/83] Remove complement test jig changes --- scripts-dev/complement.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts-dev/complement.sh b/scripts-dev/complement.sh index 0b00e2f8ce9a..ba060104c3aa 100755 --- a/scripts-dev/complement.sh +++ b/scripts-dev/complement.sh @@ -65,4 +65,4 @@ if [[ -n "$1" ]]; then fi # Run the tests! 
-go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests -run TestBackfillingHistory/parallel/Backfilled_historical_events_resolve_with_proper_state_in_correct_order +go test -v -tags synapse_blacklist,msc2946,msc3083,msc2716 -count=1 $EXTRA_COMPLEMENT_ARGS ./tests From f7158ff74a053c473fa5d1a2aa02446fb999d910 Mon Sep 17 00:00:00 2001 From: Eric Eastwood Date: Mon, 21 Jun 2021 20:39:02 -0500 Subject: [PATCH 83/83] Correct comment to make more sense to what the code was doing --- synapse/rest/client/v1/room.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index b389c693ce96..92ebe838fd84 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -468,8 +468,9 @@ async def on_POST(self, request, room_id): EventContentFields.MSC2716_NEXT_CHUNK_ID: next_chunk_id, EventContentFields.MSC2716_HISTORICAL: True, }, - # Since the insertion event is put at the end of the chunk, - # copy the origin_server_ts from the last event we're inserting + # Since the insertion event is put at the start of the chunk, + # where the oldest event is, copy the origin_server_ts from + # the first event we're inserting "origin_server_ts": events_to_create[0]["origin_server_ts"], } # Prepend the insertion event to the start of the chunk