Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Resync state after partial-state join #12394

Merged
merged 3 commits into from
Apr 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/12394.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Preparation for faster-room-join work: start a background process to resynchronise the room state after a room join.
75 changes: 75 additions & 0 deletions synapse/handlers/federation.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,8 @@ async def do_invite_join(
)

if ret.partial_state:
# TODO(faster_joins): roll this back if we don't manage to start the
# background resync (eg process_remote_join fails)
await self.store.store_partial_state_room(room_id, ret.servers_in_room)

max_stream_id = await self._federation_event_handler.process_remote_join(
Expand All @@ -478,6 +480,18 @@ async def do_invite_join(
partial_state=ret.partial_state,
)

if ret.partial_state:
# Kick off the process of asynchronously fetching the state for this
# room.
#
# TODO(faster_joins): pick this up again on restart
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing we need to persist more info to the database and then ensure that starts up again on restart?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not even sure we ned any extra info in the database - all we need is a list of room ids, and we have that.

So yes, it's just a matter of starting it up again on restart.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair! Was mostly just curious, not asking for it to be fixed yet! 👍

run_as_background_process(
desc="sync_partial_state_room",
func=self._sync_partial_state_room,
destination=origin,
room_id=room_id,
)

# We wait here until this instance has seen the events come down
# replication (if we're using replication) as the below uses caches.
await self._replication.wait_for_stream_position(
Expand Down Expand Up @@ -1370,3 +1384,64 @@ async def get_room_complexity(
# We fell off the bottom, couldn't get the complexity from anyone. Oh
# well.
return None

async def _sync_partial_state_room(
self,
destination: str,
room_id: str,
) -> None:
"""Background process to resync the state of a partial-state room

Args:
destination: homeserver to pull the state from
room_id: room to be resynced
"""

# TODO(faster_joins): do we need to lock to avoid races? What happens if other
# worker processes kick off a resync in parallel? Perhaps we should just elect
# a single worker to do the resync.
Comment on lines +1400 to +1402
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe put this on the background process worker?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, maybe. On the other hand it does a bunch of work which is quite similar to the event creator, and it would be nice to shard it across multiple workers (as we do with event creators), so it's not entirely obvious. Either way, I'm punting the decision for now.

#
# TODO(faster_joins): what happens if we leave the room during a resync? if we
# really leave, that might mean we have difficulty getting the room state over
# federation.
Comment on lines +1404 to +1406
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this is fine, but will this work OK if a second user joins from your server while we only have partial room state?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's really a special-case of the more general problem of sending events while we have partial state. Currently, it's completely broken, but in a couple of PRs time, we'll have a thing which will wait for the resync to complete.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense, thanks! 👍 Glad to know this is something in the works already.

#
# TODO(faster_joins): try other destinations if the one we have fails

logger.info("Syncing state for room %s via %s", room_id, destination)

# we work through the queue in order of increasing stream ordering.
while True:
batch = await self.store.get_partial_state_events_batch(room_id)
if not batch:
# all the events are updated, so we can update current state and
# clear the lazy-loading flag.
logger.info("Updating current state for %s", room_id)
assert (
self.storage.persistence is not None
), "TODO(faster_joins): support for workers"
await self.storage.persistence.update_current_state(room_id)

logger.info("Clearing partial-state flag for %s", room_id)
success = await self.store.clear_partial_state_room(room_id)
if success:
logger.info("State resync complete for %s", room_id)

# TODO(faster_joins) update room stats and user directory?
return

# we raced against more events arriving with partial state. Go round
# the loop again. We've already logged a warning, so no need for more.
Comment on lines +1432 to +1433
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit confused at where this might come from -- if we've resolved the entire room state how could there be states with partial state arriving?

These are from other servers in the room sending us events, not from the process of resolving partial state, correct?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are from other servers in the room sending us events, not from the process of resolving partial state, correct?

that's right. Currently, when we get a new event over federation, we calculate the state at that event based on the state at the new event's prev_event(s). Obviously, if any of those prev_events have partial state, the state at the new event is also going to be partial. So, there is potential for a race:

  • we pull the list of partial-state events, and find event X is the only one left. We start the process of updating the state at X.
  • an event Y arrives which refers to event X in its prev_events. At this point, X still has partial state, hence so does Y. We hand off Y to be persisted by the event persistence thread.
  • We complete the de-partial-stating of X, and go round the loop again. We find there are no remaining partial state events. Hurrah!
  • Y gets persisted.
  • We attempt to mark the room as having no partial-state events. This fails, because of Y.

Obviously, there's another failure mode here, where Y's persistence happens after we mark the room as having no partial-state events. It seems like I'm going to need to revisit this. For now, I'm just going to add yet another TODO.

# TODO(faster_joins): there is still a race here, whereby incoming events which raced
# with us will fail to be persisted after the call to `clear_partial_state_room` due to
# having partial state.
Copy link
Contributor

@MadLittleMods MadLittleMods May 13, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there plans to look at this soon™️?

I think I am running into this race with the test on matrix-org/complement#214 when sending the marker event.

The following error occurs where the query looks like: simple_insert_many_txn sql=INSERT INTO partial_state_events (room_id, event_id) VALUES(?, ?) values=[('!tNpiAqfxkwVfethRjU:hs1', '$d-ZaRzttjmLPwP8SMHHxvNiheVLozYME5stmHctL54Q')].

In the full logs (below in the collapsed section), it looks like we're calling clear_partial_state_room right before we try to do the query above to insert more partial state and the NOT NULL REFERENCES constraint doesn't like that it can't find the room_id in partial_state_rooms anymore -> sqlite3.IntegrityError: FOREIGN KEY constraint failed

Failed to handle PDU $d-ZaRzttjmLPwP8SMHHxvNiheVLozYME5stmHctL54Q
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/synapse/federation/federation_server.py", line 1088, in _process_incoming_pdus_in_room_inner
    await self._federation_event_handler.on_receive_pdu(
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/federation_event.py", line 275, in on_receive_pdu
    await self._process_received_pdu(origin, pdu, state=None)
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/federation_event.py", line 1119, in _process_received_pdu
    await self._run_push_actions_and_persist_event(event, context, backfilled)
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/federation_event.py", line 1966, in _run_push_actions_and_persist_event
    await self.persist_events_and_notify(
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/federation_event.py", line 2016, in persist_events_and_notify
    events, max_stream_token = await self._storage.persistence.persist_events(
  File "/usr/local/lib/python3.9/site-packages/synapse/logging/opentracing.py", line 797, in _trace_inner
    return await func(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/persist_events.py", line 326, in persist_events
    ret_vals = await yieldable_gather_results(enqueue, partitioned.items())
  File "/usr/local/lib/python3.9/site-packages/synapse/util/async_helpers.py", line 291, in yieldable_gather_results
    raise dfe.subFailure.value from None
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1656, in _inlineCallbacks
    result = current_context.run(
  File "/usr/local/lib/python3.9/site-packages/twisted/python/failure.py", line 514, in throwExceptionIntoGenerator
    return g.throw(self.type, self.value, self.tb)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/persist_events.py", line 322, in enqueue
    return await self._event_persist_queue.add_to_queue(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/persist_events.py", line 200, in add_to_queue
    res = await make_deferred_yieldable(end_item.deferred.observe())
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/persist_events.py", line 243, in handle_queue_loop
    ret = await self._per_item_callback(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/persist_events.py", line 636, in _persist_event_batch
    await self.persist_events_store._persist_events_and_state_updates(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/databases/main/events.py", line 191, in _persist_events_and_state_updates
    await self.db_pool.runInteraction(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 814, in runInteraction
    return await delay_cancellation(_runInteraction())
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1656, in _inlineCallbacks
    result = current_context.run(
  File "/usr/local/lib/python3.9/site-packages/twisted/python/failure.py", line 514, in throwExceptionIntoGenerator
    return g.throw(self.type, self.value, self.tb)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 786, in _runInteraction
    result = await self.runWithConnection(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 909, in runWithConnection
    return await make_deferred_yieldable(
  File "/usr/local/lib/python3.9/site-packages/twisted/python/threadpool.py", line 244, in inContext
    result = inContext.theWork()  # type: ignore[attr-defined]
  File "/usr/local/lib/python3.9/site-packages/twisted/python/threadpool.py", line 260, in <lambda>
    inContext.theWork = lambda: context.call(  # type: ignore[attr-defined]
  File "/usr/local/lib/python3.9/site-packages/twisted/python/context.py", line 117, in callWithContext
    return self.currentContext().callWithContext(ctx, func, *args, **kw)
  File "/usr/local/lib/python3.9/site-packages/twisted/python/context.py", line 82, in callWithContext
    return func(*args, **kw)
  File "/usr/local/lib/python3.9/site-packages/twisted/enterprise/adbapi.py", line 282, in _runWithConnection
    result = func(conn, *args, **kw)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 902, in inner_func
    return func(db_conn, *args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 650, in new_transaction
    r = func(cursor, *args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/databases/main/events.py", line 413, in _persist_events_txn
    self._store_event_state_mappings_txn(txn, events_and_contexts)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/databases/main/events.py", line 2237, in _store_event_state_mappings_txn
    self.db_pool.simple_insert_many_txn(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 1058, in simple_insert_many_txn
    txn.execute_batch(sql, values)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 317, in execute_batch
    self.executemany(sql, args)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 340, in executemany
    self._do_execute(self.txn.executemany, sql, *args)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 379, in _do_execute
    return func(sql, *args, **kwargs)
sqlite3.IntegrityError: FOREIGN KEY constraint failed
Full log around syncing partial state
2022-05-13 04:39:38,029 - synapse.handlers.federation - 1466 - INFO - sync_partial_state_room-0 - Syncing state for room !tNpiAqfxkwVfethRjU:hs1 via hs1
2022-05-13 04:39:38,031 - synapse.access.http.8008 - 427 - INFO - POST-7 - ::ffff:172.29.0.1 - 8008 - {@charlie:hs2} Processed request: 0.942sec/0.001sec (0.018sec, 0.015sec) (0.020sec/0.025sec/20) 37B 200 "POST /_matrix/client/r0/join/%21tNpiAqfxkwVfethRjU:hs1?server_name=hs1 HTTP/1.1" "Go-http-client/1.1" [0 dbevts]
2022-05-13 04:39:38,039 - synapse.handlers.federation_event - 504 - INFO - sync_partial_state_room-0 - Updating state for $dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo
2022-05-13 04:39:38,044 - synapse.handlers.federation_event - 821 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo - Event $dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo is missing prev_events ['$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8']: calculating state for a backwards extremity
2022-05-13 04:39:38,045 - synapse.handlers.federation_event - 843 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo - Requesting state after missing prev_event $HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8
2022-05-13 04:39:38,093 - synapse.storage.database - 1056 - INFO - GET-11 - simple_insert_many_txn sql=INSERT INTO presence_stream (stream_id, user_id, state, last_active_ts, last_federation_update_ts, last_user_sync_ts, status_msg, currently_active, instance_name) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?) values=[(3, '@charlie:hs2', 'online', 1652416778083, 1652416778083, 0, None, True, 'master')]
2022-05-13 04:39:38,117 - synapse.http.matrixfederationclient - 285 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - {GET-O-4} [hs1] Completed request: 200 OK in 0.07 secs, got 549 bytes - GET matrix://hs1/_matrix/federation/v1/state_ids/%21tNpiAqfxkwVfethRjU%3Ahs1?event_id=%24HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8
2022-05-13 04:39:38,151 - synapse.federation.transport.server.federation - 102 - INFO - PUT-12 - Received txn 1652416770039 from hs1. (PDUs: 1, EDUs: 0)
2022-05-13 04:39:38,156 - synapse.http.matrixfederationclient - 285 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - {GET-O-5} [hs1] Completed request: 200 OK in 0.02 secs, got 7572 bytes - GET matrix://hs1/_matrix/federation/v1/state/%21tNpiAqfxkwVfethRjU%3Ahs1?event_id=%24HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8
2022-05-13 04:39:38,157 - synapse.federation.federation_client - 466 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - Processing from /state: 7 state events, 0 auth events
2022-05-13 04:39:38,164 - synapse.storage.util.partial_state_events_tracker - 77 - INFO - GET-11 - Awaiting un-partial-stating of events ['$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo']
Stack (most recent call last):
  File "/usr/local/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/local/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.9/site-packages/synapse/app/homeserver.py", line 460, in <module>
    main()
  File "/usr/local/lib/python3.9/site-packages/synapse/app/homeserver.py", line 456, in main
    run(hs)
  File "/usr/local/lib/python3.9/site-packages/synapse/app/homeserver.py", line 435, in run
    _base.start_reactor(
  File "/usr/local/lib/python3.9/site-packages/synapse/app/_base.py", line 184, in start_reactor
    run()
  File "/usr/local/lib/python3.9/site-packages/synapse/app/_base.py", line 166, in run
    run_command()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/base.py", line 1315, in run
    self.mainLoop()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/base.py", line 1325, in mainLoop
    reactorBaseSelf.runUntilCurrent()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/base.py", line 964, in runUntilCurrent
    f(*a, **kw)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 661, in callback
    self._startRunCallbacks(result)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
    self._runCallbacks()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 857, in _runCallbacks
    current.result = callback(  # type: ignore[misc]
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1750, in gotResult
    current_context.run(_inlineCallbacks, r, gen, status)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1663, in _inlineCallbacks
    status.deferred.callback(getattr(e, "value", None))
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 661, in callback
    self._startRunCallbacks(result)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
    self._runCallbacks()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 857, in _runCallbacks
    current.result = callback(  # type: ignore[misc]
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 661, in callback
    self._startRunCallbacks(result)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
    self._runCallbacks()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 857, in _runCallbacks
    current.result = callback(  # type: ignore[misc]
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1750, in gotResult
    current_context.run(_inlineCallbacks, r, gen, status)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1663, in _inlineCallbacks
    status.deferred.callback(getattr(e, "value", None))
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 661, in callback
    self._startRunCallbacks(result)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
    self._runCallbacks()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 857, in _runCallbacks
    current.result = callback(  # type: ignore[misc]
  File "/usr/local/lib/python3.9/site-packages/synapse/util/caches/descriptors.py", line 497, in complete_all
    d1.callback(val)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 661, in callback
    self._startRunCallbacks(result)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
    self._runCallbacks()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 857, in _runCallbacks
    current.result = callback(  # type: ignore[misc]
  File "/usr/local/lib/python3.9/site-packages/synapse/util/async_helpers.py", line 120, in callback
    observer.callback(r)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 661, in callback
    self._startRunCallbacks(result)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
    self._runCallbacks()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 857, in _runCallbacks
    current.result = callback(  # type: ignore[misc]
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1337, in _cbDeferred
    self.callback(cast(_DeferredListResultListT, self.resultList))
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 661, in callback
    self._startRunCallbacks(result)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
    self._runCallbacks()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 857, in _runCallbacks
    current.result = callback(  # type: ignore[misc]
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 661, in callback
    self._startRunCallbacks(result)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
    self._runCallbacks()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 857, in _runCallbacks
    current.result = callback(  # type: ignore[misc]
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1750, in gotResult
    current_context.run(_inlineCallbacks, r, gen, status)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1660, in _inlineCallbacks
    result = current_context.run(gen.send, result)
  File "/usr/local/lib/python3.9/site-packages/synapse/util/async_helpers.py", line 227, in _concurrently_execute_inner
    await maybe_awaitable(func(value))
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/sync.py", line 1613, in handle_room_entries
    await self._generate_room_entry(
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/sync.py", line 2059, in _generate_room_entry
    batch = await self._load_filtered_recents(
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/sync.py", line 592, in _load_filtered_recents
    loaded_recents = await filter_events_for_client(
  File "/usr/local/lib/python3.9/site-packages/synapse/visibility.py", line 85, in filter_events_for_client
    event_id_to_state = await storage.state.get_state_for_events(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/state.py", line 676, in get_state_for_events
    event_to_groups = await self._get_state_group_for_events(event_ids)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/state.py", line 806, in _get_state_group_for_events
    await self._partial_state_events_tracker.await_full_state(event_ids)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/util/partial_state_events_tracker.py", line 77, in await_full_state
    logger.info(
2022-05-13 04:39:38,167 - synapse.handlers.federation_event - 1040 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - /state returned 7 events
2022-05-13 04:39:38,167 - synapse.handlers.federation_event - 1042 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - _get_state_and_persist auth_events(7)=[<FrozenEventV3 event_id=$Woh3E6gvB2iXS6ixGJqQmpSD7ojIY6nRe0W0qqIiJDE, type=m.room.create, state_key=, outlier=False>, <FrozenEventV3 event_id=$8VkNhAhCRNnIJzT1BV-tPIjgzSGcufAwcHZN9Msb93Q, type=m.room.power_levels, state_key=, outlier=False>, <FrozenEventV3 event_id=$BqmEAYfPvqoXZzYrMdldWEY-ZXfWZ-msHsY5Gh3oFxE, type=m.room.join_rules, state_key=, outlier=False>, <FrozenEventV3 event_id=$PIhf8N4pkn8Y4YJG8M1P8Q1GbNQz1VKoRfUW6pv2TkI, type=m.room.history_visibility, state_key=, outlier=False>, <FrozenEventV3 event_id=$AfvRfXijO7LnkYQyZZ2ExU7Hw-WxO3JAHRwHEQQsob4, type=m.room.name, state_key=, outlier=False>, <FrozenEventV3 event_id=$3PvG4kbdHIzOUcaqZdC1asc2tHeE4F7EfSOERzq7O7Q, type=m.room.member, state_key=@the-bridge-user:hs1, outlier=False>, <FrozenEventV3 event_id=$i8nCcFBnqP8R5st9TQC0CcFE4YpTnsSLCruGpZ7Ut-Y, type=m.room.member, state_key=@alice:hs1, outlier=False>] state_events(0)=[]
2022-05-13 04:39:38,171 - synapse.handlers.federation_event - 1409 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - Persisting 1 of 7 remaining outliers: ['$Woh3E6gvB2iXS6ixGJqQmpSD7ojIY6nRe0W0qqIiJDE']
2022-05-13 04:39:38,176 - synapse.storage.database - 1056 - INFO - persist_events-2 - simple_insert_many_txn sql=INSERT INTO event_forward_extremities (event_id, room_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,177 - synapse.storage.database - 1056 - INFO - persist_events-2 - simple_insert_many_txn sql=INSERT INTO stream_ordering_to_exterm (room_id, event_id, stream_ordering) VALUES(?, ?, ?) values=[]
2022-05-13 04:39:38,178 - synapse.storage.database - 1056 - INFO - persist_events-2 - simple_insert_many_txn sql=INSERT INTO partial_state_events (room_id, event_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,179 - synapse.handlers.federation_event - 1409 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - Persisting 1 of 6 remaining outliers: ['$3PvG4kbdHIzOUcaqZdC1asc2tHeE4F7EfSOERzq7O7Q']
2022-05-13 04:39:38,191 - synapse.federation.federation_server - 1085 - INFO - _process_incoming_pdus_in_room_inner-0 - handling received PDU in room !tNpiAqfxkwVfethRjU:hs1: <FrozenEventV3 event_id=$d-ZaRzttjmLPwP8SMHHxvNiheVLozYME5stmHctL54Q, type=org.matrix.msc2716.marker, state_key=None, outlier=False>
2022-05-13 04:39:38,196 - synapse.storage.database - 1056 - INFO - persist_events-3 - simple_insert_many_txn sql=INSERT INTO event_forward_extremities (event_id, room_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,198 - synapse.storage.database - 1056 - INFO - persist_events-3 - simple_insert_many_txn sql=INSERT INTO stream_ordering_to_exterm (room_id, event_id, stream_ordering) VALUES(?, ?, ?) values=[]
2022-05-13 04:39:38,199 - synapse.storage.database - 1056 - INFO - persist_events-3 - simple_insert_many_txn sql=INSERT INTO partial_state_events (room_id, event_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,201 - synapse.access.https.8448 - 427 - INFO - PUT-12 - ::ffff:172.29.0.3 - 8448 - {hs1} Processed request: 0.052sec/0.003sec (0.009sec, 0.001sec) (0.008sec/0.021sec/4) 60B 200 "PUT /_matrix/federation/v1/send/1652416770039 HTTP/1.1" "Synapse/1.58.1" [0 dbevts]
2022-05-13 04:39:38,202 - synapse.handlers.federation_event - 1409 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - Persisting 1 of 5 remaining outliers: ['$8VkNhAhCRNnIJzT1BV-tPIjgzSGcufAwcHZN9Msb93Q']
2022-05-13 04:39:38,208 - synapse.storage.database - 1056 - INFO - persist_events-4 - simple_insert_many_txn sql=INSERT INTO event_forward_extremities (event_id, room_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,208 - synapse.storage.database - 1056 - INFO - persist_events-4 - simple_insert_many_txn sql=INSERT INTO stream_ordering_to_exterm (room_id, event_id, stream_ordering) VALUES(?, ?, ?) values=[]
2022-05-13 04:39:38,208 - synapse.storage.database - 1056 - INFO - persist_events-4 - simple_insert_many_txn sql=INSERT INTO partial_state_events (room_id, event_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,211 - synapse.handlers.federation_event - 1409 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - Persisting 3 of 4 remaining outliers: ['$BqmEAYfPvqoXZzYrMdldWEY-ZXfWZ-msHsY5Gh3oFxE', '$PIhf8N4pkn8Y4YJG8M1P8Q1GbNQz1VKoRfUW6pv2TkI', '$AfvRfXijO7LnkYQyZZ2ExU7Hw-WxO3JAHRwHEQQsob4']
2022-05-13 04:39:38,216 - synapse.storage.util.partial_state_events_tracker - 77 - INFO - _process_incoming_pdus_in_room_inner-0-$d-ZaRzttjmLPwP8SMHHxvNiheVLozYME5stmHctL54Q - Awaiting un-partial-stating of events ['$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo']
Stack (most recent call last):
  File "/usr/local/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/local/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.9/site-packages/synapse/app/homeserver.py", line 460, in <module>
    main()
  File "/usr/local/lib/python3.9/site-packages/synapse/app/homeserver.py", line 456, in main
    run(hs)
  File "/usr/local/lib/python3.9/site-packages/synapse/app/homeserver.py", line 435, in run
    _base.start_reactor(
  File "/usr/local/lib/python3.9/site-packages/synapse/app/_base.py", line 184, in start_reactor
    run()
  File "/usr/local/lib/python3.9/site-packages/synapse/app/_base.py", line 166, in run
    run_command()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/base.py", line 1315, in run
    self.mainLoop()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/base.py", line 1325, in mainLoop
    reactorBaseSelf.runUntilCurrent()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/base.py", line 964, in runUntilCurrent
    f(*a, **kw)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 661, in callback
    self._startRunCallbacks(result)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
    self._runCallbacks()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 857, in _runCallbacks
    current.result = callback(  # type: ignore[misc]
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1750, in gotResult
    current_context.run(_inlineCallbacks, r, gen, status)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1663, in _inlineCallbacks
    status.deferred.callback(getattr(e, "value", None))
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 661, in callback
    self._startRunCallbacks(result)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
    self._runCallbacks()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 857, in _runCallbacks
    current.result = callback(  # type: ignore[misc]
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 661, in callback
    self._startRunCallbacks(result)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 763, in _startRunCallbacks
    self._runCallbacks()
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 857, in _runCallbacks
    current.result = callback(  # type: ignore[misc]
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1750, in gotResult
    current_context.run(_inlineCallbacks, r, gen, status)
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1660, in _inlineCallbacks
    result = current_context.run(gen.send, result)
  File "/usr/local/lib/python3.9/site-packages/synapse/metrics/background_process_metrics.py", line 243, in run
    return await func(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/synapse/federation/federation_server.py", line 1088, in _process_incoming_pdus_in_room_inner
    await self._federation_event_handler.on_receive_pdu(
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/federation_event.py", line 275, in on_receive_pdu
    await self._process_received_pdu(origin, pdu, state=None)
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/federation_event.py", line 1099, in _process_received_pdu
    context = await self._state_handler.compute_event_context(
  File "/usr/local/lib/python3.9/site-packages/synapse/state/__init__.py", line 325, in compute_event_context
    entry = await self.resolve_state_groups_for_events(
  File "/usr/local/lib/python3.9/site-packages/synapse/util/metrics.py", line 113, in measured_func
    r = await func(self, *args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/synapse/state/__init__.py", line 424, in resolve_state_groups_for_events
    state_groups_ids = await self.state_store.get_state_groups_ids(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/state.py", line 587, in get_state_groups_ids
    event_to_groups = await self._get_state_group_for_events(event_ids)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/state.py", line 806, in _get_state_group_for_events
    await self._partial_state_events_tracker.await_full_state(event_ids)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/util/partial_state_events_tracker.py", line 77, in await_full_state
    logger.info(
2022-05-13 04:39:38,218 - synapse.storage.database - 1056 - INFO - persist_events-5 - simple_insert_many_txn sql=INSERT INTO event_forward_extremities (event_id, room_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,218 - synapse.storage.database - 1056 - INFO - persist_events-5 - simple_insert_many_txn sql=INSERT INTO stream_ordering_to_exterm (room_id, event_id, stream_ordering) VALUES(?, ?, ?) values=[]
2022-05-13 04:39:38,219 - synapse.storage.database - 1056 - INFO - persist_events-5 - simple_insert_many_txn sql=INSERT INTO partial_state_events (room_id, event_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,220 - synapse.handlers.federation_event - 1409 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - Persisting 1 of 1 remaining outliers: ['$i8nCcFBnqP8R5st9TQC0CcFE4YpTnsSLCruGpZ7Ut-Y']
2022-05-13 04:39:38,223 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO event_forward_extremities (event_id, room_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,223 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO stream_ordering_to_exterm (room_id, event_id, stream_ordering) VALUES(?, ?, ?) values=[]
2022-05-13 04:39:38,224 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO event_json (event_id, room_id, internal_metadata, json, format_version) VALUES(?, ?, ?, ?, ?) values=<generator object PersistEventsStore._store_event_txn.<locals>.<genexpr> at 0x7faf6636db30>
2022-05-13 04:39:38,224 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO events (instance_name, stream_ordering, topological_ordering, depth, event_id, room_id, type, processed, outlier, origin_server_ts, received_ts, sender, contains_url, state_key, rejection_reason) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) values=<generator object PersistEventsStore._store_event_txn.<locals>.<genexpr> at 0x7faf6636db30>
2022-05-13 04:39:38,225 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO state_events (event_id, room_id, type, state_key) VALUES(?, ?, ?, ?) values=<generator object PersistEventsStore._store_event_txn.<locals>.<genexpr> at 0x7faf6636db30>
2022-05-13 04:39:38,225 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO partial_state_events (room_id, event_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,226 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO event_auth (event_id, room_id, auth_id) VALUES(?, ?, ?) values=[('$i8nCcFBnqP8R5st9TQC0CcFE4YpTnsSLCruGpZ7Ut-Y', '!tNpiAqfxkwVfethRjU:hs1', '$8VkNhAhCRNnIJzT1BV-tPIjgzSGcufAwcHZN9Msb93Q'), ('$i8nCcFBnqP8R5st9TQC0CcFE4YpTnsSLCruGpZ7Ut-Y', '!tNpiAqfxkwVfethRjU:hs1', '$BqmEAYfPvqoXZzYrMdldWEY-ZXfWZ-msHsY5Gh3oFxE'), ('$i8nCcFBnqP8R5st9TQC0CcFE4YpTnsSLCruGpZ7Ut-Y', '!tNpiAqfxkwVfethRjU:hs1', '$Woh3E6gvB2iXS6ixGJqQmpSD7ojIY6nRe0W0qqIiJDE')]
2022-05-13 04:39:38,227 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO event_auth_chains (event_id, chain_id, sequence_number) VALUES(?, ?, ?) values=[('$i8nCcFBnqP8R5st9TQC0CcFE4YpTnsSLCruGpZ7Ut-Y', 8, 1)]
2022-05-13 04:39:38,227 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO event_auth_chain_links (origin_chain_id, origin_sequence_number, target_chain_id, target_sequence_number) VALUES(?, ?, ?, ?) values=[(8, 1, 3, 1), (8, 1, 1, 1), (8, 1, 4, 1), (8, 1, 2, 1)]
2022-05-13 04:39:38,228 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO event_edges (event_id, prev_event_id, room_id, is_state) VALUES(?, ?, ?, ?) values=[('$i8nCcFBnqP8R5st9TQC0CcFE4YpTnsSLCruGpZ7Ut-Y', '$AfvRfXijO7LnkYQyZZ2ExU7Hw-WxO3JAHRwHEQQsob4', '!tNpiAqfxkwVfethRjU:hs1', False)]
2022-05-13 04:39:38,228 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO room_memberships (event_id, user_id, sender, room_id, membership, display_name, avatar_url) VALUES(?, ?, ?, ?, ?, ?, ?) values=[('$i8nCcFBnqP8R5st9TQC0CcFE4YpTnsSLCruGpZ7Ut-Y', '@alice:hs1', '@alice:hs1', '!tNpiAqfxkwVfethRjU:hs1', 'join', 'Alice', None)]
2022-05-13 04:39:38,229 - synapse.storage.database - 1056 - INFO - persist_events-6 - simple_insert_many_txn sql=INSERT INTO event_reference_hashes (event_id, algorithm, hash) VALUES(?, ?, ?) values=[('$i8nCcFBnqP8R5st9TQC0CcFE4YpTnsSLCruGpZ7Ut-Y', 'sha256', <memory at 0x7faf76288e80>)]
2022-05-13 04:39:38,242 - synapse.http.matrixfederationclient - 285 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - {GET-O-6} [hs1] Completed request: 200 OK in 0.01 secs, got 722 bytes - GET matrix://hs1/_matrix/federation/v1/event/%24HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8
2022-05-13 04:39:38,247 - synapse.handlers.federation_event - 1356 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - Fetched 1 events of 1 requested
2022-05-13 04:39:38,248 - synapse.handlers.federation_event - 1409 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo-$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8 - Persisting 1 of 1 remaining outliers: ['$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8']
2022-05-13 04:39:38,250 - synapse.storage.database - 1056 - INFO - persist_events-7 - simple_insert_many_txn sql=INSERT INTO event_forward_extremities (event_id, room_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,250 - synapse.storage.database - 1056 - INFO - persist_events-7 - simple_insert_many_txn sql=INSERT INTO stream_ordering_to_exterm (room_id, event_id, stream_ordering) VALUES(?, ?, ?) values=[]
2022-05-13 04:39:38,250 - synapse.storage.database - 1056 - INFO - persist_events-7 - simple_insert_many_txn sql=INSERT INTO event_json (event_id, room_id, internal_metadata, json, format_version) VALUES(?, ?, ?, ?, ?) values=<generator object PersistEventsStore._store_event_txn.<locals>.<genexpr> at 0x7faf66367740>
2022-05-13 04:39:38,251 - synapse.storage.database - 1056 - INFO - persist_events-7 - simple_insert_many_txn sql=INSERT INTO events (instance_name, stream_ordering, topological_ordering, depth, event_id, room_id, type, processed, outlier, origin_server_ts, received_ts, sender, contains_url, state_key, rejection_reason) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) values=<generator object PersistEventsStore._store_event_txn.<locals>.<genexpr> at 0x7faf66367740>
2022-05-13 04:39:38,252 - synapse.storage.database - 1056 - INFO - persist_events-7 - simple_insert_many_txn sql=INSERT INTO state_events (event_id, room_id, type, state_key) VALUES(?, ?, ?, ?) values=<generator object PersistEventsStore._store_event_txn.<locals>.<genexpr> at 0x7faf66367740>
2022-05-13 04:39:38,252 - synapse.storage.database - 1056 - INFO - persist_events-7 - simple_insert_many_txn sql=INSERT INTO partial_state_events (room_id, event_id) VALUES(?, ?) values=[]
2022-05-13 04:39:38,253 - synapse.storage.database - 1056 - INFO - persist_events-7 - simple_insert_many_txn sql=INSERT INTO event_edges (event_id, prev_event_id, room_id, is_state) VALUES(?, ?, ?, ?) values=[('$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8', '$i7VqnHfrHTI-pMzaX5K7E_rLg7q214e9FC0nifs7K0w', '!tNpiAqfxkwVfethRjU:hs1', False)]
2022-05-13 04:39:38,253 - synapse.storage.database - 1056 - INFO - persist_events-7 - simple_insert_many_txn sql=INSERT INTO room_memberships (event_id, user_id, sender, room_id, membership, display_name, avatar_url) VALUES(?, ?, ?, ?, ?, ?, ?) values=[]
2022-05-13 04:39:38,254 - synapse.storage.database - 1056 - INFO - persist_events-7 - simple_insert_many_txn sql=INSERT INTO event_reference_hashes (event_id, algorithm, hash) VALUES(?, ?, ?) values=[('$HE1j9EVMBRsegak3PC7F1RyzZIksseDv3RJkA8LGGQ8', 'sha256', <memory at 0x7faf76288dc0>)]
2022-05-13 04:39:38,265 - synapse.storage.database - 1056 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo - simple_insert_many_txn sql=INSERT INTO state_groups_state (state_group, room_id, type, state_key, event_id) VALUES(?, ?, ?, ?, ?) values=[(3, '!tNpiAqfxkwVfethRjU:hs1', 'm.room.create', '', '$Woh3E6gvB2iXS6ixGJqQmpSD7ojIY6nRe0W0qqIiJDE'), (3, '!tNpiAqfxkwVfethRjU:hs1', 'm.room.member', '@alice:hs1', '$i8nCcFBnqP8R5st9TQC0CcFE4YpTnsSLCruGpZ7Ut-Y'), (3, '!tNpiAqfxkwVfethRjU:hs1', 'm.room.history_visibility', '', '$PIhf8N4pkn8Y4YJG8M1P8Q1GbNQz1VKoRfUW6pv2TkI'), (3, '!tNpiAqfxkwVfethRjU:hs1', 'm.room.member', '@the-bridge-user:hs1', '$3PvG4kbdHIzOUcaqZdC1asc2tHeE4F7EfSOERzq7O7Q'), (3, '!tNpiAqfxkwVfethRjU:hs1', 'm.room.join_rules', '', '$BqmEAYfPvqoXZzYrMdldWEY-ZXfWZ-msHsY5Gh3oFxE'), (3, '!tNpiAqfxkwVfethRjU:hs1', 'm.room.power_levels', '', '$8VkNhAhCRNnIJzT1BV-tPIjgzSGcufAwcHZN9Msb93Q'), (3, '!tNpiAqfxkwVfethRjU:hs1', 'm.room.name', '', '$AfvRfXijO7LnkYQyZZ2ExU7Hw-WxO3JAHRwHEQQsob4')]
2022-05-13 04:39:38,271 - synapse.storage.database - 1056 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo - simple_insert_many_txn sql=INSERT INTO state_groups_state (state_group, room_id, type, state_key, event_id) VALUES(?, ?, ?, ?, ?) values=[(4, '!tNpiAqfxkwVfethRjU:hs1', 'm.room.member', '@charlie:hs2', '$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo')]
2022-05-13 04:39:38,280 - synapse.storage.util.partial_state_events_tracker - 51 - INFO - sync_partial_state_room-0-$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo - Notifying 2 things waiting for un-partial-stating of event $dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo
2022-05-13 04:39:38,280 - synapse.storage.util.partial_state_events_tracker - 108 - INFO - _process_incoming_pdus_in_room_inner-0-$d-ZaRzttjmLPwP8SMHHxvNiheVLozYME5stmHctL54Q - Events dict_keys(['$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo']) all un-partial-stated
2022-05-13 04:39:38,281 - synapse.storage.util.partial_state_events_tracker - 108 - INFO - GET-11 - Events dict_keys(['$dkUFr4Pw-CDrJySCSHeHTL2u82YZrhK80OXaHKIU0vo']) all un-partial-stated
2022-05-13 04:39:38,286 - synapse.handlers.federation - 1474 - INFO - sync_partial_state_room-0 - Updating current state for !tNpiAqfxkwVfethRjU:hs1
2022-05-13 04:39:38,295 - synapse.handlers.federation - 1480 - INFO - sync_partial_state_room-0 - Clearing partial-state flag for !tNpiAqfxkwVfethRjU:hs1
2022-05-13 04:39:38,298 - synapse.storage.databases.main.room - 1100 - INFO - sync_partial_state_room-0 - _clear_partial_state_room_txn room_id=!tNpiAqfxkwVfethRjU:hs1
2022-05-13 04:39:38,302 - synapse.handlers.federation - 1483 - INFO - sync_partial_state_room-0 - State resync complete for !tNpiAqfxkwVfethRjU:hs1
2022-05-13 04:39:38,312 - synapse.storage.database - 1056 - INFO - persist_events-8 - simple_insert_many_txn sql=INSERT INTO event_forward_extremities (event_id, room_id) VALUES(?, ?) values=[('$d-ZaRzttjmLPwP8SMHHxvNiheVLozYME5stmHctL54Q', '!tNpiAqfxkwVfethRjU:hs1')]
2022-05-13 04:39:38,312 - synapse.storage.database - 1056 - INFO - persist_events-8 - simple_insert_many_txn sql=INSERT INTO stream_ordering_to_exterm (room_id, event_id, stream_ordering) VALUES(?, ?, ?) values=[('!tNpiAqfxkwVfethRjU:hs1', '$d-ZaRzttjmLPwP8SMHHxvNiheVLozYME5stmHctL54Q', 3)]
2022-05-13 04:39:38,313 - synapse.storage.database - 1056 - INFO - persist_events-8 - simple_insert_many_txn sql=INSERT INTO event_json (event_id, room_id, internal_metadata, json, format_version) VALUES(?, ?, ?, ?, ?) values=<generator object PersistEventsStore._store_event_txn.<locals>.<genexpr> at 0x7faf6636df20>
2022-05-13 04:39:38,313 - synapse.storage.database - 1056 - INFO - persist_events-8 - simple_insert_many_txn sql=INSERT INTO events (instance_name, stream_ordering, topological_ordering, depth, event_id, room_id, type, processed, outlier, origin_server_ts, received_ts, sender, contains_url, state_key, rejection_reason) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) values=<generator object PersistEventsStore._store_event_txn.<locals>.<genexpr> at 0x7faf6636df20>
2022-05-13 04:39:38,314 - synapse.storage.database - 1056 - INFO - persist_events-8 - simple_insert_many_txn sql=INSERT INTO state_events (event_id, room_id, type, state_key) VALUES(?, ?, ?, ?) values=<generator object PersistEventsStore._store_event_txn.<locals>.<genexpr> at 0x7faf6636df20>
2022-05-13 04:39:38,314 - synapse.storage.database - 1056 - INFO - persist_events-8 - simple_insert_many_txn sql=INSERT INTO partial_state_events (room_id, event_id) VALUES(?, ?) values=[('!tNpiAqfxkwVfethRjU:hs1', '$d-ZaRzttjmLPwP8SMHHxvNiheVLozYME5stmHctL54Q')]
2022-05-13 04:39:38,316 - synapse.federation.federation_server - 1098 - ERROR - _process_incoming_pdus_in_room_inner-0 - Failed to handle PDU $d-ZaRzttjmLPwP8SMHHxvNiheVLozYME5stmHctL54Q
Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/synapse/federation/federation_server.py", line 1088, in _process_incoming_pdus_in_room_inner
    await self._federation_event_handler.on_receive_pdu(
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/federation_event.py", line 275, in on_receive_pdu
    await self._process_received_pdu(origin, pdu, state=None)
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/federation_event.py", line 1119, in _process_received_pdu
    await self._run_push_actions_and_persist_event(event, context, backfilled)
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/federation_event.py", line 1966, in _run_push_actions_and_persist_event
    await self.persist_events_and_notify(
  File "/usr/local/lib/python3.9/site-packages/synapse/handlers/federation_event.py", line 2016, in persist_events_and_notify
    events, max_stream_token = await self._storage.persistence.persist_events(
  File "/usr/local/lib/python3.9/site-packages/synapse/logging/opentracing.py", line 797, in _trace_inner
    return await func(*args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/persist_events.py", line 326, in persist_events
    ret_vals = await yieldable_gather_results(enqueue, partitioned.items())
  File "/usr/local/lib/python3.9/site-packages/synapse/util/async_helpers.py", line 291, in yieldable_gather_results
    raise dfe.subFailure.value from None
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1656, in _inlineCallbacks
    result = current_context.run(
  File "/usr/local/lib/python3.9/site-packages/twisted/python/failure.py", line 514, in throwExceptionIntoGenerator
    return g.throw(self.type, self.value, self.tb)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/persist_events.py", line 322, in enqueue
    return await self._event_persist_queue.add_to_queue(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/persist_events.py", line 200, in add_to_queue
    res = await make_deferred_yieldable(end_item.deferred.observe())
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/persist_events.py", line 243, in handle_queue_loop
    ret = await self._per_item_callback(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/persist_events.py", line 636, in _persist_event_batch
    await self.persist_events_store._persist_events_and_state_updates(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/databases/main/events.py", line 191, in _persist_events_and_state_updates
    await self.db_pool.runInteraction(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 814, in runInteraction
    return await delay_cancellation(_runInteraction())
  File "/usr/local/lib/python3.9/site-packages/twisted/internet/defer.py", line 1656, in _inlineCallbacks
    result = current_context.run(
  File "/usr/local/lib/python3.9/site-packages/twisted/python/failure.py", line 514, in throwExceptionIntoGenerator
    return g.throw(self.type, self.value, self.tb)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 786, in _runInteraction
    result = await self.runWithConnection(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 909, in runWithConnection
    return await make_deferred_yieldable(
  File "/usr/local/lib/python3.9/site-packages/twisted/python/threadpool.py", line 244, in inContext
    result = inContext.theWork()  # type: ignore[attr-defined]
  File "/usr/local/lib/python3.9/site-packages/twisted/python/threadpool.py", line 260, in <lambda>
    inContext.theWork = lambda: context.call(  # type: ignore[attr-defined]
  File "/usr/local/lib/python3.9/site-packages/twisted/python/context.py", line 117, in callWithContext
    return self.currentContext().callWithContext(ctx, func, *args, **kw)
  File "/usr/local/lib/python3.9/site-packages/twisted/python/context.py", line 82, in callWithContext
    return func(*args, **kw)
  File "/usr/local/lib/python3.9/site-packages/twisted/enterprise/adbapi.py", line 282, in _runWithConnection
    result = func(conn, *args, **kw)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 902, in inner_func
    return func(db_conn, *args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 650, in new_transaction
    r = func(cursor, *args, **kwargs)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/databases/main/events.py", line 413, in _persist_events_txn
    self._store_event_state_mappings_txn(txn, events_and_contexts)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/databases/main/events.py", line 2237, in _store_event_state_mappings_txn
    self.db_pool.simple_insert_many_txn(
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 1058, in simple_insert_many_txn
    txn.execute_batch(sql, values)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 317, in execute_batch
    self.executemany(sql, args)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 340, in executemany
    self._do_execute(self.txn.executemany, sql, *args)
  File "/usr/local/lib/python3.9/site-packages/synapse/storage/database.py", line 379, in _do_execute
    return func(sql, *args, **kwargs)
sqlite3.IntegrityError: FOREIGN KEY constraint failed

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this sort of thing is being tracked by #12646 and is planned for this quarter ⏩

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this sort of thing is being tracked by #12646 and is planned for this quarter fast_forward

Now #12988 I think

continue
richvdh marked this conversation as resolved.
Show resolved Hide resolved

events = await self.store.get_events_as_list(
batch,
redact_behaviour=EventRedactBehaviour.AS_IS,
allow_rejected=True,
)
for event in events:
await self._federation_event_handler.update_state_for_partial_state_event(
destination, event
)
39 changes: 39 additions & 0 deletions synapse/handlers/federation_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,45 @@ async def process_remote_join(

return await self.persist_events_and_notify(room_id, [(event, context)])

async def update_state_for_partial_state_event(
self, destination: str, event: EventBase
) -> None:
"""Recalculate the state at an event as part of a de-partial-stating process

Args:
destination: server to request full state from
event: partial-state event to be de-partial-stated
"""
logger.info("Updating state for %s", event.event_id)
with nested_logging_context(suffix=event.event_id):
# if we have all the event's prev_events, then we can work out the
# state based on their states. Otherwise, we request it from the destination
# server.
#
# This is the same operation as we do when we receive a regular event
# over federation.
state = await self._resolve_state_at_missing_prevs(destination, event)

# build a new state group for it if need be
context = await self._state_handler.compute_event_context(
event,
old_state=state,
)
if context.partial_state:
# this can happen if some or all of the event's prev_events still have
# partial state - ie, an event has an earlier stream_ordering than one
# or more of its prev_events, so we de-partial-state it before its
# prev_events.
#
# TODO(faster_joins): we probably need to be more intelligent, and
# exclude partial-state prev_events from consideration
logger.warning(
"%s still has partial state: can't de-partial-state it yet",
event.event_id,
)
return
await self._store.update_state_for_partial_state_event(event, context)

async def backfill(
self, dest: str, room_id: str, limit: int, extremities: Collection[str]
) -> None:
Expand Down
15 changes: 15 additions & 0 deletions synapse/storage/databases/main/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -963,6 +963,21 @@ def _persist_transaction_ids_txn(
values=to_insert,
)

async def update_current_state(
self,
room_id: str,
state_delta: DeltaState,
stream_id: int,
) -> None:
"""Update the current state stored in the datatabase for the given room"""

await self.db_pool.runInteraction(
"update_current_state",
self._update_current_state_txn,
state_delta_by_room={room_id: state_delta},
stream_id=stream_id,
)

def _update_current_state_txn(
self,
txn: LoggingTransaction,
Expand Down
24 changes: 24 additions & 0 deletions synapse/storage/databases/main/events_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -1979,3 +1979,27 @@ async def is_partial_state_event(self, event_id: str) -> bool:
desc="is_partial_state_event",
)
return result is not None

async def get_partial_state_events_batch(self, room_id: str) -> List[str]:
"""Get a list of events in the given room that have partial state"""
return await self.db_pool.runInteraction(
"get_partial_state_events_batch",
self._get_partial_state_events_batch_txn,
room_id,
)

@staticmethod
def _get_partial_state_events_batch_txn(
txn: LoggingTransaction, room_id: str
) -> List[str]:
txn.execute(
"""
SELECT event_id FROM partial_state_events AS pse
JOIN events USING (event_id)
WHERE pse.room_id = ?
ORDER BY events.stream_ordering
LIMIT 100
""",
(room_id,),
)
return [row[0] for row in txn]
31 changes: 31 additions & 0 deletions synapse/storage/databases/main/room.py
Original file line number Diff line number Diff line change
Expand Up @@ -1077,6 +1077,37 @@ def get_rooms_for_retention_period_in_range_txn(
get_rooms_for_retention_period_in_range_txn,
)

async def clear_partial_state_room(self, room_id: str) -> bool:
# this can race with incoming events, so we watch out for FK errors.
# TODO(faster_joins): this still doesn't completely fix the race, since the persist process
# is not atomic. I fear we need an application-level lock.
try:
await self.db_pool.runInteraction(
"clear_partial_state_room", self._clear_partial_state_room_txn, room_id
)
return True
except self.db_pool.engine.module.DatabaseError as e:
# TODO(faster_joins): how do we distinguish between FK errors and other errors?
logger.warning(
"Exception while clearing lazy partial-state-room %s, retrying: %s",
room_id,
e,
)
return False

@staticmethod
def _clear_partial_state_room_txn(txn: LoggingTransaction, room_id: str) -> None:
DatabasePool.simple_delete_txn(
txn,
table="partial_state_rooms_servers",
keyvalues={"room_id": room_id},
)
DatabasePool.simple_delete_one_txn(
txn,
table="partial_state_rooms",
keyvalues={"room_id": room_id},
)


class _BackgroundUpdates:
REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory"
Expand Down
48 changes: 48 additions & 0 deletions synapse/storage/databases/main/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from synapse.api.errors import NotFoundError, UnsupportedRoomVersionError
from synapse.api.room_versions import KNOWN_ROOM_VERSIONS, RoomVersion
from synapse.events import EventBase
from synapse.events.snapshot import EventContext
from synapse.storage._base import SQLBaseStore
from synapse.storage.database import (
DatabasePool,
Expand Down Expand Up @@ -354,6 +355,53 @@ async def get_referenced_state_groups(

return {row["state_group"] for row in rows}

async def update_state_for_partial_state_event(
self,
event: EventBase,
context: EventContext,
) -> None:
"""Update the state group for a partial state event"""
await self.db_pool.runInteraction(
"update_state_for_partial_state_event",
self._update_state_for_partial_state_event_txn,
event,
context,
)

def _update_state_for_partial_state_event_txn(
self,
txn,
event: EventBase,
context: EventContext,
):
# we shouldn't have any outliers here
assert not event.internal_metadata.is_outlier()

# anything that was rejected should have the same state as its
# predecessor.
if context.rejected:
assert context.state_group == context.state_group_before_event

self.db_pool.simple_update_txn(
txn,
table="event_to_state_groups",
keyvalues={"event_id": event.event_id},
updatevalues={"state_group": context.state_group},
)

self.db_pool.simple_delete_one_txn(
txn,
table="partial_state_events",
keyvalues={"event_id": event.event_id},
)
Comment on lines +385 to +396
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems unfortunate that these will be called separately per event, but I don't see a good way to make this method take a list of events.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we think about optimising it later. Hopefully there won't be a huge number of events anyway.


# TODO(faster_joins): need to do something about workers here
txn.call_after(
self._get_state_group_for_event.prefill,
(event.event_id,),
context.state_group,
)


class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):

Expand Down
56 changes: 56 additions & 0 deletions synapse/storage/persist_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,62 @@ async def persist_event(
pos = PersistedEventPosition(self._instance_name, event_stream_id)
return event, pos, self.main_store.get_room_max_token()

async def update_current_state(self, room_id: str) -> None:
"""Recalculate the current state for a room, and persist it"""
state = await self._calculate_current_state(room_id)
delta = await self._calculate_state_delta(room_id, state)

# TODO(faster_joins): get a real stream ordering, to make this work correctly
# across workers.
#
# TODO(faster_joins): this can race against event persistence, in which case we
# will end up with incorrect state. Perhaps we should make this a job we
# farm out to the event persister, somehow.
stream_id = self.main_store.get_room_max_stream_ordering()
await self.persist_events_store.update_current_state(room_id, delta, stream_id)

async def _calculate_current_state(self, room_id: str) -> StateMap[str]:
"""Calculate the current state of a room, based on the forward extremities

Args:
room_id: room for which to calculate current state

Returns:
map from (type, state_key) to event id for the current state in the room
"""
latest_event_ids = await self.main_store.get_latest_event_ids_in_room(room_id)
state_groups = set(
(
await self.main_store._get_state_group_for_events(latest_event_ids)
).values()
)

state_maps_by_state_group = await self.state_store._get_state_for_groups(
state_groups
)

if len(state_groups) == 1:
# If there is only one state group, then we know what the current
# state is.
return state_maps_by_state_group[state_groups.pop()]

# Ok, we need to defer to the state handler to resolve our state sets.
logger.debug("calling resolve_state_groups from preserve_events")

# Avoid a circular import.
from synapse.state import StateResolutionStore

room_version = await self.main_store.get_room_version_id(room_id)
res = await self._state_resolution_handler.resolve_state_groups(
room_id,
room_version,
state_maps_by_state_group,
event_map=None,
state_res_store=StateResolutionStore(self.main_store),
)

return res.state

async def _persist_event_batch(
self,
events_and_contexts: List[Tuple[EventBase, EventContext]],
Expand Down