From 91c60f304256c08e8aff53ed13d5b282057277d6 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
Date: Wed, 23 Sep 2020 16:42:44 +0100
Subject: [PATCH] Improve logging of state resolution (#8371)

I'd like to get a better insight into what we are doing with respect to state
res. The list of state groups we are resolving across should be short (if it
isn't, that's a massive problem in itself), so it should be fine to log it in
ite entiretly.

I've done some grepping and found approximately zero cases in which the
"shortcut" code delivered the result, so I've ripped that out too.
---
 changelog.d/8371.misc     |  1 +
 synapse/state/__init__.py | 64 ++++++++++-----------------------------
 2 files changed, 17 insertions(+), 48 deletions(-)
 create mode 100644 changelog.d/8371.misc

diff --git a/changelog.d/8371.misc b/changelog.d/8371.misc
new file mode 100644
index 000000000000..6a54a9496afb
--- /dev/null
+++ b/changelog.d/8371.misc
@@ -0,0 +1 @@
+Improve logging of state resolution.
diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py
index 56d6afb86353..5a5ea39e0103 100644
--- a/synapse/state/__init__.py
+++ b/synapse/state/__init__.py
@@ -25,7 +25,6 @@
     Sequence,
     Set,
     Union,
-    cast,
     overload,
 )
 
@@ -42,7 +41,7 @@
 from synapse.state import v1, v2
 from synapse.storage.databases.main.events_worker import EventRedactBehaviour
 from synapse.storage.roommember import ProfileInfo
-from synapse.types import Collection, MutableStateMap, StateMap
+from synapse.types import Collection, StateMap
 from synapse.util import Clock
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches.expiringcache import ExpiringCache
@@ -472,10 +471,9 @@ class StateResolutionHandler:
     def __init__(self, hs):
         self.clock = hs.get_clock()
 
-        # dict of set of event_ids -> _StateCacheEntry.
-        self._state_cache = None
         self.resolve_linearizer = Linearizer(name="state_resolve_lock")
 
+        # dict of set of event_ids -> _StateCacheEntry.
         self._state_cache = ExpiringCache(
             cache_name="state_cache",
             clock=self.clock,
@@ -519,57 +517,28 @@ async def resolve_state_groups(
         Returns:
             The resolved state
         """
-        logger.debug("resolve_state_groups state_groups %s", state_groups_ids.keys())
-
         group_names = frozenset(state_groups_ids.keys())
 
         with (await self.resolve_linearizer.queue(group_names)):
-            if self._state_cache is not None:
-                cache = self._state_cache.get(group_names, None)
-                if cache:
-                    return cache
+            cache = self._state_cache.get(group_names, None)
+            if cache:
+                return cache
 
             logger.info(
-                "Resolving state for %s with %d groups", room_id, len(state_groups_ids)
+                "Resolving state for %s with groups %s", room_id, list(group_names),
             )
 
             state_groups_histogram.observe(len(state_groups_ids))
 
-            # start by assuming we won't have any conflicted state, and build up the new
-            # state map by iterating through the state groups. If we discover a conflict,
-            # we give up and instead use `resolve_events_with_store`.
-            #
-            # XXX: is this actually worthwhile, or should we just let
-            # resolve_events_with_store do it?
-            new_state = {}  # type: MutableStateMap[str]
-            conflicted_state = False
-            for st in state_groups_ids.values():
-                for key, e_id in st.items():
-                    if key in new_state:
-                        conflicted_state = True
-                        break
-                    new_state[key] = e_id
-                if conflicted_state:
-                    break
-
-            if conflicted_state:
-                logger.info("Resolving conflicted state for %r", room_id)
-                with Measure(self.clock, "state._resolve_events"):
-                    # resolve_events_with_store returns a StateMap, but we can
-                    # treat it as a MutableStateMap as it is above. It isn't
-                    # actually mutated anymore (and is frozen in
-                    # _make_state_cache_entry below).
-                    new_state = cast(
-                        MutableStateMap,
-                        await resolve_events_with_store(
-                            self.clock,
-                            room_id,
-                            room_version,
-                            list(state_groups_ids.values()),
-                            event_map=event_map,
-                            state_res_store=state_res_store,
-                        ),
-                    )
+            with Measure(self.clock, "state._resolve_events"):
+                new_state = await resolve_events_with_store(
+                    self.clock,
+                    room_id,
+                    room_version,
+                    list(state_groups_ids.values()),
+                    event_map=event_map,
+                    state_res_store=state_res_store,
+                )
 
             # if the new state matches any of the input state groups, we can
             # use that state group again. Otherwise we will generate a state_id
@@ -579,8 +548,7 @@ async def resolve_state_groups(
             with Measure(self.clock, "state.create_group_ids"):
                 cache = _make_state_cache_entry(new_state, state_groups_ids)
 
-            if self._state_cache is not None:
-                self._state_cache[group_names] = cache
+            self._state_cache[group_names] = cache
 
             return cache