From 70c52821ce9e755e4a5c3081510fb1260f609ee3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 17 Sep 2019 12:41:23 +0100 Subject: [PATCH] Fix race condition in room stats. (#6029) Broke in #5971 Basically the bug is that if get_current_state_deltas returns no new updates and we then take the max pos, its possible that we miss an update that happens in between the two calls. (e.g. get_current_state_deltas looks up to stream pos 5, then an event persists and so getting the max stream pos returns 6, meaning that next time we check for things with a stream pos bigger than 6) --- changelog.d/6029.bugfix | 1 + synapse/handlers/stats.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) create mode 100644 changelog.d/6029.bugfix diff --git a/changelog.d/6029.bugfix b/changelog.d/6029.bugfix new file mode 100644 index 000000000000..9ea095103b45 --- /dev/null +++ b/changelog.d/6029.bugfix @@ -0,0 +1 @@ +Fix room and user stats tracking. diff --git a/synapse/handlers/stats.py b/synapse/handlers/stats.py index 3c265f3718e0..cbac7c347aab 100644 --- a/synapse/handlers/stats.py +++ b/synapse/handlers/stats.py @@ -84,6 +84,13 @@ def _unsafe_process(self): # Loop round handling deltas until we're up to date while True: + # Be sure to read the max stream_ordering *before* checking if there are any outstanding + # deltas, since there is otherwise a chance that we could miss updates which arrive + # after we check the deltas. + room_max_stream_ordering = yield self.store.get_room_max_stream_ordering() + if self.pos == room_max_stream_ordering: + break + deltas = yield self.store.get_current_state_deltas(self.pos) if deltas: @@ -94,7 +101,7 @@ def _unsafe_process(self): else: room_deltas = {} user_deltas = {} - max_pos = yield self.store.get_room_max_stream_ordering() + max_pos = room_max_stream_ordering # Then count deltas for total_events and total_event_bytes. room_count, user_count = yield self.store.get_changes_room_total_events_and_bytes( @@ -117,10 +124,9 @@ def _unsafe_process(self): stream_id=max_pos, ) - event_processing_positions.labels("stats").set(max_pos) + logger.debug("Handled room stats to %s -> %s", self.pos, max_pos) - if self.pos == max_pos: - break + event_processing_positions.labels("stats").set(max_pos) self.pos = max_pos