Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Skip processing stats for broken rooms. #14873

Merged
merged 3 commits into from
Jan 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/14873.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a long-standing bug where the `populate_room_stats` background job could fail on broken rooms.
6 changes: 5 additions & 1 deletion synapse/storage/databases/main/events_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@
)


class InvalidEventError(Exception):
"""The event retrieved from the database is invalid and cannot be used."""


@attr.s(slots=True, auto_attribs=True)
class EventCacheEntry:
event: EventBase
Expand Down Expand Up @@ -1299,7 +1303,7 @@ async def _fetch_event_ids_and_get_outstanding_redactions(
# invites, so just accept it for all membership events.
#
if d["type"] != EventTypes.Member:
raise Exception(
raise InvalidEventError(
Comment on lines 1305 to +1306
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not the goal of this PR, but in passing: Is it possible that there's no type field in d?

"Room %s for event %s is unknown" % (d["room_id"], event_id)
)

Expand Down
13 changes: 12 additions & 1 deletion synapse/storage/databases/main/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
LoggingDatabaseConnection,
LoggingTransaction,
)
from synapse.storage.databases.main.events_worker import InvalidEventError
from synapse.storage.databases.main.state_deltas import StateDeltasStore
from synapse.types import JsonDict
from synapse.util.caches.descriptors import cached
Expand Down Expand Up @@ -554,7 +555,17 @@ def _fetch_current_state_stats(
"get_initial_state_for_room", _fetch_current_state_stats
)

state_event_map = await self.get_events(event_ids, get_prev_content=False) # type: ignore[attr-defined]
try:
state_event_map = await self.get_events(event_ids, get_prev_content=False) # type: ignore[attr-defined]
except InvalidEventError as e:
# If an exception occurs fetching events then the room is broken;
# skip process it to avoid being stuck on a room.
logger.warning(
"Failed to fetch events for room %s, skipping stats calculation: %r.",
room_id,
e,
)
return

room_state: Dict[str, Union[None, bool, str]] = {
"join_rules": None,
Expand Down
88 changes: 54 additions & 34 deletions tests/storage/databases/main/test_room.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,23 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
self.token = self.login("foo", "pass")

def _generate_room(self) -> str:
room_id = self.helper.create_room_as(self.user_id, tok=self.token)
"""Create a room and return the room ID."""
return self.helper.create_room_as(self.user_id, tok=self.token)

return room_id
def run_background_updates(self, update_name: str) -> None:
"""Insert and run the background update."""
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{"update_name": update_name, "progress_json": "{}"},
)
)

# ... and tell the DataStore that it hasn't finished all updates yet
self.store.db_pool.updates._all_done = False

# Now let's actually drive the updates to completion
self.wait_for_background_updates()

def test_background_populate_rooms_creator_column(self) -> None:
"""Test that the background update to populate the rooms creator column
Expand Down Expand Up @@ -71,22 +85,7 @@ def test_background_populate_rooms_creator_column(self) -> None:
)
self.assertEqual(room_creator_before, None)

# Insert and run the background update.
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": _BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN,
"progress_json": "{}",
},
)
)

# ... and tell the DataStore that it hasn't finished all updates yet
self.store.db_pool.updates._all_done = False

# Now let's actually drive the updates to completion
self.wait_for_background_updates()
self.run_background_updates(_BackgroundUpdates.POPULATE_ROOMS_CREATOR_COLUMN)

# Make sure the background update filled in the room creator
room_creator_after = self.get_success(
Expand Down Expand Up @@ -137,22 +136,7 @@ def test_background_add_room_type_column(self) -> None:
)
)

# Insert and run the background update
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": _BackgroundUpdates.ADD_ROOM_TYPE_COLUMN,
"progress_json": "{}",
},
)
)

# ... and tell the DataStore that it hasn't finished all updates yet
self.store.db_pool.updates._all_done = False

# Now let's actually drive the updates to completion
self.wait_for_background_updates()
self.run_background_updates(_BackgroundUpdates.ADD_ROOM_TYPE_COLUMN)

# Make sure the background update filled in the room type
room_type_after = self.get_success(
Expand All @@ -164,3 +148,39 @@ def test_background_add_room_type_column(self) -> None:
)
)
self.assertEqual(room_type_after, RoomTypes.SPACE)

def test_populate_stats_broken_rooms(self) -> None:
"""Ensure that re-populating room stats skips broken rooms."""

# Create a good room.
good_room_id = self._generate_room()

# Create a room and then break it by having no room version.
room_id = self._generate_room()
self.get_success(
self.store.db_pool.simple_update(
table="rooms",
keyvalues={"room_id": room_id},
updatevalues={"room_version": None},
desc="test",
)
)

# Nuke any current stats in the database.
self.get_success(
self.store.db_pool.simple_delete(
table="room_stats_state", keyvalues={"1": 1}, desc="test"
)
)

self.run_background_updates("populate_stats_process_rooms")

# Only the good room appears in the stats tables.
results = self.get_success(
self.store.db_pool.simple_select_onecol(
table="room_stats_state",
keyvalues={},
retcol="room_id",
)
)
self.assertEqual(results, [good_room_id])