Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Include whether the requesting user has participated in a thread. #11577

Merged
merged 6 commits into from
Jan 18, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 29 additions & 10 deletions synapse/storage/databases/main/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,23 +382,24 @@ def _get_applicable_edit_txn(txn: LoggingTransaction) -> Optional[str]:

@cached()
async def get_thread_summary(
self, event_id: str, room_id: str
) -> Tuple[int, Optional[EventBase]]:
self, event_id: str, room_id: str, user_id: str
) -> Tuple[int, Optional[EventBase], bool]:
"""Get the number of threaded replies, the senders of those replies, and
the latest reply (if any) for the given event.

Args:
event_id: Summarize the thread related to this event ID.
room_id: The room the event belongs to.
user_id: The user requesting the summary.

Returns:
The number of items in the thread and the most recent response, if any.
"""

def _get_thread_summary_txn(
txn: LoggingTransaction,
) -> Tuple[int, Optional[str]]:
# Fetch the count of threaded events and the latest event ID.
) -> Tuple[int, Optional[str], bool]:
# Fetch the latest event ID in the thread.
# TODO Should this only allow m.room.message events.
sql = """
SELECT event_id
Expand All @@ -415,10 +416,11 @@ def _get_thread_summary_txn(
txn.execute(sql, (event_id, room_id, RelationTypes.THREAD))
row = txn.fetchone()
if row is None:
return 0, None
return 0, None, False

latest_event_id = row[0]

# Fetch the number of threaded replies.
sql = """
SELECT COUNT(event_id)
FROM event_relations
Expand All @@ -431,17 +433,33 @@ def _get_thread_summary_txn(
txn.execute(sql, (event_id, room_id, RelationTypes.THREAD))
count = cast(Tuple[int], txn.fetchone())[0]

return count, latest_event_id
# Fetch whether the requester has participated or not.
sql = """
SELECT 1
FROM event_relations
INNER JOIN events USING (event_id)
WHERE
relates_to_id = ?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any idea how well this performs? E.g. are there relevant indices for the query planner to make use of?

I see

    "event_relations_relates" btree (relates_to_id, relation_type, aggregation_key)
    "events_order_room2" btree (room_id, topological_ordering, stream_ordering)

I'd guess this means it has to scan the entire room or thread to find the sender of interest. Maybe that's fine — we're assuming most threads aren't that long?

For what it's worth:

matrix=> EXPLAIN
SELECT 1
FROM event_relations
INNER JOIN events USING (event_id)
WHERE
    relates_to_id='a'
    AND room_id = 'b'
    AND relation_type = 'c'
    AND sender = 'd'
;
                                              QUERY PLAN                                              
══════════════════════════════════════════════════════════════════════════════════════════════════════
 Nested Loop  (cost=1.40..12.12 rows=1 width=4)
   ->  Index Scan using event_relations_relates on event_relations  (cost=0.69..4.71 rows=1 width=42)
         Index Cond: ((relates_to_id = 'a'::text) AND (relation_type = 'c'::text))
   ->  Index Scan using events_event_id_key on events  (cost=0.70..4.73 rows=1 width=38)
         Index Cond: (event_id = event_relations.event_id)
         Filter: ((room_id = 'b'::text) AND (sender = 'd'::text))
(6 rows)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, but it is pretty much the same as the other queries we're doing on relations, so I don't think it will be a problem?

I wonder if these queries (in general) would benefit from an index on (relates_to_id, relation_type). The current one seems to include aggregation_key as well.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good to me.

As for the indices, I think the existing one is sufficient, because a btree index on (X, Y, Z) can be used as an index on (X, Y). But I can't find a reference for this. Maybe @reivilibre knows one?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> But I can't find a reference for this.

Searching for "Index" and "prefix" gets you stuff related to `WHERE strcol LIKE 'foo%'`. What I wanted to find was https://use-the-index-luke.com/sql/where-clause/the-equals-operator/concatenated-keys and https://www.postgresql.org/docs/10/indexes-multicolumn.html. To quote from the Postgres docs specifically:

> A multicolumn B-tree index can be used with query conditions that involve any subset of the index's columns, but the index is most efficient when there are constraints on the leading (leftmost) columns.

AND room_id = ?
AND relation_type = ?
AND sender = ?
"""

txn.execute(sql, (event_id, room_id, RelationTypes.THREAD, user_id))
row = txn.fetchone()
participated = bool(txn.fetchone())

return count, latest_event_id, participated

count, latest_event_id = await self.db_pool.runInteraction(
count, latest_event_id, participated = await self.db_pool.runInteraction(
"get_thread_summary", _get_thread_summary_txn
)

latest_event = None
if latest_event_id:
latest_event = await self.get_event(latest_event_id, allow_none=True) # type: ignore[attr-defined]

return count, latest_event
return count, latest_event, participated

async def events_have_relations(
self,
Expand Down Expand Up @@ -602,12 +620,13 @@ async def _get_bundled_aggregation_for_event(
(
thread_count,
latest_thread_event,
) = await self.get_thread_summary(event_id, room_id)
participated,
) = await self.get_thread_summary(event_id, room_id, user_id)
if latest_thread_event:
aggregations[RelationTypes.THREAD] = {
# Don't bundle aggregations as this could recurse forever.
"latest_event": latest_thread_event,
"count": thread_count,
"current_user_participated": participated,
}

# Store the bundled aggregations in the event metadata for later use.
Expand Down
3 changes: 3 additions & 0 deletions tests/rest/client/test_relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,9 @@ def assert_bundle(actual):
2,
actual[RelationTypes.THREAD].get("count"),
)
self.assertTrue(
actual[RelationTypes.THREAD].get("current_user_participated")
)
# The latest thread event has some fields that don't matter.
self.assert_dict(
{
Expand Down