-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Refactor getting replication updates from database. #7636
Changes from 9 commits
cc34308
8de532d
53db1be
1975a4d
85c9a94
67e7276
b6e35f2
9b492b6
d00dce6
d3ed450
dbe1760
df6c3b0
a902de2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Refactor getting replication updates from database. |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1077,9 +1077,29 @@ def get_ex_outlier_stream_rows_txn(txn): | |
"get_ex_outlier_stream_rows", get_ex_outlier_stream_rows_txn | ||
) | ||
|
||
def get_all_new_backfill_event_rows(self, last_id, current_id, limit): | ||
async def get_all_new_backfill_event_rows( | ||
self, instance_name: str, last_id: int, current_id: int, limit: int | ||
) -> Tuple[List[Tuple[int, list]], int, bool]: | ||
"""Get updates for backfill replication stream, including all new | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this docstring is very helpful, but please can all the updated/new storage methods have one, not just this method? |
||
backfilled events and events that have gone from being outliers to not. | ||
|
||
Args: | ||
instance_name: The writer we want to fetch updates from. Unused | ||
here sincethere is only ever one writer. | ||
erikjohnston marked this conversation as resolved.
Show resolved
Hide resolved
|
||
last_id: The token to fetch updates from. Exclusive. | ||
current_id: The token to fetch updates up to. Inclusive. | ||
limit: The requested limit for the number of rows to return. The | ||
function may return more or fewer rows. | ||
|
||
Returns: | ||
A tuple consisting of: the updates, the position of the rows | ||
returned up to, and whether we returned fewer rows than exists | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you rephrase "the position of the rows returned up to"? it's somewhat unclear: inconsistent use of "position" and "token", inclusive or exclusive, etc. "the last token included in the results", maybe? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hopefully I've clarified it. Unfortunately, technically, the returned token doesn't have to be the last token included in the results (since |
||
between the requested tokens due to the limit. | ||
|
||
The updates are list of 2-tuples of stream ID and the row. | ||
erikjohnston marked this conversation as resolved.
Show resolved
Hide resolved
|
||
""" | ||
if last_id == current_id: | ||
return defer.succeed([]) | ||
return [], current_id, False | ||
|
||
def get_all_new_backfill_event_rows(txn): | ||
sql = ( | ||
|
@@ -1094,10 +1114,12 @@ def get_all_new_backfill_event_rows(txn): | |
" LIMIT ?" | ||
) | ||
txn.execute(sql, (-last_id, -current_id, limit)) | ||
new_event_updates = txn.fetchall() | ||
new_event_updates = [(row[0], row[1:]) for row in txn] | ||
|
||
limited = False | ||
if len(new_event_updates) == limit: | ||
upper_bound = new_event_updates[-1][0] | ||
limited = True | ||
else: | ||
upper_bound = current_id | ||
|
||
|
@@ -1114,11 +1136,15 @@ def get_all_new_backfill_event_rows(txn): | |
" ORDER BY event_stream_ordering DESC" | ||
) | ||
txn.execute(sql, (-last_id, -upper_bound)) | ||
new_event_updates.extend(txn.fetchall()) | ||
new_event_updates.extend((row[0], row[1:]) for row in txn) | ||
|
||
if len(new_event_updates) >= limit: | ||
upper_bound = new_event_updates[-1][0] | ||
limited = True | ||
|
||
return new_event_updates | ||
return new_event_updates, upper_bound, limited | ||
|
||
return self.db.runInteraction( | ||
return await self.db.runInteraction( | ||
"get_all_new_backfill_event_rows", get_all_new_backfill_event_rows | ||
) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,7 +16,7 @@ | |
|
||
import abc | ||
import logging | ||
from typing import Union | ||
from typing import List, Tuple, Union | ||
|
||
from canonicaljson import json | ||
|
||
|
@@ -348,23 +348,33 @@ def bulk_get_push_rules_enabled(self, user_ids): | |
results.setdefault(row["user_name"], {})[row["rule_id"]] = enabled | ||
return results | ||
|
||
def get_all_push_rule_updates(self, last_id, current_id, limit): | ||
async def get_all_push_rule_updates( | ||
self, instance_name: str, last_id: int, current_id: int, limit: int | ||
) -> Tuple[List[Tuple[int, tuple]], int, bool]: | ||
"""Get all the push rules changes that have happend on the server""" | ||
if last_id == current_id: | ||
return defer.succeed([]) | ||
return [], current_id, False | ||
|
||
def get_all_push_rule_updates_txn(txn): | ||
sql = ( | ||
"SELECT stream_id, event_stream_ordering, user_id, rule_id," | ||
" op, priority_class, priority, conditions, actions" | ||
" FROM push_rules_stream" | ||
" WHERE ? < stream_id AND stream_id <= ?" | ||
" ORDER BY stream_id ASC LIMIT ?" | ||
) | ||
sql = """ | ||
SELECT stream_id, user_id | ||
FROM push_rules_stream | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are we stuffing all those columns into the table if we don't care about them... 😕 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, hmm, it does look like they're not used anywhere else either. Though I don't really propose doing anything in this PR. |
||
WHERE ? < stream_id AND stream_id <= ? | ||
ORDER BY stream_id ASC | ||
LIMIT ? | ||
""" | ||
txn.execute(sql, (last_id, current_id, limit)) | ||
return txn.fetchall() | ||
updates = [(stream_id, (user_id,)) for stream_id, user_id in txn] | ||
|
||
limited = False | ||
upper_bound = current_id | ||
if len(updates) == limit: | ||
limited = True | ||
upper_bound = updates[-1][0] | ||
|
||
return updates, upper_bound, limited | ||
|
||
return self.db.runInteraction( | ||
return await self.db.runInteraction( | ||
"get_all_push_rule_updates", get_all_push_rule_updates_txn | ||
) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is doing the wrong thing for the returned token when the limit is hit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am such a crank 🤦
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
MAYBE THIS TIME I'VE fIXeD IT?!!!??!1?