Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Fix devices_last_seen background update. #6135

Merged
merged 2 commits into from
Oct 1, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/6135.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix bug in background update that adds last seen information to the `devices` table, and improve its performance on Postgres.
46 changes: 39 additions & 7 deletions synapse/storage/client_ips.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,14 +463,46 @@ def _devices_last_seen_update(self, progress, batch_size):
last_device_id = progress.get("last_device_id", "")

def _devices_last_seen_update_txn(txn):
# This consists of two queries:
#
# 1. The sub-query searches for the next N devices and joins
# against user_ips to find the max last_seen associated with
# that device.
# 2. The outer query then joins again against user_ips on
# user/device/last_seen. This *should* hopefully only
# return one row, but if it does return more than one then
# we'll just end up updating the same device row multiple
# times, which is fine.

if self.database_engine.supports_tuple_comparison:
where_clause = "(user_id, device_id) > (?, ?)"
where_args = [last_user_id, last_device_id]
else:
# We explicitly do a `user_id >= ? AND (...)` here to ensure
# that an index is used, as doing `user_id > ? OR (user_id = ? AND ...)`
# makes it hard for query optimiser to tell that it can use the
# index on user_id
where_clause = "user_id >= ? AND (user_id > ? OR device_id > ?)"
where_args = [last_user_id, last_user_id, last_device_id]

sql = """
SELECT u.last_seen, u.ip, u.user_agent, user_id, device_id FROM devices
INNER JOIN user_ips AS u USING (user_id, device_id)
WHERE user_id > ? OR (user_id = ? AND device_id > ?)
ORDER BY user_id ASC, device_id ASC
LIMIT ?
"""
txn.execute(sql, (last_user_id, last_user_id, last_device_id, batch_size))
SELECT
last_seen, ip, user_agent, user_id, device_id
FROM (
SELECT
user_id, device_id, MAX(u.last_seen) AS last_seen
FROM devices
INNER JOIN user_ips AS u USING (user_id, device_id)
WHERE %(where_clause)s
GROUP BY user_id, device_id
ORDER BY user_id ASC, device_id ASC
LIMIT ?
) c
INNER JOIN user_ips AS u USING (user_id, device_id, last_seen)
""" % {
"where_clause": where_clause
}
txn.execute(sql, where_args + [batch_size])

rows = txn.fetchall()
if not rows:
Expand Down
7 changes: 7 additions & 0 deletions synapse/storage/engines/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ def can_native_upsert(self):
"""
return True

@property
def supports_tuple_comparison(self):
"""
Do we support comparing tuples, i.e. `(a, b) > (c, d)`?
"""
return True

def is_deadlock(self, error):
if isinstance(error, self.module.DatabaseError):
# https://www.postgresql.org/docs/current/static/errcodes-appendix.html
Expand Down
8 changes: 8 additions & 0 deletions synapse/storage/engines/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ def can_native_upsert(self):
"""
return self.module.sqlite_version_info >= (3, 24, 0)

@property
def supports_tuple_comparison(self):
"""
Do we support comparing tuples, i.e. `(a, b) > (c, d)`? This requires
SQLite 3.15+.
"""
return self.module.sqlite_version_info >= (3, 15, 0)

def check_database(self, txn):
pass

Expand Down