Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Delete messages for hidden devices from device_inbox #11199

Merged
merged 3 commits into from
Nov 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/11199.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Delete `to_device` messages for hidden devices that will never be read, reducing database size.
89 changes: 89 additions & 0 deletions synapse/storage/databases/main/deviceinbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ def _add_messages_to_local_device_inbox_txn(
class DeviceInboxBackgroundUpdateStore(SQLBaseStore):
DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"
REMOVE_DELETED_DEVICES = "remove_deleted_devices_from_device_inbox"
REMOVE_HIDDEN_DEVICES = "remove_hidden_devices_from_device_inbox"

def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"):
super().__init__(database, db_conn, hs)
Expand All @@ -577,6 +578,11 @@ def __init__(self, database: DatabasePool, db_conn, hs: "HomeServer"):
self._remove_deleted_devices_from_device_inbox,
)

self.db_pool.updates.register_background_update_handler(
self.REMOVE_HIDDEN_DEVICES,
self._remove_hidden_devices_from_device_inbox,
)

async def _background_drop_index_device_inbox(self, progress, batch_size):
def reindex_txn(conn):
txn = conn.cursor()
Expand Down Expand Up @@ -672,6 +678,89 @@ def _remove_deleted_devices_from_device_inbox_txn(

return number_deleted

async def _remove_hidden_devices_from_device_inbox(
self, progress: JsonDict, batch_size: int
) -> int:
"""A background update that deletes all device_inboxes for hidden devices.

This should only need to be run once (when users upgrade to v1.47.0)

Args:
progress: JsonDict used to store progress of this background update
batch_size: the maximum number of rows to retrieve in a single select query

Returns:
The number of deleted rows
"""

def _remove_hidden_devices_from_device_inbox_txn(
txn: LoggingTransaction,
) -> int:
"""stream_id is not unique
we need to use an inclusive `stream_id >= ?` clause,
since we might not have deleted all hidden device messages for the stream_id
returned from the previous query

Then delete only rows matching the `(user_id, device_id, stream_id)` tuple,
to avoid problems of deleting a large number of rows all at once
due to a single device having lots of device messages.
"""

last_stream_id = progress.get("stream_id", 0)

sql = """
SELECT device_id, user_id, stream_id
FROM device_inbox
WHERE
stream_id >= ?
AND (device_id, user_id) IN (
SELECT device_id, user_id FROM devices WHERE hidden = ?
)
ORDER BY stream_id
LIMIT ?
"""

txn.execute(sql, (last_stream_id, True, batch_size))
rows = txn.fetchall()

num_deleted = 0
for row in rows:
num_deleted += self.db_pool.simple_delete_txn(
txn,
"device_inbox",
{"device_id": row[0], "user_id": row[1], "stream_id": row[2]},
)

if rows:
# We don't just save the `stream_id` in progress as
# otherwise it can happen in large deployments that
# no change of status is visible in the log file, as
# it may be that the stream_id does not change in several runs
self.db_pool.updates._background_update_progress_txn(
txn,
self.REMOVE_HIDDEN_DEVICES,
{
"device_id": rows[-1][0],
"user_id": rows[-1][1],
"stream_id": rows[-1][2],
},
)

return num_deleted

number_deleted = await self.db_pool.runInteraction(
"_remove_hidden_devices_from_device_inbox",
_remove_hidden_devices_from_device_inbox_txn,
)

# The task is finished when no more lines are deleted.
if not number_deleted:
await self.db_pool.updates._end_background_update(
self.REMOVE_HIDDEN_DEVICES
)

return number_deleted


class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore):
pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/* Copyright 2021 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


-- Remove messages from the device_inbox table which were orphaned
-- because a device was hidden using Synapse earlier than 1.47.0.
-- This runs as background task, but may take a bit to finish.

INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(6503, 'remove_hidden_devices_from_device_inbox', '{}');
74 changes: 74 additions & 0 deletions tests/storage/databases/main/test_deviceinbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,77 @@ def test_background_remove_deleted_devices_from_device_inbox(self):
)
self.assertEqual(1, len(res))
self.assertEqual(res[0], "cur_device")

def test_background_remove_hidden_devices_from_device_inbox(self):
"""Test that the background task to delete hidden devices
from device_inboxes works properly."""

# create a valid device
self.get_success(
self.store.store_device(self.user_id, "cur_device", "display_name")
)

# create a hidden device
self.get_success(
self.store.db_pool.simple_insert(
"devices",
values={
"user_id": self.user_id,
"device_id": "hidden_device",
"display_name": "hidden_display_name",
"hidden": True,
},
)
)

# Add device_inbox to devices
self.get_success(
self.store.db_pool.simple_insert(
"device_inbox",
{
"user_id": self.user_id,
"device_id": "cur_device",
"stream_id": 1,
"message_json": "{}",
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"device_inbox",
{
"user_id": self.user_id,
"device_id": "hidden_device",
"stream_id": 2,
"message_json": "{}",
},
)
)

# Insert and run the background update.
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": "remove_hidden_devices_from_device_inbox",
"progress_json": "{}",
},
)
)

# ... and tell the DataStore that it hasn't finished all updates yet
self.store.db_pool.updates._all_done = False

self.wait_for_background_updates()

# Make sure the background task deleted hidden devices from device_inbox
res = self.get_success(
self.store.db_pool.simple_select_onecol(
table="device_inbox",
keyvalues={},
retcol="device_id",
desc="get_device_id_from_device_inbox",
)
)
self.assertEqual(1, len(res))
self.assertEqual(res[0], "cur_device")