Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Delete messages from device_inbox table when deleting device #10969

Merged
merged 22 commits into from
Oct 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/10969.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a long-standing bug where messages in the `device_inbox` table for deleted devices would persist indefinitely. Contributed by @dklimpel and @JohannesKleine.
92 changes: 91 additions & 1 deletion synapse/storage/databases/main/deviceinbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
from synapse.logging.opentracing import log_kv, set_tag, trace
from synapse.replication.tcp.streams import ToDeviceStream
from synapse.storage._base import SQLBaseStore, db_to_json
from synapse.storage.database import DatabasePool
from synapse.storage.database import DatabasePool, LoggingTransaction
from synapse.storage.engines import PostgresEngine
from synapse.storage.util.id_generators import MultiWriterIdGenerator, StreamIdGenerator
from synapse.types import JsonDict
from synapse.util import json_encoder
from synapse.util.caches.expiringcache import ExpiringCache
from synapse.util.caches.stream_change_cache import StreamChangeCache
Expand Down Expand Up @@ -552,6 +553,7 @@ def _add_messages_to_local_device_inbox_txn(

class DeviceInboxBackgroundUpdateStore(SQLBaseStore):
DEVICE_INBOX_STREAM_ID = "device_inbox_stream_drop"
REMOVE_DELETED_DEVICES = "remove_deleted_devices_from_device_inbox"

def __init__(self, database: DatabasePool, db_conn, hs):
super().__init__(database, db_conn, hs)
Expand All @@ -567,6 +569,11 @@ def __init__(self, database: DatabasePool, db_conn, hs):
self.DEVICE_INBOX_STREAM_ID, self._background_drop_index_device_inbox
)

self.db_pool.updates.register_background_update_handler(
self.REMOVE_DELETED_DEVICES,
self._remove_deleted_devices_from_device_inbox,
)

async def _background_drop_index_device_inbox(self, progress, batch_size):
def reindex_txn(conn):
txn = conn.cursor()
Expand All @@ -579,6 +586,89 @@ def reindex_txn(conn):

return 1

async def _remove_deleted_devices_from_device_inbox(
self, progress: JsonDict, batch_size: int
) -> int:
"""A background update that deletes all device_inboxes for deleted devices.

This should only need to be run once (when users upgrade to v1.46.0)

Args:
progress: JsonDict used to store progress of this background update
batch_size: the maximum number of rows to retrieve in a single select query

Returns:
The number of deleted rows
"""

def _remove_deleted_devices_from_device_inbox_txn(
txn: LoggingTransaction,
) -> int:
"""stream_id is not unique
we need to use an inclusive `stream_id >= ?` clause,
since we might not have deleted all dead device messages for the stream_id
returned from the previous query

Then delete only rows matching the `(user_id, device_id, stream_id)` tuple,
to avoid problems of deleting a large number of rows all at once
due to a single device having lots of device messages.
"""

last_stream_id = progress.get("stream_id", 0)

sql = """
SELECT device_id, user_id, stream_id
FROM device_inbox
WHERE
stream_id >= ?
AND (device_id, user_id) NOT IN (
SELECT device_id, user_id FROM devices
)
ORDER BY stream_id
LIMIT ?
"""

txn.execute(sql, (last_stream_id, batch_size))
rows = txn.fetchall()

num_deleted = 0
for row in rows:
num_deleted += self.db_pool.simple_delete_txn(
txn,
"device_inbox",
{"device_id": row[0], "user_id": row[1], "stream_id": row[2]},
)

if rows:
# send more than stream_id to progress
# otherwise it can happen in large deployments that
# no change of status is visible in the log file
# it may be that the stream_id does not change in several runs
self.db_pool.updates._background_update_progress_txn(
txn,
self.REMOVE_DELETED_DEVICES,
{
"device_id": rows[-1][0],
"user_id": rows[-1][1],
"stream_id": rows[-1][2],
},
Comment on lines +650 to +654
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or should it be easier like:

Suggested change
{
"device_id": rows[-1][0],
"user_id": rows[-1][1],
"stream_id": rows[-1][2],
},
row[-1],

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the current code is a bit clearer tbh

)

return num_deleted

number_deleted = await self.db_pool.runInteraction(
"_remove_deleted_devices_from_device_inbox",
_remove_deleted_devices_from_device_inbox_txn,
)

# The task is finished when no more lines are deleted.
if not number_deleted:
await self.db_pool.updates._end_background_update(
self.REMOVE_DELETED_DEVICES
)

return number_deleted


class DeviceInboxStore(DeviceInboxWorkerStore, DeviceInboxBackgroundUpdateStore):
pass
35 changes: 21 additions & 14 deletions synapse/storage/databases/main/devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -1121,19 +1121,14 @@ async def store_device(
raise StoreError(500, "Problem storing device.")

async def delete_device(self, user_id: str, device_id: str) -> None:
"""Delete a device.
"""Delete a device and its device_inbox.

Args:
user_id: The ID of the user which owns the device
device_id: The ID of the device to delete
"""
await self.db_pool.simple_delete_one(
table="devices",
keyvalues={"user_id": user_id, "device_id": device_id, "hidden": False},
desc="delete_device",
)

self.device_id_exists_cache.invalidate((user_id, device_id))
await self.delete_devices(user_id, [device_id])

async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
"""Deletes several devices.
Expand All @@ -1142,13 +1137,25 @@ async def delete_devices(self, user_id: str, device_ids: List[str]) -> None:
user_id: The ID of the user which owns the devices
device_ids: The IDs of the devices to delete
"""
await self.db_pool.simple_delete_many(
table="devices",
column="device_id",
iterable=device_ids,
keyvalues={"user_id": user_id, "hidden": False},
desc="delete_devices",
)

def _delete_devices_txn(txn: LoggingTransaction) -> None:
self.db_pool.simple_delete_many_txn(
txn,
table="devices",
column="device_id",
values=device_ids,
keyvalues={"user_id": user_id, "hidden": False},
)

self.db_pool.simple_delete_many_txn(
txn,
table="device_inbox",
column="device_id",
values=device_ids,
keyvalues={"user_id": user_id},
)

await self.db_pool.runInteraction("delete_devices", _delete_devices_txn)
for device_id in device_ids:
self.device_id_exists_cache.invalidate((user_id, device_id))

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/* Copyright 2021 The Matrix.org Foundation C.I.C
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


-- Remove messages from the device_inbox table which were orphaned
-- when a device was deleted using Synapse earlier than 1.46.0.
-- This runs as background task, but may take a bit to finish.

INSERT INTO background_updates (ordering, update_name, progress_json) VALUES
(6402, 'remove_deleted_devices_from_device_inbox', '{}');
31 changes: 31 additions & 0 deletions tests/handlers/test_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,37 @@ def test_delete_device(self):
# we'd like to check the access token was invalidated, but that's a
# bit of a PITA.

def test_delete_device_and_device_inbox(self):
self._record_users()

# add an device_inbox
self.get_success(
self.store.db_pool.simple_insert(
"device_inbox",
{
"user_id": user1,
"device_id": "abc",
"stream_id": 1,
"message_json": "{}",
},
)
)

# delete the device
self.get_success(self.handler.delete_device(user1, "abc"))

# check that the device_inbox was deleted
res = self.get_success(
self.store.db_pool.simple_select_one(
table="device_inbox",
keyvalues={"user_id": user1, "device_id": "abc"},
retcols=("user_id", "device_id"),
allow_none=True,
desc="get_device_id_from_device_inbox",
)
)
self.assertIsNone(res)

def test_update_device(self):
self._record_users()

Expand Down
90 changes: 90 additions & 0 deletions tests/storage/databases/main/test_deviceinbox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from synapse.rest import admin
from synapse.rest.client import devices

from tests.unittest import HomeserverTestCase


class DeviceInboxBackgroundUpdateStoreTestCase(HomeserverTestCase):

servlets = [
admin.register_servlets,
devices.register_servlets,
]

def prepare(self, reactor, clock, hs):
self.store = hs.get_datastore()
self.user_id = self.register_user("foo", "pass")

def test_background_remove_deleted_devices_from_device_inbox(self):
"""Test that the background task to delete old device_inboxes works properly."""

# create a valid device
self.get_success(
self.store.store_device(self.user_id, "cur_device", "display_name")
)

# Add device_inbox to devices
self.get_success(
self.store.db_pool.simple_insert(
"device_inbox",
{
"user_id": self.user_id,
"device_id": "cur_device",
"stream_id": 1,
"message_json": "{}",
},
)
)
self.get_success(
self.store.db_pool.simple_insert(
"device_inbox",
{
"user_id": self.user_id,
"device_id": "old_device",
"stream_id": 2,
"message_json": "{}",
},
)
)

# Insert and run the background update.
self.get_success(
self.store.db_pool.simple_insert(
"background_updates",
{
"update_name": "remove_deleted_devices_from_device_inbox",
"progress_json": "{}",
},
)
)

# ... and tell the DataStore that it hasn't finished all updates yet
self.store.db_pool.updates._all_done = False

self.wait_for_background_updates()

# Make sure the background task deleted old device_inbox
res = self.get_success(
self.store.db_pool.simple_select_onecol(
table="device_inbox",
keyvalues={},
retcol="device_id",
desc="get_device_id_from_device_inbox",
)
)
self.assertEqual(1, len(res))
self.assertEqual(res[0], "cur_device")