-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Handle non-strings in the event_search
table in synapse_port_db
#12037
Changes from 5 commits
7b9d806
5889d87
c6d0534
c7b6710
b6aff6e
76d88af
f045019
a895049
37c1798
02d8fe4
4b5ec98
b7ad0a9
75bc1bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Clean up rows left behind by a long-standing bug where integers could be inserted into the `event_search` table when using sqlite. The bug itself was fixed by accident in Synapse 1.44.0. | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -115,6 +115,7 @@ class SearchBackgroundUpdateStore(SearchWorkerStore): | |
EVENT_SEARCH_ORDER_UPDATE_NAME = "event_search_order" | ||
EVENT_SEARCH_USE_GIST_POSTGRES_NAME = "event_search_postgres_gist" | ||
EVENT_SEARCH_USE_GIN_POSTGRES_NAME = "event_search_postgres_gin" | ||
EVENT_SEARCH_DELETE_NON_STRINGS = "event_search_sqlite_delete_non_strings" | ||
|
||
def __init__( | ||
self, | ||
|
@@ -147,6 +148,10 @@ def __init__( | |
self.EVENT_SEARCH_USE_GIN_POSTGRES_NAME, self._background_reindex_gin_search | ||
) | ||
|
||
self.db_pool.updates.register_background_update_handler( | ||
self.EVENT_SEARCH_DELETE_NON_STRINGS, self._background_delete_non_strings | ||
) | ||
|
||
async def _background_reindex_search(self, progress, batch_size): | ||
# we work through the events table from highest stream id to lowest | ||
target_min_stream_id = progress["target_min_stream_id_inclusive"] | ||
|
@@ -372,6 +377,28 @@ def reindex_search_txn(txn): | |
|
||
return num_rows | ||
|
||
async def _background_delete_non_strings( | ||
self, progress: JsonDict, batch_size: int | ||
) -> int: | ||
"""Deletes rows with non-string `value`s from `event_search` if using sqlite. | ||
|
||
Prior to Synapse 1.44.0, malformed events received over federation could cause integers | ||
to be inserted into the `event_search` table when using sqlite. | ||
""" | ||
|
||
def delete_non_strings_txn(txn: LoggingTransaction) -> None: | ||
txn.execute("DELETE FROM event_search WHERE typeof(value) != 'text'") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we need to limit this in any way? (Will this cause any locks on the table?) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good question! It crossed my mind at some point and I forgot to make a note of it on the PR.
I think this will lock the table for the duration of the DELETE. The sqlite docs suggest that locks are done on the file level, so it's actually the entire database that gets locked, which is unfortunate. We could add an index first, then delete in batches, but if we're going to ask sqlite to scan the whole table to build an index, we might as well just do the delete. |
||
|
||
if isinstance(self.database_engine, Sqlite3Engine): | ||
squahtx marked this conversation as resolved.
Show resolved
Hide resolved
|
||
await self.db_pool.runInteraction( | ||
self.EVENT_SEARCH_DELETE_NON_STRINGS, delete_non_strings_txn | ||
) | ||
|
||
await self.db_pool.updates._end_background_update( | ||
self.EVENT_SEARCH_DELETE_NON_STRINGS | ||
) | ||
return 1 | ||
|
||
|
||
class SearchStore(SearchBackgroundUpdateStore): | ||
def __init__( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
/* Copyright 2022 The Matrix.org Foundation C.I.C | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
|
||
-- Delete rows with non-string `value`s from `event_search` if using sqlite. | ||
-- | ||
-- Prior to Synapse 1.44.0, malformed events received over federation could | ||
-- cause integers to be inserted into the `event_search` table. | ||
INSERT INTO background_updates (ordering, update_name, progress_json) VALUES | ||
(6805, 'event_search_sqlite_delete_non_strings', '{}'); | ||
clokep marked this conversation as resolved.
Show resolved
Hide resolved
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The wording's a little clumsy here. Suggestions are welcome!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe this is a little cleaner? Just a thought!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it help to mention symptoms?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you both for the suggestions!