From f16584cec24ccb25454b84a24686cd3f3f8f9216 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dagfinn=20Ilmari=20Manns=C3=A5ker?= Date: Sat, 23 May 2020 23:37:03 +0100 Subject: [PATCH 1/2] Improve performance of _get_state_groups_from_groups_txn (#7567) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The query keeps showing up in my slow query log. Replacing the `last_value(event_id) OVER (...)` window function with `SELECT DISTINCT ON (type, state_key)` and `ORDER BY type, state_key, state_group DESC` changes the plan under the top-level Sort node from WindowAgg (cost=280335.88..292963.15 rows=561212 width=80) (actual time=138.651..160.562 rows=27112 loops=1) -> Sort (cost=280335.88..281738.91 rows=561212 width=84) (actual time=138.597..140.622 rows=27112 loops=1) Sort Key: state_groups_state.type, state_groups_state.state_key, state_groups_state.state_group Sort Method: quicksort Memory: 4581kB -> Nested Loop (cost=2.83..226745.22 rows=561212 width=84) (actual time=21.548..47.657 rows=27112 loops=1) -> HashAggregate (cost=2.27..3.28 rows=101 width=8) (actual time=21.526..21.535 rows=20 loops=1) Group Key: state.state_group -> CTE Scan on state (cost=0.00..2.02 rows=101 width=8) (actual time=21.280..21.493 rows=20 loops=1) -> Index Scan using state_groups_state_type_idx on state_groups_state (cost=0.56..2189.40 rows=5557 width=84) (actual time=0.005..0.991 rows=1356 loops=20) Index Cond: (state_group = state.state_group) to Nested Loop (cost=2.83..226745.22 rows=561212 width=84) (actual time=24.194..52.834 rows=27112 loops=1) -> HashAggregate (cost=2.27..3.28 rows=101 width=8) (actual time=24.130..24.138 rows=20 loops=1) Group Key: state.state_group -> CTE Scan on state (cost=0.00..2.02 rows=101 width=8) (actual time=23.887..24.113 rows=20 loops=1) -> Index Scan using state_groups_state_type_idx on state_groups_state (cost=0.56..2189.40 rows=5557 width=84) (actual time=0.016..1.159 rows=1356 loops=20) Index Cond: (state_group = state.state_group) This cuts the execution time from ~190ms to ~130ms, i.e. a reduction of ~30%. 
The full plans are visualised at https://explain.depesz.com/s/WpbT and https://explain.depesz.com/s/KlEk Signed-off-by: Dagfinn Ilmari Mannsåker --- changelog.d/7567.misc | 1 + synapse/storage/data_stores/state/bg_updates.py | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) create mode 100644 changelog.d/7567.misc diff --git a/changelog.d/7567.misc b/changelog.d/7567.misc new file mode 100644 index 000000000000..d243626f0f43 --- /dev/null +++ b/changelog.d/7567.misc @@ -0,0 +1 @@ +Improve performance of `_get_state_groups_from_groups_txn`. diff --git a/synapse/storage/data_stores/state/bg_updates.py b/synapse/storage/data_stores/state/bg_updates.py index e8edaf9f7ba4..ff000bc9ec09 100644 --- a/synapse/storage/data_stores/state/bg_updates.py +++ b/synapse/storage/data_stores/state/bg_updates.py @@ -109,20 +109,20 @@ def _get_state_groups_from_groups_txn( SELECT prev_state_group FROM state_group_edges e, state s WHERE s.state_group = e.state_group ) - SELECT DISTINCT type, state_key, last_value(event_id) OVER ( - PARTITION BY type, state_key ORDER BY state_group ASC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) AS event_id FROM state_groups_state + SELECT DISTINCT ON (type, state_key) + type, state_key, event_id + FROM state_groups_state WHERE state_group IN ( SELECT state_group FROM state - ) + ) %s + ORDER BY type, state_key, state_group DESC """ for group in groups: args = [group] args.extend(where_args) - txn.execute(sql + where_clause, args) + txn.execute(sql % (where_clause,), args) for row in txn: typ, state_key, event_id = row key = (typ, state_key) From 3399b9beafd4ebe7512d12b4a1a9e4262d7ef3ac Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 1 Jun 2020 14:07:13 +0100 Subject: [PATCH 2/2] Update changelog.d/7567.misc --- changelog.d/7567.misc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog.d/7567.misc b/changelog.d/7567.misc index d243626f0f43..b086d5d02616 100644 --- a/changelog.d/7567.misc 
+++ b/changelog.d/7567.misc @@ -1 +1 @@ -Improve performance of `_get_state_groups_from_groups_txn`. +Improve query performance for fetching state from a PostgreSQL database.