Skip to content

Commit

Permalink
Remove metadata when dropping chunk
Browse files Browse the repository at this point in the history
Historically we preserve chunk metadata because the old format of the
Continuous Aggregate has the `chunk_id` column in the materialization
hypertable so in order to don't have chunk ids left over there we just
mark it as dropped whe dropping chunks.

In #4269 we introduced a new Continuous Aggregate format that don't
store the `chunk_id` in the materialization hypertable anymore so it's
safe to also remove the metadata when dropping chunk and all associated
Continuous Aggregates are in the new format.

Also added a post-update SQL script to cleanup unecessary dropped chunk
metadata in our catalog.

Closes #6570
  • Loading branch information
fabriziomello committed Feb 9, 2024
1 parent d93aa5c commit cfaadbe
Show file tree
Hide file tree
Showing 22 changed files with 1,034 additions and 480 deletions.
2 changes: 2 additions & 0 deletions .unreleased/pr_6621
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fixes: #6621 Remove metadata when dropping chunks
Thanks: @ndjzurawsk For reporting error when dropping chunks
62 changes: 62 additions & 0 deletions sql/updates/post-update.sql
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,65 @@ $$;
-- Repair relations that have relacl entries for users that do not
-- exist in pg_authid
CALL _timescaledb_functions.repair_relation_acls();

-- Cleanup metadata for deleted chunks
DO $$
DECLARE
ts_major INTEGER;
ts_minor INTEGER;
BEGIN
SELECT ((string_to_array(extversion,'.'))[1])::int, ((string_to_array(extversion,'.'))[2])::int
INTO ts_major, ts_minor
FROM pg_extension WHERE extname = 'timescaledb';

IF ts_major >= 2 AND ts_minor >= 15 THEN
CREATE UNLOGGED TABLE _timescaledb_catalog._chunks_remove AS
SELECT id FROM _timescaledb_catalog.chunk
WHERE dropped IS TRUE
AND NOT EXISTS (
SELECT FROM information_schema.tables
WHERE tables.table_schema = chunk.schema_name
AND tables.table_name = chunk.table_name
)
AND NOT EXISTS (
SELECT FROM _timescaledb_catalog.hypertable
JOIN _timescaledb_catalog.continuous_agg ON continuous_agg.raw_hypertable_id = hypertable.id
WHERE hypertable.id = chunk.hypertable_id
-- for the old caggs format we need to keep chunk metadata for dropped chunks
AND continuous_agg.finalized IS FALSE
);

WITH _dimension_slice_remove AS (
DELETE FROM _timescaledb_catalog.dimension_slice
USING _timescaledb_catalog.chunk_constraint, _timescaledb_catalog._chunks_remove
WHERE dimension_slice.id = chunk_constraint.dimension_slice_id
AND chunk_constraint.chunk_id = _chunks_remove.id
RETURNING _timescaledb_catalog.dimension_slice.id
)
DELETE FROM _timescaledb_catalog.chunk_constraint
USING _dimension_slice_remove
WHERE chunk_constraint.dimension_slice_id = _dimension_slice_remove.id;

DELETE FROM _timescaledb_internal.bgw_policy_chunk_stats
USING _timescaledb_catalog._chunks_remove
WHERE bgw_policy_chunk_stats.chunk_id = _chunks_remove.id;

DELETE FROM _timescaledb_catalog.chunk_index
USING _timescaledb_catalog._chunks_remove
WHERE chunk_index.chunk_id = _chunks_remove.id;

DELETE FROM _timescaledb_catalog.compression_chunk_size
USING _timescaledb_catalog._chunks_remove
WHERE compression_chunk_size.chunk_id = _chunks_remove.id
OR compression_chunk_size.compressed_chunk_id = _chunks_remove.id;

DELETE FROM _timescaledb_catalog.chunk
USING _timescaledb_catalog._chunks_remove
WHERE chunk.id = _chunks_remove.id
OR chunk.compressed_chunk_id = _chunks_remove.id;

ALTER EXTENSION timescaledb DROP TABLE _timescaledb_catalog._chunks_remove;
DROP TABLE _timescaledb_catalog._chunks_remove;
END IF;
END;
$$;
3 changes: 2 additions & 1 deletion src/chunk.c
Original file line number Diff line number Diff line change
Expand Up @@ -3946,6 +3946,7 @@ ts_chunk_do_drop_chunks(Hypertable *ht, int64 older_than, int64 newer_than, int3
}
}

bool all_caggs_finalized = ts_continuous_agg_hypertable_all_finalized(hypertable_id);
List *dropped_chunk_names = NIL;
for (uint64 i = 0; i < num_chunks; i++)
{
Expand All @@ -3968,7 +3969,7 @@ ts_chunk_do_drop_chunks(Hypertable *ht, int64 older_than, int64 newer_than, int3
chunk_name = psprintf("%s.%s", schema_name, table_name);
dropped_chunk_names = lappend(dropped_chunk_names, chunk_name);

if (has_continuous_aggs)
if (has_continuous_aggs && !all_caggs_finalized)
ts_chunk_drop_preserve_catalog_row(chunks + i, DROP_RESTRICT, log_level);
else
ts_chunk_drop(chunks + i, DROP_RESTRICT, log_level);
Expand Down
27 changes: 27 additions & 0 deletions src/ts_catalog/continuous_agg.c
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,33 @@ ts_continuous_agg_hypertable_status(int32 hypertable_id)
return status;
}

TSDLLEXPORT bool
ts_continuous_agg_hypertable_all_finalized(int32 raw_hypertable_id)
{
ScanIterator iterator =
ts_scan_iterator_create(CONTINUOUS_AGG, AccessShareLock, CurrentMemoryContext);
bool all_finalized = true;

init_scan_by_raw_hypertable_id(&iterator, raw_hypertable_id);
ts_scanner_foreach(&iterator)
{
FormData_continuous_agg data;
TupleInfo *ti = ts_scan_iterator_tuple_info(&iterator);

continuous_agg_formdata_fill(&data, ti);

if (!data.finalized)
{
all_finalized = false;
break;
}
}

ts_scan_iterator_close(&iterator);

return all_finalized;
}

TSDLLEXPORT List *
ts_continuous_aggs_find_by_raw_table_id(int32 raw_hypertable_id)
{
Expand Down
1 change: 1 addition & 0 deletions src/ts_catalog/continuous_agg.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ extern TSDLLEXPORT void ts_materialization_invalidation_log_delete_inner(int32 m

extern TSDLLEXPORT ContinuousAggHypertableStatus
ts_continuous_agg_hypertable_status(int32 hypertable_id);
extern TSDLLEXPORT bool ts_continuous_agg_hypertable_all_finalized(int32 raw_hypertable_id);
extern TSDLLEXPORT List *ts_continuous_aggs_find_by_raw_table_id(int32 raw_hypertable_id);
extern TSDLLEXPORT ContinuousAgg *ts_continuous_agg_find_by_view_name(const char *schema,
const char *name,
Expand Down
14 changes: 6 additions & 8 deletions test/sql/updates/post.catalog.sql
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,18 @@ SELECT count(*)
-- The list of tables configured to be dumped.
SELECT unnest(extconfig)::regclass::text, unnest(extcondition) FROM pg_extension WHERE extname = 'timescaledb' ORDER BY 1;

-- Show dropped chunks
SELECT id, hypertable_id, schema_name, table_name, dropped
FROM _timescaledb_catalog.chunk c
WHERE c.dropped
ORDER BY c.id, c.hypertable_id;

-- Show chunks that are not dropped and include owner in the output
SELECT c.id, c.hypertable_id, c.schema_name, c.table_name, c.dropped, cl.relowner::regrole
FROM _timescaledb_catalog.chunk c
INNER JOIN pg_class cl ON (cl.oid=format('%I.%I', schema_name, table_name)::regclass)
WHERE NOT c.dropped
WHERE c.dropped IS FALSE
ORDER BY c.id, c.hypertable_id;

SELECT * FROM _timescaledb_catalog.chunk_constraint ORDER BY chunk_id, dimension_slice_id, constraint_name;
SELECT chunk_constraint.* FROM _timescaledb_catalog.chunk_constraint
JOIN _timescaledb_catalog.chunk ON chunk.id = chunk_constraint.chunk_id
WHERE chunk.dropped IS FALSE
ORDER BY chunk_constraint.chunk_id, chunk_constraint.dimension_slice_id, chunk_constraint.constraint_name;

SELECT index_name FROM _timescaledb_catalog.chunk_index ORDER BY index_name;

-- Show attnum of all regclass objects belonging to our extension
Expand Down
71 changes: 34 additions & 37 deletions tsl/test/expected/cagg_ddl-13.out
Original file line number Diff line number Diff line change
Expand Up @@ -573,14 +573,11 @@ SELECT * FROM drop_chunks_table ORDER BY time ASC limit 1;
30 | 30
(1 row)

--we see the chunks row with the dropped flags set;
SELECT id, hypertable_id, schema_name, table_name, compressed_chunk_id, dropped, status, osm_chunk FROM _timescaledb_catalog.chunk where dropped;
id | hypertable_id | schema_name | table_name | compressed_chunk_id | dropped | status | osm_chunk
----+---------------+-----------------------+--------------------+---------------------+---------+--------+-----------
13 | 10 | _timescaledb_internal | _hyper_10_13_chunk | | t | 0 | f
14 | 10 | _timescaledb_internal | _hyper_10_14_chunk | | t | 0 | f
15 | 10 | _timescaledb_internal | _hyper_10_15_chunk | | t | 0 | f
(3 rows)
--chunks are removed
SELECT id, hypertable_id, schema_name, table_name, compressed_chunk_id, dropped, status, osm_chunk FROM _timescaledb_catalog.chunk WHERE dropped;
id | hypertable_id | schema_name | table_name | compressed_chunk_id | dropped | status | osm_chunk
----+---------------+-------------+------------+---------------------+---------+--------+-----------
(0 rows)

--still see data in the view
SELECT * FROM drop_chunks_view WHERE time_bucket < (integer_now_test2()-9) ORDER BY time_bucket DESC;
Expand Down Expand Up @@ -634,9 +631,9 @@ WHERE hypertable_name = 'drop_chunks_table'
ORDER BY range_start_integer;
chunk_name | range_start_integer | range_end_integer
--------------------+---------------------+-------------------
_hyper_10_13_chunk | 0 | 10
_hyper_10_14_chunk | 10 | 20
_hyper_10_15_chunk | 20 | 30
_hyper_10_18_chunk | 0 | 10
_hyper_10_19_chunk | 10 | 20
_hyper_10_20_chunk | 20 | 30
_hyper_10_16_chunk | 30 | 40
(4 rows)

Expand Down Expand Up @@ -681,7 +678,7 @@ FROM timescaledb_information.chunks
WHERE hypertable_name = :'drop_chunks_mat_table_name' ORDER BY range_start_integer;
chunk_name | range_start_integer | range_end_integer
--------------------+---------------------+-------------------
_hyper_11_20_chunk | 0 | 100
_hyper_11_23_chunk | 0 | 100
(1 row)

\set ON_ERROR_STOP 0
Expand All @@ -702,12 +699,12 @@ WHERE hypertable_name = 'drop_chunks_table'
ORDER BY 2,3;
chunk_name | range_start_integer | range_end_integer
--------------------+---------------------+-------------------
_hyper_10_13_chunk | 0 | 10
_hyper_10_14_chunk | 10 | 20
_hyper_10_15_chunk | 20 | 30
_hyper_10_18_chunk | 0 | 10
_hyper_10_19_chunk | 10 | 20
_hyper_10_20_chunk | 20 | 30
_hyper_10_16_chunk | 30 | 40
_hyper_10_18_chunk | 40 | 50
_hyper_10_19_chunk | 50 | 60
_hyper_10_21_chunk | 40 | 50
_hyper_10_22_chunk | 50 | 60
(6 rows)

-- Pick the second chunk as the one to drop
Expand Down Expand Up @@ -766,11 +763,11 @@ WHERE hypertable_name = 'drop_chunks_table'
ORDER BY 2,3;
chunk_name | range_start_integer | range_end_integer
--------------------+---------------------+-------------------
_hyper_10_13_chunk | 0 | 10
_hyper_10_15_chunk | 20 | 30
_hyper_10_18_chunk | 0 | 10
_hyper_10_20_chunk | 20 | 30
_hyper_10_16_chunk | 30 | 40
_hyper_10_18_chunk | 40 | 50
_hyper_10_19_chunk | 50 | 60
_hyper_10_21_chunk | 40 | 50
_hyper_10_22_chunk | 50 | 60
(5 rows)

-- Data is no longer in the table but still in the view
Expand Down Expand Up @@ -799,8 +796,8 @@ CALL refresh_continuous_aggregate('drop_chunks_view', NULL, 30);
SELECT drop_chunks('drop_chunks_table', older_than=>30);
drop_chunks
------------------------------------------
_timescaledb_internal._hyper_10_13_chunk
_timescaledb_internal._hyper_10_15_chunk
_timescaledb_internal._hyper_10_18_chunk
_timescaledb_internal._hyper_10_20_chunk
(2 rows)

-- Verify that the chunks are dropped
Expand All @@ -811,8 +808,8 @@ ORDER BY 2,3;
chunk_name | range_start_integer | range_end_integer
--------------------+---------------------+-------------------
_hyper_10_16_chunk | 30 | 40
_hyper_10_18_chunk | 40 | 50
_hyper_10_19_chunk | 50 | 60
_hyper_10_21_chunk | 40 | 50
_hyper_10_22_chunk | 50 | 60
(3 rows)

-- The continuous aggregate should be refreshed in the regions covered
Expand Down Expand Up @@ -906,8 +903,8 @@ SELECT user_view,
AND user_view::text LIKE 'whatever_view%';
user_view | mat_table | mat_tablespace | chunk_name | chunk_tablespace
-----------------+-----------------------------+----------------+--------------------+------------------
whatever_view_1 | _materialized_hypertable_13 | | _hyper_13_24_chunk |
whatever_view_2 | _materialized_hypertable_14 | tablespace1 | _hyper_14_25_chunk | tablespace1
whatever_view_1 | _materialized_hypertable_13 | | _hyper_13_27_chunk |
whatever_view_2 | _materialized_hypertable_14 | tablespace1 | _hyper_14_28_chunk | tablespace1
(2 rows)

ALTER MATERIALIZED VIEW whatever_view_1 SET TABLESPACE tablespace2;
Expand All @@ -921,14 +918,14 @@ SELECT user_view,
AND user_view::text LIKE 'whatever_view%';
user_view | mat_table | mat_tablespace | chunk_name | chunk_tablespace
-----------------+-----------------------------+----------------+--------------------+------------------
whatever_view_1 | _materialized_hypertable_13 | tablespace2 | _hyper_13_24_chunk | tablespace2
whatever_view_2 | _materialized_hypertable_14 | tablespace1 | _hyper_14_25_chunk | tablespace1
whatever_view_1 | _materialized_hypertable_13 | tablespace2 | _hyper_13_27_chunk | tablespace2
whatever_view_2 | _materialized_hypertable_14 | tablespace1 | _hyper_14_28_chunk | tablespace1
(2 rows)

DROP MATERIALIZED VIEW whatever_view_1;
NOTICE: drop cascades to table _timescaledb_internal._hyper_13_24_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_13_27_chunk
DROP MATERIALIZED VIEW whatever_view_2;
NOTICE: drop cascades to table _timescaledb_internal._hyper_14_25_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_14_28_chunk
-- test bucket width expressions on integer hypertables
CREATE TABLE metrics_int2 (
time int2 NOT NULL,
Expand Down Expand Up @@ -1128,7 +1125,7 @@ SUM(value), COUNT(value)
FROM conditionsnm GROUP BY bucket WITH DATA;
NOTICE: refreshing continuous aggregate "conditionsnm_4"
DROP materialized view conditionsnm_4;
NOTICE: drop cascades to table _timescaledb_internal._hyper_26_37_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_26_40_chunk
-- Case 2: DROP CASCADE should have similar behaviour as DROP
CREATE MATERIALIZED VIEW conditionsnm_4
WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
Expand All @@ -1138,7 +1135,7 @@ SUM(value), COUNT(value)
FROM conditionsnm GROUP BY bucket WITH DATA;
NOTICE: refreshing continuous aggregate "conditionsnm_4"
DROP materialized view conditionsnm_4 CASCADE;
NOTICE: drop cascades to table _timescaledb_internal._hyper_27_38_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_27_41_chunk
-- Case 3: require CASCADE in case of dependent object
CREATE MATERIALIZED VIEW conditionsnm_4
WITH (timescaledb.continuous, timescaledb.materialized_only = TRUE)
Expand All @@ -1155,7 +1152,7 @@ ERROR: cannot drop view conditionsnm_4 because other objects depend on it
-- Case 4: DROP CASCADE with dependency
DROP MATERIALIZED VIEW conditionsnm_4 CASCADE;
NOTICE: drop cascades to view see_cagg
NOTICE: drop cascades to table _timescaledb_internal._hyper_28_39_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_28_42_chunk
-- Test DROP SCHEMA CASCADE with continuous aggregates
--
-- Issue: #2350
Expand Down Expand Up @@ -1580,7 +1577,7 @@ DELETE FROM test_setting WHERE val = 20;
--TEST test with multiple settings on continuous aggregates with real time aggregates turned off initially --
-- test for materialized_only + compress combinations (real time aggs enabled initially)
DROP MATERIALIZED VIEW test_setting_cagg;
NOTICE: drop cascades to table _timescaledb_internal._hyper_40_47_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_40_50_chunk
CREATE MATERIALIZED VIEW test_setting_cagg with (timescaledb.continuous, timescaledb.materialized_only = true)
AS SELECT time_bucket('1h',time), avg(val), count(*) FROM test_setting GROUP BY 1;
NOTICE: refreshing continuous aggregate "test_setting_cagg"
Expand Down Expand Up @@ -1770,8 +1767,8 @@ Indexes:
"_materialized_hypertable_45_bucket_idx" btree (bucket DESC)
Triggers:
ts_insert_blocker BEFORE INSERT ON _timescaledb_internal._materialized_hypertable_45 FOR EACH ROW EXECUTE FUNCTION _timescaledb_functions.insert_blocker()
Child tables: _timescaledb_internal._hyper_45_52_chunk,
_timescaledb_internal._hyper_45_53_chunk
Child tables: _timescaledb_internal._hyper_45_55_chunk,
_timescaledb_internal._hyper_45_56_chunk

\d+ 'cashflows'
View "public.cashflows"
Expand Down
Loading

0 comments on commit cfaadbe

Please sign in to comment.