diff --git a/.unreleased/PR_6155 b/.unreleased/PR_6155 new file mode 100644 index 00000000000..634d6558d83 --- /dev/null +++ b/.unreleased/PR_6155 @@ -0,0 +1 @@ +Fixes: #6155 Align gapfill bucket generation with time_bucket diff --git a/tsl/src/nodes/gapfill/gapfill_exec.c b/tsl/src/nodes/gapfill/gapfill_exec.c index 94ec6cc2f01..24e2acd1cb4 100644 --- a/tsl/src/nodes/gapfill/gapfill_exec.c +++ b/tsl/src/nodes/gapfill/gapfill_exec.c @@ -640,15 +640,15 @@ gapfill_advance_timestamp(GapFillState *state) { case DATEOID: next = DirectFunctionCall2(date_pl_interval, - DateADTGetDatum(state->next_timestamp), - IntervalPGetDatum(state->gapfill_interval)); + DateADTGetDatum(state->gapfill_start), + IntervalPGetDatum(state->next_offset)); next = DirectFunctionCall1(timestamp_date, next); state->next_timestamp = DatumGetDateADT(next); break; case TIMESTAMPOID: next = DirectFunctionCall2(timestamp_pl_interval, - TimestampGetDatum(state->next_timestamp), - IntervalPGetDatum(state->gapfill_interval)); + TimestampGetDatum(state->gapfill_start), + IntervalPGetDatum(state->next_offset)); state->next_timestamp = DatumGetTimestamp(next); break; case TIMESTAMPTZOID: @@ -658,14 +658,22 @@ gapfill_advance_timestamp(GapFillState *state) */ next = DirectFunctionCall2(state->have_timezone ? timestamptz_pl_interval : timestamp_pl_interval, - TimestampTzGetDatum(state->next_timestamp), - IntervalPGetDatum(state->gapfill_interval)); + TimestampTzGetDatum(state->gapfill_start), + IntervalPGetDatum(state->next_offset)); state->next_timestamp = DatumGetTimestampTz(next); break; default: state->next_timestamp += state->gapfill_period; break; } + /* Advance the interval offset if necessary */ + if (state->gapfill_interval) + { + Datum tspan = DirectFunctionCall2(interval_pl, + IntervalPGetDatum(state->gapfill_interval), + IntervalPGetDatum(state->next_offset)); + state->next_offset = DatumGetIntervalP(tspan); + } } /* @@ -742,6 +750,7 @@ gapfill_begin(CustomScanState *node, EState *estate, int eflags) state->gapfill_start = align_with_time_bucket(state, get_start_arg(state)); } state->next_timestamp = state->gapfill_start; + state->next_offset = state->gapfill_interval; /* gap fill end */ if (is_const_null(get_finish_arg(state))) @@ -938,6 +947,7 @@ gapfill_state_reset_group(GapFillState *state, TupleTableSlot *slot) break; } } + state->next_offset = state->gapfill_interval; } /* diff --git a/tsl/src/nodes/gapfill/gapfill_internal.h b/tsl/src/nodes/gapfill/gapfill_internal.h index 6c69141854c..8b65eb1917f 100644 --- a/tsl/src/nodes/gapfill/gapfill_internal.h +++ b/tsl/src/nodes/gapfill/gapfill_internal.h @@ -104,6 +104,8 @@ typedef struct GapFillState Interval *gapfill_interval; int64 next_timestamp; + /* interval offset for next_timestamp from gapfill_start */ + Interval *next_offset; int64 subslot_time; /* time of tuple in subslot */ int time_index; /* position of time column */ diff --git a/tsl/test/shared/expected/gapfill-13.out b/tsl/test/shared/expected/gapfill-13.out index d2e2e4ec598..a34879cd718 100644 --- a/tsl/test/shared/expected/gapfill-13.out +++ b/tsl/test/shared/expected/gapfill-13.out @@ -313,6 +313,7 @@ QUERY PLAN -> Seq Scan on _hyper_X_X_chunk (8 rows) +DROP TABLE gapfill_plan_test; \set METRICS metrics_int -- All test against table :METRICS first \set ON_ERROR_STOP 0 @@ -1579,6 +1580,7 @@ SELECT * FROM gapfill_insert_test; 4 (4 rows) +DROP TABLE gapfill_insert_test; -- test join SELECT t1.*,t2.m FROM ( @@ -3292,11 +3294,11 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'Europe/Berlin', '2000-01-01 time_bucket_gapfill Fri Dec 31 15:00:00 1999 PST Tue Feb 29 15:00:00 2000 PST - Sat Apr 29 15:00:00 2000 PDT - Thu Jun 29 15:00:00 2000 PDT - Tue Aug 29 15:00:00 2000 PDT - Sun Oct 29 15:00:00 2000 PST - Fri Dec 29 15:00:00 2000 PST + Sun Apr 30 15:00:00 2000 PDT + Fri Jun 30 15:00:00 2000 PDT + Thu Aug 31 15:00:00 2000 PDT + Tue Oct 31 15:00:00 2000 PST + Sun Dec 31 15:00:00 2000 PST (7 rows) SELECT time_bucket_gapfill('2 month'::interval, ts, current_setting('timezone'), '2000-01-01','2001-01-01') FROM (VALUES ('2000-03-01'::timestamptz)) v(ts) GROUP BY 1; @@ -3313,11 +3315,11 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'UTC', '2000-01-01','2001-01 time_bucket_gapfill Fri Dec 31 16:00:00 1999 PST Tue Feb 29 16:00:00 2000 PST - Sat Apr 29 16:00:00 2000 PDT - Thu Jun 29 16:00:00 2000 PDT - Tue Aug 29 16:00:00 2000 PDT - Sun Oct 29 16:00:00 2000 PST - Fri Dec 29 16:00:00 2000 PST + Sun Apr 30 16:00:00 2000 PDT + Fri Jun 30 16:00:00 2000 PDT + Thu Aug 31 16:00:00 2000 PDT + Tue Oct 31 16:00:00 2000 PST + Sun Dec 31 16:00:00 2000 PST (7 rows) SET timezone TO 'Europe/Berlin'; @@ -3332,14 +3334,13 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'Europe/Berlin', '2000-01-01 (6 rows) RESET timezone; -DROP INDEX gapfill_plan_test_indx; -- Test gapfill with arrays (#5981) SELECT time_bucket_gapfill(5, ts, 1, 100) as ts, int_arr, locf(last(value, ts)) FROM ( SELECT ARRAY[1,2,3,4]::int[] as int_arr, x as ts, x+500000 as value FROM generate_series(1, 10, 100) as x ) t -GROUP BY 1, 2 +GROUP BY 1, 2; ts | int_arr | locf ----+-----------+-------- 0 | {1,2,3,4} | 500001 @@ -3364,3 +3365,30 @@ GROUP BY 1, 2 95 | {1,2,3,4} | 500001 (20 rows) +-- Test gapfill is aligned with non-gapfill time_bucket +-- when using different timezones and month bucketing +CREATE TABLE month_timezone(time timestamptz NOT NULL, value float); +SELECT table_name FROM create_hypertable('month_timezone','time'); + table_name + month_timezone +(1 row) + +INSERT INTO month_timezone VALUES ('2023-03-01 14:05:00+01', 3.123), ('2023-04-01 14:05:00+01',4.123), ('2023-05-01 14:05:00+01', 5.123); +SELECT + time_bucket_gapfill('1 month'::interval, time, 'Europe/Berlin', '2023-01-01', '2023-07-01') AS time, + sum(value) +FROM + month_timezone +GROUP BY 1; + time | sum +------------------------------+------- + Sat Dec 31 15:00:00 2022 PST | + Tue Jan 31 15:00:00 2023 PST | + Tue Feb 28 15:00:00 2023 PST | 3.123 + Fri Mar 31 15:00:00 2023 PDT | 4.123 + Sun Apr 30 15:00:00 2023 PDT | 5.123 + Wed May 31 15:00:00 2023 PDT | + Fri Jun 30 15:00:00 2023 PDT | +(7 rows) + +DROP TABLE month_timezone; diff --git a/tsl/test/shared/expected/gapfill-14.out b/tsl/test/shared/expected/gapfill-14.out index d2e2e4ec598..a34879cd718 100644 --- a/tsl/test/shared/expected/gapfill-14.out +++ b/tsl/test/shared/expected/gapfill-14.out @@ -313,6 +313,7 @@ QUERY PLAN -> Seq Scan on _hyper_X_X_chunk (8 rows) +DROP TABLE gapfill_plan_test; \set METRICS metrics_int -- All test against table :METRICS first \set ON_ERROR_STOP 0 @@ -1579,6 +1580,7 @@ SELECT * FROM gapfill_insert_test; 4 (4 rows) +DROP TABLE gapfill_insert_test; -- test join SELECT t1.*,t2.m FROM ( @@ -3292,11 +3294,11 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'Europe/Berlin', '2000-01-01 time_bucket_gapfill Fri Dec 31 15:00:00 1999 PST Tue Feb 29 15:00:00 2000 PST - Sat Apr 29 15:00:00 2000 PDT - Thu Jun 29 15:00:00 2000 PDT - Tue Aug 29 15:00:00 2000 PDT - Sun Oct 29 15:00:00 2000 PST - Fri Dec 29 15:00:00 2000 PST + Sun Apr 30 15:00:00 2000 PDT + Fri Jun 30 15:00:00 2000 PDT + Thu Aug 31 15:00:00 2000 PDT + Tue Oct 31 15:00:00 2000 PST + Sun Dec 31 15:00:00 2000 PST (7 rows) SELECT time_bucket_gapfill('2 month'::interval, ts, current_setting('timezone'), '2000-01-01','2001-01-01') FROM (VALUES ('2000-03-01'::timestamptz)) v(ts) GROUP BY 1; @@ -3313,11 +3315,11 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'UTC', '2000-01-01','2001-01 time_bucket_gapfill Fri Dec 31 16:00:00 1999 PST Tue Feb 29 16:00:00 2000 PST - Sat Apr 29 16:00:00 2000 PDT - Thu Jun 29 16:00:00 2000 PDT - Tue Aug 29 16:00:00 2000 PDT - Sun Oct 29 16:00:00 2000 PST - Fri Dec 29 16:00:00 2000 PST + Sun Apr 30 16:00:00 2000 PDT + Fri Jun 30 16:00:00 2000 PDT + Thu Aug 31 16:00:00 2000 PDT + Tue Oct 31 16:00:00 2000 PST + Sun Dec 31 16:00:00 2000 PST (7 rows) SET timezone TO 'Europe/Berlin'; @@ -3332,14 +3334,13 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'Europe/Berlin', '2000-01-01 (6 rows) RESET timezone; -DROP INDEX gapfill_plan_test_indx; -- Test gapfill with arrays (#5981) SELECT time_bucket_gapfill(5, ts, 1, 100) as ts, int_arr, locf(last(value, ts)) FROM ( SELECT ARRAY[1,2,3,4]::int[] as int_arr, x as ts, x+500000 as value FROM generate_series(1, 10, 100) as x ) t -GROUP BY 1, 2 +GROUP BY 1, 2; ts | int_arr | locf ----+-----------+-------- 0 | {1,2,3,4} | 500001 @@ -3364,3 +3365,30 @@ GROUP BY 1, 2 95 | {1,2,3,4} | 500001 (20 rows) +-- Test gapfill is aligned with non-gapfill time_bucket +-- when using different timezones and month bucketing +CREATE TABLE month_timezone(time timestamptz NOT NULL, value float); +SELECT table_name FROM create_hypertable('month_timezone','time'); + table_name + month_timezone +(1 row) + +INSERT INTO month_timezone VALUES ('2023-03-01 14:05:00+01', 3.123), ('2023-04-01 14:05:00+01',4.123), ('2023-05-01 14:05:00+01', 5.123); +SELECT + time_bucket_gapfill('1 month'::interval, time, 'Europe/Berlin', '2023-01-01', '2023-07-01') AS time, + sum(value) +FROM + month_timezone +GROUP BY 1; + time | sum +------------------------------+------- + Sat Dec 31 15:00:00 2022 PST | + Tue Jan 31 15:00:00 2023 PST | + Tue Feb 28 15:00:00 2023 PST | 3.123 + Fri Mar 31 15:00:00 2023 PDT | 4.123 + Sun Apr 30 15:00:00 2023 PDT | 5.123 + Wed May 31 15:00:00 2023 PDT | + Fri Jun 30 15:00:00 2023 PDT | +(7 rows) + +DROP TABLE month_timezone; diff --git a/tsl/test/shared/expected/gapfill-15.out b/tsl/test/shared/expected/gapfill-15.out index d2e2e4ec598..a34879cd718 100644 --- a/tsl/test/shared/expected/gapfill-15.out +++ b/tsl/test/shared/expected/gapfill-15.out @@ -313,6 +313,7 @@ QUERY PLAN -> Seq Scan on _hyper_X_X_chunk (8 rows) +DROP TABLE gapfill_plan_test; \set METRICS metrics_int -- All test against table :METRICS first \set ON_ERROR_STOP 0 @@ -1579,6 +1580,7 @@ SELECT * FROM gapfill_insert_test; 4 (4 rows) +DROP TABLE gapfill_insert_test; -- test join SELECT t1.*,t2.m FROM ( @@ -3292,11 +3294,11 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'Europe/Berlin', '2000-01-01 time_bucket_gapfill Fri Dec 31 15:00:00 1999 PST Tue Feb 29 15:00:00 2000 PST - Sat Apr 29 15:00:00 2000 PDT - Thu Jun 29 15:00:00 2000 PDT - Tue Aug 29 15:00:00 2000 PDT - Sun Oct 29 15:00:00 2000 PST - Fri Dec 29 15:00:00 2000 PST + Sun Apr 30 15:00:00 2000 PDT + Fri Jun 30 15:00:00 2000 PDT + Thu Aug 31 15:00:00 2000 PDT + Tue Oct 31 15:00:00 2000 PST + Sun Dec 31 15:00:00 2000 PST (7 rows) SELECT time_bucket_gapfill('2 month'::interval, ts, current_setting('timezone'), '2000-01-01','2001-01-01') FROM (VALUES ('2000-03-01'::timestamptz)) v(ts) GROUP BY 1; @@ -3313,11 +3315,11 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'UTC', '2000-01-01','2001-01 time_bucket_gapfill Fri Dec 31 16:00:00 1999 PST Tue Feb 29 16:00:00 2000 PST - Sat Apr 29 16:00:00 2000 PDT - Thu Jun 29 16:00:00 2000 PDT - Tue Aug 29 16:00:00 2000 PDT - Sun Oct 29 16:00:00 2000 PST - Fri Dec 29 16:00:00 2000 PST + Sun Apr 30 16:00:00 2000 PDT + Fri Jun 30 16:00:00 2000 PDT + Thu Aug 31 16:00:00 2000 PDT + Tue Oct 31 16:00:00 2000 PST + Sun Dec 31 16:00:00 2000 PST (7 rows) SET timezone TO 'Europe/Berlin'; @@ -3332,14 +3334,13 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'Europe/Berlin', '2000-01-01 (6 rows) RESET timezone; -DROP INDEX gapfill_plan_test_indx; -- Test gapfill with arrays (#5981) SELECT time_bucket_gapfill(5, ts, 1, 100) as ts, int_arr, locf(last(value, ts)) FROM ( SELECT ARRAY[1,2,3,4]::int[] as int_arr, x as ts, x+500000 as value FROM generate_series(1, 10, 100) as x ) t -GROUP BY 1, 2 +GROUP BY 1, 2; ts | int_arr | locf ----+-----------+-------- 0 | {1,2,3,4} | 500001 @@ -3364,3 +3365,30 @@ GROUP BY 1, 2 95 | {1,2,3,4} | 500001 (20 rows) +-- Test gapfill is aligned with non-gapfill time_bucket +-- when using different timezones and month bucketing +CREATE TABLE month_timezone(time timestamptz NOT NULL, value float); +SELECT table_name FROM create_hypertable('month_timezone','time'); + table_name + month_timezone +(1 row) + +INSERT INTO month_timezone VALUES ('2023-03-01 14:05:00+01', 3.123), ('2023-04-01 14:05:00+01',4.123), ('2023-05-01 14:05:00+01', 5.123); +SELECT + time_bucket_gapfill('1 month'::interval, time, 'Europe/Berlin', '2023-01-01', '2023-07-01') AS time, + sum(value) +FROM + month_timezone +GROUP BY 1; + time | sum +------------------------------+------- + Sat Dec 31 15:00:00 2022 PST | + Tue Jan 31 15:00:00 2023 PST | + Tue Feb 28 15:00:00 2023 PST | 3.123 + Fri Mar 31 15:00:00 2023 PDT | 4.123 + Sun Apr 30 15:00:00 2023 PDT | 5.123 + Wed May 31 15:00:00 2023 PDT | + Fri Jun 30 15:00:00 2023 PDT | +(7 rows) + +DROP TABLE month_timezone; diff --git a/tsl/test/shared/expected/gapfill-16.out b/tsl/test/shared/expected/gapfill-16.out index 4ba9c41a2bf..1f430c6adef 100644 --- a/tsl/test/shared/expected/gapfill-16.out +++ b/tsl/test/shared/expected/gapfill-16.out @@ -315,6 +315,7 @@ QUERY PLAN -> Index Only Scan using _hyper_X_X_chunk_gapfill_plan_test_indx on _hyper_X_X_chunk (10 rows) +DROP TABLE gapfill_plan_test; \set METRICS metrics_int -- All test against table :METRICS first \set ON_ERROR_STOP 0 @@ -1581,6 +1582,7 @@ SELECT * FROM gapfill_insert_test; 4 (4 rows) +DROP TABLE gapfill_insert_test; -- test join SELECT t1.*,t2.m FROM ( @@ -3294,11 +3296,11 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'Europe/Berlin', '2000-01-01 time_bucket_gapfill Fri Dec 31 15:00:00 1999 PST Tue Feb 29 15:00:00 2000 PST - Sat Apr 29 15:00:00 2000 PDT - Thu Jun 29 15:00:00 2000 PDT - Tue Aug 29 15:00:00 2000 PDT - Sun Oct 29 15:00:00 2000 PST - Fri Dec 29 15:00:00 2000 PST + Sun Apr 30 15:00:00 2000 PDT + Fri Jun 30 15:00:00 2000 PDT + Thu Aug 31 15:00:00 2000 PDT + Tue Oct 31 15:00:00 2000 PST + Sun Dec 31 15:00:00 2000 PST (7 rows) SELECT time_bucket_gapfill('2 month'::interval, ts, current_setting('timezone'), '2000-01-01','2001-01-01') FROM (VALUES ('2000-03-01'::timestamptz)) v(ts) GROUP BY 1; @@ -3315,11 +3317,11 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'UTC', '2000-01-01','2001-01 time_bucket_gapfill Fri Dec 31 16:00:00 1999 PST Tue Feb 29 16:00:00 2000 PST - Sat Apr 29 16:00:00 2000 PDT - Thu Jun 29 16:00:00 2000 PDT - Tue Aug 29 16:00:00 2000 PDT - Sun Oct 29 16:00:00 2000 PST - Fri Dec 29 16:00:00 2000 PST + Sun Apr 30 16:00:00 2000 PDT + Fri Jun 30 16:00:00 2000 PDT + Thu Aug 31 16:00:00 2000 PDT + Tue Oct 31 16:00:00 2000 PST + Sun Dec 31 16:00:00 2000 PST (7 rows) SET timezone TO 'Europe/Berlin'; @@ -3334,14 +3336,13 @@ SELECT time_bucket_gapfill('2 month'::interval, ts, 'Europe/Berlin', '2000-01-01 (6 rows) RESET timezone; -DROP INDEX gapfill_plan_test_indx; -- Test gapfill with arrays (#5981) SELECT time_bucket_gapfill(5, ts, 1, 100) as ts, int_arr, locf(last(value, ts)) FROM ( SELECT ARRAY[1,2,3,4]::int[] as int_arr, x as ts, x+500000 as value FROM generate_series(1, 10, 100) as x ) t -GROUP BY 1, 2 +GROUP BY 1, 2; ts | int_arr | locf ----+-----------+-------- 0 | {1,2,3,4} | 500001 @@ -3366,3 +3367,30 @@ GROUP BY 1, 2 95 | {1,2,3,4} | 500001 (20 rows) +-- Test gapfill is aligned with non-gapfill time_bucket +-- when using different timezones and month bucketing +CREATE TABLE month_timezone(time timestamptz NOT NULL, value float); +SELECT table_name FROM create_hypertable('month_timezone','time'); + table_name + month_timezone +(1 row) + +INSERT INTO month_timezone VALUES ('2023-03-01 14:05:00+01', 3.123), ('2023-04-01 14:05:00+01',4.123), ('2023-05-01 14:05:00+01', 5.123); +SELECT + time_bucket_gapfill('1 month'::interval, time, 'Europe/Berlin', '2023-01-01', '2023-07-01') AS time, + sum(value) +FROM + month_timezone +GROUP BY 1; + time | sum +------------------------------+------- + Sat Dec 31 15:00:00 2022 PST | + Tue Jan 31 15:00:00 2023 PST | + Tue Feb 28 15:00:00 2023 PST | 3.123 + Fri Mar 31 15:00:00 2023 PDT | 4.123 + Sun Apr 30 15:00:00 2023 PDT | 5.123 + Wed May 31 15:00:00 2023 PDT | + Fri Jun 30 15:00:00 2023 PDT | +(7 rows) + +DROP TABLE month_timezone; diff --git a/tsl/test/shared/sql/gapfill.sql.in b/tsl/test/shared/sql/gapfill.sql.in index 2358eb60323..fdc5cfe5c1d 100644 --- a/tsl/test/shared/sql/gapfill.sql.in +++ b/tsl/test/shared/sql/gapfill.sql.in @@ -139,6 +139,8 @@ ORDER BY 1,2; FROM gapfill_plan_test ORDER BY 2,1; +DROP TABLE gapfill_plan_test; + \set METRICS metrics_int -- All test against table :METRICS first @@ -623,6 +625,8 @@ CREATE TABLE gapfill_insert_test(id INT); INSERT INTO gapfill_insert_test SELECT time_bucket_gapfill(1,time,1,5) FROM (VALUES (1),(2)) v(time) GROUP BY 1 ORDER BY 1; SELECT * FROM gapfill_insert_test; +DROP TABLE gapfill_insert_test; + -- test join SELECT t1.*,t2.m FROM ( @@ -1510,12 +1514,26 @@ SET timezone TO 'Europe/Berlin'; SELECT time_bucket_gapfill('2 month'::interval, ts, 'Europe/Berlin', '2000-01-01','2001-01-01') FROM (VALUES ('2000-03-01'::timestamptz)) v(ts) GROUP BY 1; RESET timezone; -DROP INDEX gapfill_plan_test_indx; - -- Test gapfill with arrays (#5981) SELECT time_bucket_gapfill(5, ts, 1, 100) as ts, int_arr, locf(last(value, ts)) FROM ( SELECT ARRAY[1,2,3,4]::int[] as int_arr, x as ts, x+500000 as value FROM generate_series(1, 10, 100) as x ) t -GROUP BY 1, 2 +GROUP BY 1, 2; + +-- Test gapfill is aligned with non-gapfill time_bucket +-- when using different timezones and month bucketing +CREATE TABLE month_timezone(time timestamptz NOT NULL, value float); +SELECT table_name FROM create_hypertable('month_timezone','time'); + +INSERT INTO month_timezone VALUES ('2023-03-01 14:05:00+01', 3.123), ('2023-04-01 14:05:00+01',4.123), ('2023-05-01 14:05:00+01', 5.123); + +SELECT + time_bucket_gapfill('1 month'::interval, time, 'Europe/Berlin', '2023-01-01', '2023-07-01') AS time, + sum(value) +FROM + month_timezone +GROUP BY 1; + +DROP TABLE month_timezone;