From 1014ea4d9f9c32c757461c5b9ed90c4dd69d0232 Mon Sep 17 00:00:00 2001 From: Vincent Pochet Date: Fri, 16 Feb 2024 16:11:18 +0100 Subject: [PATCH] feat(unique-count): Prorated unique count queries --- .../stores/clickhouse/unique_count_query.rb | 28 ++++++++++--- .../events/stores/clickhouse_store.rb | 2 + .../stores/postgres/unique_count_query.rb | 40 ++++++++++++++++--- app/services/events/stores/postgres_store.rb | 2 + 4 files changed, 60 insertions(+), 12 deletions(-) diff --git a/app/services/events/stores/clickhouse/unique_count_query.rb b/app/services/events/stores/clickhouse/unique_count_query.rb index 5dbe6be4068..3421e625bff 100644 --- a/app/services/events/stores/clickhouse/unique_count_query.rb +++ b/app/services/events/stores/clickhouse/unique_count_query.rb @@ -30,7 +30,7 @@ def query GROUP BY property ) - SELECT SUM(sum_adjusted_value) AS aggregation FROM event_values + SELECT coalesce(SUM(sum_adjusted_value), 0) AS aggregation FROM event_values SQL end @@ -55,7 +55,7 @@ def prorated_query GROUP BY property, operation_type, timestamp ) - SELECT SUM(period_ratio) as aggregation + SELECT coalesce(SUM(period_ratio), 0) as aggregation FROM ( SELECT (#{period_ratio_sql}) AS period_ratio FROM event_values @@ -87,7 +87,7 @@ def grouped_query SELECT #{group_names}, - SUM(sum_adjusted_value) as aggregation + coalesce(SUM(sum_adjusted_value), 0) as aggregation FROM event_values GROUP BY #{group_names} SQL @@ -119,7 +119,7 @@ def grouped_prorated_query SELECT #{group_names}, - SUM(period_ratio) as aggregation + coalesce(SUM(period_ratio), 0) as aggregation FROM ( SELECT (#{grouped_period_ratio_sql}) AS period_ratio, @@ -246,7 +246,15 @@ def period_ratio_sql operation_type = 'add', -- NOTE: duration in seconds between current add and next remove - using end of period as final boundaries if no remove toDecimal128( - (date_diff('seconds', timestamp, leadInFrame(timestamp, 1, toDateTime64(:to_datetime, 5, 'UTC')) OVER (PARTITION BY property ORDER BY timestamp ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING))), + (date_diff( + 'seconds', + if(timestamp < toDateTime64(:from_datetime, 5, 'UTC'), toDateTime64(:from_datetime, 5, 'UTC'), timestamp), + if( + (leadInFrame(timestamp, 1, toDateTime64(:to_datetime, 5, 'UTC')) OVER (PARTITION BY property ORDER BY timestamp ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)) < toDateTime64(:from_datetime, 5, 'UTC'), + toDateTime64(:to_datetime, 5, 'UTC'), + leadInFrame(timestamp, 1, toDateTime64(:to_datetime, 5, 'UTC')) OVER (PARTITION BY property ORDER BY timestamp ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + ) + )), :decimal_scale ) / @@ -265,7 +273,15 @@ def grouped_period_ratio_sql operation_type = 'add', -- NOTE: duration in seconds between current add and next remove - using end of period as final boundaries if no remove toDecimal128( - (date_diff('seconds', timestamp, leadInFrame(timestamp, 1, toDateTime64(:to_datetime, 5, 'UTC')) OVER (PARTITION BY #{group_names}, property ORDER BY timestamp ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING))), + (date_diff( + 'seconds', + if(timestamp < toDateTime64(:from_datetime, 5, 'UTC'), toDateTime64(:from_datetime, 5, 'UTC'), timestamp), + if( + (leadInFrame(timestamp, 1, toDateTime64(:to_datetime, 5, 'UTC')) OVER (PARTITION BY #{group_names}, property ORDER BY timestamp ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)) < toDateTime64(:from_datetime, 5, 'UTC'), + toDateTime64(:to_datetime, 5, 'UTC'), + leadInFrame(timestamp, 1, toDateTime64(:to_datetime, 5, 'UTC')) OVER (PARTITION BY #{group_names}, property ORDER BY timestamp ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) + ) + )), :decimal_scale ) / diff --git a/app/services/events/stores/clickhouse_store.rb b/app/services/events/stores/clickhouse_store.rb index 83ce7e64a67..623c3567eb8 100644 --- a/app/services/events/stores/clickhouse_store.rb +++ b/app/services/events/stores/clickhouse_store.rb @@ -136,6 +136,7 @@ def prorated_unique_count [ query.prorated_query, { + from_datetime:, to_datetime: to_datetime.ceil, decimal_scale: DECIMAL_SCALE, }, @@ -167,6 +168,7 @@ def grouped_prorated_unique_count [ query.grouped_prorated_query, { + from_datetime:, to_datetime: to_datetime.ceil, decimal_scale: DECIMAL_SCALE, }, diff --git a/app/services/events/stores/postgres/unique_count_query.rb b/app/services/events/stores/postgres/unique_count_query.rb index 54f54b525d7..2d0c0866689 100644 --- a/app/services/events/stores/postgres/unique_count_query.rb +++ b/app/services/events/stores/postgres/unique_count_query.rb @@ -30,7 +30,7 @@ def query GROUP BY property ) - SELECT SUM(sum_adjusted_value) AS aggregation FROM event_values + SELECT COALESCE(SUM(sum_adjusted_value), 0) AS aggregation FROM event_values SQL end @@ -55,7 +55,7 @@ def prorated_query GROUP BY property, operation_type, timestamp ) - SELECT SUM(period_ratio) as aggregation + SELECT COALESCE(SUM(period_ratio), 0) as aggregation FROM ( SELECT (#{period_ratio_sql}) AS period_ratio FROM event_values @@ -87,7 +87,7 @@ def grouped_query SELECT #{group_names}, - SUM(sum_adjusted_value) as aggregation + COALESCE(SUM(sum_adjusted_value), 0) as aggregation FROM event_values GROUP BY #{group_names} SQL @@ -119,7 +119,7 @@ def grouped_prorated_query SELECT #{group_names}, - SUM(period_ratio) as aggregation + COALESCE(SUM(period_ratio), 0) as aggregation FROM ( SELECT (#{grouped_period_ratio_sql}) AS period_ratio, @@ -237,7 +237,21 @@ def period_ratio_sql CASE WHEN operation_type = 'add' THEN -- NOTE: duration in seconds between current event and next one - using end of period as final boundaries - EXTRACT(EPOCH FROM LEAD(timestamp, 1, :to_datetime) OVER (PARTITION BY property ORDER BY timestamp) - timestamp) + EXTRACT( + EPOCH FROM ( + ( + -- NOTE: if following event is older than the start of the period, we use the start of the period as the reference + CASE WHEN (LEAD(timestamp, 1, :to_datetime) OVER (PARTITION BY property ORDER BY timestamp)) < :from_datetime + THEN :from_datetime + ELSE LEAD(timestamp, 1, :to_datetime) OVER (PARTITION BY property ORDER BY timestamp) + END + ) + - ( + -- NOTE: if events is older than the start of the period, we use the start of the period as the reference + CASE WHEN timestamp < :from_datetime THEN :from_datetime ELSE timestamp END + ) + ) + ) / -- NOTE: full duration of the period #{charges_duration.days.to_i} @@ -252,7 +266,21 @@ def grouped_period_ratio_sql CASE WHEN operation_type = 'add' THEN -- NOTE: duration in seconds between current event and next one - using end of period as final boundaries - EXTRACT(EPOCH FROM LEAD(timestamp, 1, :to_datetime) OVER (PARTITION BY #{group_names}, property ORDER BY timestamp) - timestamp) + EXTRACT( + EPOCH FROM ( + ( + -- NOTE: if following event is older than the start of the period, we use the start of the period as the reference + CASE WHEN (LEAD(timestamp, 1, :to_datetime) OVER (PARTITION BY #{group_names}, property ORDER BY timestamp)) < :from_datetime + THEN :from_datetime + ELSE LEAD(timestamp, 1, :to_datetime) OVER (PARTITION BY #{group_names}, property ORDER BY timestamp) + END + ) + - ( + -- NOTE: if events is older than the start of the period, we use the start of the period as the reference + CASE WHEN timestamp < :from_datetime THEN :from_datetime ELSE timestamp END + ) + ) + ) / -- NOTE: full duration of the period #{charges_duration.days.to_i} diff --git a/app/services/events/stores/postgres_store.rb b/app/services/events/stores/postgres_store.rb index 8fc4c5f28c5..8fd788fb791 100644 --- a/app/services/events/stores/postgres_store.rb +++ b/app/services/events/stores/postgres_store.rb @@ -93,6 +93,7 @@ def prorated_unique_count [ query.prorated_query, { + from_datetime:, to_datetime: to_datetime.ceil, }, ], @@ -118,6 +119,7 @@ def grouped_prorated_unique_count [ query.grouped_prorated_query, { + from_datetime:, to_datetime: to_datetime.ceil, }, ],