feat(unique-count): Add grouped_prorated_unique_count to ClickhouseStore
rsempe committed Feb 16, 2024
1 parent 875a513 commit 15a9fd5
Showing 3 changed files with 149 additions and 1 deletion.
56 changes: 56 additions & 0 deletions app/services/events/stores/clickhouse/unique_count_query.rb
@@ -93,6 +93,43 @@ def grouped_query
  SQL
end

def grouped_prorated_query
  <<-SQL
    #{grouped_events_cte_sql},
    event_values AS (
      SELECT
        #{group_names},
        property,
        operation_type,
        timestamp
      FROM (
        SELECT
          timestamp,
          property,
          operation_type,
          #{group_names},
          #{grouped_operation_value_sql} AS adjusted_value
        FROM events_data
        ORDER BY timestamp ASC
      ) adjusted_event_values
      WHERE adjusted_value != 0 -- adjusted_value = 0 does not impact the total
      GROUP BY #{group_names}, property, operation_type, timestamp
    )
    SELECT
      #{group_names},
      SUM(period_ratio) as aggregation
    FROM (
      SELECT
        (#{grouped_period_ratio_sql}) AS period_ratio,
        #{group_names}
      FROM event_values
    ) cumulated_ratios
    GROUP BY #{group_names}
  SQL
end

# NOTE: Not used in production, only for debugging purposes, to check the computed values before aggregation
# Returns an array of each event's timestamp, property, operation type and operation value
# Example:
@@ -222,6 +259,25 @@ def period_ratio_sql
  SQL
end

def grouped_period_ratio_sql
  <<-SQL
    if(
      operation_type = 'add',
      -- NOTE: duration in seconds between current add and next remove - using end of period as final boundary if no remove
      toDecimal128(
        (date_diff('seconds', timestamp, leadInFrame(timestamp, 1, toDateTime64(:to_datetime, 5, 'UTC')) OVER (PARTITION BY #{group_names}, property ORDER BY timestamp ASC ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING))),
        :decimal_scale
      )
      /
      -- NOTE: full duration of the period
      #{charges_duration.days.to_i},
      -- NOTE: operation was a remove, so the duration is 0
      toDecimal128(0, :decimal_scale)
    )
  SQL
end

def group_names
  @group_names ||= store.grouped_by.map.with_index { |_, index| "g_#{index}" }.join(', ')
end
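For context, grouped_period_ratio_sql prorates each 'add' event by the share of the charge period during which the property stays active: the seconds between the add and the next event for the same group/property pair (falling back to :to_datetime when there is no remove), divided by the full period duration (charges_duration.days.to_i seconds); 'remove' events contribute 0. Below is a minimal Ruby sketch of the same arithmetic, assuming hypothetical names (period_ratio_for, period_start, period_end, next_event_at) that are not part of the codebase.

# Illustrative only: mirrors the if(operation_type = 'add', ...) expression above.
def period_ratio_for(operation_type:, timestamp:, next_event_at:, period_start:, period_end:)
  return 0.0 unless operation_type == 'add' # a remove contributes nothing on its own

  # Seconds between this add and the next remove (or the end of the period),
  # divided by the full duration of the charge period.
  boundary = next_event_at || period_end
  (boundary - timestamp) / (period_end - period_start)
end

period_start = Time.utc(2023, 3, 1)
period_end = Time.utc(2023, 4, 1)

# An add on March 2 with a matching remove on March 3 counts for 1 day out of 31.
period_ratio_for(
  operation_type: 'add',
  timestamp: Time.utc(2023, 3, 2),
  next_event_at: Time.utc(2023, 3, 3),
  period_start: period_start,
  period_end: period_end,
) # => 0.0322... (1/31)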
14 changes: 14 additions & 0 deletions app/services/events/stores/clickhouse_store.rb
@@ -161,6 +161,20 @@ def grouped_unique_count
  prepare_grouped_result(::Clickhouse::EventsRaw.connection.select_all(sql).rows)
end

def grouped_prorated_unique_count
  query = Events::Stores::Clickhouse::UniqueCountQuery.new(store: self)
  sql = ActiveRecord::Base.sanitize_sql_for_conditions(
    [
      query.grouped_prorated_query,
      {
        to_datetime: to_datetime.ceil,
        decimal_scale: DECIMAL_SCALE,
      },
    ],
  )
  prepare_grouped_result(::Clickhouse::EventsRaw.connection.select_all(sql).rows)
end

def max
  events.maximum(Arel.sql(sanitized_numeric_property))
end
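The store method binds :to_datetime and :decimal_scale into the grouped prorated query, runs it against ClickHouse, and feeds the rows to prepare_grouped_result. Based on the spec below, each element of the result is a hash with a :groups sub-hash (one key per grouped_by dimension) and a prorated :value. A sketch of the expected shape, assuming the groups and events used in the spec (element ordering is illustrative):

# event_store is the ClickhouseStore under test in the spec below,
# with grouped_by = %w[agent_name other].
event_store.grouped_prorated_unique_count
# => [
#      { groups: { 'agent_name' => 'frodo',   'other' => nil }, value: 0.96774... }, # 30/31
#      { groups: { 'agent_name' => 'aragorn', 'other' => nil }, value: 0.03225... }, # 1/31
#      { groups: { 'agent_name' => nil,       'other' => nil }, value: 0.93548... }, # 29/31
#    ]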
80 changes: 79 additions & 1 deletion spec/services/events/stores/clickhouse_store_spec.rb
@@ -270,7 +270,7 @@
    expect(null_group[:groups]['other']).to be_nil
    expect(null_group[:value]).to eq(1)

-    expect(result[...-1].map { |r| r[:value] }).to contain_exactly(1, 0)
+    expect((result - [null_group]).map { |r| r[:value] }).to contain_exactly(1, 0)
  end

  context 'with no events' do
@@ -283,6 +283,84 @@
  end
end

describe '#grouped_prorated_unique_count' do
  let(:grouped_by) { %w[agent_name other] }
  let(:started_at) { Time.zone.parse('2023-03-01') }

  let(:events) do
    [
      Clickhouse::EventsRaw.create!(
        organization_id: organization.id,
        external_subscription_id: subscription.external_id,
        external_customer_id: customer.external_id,
        code:,
        timestamp: boundaries[:from_datetime] + 1.day,
        properties: {
          billable_metric.field_name => 2,
          agent_name: 'frodo',
        },
      ),
      Clickhouse::EventsRaw.create!(
        organization_id: organization.id,
        external_subscription_id: subscription.external_id,
        external_customer_id: customer.external_id,
        code:,
        timestamp: boundaries[:from_datetime] + 1.day,
        properties: {
          billable_metric.field_name => 2,
          agent_name: 'aragorn',
        },
      ),
      Clickhouse::EventsRaw.create!(
        organization_id: organization.id,
        external_subscription_id: subscription.external_id,
        external_customer_id: customer.external_id,
        code:,
        timestamp: boundaries[:from_datetime] + 2.days,
        properties: {
          billable_metric.field_name => 2,
          agent_name: 'aragorn',
          operation_type: 'remove',
        },
      ),
      Clickhouse::EventsRaw.create!(
        organization_id: organization.id,
        external_subscription_id: subscription.external_id,
        external_customer_id: customer.external_id,
        code:,
        timestamp: boundaries[:from_datetime] + 2.days,
        properties: { billable_metric.field_name => 2 },
      ),
    ]
  end

  before do
    event_store.aggregation_property = billable_metric.field_name
  end

  it 'returns the unique count of event properties' do
    result = event_store.grouped_prorated_unique_count

    expect(result.count).to eq(3)

    null_group = result.find { |v| v[:groups]['agent_name'].nil? }
    expect(null_group[:groups]['other']).to be_nil
    expect(null_group[:value].round(3)).to eq(0.935) # 29/31

    # NOTE: Events calculation: [1/31, 30/31]
    expect((result - [null_group]).map { |r| r[:value].round(3) }).to contain_exactly(0.032, 0.968)
  end

  context 'with no events' do
    let(:events) { [] }

    it 'returns the unique count of event properties' do
      result = event_store.grouped_prorated_unique_count
      expect(result.count).to eq(0)
    end
  end
end

describe '.events_values' do
  it 'returns the value attached to each event' do
    event_store.aggregation_property = billable_metric.field_name
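The expected values in the #grouped_prorated_unique_count spec above follow from prorating over the 31-day period starting at started_at (2023-03-01): each unique property counts for the fraction of the period between its add event and its remove event, or the end of the period when it is never removed. A quick check of the arithmetic behind the three expectations:

# 31-day period (March 2023); events are created 1 and 2 days after from_datetime.
(30.0 / 31).round(3) # => 0.968  frodo: added March 2, never removed -> 30 days remain
(1.0 / 31).round(3)  # => 0.032  aragorn: added March 2, removed March 3 -> 1 day
(29.0 / 31).round(3) # => 0.935  event with no agent_name: added March 3 -> 29 days remain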
