Skip to content

Commit

Permalink
refactor(graphqlmetrics): dont store raw data for intermediate table,…
Browse files Browse the repository at this point in the history
… use NULL engine (#1376)
  • Loading branch information
StarpTech authored Nov 18, 2024
1 parent 6c3c4a0 commit 94bd6a6
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 119 deletions.
165 changes: 46 additions & 119 deletions graphqlmetrics/core/metrics_service_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,52 +104,31 @@ func TestPublishGraphQLMetrics(t *testing.T) {
GROUP BY OperationHash LIMIT 1
`).Scan(&opCount))

assert.Greater(t, opCount, uint64(0))
assert.Equal(t, uint64(1), opCount)

// Validate insert
// Validate materialized view

var fieldUsageCount uint64
var fieldUsageCountMv uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage
SELECT COUNT(*) FROM gql_metrics_schema_usage_5m_90d
WHERE OperationHash = 'hash123' AND
OrganizationID = 'org123' AND
FederatedGraphID = 'fed123' AND
RouterConfigVersion = 'v1' AND
Attributes['test'] = 'test123' AND
HttpStatusCode = '200' AND
HasError = true AND
TotalErrors = 1 AND
TotalUsages = 1 AND
TotalClientErrors = 0 AND
ClientName = 'wundergraph' AND
ClientVersion = '1.0.0' AND
hasAny(TypeNames, ['Query']) AND
startsWith(Path, ['hello'])
`).Scan(&fieldUsageCount))
startsWith(Path, ['hi'])
`).Scan(&fieldUsageCountMv))

assert.Greater(t, fieldUsageCount, uint64(0))
assert.Equal(t, uint64(1), fieldUsageCountMv)

var indirectFieldUsageCount uint64
var fieldUsageCount2Mv uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage
WHERE OperationHash = 'hash123' AND
OrganizationID = 'org123' AND
FederatedGraphID = 'fed123' AND
RouterConfigVersion = 'v1' AND
Attributes['test'] = 'test123' AND
HttpStatusCode = '200' AND
HasError = true AND
ClientName = 'wundergraph' AND
ClientVersion = '1.0.0' AND
hasAny(TypeNames, ['Query']) AND
startsWith(Path, ['hello']) AND
IsIndirectFieldUsage = true
`).Scan(&indirectFieldUsageCount))

assert.Greater(t, fieldUsageCount, uint64(0))

// Validate materialized view

var fieldUsageCountMv uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage_5m_90d_mv
SELECT COUNT(*) FROM gql_metrics_schema_usage_5m_90d
WHERE OperationHash = 'hash123' AND
OrganizationID = 'org123' AND
FederatedGraphID = 'fed123' AND
Expand All @@ -161,9 +140,18 @@ func TestPublishGraphQLMetrics(t *testing.T) {
ClientVersion = '1.0.0' AND
hasAny(TypeNames, ['Query']) AND
startsWith(Path, ['hello'])
`).Scan(&fieldUsageCountMv))
`).Scan(&fieldUsageCount2Mv))

assert.Equal(t, uint64(1), fieldUsageCount2Mv)

assert.Greater(t, fieldUsageCountMv, uint64(0))
var fieldUsageLiteCount uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage_lite_1d_90d
WHERE OrganizationID = 'org123' AND
FederatedGraphID = 'fed123'
`).Scan(&fieldUsageLiteCount))

assert.Equal(t, 2, int(fieldUsageLiteCount))

var requestCount uint64
require.NoError(t, db.QueryRow(ctx, `
Expand Down Expand Up @@ -222,18 +210,27 @@ func TestPublishGraphQLMetricsSendEmptyAndFilledMetrics(t *testing.T) {
OperationContent = 'query Hello { hello }'
`).Scan(&opCount))

assert.Equal(t, opCount, uint64(2))
assert.Equal(t, uint64(2), opCount)

// Validate insert
// Validate materialized view

var fieldUsageCount uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage
SELECT COUNT(*) FROM gql_metrics_schema_usage_5m_90d
WHERE OrganizationID = 'org123' AND
FederatedGraphID = 'fed123'
`).Scan(&fieldUsageCount))

assert.Equal(t, int(fieldUsageCount), 3)
assert.Equal(t, 2, int(fieldUsageCount))

var fieldUsageLiteCount uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage_lite_1d_90d
WHERE OrganizationID = 'org123' AND
FederatedGraphID = 'fed123'
`).Scan(&fieldUsageLiteCount))

assert.Equal(t, 2, int(fieldUsageLiteCount))

var requestCount uint64
require.NoError(t, db.QueryRow(ctx, `
Expand Down Expand Up @@ -340,47 +337,11 @@ func TestPublishGraphQLMetricsSmallBatches(t *testing.T) {

assert.Equal(t, opCount, uint64(count))

// Validate insert

var fieldUsageCount uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage
WHERE OrganizationID = 'org123' AND
FederatedGraphID = 'fed123' AND
RouterConfigVersion = 'v1' AND
Attributes['test'] = 'test123' AND
HttpStatusCode = '200' AND
HasError = true AND
ClientName = 'wundergraph' AND
ClientVersion = '1.0.0' AND
hasAny(TypeNames, ['Query']) AND
startsWith(Path, ['hello'])
`).Scan(&fieldUsageCount))

assert.Greater(t, fieldUsageCount, uint64(0))

var allHelloEntries uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage
WHERE OrganizationID = 'org123' AND
FederatedGraphID = 'fed123' AND
RouterConfigVersion = 'v1' AND
Attributes['test'] = 'test123' AND
HttpStatusCode = '200' AND
HasError = true AND
ClientName = 'wundergraph' AND
ClientVersion = '1.0.0' AND
hasAny(TypeNames, ['Query']) AND
has(Path, 'hello')
`).Scan(&allHelloEntries))

assert.Equal(t, int(fieldUsageCount), count)

// Validate materialized view

var fieldUsageCountMv uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage_5m_90d_mv
SELECT COUNT(*) FROM gql_metrics_schema_usage_5m_90d
WHERE OrganizationID = 'org123' AND
FederatedGraphID = 'fed123' AND
RouterConfigVersion = 'v1' AND
Expand All @@ -393,7 +354,7 @@ func TestPublishGraphQLMetricsSmallBatches(t *testing.T) {
startsWith(Path, ['hello'])
`).Scan(&fieldUsageCountMv))

assert.Greater(t, fieldUsageCountMv, uint64(0))
assert.Equal(t, uint64(20_000), fieldUsageCountMv)

var requestCount uint64
require.NoError(t, db.QueryRow(ctx, `
Expand Down Expand Up @@ -426,12 +387,14 @@ func TestPublishAggregatedGraphQLMetrics(t *testing.T) {
TypeNames: []string{"Query"},
SubgraphIDs: []string{"sub123"},
IndirectInterfaceField: false,
Count: 1,
},
{
Path: []string{"hi"},
TypeNames: []string{"Query"},
SubgraphIDs: []string{"sub123"},
IndirectInterfaceField: true,
Count: 1,
},
},
OperationInfo: &graphqlmetricsv1.OperationInfo{
Expand Down Expand Up @@ -490,52 +453,13 @@ func TestPublishAggregatedGraphQLMetrics(t *testing.T) {
GROUP BY OperationHash LIMIT 1
`).Scan(&opCount))

assert.Greater(t, opCount, uint64(0))

// Validate insert

var fieldUsageCount uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage
WHERE OperationHash = 'hash123' AND
OrganizationID = 'org123' AND
FederatedGraphID = 'fed123' AND
RouterConfigVersion = 'v1' AND
Attributes['test'] = 'test123' AND
HttpStatusCode = '200' AND
HasError = true AND
ClientName = 'wundergraph' AND
ClientVersion = '1.0.0' AND
hasAny(TypeNames, ['Query']) AND
startsWith(Path, ['hello'])
`).Scan(&fieldUsageCount))

assert.Greater(t, fieldUsageCount, uint64(0))

var indirectFieldUsageCount uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage
WHERE OperationHash = 'hash123' AND
OrganizationID = 'org123' AND
FederatedGraphID = 'fed123' AND
RouterConfigVersion = 'v1' AND
Attributes['test'] = 'test123' AND
HttpStatusCode = '200' AND
HasError = true AND
ClientName = 'wundergraph' AND
ClientVersion = '1.0.0' AND
hasAny(TypeNames, ['Query']) AND
startsWith(Path, ['hello']) AND
IsIndirectFieldUsage = true
`).Scan(&indirectFieldUsageCount))

assert.Greater(t, fieldUsageCount, uint64(0))
assert.Equal(t, uint64(1), opCount)

// Validate materialized view

var fieldUsageCountMv uint64
require.NoError(t, db.QueryRow(ctx, `
SELECT COUNT(*) FROM gql_metrics_schema_usage_5m_90d_mv
SELECT COUNT(*) FROM gql_metrics_schema_usage_5m_90d
WHERE OperationHash = 'hash123' AND
OrganizationID = 'org123' AND
FederatedGraphID = 'fed123' AND
Expand All @@ -549,7 +473,7 @@ func TestPublishAggregatedGraphQLMetrics(t *testing.T) {
startsWith(Path, ['hello'])
`).Scan(&fieldUsageCountMv))

assert.Greater(t, fieldUsageCountMv, uint64(0))
assert.Equal(t, uint64(1), fieldUsageCountMv)

var requestCount uint64
require.NoError(t, db.QueryRow(ctx, `
Expand Down Expand Up @@ -1098,6 +1022,7 @@ func buildSchemaUsageInfoItem(hash, reqDoc string, numArgMetrics, numTypeMetrics
argMetrics = append(argMetrics, &graphqlmetricsv1.ArgumentUsageInfo{
Path: []string{"hello"},
TypeName: "testType",
Count: 1,
})
}

Expand All @@ -1107,6 +1032,7 @@ func buildSchemaUsageInfoItem(hash, reqDoc string, numArgMetrics, numTypeMetrics
TypeNames: []string{"Query"},
SubgraphIDs: []string{"sub123"},
IndirectInterfaceField: false,
Count: 1,
})
}

Expand All @@ -1115,6 +1041,7 @@ func buildSchemaUsageInfoItem(hash, reqDoc string, numArgMetrics, numTypeMetrics
Path: []string{"hello"},
TypeName: "testType",
EnumValues: []string{"test"},
Count: 1,
})
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
-- migrate:up

-- Remove the gql_metrics_schema_usage table (a no-op when it does not exist).
-- Any rows still stored in the table are removed together with it.
DROP TABLE IF EXISTS gql_metrics_schema_usage;

-- migrate:down

-- Recreate gql_metrics_schema_usage as a MergeTree table. Only the schema is
-- restored; no data is re-inserted by this migration.
CREATE TABLE IF NOT EXISTS gql_metrics_schema_usage
(
`Timestamp` DateTime('UTC') CODEC(Delta(4), ZSTD(3)),
`OrganizationID` LowCardinality(String) CODEC(ZSTD(3)),
`FederatedGraphID` LowCardinality(String) CODEC(ZSTD(3)),
`RouterConfigVersion` LowCardinality(String) CODEC(ZSTD(3)),
`OperationHash` LowCardinality(String) CODEC(ZSTD(3)),
`OperationName` LowCardinality(String) CODEC(ZSTD(3)),
`OperationType` LowCardinality(String) CODEC(ZSTD(3)),
`Count` UInt64 CODEC(Delta(8), ZSTD(3)),
`Path` Array(String) CODEC(ZSTD(3)),
`TypeNames` Array(String) CODEC(ZSTD(3)),
`NamedType` String CODEC(ZSTD(3)),
`ClientName` LowCardinality(String) CODEC(ZSTD(3)),
`ClientVersion` LowCardinality(String) CODEC(ZSTD(3)),
`HttpStatusCode` String CODEC(ZSTD(3)),
`HasError` Bool CODEC(ZSTD(3)),
`SubgraphIDs` Array(LowCardinality(String)) CODEC(ZSTD(3)),
`IsArgument` Bool CODEC(ZSTD(3)),
`IsInput` Bool CODEC(ZSTD(3)),
`Attributes` Map(LowCardinality(String), String) CODEC(ZSTD(3)),
`IsIndirectFieldUsage` Bool DEFAULT false CODEC(ZSTD(3)),
-- Secondary indexes: bloom filters on the operation hash, the array columns
-- and the attribute map keys/values, plus a minmax index on Count.
INDEX idx_operation_hash OperationHash TYPE bloom_filter(0.001) GRANULARITY 1,
INDEX idx_path Path TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_source_ids SubgraphIDs TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_type_names TypeNames TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_attr_key mapKeys(Attributes) TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_attr_value mapValues(Attributes) TYPE bloom_filter(0.01) GRANULARITY 1,
INDEX idx_count Count TYPE minmax GRANULARITY 1
)
ENGINE = MergeTree
-- One partition per calendar day of Timestamp.
PARTITION BY toDate(Timestamp)
ORDER BY (OrganizationID, FederatedGraphID, ClientName, ClientVersion, RouterConfigVersion, OperationHash, HttpStatusCode, HasError, toUnixTimestamp(Timestamp))
-- Rows expire 7 days after their Timestamp.
TTL toDateTime(Timestamp) + toIntervalDay(7)
SETTINGS index_granularity = 8192, ttl_only_drop_parts = 1

Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
-- migrate:up

-- Create gql_metrics_schema_usage with the Null engine: rows inserted here are
-- not stored, but materialized views attached to the table are still updated
-- (see the engine note at the end of the statement).
CREATE TABLE IF NOT EXISTS gql_metrics_schema_usage
(
-- See https://github.com/PostHog/posthog/issues/10616 for why ZSTD(3) is used
Timestamp DateTime('UTC') CODEC(Delta, ZSTD(3)),

-- Organization
OrganizationID LowCardinality(String) CODEC(ZSTD(3)),

-- Router configuration
FederatedGraphID LowCardinality(String) CODEC(ZSTD(3)),
RouterConfigVersion LowCardinality(String) CODEC(ZSTD(3)), -- running schema version

-- Operation
OperationHash LowCardinality(String) CODEC(ZSTD(3)),
OperationName LowCardinality(String) CODEC(ZSTD(3)),
OperationType LowCardinality(String) CODEC(ZSTD(3)), -- query, mutation, subscription

-- Defines how often a field is used. Useful for batching at the collection layer.
Count UInt64 CODEC(Delta, ZSTD(3)),

-- Schema usage
Path Array(String) CODEC(ZSTD(3)),
TypeNames Array(String) CODEC(ZSTD(3)), -- Sorted before insertion
NamedType String CODEC(ZSTD(3)),

-- Client information
ClientName LowCardinality(String) CODEC(ZSTD(3)),
ClientVersion LowCardinality(String) CODEC(ZSTD(3)),

-- Request information
HttpStatusCode String CODEC (ZSTD(3)),
HasError bool CODEC(ZSTD(3)), -- Whether the operation has an error of any kind

-- SubgraphIDs identify the subgraphs that were used to resolve the field
SubgraphIDs Array(LowCardinality(String)) CODEC(ZSTD(3)), -- Sorted before insertion

-- Indicates if the usage was from an argument or a field
IsArgument bool CODEC(ZSTD(3)),

-- Indicates if the usage was from an input field
IsInput bool CODEC(ZSTD(3)),

-- Additional information
Attributes Map(LowCardinality(String), String) CODEC(ZSTD(3)),

-- NOTE(review): presumably marks usage reported through an indirect interface
-- field; confirm against the code that writes this column. Defaults to false.
IsIndirectFieldUsage bool DEFAULT false
)
-- The Null table engine is a powerful optimization - think of it as /dev/null.
-- When data is inserted into the Null table, it is immediately discarded but materialized views are still updated.
-- This is useful for cases where you want to track metrics but don't need to store the raw data.
engine = Null;

-- migrate:down

-- Remove the Null-engine table created by the up migration.
DROP TABLE IF EXISTS gql_metrics_schema_usage;

0 comments on commit 94bd6a6

Please sign in to comment.