Skip to content

Commit

Permalink
chore(recordings): remove hub dependency on recordings ingestion (#14418
Browse files Browse the repository at this point in the history
)

* chore(recordings): remove hub dependency on recordings ingestion

Hub is a grab bag of dependencies that are not all required for
recordings ingestion. To keep the recordings ingestion lean, we
remove the hub dependency and use the postgres and kafka client
directly.

This should increase the availability of the session recordings
workload, e.g. it should not go down if Redis or ClickHouse is down.

* fix capabilities call

* reuse clients if available

* wip

* wip

* wip

* fix tests

* fix healthcheck
  • Loading branch information
Harry Waye authored Feb 28, 2023
1 parent e50b136 commit dcc9acc
Show file tree
Hide file tree
Showing 15 changed files with 435 additions and 259 deletions.
14 changes: 14 additions & 0 deletions plugin-server/functional_tests/session-recordings.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,20 @@ test.concurrent(
20000
)

// Verify the plugin server's `/_health` endpoint comes up and reports the
// session-recordings consumer check as passing. Polled via waitForExpect
// because the server may still be starting when the test begins.
test.concurrent(`liveness check endpoint works`, async () => {
    await waitForExpect(async () => {
        const res = await fetch('http://localhost:6738/_health')
        expect(res.status).toBe(200)

        // Only the session-recordings check matters here; other checks in
        // the payload are allowed via objectContaining.
        const expectedChecks = expect.objectContaining({ 'session-recordings': 'ok' })
        expect(await res.json()).toEqual(expect.objectContaining({ checks: expectedChecks }))
    })
})

test.concurrent(
`consumer handles empty messages`,
async () => {
Expand Down
20 changes: 18 additions & 2 deletions plugin-server/src/capabilities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ export function getPluginServerCapabilities(config: PluginsServerConfig): Plugin
switch (mode) {
case null:
return {
mmdb: true,
ingestion: true,
ingestionOverflow: true,
pluginScheduledTasks: true,
Expand All @@ -19,40 +20,55 @@ export function getPluginServerCapabilities(config: PluginsServerConfig): Plugin
case 'ingestion':
// NOTE: this mode will be removed in the future and replaced with
// `analytics-ingestion` and `recordings-ingestion` modes.
return { ingestion: true, sessionRecordingIngestion: true, ...sharedCapabilities }
return {
mmdb: true,
ingestion: true,
sessionRecordingIngestion: true,
...sharedCapabilities,
}
case 'ingestion-overflow':
return { ingestionOverflow: true, ...sharedCapabilities }
return {
mmdb: true,
ingestionOverflow: true,
...sharedCapabilities,
}
case 'analytics-ingestion':
return {
mmdb: true,
ingestion: true,
...sharedCapabilities,
}
case 'recordings-ingestion':
return {
mmdb: false,
sessionRecordingIngestion: true,
...sharedCapabilities,
}

case 'async':
return {
mmdb: true,
processPluginJobs: true,
processAsyncHandlers: true,
pluginScheduledTasks: true,
...sharedCapabilities,
}
case 'exports':
return {
mmdb: true,
processAsyncHandlers: true,
...sharedCapabilities,
}
case 'jobs': {
return {
mmdb: true,
processPluginJobs: true,
...sharedCapabilities,
}
}
case 'scheduler':
return {
mmdb: true,
pluginScheduledTasks: true,
...sharedCapabilities,
}
Expand Down
4 changes: 3 additions & 1 deletion plugin-server/src/index.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Hub } from '../src/types'
import { getPluginServerCapabilities } from './capabilities'
import { defaultConfig, formatConfigHelp } from './config/config'
import { healthcheckWithExit } from './healthcheck'
import { initApp } from './init'
Expand Down Expand Up @@ -62,6 +63,7 @@ switch (alternativeMode) {
default:
// void the returned promise
initApp(defaultConfig)
void startPluginsServer(defaultConfig, makePiscina)
const capabilities = getPluginServerCapabilities(defaultConfig)
void startPluginsServer(defaultConfig, makePiscina, capabilities)
break
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import { PluginEvent } from '@posthog/plugin-scaffold'
import { StatsD } from 'hot-shots'
import { EachBatchPayload, Kafka } from 'kafkajs'
import { exponentialBuckets, Histogram } from 'prom-client'

Expand All @@ -17,12 +16,10 @@ import { latestOffsetTimestampGauge } from './metrics'
export const startSessionRecordingEventsConsumer = async ({
teamManager,
kafka,
statsd,
partitionsConsumedConcurrently = 5,
}: {
teamManager: TeamManager
kafka: Kafka
statsd?: StatsD
partitionsConsumedConcurrently: number
}) => {
/*
Expand All @@ -40,10 +37,11 @@ export const startSessionRecordingEventsConsumer = async ({
// the Kafka consumer handler.
const producer = kafka.producer()
await producer.connect()
const producerWrapper = new KafkaProducerWrapper(producer, statsd, { KAFKA_FLUSH_FREQUENCY_MS: 0 } as any)
const producerWrapper = new KafkaProducerWrapper(producer, undefined, { KAFKA_FLUSH_FREQUENCY_MS: 0 } as any)

const groupId = 'session-recordings'
const consumer = kafka.consumer({ groupId: groupId })
const sessionTimeout = 30000
const consumer = kafka.consumer({ groupId: groupId, sessionTimeout: sessionTimeout })
setupEventHandlers(consumer)

status.info('🔁', 'Starting session recordings consumer')
Expand All @@ -61,7 +59,31 @@ export const startSessionRecordingEventsConsumer = async ({
},
})

return consumer
// Subscribe to the heatbeat event to track when the consumer has last
// successfully consumed a message. This is used to determine if the
// consumer is healthy.
const { HEARTBEAT } = consumer.events
let lastHeartbeat: number = Date.now()
consumer.on(HEARTBEAT, ({ timestamp }) => (lastHeartbeat = timestamp))

const isHealthy = async () => {
// Consumer has heartbeat within the session timeout, so it is healthy.
if (Date.now() - lastHeartbeat < sessionTimeout) {
return true
}

// Consumer has not heartbeat, but maybe it's because the group is
// currently rebalancing.
try {
const { state } = await consumer.describeGroup()

return ['CompletingRebalance', 'PreparingRebalance'].includes(state)
} catch (error) {
return false
}
}

return { consumer, isHealthy }
}

export const eachBatch =
Expand Down
Loading

0 comments on commit dcc9acc

Please sign in to comment.