Skip to content

Commit

Permalink
chore(healthchecks): refactor out of modules (#3465)
Browse files Browse the repository at this point in the history
  • Loading branch information
iainsproat authored Nov 8, 2024
1 parent f716d0c commit de4235a
Show file tree
Hide file tree
Showing 8 changed files with 139 additions and 102 deletions.
4 changes: 4 additions & 0 deletions packages/server/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ import { BaseError, ForbiddenError } from '@/modules/shared/errors'
import { loggingPlugin } from '@/modules/core/graph/plugins/logging'
import { shouldLogAsInfoLevel } from '@/logging/graphqlError'
import { getUserFactory } from '@/modules/core/repositories/users'
import { initFactory as healthchecksInitFactory } from '@/healthchecks'

const GRAPHQL_PATH = '/graphql'

Expand Down Expand Up @@ -401,6 +402,9 @@ export async function init() {
// Initialize default modules, including rest api handlers
await ModulesSetup.init(app)

// Initialize healthchecks
await healthchecksInitFactory()(app, true)

// Init HTTP server & subscription server
const server = http.createServer(app)
const subscriptionServer = buildApolloSubscriptionServer(server)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,85 +1,54 @@
import * as express from 'express'
import { createRedisClient } from '@/modules/shared/redis/redis'
import {
getRedisUrl,
highFrequencyMetricsCollectionPeriodMs,
postgresMaxConnections
} from '@/modules/shared/helpers/envHelper'
import { getRedisUrl, postgresMaxConnections } from '@/modules/shared/helpers/envHelper'
import type { Redis } from 'ioredis'
import { numberOfFreeConnections } from '@/modules/shared/helpers/dbHelper'
import { db } from '@/db/knex'
import type { Knex } from 'knex'
import { getServerInfoFactory } from '@/modules/core/repositories/server'
import { BaseError } from '@/modules/shared/errors'
import { ensureErrorOrWrapAsCause } from '@/modules/shared/errors/ensureError'

type FreeConnectionsCalculator = {
export type ReadinessHandler = () => Promise<{ details: Record<string, string> }>

export type FreeConnectionsCalculator = {
mean: () => number
}

export default (app: express.Application) => {
const knexFreeDbConnectionSamplerLiveness = knexFreeDbConnectionSamplerFactory({
db,
collectionPeriod: highFrequencyMetricsCollectionPeriodMs(),
sampledDuration: 600000 //number of ms over which to average the database connections, before declaring not alive. 10 minutes.
})
knexFreeDbConnectionSamplerLiveness.start()

const knexFreeDbConnectionSamplerReadiness = knexFreeDbConnectionSamplerFactory({
db,
collectionPeriod: highFrequencyMetricsCollectionPeriodMs(),
sampledDuration: 20000 //number of ms over which to average the database connections, before declaring unready. 20 seconds.
})
knexFreeDbConnectionSamplerReadiness.start()

app.options('/liveness')
app.get(
'/liveness',
handleLivenessFactory({
isRedisAlive,
isPostgresAlive,
freeConnectionsCalculator: knexFreeDbConnectionSamplerLiveness
})
)
app.options('/readiness')
app.get(
'/readiness',
handleReadinessFactory({
isRedisAlive,
isPostgresAlive,
freeConnectionsCalculator: knexFreeDbConnectionSamplerReadiness
})
)
class LivenessError extends BaseError {
static defaultMessage = 'The application is not yet alive. Please try again later.'
static code = 'LIVENESS_ERROR'
static statusCode = 500
}

class ReadinessError extends BaseError {
static defaultMessage =
'The application is not ready to accept requests. Please try again later.'
static code = 'READINESS_ERROR'
static statusCode = 500
}

const handleLivenessFactory =
export const handleLivenessFactory =
(deps: {
isRedisAlive: RedisCheck
isPostgresAlive: DBCheck
freeConnectionsCalculator: FreeConnectionsCalculator
}): express.RequestHandler =>
async (req, res) => {
}) =>
async () => {
const postgres = await deps.isPostgresAlive()
if (!postgres.isAlive) {
req.log.error(
postgres.err,
'Liveness health check failed. Postgres is not available.'
throw new LivenessError(
'Liveness health check failed. Postgres is not available.',
{
cause: ensureErrorOrWrapAsCause(postgres.err, 'Unknown postgres error.')
}
)
res.status(500).json({
message: 'Postgres is not available',
error: postgres.err
})
res.send()
return
}

const redis = await deps.isRedisAlive()
if (!redis.isAlive) {
req.log.error(redis.err, 'Liveness health check failed. Redis is not available.')
res.status(500).json({
message: 'Redis is not available.',
error: redis.err
throw new LivenessError('Liveness health check failed. Redis is not available.', {
cause: ensureErrorOrWrapAsCause(redis.err, 'Unknown redis error.')
})
res.send()
return
}

const numFreeConnections = await deps.freeConnectionsCalculator.mean()
Expand All @@ -88,49 +57,40 @@ const handleLivenessFactory =
)
//unready if less than 10%
if (percentageFreeConnections < 10) {
const message =
throw new LivenessError(
'Liveness health check failed. Insufficient free database connections for a sustained duration.'
req.log.error(message)
res.status(500).json({
message
})
res.send()
return
)
}

res.status(200)
res.send()
return {
details: {
postgres: 'true',
redis: 'true',
percentageFreeConnections: percentageFreeConnections.toFixed(0)
}
}
}

const handleReadinessFactory = (deps: {
export const handleReadinessFactory = (deps: {
isRedisAlive: RedisCheck
isPostgresAlive: DBCheck
freeConnectionsCalculator: FreeConnectionsCalculator
}): express.RequestHandler => {
return async (req, res) => {
}): ReadinessHandler => {
return async () => {
const postgres = await deps.isPostgresAlive()
if (!postgres.isAlive) {
req.log.error(
postgres.err,
'Readiness health check failed. Postgres is not available.'
throw new ReadinessError(
'Readiness health check failed. Postgres is not available.',
{ cause: ensureErrorOrWrapAsCause(postgres.err, 'Unknown postgres error.') }
)
res.status(500).json({
message: 'Postgres is not available',
error: postgres.err
})
res.send()
return
}

const redis = await deps.isRedisAlive()
if (!redis.isAlive) {
req.log.error(redis.err, 'Readiness health check failed. Redis is not available.')
res.status(500).json({
message: 'Redis is not available.',
error: redis.err
})
res.send()
return
throw new ReadinessError(
'Readiness health check failed. Redis is not available.',
{ cause: ensureErrorOrWrapAsCause(redis.err, 'Unknown Redis error.') }
)
}

const numFreeConnections = await deps.freeConnectionsCalculator.mean()
Expand All @@ -141,24 +101,24 @@ const handleReadinessFactory = (deps: {
if (percentageFreeConnections < 10) {
const message =
'Readiness health check failed. Insufficient free database connections for a sustained duration.'
req.log.error(message)
res.status(500).json({
message
})
res.send()
return
throw new ReadinessError(message)
}

res.status(200)
res.send()
return {
details: {
postgres: 'true',
redis: 'true',
percentageFreeConnections: percentageFreeConnections.toFixed(0)
}
}
}
}

type CheckResponse = { isAlive: true } | { isAlive: false; err: unknown }

type DBCheck = () => Promise<CheckResponse>

const isPostgresAlive: DBCheck = async (): Promise<CheckResponse> => {
export const isPostgresAlive: DBCheck = async (): Promise<CheckResponse> => {
const getServerInfo = getServerInfoFactory({ db })

try {
Expand All @@ -171,7 +131,7 @@ const isPostgresAlive: DBCheck = async (): Promise<CheckResponse> => {

type RedisCheck = () => Promise<CheckResponse>

const isRedisAlive: RedisCheck = async (): Promise<CheckResponse> => {
export const isRedisAlive: RedisCheck = async (): Promise<CheckResponse> => {
let client: Redis | undefined = undefined
let result: CheckResponse = { isAlive: true }
try {
Expand Down
61 changes: 61 additions & 0 deletions packages/server/healthchecks/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import { healthCheckLogger } from '@/logging/logging'
import { db } from '@/db/knex'
import { highFrequencyMetricsCollectionPeriodMs } from '@/modules/shared/helpers/envHelper'
import {
handleLivenessFactory,
handleReadinessFactory,
knexFreeDbConnectionSamplerFactory,
isRedisAlive,
isPostgresAlive,
FreeConnectionsCalculator
} from '@/healthchecks/health'
import { Application } from 'express'

/**
 * Builds the health check initializer for the server.
 *
 * The returned function registers `GET /liveness` and `GET /readiness` on the
 * given Express application. Both endpoints respond with HTTP 200 and
 * `{ status: 'ok', details: {...} }` when their checks pass.
 *
 * The two free-database-connection samplers live in the closure of the factory.
 * They are only created (and started) when the initializer is called with
 * `isInitial` set to `true`.
 *
 * @returns an async initializer `(app, isInitial) => Promise<void>`. It throws
 * if it is invoked with `isInitial=false` before the samplers were ever
 * created, because the endpoints could not work in that state.
 */
export const initFactory: () => (
  app: Application,
  isInitial: boolean
) => Promise<void> = () => {
  type FreeConnectionsSampler = FreeConnectionsCalculator & {
    start: () => void
  }

  let knexFreeDbConnectionSamplerLiveness: FreeConnectionsSampler | undefined
  let knexFreeDbConnectionSamplerReadiness: FreeConnectionsSampler | undefined

  return async (app, isInitial) => {
    healthCheckLogger.info('💓 Init health check')
    if (isInitial) {
      knexFreeDbConnectionSamplerLiveness = knexFreeDbConnectionSamplerFactory({
        db,
        collectionPeriod: highFrequencyMetricsCollectionPeriodMs(),
        sampledDuration: 600000 // number of ms over which to average the database connections, before declaring not alive. 10 minutes.
      })
      knexFreeDbConnectionSamplerLiveness.start()

      knexFreeDbConnectionSamplerReadiness = knexFreeDbConnectionSamplerFactory({
        db,
        collectionPeriod: highFrequencyMetricsCollectionPeriodMs(),
        sampledDuration: 20000 // number of ms over which to average the database connections, before declaring unready. 20 seconds.
      })
      knexFreeDbConnectionSamplerReadiness.start()
    }

    // Fail fast with a clear message instead of registering routes that would
    // crash with a TypeError on every request.
    if (!knexFreeDbConnectionSamplerLiveness || !knexFreeDbConnectionSamplerReadiness) {
      throw new Error(
        'Health checks have not been initialized. Call the initializer with isInitial=true first.'
      )
    }

    // Build both handlers once, rather than on every incoming request.
    const livenessHandler = handleLivenessFactory({
      isRedisAlive,
      isPostgresAlive,
      freeConnectionsCalculator: knexFreeDbConnectionSamplerLiveness
    })
    const readinessHandler = handleReadinessFactory({
      isRedisAlive,
      isPostgresAlive,
      freeConnectionsCalculator: knexFreeDbConnectionSamplerReadiness
    })

    // The check handlers reject when a dependency is unhealthy. Express 4 does
    // not observe rejections from async route handlers by itself, so forward
    // them to the error-handling middleware explicitly; otherwise the request
    // would hang and the rejection would go unhandled.
    app.get('/liveness', async (_req, res, next) => {
      try {
        const result = await livenessHandler()
        res.status(200).json({ status: 'ok', ...result })
      } catch (err) {
        next(err)
      }
    })

    app.get('/readiness', async (_req, res, next) => {
      try {
        const result = await readinessHandler()
        res.status(200).json({ status: 'ok', ...result })
      } catch (err) {
        next(err)
      }
    })
  }
}
1 change: 1 addition & 0 deletions packages/server/logging/logging.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export const authLogger = extendLoggerComponent(logger, 'auth')
export const crossServerSyncLogger = extendLoggerComponent(logger, 'cross-server-sync')
export const automateLogger = extendLoggerComponent(logger, 'automate')
export const subscriptionLogger = extendLoggerComponent(logger, 'subscription')
export const healthCheckLogger = extendLoggerComponent(logger, 'healthcheck')

export type Logger = typeof logger
export { extendLoggerComponent, Observability }
4 changes: 0 additions & 4 deletions packages/server/modules/core/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import uploadRest from '@/modules/core/rest/upload'
import downloadRest from '@/modules/core/rest/download'
import diffUpload from '@/modules/core/rest/diffUpload'
import diffDownload from '@/modules/core/rest/diffDownload'
import healthRest from '@/modules/core/rest/health'
import scopes from '@/modules/core/scopes'
import roles from '@/modules/core/roles'
import Redis from 'ioredis'
Expand All @@ -33,9 +32,6 @@ const coreModule: SpeckleModule<{
// Initialize the static route
staticRest(app)

// Initialize the health check route
healthRest(app)

// Initialises the two main bulk upload/download endpoints
uploadRest(app)
downloadRest(app)
Expand Down
7 changes: 5 additions & 2 deletions packages/server/modules/notifications/services/queue.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import Bull from 'bull'
import { buildBaseQueueOptions } from '@/modules/shared/helpers/bullHelper'
import cryptoRandomString from 'crypto-random-string'
import { logger, notificationsLogger, Observability } from '@/logging/logging'
import { ensureErrorOrWrapAsCause } from '@/modules/shared/errors/ensureError'

export type NotificationJobResult = {
status: NotificationJobResultsStatus
Expand Down Expand Up @@ -153,8 +154,10 @@ export async function consumeIncomingNotifications() {
}
} catch (e: unknown) {
notificationsLogger.error(e)
const err =
e instanceof Error ? e : new Error('Unexpected notification consumption error')
const err = ensureErrorOrWrapAsCause(
e,
'Unexpected notification consumption error'
)

if (!(err instanceof NotificationValidationError)) {
throw err
Expand Down
11 changes: 11 additions & 0 deletions packages/server/modules/shared/errors/ensureError.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Normalizes an arbitrary thrown value into an `Error` instance.
 *
 * A `catch` clause may receive any kind of value, not just `Error` objects.
 * This helper guarantees the caller ends up holding a real `Error`:
 * - an `Error` (or subclass) instance is handed back untouched;
 * - anything else is wrapped in a fresh `Error` carrying `fallbackMessage`,
 *   with the original value preserved as its `cause`.
 *
 * @param e - the value that was caught
 * @param fallbackMessage - message for the wrapping error, used only when `e` is not an `Error`
 * @returns `e` itself when it already is an `Error`, otherwise the wrapping `Error`
 */
export function ensureErrorOrWrapAsCause(e: unknown, fallbackMessage?: string): Error {
  return e instanceof Error ? e : new Error(fallbackMessage, { cause: e })
}
1 change: 1 addition & 0 deletions packages/server/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
},
"include": [
"db/**/*",
"healthchecks/**/*",
"logging/**/*",
"modules/**/*",
"bin/**/*",
Expand Down

0 comments on commit de4235a

Please sign in to comment.