Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(healthchecks): refactor out of modules #3465

Merged
merged 3 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions packages/server/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ import { BaseError, ForbiddenError } from '@/modules/shared/errors'
import { loggingPlugin } from '@/modules/core/graph/plugins/logging'
import { shouldLogAsInfoLevel } from '@/logging/graphqlError'
import { getUserFactory } from '@/modules/core/repositories/users'
import { initFactory as healthchecksInitFactory } from '@/healthchecks'

const GRAPHQL_PATH = '/graphql'

Expand Down Expand Up @@ -401,6 +402,9 @@ export async function init() {
// Initialize default modules, including rest api handlers
await ModulesSetup.init(app)

// Initialize healthchecks
await healthchecksInitFactory()(app, true)

// Init HTTP server & subscription server
const server = http.createServer(app)
const subscriptionServer = buildApolloSubscriptionServer(server)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,85 +1,54 @@
import * as express from 'express'
import { createRedisClient } from '@/modules/shared/redis/redis'
import {
getRedisUrl,
highFrequencyMetricsCollectionPeriodMs,
postgresMaxConnections
} from '@/modules/shared/helpers/envHelper'
import { getRedisUrl, postgresMaxConnections } from '@/modules/shared/helpers/envHelper'
import type { Redis } from 'ioredis'
import { numberOfFreeConnections } from '@/modules/shared/helpers/dbHelper'
import { db } from '@/db/knex'
import type { Knex } from 'knex'
import { getServerInfoFactory } from '@/modules/core/repositories/server'
import { BaseError } from '@/modules/shared/errors'
import { ensureErrorOrWrapAsCause } from '@/modules/shared/errors/ensureError'

type FreeConnectionsCalculator = {
export type ReadinessHandler = () => Promise<{ details: Record<string, string> }>

export type FreeConnectionsCalculator = {
mean: () => number
}

export default (app: express.Application) => {
const knexFreeDbConnectionSamplerLiveness = knexFreeDbConnectionSamplerFactory({
db,
collectionPeriod: highFrequencyMetricsCollectionPeriodMs(),
sampledDuration: 600000 //number of ms over which to average the database connections, before declaring not alive. 10 minutes.
})
knexFreeDbConnectionSamplerLiveness.start()

const knexFreeDbConnectionSamplerReadiness = knexFreeDbConnectionSamplerFactory({
db,
collectionPeriod: highFrequencyMetricsCollectionPeriodMs(),
sampledDuration: 20000 //number of ms over which to average the database connections, before declaring unready. 20 seconds.
})
knexFreeDbConnectionSamplerReadiness.start()

app.options('/liveness')
app.get(
'/liveness',
handleLivenessFactory({
isRedisAlive,
isPostgresAlive,
freeConnectionsCalculator: knexFreeDbConnectionSamplerLiveness
})
)
app.options('/readiness')
app.get(
'/readiness',
handleReadinessFactory({
isRedisAlive,
isPostgresAlive,
freeConnectionsCalculator: knexFreeDbConnectionSamplerReadiness
})
)
/**
 * Error thrown by the liveness check (see handleLivenessFactory) when Postgres
 * or Redis is not available, or when too few database connections are free.
 * NOTE(review): the static fields presumably provide BaseError's default
 * message, machine-readable code and HTTP status - confirm against BaseError.
 */
class LivenessError extends BaseError {
static defaultMessage = 'The application is not yet alive. Please try again later.'
static code = 'LIVENESS_ERROR'
static statusCode = 500
}

/**
 * Error thrown by the readiness check (see handleReadinessFactory) when Postgres
 * or Redis is not available, or when too few database connections are free.
 * NOTE(review): the static fields presumably provide BaseError's default
 * message, machine-readable code and HTTP status - confirm against BaseError.
 */
class ReadinessError extends BaseError {
static defaultMessage =
'The application is not ready to accept requests. Please try again later.'
static code = 'READINESS_ERROR'
static statusCode = 500
}

const handleLivenessFactory =
export const handleLivenessFactory =
(deps: {
isRedisAlive: RedisCheck
isPostgresAlive: DBCheck
freeConnectionsCalculator: FreeConnectionsCalculator
}): express.RequestHandler =>
async (req, res) => {
}) =>
async () => {
const postgres = await deps.isPostgresAlive()
if (!postgres.isAlive) {
req.log.error(
postgres.err,
'Liveness health check failed. Postgres is not available.'
throw new LivenessError(
'Liveness health check failed. Postgres is not available.',
{
cause: ensureErrorOrWrapAsCause(postgres.err, 'Unknown postgres error.')
}
)
res.status(500).json({
message: 'Postgres is not available',
error: postgres.err
})
res.send()
return
}

const redis = await deps.isRedisAlive()
if (!redis.isAlive) {
req.log.error(redis.err, 'Liveness health check failed. Redis is not available.')
res.status(500).json({
message: 'Redis is not available.',
error: redis.err
throw new LivenessError('Liveness health check failed. Redis is not available.', {
cause: ensureErrorOrWrapAsCause(redis.err, 'Unknown redis error.')
})
res.send()
return
}

const numFreeConnections = await deps.freeConnectionsCalculator.mean()
Expand All @@ -88,49 +57,40 @@ const handleLivenessFactory =
)
//unready if less than 10%
if (percentageFreeConnections < 10) {
const message =
throw new LivenessError(
'Liveness health check failed. Insufficient free database connections for a sustained duration.'
req.log.error(message)
res.status(500).json({
message
})
res.send()
return
)
}

res.status(200)
res.send()
return {
details: {
postgres: 'true',
redis: 'true',
percentageFreeConnections: percentageFreeConnections.toFixed(0)
}
}
}

const handleReadinessFactory = (deps: {
export const handleReadinessFactory = (deps: {
isRedisAlive: RedisCheck
isPostgresAlive: DBCheck
freeConnectionsCalculator: FreeConnectionsCalculator
}): express.RequestHandler => {
return async (req, res) => {
}): ReadinessHandler => {
return async () => {
const postgres = await deps.isPostgresAlive()
if (!postgres.isAlive) {
req.log.error(
postgres.err,
'Readiness health check failed. Postgres is not available.'
throw new ReadinessError(
'Readiness health check failed. Postgres is not available.',
{ cause: ensureErrorOrWrapAsCause(postgres.err, 'Unknown postgres error.') }
)
res.status(500).json({
message: 'Postgres is not available',
error: postgres.err
})
res.send()
return
}

const redis = await deps.isRedisAlive()
if (!redis.isAlive) {
req.log.error(redis.err, 'Readiness health check failed. Redis is not available.')
res.status(500).json({
message: 'Redis is not available.',
error: redis.err
})
res.send()
return
throw new ReadinessError(
'Readiness health check failed. Redis is not available.',
{ cause: ensureErrorOrWrapAsCause(redis.err, 'Unknown Redis error.') }
)
}

const numFreeConnections = await deps.freeConnectionsCalculator.mean()
Expand All @@ -141,24 +101,24 @@ const handleReadinessFactory = (deps: {
if (percentageFreeConnections < 10) {
const message =
'Readiness health check failed. Insufficient free database connections for a sustained duration.'
req.log.error(message)
res.status(500).json({
message
})
res.send()
return
throw new ReadinessError(message)
}

res.status(200)
res.send()
return {
details: {
postgres: 'true',
redis: 'true',
percentageFreeConnections: percentageFreeConnections.toFixed(0)
}
}
}
}

type CheckResponse = { isAlive: true } | { isAlive: false; err: unknown }

type DBCheck = () => Promise<CheckResponse>

const isPostgresAlive: DBCheck = async (): Promise<CheckResponse> => {
export const isPostgresAlive: DBCheck = async (): Promise<CheckResponse> => {
const getServerInfo = getServerInfoFactory({ db })

try {
Expand All @@ -171,7 +131,7 @@ const isPostgresAlive: DBCheck = async (): Promise<CheckResponse> => {

type RedisCheck = () => Promise<CheckResponse>

const isRedisAlive: RedisCheck = async (): Promise<CheckResponse> => {
export const isRedisAlive: RedisCheck = async (): Promise<CheckResponse> => {
let client: Redis | undefined = undefined
let result: CheckResponse = { isAlive: true }
try {
Expand Down
61 changes: 61 additions & 0 deletions packages/server/healthchecks/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import { healthCheckLogger } from '@/logging/logging'
import { db } from '@/db/knex'
import { highFrequencyMetricsCollectionPeriodMs } from '@/modules/shared/helpers/envHelper'
import {
handleLivenessFactory,
handleReadinessFactory,
knexFreeDbConnectionSamplerFactory,
isRedisAlive,
isPostgresAlive,
FreeConnectionsCalculator
} from '@/healthchecks/health'
import { Application } from 'express'

/**
 * Creates the healthcheck initializer.
 *
 * The returned function registers the `GET /liveness` and `GET /readiness`
 * routes on the given Express application. When `isInitial` is true it first
 * creates and starts the two free-database-connection samplers (a 10 minute
 * window for liveness, a 20 second window for readiness); the samplers are kept
 * in this factory's closure so that later non-initial calls reuse them.
 *
 * Both routes respond with `200 { status: 'ok', details }` when the check
 * passes. When a check throws, the error is forwarded to Express via `next`
 * so that the application's error handling produces the response.
 *
 * @returns an initializer `(app, isInitial) => Promise<void>`
 * @throws Error (as a rejected promise) if the initializer is invoked with
 *   `isInitial === false` before the samplers have been created
 */
export const initFactory: () => (
  app: Application,
  isInitial: boolean
) => Promise<void> = () => {
  type ConnectionSampler = FreeConnectionsCalculator & { start: () => void }

  let knexFreeDbConnectionSamplerLiveness: ConnectionSampler | undefined
  let knexFreeDbConnectionSamplerReadiness: ConnectionSampler | undefined

  return async (app, isInitial) => {
    healthCheckLogger.info('💓 Init health check')
    if (isInitial) {
      knexFreeDbConnectionSamplerLiveness = knexFreeDbConnectionSamplerFactory({
        db,
        collectionPeriod: highFrequencyMetricsCollectionPeriodMs(),
        // Number of ms over which the free database connections are averaged
        // before declaring the server not alive: 10 minutes.
        sampledDuration: 600000
      })
      knexFreeDbConnectionSamplerLiveness.start()

      knexFreeDbConnectionSamplerReadiness = knexFreeDbConnectionSamplerFactory({
        db,
        collectionPeriod: highFrequencyMetricsCollectionPeriodMs(),
        // Number of ms over which the free database connections are averaged
        // before declaring the server unready: 20 seconds.
        sampledDuration: 20000
      })
      knexFreeDbConnectionSamplerReadiness.start()
    }

    // Fail fast: without samplers every probe request would crash with a
    // TypeError when the handler calls `.mean()` on undefined.
    const livenessSampler = knexFreeDbConnectionSamplerLiveness
    const readinessSampler = knexFreeDbConnectionSamplerReadiness
    if (!livenessSampler || !readinessSampler) {
      throw new Error(
        'Health checks must be initialized with isInitial=true before they can be re-initialized.'
      )
    }

    // Build each handler once instead of on every request.
    const livenessHandler = handleLivenessFactory({
      isRedisAlive,
      isPostgresAlive,
      freeConnectionsCalculator: livenessSampler
    })
    const readinessHandler = handleReadinessFactory({
      isRedisAlive,
      isPostgresAlive,
      freeConnectionsCalculator: readinessSampler
    })

    app.get('/liveness', async (_req, res, next) => {
      try {
        const result = await livenessHandler()
        res.status(200).json({ status: 'ok', ...result })
      } catch (err) {
        // Express 4 does not observe rejected promises from async handlers;
        // forward explicitly so the request is answered instead of hanging.
        next(err)
      }
    })

    app.get('/readiness', async (_req, res, next) => {
      try {
        const result = await readinessHandler()
        res.status(200).json({ status: 'ok', ...result })
      } catch (err) {
        // See the liveness route: forward failures to the error middleware.
        next(err)
      }
    })
  }
}
1 change: 1 addition & 0 deletions packages/server/logging/logging.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export const authLogger = extendLoggerComponent(logger, 'auth')
export const crossServerSyncLogger = extendLoggerComponent(logger, 'cross-server-sync')
export const automateLogger = extendLoggerComponent(logger, 'automate')
export const subscriptionLogger = extendLoggerComponent(logger, 'subscription')
export const healthCheckLogger = extendLoggerComponent(logger, 'healthcheck')

export type Logger = typeof logger
export { extendLoggerComponent, Observability }
4 changes: 0 additions & 4 deletions packages/server/modules/core/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import uploadRest from '@/modules/core/rest/upload'
import downloadRest from '@/modules/core/rest/download'
import diffUpload from '@/modules/core/rest/diffUpload'
import diffDownload from '@/modules/core/rest/diffDownload'
import healthRest from '@/modules/core/rest/health'
import scopes from '@/modules/core/scopes'
import roles from '@/modules/core/roles'
import Redis from 'ioredis'
Expand All @@ -33,9 +32,6 @@ const coreModule: SpeckleModule<{
// Initialize the static route
staticRest(app)

// Initialize the health check route
healthRest(app)

// Initialises the two main bulk upload/download endpoints
uploadRest(app)
downloadRest(app)
Expand Down
7 changes: 5 additions & 2 deletions packages/server/modules/notifications/services/queue.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import Bull from 'bull'
import { buildBaseQueueOptions } from '@/modules/shared/helpers/bullHelper'
import cryptoRandomString from 'crypto-random-string'
import { logger, notificationsLogger, Observability } from '@/logging/logging'
import { ensureErrorOrWrapAsCause } from '@/modules/shared/errors/ensureError'

export type NotificationJobResult = {
status: NotificationJobResultsStatus
Expand Down Expand Up @@ -153,8 +154,10 @@ export async function consumeIncomingNotifications() {
}
} catch (e: unknown) {
notificationsLogger.error(e)
const err =
e instanceof Error ? e : new Error('Unexpected notification consumption error')
const err = ensureErrorOrWrapAsCause(
e,
'Unexpected notification consumption error'
)

if (!(err instanceof NotificationValidationError)) {
throw err
Expand Down
11 changes: 11 additions & 0 deletions packages/server/modules/shared/errors/ensureError.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
/**
 * Normalizes an arbitrary thrown value into an `Error`.
 *
 * A `catch` clause may receive any value, not just `Error` instances. If `e`
 * already is an `Error` it is returned untouched; otherwise a new `Error` is
 * created with `fallbackMessage` as its message and `e` attached as `cause`.
 *
 * @param e the caught value
 * @param fallbackMessage message for the wrapping error when `e` is not an Error
 * @returns `e` itself, or a new Error wrapping it
 */
export function ensureErrorOrWrapAsCause(e: unknown, fallbackMessage?: string): Error {
  return e instanceof Error ? e : new Error(fallbackMessage, { cause: e })
}
1 change: 1 addition & 0 deletions packages/server/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
},
"include": [
"db/**/*",
"healthchecks/**/*",
"logging/**/*",
"modules/**/*",
"bin/**/*",
Expand Down