From de4235a9d014157c6f53e13b0bfbad08f2b3d368 Mon Sep 17 00:00:00 2001 From: Iain Sproat <68657+iainsproat@users.noreply.github.com> Date: Fri, 8 Nov 2024 13:36:02 +0000 Subject: [PATCH] chore(healthchecks): refactor out of modules (#3465) --- packages/server/app.ts | 4 + .../core/rest => healthchecks}/health.ts | 152 +++++++----------- packages/server/healthchecks/index.ts | 61 +++++++ packages/server/logging/logging.ts | 1 + packages/server/modules/core/index.ts | 4 - .../modules/notifications/services/queue.ts | 7 +- .../modules/shared/errors/ensureError.ts | 11 ++ packages/server/tsconfig.json | 1 + 8 files changed, 139 insertions(+), 102 deletions(-) rename packages/server/{modules/core/rest => healthchecks}/health.ts (54%) create mode 100644 packages/server/healthchecks/index.ts create mode 100644 packages/server/modules/shared/errors/ensureError.ts diff --git a/packages/server/app.ts b/packages/server/app.ts index 744c69f723..0a54fd0554 100644 --- a/packages/server/app.ts +++ b/packages/server/app.ts @@ -67,6 +67,7 @@ import { BaseError, ForbiddenError } from '@/modules/shared/errors' import { loggingPlugin } from '@/modules/core/graph/plugins/logging' import { shouldLogAsInfoLevel } from '@/logging/graphqlError' import { getUserFactory } from '@/modules/core/repositories/users' +import { initFactory as healthchecksInitFactory } from '@/healthchecks' const GRAPHQL_PATH = '/graphql' @@ -401,6 +402,9 @@ export async function init() { // Initialize default modules, including rest api handlers await ModulesSetup.init(app) + // Initialize healthchecks + await healthchecksInitFactory()(app, true) + // Init HTTP server & subscription server const server = http.createServer(app) const subscriptionServer = buildApolloSubscriptionServer(server) diff --git a/packages/server/modules/core/rest/health.ts b/packages/server/healthchecks/health.ts similarity index 54% rename from packages/server/modules/core/rest/health.ts rename to packages/server/healthchecks/health.ts index 4e6199f9ed..7a1429c7e3 100644 --- a/packages/server/modules/core/rest/health.ts +++ b/packages/server/healthchecks/health.ts @@ -1,85 +1,54 @@ -import * as express from 'express' import { createRedisClient } from '@/modules/shared/redis/redis' -import { - getRedisUrl, - highFrequencyMetricsCollectionPeriodMs, - postgresMaxConnections -} from '@/modules/shared/helpers/envHelper' +import { getRedisUrl, postgresMaxConnections } from '@/modules/shared/helpers/envHelper' import type { Redis } from 'ioredis' import { numberOfFreeConnections } from '@/modules/shared/helpers/dbHelper' import { db } from '@/db/knex' import type { Knex } from 'knex' import { getServerInfoFactory } from '@/modules/core/repositories/server' +import { BaseError } from '@/modules/shared/errors' +import { ensureErrorOrWrapAsCause } from '@/modules/shared/errors/ensureError' -type FreeConnectionsCalculator = { +export type ReadinessHandler = () => Promise<{ details: Record }> + +export type FreeConnectionsCalculator = { mean: () => number } -export default (app: express.Application) => { - const knexFreeDbConnectionSamplerLiveness = knexFreeDbConnectionSamplerFactory({ - db, - collectionPeriod: highFrequencyMetricsCollectionPeriodMs(), - sampledDuration: 600000 //number of ms over which to average the database connections, before declaring not alive. 10 minutes. - }) - knexFreeDbConnectionSamplerLiveness.start() - - const knexFreeDbConnectionSamplerReadiness = knexFreeDbConnectionSamplerFactory({ - db, - collectionPeriod: highFrequencyMetricsCollectionPeriodMs(), - sampledDuration: 20000 //number of ms over which to average the database connections, before declaring unready. 20 seconds. - }) - knexFreeDbConnectionSamplerReadiness.start() - - app.options('/liveness') - app.get( - '/liveness', - handleLivenessFactory({ - isRedisAlive, - isPostgresAlive, - freeConnectionsCalculator: knexFreeDbConnectionSamplerLiveness - }) - ) - app.options('/readiness') - app.get( - '/readiness', - handleReadinessFactory({ - isRedisAlive, - isPostgresAlive, - freeConnectionsCalculator: knexFreeDbConnectionSamplerReadiness - }) - ) +class LivenessError extends BaseError { + static defaultMessage = 'The application is not yet alive. Please try again later.' + static code = 'LIVENESS_ERROR' + static statusCode = 500 +} + +class ReadinessError extends BaseError { + static defaultMessage = + 'The application is not ready to accept requests. Please try again later.' + static code = 'READINESS_ERROR' + static statusCode = 500 } -const handleLivenessFactory = +export const handleLivenessFactory = (deps: { isRedisAlive: RedisCheck isPostgresAlive: DBCheck freeConnectionsCalculator: FreeConnectionsCalculator - }): express.RequestHandler => - async (req, res) => { + }) => + async () => { const postgres = await deps.isPostgresAlive() if (!postgres.isAlive) { - req.log.error( - postgres.err, - 'Liveness health check failed. Postgres is not available.' + throw new LivenessError( + 'Liveness health check failed. Postgres is not available.', + { + cause: ensureErrorOrWrapAsCause(postgres.err, 'Unknown postgres error.') + } ) - res.status(500).json({ - message: 'Postgres is not available', - error: postgres.err - }) - res.send() - return } const redis = await deps.isRedisAlive() if (!redis.isAlive) { - req.log.error(redis.err, 'Liveness health check failed. Redis is not available.') - res.status(500).json({ - message: 'Redis is not available.', - error: redis.err + throw new LivenessError('Liveness health check failed. Redis is not available.', { + cause: ensureErrorOrWrapAsCause(redis.err, 'Unknown redis error.') }) - res.send() - return } const numFreeConnections = await deps.freeConnectionsCalculator.mean() @@ -88,49 +57,40 @@ const handleLivenessFactory = ) //unready if less than 10% if (percentageFreeConnections < 10) { - const message = + throw new LivenessError( 'Liveness health check failed. Insufficient free database connections for a sustained duration.' - req.log.error(message) - res.status(500).json({ - message - }) - res.send() - return + ) } - res.status(200) - res.send() + return { + details: { + postgres: 'true', + redis: 'true', + percentageFreeConnections: percentageFreeConnections.toFixed(0) + } + } } -const handleReadinessFactory = (deps: { +export const handleReadinessFactory = (deps: { isRedisAlive: RedisCheck isPostgresAlive: DBCheck freeConnectionsCalculator: FreeConnectionsCalculator -}): express.RequestHandler => { - return async (req, res) => { +}): ReadinessHandler => { + return async () => { const postgres = await deps.isPostgresAlive() if (!postgres.isAlive) { - req.log.error( - postgres.err, - 'Readiness health check failed. Postgres is not available.' + throw new ReadinessError( + 'Readiness health check failed. Postgres is not available.', + { cause: ensureErrorOrWrapAsCause(postgres.err, 'Unknown postgres error.') } ) - res.status(500).json({ - message: 'Postgres is not available', - error: postgres.err - }) - res.send() - return } const redis = await deps.isRedisAlive() if (!redis.isAlive) { - req.log.error(redis.err, 'Readiness health check failed. Redis is not available.') - res.status(500).json({ - message: 'Redis is not available.', - error: redis.err - }) - res.send() - return + throw new ReadinessError( + 'Readiness health check failed. Redis is not available.', + { cause: ensureErrorOrWrapAsCause(redis.err, 'Unknown Redis error.') } + ) } const numFreeConnections = await deps.freeConnectionsCalculator.mean() @@ -141,16 +101,16 @@ const handleReadinessFactory = (deps: { if (percentageFreeConnections < 10) { const message = 'Readiness health check failed. Insufficient free database connections for a sustained duration.' - req.log.error(message) - res.status(500).json({ - message - }) - res.send() - return + throw new ReadinessError(message) } - res.status(200) - res.send() + return { + details: { + postgres: 'true', + redis: 'true', + percentageFreeConnections: percentageFreeConnections.toFixed(0) + } + } } } @@ -158,7 +118,7 @@ type CheckResponse = { isAlive: true } | { isAlive: false; err: unknown } type DBCheck = () => Promise -const isPostgresAlive: DBCheck = async (): Promise => { +export const isPostgresAlive: DBCheck = async (): Promise => { const getServerInfo = getServerInfoFactory({ db }) try { @@ -171,7 +131,7 @@ const isPostgresAlive: DBCheck = async (): Promise => { type RedisCheck = () => Promise -const isRedisAlive: RedisCheck = async (): Promise => { +export const isRedisAlive: RedisCheck = async (): Promise => { let client: Redis | undefined = undefined let result: CheckResponse = { isAlive: true } try { diff --git a/packages/server/healthchecks/index.ts b/packages/server/healthchecks/index.ts new file mode 100644 index 0000000000..2c66d34801 --- /dev/null +++ b/packages/server/healthchecks/index.ts @@ -0,0 +1,61 @@ +import { healthCheckLogger } from '@/logging/logging' +import { db } from '@/db/knex' +import { highFrequencyMetricsCollectionPeriodMs } from '@/modules/shared/helpers/envHelper' +import { + handleLivenessFactory, + handleReadinessFactory, + knexFreeDbConnectionSamplerFactory, + isRedisAlive, + isPostgresAlive, + FreeConnectionsCalculator +} from '@/healthchecks/health' +import { Application } from 'express' + +export const initFactory: () => ( + app: Application, + isInitial: boolean +) => Promise = () => { + let knexFreeDbConnectionSamplerLiveness: FreeConnectionsCalculator & { + start: () => void + } + let knexFreeDbConnectionSamplerReadiness: FreeConnectionsCalculator & { + start: () => void + } + return async (app, isInitial) => { + healthCheckLogger.info('💓 Init health check') + if (isInitial) { + knexFreeDbConnectionSamplerLiveness = knexFreeDbConnectionSamplerFactory({ + db, + collectionPeriod: highFrequencyMetricsCollectionPeriodMs(), + sampledDuration: 600000 //number of ms over which to average the database connections, before declaring not alive. 10 minutes. + }) + knexFreeDbConnectionSamplerLiveness.start() + + knexFreeDbConnectionSamplerReadiness = knexFreeDbConnectionSamplerFactory({ + db, + collectionPeriod: highFrequencyMetricsCollectionPeriodMs(), + sampledDuration: 20000 //number of ms over which to average the database connections, before declaring unready. 20 seconds. + }) + knexFreeDbConnectionSamplerReadiness.start() + } + const livenessHandler = handleLivenessFactory({ + isRedisAlive, + isPostgresAlive, + freeConnectionsCalculator: knexFreeDbConnectionSamplerLiveness + }) + + app.get('/liveness', async (req, res) => { + const result = await livenessHandler() + res.status(200).json({ status: 'ok', ...result }) + }) + + app.get('/readiness', async (req, res) => { + const result = await handleReadinessFactory({ + isRedisAlive, + isPostgresAlive, + freeConnectionsCalculator: knexFreeDbConnectionSamplerReadiness + })() + res.status(200).json({ status: 'ok', ...result }) + }) + } +} diff --git a/packages/server/logging/logging.ts b/packages/server/logging/logging.ts index 2bee9ebbe6..34390451ec 100644 --- a/packages/server/logging/logging.ts +++ b/packages/server/logging/logging.ts @@ -30,6 +30,7 @@ export const authLogger = extendLoggerComponent(logger, 'auth') export const crossServerSyncLogger = extendLoggerComponent(logger, 'cross-server-sync') export const automateLogger = extendLoggerComponent(logger, 'automate') export const subscriptionLogger = extendLoggerComponent(logger, 'subscription') +export const healthCheckLogger = extendLoggerComponent(logger, 'healthcheck') export type Logger = typeof logger export { extendLoggerComponent, Observability } diff --git a/packages/server/modules/core/index.ts b/packages/server/modules/core/index.ts index fe2b81ab12..58f100cdc5 100644 --- a/packages/server/modules/core/index.ts +++ b/packages/server/modules/core/index.ts @@ -11,7 +11,6 @@ import uploadRest from '@/modules/core/rest/upload' import downloadRest from '@/modules/core/rest/download' import diffUpload from '@/modules/core/rest/diffUpload' import diffDownload from '@/modules/core/rest/diffDownload' -import healthRest from '@/modules/core/rest/health' import scopes from '@/modules/core/scopes' import roles from '@/modules/core/roles' import Redis from 'ioredis' @@ -33,9 +32,6 @@ const coreModule: SpeckleModule<{ // Initialize the static route staticRest(app) - // Initialize the health check route - healthRest(app) - // Initialises the two main bulk upload/download endpoints uploadRest(app) downloadRest(app) diff --git a/packages/server/modules/notifications/services/queue.ts b/packages/server/modules/notifications/services/queue.ts index 65eb3c4834..6ab010a69c 100644 --- a/packages/server/modules/notifications/services/queue.ts +++ b/packages/server/modules/notifications/services/queue.ts @@ -17,6 +17,7 @@ import Bull from 'bull' import { buildBaseQueueOptions } from '@/modules/shared/helpers/bullHelper' import cryptoRandomString from 'crypto-random-string' import { logger, notificationsLogger, Observability } from '@/logging/logging' +import { ensureErrorOrWrapAsCause } from '@/modules/shared/errors/ensureError' export type NotificationJobResult = { status: NotificationJobResultsStatus @@ -153,8 +154,10 @@ export async function consumeIncomingNotifications() { } } catch (e: unknown) { notificationsLogger.error(e) - const err = - e instanceof Error ? e : new Error('Unexpected notification consumption error') + const err = ensureErrorOrWrapAsCause( + e, + 'Unexpected notification consumption error' + ) if (!(err instanceof NotificationValidationError)) { throw err diff --git a/packages/server/modules/shared/errors/ensureError.ts b/packages/server/modules/shared/errors/ensureError.ts new file mode 100644 index 0000000000..8a9e7e7f89 --- /dev/null +++ b/packages/server/modules/shared/errors/ensureError.ts @@ -0,0 +1,11 @@ +/** + * In JS catch clauses can receive not only Errors, but pretty much any other + * kind of data type, so you can use this helper to ensure that + * whatever is passed in is a real error. + * If it is not a real error, it will be wrapped in a new error + * with the provided message and the original error as the cause. + */ +export function ensureErrorOrWrapAsCause(e: unknown, fallbackMessage?: string): Error { + if (e instanceof Error) return e + return new Error(fallbackMessage, { cause: e }) +} diff --git a/packages/server/tsconfig.json b/packages/server/tsconfig.json index 254ed6bec5..8605ec88e7 100644 --- a/packages/server/tsconfig.json +++ b/packages/server/tsconfig.json @@ -106,6 +106,7 @@ }, "include": [ "db/**/*", + "healthchecks/**/*", "logging/**/*", "modules/**/*", "bin/**/*",