From f3a8c264348280aedf984e551d0c0f4107a3f167 Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Thu, 2 Jan 2025 17:43:04 -0300 Subject: [PATCH 1/2] chore: Export trace data to google cloud Google cloud observability supports tracing, using specific fields for defining spans and traces (see [here](https://cloud.google.com/trace/docs/setup/nodejs-ot?authuser=1#config-structured-logging)). This PR adds a LOG_GCLOUD env var that injects pino formatters based on google cloud docs to export trace data in a format that google can understand. --- yarn-project/foundation/src/config/env_var.ts | 1 + .../foundation/src/log/gcloud-logger.ts | 71 +++++++++++++++++++ .../foundation/src/log/pino-logger.ts | 43 +++-------- yarn-project/telemetry-client/src/otel.ts | 1 - .../src/vendor/otel-pino-stream.ts | 9 +++ 5 files changed, 91 insertions(+), 34 deletions(-) create mode 100644 yarn-project/foundation/src/log/gcloud-logger.ts diff --git a/yarn-project/foundation/src/config/env_var.ts b/yarn-project/foundation/src/config/env_var.ts index 3fa142074e0..c24c399182b 100644 --- a/yarn-project/foundation/src/config/env_var.ts +++ b/yarn-project/foundation/src/config/env_var.ts @@ -57,6 +57,7 @@ export type EnvVar = | 'L2_QUEUE_SIZE' | 'LOG_ELAPSED_TIME' | 'LOG_JSON' + | 'LOG_GCLOUD' | 'LOG_MULTILINE' | 'LOG_LEVEL' | 'MNEMONIC' diff --git a/yarn-project/foundation/src/log/gcloud-logger.ts b/yarn-project/foundation/src/log/gcloud-logger.ts new file mode 100644 index 00000000000..65aef7c5b55 --- /dev/null +++ b/yarn-project/foundation/src/log/gcloud-logger.ts @@ -0,0 +1,71 @@ +import { type pino } from 'pino'; + +/* eslint-disable camelcase */ + +const GOOGLE_CLOUD_TRACE_ID = 'logging.googleapis.com/trace'; +const GOOGLE_CLOUD_SPAN_ID = 'logging.googleapis.com/spanId'; +const GOOGLE_CLOUD_TRACE_SAMPLED = 'logging.googleapis.com/trace_sampled'; + +/** + * Pino configuration for google cloud observability. Tweaks message and timestamp, + * adds trace context attributes, and injects severity level. + * Adapted from https://cloud.google.com/trace/docs/setup/nodejs-ot#config-structured-logging. + */ +export const GoogleCloudLoggerConfig = { + messageKey: 'message', + // Same as pino.stdTimeFunctions.isoTime but uses "timestamp" key instead of "time" + timestamp(): string { + return `,"timestamp":"${new Date(Date.now()).toISOString()}"`; + }, + formatters: { + log(object: Record): Record { + // Add trace context attributes following Cloud Logging structured log format described + // in https://cloud.google.com/logging/docs/structured-logging#special-payload-fields + const { trace_id, span_id, trace_flags, ...rest } = object; + + if (trace_id && span_id) { + return { + [GOOGLE_CLOUD_TRACE_ID]: trace_id, + [GOOGLE_CLOUD_SPAN_ID]: span_id, + [GOOGLE_CLOUD_TRACE_SAMPLED]: trace_flags ? trace_flags === '01' : undefined, + trace_flags, // Keep the original trace_flags for otel-pino-stream + ...rest, + }; + } + return object; + }, + level(label: string, level: number): object { + // Inspired by https://github.com/pinojs/pino/issues/726#issuecomment-605814879 + // Severity labels https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#LogSeverity + let severity: string; + + switch (label as pino.Level | keyof typeof customLevels) { + case 'trace': + case 'debug': + severity = 'DEBUG'; + break; + case 'verbose': + case 'info': + severity = 'INFO'; + break; + case 'warn': + severity = 'WARNING'; + break; + case 'error': + severity = 'ERROR'; + break; + case 'fatal': + severity = 'CRITICAL'; + break; + default: + severity = 'DEFAULT'; + break; + } + + return { severity, level }; + }, + }, +} satisfies pino.LoggerOptions; + +// Define custom logging levels for pino. Duplicate from pino-logger.ts. +const customLevels = { verbose: 25 }; diff --git a/yarn-project/foundation/src/log/pino-logger.ts b/yarn-project/foundation/src/log/pino-logger.ts index f8e74948314..7d68d3d795b 100644 --- a/yarn-project/foundation/src/log/pino-logger.ts +++ b/yarn-project/foundation/src/log/pino-logger.ts @@ -5,6 +5,7 @@ import { type Writable } from 'stream'; import { inspect } from 'util'; import { compactArray } from '../collection/array.js'; +import { GoogleCloudLoggerConfig } from './gcloud-logger.js'; import { getLogLevelFromFilters, parseEnv } from './log-filters.js'; import { type LogLevel } from './log-levels.js'; import { type LogData, type LogFn } from './log_fn.js'; @@ -70,44 +71,20 @@ const [logLevel, logFilters] = parseEnv(process.env.LOG_LEVEL, defaultLogLevel); // Define custom logging levels for pino. const customLevels = { verbose: 25 }; -// inspired by https://github.com/pinojs/pino/issues/726#issuecomment-605814879 -const levelToSeverityFormatter = (label: string, level: number): object => { - // Severity labels https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#LogSeverity - let severity: string; - - switch (label as pino.Level | keyof typeof customLevels) { - case 'trace': - case 'debug': - severity = 'DEBUG'; - break; - case 'verbose': - case 'info': - severity = 'INFO'; - break; - case 'warn': - severity = 'WARNING'; - break; - case 'error': - severity = 'ERROR'; - break; - case 'fatal': - severity = 'CRITICAL'; - break; - default: - severity = 'DEFAULT'; - break; - } +// Do we want to log in JSON format? +const logJson = ['1', 'true', 'TRUE'].includes(process.env.LOG_JSON ?? ''); - return { severity, level }; -}; +// Are we in google cloud? If so, format logs for its logs and traces explorer. +const logGoogleCloud = ['1', 'true', 'TRUE'].includes(process.env.LOG_GCLOUD ?? ''); +// Define global options for pino. const pinoOpts: pino.LoggerOptions = { customLevels, useOnlyCustomLevels: false, level: logLevel, - formatters: { - level: levelToSeverityFormatter, - }, + // While it'd be great to set the google cloud formatters for the stdio transport only, + // pino requires that we set formatters at the logger level instead. + ...(logGoogleCloud ? GoogleCloudLoggerConfig : {}), }; export const levels = { @@ -170,7 +147,7 @@ function makeLogger() { // Regular nodejs with transports on worker thread, using pino-pretty for console logging if LOG_JSON // is not set, and an optional OTLP transport if the OTLP endpoint is provided. const targets: pino.TransportSingleOptions[] = compactArray([ - ['1', 'true', 'TRUE'].includes(process.env.LOG_JSON ?? '') ? stdioTransport : prettyTransport, + logJson ? stdioTransport : prettyTransport, otlpEndpoint ? otelTransport : undefined, ]); return pino(pinoOpts, pino.transport({ targets, levels: levels.values })); diff --git a/yarn-project/telemetry-client/src/otel.ts b/yarn-project/telemetry-client/src/otel.ts index e8b6d767b48..e05248924ea 100644 --- a/yarn-project/telemetry-client/src/otel.ts +++ b/yarn-project/telemetry-client/src/otel.ts @@ -126,7 +126,6 @@ export class OpenTelemetryClient implements TelemetryClient { public static async createAndStart(config: TelemetryClientConfig, log: Logger): Promise { const resource = await getOtelResource(); - // TODO(palla/log): Should we show traces as logs in stdout when otel collection is disabled? const tracerProvider = new NodeTracerProvider({ resource, spanProcessors: config.tracesCollectorUrl diff --git a/yarn-project/telemetry-client/src/vendor/otel-pino-stream.ts b/yarn-project/telemetry-client/src/vendor/otel-pino-stream.ts index 3f1361df7a7..f40ddde46d9 100644 --- a/yarn-project/telemetry-client/src/vendor/otel-pino-stream.ts +++ b/yarn-project/telemetry-client/src/vendor/otel-pino-stream.ts @@ -222,6 +222,10 @@ export class OTelPinoStream extends Writable { // [aztec] They are not redundant, we depend on them for correlation. // The instrumentation package seems to be adding these fields via a custom hook. // We push them from the logger module in foundation, so we don't want to clear them here. + // We do rename the google-cloud specific fields though, back to their expected names. + ['logging.googleapis.com/trace']: trace_id, + ['logging.googleapis.com/spanId']: span_id, + ['logging.googleapis.com/trace_sampled']: _trace_flags, ...attributes } = recObj; @@ -232,6 +236,11 @@ export class OTelPinoStream extends Writable { timestamp = Date.now(); } + if (span_id && trace_id) { + attributes['trace_id'] = trace_id; + attributes['span_id'] = span_id; + } + // This avoids a possible subtle bug when a Pino logger uses // `time: pino.stdTimeFunctions.unixTime` and logs in the first half-second // since process start. The rounding involved results in: From f6049e2f7b38db3496f5e748f267922db4f7b31d Mon Sep 17 00:00:00 2001 From: Santiago Palladino Date: Fri, 3 Jan 2025 11:24:59 -0300 Subject: [PATCH 2/2] Add LOG_GCLOUD env var to templates --- spartan/aztec-network/templates/boot-node.yaml | 2 ++ spartan/aztec-network/templates/faucet.yaml | 2 ++ spartan/aztec-network/templates/prover-agent.yaml | 2 ++ spartan/aztec-network/templates/prover-broker.yaml | 2 ++ spartan/aztec-network/templates/prover-node.yaml | 2 ++ spartan/aztec-network/templates/pxe.yaml | 2 ++ spartan/aztec-network/templates/transaction-bot.yaml | 2 ++ spartan/aztec-network/templates/validator.yaml | 2 ++ spartan/aztec-network/values.yaml | 1 + spartan/terraform/deploy-release/main.tf | 5 +++++ 10 files changed, 22 insertions(+) diff --git a/spartan/aztec-network/templates/boot-node.yaml b/spartan/aztec-network/templates/boot-node.yaml index e4ce5f4a6aa..06ea64dee9d 100644 --- a/spartan/aztec-network/templates/boot-node.yaml +++ b/spartan/aztec-network/templates/boot-node.yaml @@ -180,6 +180,8 @@ spec: value: "{{ .Values.bootNode.logLevel }}" - name: LOG_JSON value: "1" + - name: LOG_GCLOUD + value: "{{ .Values.telemetry.gcloud }}" - name: P2P_ENABLED value: "{{ .Values.bootNode.p2p.enabled }}" - name: COINBASE diff --git a/spartan/aztec-network/templates/faucet.yaml b/spartan/aztec-network/templates/faucet.yaml index 5053999d0a6..63cf3c8632c 100644 --- a/spartan/aztec-network/templates/faucet.yaml +++ b/spartan/aztec-network/templates/faucet.yaml @@ -79,6 +79,8 @@ spec: value: "{{ .Values.faucet.l1Assets }}" - name: LOG_JSON value: "1" + - name: LOG_GCLOUD + value: "{{ .Values.telemetry.gcloud }}" - name: LOG_LEVEL value: "{{ .Values.faucet.logLevel }}" ports: diff --git a/spartan/aztec-network/templates/prover-agent.yaml b/spartan/aztec-network/templates/prover-agent.yaml index ef7399d2e7f..7ae69d52e41 100644 --- a/spartan/aztec-network/templates/prover-agent.yaml +++ b/spartan/aztec-network/templates/prover-agent.yaml @@ -94,6 +94,8 @@ spec: value: "{{ .Values.proverAgent.logLevel }}" - name: LOG_JSON value: "1" + - name: LOG_GCLOUD + value: "{{ .Values.telemetry.gcloud }}" - name: PROVER_REAL_PROOFS value: "{{ .Values.aztec.realProofs }}" - name: PROVER_AGENT_COUNT diff --git a/spartan/aztec-network/templates/prover-broker.yaml b/spartan/aztec-network/templates/prover-broker.yaml index 8c0ee46d94e..1b8ce10d249 100644 --- a/spartan/aztec-network/templates/prover-broker.yaml +++ b/spartan/aztec-network/templates/prover-broker.yaml @@ -76,6 +76,8 @@ spec: value: "{{ .Values.proverBroker.logLevel }}" - name: LOG_JSON value: "1" + - name: LOG_GCLOUD + value: "{{ .Values.telemetry.gcloud }}" - name: PROVER_BROKER_POLL_INTERVAL_MS value: "{{ .Values.proverBroker.pollIntervalMs }}" - name: PROVER_BROKER_JOB_TIMEOUT_MS diff --git a/spartan/aztec-network/templates/prover-node.yaml b/spartan/aztec-network/templates/prover-node.yaml index f28ea686b17..b9d70650bd2 100644 --- a/spartan/aztec-network/templates/prover-node.yaml +++ b/spartan/aztec-network/templates/prover-node.yaml @@ -125,6 +125,8 @@ spec: value: "{{ .Values.proverNode.logLevel }}" - name: LOG_JSON value: "1" + - name: LOG_GCLOUD + value: "{{ .Values.telemetry.gcloud }}" - name: PROVER_REAL_PROOFS value: "{{ .Values.aztec.realProofs }}" - name: PROVER_AGENT_COUNT diff --git a/spartan/aztec-network/templates/pxe.yaml b/spartan/aztec-network/templates/pxe.yaml index e91bbd2a10a..860afa0a842 100644 --- a/spartan/aztec-network/templates/pxe.yaml +++ b/spartan/aztec-network/templates/pxe.yaml @@ -93,6 +93,8 @@ spec: value: "{{ .Values.pxe.service.nodePort }}" - name: LOG_JSON value: "1" + - name: LOG_GCLOUD + value: "{{ .Values.telemetry.gcloud }}" - name: LOG_LEVEL value: "{{ .Values.pxe.logLevel }}" - name: PXE_PROVER_ENABLED diff --git a/spartan/aztec-network/templates/transaction-bot.yaml b/spartan/aztec-network/templates/transaction-bot.yaml index 7d4837ad40f..e9405a865cc 100644 --- a/spartan/aztec-network/templates/transaction-bot.yaml +++ b/spartan/aztec-network/templates/transaction-bot.yaml @@ -87,6 +87,8 @@ spec: value: "{{ .Values.bot.service.nodePort }}" - name: LOG_JSON value: "1" + - name: LOG_GCLOUD + value: "{{ .Values.telemetry.gcloud }}" - name: LOG_LEVEL value: "{{ .Values.bot.logLevel }}" - name: BOT_PRIVATE_KEY diff --git a/spartan/aztec-network/templates/validator.yaml b/spartan/aztec-network/templates/validator.yaml index 8ff7cf7735f..7ff91a2a856 100644 --- a/spartan/aztec-network/templates/validator.yaml +++ b/spartan/aztec-network/templates/validator.yaml @@ -162,6 +162,8 @@ spec: value: "{{ .Values.validator.logLevel }}" - name: LOG_JSON value: "1" + - name: LOG_GCLOUD + value: "{{ .Values.telemetry.gcloud }}" - name: P2P_ENABLED value: "{{ .Values.validator.p2p.enabled }}" - name: VALIDATOR_DISABLED diff --git a/spartan/aztec-network/values.yaml b/spartan/aztec-network/values.yaml index efdac924caa..4ab69454641 100644 --- a/spartan/aztec-network/values.yaml +++ b/spartan/aztec-network/values.yaml @@ -13,6 +13,7 @@ network: telemetry: enabled: false + gcloud: otelCollectorEndpoint: images: diff --git a/spartan/terraform/deploy-release/main.tf b/spartan/terraform/deploy-release/main.tf index 3972a690489..37a97eadace 100644 --- a/spartan/terraform/deploy-release/main.tf +++ b/spartan/terraform/deploy-release/main.tf @@ -100,6 +100,11 @@ resource "helm_release" "aztec-gke-cluster" { value = var.L1_DEPLOYMENT_SALT } + set { + name = "telemetry.gcloud" + value = "true" + } + # Setting timeout and wait conditions timeout = 1200 # 20 minutes in seconds wait = true