Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Export trace data to google cloud #11019

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/boot-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ spec:
value: "{{ .Values.bootNode.logLevel }}"
- name: LOG_JSON
value: "1"
- name: LOG_GCLOUD
value: "{{ .Values.telemetry.gcloud }}"
- name: P2P_ENABLED
value: "{{ .Values.bootNode.p2p.enabled }}"
- name: COINBASE
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/faucet.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ spec:
value: "{{ .Values.faucet.l1Assets }}"
- name: LOG_JSON
value: "1"
- name: LOG_GCLOUD
value: "{{ .Values.telemetry.gcloud }}"
- name: LOG_LEVEL
value: "{{ .Values.faucet.logLevel }}"
ports:
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/prover-agent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ spec:
value: "{{ .Values.proverAgent.logLevel }}"
- name: LOG_JSON
value: "1"
- name: LOG_GCLOUD
value: "{{ .Values.telemetry.gcloud }}"
- name: PROVER_REAL_PROOFS
value: "{{ .Values.aztec.realProofs }}"
- name: PROVER_AGENT_COUNT
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/prover-broker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ spec:
value: "{{ .Values.proverBroker.logLevel }}"
- name: LOG_JSON
value: "1"
- name: LOG_GCLOUD
value: "{{ .Values.telemetry.gcloud }}"
- name: PROVER_BROKER_POLL_INTERVAL_MS
value: "{{ .Values.proverBroker.pollIntervalMs }}"
- name: PROVER_BROKER_JOB_TIMEOUT_MS
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/prover-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,8 @@ spec:
value: "{{ .Values.proverNode.logLevel }}"
- name: LOG_JSON
value: "1"
- name: LOG_GCLOUD
value: "{{ .Values.telemetry.gcloud }}"
- name: PROVER_REAL_PROOFS
value: "{{ .Values.aztec.realProofs }}"
- name: PROVER_AGENT_COUNT
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/pxe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ spec:
value: "{{ .Values.pxe.service.nodePort }}"
- name: LOG_JSON
value: "1"
- name: LOG_GCLOUD
value: "{{ .Values.telemetry.gcloud }}"
- name: LOG_LEVEL
value: "{{ .Values.pxe.logLevel }}"
- name: PXE_PROVER_ENABLED
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/transaction-bot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ spec:
value: "{{ .Values.bot.service.nodePort }}"
- name: LOG_JSON
value: "1"
- name: LOG_GCLOUD
value: "{{ .Values.telemetry.gcloud }}"
- name: LOG_LEVEL
value: "{{ .Values.bot.logLevel }}"
- name: BOT_PRIVATE_KEY
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ spec:
value: "{{ .Values.validator.logLevel }}"
- name: LOG_JSON
value: "1"
- name: LOG_GCLOUD
value: "{{ .Values.telemetry.gcloud }}"
- name: P2P_ENABLED
value: "{{ .Values.validator.p2p.enabled }}"
- name: VALIDATOR_DISABLED
Expand Down
1 change: 1 addition & 0 deletions spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ network:

telemetry:
enabled: false
gcloud:
otelCollectorEndpoint:

images:
Expand Down
5 changes: 5 additions & 0 deletions spartan/terraform/deploy-release/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ resource "helm_release" "aztec-gke-cluster" {
value = var.L1_DEPLOYMENT_SALT
}

set {
name = "telemetry.gcloud"
value = "true"
}

# Setting timeout and wait conditions
timeout = 1200 # 20 minutes in seconds
wait = true
Expand Down
1 change: 1 addition & 0 deletions yarn-project/foundation/src/config/env_var.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export type EnvVar =
| 'L2_QUEUE_SIZE'
| 'LOG_ELAPSED_TIME'
| 'LOG_JSON'
| 'LOG_GCLOUD'
| 'LOG_MULTILINE'
| 'LOG_LEVEL'
| 'MNEMONIC'
Expand Down
71 changes: 71 additions & 0 deletions yarn-project/foundation/src/log/gcloud-logger.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { type pino } from 'pino';

/* eslint-disable camelcase */

// Field names used for trace context in the Cloud Logging structured log format.
const GOOGLE_CLOUD_TRACE_ID = 'logging.googleapis.com/trace';
const GOOGLE_CLOUD_SPAN_ID = 'logging.googleapis.com/spanId';
const GOOGLE_CLOUD_TRACE_SAMPLED = 'logging.googleapis.com/trace_sampled';

// Custom logging levels for pino. Duplicated from pino-logger.ts.
const customLevels = { verbose: 25 };

// Cloud Logging severity for every known level label; anything else falls back to 'DEFAULT'.
// Severity labels: https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#LogSeverity
// Inspired by https://github.com/pinojs/pino/issues/726#issuecomment-605814879
const severityByLabel: ReadonlyMap<string, string> = new Map(
  Object.entries({
    trace: 'DEBUG',
    debug: 'DEBUG',
    verbose: 'INFO',
    info: 'INFO',
    warn: 'WARNING',
    error: 'ERROR',
    fatal: 'CRITICAL',
  } satisfies Record<pino.Level | keyof typeof customLevels, string>),
);

/**
 * Pino options for Google Cloud observability: the message is emitted under the "message" key,
 * the time under the "timestamp" key, trace context fields are copied to the Cloud Logging
 * field names, and a severity label is added next to the numeric level.
 * Adapted from https://cloud.google.com/trace/docs/setup/nodejs-ot#config-structured-logging.
 */
export const GoogleCloudLoggerConfig = {
  messageKey: 'message',

  /** Equivalent of pino.stdTimeFunctions.isoTime, but writes a "timestamp" key instead of "time". */
  timestamp(): string {
    const isoNow = new Date(Date.now()).toISOString();
    return `,"timestamp":"${isoNow}"`;
  },

  formatters: {
    /**
     * Adds the trace context attributes described in
     * https://cloud.google.com/logging/docs/structured-logging#special-payload-fields
     * Objects lacking either a trace id or a span id are returned untouched.
     */
    log(object: Record<string, unknown>): Record<string, unknown> {
      const { trace_id: traceId, span_id: spanId, trace_flags: traceFlags, ...others } = object;

      if (!traceId || !spanId) {
        return object;
      }

      // Only report a sampled flag when trace flags are actually present.
      const sampled = traceFlags ? traceFlags === '01' : undefined;

      return {
        [GOOGLE_CLOUD_TRACE_ID]: traceId,
        [GOOGLE_CLOUD_SPAN_ID]: spanId,
        [GOOGLE_CLOUD_TRACE_SAMPLED]: sampled,
        // The original trace_flags is kept for otel-pino-stream.
        trace_flags: traceFlags,
        ...others,
      };
    },

    /** Returns the numeric level together with the matching Cloud Logging severity label. */
    level(label: string, level: number): object {
      const severity = severityByLabel.get(label) ?? 'DEFAULT';
      return { severity, level };
    },
  },
} satisfies pino.LoggerOptions;
43 changes: 10 additions & 33 deletions yarn-project/foundation/src/log/pino-logger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { type Writable } from 'stream';
import { inspect } from 'util';

import { compactArray } from '../collection/array.js';
import { GoogleCloudLoggerConfig } from './gcloud-logger.js';
import { getLogLevelFromFilters, parseEnv } from './log-filters.js';
import { type LogLevel } from './log-levels.js';
import { type LogData, type LogFn } from './log_fn.js';
Expand Down Expand Up @@ -70,44 +71,20 @@ const [logLevel, logFilters] = parseEnv(process.env.LOG_LEVEL, defaultLogLevel);
// Define custom logging levels for pino.
const customLevels = { verbose: 25 };

// inspired by https://github.com/pinojs/pino/issues/726#issuecomment-605814879
const levelToSeverityFormatter = (label: string, level: number): object => {
// Severity labels https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#LogSeverity
let severity: string;

switch (label as pino.Level | keyof typeof customLevels) {
case 'trace':
case 'debug':
severity = 'DEBUG';
break;
case 'verbose':
case 'info':
severity = 'INFO';
break;
case 'warn':
severity = 'WARNING';
break;
case 'error':
severity = 'ERROR';
break;
case 'fatal':
severity = 'CRITICAL';
break;
default:
severity = 'DEFAULT';
break;
}
// Do we want to log in JSON format?
const logJson = ['1', 'true', 'TRUE'].includes(process.env.LOG_JSON ?? '');

return { severity, level };
};
// Are we in google cloud? If so, format logs for its logs and traces explorer.
const logGoogleCloud = ['1', 'true', 'TRUE'].includes(process.env.LOG_GCLOUD ?? '');

// Define global options for pino.
const pinoOpts: pino.LoggerOptions<keyof typeof customLevels> = {
customLevels,
useOnlyCustomLevels: false,
level: logLevel,
formatters: {
level: levelToSeverityFormatter,
},
// While it'd be great to set the google cloud formatters for the stdio transport only,
// pino requires that we set formatters at the logger level instead.
...(logGoogleCloud ? GoogleCloudLoggerConfig : {}),
};

export const levels = {
Expand Down Expand Up @@ -170,7 +147,7 @@ function makeLogger() {
// Regular nodejs with transports on worker thread, using pino-pretty for console logging if LOG_JSON
// is not set, and an optional OTLP transport if the OTLP endpoint is provided.
const targets: pino.TransportSingleOptions[] = compactArray([
['1', 'true', 'TRUE'].includes(process.env.LOG_JSON ?? '') ? stdioTransport : prettyTransport,
logJson ? stdioTransport : prettyTransport,
otlpEndpoint ? otelTransport : undefined,
]);
return pino(pinoOpts, pino.transport({ targets, levels: levels.values }));
Expand Down
1 change: 0 additions & 1 deletion yarn-project/telemetry-client/src/otel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,6 @@ export class OpenTelemetryClient implements TelemetryClient {
public static async createAndStart(config: TelemetryClientConfig, log: Logger): Promise<OpenTelemetryClient> {
const resource = await getOtelResource();

// TODO(palla/log): Should we show traces as logs in stdout when otel collection is disabled?
const tracerProvider = new NodeTracerProvider({
resource,
spanProcessors: config.tracesCollectorUrl
Expand Down
9 changes: 9 additions & 0 deletions yarn-project/telemetry-client/src/vendor/otel-pino-stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,10 @@ export class OTelPinoStream extends Writable {
// [aztec] They are not redundant, we depend on them for correlation.
// The instrumentation package seems to be adding these fields via a custom hook.
// We push them from the logger module in foundation, so we don't want to clear them here.
// We do rename the google-cloud specific fields though, back to their expected names.
['logging.googleapis.com/trace']: trace_id,
['logging.googleapis.com/spanId']: span_id,
['logging.googleapis.com/trace_sampled']: _trace_flags,

...attributes
} = recObj;
Expand All @@ -232,6 +236,11 @@ export class OTelPinoStream extends Writable {
timestamp = Date.now();
}

if (span_id && trace_id) {
attributes['trace_id'] = trace_id;
attributes['span_id'] = span_id;
}

// This avoids a possible subtle bug when a Pino logger uses
// `time: pino.stdTimeFunctions.unixTime` and logs in the first half-second
// since process start. The rounding involved results in:
Expand Down
Loading