Skip to content

Commit

Permalink
feat: emit an error log on potential memory leak scenario (#870)
Browse files Browse the repository at this point in the history
  • Loading branch information
kjin authored Oct 3, 2018
1 parent c19850d commit 0072e5f
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 25 deletions.
61 changes: 40 additions & 21 deletions src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,26 +24,6 @@ export type CLSMechanism =

/** Available configuration options. */
export interface Config {
/**
* The trace context propagation mechanism to use. The following options are
* available:
* - 'async-hooks' uses an implementation of CLS on top of the Node core
* `async_hooks` module in Node 8+. This option should not be used if the
* Node binary version requirements are not met.
* - 'async-listener' uses an implementation of CLS on top of the
* `continuation-local-storage` module.
* - 'auto' behaves like 'async-hooks' on Node 8+, and 'async-listener'
* otherwise.
* - 'none' disables CLS completely.
* - 'singular' allows one root span to exist at a time. This option is meant
* to be used internally by Google Cloud Functions, or in any other
* environment where it is guaranteed that only one request is being served
* at a time.
* The 'auto' mechanism is used by default if this configuration option is
* not explicitly set.
*/
clsMechanism?: CLSMechanism;

/**
* Log levels: 0=disabled, 1=error, 2=warn, 3=info, 4=debug
* The value of GCLOUD_TRACE_LOGLEVEL takes precedence over this value.
Expand All @@ -70,6 +50,43 @@ export interface Config {
*/
rootSpanNameOverride?: string|((name: string) => string);

/**
* The trace context propagation mechanism to use. The following options are
* available:
* - 'async-hooks' uses an implementation of CLS on top of the Node core
* `async_hooks` module in Node 8+. This option should not be used if the
* Node binary version requirements are not met.
* - 'async-listener' uses an implementation of CLS on top of the
* `continuation-local-storage` module.
* - 'auto' behaves like 'async-hooks' on Node 8+, and 'async-listener'
* otherwise.
* - 'none' disables CLS completely.
* - 'singular' allows one root span to exist at a time. This option is meant
* to be used internally by Google Cloud Functions, or in any other
* environment where it is guaranteed that only one request is being served
* at a time.
* The 'auto' mechanism is used by default if this configuration option is
* not explicitly set.
*/
clsMechanism?: CLSMechanism;

/**
* The number of local spans per trace to allow before emitting an error log.
* An unexpectedly large number of spans per trace may suggest a memory leak.
* This value should be 1-2x the estimated maximum number of RPCs made on
* behalf of a single incoming request.
*/
spansPerTraceSoftLimit?: number;

/**
* The maximum number of local spans per trace to allow in total. Creating
* more spans in a single trace will cause the agent to log an error, and such
* spans will be dropped. (This limit does not apply when using a RootSpan
* instance to create child spans.)
* This value should be greater than spansPerTraceSoftLimit.
*/
spansPerTraceHardLimit?: number;

/**
* The maximum number of characters reported on a label value. This value
* cannot exceed 16383, the maximum value accepted by the service.
Expand Down Expand Up @@ -197,11 +214,13 @@ export interface Config {
* user-provided value will be used to extend the default value.
*/
export const defaultConfig = {
clsMechanism: 'auto' as CLSMechanism,
logLevel: 1,
enabled: true,
enhancedDatabaseReporting: false,
rootSpanNameOverride: (name: string) => name,
clsMechanism: 'auto' as CLSMechanism,
spansPerTraceSoftLimit: 200,
spansPerTraceHardLimit: 1000,
maximumLabelValueSize: 512,
plugins: {
// enable all by default
Expand Down
42 changes: 41 additions & 1 deletion src/trace-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ export interface StackdriverTracerConfig extends
enhancedDatabaseReporting: boolean;
ignoreContextHeader: boolean;
rootSpanNameOverride: (path: string) => string;
spansPerTraceSoftLimit: number;
spansPerTraceHardLimit: number;
}

interface IncomingTraceContext {
Expand Down Expand Up @@ -236,12 +238,50 @@ export class StackdriverTracer implements Tracer {
// with continuously growing number of child spans. The second case
// seems to have some value, but isn't representable. The user probably
// needs a custom outer span that encompasses the entirety of work.
this.logger!.warn(`TraceApi#createChildSpan: [${
this.logger!.error(`TraceApi#createChildSpan: [${
this.pluginName}] Creating phantom child span [${
options.name}] because root span [${
rootSpan.span.name}] was already closed.`);
return UNCORRELATED_CHILD_SPAN;
}
if (rootSpan.trace.spans.length >= this.config!.spansPerTraceHardLimit) {
// As in the previous case, a root span with a large number of child
// spans suggests a memory leak stemming from context confusion. This
// is likely due to userspace task queues or Promise implementations.
this.logger!.error(`TraceApi#createChildSpan: [${
this.pluginName}] Creating phantom child span [${
options.name}] because the trace with root span [${
rootSpan.span.name}] has reached a limit of ${
this.config!
.spansPerTraceHardLimit} spans. This is likely a memory leak.`);
this.logger!.error([
'TraceApi#createChildSpan: Please see',
'https://github.com/googleapis/cloud-trace-nodejs/wiki',
'for details and suggested actions.'
].join(' '));
return UNCORRELATED_CHILD_SPAN;
}
if (rootSpan.trace.spans.length === this.config!.spansPerTraceSoftLimit) {
// As in the previous case, a root span with a large number of child
// spans suggests a memory leak stemming from context confusion. This
// is likely due to userspace task queues or Promise implementations.

// Note that since child spans can be created by users directly on a
// RootSpanData instance, this block might be skipped because it only
// checks equality -- this is OK because no automatic tracing plugin
// uses the RootSpanData API directly.
this.logger!.warn(`TraceApi#createChildSpan: [${
this.pluginName}] Adding child span [${
options.name}] will cause the trace with root span [${
rootSpan.span.name}] to contain more than ${
this.config!
.spansPerTraceSoftLimit} spans. This is likely a memory leak.`);
this.logger!.error([
'TraceApi#createChildSpan: Please see',
'https://github.com/googleapis/cloud-trace-nodejs/wiki',
'for details and suggested actions.'
].join(' '));
}
// Create a new child span and return it.
const childContext = rootSpan.createChildSpan({
name: options.name,
Expand Down
4 changes: 3 additions & 1 deletion test/plugins/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ shimmer.wrap(trace, 'start', function(original) {
enhancedDatabaseReporting: false,
ignoreContextHeader: false,
rootSpanNameOverride: (name: string) => name,
samplingRate: 0
samplingRate: 0,
spansPerTraceSoftLimit: Infinity,
spansPerTraceHardLimit: Infinity
}, new TestLogger());
testTraceAgent.policy = new TracingPolicy.TraceAllPolicy();
return result;
Expand Down
4 changes: 3 additions & 1 deletion test/test-plugin-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,9 @@ describe('Trace Plugin Loader', () => {
enhancedDatabaseReporting: false,
ignoreContextHeader: false,
rootSpanNameOverride: (name: string) => name,
projectId: '0'
projectId: '0',
spansPerTraceSoftLimit: Infinity,
spansPerTraceHardLimit: Infinity
},
config),
logger);
Expand Down
23 changes: 22 additions & 1 deletion test/test-trace-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,9 @@ describe('Trace Interface', () => {
enhancedDatabaseReporting: false,
ignoreContextHeader: false,
rootSpanNameOverride: (name: string) => name,
samplingRate: 0
samplingRate: 0,
spansPerTraceSoftLimit: Infinity,
spansPerTraceHardLimit: Infinity
},
config),
logger);
Expand Down Expand Up @@ -155,6 +157,25 @@ describe('Trace Interface', () => {
});
});

it('should warn when the spans per trace soft limit has been exceeded',
() => {
const tracer = createTraceAgent(
null, {spansPerTraceSoftLimit: 10, spansPerTraceHardLimit: 20});
tracer.runInRootSpan({name: 'root'}, (rootSpan) => {
for (let i = 0; i < 10; i++) {
tracer.createChildSpan({name: `span-${i}`}).endSpan();
}
assert.strictEqual(logger.getNumLogsWith('warn', '[span-9]'), 1);
for (let i = 0; i < 9; i++) {
tracer.createChildSpan({name: `span-${i + 10}`}).endSpan();
}
const child = tracer.createChildSpan({name: `span-19`});
assert.ok(!tracer.isRealSpan(child));
assert.strictEqual(logger.getNumLogsWith('error', '[span-19]'), 1);
rootSpan.endSpan();
});
});

it('should return null context id when one does not exist', () => {
const traceAPI = createTraceAgent();
assert.strictEqual(traceAPI.getCurrentContextId(), null);
Expand Down

0 comments on commit 0072e5f

Please sign in to comment.