Skip to content

Commit

Permalink
PROF-10320: Separate SSI telemetry and heuristics activation (#4592)
Browse files Browse the repository at this point in the history
* Telemetry emission and heuristics are now independent.
* profiling.enabled is now four-state (true, false, auto, undefined).
* Adds injectionEnabled config.
* Removes profiling.{ssi, heuristicsEnabled} config.
* Moves derived telemetry.logCollection value computation to _applyCalculated().
* Tidies _merge().
  • Loading branch information
szegedi authored Aug 14, 2024
1 parent 156d510 commit 4df8c4d
Show file tree
Hide file tree
Showing 11 changed files with 223 additions and 178 deletions.
62 changes: 32 additions & 30 deletions packages/dd-trace/src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,7 @@ class Config {
this._setValue(defaults, 'iast.redactionValuePattern', null)
this._setValue(defaults, 'iast.requestSampling', 30)
this._setValue(defaults, 'iast.telemetryVerbosity', 'INFORMATION')
this._setValue(defaults, 'injectionEnabled', [])
this._setValue(defaults, 'isAzureFunction', false)
this._setValue(defaults, 'isCiVisibility', false)
this._setValue(defaults, 'isEarlyFlakeDetectionEnabled', false)
Expand All @@ -459,8 +460,6 @@ class Config {
this._setValue(defaults, 'profiling.enabled', undefined)
this._setValue(defaults, 'profiling.exporters', 'agent')
this._setValue(defaults, 'profiling.sourceMap', true)
this._setValue(defaults, 'profiling.ssi', false)
this._setValue(defaults, 'profiling.heuristicsEnabled', false)
this._setValue(defaults, 'profiling.longLivedThreshold', undefined)
this._setValue(defaults, 'protocolVersion', '0.4')
this._setValue(defaults, 'queryStringObfuscation', qsRegex)
Expand Down Expand Up @@ -681,6 +680,7 @@ class Config {
}
this._envUnprocessed['iast.requestSampling'] = DD_IAST_REQUEST_SAMPLING
this._setString(env, 'iast.telemetryVerbosity', DD_IAST_TELEMETRY_VERBOSITY)
this._setArray(env, 'injectionEnabled', DD_INJECTION_ENABLED)
this._setBoolean(env, 'isAzureFunction', getIsAzureFunction())
this._setBoolean(env, 'isGCPFunction', getIsGCPFunction())
this._setBoolean(env, 'logInjection', DD_LOGS_INJECTION)
Expand All @@ -696,18 +696,18 @@ class Config {
this._envUnprocessed.peerServiceMapping = DD_TRACE_PEER_SERVICE_MAPPING
}
this._setString(env, 'port', DD_TRACE_AGENT_PORT)
this._setBoolean(env, 'profiling.enabled', coalesce(DD_EXPERIMENTAL_PROFILING_ENABLED, DD_PROFILING_ENABLED))
const profilingEnabledEnv = coalesce(DD_EXPERIMENTAL_PROFILING_ENABLED, DD_PROFILING_ENABLED)
const profilingEnabled = isTrue(profilingEnabledEnv)
? 'true'
: isFalse(profilingEnabledEnv)
? 'false'
: profilingEnabledEnv === 'auto' ? 'auto' : undefined
this._setString(env, 'profiling.enabled', profilingEnabled)
this._setString(env, 'profiling.exporters', DD_PROFILING_EXPORTERS)
this._setBoolean(env, 'profiling.sourceMap', DD_PROFILING_SOURCE_MAP && !isFalse(DD_PROFILING_SOURCE_MAP))
if (DD_PROFILING_ENABLED === 'auto' || DD_INJECTION_ENABLED) {
this._setBoolean(env, 'profiling.ssi', true)
if (DD_PROFILING_ENABLED === 'auto' || DD_INJECTION_ENABLED.split(',').includes('profiler')) {
this._setBoolean(env, 'profiling.heuristicsEnabled', true)
}
if (DD_INTERNAL_PROFILING_LONG_LIVED_THRESHOLD) {
// This is only used in testing to not have to wait 30s
this._setValue(env, 'profiling.longLivedThreshold', Number(DD_INTERNAL_PROFILING_LONG_LIVED_THRESHOLD))
}
if (DD_INTERNAL_PROFILING_LONG_LIVED_THRESHOLD) {
// This is only used in testing to not have to wait 30s
this._setValue(env, 'profiling.longLivedThreshold', Number(DD_INTERNAL_PROFILING_LONG_LIVED_THRESHOLD))
}

this._setString(env, 'protocolVersion', DD_TRACE_AGENT_PROTOCOL_VERSION)
Expand Down Expand Up @@ -762,12 +762,7 @@ class Config {
this._setBoolean(env, 'telemetry.dependencyCollection', DD_TELEMETRY_DEPENDENCY_COLLECTION_ENABLED)
this._setValue(env, 'telemetry.heartbeatInterval', maybeInt(Math.floor(DD_TELEMETRY_HEARTBEAT_INTERVAL * 1000)))
this._envUnprocessed['telemetry.heartbeatInterval'] = DD_TELEMETRY_HEARTBEAT_INTERVAL * 1000
const hasTelemetryLogsUsingFeatures =
env['iast.enabled'] || env['profiling.enabled'] || env['profiling.heuristicsEnabled']
? true
: undefined
this._setBoolean(env, 'telemetry.logCollection', coalesce(DD_TELEMETRY_LOG_COLLECTION_ENABLED,
hasTelemetryLogsUsingFeatures))
this._setBoolean(env, 'telemetry.logCollection', DD_TELEMETRY_LOG_COLLECTION_ENABLED)
this._setBoolean(env, 'telemetry.metrics', DD_TELEMETRY_METRICS_ENABLED)
this._setBoolean(env, 'traceId128BitGenerationEnabled', DD_TRACE_128_BIT_TRACEID_GENERATION_ENABLED)
this._setBoolean(env, 'traceId128BitLoggingEnabled', DD_TRACE_128_BIT_TRACEID_LOGGING_ENABLED)
Expand Down Expand Up @@ -862,7 +857,10 @@ class Config {
this._setValue(opts, 'peerServiceMapping', options.peerServiceMapping)
this._setBoolean(opts, 'plugins', options.plugins)
this._setString(opts, 'port', options.port)
this._setBoolean(opts, 'profiling.enabled', options.profiling)
const strProfiling = String(options.profiling)
if (['true', 'false', 'auto'].includes(strProfiling)) {
this._setString(opts, 'profiling.enabled', strProfiling)
}
this._setString(opts, 'protocolVersion', options.protocolVersion)
if (options.remoteConfig) {
this._setValue(opts, 'remoteConfig.pollInterval', maybeFloat(options.remoteConfig.pollInterval))
Expand All @@ -885,10 +883,6 @@ class Config {
this._setBoolean(opts, 'spanRemoveIntegrationFromService', options.spanRemoveIntegrationFromService)
this._setBoolean(opts, 'startupLogs', options.startupLogs)
this._setTags(opts, 'tags', tags)
const hasTelemetryLogsUsingFeatures =
(options.iast && (options.iast === true || options.iast?.enabled === true)) ||
(options.profiling && options.profiling === true)
this._setBoolean(opts, 'telemetry.logCollection', hasTelemetryLogsUsingFeatures)
this._setBoolean(opts, 'traceId128BitGenerationEnabled', options.traceId128BitGenerationEnabled)
this._setBoolean(opts, 'traceId128BitLoggingEnabled', options.traceId128BitLoggingEnabled)
this._setString(opts, 'version', options.version || tags.version)
Expand Down Expand Up @@ -1019,6 +1013,13 @@ class Config {
calc['tracePropagationStyle.inject'] = calc['tracePropagationStyle.inject'] || defaultPropagationStyle
calc['tracePropagationStyle.extract'] = calc['tracePropagationStyle.extract'] || defaultPropagationStyle
}

const iastEnabled = coalesce(this._options['iast.enabled'], this._env['iast.enabled'])
const profilingEnabled = coalesce(this._options['profiling.enabled'], this._env['profiling.enabled'])
const injectionIncludesProfiler = (this._env.injectionEnabled || []).includes('profiler')
if (iastEnabled || ['auto', 'true'].includes(profilingEnabled) || injectionIncludesProfiler) {
this._setBoolean(calc, 'telemetry.logCollection', true)
}
}

_applyRemote (options) {
Expand Down Expand Up @@ -1143,17 +1144,18 @@ class Config {
for (const name in this._defaults) {
for (let i = 0; i < containers.length; i++) {
const container = containers[i]
const origin = origins[i]
const unprocessed = unprocessedValues[i]
const value = container[name]

if ((container[name] !== null && container[name] !== undefined) || container === this._defaults) {
if (get(this, name) === container[name] && has(this, name)) break
if ((value !== null && value !== undefined) || container === this._defaults) {
if (get(this, name) === value && has(this, name)) break

let value = container[name]
set(this, name, value)
value = unprocessed[name] || value

changes.push({ name, value, origin })
changes.push({
name,
value: unprocessedValues[i][name] || value,
origin: origins[i]
})

break
}
Expand Down
4 changes: 1 addition & 3 deletions packages/dd-trace/src/profiler.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ process.once('beforeExit', () => { profiler.stop() })
module.exports = {
start: config => {
const { service, version, env, url, hostname, port, tags, repositoryUrl, commitSHA } = config
const { enabled, sourceMap, exporters, heuristicsEnabled } = config.profiling
const { sourceMap, exporters } = config.profiling
const logger = {
debug: (message) => log.debug(message),
info: (message) => log.info(message),
Expand All @@ -18,8 +18,6 @@ module.exports = {
}

return profiler.start({
enabled,
heuristicsEnabled,
service,
version,
env,
Expand Down
4 changes: 0 additions & 4 deletions packages/dd-trace/src/profiling/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ class Config {
DD_PROFILING_CODEHOTSPOTS_ENABLED,
DD_PROFILING_CPU_ENABLED,
DD_PROFILING_DEBUG_SOURCE_MAPS,
DD_PROFILING_ENABLED,
DD_PROFILING_ENDPOINT_COLLECTION_ENABLED,
DD_PROFILING_EXPERIMENTAL_CODEHOTSPOTS_ENABLED,
DD_PROFILING_EXPERIMENTAL_CPU_ENABLED,
Expand All @@ -49,7 +48,6 @@ class Config {
DD_VERSION
} = process.env

const enabled = isTrue(coalesce(options.enabled, DD_PROFILING_ENABLED, true))
const env = coalesce(options.env, DD_ENV)
const service = options.service || DD_SERVICE || 'node'
const host = os.hostname()
Expand All @@ -64,8 +62,6 @@ class Config {
const pprofPrefix = coalesce(options.pprofPrefix,
DD_PROFILING_PPROF_PREFIX, '')

this.enabled = enabled
this.heuristicsEnabled = options.heuristicsEnabled
this.service = service
this.env = env
this.host = host
Expand Down
1 change: 0 additions & 1 deletion packages/dd-trace/src/profiling/profiler.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ class Profiler extends EventEmitter {
if (this._enabled) return true

const config = this._config = new Config(options)
if (!config.enabled && !config.heuristicsEnabled) return false

this._logger = config.logger
this._enabled = true
Expand Down
107 changes: 49 additions & 58 deletions packages/dd-trace/src/profiling/ssi-heuristics.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,40 +7,6 @@ const dc = require('dc-polyfill')
// If the process lives for at least 30 seconds, it's considered long-lived
const DEFAULT_LONG_LIVED_THRESHOLD = 30000

const EnablementChoice = {
MANUALLY_ENABLED: Symbol('SSITelemetry.EnablementChoice.MANUALLY_ENABLED'),
SSI_ENABLED: Symbol('SSITelemetry.EnablementChoice.SSI_ENABLED'),
SSI_NOT_ENABLED: Symbol('SSITelemetry.EnablementChoice.SSI_NOT_ENABLED'),
DISABLED: Symbol('SSITelemetry.EnablementChoice.DISABLED')
}
Object.freeze(EnablementChoice)

function getEnablementChoiceFromConfig (config) {
if (config.ssi === false || config.enabled === false) {
return EnablementChoice.DISABLED
} else if (config.heuristicsEnabled === true) {
return EnablementChoice.SSI_ENABLED
} else if (config.enabled === true) {
return EnablementChoice.MANUALLY_ENABLED
} else {
return EnablementChoice.SSI_NOT_ENABLED
}
}

function enablementChoiceToTagValue (enablementChoice) {
switch (enablementChoice) {
case EnablementChoice.MANUALLY_ENABLED:
return 'manually_enabled'
case EnablementChoice.SSI_ENABLED:
return 'ssi_enabled'
case EnablementChoice.SSI_NOT_ENABLED:
return 'not_enabled'
case EnablementChoice.DISABLED:
// Can't emit this one as a tag
throw new Error('Invalid enablement choice')
}
}

/**
* This class embodies the SSI profiler-triggering heuristics and also emits telemetry metrics about
* the profiler behavior under SSI. It emits the following metrics:
Expand All @@ -56,9 +22,23 @@ function enablementChoiceToTagValue (enablementChoice) {
*/
class SSIHeuristics {
constructor (config) {
this.enablementChoice = getEnablementChoiceFromConfig(config)
const injectionIncludesProfiler = config.injectionEnabled.includes('profiler')
this._heuristicsActive = injectionIncludesProfiler || config.profiling.enabled === 'auto'
this._emitsTelemetry = config.injectionEnabled.length > 0 && config.profiling.enabled !== 'false'

if (this._emitsTelemetry) {
if (config.profiling.enabled === 'true') {
this.enablementChoice = 'manually_enabled'
} else if (injectionIncludesProfiler) {
this.enablementChoice = 'ssi_enabled'
} else if (config.profiling.enabled === 'auto') {
this.enablementChoice = 'auto_enabled'
} else {
this.enablementChoice = 'ssi_not_enabled'
}
}

const longLivedThreshold = config.longLivedThreshold || DEFAULT_LONG_LIVED_THRESHOLD
const longLivedThreshold = config.profiling.longLivedThreshold || DEFAULT_LONG_LIVED_THRESHOLD
if (typeof longLivedThreshold !== 'number' || longLivedThreshold <= 0) {
throw new Error('Long-lived threshold must be a positive number')
}
Expand All @@ -69,12 +49,16 @@ class SSIHeuristics {
this.shortLived = true
}

enabled () {
return this.enablementChoice !== EnablementChoice.DISABLED
get emitsTelemetry () {
return this._emitsTelemetry
}

get heuristicsActive () {
return this._heuristicsActive
}

start () {
if (this.enabled()) {
if (this.heuristicsActive || this.emitsTelemetry) {
// Used to determine short-livedness of the process. We could use the process start time as the
// reference point, but the tracer initialization point is more relevant, as we couldn't be
// collecting profiles earlier anyway. The difference is not particularly significant if the
Expand All @@ -85,13 +69,17 @@ class SSIHeuristics {
}, this.longLivedThreshold).unref()

this._onSpanCreated = this._onSpanCreated.bind(this)
this._onProfileSubmitted = this._onProfileSubmitted.bind(this)
this._onMockProfileSubmitted = this._onMockProfileSubmitted.bind(this)
this._onAppClosing = this._onAppClosing.bind(this)

dc.subscribe('dd-trace:span:start', this._onSpanCreated)
dc.subscribe('datadog:profiling:profile-submitted', this._onProfileSubmitted)
dc.subscribe('datadog:profiling:mock-profile-submitted', this._onMockProfileSubmitted)

if (this.emitsTelemetry) {
this._onProfileSubmitted = this._onProfileSubmitted.bind(this)
this._onMockProfileSubmitted = this._onMockProfileSubmitted.bind(this)

dc.subscribe('datadog:profiling:profile-submitted', this._onProfileSubmitted)
dc.subscribe('datadog:profiling:mock-profile-submitted', this._onMockProfileSubmitted)
}

this._onAppClosing = this._onAppClosing.bind(this)
dc.subscribe('datadog:telemetry:app-closing', this._onAppClosing)
}
}
Expand Down Expand Up @@ -152,7 +140,7 @@ class SSIHeuristics {

const tags = [
'installation:ssi',
`enablement_choice:${enablementChoiceToTagValue(this.enablementChoice)}`,
`enablement_choice:${this.enablementChoice}`,
`has_sent_profiles:${this.hasSentProfiles}`,
`heuristic_hypothetical_decision:${decision.join('_')}`
]
Expand All @@ -163,9 +151,9 @@ class SSIHeuristics {
if (
!this._emittedRuntimeId &&
decision[0] === 'triggered' &&
// When enablement choice is SSI_ENABLED, hasSentProfiles can transition from false to true when the
// When heuristics are active, hasSentProfiles can transition from false to true when the
// profiler gets started and the first profile is submitted, so we have to wait for it.
(this.enablementChoice !== EnablementChoice.SSI_ENABLED || this.hasSentProfiles)
(!this.heuristicsActive || this.hasSentProfiles)
) {
// Tags won't change anymore, so we can emit the runtime ID metric now.
this._emittedRuntimeId = true
Expand All @@ -174,22 +162,25 @@ class SSIHeuristics {
}

_onAppClosing () {
this._ensureProfileMetrics()
// Last ditch effort to emit a runtime ID count metric
if (!this._emittedRuntimeId) {
this._emittedRuntimeId = true
this._runtimeIdCount.inc()
if (this.emitsTelemetry) {
this._ensureProfileMetrics()
// Last ditch effort to emit a runtime ID count metric
if (!this._emittedRuntimeId) {
this._emittedRuntimeId = true
this._runtimeIdCount.inc()
}
// So we have the metrics in the final state
this._profileCount.inc(0)

dc.unsubscribe('datadog:profiling:profile-submitted', this._onProfileSubmitted)
dc.unsubscribe('datadog:profiling:mock-profile-submitted', this._onMockProfileSubmitted)
}
// So we have the metrics in the final state
this._profileCount.inc(0)

dc.unsubscribe('datadog:profiling:profile-submitted', this._onProfileSubmitted)
dc.unsubscribe('datadog:profiling:mock-profile-submitted', this._onMockProfileSubmitted)
dc.unsubscribe('datadog:telemetry:app-closing', this._onAppClosing)
if (this.noSpan) {
dc.unsubscribe('dd-trace:span:start', this._onSpanCreated)
}
}
}

module.exports = { SSIHeuristics, EnablementChoice }
module.exports = { SSIHeuristics }
36 changes: 22 additions & 14 deletions packages/dd-trace/src/proxy.js
Original file line number Diff line number Diff line change
Expand Up @@ -116,24 +116,32 @@ class Tracer extends NoopProxy {
require('./serverless').maybeStartServerlessMiniAgent(config)
}

const ssiHeuristics = new SSIHeuristics(config.profiling)
ssiHeuristics.start()
if (config.profiling.enabled) {
this._profilerStarted = this._startProfiler(config)
} else if (config.profiling.ssi) {
const mockProfiler = require('./profiling/ssi-telemetry-mock-profiler')
mockProfiler.start(config)

if (config.profiling.heuristicsEnabled) {
if (config.profiling.enabled !== 'false') {
const ssiHeuristics = new SSIHeuristics(config)
ssiHeuristics.start()
let mockProfiler = null
if (config.profiling.enabled === 'true') {
this._profilerStarted = this._startProfiler(config)
} else if (ssiHeuristics.emitsTelemetry) {
// Start a mock profiler that emits mock profile-submitted events for the telemetry.
// It will be stopped if the real profiler is started by the heuristics.
mockProfiler = require('./profiling/ssi-telemetry-mock-profiler')
mockProfiler.start(config)
}

if (ssiHeuristics.heuristicsActive) {
ssiHeuristics.onTriggered(() => {
mockProfiler.stop()
if (mockProfiler) {
mockProfiler.stop()
}
this._startProfiler(config)
ssiHeuristics.onTriggered()
ssiHeuristics.onTriggered() // deregister this callback
})
}
}
if (!this._profilerStarted) {
this._profilerStarted = Promise.resolve(false)

if (!this._profilerStarted) {
this._profilerStarted = Promise.resolve(false)
}
}

if (config.runtimeMetrics) {
Expand Down
Loading

0 comments on commit 4df8c4d

Please sign in to comment.