Skip to content

Commit

Permalink
Add profiler API telemetry metrics (#4832)
Browse files Browse the repository at this point in the history
* Add profiler API telemetry metrics

* Reduce related errors to warnings so they aren't sent over telemetry logs
  • Loading branch information
szegedi authored and rochdev committed Nov 21, 2024
1 parent 5211ec4 commit 599697b
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 11 deletions.
3 changes: 2 additions & 1 deletion integration-tests/profiler/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,5 @@ function busyWait (ms) {
})
}

setImmediate(async () => busyWait(500))
const durationMs = Number.parseInt(process.env.TEST_DURATION_MS ?? '500')
setImmediate(async () => busyWait(durationMs))
65 changes: 65 additions & 0 deletions integration-tests/profiler/profiler.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,71 @@ describe('profiler', () => {
})
})

context('Profiler API telemetry', () => {
beforeEach(async () => {
agent = await new FakeAgent().start()
})

afterEach(async () => {
proc.kill()
await agent.stop()
})

it('sends profiler API telemetry', () => {
proc = fork(profilerTestFile, {
cwd,
env: {
DD_TRACE_AGENT_PORT: agent.port,
DD_PROFILING_ENABLED: 1,
DD_PROFILING_UPLOAD_PERIOD: 1,
TEST_DURATION_MS: 2500
}
})

let requestCount = 0
let pointsCount = 0

const checkMetrics = agent.assertTelemetryReceived(({ _, payload }) => {
const pp = payload.payload
assert.equal(pp.namespace, 'profilers')
const series = pp.series
assert.lengthOf(series, 2)
assert.equal(series[0].metric, 'profile_api.requests')
assert.equal(series[0].type, 'count')
// There's a race between metrics and on-shutdown profile, so metric
// value will be between 2 and 3
requestCount = series[0].points[0][1]
assert.isAtLeast(requestCount, 2)
assert.isAtMost(requestCount, 3)

assert.equal(series[1].metric, 'profile_api.responses')
assert.equal(series[1].type, 'count')
assert.include(series[1].tags, 'status_code:200')

// Same number of requests and responses
assert.equal(series[1].points[0][1], requestCount)
}, timeout, 'generate-metrics')

const checkDistributions = agent.assertTelemetryReceived(({ _, payload }) => {
const pp = payload.payload
assert.equal(pp.namespace, 'profilers')
const series = pp.series
assert.lengthOf(series, 2)
assert.equal(series[0].metric, 'profile_api.bytes')
assert.equal(series[1].metric, 'profile_api.ms')

// Same number of points
pointsCount = series[0].points.length
assert.equal(pointsCount, series[1].points.length)
}, timeout, 'distributions')

return Promise.all([checkProfiles(agent, proc, timeout), checkMetrics, checkDistributions]).then(() => {
// Same number of requests and points
assert.equal(requestCount, pointsCount)
})
})
})

function forkSsi (args, whichEnv) {
const profilerEnablingEnv = whichEnv ? { DD_PROFILING_ENABLED: 'auto' } : { DD_INJECTION_ENABLED: 'profiler' }
return fork(ssiTestFile, args, {
Expand Down
47 changes: 42 additions & 5 deletions packages/dd-trace/src/profiling/exporters/agent.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,67 @@ const os = require('os')
const { urlToHttpOptions } = require('url')
const perf = require('perf_hooks').performance

const telemetryMetrics = require('../../telemetry/metrics')
const profilersNamespace = telemetryMetrics.manager.namespace('profilers')

const containerId = docker.id()

const statusCodeCounters = []
const requestCounter = profilersNamespace.count('profile_api.requests', [])
const sizeDistribution = profilersNamespace.distribution('profile_api.bytes', [])
const durationDistribution = profilersNamespace.distribution('profile_api.ms', [])
const statusCodeErrorCounter = profilersNamespace.count('profile_api.errors', ['type:status_code'])
const networkErrorCounter = profilersNamespace.count('profile_api.errors', ['type:network'])
// TODO: implement timeout error counter when we have a way to track timeouts
// const timeoutErrorCounter = profilersNamespace.count('profile_api.errors', ['type:timeout'])

function countStatusCode (statusCode) {
let counter = statusCodeCounters[statusCode]
if (counter === undefined) {
counter = statusCodeCounters[statusCode] = profilersNamespace.count(
'profile_api.responses', [`status_code:${statusCode}`]
)
}
counter.inc()
}

function sendRequest (options, form, callback) {
const request = options.protocol === 'https:' ? httpsRequest : httpRequest

const store = storage.getStore()
storage.enterWith({ noop: true })
requestCounter.inc()
const start = perf.now()
const req = request(options, res => {
durationDistribution.track(perf.now() - start)
countStatusCode(res.statusCode)
if (res.statusCode >= 400) {
statusCodeErrorCounter.inc()
const error = new Error(`HTTP Error ${res.statusCode}`)
error.status = res.statusCode
callback(error)
} else {
callback(null, res)
}
})
req.on('error', callback)
if (form) form.pipe(req)

req.on('error', (err) => {
networkErrorCounter.inc()
callback(err)
})
if (form) {
sizeDistribution.track(form.size())
form.pipe(req)
}
storage.enterWith(store)
}

function getBody (stream, callback) {
const chunks = []
stream.on('error', callback)
stream.on('error', (err) => {
networkErrorCounter.inc()
callback(err)
})
stream.on('data', chunk => chunks.push(chunk))
stream.on('end', () => {
callback(null, Buffer.concat(chunks))
Expand Down Expand Up @@ -198,7 +235,7 @@ class AgentExporter {
if (err) {
const { status } = err
if ((typeof status !== 'number' || status >= 500 || status === 429) && operation.retry(err)) {
this._logger.error(`Error from the agent: ${err.message}`)
this._logger.warn(`Error from the agent: ${err.message}`)
} else {
reject(err)
}
Expand All @@ -207,7 +244,7 @@ class AgentExporter {

getBody(response, (err, body) => {
if (err) {
this._logger.error(`Error reading agent response: ${err.message}`)
this._logger.warn(`Error reading agent response: ${err.message}`)
} else {
this._logger.debug(() => {
const bytes = (body.toString('hex').match(/../g) || []).join(' ')
Expand Down
7 changes: 5 additions & 2 deletions packages/dd-trace/src/profiling/profiler.js
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,11 @@ class Profiler extends EventEmitter {
tags.snapshot = snapshotKind
for (const exporter of this._config.exporters) {
const task = exporter.export({ profiles, start, end, tags })
.catch(err => this._logError(err))

.catch(err => {
if (this._logger) {
this._logger.warn(err)
}
})
tasks.push(task)
}

Expand Down
4 changes: 2 additions & 2 deletions packages/dd-trace/test/profiling/exporters/agent.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ describe('exporters/agent', function () {
}

let index = 0
const exporter = newAgentExporter({ url, logger: { debug: onMessage, error: onMessage } })
const exporter = newAgentExporter({ url, logger: { debug: onMessage, warn: onMessage } })
const start = new Date()
const end = new Date()
const tags = { foo: 'bar' }
Expand Down Expand Up @@ -353,7 +353,7 @@ describe('exporters/agent', function () {
})

it('should not retry on 4xx errors', async function () {
const exporter = newAgentExporter({ url, logger: { debug: () => {}, error: () => {} } })
const exporter = newAgentExporter({ url, logger: { debug: () => {}, warn: () => {} } })
const start = new Date()
const end = new Date()
const tags = { foo: 'bar' }
Expand Down
2 changes: 1 addition & 1 deletion packages/dd-trace/test/profiling/profiler.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ describe('profiler', function () {

await waitForExport()

sinon.assert.calledOnce(consoleLogger.error)
sinon.assert.calledOnce(consoleLogger.warn)
})

it('should log encoded profile', async () => {
Expand Down

0 comments on commit 599697b

Please sign in to comment.