dd-trace
Version:
Datadog APM tracing client for JavaScript
342 lines (283 loc) • 10.5 kB
JavaScript
'use strict'
const { storage } = require('../../../../datadog-core')
const dc = require('dc-polyfill')
const runtimeMetrics = require('../../runtime_metrics')
const telemetryMetrics = require('../../telemetry/metrics')
const {
END_TIMESTAMP_LABEL,
SPAN_ID_LABEL,
LOCAL_ROOT_SPAN_ID_LABEL,
getNonJSThreadsLabels,
getThreadLabels,
encodeProfileAsync
} = require('./shared')
const { isWebServerSpan, endpointNameFromTags, getStartedSpans } = require('../webspan-utils')
const beforeCh = dc.channel('dd-trace:storage:before')
const enterCh = dc.channel('dd-trace:storage:enter')
const spanFinishCh = dc.channel('dd-trace:span:finish')
const profilerTelemetryMetrics = telemetryMetrics.manager.namespace('profilers')
const ProfilingContext = Symbol('NativeWallProfiler.ProfilingContext')
let kSampleCount
function getActiveSpan () {
const store = storage('legacy').getStore()
return store && store.span
}
let channelsActivated = false
function ensureChannelsActivated () {
if (channelsActivated) return
const { AsyncLocalStorage, createHook } = require('async_hooks')
const shimmer = require('../../../../datadog-shimmer')
createHook({ before: () => beforeCh.publish() }).enable()
let inRun = false
shimmer.wrap(AsyncLocalStorage.prototype, 'enterWith', function (original) {
return function (...args) {
const retVal = original.apply(this, args)
if (!inRun) enterCh.publish()
return retVal
}
})
shimmer.wrap(AsyncLocalStorage.prototype, 'run', function (original) {
return function (store, callback, ...args) {
const wrappedCb = shimmer.wrapFunction(callback, cb => function (...args) {
inRun = false
enterCh.publish()
const retVal = cb.apply(this, args)
inRun = true
return retVal
})
inRun = true
const retVal = original.call(this, store, wrappedCb, ...args)
enterCh.publish()
inRun = false
return retVal
}
})
channelsActivated = true
}
class NativeWallProfiler {
type = 'wall'
_mapper
_pprof
_started = false
constructor (options = {}) {
this._samplingIntervalMicros = (options.samplingInterval || 1e3 / 99) * 1000 // 99hz
this._flushIntervalMillis = options.flushInterval || 60 * 1e3 // 60 seconds
this._codeHotspotsEnabled = !!options.codeHotspotsEnabled
this._endpointCollectionEnabled = !!options.endpointCollectionEnabled
this._timelineEnabled = !!options.timelineEnabled
this._cpuProfilingEnabled = !!options.cpuProfilingEnabled
// We need to capture span data into the sample context for either code hotspots
// or endpoint collection.
this._captureSpanData = this._codeHotspotsEnabled || this._endpointCollectionEnabled
// We need to run the pprof wall profiler with sample contexts if we're either
// capturing span data or timeline is enabled (so we need sample timestamps, and for now
// timestamps require the sample contexts feature in the pprof wall profiler), or
// cpu profiling is enabled.
this._withContexts = this._captureSpanData || this._timelineEnabled || this._cpuProfilingEnabled
this._v8ProfilerBugWorkaroundEnabled = !!options.v8ProfilerBugWorkaroundEnabled
// Bind these to this so they can be used as callbacks
if (this._withContexts && this._captureSpanData) {
this._enter = this._enter.bind(this)
this._spanFinished = this._spanFinished.bind(this)
}
this._generateLabels = this._generateLabels.bind(this)
this._logger = options.logger
}
codeHotspotsEnabled () {
return this._codeHotspotsEnabled
}
endpointCollectionEnabled () {
return this._endpointCollectionEnabled
}
start ({ mapper } = {}) {
if (this._started) return
this._mapper = mapper
this._pprof = require('@datadog/pprof')
kSampleCount = this._pprof.time.constants.kSampleCount
// pprof otherwise crashes in worker threads
if (!process._startProfilerIdleNotifier) {
process._startProfilerIdleNotifier = () => {}
}
if (!process._stopProfilerIdleNotifier) {
process._stopProfilerIdleNotifier = () => {}
}
this._pprof.time.start({
intervalMicros: this._samplingIntervalMicros,
durationMillis: this._flushIntervalMillis,
sourceMapper: this._mapper,
withContexts: this._withContexts,
lineNumbers: false,
workaroundV8Bug: this._v8ProfilerBugWorkaroundEnabled,
collectCpuTime: this._cpuProfilingEnabled
})
if (this._withContexts) {
this._setNewContext()
if (this._captureSpanData) {
this._profilerState = this._pprof.time.getState()
this._lastSampleCount = 0
ensureChannelsActivated()
beforeCh.subscribe(this._enter)
enterCh.subscribe(this._enter)
spanFinishCh.subscribe(this._spanFinished)
}
}
this._started = true
}
_enter () {
if (!this._started) return
const sampleCount = this._profilerState[kSampleCount]
if (sampleCount !== this._lastSampleCount) {
this._lastSampleCount = sampleCount
const context = this._currentContext.ref
this._setNewContext()
this._updateContext(context)
}
const span = getActiveSpan()
this._currentContext.ref = span ? this._getProfilingContext(span) : {}
}
_getProfilingContext (span) {
let profilingContext = span[ProfilingContext]
if (profilingContext === undefined) {
const context = span.context()
const startedSpans = getStartedSpans(context)
let spanId
let rootSpanId
if (this._codeHotspotsEnabled) {
spanId = context._spanId
rootSpanId = startedSpans.length ? startedSpans[0].context()._spanId : context._spanId
}
let webTags
if (this._endpointCollectionEnabled) {
const tags = context._tags
if (isWebServerSpan(tags)) {
webTags = tags
} else {
// Get parent's context's web tags
const parentId = context._parentId
for (let i = startedSpans.length; --i >= 0;) {
const ispan = startedSpans[i]
if (ispan.context()._spanId === parentId) {
webTags = this._getProfilingContext(ispan).webTags
break
}
}
}
}
profilingContext = { spanId, rootSpanId, webTags }
span[ProfilingContext] = profilingContext
}
return profilingContext
}
_setNewContext () {
this._pprof.time.setContext(
this._currentContext = {
ref: {}
}
)
}
_updateContext (context) {
if (context.spanId !== null && typeof context.spanId === 'object') {
context.spanId = context.spanId.toBigInt()
}
if (context.rootSpanId !== null && typeof context.rootSpanId === 'object') {
context.rootSpanId = context.rootSpanId.toBigInt()
}
if (context.webTags !== undefined && context.endpoint === undefined) {
// endpoint may not be determined yet, but keep it as fallback
// if tags are not available anymore during serialization
context.endpoint = endpointNameFromTags(context.webTags)
}
}
_spanFinished (span) {
if (span[ProfilingContext] !== undefined) {
span[ProfilingContext] = undefined
}
}
_reportV8bug (maybeBug) {
const tag = `v8_profiler_bug_workaround_enabled:${this._v8ProfilerBugWorkaroundEnabled}`
const metric = `v8_cpu_profiler${maybeBug ? '_maybe' : ''}_stuck_event_loop`
this._logger?.warn(`Wall profiler: ${maybeBug ? 'possible ' : ''}v8 profiler stuck event loop detected.`)
// report as runtime metric (can be removed in the future when telemetry is mature)
runtimeMetrics.increment(`runtime.node.profiler.${metric}`, tag, true)
// report as telemetry metric
profilerTelemetryMetrics.count(metric, [tag]).inc()
}
_stop (restart) {
if (!this._started) return
if (this._captureSpanData) {
// update last sample context if needed
this._enter()
this._lastSampleCount = 0
}
const profile = this._pprof.time.stop(restart, this._generateLabels)
if (restart) {
const v8BugDetected = this._pprof.time.v8ProfilerStuckEventLoopDetected()
if (v8BugDetected !== 0) {
this._reportV8bug(v8BugDetected === 1)
}
} else {
if (this._captureSpanData) {
beforeCh.unsubscribe(this._enter)
enterCh.unsubscribe(this._enter)
spanFinishCh.unsubscribe(this._spanFinished)
this._profilerState = undefined
}
this._started = false
}
return profile
}
_generateLabels ({ node, context }) {
// check for special node that represents CPU time all non-JS threads.
// In that case only return a special thread name label since we cannot associate any timestamp/span/endpoint to it.
if (node.name === this._pprof.time.constants.NON_JS_THREADS_FUNCTION_NAME) {
return getNonJSThreadsLabels()
}
if (context == null) {
// generateLabels is also called for samples without context.
// In that case just return thread labels.
return getThreadLabels()
}
const labels = { ...getThreadLabels() }
if (this._timelineEnabled) {
// Incoming timestamps are in microseconds, we emit nanos.
labels[END_TIMESTAMP_LABEL] = context.timestamp * 1000n
}
const asyncId = context.asyncId
if (asyncId !== undefined && asyncId !== -1) {
labels['async id'] = asyncId
}
// Native profiler doesn't set context.context for some samples, such as idle samples or when
// the context was otherwise unavailable when the sample was taken.
const ref = context.context?.ref
if (typeof ref !== 'object') {
return labels
}
const { spanId, rootSpanId, webTags, endpoint } = ref
if (spanId !== undefined) {
labels[SPAN_ID_LABEL] = spanId
}
if (rootSpanId !== undefined) {
labels[LOCAL_ROOT_SPAN_ID_LABEL] = rootSpanId
}
if (webTags !== undefined && Object.keys(webTags).length !== 0) {
labels['trace endpoint'] = endpointNameFromTags(webTags)
} else if (endpoint) {
// fallback to endpoint computed when sample was taken
labels['trace endpoint'] = endpoint
}
return labels
}
profile (restart) {
return this._stop(restart)
}
encode (profile) {
return encodeProfileAsync(profile)
}
stop () {
this._stop(false)
}
isStarted () {
return this._started
}
}
module.exports = NativeWallProfiler