dd-trace

Datadog APM tracing client for JavaScript
'use strict'

// TODO: capture every second and flush every 10 seconds

const v8 = require('v8')
const os = require('os')
const { DogStatsDClient, MetricsAggregationClient } = require('../dogstatsd')
const log = require('../log')
const { performance, PerformanceObserver } = require('perf_hooks')
const { getEnvironmentVariable } = require('../config-helper')
const { NODE_MAJOR, NODE_MINOR } = require('../../../../version')

const DD_RUNTIME_METRICS_FLUSH_INTERVAL = getEnvironmentVariable('DD_RUNTIME_METRICS_FLUSH_INTERVAL') ?? '10000'
const INTERVAL = Number.parseInt(DD_RUNTIME_METRICS_FLUSH_INTERVAL, 10)

// Node >=16 has PerformanceObserver with `gc` type, but <16.7 had a critical bug.
// See: https://github.com/nodejs/node/issues/39548
const hasGCObserver = NODE_MAJOR >= 18 || (NODE_MAJOR === 16 && NODE_MINOR >= 7)

let nativeMetrics = null
let gcObserver = null

let interval
let client
let time
let cpuUsage
let elu

reset()

const runtimeMetrics = module.exports = {
  start (config) {
    const clientConfig = DogStatsDClient.generateClientConfig(config)
    const watchers = []

    if (config.runtimeMetrics.gc !== false) {
      if (hasGCObserver) {
        startGCObserver()
      } else {
        watchers.push('gc')
      }
    }

    if (config.runtimeMetrics.eventLoop !== false) {
      watchers.push('loop')
    }

    try {
      nativeMetrics = require('@datadog/native-metrics')
      nativeMetrics.start(...watchers)
    } catch (e) {
      log.error('Error starting native metrics', e)
      nativeMetrics = null
    }

    client = new MetricsAggregationClient(new DogStatsDClient(clientConfig))

    time = process.hrtime()

    if (nativeMetrics) {
      interval = setInterval(() => {
        captureCommonMetrics()
        captureNativeMetrics()
        client.flush()
      }, INTERVAL)
    } else {
      cpuUsage = process.cpuUsage()

      interval = setInterval(() => {
        captureCommonMetrics()
        captureCpuUsage()
        captureHeapSpace()
        client.flush()
      }, INTERVAL)
    }

    interval.unref()
  },

  stop () {
    if (nativeMetrics) {
      nativeMetrics.stop()
    }

    clearInterval(interval)
    reset()
  },

  track (span) {
    if (nativeMetrics) {
      const handle = nativeMetrics.track(span)

      return {
        finish: () => nativeMetrics.finish(handle)
      }
    }

    return { finish: () => {} }
  },

  boolean (name, value, tag) {
    client && client.boolean(name, value, tag)
  },

  histogram (name, value, tag) {
    client && client.histogram(name, value, tag)
  },

  count (name, count, tag, monotonic = false) {
    client && client.count(name, count, tag, monotonic)
  },

  gauge (name, value, tag) {
    client && client.gauge(name, value, tag)
  },

  increment (name, tag, monotonic) {
    this.count(name, 1, tag, monotonic)
  },

  decrement (name, tag) {
    this.count(name, -1, tag)
  }
}

function reset () {
  interval = null
  client = null
  time = null
  cpuUsage = null
  nativeMetrics = null
  gcObserver && gcObserver.disconnect()
  gcObserver = null
}

function captureCpuUsage () {
  if (!process.cpuUsage) return

  const elapsedTime = process.hrtime(time)
  const elapsedUsage = process.cpuUsage(cpuUsage)

  time = process.hrtime()
  cpuUsage = process.cpuUsage()

  const elapsedMs = elapsedTime[0] * 1000 + elapsedTime[1] / 1_000_000
  const userPercent = 100 * elapsedUsage.user / 1000 / elapsedMs
  const systemPercent = 100 * elapsedUsage.system / 1000 / elapsedMs
  const totalPercent = userPercent + systemPercent

  client.gauge('runtime.node.cpu.system', systemPercent.toFixed(2))
  client.gauge('runtime.node.cpu.user', userPercent.toFixed(2))
  client.gauge('runtime.node.cpu.total', totalPercent.toFixed(2))
}

function captureMemoryUsage () {
  const stats = process.memoryUsage()

  client.gauge('runtime.node.mem.heap_total', stats.heapTotal)
  client.gauge('runtime.node.mem.heap_used', stats.heapUsed)
  client.gauge('runtime.node.mem.rss', stats.rss)
  client.gauge('runtime.node.mem.total', os.totalmem())
  client.gauge('runtime.node.mem.free', os.freemem())

  stats.external && client.gauge('runtime.node.mem.external', stats.external)
}

function captureProcess () {
  client.gauge('runtime.node.process.uptime', Math.round(process.uptime()))
}

function captureHeapStats () {
  const stats = v8.getHeapStatistics()

  client.gauge('runtime.node.heap.total_heap_size', stats.total_heap_size)
  client.gauge('runtime.node.heap.total_heap_size_executable', stats.total_heap_size_executable)
  client.gauge('runtime.node.heap.total_physical_size', stats.total_physical_size)
  client.gauge('runtime.node.heap.total_available_size', stats.total_available_size)
  client.gauge('runtime.node.heap.heap_size_limit', stats.heap_size_limit)

  stats.malloced_memory && client.gauge('runtime.node.heap.malloced_memory', stats.malloced_memory)
  stats.peak_malloced_memory && client.gauge('runtime.node.heap.peak_malloced_memory', stats.peak_malloced_memory)
}

function captureHeapSpace () {
  if (!v8.getHeapSpaceStatistics) return

  const stats = v8.getHeapSpaceStatistics()

  for (let i = 0, l = stats.length; i < l; i++) {
    const tags = [`space:${stats[i].space_name}`]

    client.gauge('runtime.node.heap.size.by.space', stats[i].space_size, tags)
    client.gauge('runtime.node.heap.used_size.by.space', stats[i].space_used_size, tags)
    client.gauge('runtime.node.heap.available_size.by.space', stats[i].space_available_size, tags)
    client.gauge('runtime.node.heap.physical_size.by.space', stats[i].physical_space_size, tags)
  }
}

/**
 * Gathers and reports Event Loop Utilization (ELU) since last run
 *
 * ELU is a measure of how busy the event loop is, like running JavaScript or
 * waiting on *Sync functions. The value is between 0 (idle) and 1 (exhausted).
 *
 * performance.eventLoopUtilization available in Node.js >= v14.10, >= v12.19, >= v16
 */
let captureELU = () => {}
if ('eventLoopUtilization' in performance) {
  captureELU = () => {
    // if elu is undefined (first run) the measurement is from start of process
    elu = performance.eventLoopUtilization(elu)
    client.gauge('runtime.node.event_loop.utilization', elu.utilization)
  }
}

function captureCommonMetrics () {
  captureMemoryUsage()
  captureProcess()
  captureHeapStats()
  captureELU()
}

function captureNativeMetrics () {
  const stats = nativeMetrics.stats()
  const spaces = stats.heap.spaces
  const elapsedTime = process.hrtime(time)

  time = process.hrtime()

  const elapsedUs = elapsedTime[0] * 1e6 + elapsedTime[1] / 1e3
  const userPercent = 100 * stats.cpu.user / elapsedUs
  const systemPercent = 100 * stats.cpu.system / elapsedUs
  const totalPercent = userPercent + systemPercent

  client.gauge('runtime.node.cpu.system', systemPercent.toFixed(2))
  client.gauge('runtime.node.cpu.user', userPercent.toFixed(2))
  client.gauge('runtime.node.cpu.total', totalPercent.toFixed(2))

  histogram('runtime.node.event_loop.delay', stats.eventLoop)

  Object.keys(stats.gc).forEach(type => {
    if (type === 'all') {
      histogram('runtime.node.gc.pause', stats.gc[type])
    } else {
      histogram('runtime.node.gc.pause.by.type', stats.gc[type], `gc_type:${type}`)
    }
  })

  for (let i = 0, l = spaces.length; i < l; i++) {
    const tag = `heap_space:${spaces[i].space_name}`

    client.gauge('runtime.node.heap.size.by.space', spaces[i].space_size, tag)
    client.gauge('runtime.node.heap.used_size.by.space', spaces[i].space_used_size, tag)
    client.gauge('runtime.node.heap.available_size.by.space', spaces[i].space_available_size, tag)
    client.gauge('runtime.node.heap.physical_size.by.space', spaces[i].physical_space_size, tag)
  }
}

function histogram (name, stats, tag) {
  client.gauge(`${name}.min`, stats.min, tag)
  client.gauge(`${name}.max`, stats.max, tag)
  client.increment(`${name}.sum`, stats.sum, tag)
  client.increment(`${name}.total`, stats.sum, tag)
  client.gauge(`${name}.avg`, stats.avg, tag)
  client.increment(`${name}.count`, stats.count, tag)
  client.gauge(`${name}.median`, stats.median, tag)
  client.gauge(`${name}.95percentile`, stats.p95, tag)
}

function startGCObserver () {
  if (gcObserver) return

  gcObserver = new PerformanceObserver(list => {
    for (const entry of list.getEntries()) {
      const type = gcType(entry.detail?.kind || entry.kind)

      runtimeMetrics.histogram('runtime.node.gc.pause.by.type', entry.duration, `gc_type:${type}`)
      runtimeMetrics.histogram('runtime.node.gc.pause', entry.duration)
    }
  })

  gcObserver.observe({ type: 'gc' })
}

function gcType (kind) {
  if (NODE_MAJOR >= 22) {
    switch (kind) {
      case 1: return 'scavenge'
      case 2: return 'minor_mark_sweep'
      case 4: return 'mark_sweep_compact' // Deprecated, might be removed soon.
      case 8: return 'incremental_marking'
      case 16: return 'process_weak_callbacks'
      case 31: return 'all'
    }
  } else if (NODE_MAJOR >= 18) {
    switch (kind) {
      case 1: return 'scavenge'
      case 2: return 'minor_mark_compact'
      case 4: return 'mark_sweep_compact'
      case 8: return 'incremental_marking'
      case 16: return 'process_weak_callbacks'
      case 31: return 'all'
    }
  } else {
    switch (kind) {
      case 1: return 'scavenge'
      case 2: return 'mark_sweep_compact'
      case 4: return 'incremental_marking'
      case 8: return 'process_weak_callbacks'
      case 15: return 'all'
    }
  }

  return 'unknown'
}
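
Below is a minimal usage sketch, not part of the file above. It assumes the module is required directly from within dd-trace (the relative path and the metric names are illustrative only) and shows just the configuration fields this file actually reads, config.runtimeMetrics.gc and config.runtimeMetrics.eventLoop; whatever else DogStatsDClient.generateClientConfig() expects from the tracer config is internal to dd-trace and omitted here.

const runtimeMetrics = require('./runtime_metrics') // illustrative path

// Start collection. gc and eventLoop are the two flags read by start();
// any DogStatsD connection settings consumed by generateClientConfig()
// would also live on this config object but are left out of this sketch.
runtimeMetrics.start({
  runtimeMetrics: {
    gc: true,
    eventLoop: true
  }
})

// Custom metrics go through the same aggregation client once start() has run.
// These metric names are hypothetical, for illustration only.
runtimeMetrics.gauge('runtime.node.example.value', 42, 'tag:example')
runtimeMetrics.increment('runtime.node.example.count', 'tag:example')

// Clear the flush interval and release the native collector on shutdown.
runtimeMetrics.stop()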