dd-trace
Version:
Datadog APM tracing client for JavaScript
213 lines (170 loc) • 6.28 kB
JavaScript
const { URL } = require('url')
const log = require('../../log')
const HTTP2_HEADER_AUTHORITY = ':authority'
const HTTP2_HEADER_SCHEME = ':scheme'
const HTTP2_HEADER_PATH = ':path'
const PATH_REGEX = /^(?:[a-z]+:\/\/(?:[^?/]+))?(?<path>\/[^?]*)(?:(\?).*)?$/
const INT_SEGMENT = /^[1-9][0-9]+$/ // Integer of size at least 2 (>=10)
const INT_ID_SEGMENT = /^(?=.*[0-9].*)[0-9._-]{3,}$/ // Mixed string with digits and delimiters
const HEX_SEGMENT = /^(?=.*[0-9].*)[A-Fa-f0-9]{6,}$/ // Hexadecimal digits of size at least 6 with at least one decimal digit
const HEX_ID_SEGMENT = /^(?=.*[0-9].*)[A-Fa-f0-9._-]{6,}$/ // Mixed string with hex digits and delimiters
const STRING_SEGMENT = /^.{20,}|.*[%&'()*+,:=@].*$/ // Long string or a string containing special characters
/**
* Extract full URL from HTTP request
* @param {import('http').IncomingMessage} req
* @returns {string} Full URL
*/
function extractURL (req) {
const headers = req.headers
if (req.stream) {
return `${headers[HTTP2_HEADER_SCHEME]}://${headers[HTTP2_HEADER_AUTHORITY]}${headers[HTTP2_HEADER_PATH]}`
}
const protocol = getProtocol(req)
return `${protocol}://${req.headers.host}${req.originalUrl || req.url}`
}
function getProtocol (req) {
// Do not check deprecated `req.connection` property.
return req.socket?.encrypted ? 'https' : 'http'
}
/**
* Obfuscate query string
*
* @param {object} config
* @param {string} url
* @returns {string} obfuscated URL
*/
function obfuscateQs (config, url) {
const { queryStringObfuscation } = config
if (queryStringObfuscation === false) return url
const i = url.indexOf('?')
if (i === -1) return url
const path = url.slice(0, i)
if (queryStringObfuscation === true) return path
let qs = url.slice(i + 1)
qs = qs.replace(queryStringObfuscation, '<redacted>')
return `${path}?${qs}`
}
const qsObfuscatorCache = new Map()
/**
* Compile the configured query-string obfuscator (a regex string, or a boolean)
* into the boolean / RegExp form that `obfuscateQs` consumes. The compiled regex
* is cached, since the configuration is stable for the process lifetime.
*
* @param {{ queryStringObfuscation?: boolean | string }} config
* @returns {boolean | RegExp}
*/
function getQsObfuscator (config) {
const obfuscator = config.queryStringObfuscation
if (typeof obfuscator === 'boolean') return obfuscator
if (typeof obfuscator === 'string') {
const cached = qsObfuscatorCache.get(obfuscator)
if (cached !== undefined) return cached
let compiled = true
if (obfuscator === '') {
compiled = false // disable obfuscator
} else if (obfuscator !== '.*') { // '.*' optimizes to a full redact (true)
try {
compiled = new RegExp(obfuscator, 'gi')
} catch (err) {
log.error('Error getting qs obfuscator', err)
}
}
qsObfuscatorCache.set(obfuscator, compiled)
return compiled
}
if (Object.hasOwn(config, 'queryStringObfuscation')) {
log.error('Expected `queryStringObfuscation` to be a regex string or boolean.')
}
return true
}
/**
* Build a client span's `http.url` with its query retained but obfuscated per
* `config.queryStringObfuscation` (OTel `url.full` is the absolute URL including
* the redacted query). Falls back to `strippedUrl` when there is no query.
* Callers gate this behind `DD_TRACE_OTEL_SEMANTICS_ENABLED`, so the default
* (flag off) hot path stays a plain tag assignment.
*
* @param {{ queryStringObfuscation?: boolean | string }} config
* @param {string} base `scheme://host[:port]`
* @param {string} [pathname] raw request path, may include `?query`
* @param {string} strippedUrl `base` + query-stripped path (used when there is no query)
* @returns {string}
*/
function buildClientHttpUrl (config, base, pathname, strippedUrl) {
if (pathname?.includes('?')) {
// `config.queryStringObfuscation` is the raw config value here (client plugins
// don't normalize it the way the server does), so compile it first.
return obfuscateQs({ queryStringObfuscation: getQsObfuscator(config) }, `${base}${pathname}`)
}
return strippedUrl
}
/**
* Extract URL path from URL using regex pattern instead of Node.js URL API because:
*
* - Handles edge cases like malformed URLs
* - Works with relative paths
* - Cross tracers compatibility
*
* @param {string} url
* @returns {string} Url path
*/
function extractPathFromUrl (url) {
if (!url) return '/'
const match = url.match(PATH_REGEX)
return match?.groups?.path || '/'
}
/**
* Calculate http.endpoint from URL path
*
* @param {string} url
* @returns {string} The normalized endpoint
*/
function calculateHttpEndpoint (url) {
const path = extractPathFromUrl(url)
// Split path by '/' and filter empty elements
const elements = path.split('/').filter(Boolean)
// Keep only first 8 non-empty elements
const limitedElements = elements.slice(0, 8)
// Apply regex replacements to each element respecting this order
const normalizedElements = limitedElements.map(element => {
if (INT_SEGMENT.test(element)) return '{param:int}'
if (INT_ID_SEGMENT.test(element)) return '{param:int_id}'
if (HEX_SEGMENT.test(element)) return '{param:hex}'
if (HEX_ID_SEGMENT.test(element)) return '{param:hex_id}'
if (STRING_SEGMENT.test(element)) return '{param:str}'
// No match
return element
})
return normalizedElements.length > 0
? '/' + normalizedElements.join('/')
: '/'
}
function filterSensitiveInfoFromRepository (repositoryUrl) {
if (!repositoryUrl) {
return ''
}
if (repositoryUrl.startsWith('git@')) {
return repositoryUrl
}
// Remove the username from ssh URLs
if (repositoryUrl.startsWith('ssh://')) {
const sshRegex = /^(ssh:\/\/)[^@/]*@/
return repositoryUrl.replace(sshRegex, '$1')
}
try {
const { protocol, host, pathname } = new URL(repositoryUrl)
return `${protocol}//${host}${pathname === '/' ? '' : pathname}`
} catch {
return ''
}
}
module.exports = {
extractURL,
obfuscateQs,
getQsObfuscator,
buildClientHttpUrl,
calculateHttpEndpoint,
filterSensitiveInfoFromRepository,
extractPathFromUrl, // used by http-otel-semantics decomposeServerUrl fallback (and tests)
}