UNPKG

dd-trace

Version:

Datadog APM tracing client for JavaScript

371 lines (312 loc) 11.6 kB
'use strict'

const util = require('node:util')
const tracerVersion = require('../../../../package.json').version
const logger = require('../log')

const {
  ERROR_MESSAGE,
  ERROR_TYPE,
  ERROR_STACK,
} = require('../constants')

const {
  SPAN_KIND,
  MODEL_NAME,
  MODEL_PROVIDER,
  METADATA,
  COST_TAGS,
  TOOL_DEFINITIONS,
  INPUT_MESSAGES,
  INPUT_VALUE,
  INTEGRATION,
  OUTPUT_MESSAGES,
  INPUT_DOCUMENTS,
  OUTPUT_DOCUMENTS,
  OUTPUT_VALUE,
  METRICS,
  ML_APP,
  TAGS,
  PARENT_ID_KEY,
  SESSION_ID,
  NAME,
  INPUT_PROMPT,
  ROUTING_API_KEY,
  ROUTING_SITE,
  LLMOBS_SUBMITTED_TAG_KEY,
} = require('./constants/tags')
const { UNSERIALIZABLE_VALUE_TEXT } = require('./constants/text')

const telemetry = require('./telemetry')
const LLMObsTagger = require('./tagger')

/**
 * Mutable view of an LLM Observability span that is handed to the user span processor.
 * `input` and `output` are arrays of `{ role, content }` entries (or the raw message
 * arrays for `llm` spans); `_tags` holds the tags computed for the span event.
 */
class LLMObservabilitySpan {
  /**
   * @param {string} kind span kind
   */
  constructor (kind) {
    this.input = []
    this.output = []

    /** @type {string} */
    this.kind = kind

    this._tags = {}
  }

  /**
   * Looks up a single tag by key.
   *
   * @param {string} key
   * @returns {unknown} the tag value, or `undefined` when the tag is not set
   */
  getTag (key) {
    return this._tags[key]
  }
}

/**
 * Turns finished tracer spans that were annotated by the LLMObs tagger into LLM Observability
 * span events and appends them to the configured writer.
 */
class LLMObsSpanProcessor {
  /** @type {import('../config/config-base')} */
  #config

  /** @type {((span: LLMObservabilitySpan) => LLMObservabilitySpan | null) | null} */
  #userSpanProcessor

  /** @type {import('./writers/spans')} */
  #writer

  /**
   * @param {import('../config/config-base')} config tracer configuration; `llmobs.enabled`,
   *   `parsedDdTags`, `version`, `env` and `service` are read from it
   */
  constructor (config) {
    this.#config = config
  }

  /**
   * Registers the user callback that may edit or drop spans before they are written.
   *
   * @param {((span: LLMObservabilitySpan) => LLMObservabilitySpan | null) | null} userSpanProcessor
   */
  setUserSpanProcessor (userSpanProcessor) {
    this.#userSpanProcessor = userSpanProcessor
  }

  /**
   * Sets the writer that formatted span events are appended to.
   *
   * @param {import('./writers/spans')} writer
   */
  setWriter (writer) {
    this.#writer = writer
  }

  // TODO: instead of relying on the tagger's weakmap registry, can we use some namespaced storage correlation?
  /**
   * Formats a finished span and appends it to the writer. Does nothing when LLMObs is disabled
   * or when the span is not registered in the tagger's map. Never throws: failures are logged
   * and the span is not sent.
   *
   * @param {object} span finished tracer span
   * @returns {void}
   */
  process (span) {
    if (!this.#config.llmobs.enabled) return

    // if the span is not in our private tagger map, it is not an llmobs span
    if (!LLMObsTagger.tagMap.has(span)) return

    try {
      const formattedEvent = this.format(span)
      telemetry.incrementLLMObsSpanFinishedCount(span)
      if (formattedEvent == null) return

      const mlObsTags = LLMObsTagger.tagMap.get(span)
      const routing = {
        apiKey: mlObsTags[ROUTING_API_KEY],
        site: mlObsTags[ROUTING_SITE],
      }
      const enqueued = this.#writer.append(formattedEvent, routing)

      // Marker read by the dd-go LLMObs trace-indexer: when reparenting OTel
      // gen_ai.* spans, the parent-chain walk stops at any span carrying this
      // tag, preserving this span as the immediate LLMObs parent. Set only
      // when the writer actually buffered the event — format may have dropped
      // it (user processor returned null), thrown, or the writer may have
      // dropped it silently when its buffer is full. Leaving this tag off in
      // those cases avoids dd-go reparenting OTel children under a span that
      // has no corresponding LLMObs event.
      if (enqueued) {
        span.context()._tags[LLMOBS_SUBMITTED_TAG_KEY] = '1'
      }
    } catch (e) {
      // this should be a rare case
      // we protect against unserializable properties in the format function, and in
      // safeguards in the tagger
      logger.warn(
        ' Failed to append span to LLM Observability writer, likely due to an unserializable property. \n' +
        `Span won't be sent to LLM Observability: ${e.message} `
      )
    }
  }

  /**
   * Builds the LLM Observability span event for a span, running the user span processor on the
   * span's input and output before they are placed on the event.
   *
   * @param {object} span tracer span that is registered in the tagger's map
   * @returns {object | null} the span event, or `null` when the user span processor dropped the
   *   span (returned `null`, returned an invalid value, or threw)
   */
  format (span) {
    const spanTags = span.context()._tags
    const mlObsTags = LLMObsTagger.tagMap.get(span)

    const spanKind = mlObsTags[SPAN_KIND]

    // `input` and `output` are filled in after the user processor has run; `meta` holds the
    // same object references so later writes show up on the event.
    const input = {}
    const output = {}
    const meta = { 'span.kind': spanKind, input, output }

    if (['llm', 'embedding'].includes(spanKind)) {
      meta.model_name = mlObsTags[MODEL_NAME] || 'custom'
      meta.model_provider = (mlObsTags[MODEL_PROVIDER] || 'custom').toLowerCase()
    }

    if (mlObsTags[METADATA] || mlObsTags[COST_TAGS]) {
      const metadata = {}
      if (mlObsTags[METADATA]) this.#addObject(mlObsTags[METADATA], metadata)
      // Only seed `metadata._dd` when there's something to put in it (currently cost_tags). Mirrors
      // dd-trace-py and the cross-language wire format enforced by system-tests — metadata-only
      // spans must not carry an empty `_dd: {}` block.
      if (mlObsTags[COST_TAGS]) {
        this.#getDdMetadata(metadata).cost_tags = mlObsTags[COST_TAGS]
      }
      meta.metadata = metadata
    }

    if (mlObsTags[TOOL_DEFINITIONS]) {
      meta.tool_definitions = []
      this.#addObject(mlObsTags[TOOL_DEFINITIONS], meta.tool_definitions)
    }

    const llmObsSpan = new LLMObservabilitySpan(spanKind)

    // Remember which shape the input/output came from so the processed values can be written
    // back under the matching key (`messages`, `documents` or `value`).
    let inputType
    if (spanKind === 'llm' && mlObsTags[INPUT_MESSAGES]) {
      llmObsSpan.input = mlObsTags[INPUT_MESSAGES]
      inputType = 'messages'
    } else if (spanKind === 'embedding' && mlObsTags[INPUT_DOCUMENTS]) {
      llmObsSpan.input = mlObsTags[INPUT_DOCUMENTS].map(doc => ({ content: doc.text, role: '' }))
      inputType = 'documents'
    } else if (mlObsTags[INPUT_VALUE]) {
      llmObsSpan.input = [{ role: '', content: mlObsTags[INPUT_VALUE] }]
      inputType = 'value'
    }

    let outputType
    if (spanKind === 'llm' && mlObsTags[OUTPUT_MESSAGES]) {
      llmObsSpan.output = mlObsTags[OUTPUT_MESSAGES]
      outputType = 'messages'
    } else if (spanKind === 'retrieval' && mlObsTags[OUTPUT_DOCUMENTS]) {
      llmObsSpan.output = mlObsTags[OUTPUT_DOCUMENTS].map(doc => ({ content: doc.text, role: '' }))
      outputType = 'documents'
    } else if (mlObsTags[OUTPUT_VALUE]) {
      llmObsSpan.output = [{ role: '', content: mlObsTags[OUTPUT_VALUE] }]
      outputType = 'value'
    }

    const error = spanTags.error || spanTags[ERROR_TYPE]
    if (error) {
      meta[ERROR_MESSAGE] = spanTags[ERROR_MESSAGE] || error.message || error.code
      meta[ERROR_TYPE] = spanTags[ERROR_TYPE] || error.name
      meta[ERROR_STACK] = spanTags[ERROR_STACK] || error.stack
    }

    const metrics = mlObsTags[METRICS] || {}

    const mlApp = mlObsTags[ML_APP]
    const sessionId = mlObsTags[SESSION_ID]
    const parentId = mlObsTags[PARENT_ID_KEY]

    const name = mlObsTags[NAME] || span._name

    const tags = this.#getTags(span, mlApp, sessionId, error)
    llmObsSpan._tags = tags

    const processedSpan = this.#runProcessor(llmObsSpan)
    if (processedSpan === undefined) return null

    if (processedSpan.input) {
      if (inputType === 'messages') {
        input.messages = processedSpan.input
      } else if (inputType === 'value') {
        // The user processor may have emptied the array; do not throw (and drop the span) for that.
        input.value = processedSpan.input[0]?.content
      } else if (inputType === 'documents') {
        input.documents = processedSpan.input.map((processedDocument, processedDocumentIdx) => ({
          ...mlObsTags[INPUT_DOCUMENTS][processedDocumentIdx],
          text: processedDocument.content,
        }))
      }
    }

    if (processedSpan.output) {
      if (outputType === 'messages') {
        output.messages = processedSpan.output
      } else if (outputType === 'value') {
        // Same guard as for the input value above.
        output.value = processedSpan.output[0]?.content
      } else if (outputType === 'documents') {
        output.documents = processedSpan.output.map((processedDocument, processedDocumentIdx) => ({
          ...mlObsTags[OUTPUT_DOCUMENTS][processedDocumentIdx],
          text: processedDocument.content,
        }))
      }
    }

    const prompt = mlObsTags[INPUT_PROMPT]
    if (prompt && spanKind === 'llm') {
      // by this point, we should have logged a warning if the span kind was not llm
      meta.input.prompt = prompt
    }

    const llmObsSpanEvent = {
      trace_id: span.context().toTraceId(true),
      span_id: span.context().toSpanId(),
      parent_id: parentId,
      name,
      tags: this.#objectTagsToStringArrayTags(tags),
      start_ns: Math.round(span._startTime * 1e6),
      duration: Math.round(span._duration * 1e6),
      status: error ? 'error' : 'ok',
      meta,
      metrics,
      _dd: {
        span_id: span.context().toSpanId(),
        trace_id: span.context().toTraceId(true),
      },
    }

    if (sessionId) llmObsSpanEvent.session_id = sessionId

    return llmObsSpanEvent
  }

  // For now, this only applies to metadata, as we let users annotate this field with any object
  // However, we want to protect against circular references or BigInts (unserializable)
  // This function can be reused for other fields if needed
  // Messages, Documents, and Metrics are safeguarded in `llmobs/tagger.js`
  /**
   * Deep-copies the own enumerable properties of `obj` into `carrier`. BigInt values and
   * references back to an object that is currently being copied (a real cycle) are replaced
   * with `UNSERIALIZABLE_VALUE_TEXT` and a warning is logged. An object that is merely
   * referenced from several places, without forming a cycle, is copied at each place.
   *
   * @param {object} obj source object or array
   * @param {object} carrier object or array that receives the copy
   * @returns {void}
   */
  #addObject (obj, carrier) {
    // Only the chain of objects between the root and the current position is tracked, so a
    // value is reported as circular only when it is one of its own ancestors.
    const ancestors = new WeakSet()

    const isObject = value => value !== null && typeof value === 'object'

    const add = (source, target) => {
      ancestors.add(source)

      for (const key in source) {
        // Filter before reading the value so inherited accessors are never invoked.
        if (!Object.hasOwn(source, key)) continue

        const value = source[key]

        if (typeof value === 'bigint' || (isObject(value) && ancestors.has(value))) {
          // mark as unserializable instead of dropping
          logger.warn(`Unserializable property found in metadata: ${key}`)
          target[key] = UNSERIALIZABLE_VALUE_TEXT
          continue
        }

        if (isObject(value)) {
          target[key] = Array.isArray(value) ? [] : {}
          add(value, target[key])
        } else {
          target[key] = value
        }
      }

      // Leaving this subtree: siblings may legitimately reference the same object again.
      ancestors.delete(source)
    }

    add(obj, carrier)
  }

  /**
   * Returns `metadata._dd`, normalizing it to a fresh object if missing or invalid.
   *
   * @param {Record<string, unknown>} metadata
   * @returns {Record<string, unknown>}
   */
  #getDdMetadata (metadata) {
    if (!metadata._dd || typeof metadata._dd !== 'object' || Array.isArray(metadata._dd)) {
      metadata._dd = {}
    }
    return metadata._dd
  }

  /**
   * Builds the tag object for a span event: configured global tags, fixed tracer tags, error
   * and session information, the integration name, and finally the tags stored by the tagger,
   * which override any earlier key of the same name.
   *
   * @param {object} span tracer span
   * @param {string | undefined} mlApp value for the `ml_app` tag
   * @param {string | undefined} sessionId session id, added as `session_id` when truthy
   * @param {unknown} error error value found on the span tags; any truthy value marks the span as errored
   * @returns {Record<string, unknown>}
   */
  #getTags (span, mlApp, sessionId, error) {
    const mlObsTags = LLMObsTagger.tagMap.get(span)

    const tags = {
      ...this.#config.parsedDdTags,
      version: this.#config.version,
      env: this.#config.env,
      service: this.#config.service,
      source: 'integration',
      ml_app: mlApp,
      'ddtrace.version': tracerVersion,
      error: error ? 1 : 0,
      language: 'javascript',
    }

    const errType = span.context()._tags[ERROR_TYPE] || error?.name
    if (errType) tags.error_type = errType

    if (sessionId) tags.session_id = sessionId

    const integration = mlObsTags?.[INTEGRATION]
    if (integration) tags.integration = integration

    // Spreading `undefined` is a no-op, so no fallback object is needed here.
    return { ...tags, ...mlObsTags?.[TAGS] }
  }

  /**
   * Converts a tag object into an array of `key:value` strings; `null` and `undefined` values
   * become an empty string after the colon.
   *
   * @param {Record<string, unknown>} tags
   * @returns {string[]}
   */
  #objectTagsToStringArrayTags (tags) {
    return Object.entries(tags).map(([key, value]) => `${key}:${value ?? ''}`)
  }

  /**
   * Runs the user span processor, emitting telemetry and adding some guardrails against invalid return types
   *
   * @param {LLMObservabilitySpan} span
   * @returns {LLMObservabilitySpan | undefined} the processed span, the original span when no
   *   processor is registered, or `undefined` when the span must be dropped
   */
  #runProcessor (span) {
    const processor = this.#userSpanProcessor
    if (!processor) return span

    let error = false

    try {
      const processedLLMObsSpan = processor(span)
      // `null` is the documented way for a processor to drop a span; it is not an error.
      if (processedLLMObsSpan === null) return

      if (!(processedLLMObsSpan instanceof LLMObservabilitySpan)) {
        error = true
        logger.warn('User span processor must return an instance of an LLMObservabilitySpan or null, dropping span.')
        return
      }

      return processedLLMObsSpan
    } catch (e) {
      logger.error(`[LLMObs] Error in LLMObs span processor (${util.inspect(processor)}): ${util.inspect(e)}`)
      error = true
    } finally {
      // Runs on every path above, including the early returns.
      telemetry.recordLLMObsUserProcessorCalled(error)
    }
  }
}

module.exports = LLMObsSpanProcessor