dd-trace
Version:
Datadog APM tracing client for JavaScript
398 lines (334 loc) • 12.4 kB
JavaScript
'use strict'
const { channel } = require('dc-polyfill')
const BaseLLMObsPlugin = require('../base')
const { getModelProvider } = require('../../../../../datadog-plugin-ai/src/utils')
const toolCreationCh = channel('dd-trace:vercel-ai:tool')
const setAttributesCh = channel('dd-trace:vercel-ai:span:setAttributes')
const { MODEL_NAME, MODEL_PROVIDER, NAME } = require('../../constants/tags')
const {
getSpanTags,
getOperation,
getUsage,
getJsonStringValue,
getModelMetadata,
getGenerationMetadata,
getToolNameFromTags,
getToolCallResultContent,
getLlmObsSpanName
} = require('./util')
/**
* @typedef {Record<string, unknown> & { description?: string, id?: string }} AvailableToolArgs
*/
/**
* @typedef {string | number | boolean | null | undefined | string[] | number[] | boolean[]} TagValue
* @typedef {Record<string, TagValue>} SpanTags
*
* @typedef {{ name?: string, description?: string }} ToolForModel
*
* @typedef {{ type: 'text' | 'reasoning' | 'redacted-reasoning', text?: string, data?: string }} TextPart
* @typedef {{ type: 'tool-call', toolName: string, toolCallId: string, args?: unknown, input?: unknown }} ToolCallPart
* @typedef {(
* { type: 'tool-result', toolCallId: string, output?: { type: string, value?: unknown }, result?: unknown } &
* Record<string, unknown>
* )} ToolResultPart
*
* @typedef {{
* role: 'system',
* content: string
* } | {
* role: 'user',
* content: TextPart[]
* } | {
* role: 'assistant',
* content: Array<TextPart | ToolCallPart>
* } | {
* role: 'tool',
* content: ToolResultPart[]
* }} AiSdkMessage
*/
/**
 * Maps a Vercel AI SDK operation name to the LLM Observability span kind
 * it is registered with. Operations missing from this table are skipped by
 * the plugin.
 *
 * The table has a null prototype so that indexing it with an arbitrary
 * operation string (e.g. `'constructor'` or `'toString'`) yields `undefined`
 * instead of an inherited `Object.prototype` member, which would be truthy and
 * slip past the `if (!kind) return` guards. It is frozen because it is a
 * shared, read-only lookup table.
 *
 * @type {Readonly<Record<string, 'workflow' | 'embedding' | 'llm' | 'tool'>>}
 */
const SPAN_NAME_TO_KIND_MAPPING = Object.freeze({
  __proto__: null,

  // embeddings
  embed: 'workflow',
  embedMany: 'workflow',
  doEmbed: 'embedding',

  // object generation
  generateObject: 'workflow',
  streamObject: 'workflow',

  // text generation
  generateText: 'workflow',
  streamText: 'workflow',

  // llm operations
  doGenerate: 'llm',
  doStream: 'llm',

  // tools
  toolCall: 'tool'
})
/**
 * LLM Observability plugin for the Vercel AI SDK.
 *
 * For every span whose operation appears in `SPAN_NAME_TO_KIND_MAPPING`, this
 * plugin reads the span tags produced by the SDK and forwards inputs, outputs,
 * metadata and metrics to `this._tagger`.
 */
class VercelAILLMObsPlugin extends BaseLLMObsPlugin {
  static id = 'ai'
  static integration = 'ai'
  static prefix = 'tracing:dd-trace:vercel-ai'

  /**
   * The available tools within the runtime scope of this integration.
   * This essentially acts as a global registry for all tools made through the Vercel AI SDK.
   * @type {Set<AvailableToolArgs>}
   */
  #availableTools

  /**
   * A mapping of tool call IDs to tool names.
   * Written when an LLM output message is formatted and consumed (then removed)
   * when the matching tool span is tagged, so finished tool calls do not
   * accumulate for the lifetime of the process.
   * @type {Map<string, string>}
   */
  #toolCallIdsToName

  /**
   * Forwards all arguments to the base plugin, then subscribes to the tool
   * creation and span attribute channels.
   *
   * @param {...unknown} args
   */
  constructor (...args) {
    super(...args)

    this.#toolCallIdsToName = new Map()
    this.#availableTools = new Set()

    // Remember every tool definition published on the tool creation channel.
    toolCreationCh.subscribe(toolArgs => {
      this.#availableTools.add(toolArgs)
    })

    // Merge the published attributes into the attributes already held by the context.
    setAttributesCh.subscribe(({ ctx, attributes }) => {
      Object.assign(ctx.attributes, attributes)
    })
  }

  /**
   * Does a best-effort attempt to find the right tool name for the given tool description.
   * This is because the Vercel AI SDK does not tag tools by name properly, but
   * rather by the index they were passed in. Tool names appear nowhere in the span tags.
   *
   * We use the tool description as the next best identifier for a tool.
   *
   * Only a value made up exclusively of digits is treated as a positional
   * index. A real name that merely starts with a digit (e.g. `2fa_lookup`) is
   * returned unchanged instead of being mistaken for an index.
   *
   * @param {string} toolName
   * @param {string | undefined} toolDescription
   * @returns {string | undefined} the given name when it is not an index, otherwise the `id` of the
   *   first registered tool with the same description, or `undefined` when none matches
   */
  findToolName (toolName, toolDescription) {
    const isPositionalIndex = /^\d+$/.test(String(toolName))
    if (!isPositionalIndex) return toolName

    // NOTE(review): an undefined description matches registered tools that have no
    // description either; kept as-is because it is part of the existing best-effort lookup.
    for (const availableTool of this.#availableTools) {
      if (availableTool.description === toolDescription && availableTool.id) {
        return availableTool.id
      }
    }
  }

  /**
   * Looks up the description of `toolName` among the tools sent to the model
   * and resolves the final tool name through `findToolName`.
   *
   * @param {string} toolName
   * @param {ToolForModel[] | null | undefined} toolsForModel
   * @returns {string | undefined}
   */
  #resolveToolName (toolName, toolsForModel) {
    const toolDescription = toolsForModel?.find(tool => tool?.name === toolName)?.description
    return this.findToolName(toolName, toolDescription)
  }

  /**
   * Extracts the prompt text from the `ai.prompt` tag: the explicit `prompt`
   * when present, otherwise the content of the last message with role `user`.
   * Array content is flattened by concatenating the `text` of each part.
   *
   * Missing or malformed prompt information yields `undefined` instead of
   * throwing, and the parsed message list is never reordered.
   *
   * @param {SpanTags} tags
   * @returns {unknown} usually a string, or `undefined` when no prompt is available
   */
  #getLastUserPrompt (tags) {
    // presumably getJsonStringValue parses the JSON string and falls back to the second argument
    const promptInfo = getJsonStringValue(tags['ai.prompt'], {})

    let lastUserPrompt = promptInfo?.prompt ?? undefined
    const messages = promptInfo?.messages

    if (lastUserPrompt === undefined && Array.isArray(messages)) {
      // Walk backwards rather than reversing, so the array is left untouched.
      for (let i = messages.length - 1; i >= 0; i--) {
        if (messages[i]?.role === 'user') {
          lastUserPrompt = messages[i].content
          break
        }
      }
    }

    return Array.isArray(lastUserPrompt)
      ? lastUserPrompt.map(part => part?.text ?? '').join('')
      : lastUserPrompt
  }

  /**
   * Resolves the LLM Observability kind and name to register for the current span.
   *
   * @override
   * @returns {{ kind: string, name: unknown } | undefined} `undefined` when there is no active span
   *   or the span's operation is not one this plugin handles
   */
  getLLMObsSpanRegisterOptions (ctx) {
    const span = ctx.currentStore?.span
    // Same guard as setLLMObsTags: nothing to register without a span.
    if (!span) return

    const operation = getOperation(span)
    const kind = SPAN_NAME_TO_KIND_MAPPING[operation]
    if (!kind) return

    return { kind, name: getLlmObsSpanName(operation, ctx.attributes['ai.telemetry.functionId']) }
  }

  /**
   * Tags the current span according to its operation, delegating to the
   * operation-specific `set*Tags` method.
   *
   * @override
   */
  setLLMObsTags (ctx) {
    const span = ctx.currentStore?.span
    if (!span) return

    const operation = getOperation(span)
    const kind = SPAN_NAME_TO_KIND_MAPPING[operation]
    if (!kind) return

    const tags = getSpanTags(ctx)

    // Only spans that talk to a model directly carry model information.
    if (kind === 'embedding' || kind === 'llm') {
      this._tagger._setTag(span, MODEL_NAME, tags['ai.model.id'])
      this._tagger._setTag(span, MODEL_PROVIDER, getModelProvider(tags))
    }

    switch (operation) {
      case 'embed':
      case 'embedMany':
        this.setEmbeddingWorkflowTags(span, tags)
        break
      case 'doEmbed':
        this.setEmbeddingTags(span, tags)
        break
      case 'generateObject':
      case 'streamObject':
        this.setObjectGenerationTags(span, tags)
        break
      case 'generateText':
      case 'streamText':
        this.setTextGenerationTags(span, tags)
        break
      case 'doGenerate':
      case 'doStream':
        this.setLLMOperationTags(span, tags)
        break
      case 'toolCall':
        this.setToolTags(span, tags)
        break
      default:
        break
    }
  }

  /**
   * Tags an `embed` / `embedMany` workflow span with its input value(s), a
   * textual summary of the returned embeddings, and the generation metadata.
   *
   * @param {import('../../../opentracing/span')} span
   * @param {SpanTags} tags
   */
  setEmbeddingWorkflowTags (span, tags) {
    const inputs = tags['ai.value'] ?? tags['ai.values']
    const parsedInputs = Array.isArray(inputs)
      ? inputs.map(input => getJsonStringValue(input, ''))
      : getJsonStringValue(inputs, '')

    const embeddingsOutput = tags['ai.embedding'] ?? tags['ai.embeddings']
    const isSingleEmbedding = !Array.isArray(embeddingsOutput)
    const numberOfEmbeddings = isSingleEmbedding ? 1 : embeddingsOutput.length
    // The reported size is taken from the single embedding, or from the first of many.
    const firstEmbedding = isSingleEmbedding ? embeddingsOutput : embeddingsOutput[0]
    const embeddingsLength = getJsonStringValue(firstEmbedding, []).length

    const output = `[${numberOfEmbeddings} embedding(s) returned with size ${embeddingsLength}]`
    this._tagger.tagTextIO(span, parsedInputs, output)

    const metadata = getGenerationMetadata(tags)
    this._tagger.tagMetadata(span, metadata)
  }

  /**
   * Tags a `doEmbed` span with its inputs, a textual summary of the returned
   * embeddings, and token metrics. Does nothing when `ai.values` is not an array.
   *
   * @param {import('../../../opentracing/span')} span
   * @param {SpanTags} tags
   */
  setEmbeddingTags (span, tags) {
    const inputs = tags['ai.values']
    if (!Array.isArray(inputs)) return

    const parsedInputs = inputs.map(input => getJsonStringValue(input, ''))

    const embeddingsOutput = tags['ai.embeddings']
    const numberOfEmbeddings = embeddingsOutput?.length
    const embeddingsLength = getJsonStringValue(embeddingsOutput?.[0], []).length
    const output = `[${numberOfEmbeddings} embedding(s) returned with size ${embeddingsLength}]`

    this._tagger.tagEmbeddingIO(span, parsedInputs, output)

    // The single token count is reported as both input and total tokens.
    const usage = tags['ai.usage.tokens']
    this._tagger.tagMetrics(span, {
      inputTokens: usage,
      totalTokens: usage
    })
  }

  /**
   * Tags a `generateObject` / `streamObject` workflow span with the prompt,
   * the generated object, and the generation metadata extended with the schema.
   *
   * @param {import('../../../opentracing/span')} span
   * @param {SpanTags} tags
   */
  setObjectGenerationTags (span, tags) {
    const prompt = this.#getLastUserPrompt(tags)
    const output = tags['ai.response.object']
    this._tagger.tagTextIO(span, prompt, output)

    const metadata = getGenerationMetadata(tags) ?? {}
    metadata.schema = getJsonStringValue(tags['ai.schema'], {})
    this._tagger.tagMetadata(span, metadata)
  }

  /**
   * Tags a `generateText` / `streamText` workflow span with the prompt, the
   * generated text, and the generation metadata.
   *
   * @param {import('../../../opentracing/span')} span
   * @param {SpanTags} tags
   */
  setTextGenerationTags (span, tags) {
    const prompt = this.#getLastUserPrompt(tags)
    const output = tags['ai.response.text']
    this._tagger.tagTextIO(span, prompt, output)

    const metadata = getGenerationMetadata(tags)
    this._tagger.tagMetadata(span, metadata)
  }

  /**
   * Tags a `doGenerate` / `doStream` span with the formatted input messages,
   * the formatted output message, model metadata, and usage metrics.
   *
   * @param {import('../../../opentracing/span')} span
   * @param {SpanTags} tags
   */
  setLLMOperationTags (span, tags) {
    const rawTools = tags['ai.prompt.tools']
    // The helper is called through an arrow function with an explicit `{}` fallback:
    // passing it straight to `map` would hand it the element index as its second argument.
    const toolsForModel = Array.isArray(rawTools)
      ? rawTools.map(tool => getJsonStringValue(tool, {}))
      : undefined

    const inputMessages = getJsonStringValue(tags['ai.prompt.messages'], [])
    const parsedInputMessages = []
    for (const message of inputMessages) {
      // A single SDK message can expand into several formatted messages (tool results).
      parsedInputMessages.push(...this.formatMessage(message, toolsForModel))
    }

    const outputMessage = this.formatOutputMessage(tags, toolsForModel)
    this._tagger.tagLLMIO(span, parsedInputMessages, outputMessage)

    const metadata = getModelMetadata(tags)
    this._tagger.tagMetadata(span, metadata)

    const usage = getUsage(tags)
    this._tagger.tagMetrics(span, usage)
  }

  /**
   * Tags a `toolCall` span with the tool name (when one can be determined) and
   * the tool call arguments and result.
   *
   * @param {import('../../../opentracing/span')} span
   * @param {SpanTags} tags
   */
  setToolTags (span, tags) {
    const toolCallId = tags['ai.toolCall.id']

    // The remembered name is single-use: drop it so the map does not grow forever.
    const rememberedName = this.#toolCallIdsToName.get(toolCallId)
    this.#toolCallIdsToName.delete(toolCallId)

    const name = getToolNameFromTags(tags) ?? rememberedName
    if (name) this._tagger._setTag(span, NAME, name)

    const input = tags['ai.toolCall.args']
    const output = tags['ai.toolCall.result']
    this._tagger.tagTextIO(span, input, output)
  }

  /**
   * Builds the assistant output message from the response tags, including any
   * tool calls the model requested. Each resolved tool name is remembered by
   * tool call ID so the later tool span can be named.
   *
   * @param {SpanTags} tags
   * @param {ToolForModel[] | null | undefined} toolsForModel
   * @returns {{ role: 'assistant', content: unknown, toolCalls: Array<{
   *   arguments: unknown, name: string | undefined, toolId: string, type: string }> }}
   */
  formatOutputMessage (tags, toolsForModel) {
    const outputMessageText = tags['ai.response.text'] ?? tags['ai.response.object']
    const outputMessageToolCalls = getJsonStringValue(tags['ai.response.toolCalls'], [])

    const formattedToolCalls = []
    for (const toolCall of outputMessageToolCalls) {
      const toolArgs = toolCall.args ?? toolCall.input
      // Arguments may arrive JSON-encoded or already as a value.
      const toolCallArgs = typeof toolArgs === 'string' ? getJsonStringValue(toolArgs, {}) : toolArgs

      const name = this.#resolveToolName(toolCall.toolName, toolsForModel)
      if (name !== undefined) {
        this.#toolCallIdsToName.set(toolCall.toolCallId, name)
      }

      formattedToolCalls.push({
        arguments: toolCallArgs,
        name,
        toolId: toolCall.toolCallId,
        type: toolCall.toolCallType ?? 'function'
      })
    }

    return {
      role: 'assistant',
      content: outputMessageText,
      toolCalls: formattedToolCalls
    }
  }

  /**
   * Returns a list of formatted messages from a message object.
   * Most of these will just be one entry, but in the case of a "tool" role,
   * it is possible to have multiple tool call results in a single message that we
   * need to split into multiple messages.
   *
   * Parts without any text contribute nothing to the content (rather than the
   * literal string "undefined"). Unknown roles produce an empty list.
   *
   * @param {AiSdkMessage} message
   * @param {ToolForModel[] | null | undefined} toolsForModel
   * @returns {Array<{role: string, content: string, toolId?: string,
   * toolCalls?: Array<{arguments: string, name: string, toolId: string, type: string}>}>}
   */
  formatMessage (message, toolsForModel) {
    const { role, content } = message

    switch (role) {
      case 'system':
        return [{ role, content }]

      case 'user': {
        let finalContent = ''
        for (const part of content) {
          if (part.type === 'text') {
            finalContent += part.text ?? ''
          }
        }
        return [{ role, content: finalContent }]
      }

      case 'assistant': {
        const toolCalls = []
        let finalContent = ''

        for (const part of content) {
          const { type } = part

          // TODO(sabrenner): do we want to include reasoning?
          if (type === 'text' || type === 'reasoning' || type === 'redacted-reasoning') {
            finalContent += part.text ?? part.data ?? ''
          } else if (type === 'tool-call') {
            toolCalls.push({
              arguments: part.args ?? part.input,
              name: this.#resolveToolName(part.toolName, toolsForModel),
              toolId: part.toolCallId,
              type: 'function'
            })
          }
        }

        const finalMessage = { role, content: finalContent }
        // Only attach the key when the assistant actually requested tool calls.
        if (toolCalls.length > 0) {
          finalMessage.toolCalls = toolCalls
        }
        return [finalMessage]
      }

      case 'tool': {
        const finalMessages = []
        for (const part of content) {
          if (part.type === 'tool-result') {
            finalMessages.push({
              role,
              content: getToolCallResultContent(part),
              toolId: part.toolCallId
            })
          }
        }
        return finalMessages
      }

      default:
        return []
    }
  }
}
// The plugin class is this module's only export.
module.exports = VercelAILLMObsPlugin