/*
 * Copyright 2025 New Relic Corporation. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 */
const { AiMonitoringChatSubscriber } = require('../ai-monitoring')
const { AI } = require('#agentlib/metrics/names.js')
const semver = require('semver')
const MIN_STREAM_VERSION = '4.12.2'
const { LlmChatCompletionSummary, LlmChatCompletionMessage } = require('#agentlib/llm-events/openai/index.js')
/**
 * Subscriber for the OpenAI chat completion channel. Records AI monitoring
 * events (LlmChatCompletionSummary and LlmChatCompletionMessage) for both
 * `chat.completions.create` and `responses.create` calls, including
 * streamed responses, which are instrumented by wrapping the stream iterator.
 */
class OpenAIChatCompletions extends AiMonitoringChatSubscriber {
  /**
   * @param {object} params Function parameters.
   * @param {object} params.agent New Relic agent instance.
   * @param {object} params.logger Agent logger.
   * @param {string} [params.channelName] Diagnostics channel name, defaults to `nr_completionsCreate`.
   */
  constructor({ agent, logger, channelName = 'nr_completionsCreate' }) {
    super({ agent, logger, channelName, packageName: 'openai', trackingPrefix: AI.OPENAI.TRACKING_PREFIX, name: AI.OPENAI.COMPLETION })
    this.events = ['asyncEnd']
  }

  /**
   * Channel entry point. Streamed requests are only instrumented on openai
   * versions that support it; older versions log a warning and skip
   * instrumentation entirely.
   * @param {object} data Channel event payload (`arguments`, `moduleVersion`).
   * @param {object} ctx Current context.
   * @returns {object} The context to propagate.
   */
  handler(data, ctx) {
    const { arguments: args, moduleVersion } = data
    const [request] = args
    // `?.` guards against a call made with no request object, which would
    // otherwise throw a TypeError before instrumentation even starts.
    if (request?.stream && semver.lt(moduleVersion, MIN_STREAM_VERSION)) {
      this.logger.warn(`Instrumenting chat completion streams is only supported with openai version ${MIN_STREAM_VERSION}+.`)
      return ctx
    }
    return super.handler(data, ctx)
  }

  /**
   * Handles the `asyncEnd` event once the SDK promise settles. For streamed
   * responses the stream iterator is wrapped so events are recorded after
   * the stream completes; otherwise events are recorded immediately.
   * @param {object} data Channel event payload (`result`, `arguments`, `error`).
   */
  asyncEnd(data) {
    if (!this.enabled) {
      this.logger.debug('`ai_monitoring.enabled` is set to false, stream will not be instrumented.')
      return
    }
    const { result: response, arguments: args, error: err } = data
    const [request] = args
    if (request?.stream && !this.streamingEnabled) {
      this.logger.debug(
        '`ai_monitoring.streaming.enabled` is set to `false`, stream will not be instrumented.'
      )
      this.agent.metrics.getOrCreateMetric(AI.STREAMING_DISABLED).incrementCallCount()
      return
    }
    const ctx = this.agent.tracer.getContext()
    if (request?.stream) {
      this.instrumentStream({
        ctx,
        request,
        response,
        err
      })
    } else {
      this.recordChatCompletionEvents({
        ctx,
        request,
        response,
        err
      })
    }
  }

  /**
   * `chat.completions.create` can return a stream once promise resolves.
   * This wraps the iterator which is a generator function.
   * We will call the original iterator, intercept chunks and yield
   * to the original. On complete, we will construct the new message object
   * with what we have seen in the stream and create the chat completion
   * messages.
   * @param {object} params input params
   * @param {object} params.ctx active context
   * @param {object} params.request chat completion params
   * @param {object} params.response chat completion response
   * @param {Error} [params.err] error if it exists
   */
  instrumentStream({ ctx, request, response, err = null }) {
    const self = this
    if (!(ctx?.segment || ctx?.transaction)) {
      this.logger.debug('Empty context, not instrumenting stream')
      return
    }
    if (err) {
      // If there is an error already e.g. APIConnectionError,
      // the iterator will not be called, so we have to
      // record the chat completion messages with the error now.
      this.recordChatCompletionEvents({
        ctx,
        request,
        response,
        err
      })
      return
    }
    const orig = response.iterator
    response.iterator = async function * wrappedIterator() {
      let content = ''
      let role = ''
      let finishReason = ''
      let chunk
      let timeOfFirstToken
      try {
        const iterator = orig.apply(this, arguments)
        for await (chunk of iterator) {
          if (chunk.choices?.[0]?.delta?.role) {
            role = chunk.choices[0].delta.role
          }
          if (chunk.choices?.[0]?.finish_reason) {
            finishReason = chunk.choices[0].finish_reason
          }
          if (!timeOfFirstToken && (chunk.choices?.[0]?.delta?.content || chunk?.delta)) {
            // If there is no content in the chunk, then we haven't
            // received the first token yet, so we check if we
            // received content and haven't set timeOfFirstToken yet.
            timeOfFirstToken = Date.now()
          }
          content += chunk.choices?.[0]?.delta?.content ?? ''
          yield chunk
        }
      } catch (streamErr) {
        // Capture the streaming error so it is attached to the recorded
        // events in `finally`, then surface it to the consumer unchanged.
        err = streamErr
        throw err
      } finally {
        // when `chunk.choices` is an array that means the completions API is being used
        // we must re-assign the finish reason, and construct a message object with role and content
        // This is because if `include_usage` is enabled, the last chunk only contains usage info and no message deltas
        if (Array.isArray(chunk?.choices)) {
          chunk.choices = [{ finish_reason: finishReason, message: { role, content } }]
          // This means it is the responses API and the entire message is in the response object
        } else if (chunk?.response) {
          chunk = chunk.response
        }
        self.recordChatCompletionEvents({
          ctx,
          request,
          response: chunk,
          timeOfFirstToken,
          err
        })
      }
    }
  }

  /**
   * Creates the OpenAI LlmChatCompletionSummary.
   * @param {object} params Function parameters.
   * @param {Context} params.ctx Current context.
   * @param {object} params.request OpenAI request object.
   * @param {object} [params.response] OpenAI response object, defaults to `{}`.
   * @param {object} [params.err] Error object if one occurred.
   * @param {number} [params.timeOfFirstToken] Timestamp of when the first streaming token was sent.
   * @returns {LlmChatCompletionSummary} The OpenAI LlmChatCompletionSummary instance.
   */
  createCompletionSummary({ ctx, request, response = {}, timeOfFirstToken, err = null }) {
    const { transaction, segment } = ctx
    const headers = ctx?.extras?.headers || {}
    const summary = new LlmChatCompletionSummary({
      agent: this.agent,
      segment,
      transaction,
      request,
      response: { ...response, headers },
      timeOfFirstToken,
      error: !!err
    })
    return summary
  }

  /**
   * Collects all messages for the exchange: the request messages followed
   * by the assistant message extracted from the response.
   * @param {object} params Function parameters.
   * @param {object} params.request OpenAI request object.
   * @param {object} params.response OpenAI response object.
   * @returns {Array<object>} message objects with `content` and `role` fields
   */
  getMessages({ request, response }) {
    return [
      ...this.getMessagesFromRequest(request),
      ...this.getMessageFromResponse(response)
    ]
  }

  /**
   * Parses all messages from the OpenAI request object.
   *
   * @param {object} request The OpenAI SDK request object
   * @returns {Array<object>} an array of message objects with fields `content` and `role`
   */
  getMessagesFromRequest(request) {
    // There are a few different ways to pass messages to OpenAI SDK.
    //
    // For langchain and `chat.completions.create`, messages are passed
    // as an array of objects with `content` and `role` properties
    // to the `messages` field of the request.
    //
    // For `responses.create`, messages are passed as an array of objects
    // with `content` and `role` properties OR as a single string (implied
    // to be a user message) to the `input` field of the request.
    let messages = []
    if (Array.isArray(request?.input)) {
      // Handle array of input messages
      messages = request.input.filter((msg) => msg?.content && msg?.role)
    } else if (typeof request?.input === 'string') {
      // Handle single string input as a user message
      messages = [{ content: request.input, role: 'user' }]
    } else if (Array.isArray(request?.messages)) {
      // Handle array of messages
      messages = request.messages.filter((msg) => msg?.content && msg?.role)
    } else {
      this.logger.warn('No valid messages found in OpenAI request object.')
    }
    return messages
  }

  /**
   * Parses the response from OpenAI and extracts the message content and role.
   *
   * @param {object} response The OpenAI SDK response object
   * @returns {Array<{ content: string, role: string }>} a single-element array with the
   * response message (`content` and `role` fields), or an empty array when the
   * response is a `tool_calls` turn and carries no final message
   */
  getMessageFromResponse(response) {
    let content
    let role
    if (response?.object === 'response') {
      content = response.output?.[0]?.content?.[0]?.text
      role = response.output?.[0]?.role
    } else {
      const choice = response?.choices?.[0]
      // A false response. Don't create a LlmChatCompletionMessage for this
      // the full conversation will happen in another chat completion creation
      if (choice?.finish_reason === 'tool_calls') {
        return []
      }
      content = choice?.message?.content
      role = choice?.message?.role
    }
    return [{ content, role }]
  }

  /**
   * Creates an LlmChatCompletionMessage for a single message in the exchange.
   * @param {object} params Function parameters.
   * @param {object} params.ctx Current context.
   * @param {object} params.request OpenAI request object.
   * @param {object} params.response OpenAI response object.
   * @param {number} params.index Sequence position of the message in the exchange.
   * @param {string} params.completionId Id of the parent LlmChatCompletionSummary.
   * @param {object} params.message The message (`content`, `role`) being recorded.
   * @returns {LlmChatCompletionMessage} The OpenAI LlmChatCompletionMessage instance.
   */
  createCompletionMessage({ ctx, request, response, index, completionId, message }) {
    const { transaction, segment } = ctx
    const headers = ctx?.extras?.headers || {}
    // Check if the given message is the response.
    // The response object differs based on the API called.
    // If it's `responses.create`, we check against `response.output`.
    // If it's `chat.completions.create`, we check against `response.choices`.
    let isResponse
    if (response?.object === 'response') {
      isResponse = message.content === response.output?.[0]?.content?.[0]?.text
    } else if (response?.object?.includes('chat.completion')) {
      isResponse = message.content === response.choices?.[0]?.message?.content
    }
    return new LlmChatCompletionMessage({
      agent: this.agent,
      segment,
      transaction,
      request,
      response: { ...response, headers },
      sequence: index,
      completionId,
      message,
      isResponse
    })
  }
}
module.exports = OpenAIChatCompletions