@langgraph-js/pro
The Pro SDK for LangGraph - seamlessly integrate your AI agents with frontend interfaces and build complex AI workflows
JavaScript
import { AIMessage, AIMessageChunk, isAIMessage } from "@langchain/core/messages";
import { ChatGenerationChunk } from "@langchain/core/outputs";
import { wrapOpenAIClientError, BaseChatOpenAI } from "@langchain/openai";
import { formatToOpenAIToolChoice } from "./tools.js";
import { convertCompletionsDeltaToBaseMessageChunk, convertCompletionsMessageToBaseMessage, convertMessagesToCompletionsMessageParams } from "./converter.js";
/**
* OpenAI Chat Completions API implementation.
* Supports DeepSeek and other reasoning models by surfacing `reasoning_content`.
* @internal
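*
* @example
* // Illustrative sketch only; the model name and base URL below are assumptions,
* // not defaults of this class.
* const model = new ChatOpenAICompletions({
*   model: "deepseek-reasoner",
*   configuration: { baseURL: "https://api.deepseek.com" },
* });
* const result = await model.invoke([{ role: "user", content: "Why is the sky blue?" }]);
* // Reasoning models expose their trace via result.additional_kwargs.reasoning_content.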
*/
export class ChatOpenAICompletions extends BaseChatOpenAI {
/** @internal */
invocationParams(options, extra) {
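// Per-call `strict` takes precedence over the model-level `supportsStrictToolCalling` default.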
let strict;
if (options?.strict !== undefined) {
strict = options.strict;
}
else if (this.supportsStrictToolCalling !== undefined) {
strict = this.supportsStrictToolCalling;
}
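// Explicit stream_options win; otherwise request usage reporting when streaming with streamUsage enabled.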
let streamOptionsConfig = {};
if (options?.stream_options !== undefined) {
streamOptionsConfig = { stream_options: options.stream_options };
}
else if (this.streamUsage && (this.streaming || extra?.streaming)) {
streamOptionsConfig = { stream_options: { include_usage: true } };
}
const params = {
model: this.model,
temperature: this.temperature,
top_p: this.topP,
frequency_penalty: this.frequencyPenalty,
presence_penalty: this.presencePenalty,
logprobs: this.logprobs,
top_logprobs: this.topLogprobs,
n: this.n,
logit_bias: this.logitBias,
stop: options?.stop ?? this.stopSequences,
user: this.user,
// If include_usage or streamUsage is set, stream must also be set to true.
stream: this.streaming,
functions: options?.functions,
function_call: options?.function_call,
tools: options?.tools?.length ? options.tools.map((tool) => this._convertChatOpenAIToolToCompletionsTool(tool, { strict })) : undefined,
tool_choice: formatToOpenAIToolChoice(options?.tool_choice),
response_format: this._getResponseFormat(options?.response_format),
seed: options?.seed,
...streamOptionsConfig,
parallel_tool_calls: options?.parallel_tool_calls,
...(this.audio || options?.audio ? { audio: this.audio || options?.audio } : {}),
...(this.modalities || options?.modalities ? { modalities: this.modalities || options?.modalities } : {}),
...this.modelKwargs,
prompt_cache_key: options?.promptCacheKey ?? this.promptCacheKey,
verbosity: options?.verbosity ?? this.verbosity,
};
if (options?.prediction !== undefined) {
params.prediction = options.prediction;
}
if (this.service_tier !== undefined) {
params.service_tier = this.service_tier;
}
if (options?.service_tier !== undefined) {
params.service_tier = options.service_tier;
}
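// Reasoning models use reasoning_effort and max_completion_tokens instead of max_tokens.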
const reasoning = this._getReasoningParams(options);
if (reasoning !== undefined && reasoning.effort !== undefined) {
params.reasoning_effort = reasoning.effort;
}
if (this.reasoning) {
params.max_completion_tokens = this.maxTokens === -1 ? undefined : this.maxTokens;
}
else {
params.max_tokens = this.maxTokens === -1 ? undefined : this.maxTokens;
}
return params;
}
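/**
* Runs a single Chat Completions request; when streaming is enabled it delegates
* to `_streamResponseChunks` and aggregates the yielded chunks.
* @internal
*/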
async _generate(messages, options, runManager) {
const usageMetadata = {};
const params = this.invocationParams(options);
const messagesMapped = convertMessagesToCompletionsMessageParams({
messages,
model: this.model,
});
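// Streaming path: merge chunks per completion index and estimate token usage locally.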
if (params.stream) {
const stream = this._streamResponseChunks(messages, options, runManager);
const finalChunks = {};
for await (const chunk of stream) {
chunk.message.response_metadata = {
...chunk.generationInfo,
...chunk.message.response_metadata,
};
const index = chunk.generationInfo?.completion ?? 0;
if (finalChunks[index] === undefined) {
finalChunks[index] = chunk;
}
else {
finalChunks[index] = finalChunks[index].concat(chunk);
}
}
const generations = Object.entries(finalChunks)
.sort(([aKey], [bKey]) => parseInt(aKey, 10) - parseInt(bKey, 10))
.map(([_, value]) => value);
const { functions, function_call } = this.invocationParams(options);
// OpenAI does not report token usage in this streaming path,
// so fall back to estimation.
const promptTokenUsage = await this._getEstimatedTokenCountFromPrompt(messages, functions, function_call);
const completionTokenUsage = await this._getNumTokensFromGenerations(generations);
usageMetadata.input_tokens = promptTokenUsage;
usageMetadata.output_tokens = completionTokenUsage;
usageMetadata.total_tokens = promptTokenUsage + completionTokenUsage;
return {
generations,
llmOutput: {
estimatedTokenUsage: {
promptTokens: usageMetadata.input_tokens,
completionTokens: usageMetadata.output_tokens,
totalTokens: usageMetadata.total_tokens,
},
},
};
}
else {
const data = await this.completionWithRetry({
...params,
stream: false,
messages: messagesMapped,
}, {
signal: options?.signal,
...options?.options,
});
const {
    completion_tokens: completionTokens,
    prompt_tokens: promptTokens,
    total_tokens: totalTokens,
    prompt_tokens_details: promptTokensDetails,
    completion_tokens_details: completionTokensDetails,
} = data?.usage ?? {};
if (completionTokens) {
usageMetadata.output_tokens = (usageMetadata.output_tokens ?? 0) + completionTokens;
}
if (promptTokens) {
usageMetadata.input_tokens = (usageMetadata.input_tokens ?? 0) + promptTokens;
}
if (totalTokens) {
usageMetadata.total_tokens = (usageMetadata.total_tokens ?? 0) + totalTokens;
}
if (promptTokensDetails?.audio_tokens !== null || promptTokensDetails?.cached_tokens !== null) {
usageMetadata.input_token_details = {
...(promptTokensDetails?.audio_tokens !== null && {
audio: promptTokensDetails?.audio_tokens,
}),
...(promptTokensDetails?.cached_tokens !== null && {
cache_read: promptTokensDetails?.cached_tokens,
}),
};
}
if (completionTokensDetails?.audio_tokens !== null || completionTokensDetails?.reasoning_tokens !== null) {
usageMetadata.output_token_details = {
...(completionTokensDetails?.audio_tokens !== null && {
audio: completionTokensDetails?.audio_tokens,
}),
...(completionTokensDetails?.reasoning_tokens !== null && {
reasoning: completionTokensDetails?.reasoning_tokens,
}),
};
}
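// Convert each returned choice into a generation, preserving any reasoning content.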
const generations = [];
for (const part of data?.choices ?? []) {
const text = part.message?.content ?? "";
/** @ts-ignore */
const reasoning_content = part.message?.reasoning_content ?? part.message?.reasoning ?? "";
const generation = {
text,
message: this._convertCompletionsMessageToBaseMessage(part.message ?? { role: "assistant" }, data),
};
generation.message.additional_kwargs = {
...generation.message.additional_kwargs,
reasoning_content,
};
generation.generationInfo = {
...(part.finish_reason ? { finish_reason: part.finish_reason } : {}),
...(part.logprobs ? { logprobs: part.logprobs } : {}),
};
if (isAIMessage(generation.message)) {
generation.message.usage_metadata = usageMetadata;
}
// Fields are not serialized unless passed to the constructor
// Doing this ensures all fields on the message are serialized
generation.message = new AIMessage(Object.fromEntries(Object.entries(generation.message).filter(([key]) => !key.startsWith("lc_"))));
generations.push(generation);
}
return {
generations,
llmOutput: {
tokenUsage: {
promptTokens: usageMetadata.input_tokens,
completionTokens: usageMetadata.output_tokens,
totalTokens: usageMetadata.total_tokens,
},
},
};
}
}
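/**
* Streams the Chat Completions response, yielding one chunk per delta and a
* final usage-only chunk when the API reports token usage.
* @internal
*/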
async *_streamResponseChunks(messages, options, runManager) {
const messagesMapped = convertMessagesToCompletionsMessageParams({
messages,
model: this.model,
});
const params = {
...this.invocationParams(options, {
streaming: true,
}),
messages: messagesMapped,
stream: true,
};
let defaultRole;
const streamIterable = await this.completionWithRetry(params, options);
let usage;
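// Content arrives via deltas; token usage, when reported, is captured from `data.usage`.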
for await (const data of streamIterable) {
const choice = data?.choices?.[0];
if (data.usage) {
usage = data.usage;
}
if (!choice) {
continue;
}
const { delta } = choice;
if (!delta) {
continue;
}
const chunk = this._convertCompletionsDeltaToBaseMessageChunk(delta, data, defaultRole);
/** @ts-ignore */
const reasoning_content = delta.reasoning_content ?? delta.reasoning ?? "";
if (reasoning_content) {
chunk.additional_kwargs = {
...chunk.additional_kwargs,
reasoning_content,
};
}
defaultRole = delta.role ?? defaultRole;
const newTokenIndices = {
prompt: options.promptIndex ?? 0,
completion: choice.index ?? 0,
};
if (typeof chunk.content !== "string") {
console.log("[WARNING]: Received non-string content from OpenAI. This is currently not supported.");
continue;
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const generationInfo = { ...newTokenIndices };
if (choice.finish_reason != null) {
generationInfo.finish_reason = choice.finish_reason;
// Only include system fingerprint in the last chunk for now
// to avoid concatenation issues
generationInfo.system_fingerprint = data.system_fingerprint;
generationInfo.model_name = data.model;
generationInfo.service_tier = data.service_tier;
}
if (this.logprobs) {
generationInfo.logprobs = choice.logprobs;
}
const generationChunk = new ChatGenerationChunk({
message: chunk,
text: chunk.content,
generationInfo,
});
yield generationChunk;
await runManager?.handleLLMNewToken(generationChunk.text ?? "", newTokenIndices, undefined, undefined, undefined, { chunk: generationChunk });
}
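// Emit one trailing, content-less chunk carrying the aggregated usage metadata.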
if (usage) {
const inputTokenDetails = {
...(usage.prompt_tokens_details?.audio_tokens !== null && {
audio: usage.prompt_tokens_details?.audio_tokens,
}),
...(usage.prompt_tokens_details?.cached_tokens !== null && {
cache_read: usage.prompt_tokens_details?.cached_tokens,
}),
};
const outputTokenDetails = {
...(usage.completion_tokens_details?.audio_tokens !== null && {
audio: usage.completion_tokens_details?.audio_tokens,
}),
...(usage.completion_tokens_details?.reasoning_tokens !== null && {
reasoning: usage.completion_tokens_details?.reasoning_tokens,
}),
};
const generationChunk = new ChatGenerationChunk({
message: new AIMessageChunk({
content: "",
response_metadata: {
usage: { ...usage },
},
usage_metadata: {
input_tokens: usage.prompt_tokens,
output_tokens: usage.completion_tokens,
total_tokens: usage.total_tokens,
...(Object.keys(inputTokenDetails).length > 0 && {
input_token_details: inputTokenDetails,
}),
...(Object.keys(outputTokenDetails).length > 0 && {
output_token_details: outputTokenDetails,
}),
},
}),
text: "",
});
yield generationChunk;
}
if (options.signal?.aborted) {
throw new Error("AbortError");
}
}
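/**
* Calls the OpenAI client through the retrying caller, routing non-streaming
* json_schema requests to `chat.completions.parse`.
* @internal
*/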
async completionWithRetry(request, requestOptions) {
const clientOptions = this._getClientOptions(requestOptions);
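// Non-streaming json_schema requests use `parse` so structured output comes back already parsed.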
const isParseableFormat = request.response_format && request.response_format.type === "json_schema";
return this.caller.call(async () => {
try {
if (isParseableFormat && !request.stream) {
return await this.client.chat.completions.parse(request, clientOptions);
}
else {
return await this.client.chat.completions.create(request, clientOptions);
}
}
catch (e) {
const error = wrapOpenAIClientError(e);
throw error;
}
});
}
/**
* @deprecated
* The logic now lives in a publicly accessible function in a different export,
* but to maintain backwards compatibility with chat models that depend on
* ChatOpenAICompletions, it is kept here as an overridable method. It will be
* removed in a future release.
*/
_convertCompletionsDeltaToBaseMessageChunk(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
delta, rawResponse, defaultRole) {
return convertCompletionsDeltaToBaseMessageChunk({
delta,
rawResponse,
includeRawResponse: this.__includeRawResponse,
defaultRole,
});
}
/**
* @deprecated
* The logic now lives in a publicly accessible function in a different export,
* but to maintain backwards compatibility with chat models that depend on
* ChatOpenAICompletions, it is kept here as an overridable method. It will be
* removed in a future release.
*/
_convertCompletionsMessageToBaseMessage(message, rawResponse) {
return convertCompletionsMessageToBaseMessage({
message,
rawResponse,
includeRawResponse: this.__includeRawResponse,
});
}
}