@langchain/openai
OpenAI integrations for LangChain.js
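/**
 * Example usage (a minimal sketch; assumes the package's public `ChatOpenAI`
 * chat model export, which builds on this Completions implementation, and an
 * `OPENAI_API_KEY` set in the environment):
 *
 *   import { ChatOpenAI } from "@langchain/openai";
 *
 *   const model = new ChatOpenAI({ model: "gpt-4o-mini", temperature: 0 });
 *   const response = await model.invoke("Say hello in one word.");
 *   console.log(response.content);
 */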
import { wrapOpenAIClientError } from "../utils/client.js";
import { formatToOpenAIToolChoice } from "../utils/tools.js";
import { isReasoningModel } from "../utils/misc.js";
import { BaseChatOpenAI } from "./base.js";
import { convertCompletionsDeltaToBaseMessageChunk, convertCompletionsMessageToBaseMessage, convertMessagesToCompletionsMessageParams } from "../converters/completions.js";
import { AIMessage, AIMessageChunk, isAIMessage } from "@langchain/core/messages";
import { ChatGenerationChunk } from "@langchain/core/outputs";
//#region src/chat_models/completions.ts
/**
 * Chat model implementation backed by OpenAI's Chat Completions API
 * (`client.chat.completions`).
 * @internal
 */
var ChatOpenAICompletions = class extends BaseChatOpenAI {
/**
 * Builds the request body for the Chat Completions endpoint from the model's
 * constructor options and the per-call options.
 * @internal
 */
invocationParams(options, extra) {
let strict;
if (options?.strict !== void 0) strict = options.strict;
else if (this.supportsStrictToolCalling !== void 0) strict = this.supportsStrictToolCalling;
let streamOptionsConfig = {};
if (options?.stream_options !== void 0) streamOptionsConfig = { stream_options: options.stream_options };
else if (this.streamUsage && (this.streaming || extra?.streaming)) streamOptionsConfig = { stream_options: { include_usage: true } };
const params = {
model: this.model,
temperature: this.temperature,
top_p: this.topP,
frequency_penalty: this.frequencyPenalty,
presence_penalty: this.presencePenalty,
logprobs: this.logprobs,
top_logprobs: this.topLogprobs,
n: this.n,
logit_bias: this.logitBias,
stop: options?.stop ?? this.stopSequences,
user: this.user,
stream: this.streaming,
functions: options?.functions,
function_call: options?.function_call,
tools: options?.tools?.length ? options.tools.map((tool) => this._convertChatOpenAIToolToCompletionsTool(tool, { strict })) : void 0,
tool_choice: formatToOpenAIToolChoice(options?.tool_choice),
response_format: this._getResponseFormat(options?.response_format),
seed: options?.seed,
...streamOptionsConfig,
parallel_tool_calls: options?.parallel_tool_calls,
...this.audio || options?.audio ? { audio: this.audio || options?.audio } : {},
...this.modalities || options?.modalities ? { modalities: this.modalities || options?.modalities } : {},
...this.modelKwargs,
prompt_cache_key: options?.promptCacheKey ?? this.promptCacheKey,
prompt_cache_retention: options?.promptCacheRetention ?? this.promptCacheRetention,
verbosity: options?.verbosity ?? this.verbosity
};
if (options?.prediction !== void 0) params.prediction = options.prediction;
if (this.service_tier !== void 0) params.service_tier = this.service_tier;
if (options?.service_tier !== void 0) params.service_tier = options.service_tier;
const reasoning = this._getReasoningParams(options);
if (reasoning !== void 0 && reasoning.effort !== void 0) params.reasoning_effort = reasoning.effort;
if (isReasoningModel(params.model)) params.max_completion_tokens = this.maxTokens === -1 ? void 0 : this.maxTokens;
else params.max_tokens = this.maxTokens === -1 ? void 0 : this.maxTokens;
return params;
}
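/**
 * Generates chat completions for the given messages.
 *
 * When `stream` is enabled, the response is consumed via
 * `_streamResponseChunks`, chunks are aggregated per completion index, and
 * token usage is estimated locally. Otherwise a single non-streaming request
 * is made and token usage is read from the API response.
 */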
async _generate(messages, options, runManager) {
const usageMetadata = {};
const params = this.invocationParams(options);
const messagesMapped = convertMessagesToCompletionsMessageParams({
messages,
model: this.model
});
if (params.stream) {
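// Streaming requested: delegate to the chunk stream, aggregate chunks per
// completion index, and estimate token usage from the prompt and generations.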
const stream = this._streamResponseChunks(messages, options, runManager);
const finalChunks = {};
for await (const chunk of stream) {
chunk.message.response_metadata = {
...chunk.generationInfo,
...chunk.message.response_metadata
};
const index = chunk.generationInfo?.completion ?? 0;
if (finalChunks[index] === void 0) finalChunks[index] = chunk;
else finalChunks[index] = finalChunks[index].concat(chunk);
}
const generations = Object.entries(finalChunks).sort(([aKey], [bKey]) => parseInt(aKey, 10) - parseInt(bKey, 10)).map(([_, value]) => value);
const { functions, function_call } = this.invocationParams(options);
const promptTokenUsage = await this._getEstimatedTokenCountFromPrompt(messages, functions, function_call);
const completionTokenUsage = await this._getNumTokensFromGenerations(generations);
usageMetadata.input_tokens = promptTokenUsage;
usageMetadata.output_tokens = completionTokenUsage;
usageMetadata.total_tokens = promptTokenUsage + completionTokenUsage;
return {
generations,
llmOutput: { estimatedTokenUsage: {
promptTokens: usageMetadata.input_tokens,
completionTokens: usageMetadata.output_tokens,
totalTokens: usageMetadata.total_tokens
} }
};
} else {
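// Non-streaming: issue a single request and read token usage from the response.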
const data = await this.completionWithRetry({
...params,
stream: false,
messages: messagesMapped
}, {
signal: options?.signal,
...options?.options
});
const { completion_tokens: completionTokens, prompt_tokens: promptTokens, total_tokens: totalTokens, prompt_tokens_details: promptTokensDetails, completion_tokens_details: completionTokensDetails } = data?.usage ?? {};
if (completionTokens) usageMetadata.output_tokens = (usageMetadata.output_tokens ?? 0) + completionTokens;
if (promptTokens) usageMetadata.input_tokens = (usageMetadata.input_tokens ?? 0) + promptTokens;
if (totalTokens) usageMetadata.total_tokens = (usageMetadata.total_tokens ?? 0) + totalTokens;
if (promptTokensDetails?.audio_tokens !== null || promptTokensDetails?.cached_tokens !== null) usageMetadata.input_token_details = {
...promptTokensDetails?.audio_tokens !== null && { audio: promptTokensDetails?.audio_tokens },
...promptTokensDetails?.cached_tokens !== null && { cache_read: promptTokensDetails?.cached_tokens }
};
if (completionTokensDetails?.audio_tokens !== null || completionTokensDetails?.reasoning_tokens !== null) usageMetadata.output_token_details = {
...completionTokensDetails?.audio_tokens !== null && { audio: completionTokensDetails?.audio_tokens },
...completionTokensDetails?.reasoning_tokens !== null && { reasoning: completionTokensDetails?.reasoning_tokens }
};
const generations = [];
for (const part of data?.choices ?? []) {
const text = part.message?.content ?? "";
const generation = {
text,
message: this._convertCompletionsMessageToBaseMessage(part.message ?? { role: "assistant" }, data)
};
generation.generationInfo = {
...part.finish_reason ? { finish_reason: part.finish_reason } : {},
...part.logprobs ? { logprobs: part.logprobs } : {}
};
if (isAIMessage(generation.message)) generation.message.usage_metadata = usageMetadata;
generation.message = new AIMessage(Object.fromEntries(Object.entries(generation.message).filter(([key]) => !key.startsWith("lc_"))));
generations.push(generation);
}
return {
generations,
llmOutput: { tokenUsage: {
promptTokens: usageMetadata.input_tokens,
completionTokens: usageMetadata.output_tokens,
totalTokens: usageMetadata.total_tokens
} }
};
}
}
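/**
 * Streams chat completion deltas as `ChatGenerationChunk`s. When the API
 * reports usage for the stream, a final content-less chunk carrying that
 * usage metadata is emitted. Throws if the request was aborted.
 */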
async *_streamResponseChunks(messages, options, runManager) {
const messagesMapped = convertMessagesToCompletionsMessageParams({
messages,
model: this.model
});
const params = {
...this.invocationParams(options, { streaming: true }),
messages: messagesMapped,
stream: true
};
let defaultRole;
const streamIterable = await this.completionWithRetry(params, options);
let usage;
for await (const data of streamIterable) {
const choice = data?.choices?.[0];
if (data.usage) usage = data.usage;
if (!choice) continue;
const { delta } = choice;
if (!delta) continue;
const chunk = this._convertCompletionsDeltaToBaseMessageChunk(delta, data, defaultRole);
defaultRole = delta.role ?? defaultRole;
const newTokenIndices = {
prompt: options.promptIndex ?? 0,
completion: choice.index ?? 0
};
if (typeof chunk.content !== "string") {
console.log("[WARNING]: Received non-string content from OpenAI. This is currently not supported.");
continue;
}
const generationInfo = { ...newTokenIndices };
if (choice.finish_reason != null) {
generationInfo.finish_reason = choice.finish_reason;
generationInfo.system_fingerprint = data.system_fingerprint;
generationInfo.model_name = data.model;
generationInfo.service_tier = data.service_tier;
}
if (this.logprobs) generationInfo.logprobs = choice.logprobs;
const generationChunk = new ChatGenerationChunk({
message: chunk,
text: chunk.content,
generationInfo
});
yield generationChunk;
await runManager?.handleLLMNewToken(generationChunk.text ?? "", newTokenIndices, void 0, void 0, void 0, { chunk: generationChunk });
}
if (usage) {
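// The API reported usage for the stream; emit a final content-less chunk
// carrying it as usage_metadata.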
const inputTokenDetails = {
...usage.prompt_tokens_details?.audio_tokens !== null && { audio: usage.prompt_tokens_details?.audio_tokens },
...usage.prompt_tokens_details?.cached_tokens !== null && { cache_read: usage.prompt_tokens_details?.cached_tokens }
};
const outputTokenDetails = {
...usage.completion_tokens_details?.audio_tokens !== null && { audio: usage.completion_tokens_details?.audio_tokens },
...usage.completion_tokens_details?.reasoning_tokens !== null && { reasoning: usage.completion_tokens_details?.reasoning_tokens }
};
const generationChunk = new ChatGenerationChunk({
message: new AIMessageChunk({
content: "",
response_metadata: { usage: { ...usage } },
usage_metadata: {
input_tokens: usage.prompt_tokens,
output_tokens: usage.completion_tokens,
total_tokens: usage.total_tokens,
...Object.keys(inputTokenDetails).length > 0 && { input_token_details: inputTokenDetails },
...Object.keys(outputTokenDetails).length > 0 && { output_token_details: outputTokenDetails }
}
}),
text: ""
});
yield generationChunk;
}
if (options.signal?.aborted) throw new Error("AbortError");
}
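/**
 * Calls the OpenAI SDK's chat completions endpoint through the retry-aware
 * caller. Non-streaming requests with a `json_schema` response format use
 * `client.chat.completions.parse` so structured output is parsed by the SDK;
 * all other requests use `client.chat.completions.create`. SDK errors are
 * normalized via `wrapOpenAIClientError`.
 */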
async completionWithRetry(request, requestOptions) {
const clientOptions = this._getClientOptions(requestOptions);
const isParseableFormat = request.response_format && request.response_format.type === "json_schema";
return this.caller.call(async () => {
try {
if (isParseableFormat && !request.stream) return await this.client.chat.completions.parse(request, clientOptions);
else return await this.client.chat.completions.create(request, clientOptions);
} catch (e) {
const error = wrapOpenAIClientError(e);
throw error;
}
});
}
/**
 * @deprecated
 * This logic has been hoisted into the publicly exported
 * `convertCompletionsDeltaToBaseMessageChunk` function, but it is kept here
 * as an overridable method for backwards compatibility with chat models that
 * depend on ChatOpenAICompletions. It will be removed in a future release.
 */
_convertCompletionsDeltaToBaseMessageChunk(delta, rawResponse, defaultRole) {
return convertCompletionsDeltaToBaseMessageChunk({
delta,
rawResponse,
includeRawResponse: this.__includeRawResponse,
defaultRole
});
}
/**
 * @deprecated
 * This logic has been hoisted into the publicly exported
 * `convertCompletionsMessageToBaseMessage` function, but it is kept here
 * as an overridable method for backwards compatibility with chat models that
 * depend on ChatOpenAICompletions. It will be removed in a future release.
 */
_convertCompletionsMessageToBaseMessage(message, rawResponse) {
return convertCompletionsMessageToBaseMessage({
message,
rawResponse,
includeRawResponse: this.__includeRawResponse
});
}
};
//#endregion
export { ChatOpenAICompletions };
//# sourceMappingURL=completions.js.map