ai-utils.js

Build AI applications, chatbots, and agents with JavaScript and TypeScript.

import z from "zod"; import { AbstractModel } from "../../../model-function/AbstractModel.js"; import { PromptMappingTextGenerationModel } from "../../../prompt/PromptMappingTextGenerationModel.js"; import { callWithRetryAndThrottle } from "../../../util/api/callWithRetryAndThrottle.js"; import { createJsonResponseHandler, postJsonToApi, } from "../../../util/api/postToApi.js"; import { failedOpenAICallResponseHandler } from "../OpenAIError.js"; import { TikTokenTokenizer } from "../TikTokenTokenizer.js"; import { createOpenAIChatFullDeltaIterableQueue, } from "./OpenAIChatStreamIterable.js"; import { countOpenAIChatPromptTokens } from "./countOpenAIChatMessageTokens.js"; /* * Available OpenAI chat models, their token limits, and pricing. * * @see https://platform.openai.com/docs/models/ * @see https://openai.com/pricing */ export const OPENAI_CHAT_MODELS = { "gpt-4": { contextWindowSize: 8192, promptTokenCostInMillicents: 3, completionTokenCostInMillicents: 6, }, "gpt-4-0314": { contextWindowSize: 8192, promptTokenCostInMillicents: 3, completionTokenCostInMillicents: 6, }, "gpt-4-0613": { contextWindowSize: 8192, promptTokenCostInMillicents: 3, completionTokenCostInMillicents: 6, }, "gpt-4-32k": { contextWindowSize: 32768, promptTokenCostInMillicents: 6, completionTokenCostInMillicents: 12, }, "gpt-4-32k-0314": { contextWindowSize: 32768, promptTokenCostInMillicents: 6, completionTokenCostInMillicents: 12, }, "gpt-4-32k-0613": { contextWindowSize: 32768, promptTokenCostInMillicents: 6, completionTokenCostInMillicents: 12, }, "gpt-3.5-turbo": { contextWindowSize: 4096, promptTokenCostInMillicents: 0.15, completionTokenCostInMillicents: 0.2, }, "gpt-3.5-turbo-0301": { contextWindowSize: 4096, promptTokenCostInMillicents: 0.15, completionTokenCostInMillicents: 0.2, }, "gpt-3.5-turbo-0613": { contextWindowSize: 4096, promptTokenCostInMillicents: 0.15, completionTokenCostInMillicents: 0.2, }, "gpt-3.5-turbo-16k": { contextWindowSize: 16384, promptTokenCostInMillicents: 0.3, completionTokenCostInMillicents: 0.4, }, "gpt-3.5-turbo-16k-0613": { contextWindowSize: 16384, promptTokenCostInMillicents: 0.3, completionTokenCostInMillicents: 0.4, }, }; export const isOpenAIChatModel = (model) => model in OPENAI_CHAT_MODELS; export const calculateOpenAIChatCostInMillicents = ({ model, response, }) => response.usage.prompt_tokens * OPENAI_CHAT_MODELS[model].promptTokenCostInMillicents + response.usage.completion_tokens * OPENAI_CHAT_MODELS[model].completionTokenCostInMillicents; /** * Create a text generation model that calls the OpenAI chat completion API. 
/**
 * Create a text generation model that calls the OpenAI chat completion API.
 *
 * @see https://platform.openai.com/docs/api-reference/chat/create
 *
 * @example
 * const model = new OpenAIChatModel({
 *   model: "gpt-3.5-turbo",
 *   temperature: 0.7,
 *   maxTokens: 500,
 * });
 *
 * const { text } = await generateText([
 *   model,
 *   OpenAIChatMessage.system(
 *     "Write a short story about a robot learning to love:"
 *   ),
 * ]);
 */
export class OpenAIChatModel extends AbstractModel {
  constructor(settings) {
    super({ settings });
    // Compiled class-field definitions for provider, contextWindowSize, and tokenizer.
    Object.defineProperty(this, "provider", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: "openai",
    });
    Object.defineProperty(this, "contextWindowSize", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: void 0,
    });
    Object.defineProperty(this, "tokenizer", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: void 0,
    });
    this.tokenizer = new TikTokenTokenizer({ model: this.settings.model });
    this.contextWindowSize =
      OPENAI_CHAT_MODELS[this.settings.model].contextWindowSize;
  }
  get modelName() {
    return this.settings.model;
  }
  get apiKey() {
    const apiKey = this.settings.apiKey ?? process.env.OPENAI_API_KEY;
    if (apiKey == null) {
      throw new Error(
        `OpenAI API key is missing. Pass it as an argument to the constructor or set it as an environment variable named OPENAI_API_KEY.`
      );
    }
    return apiKey;
  }
  /**
   * Counts the prompt tokens required for the messages. This includes the message base tokens
   * and the prompt base tokens.
   */
  countPromptTokens(messages) {
    return countOpenAIChatPromptTokens({
      messages,
      model: this.modelName,
    });
  }
  async callAPI(messages, options) {
    const { run, settings, responseFormat } = options;
    // Later sources win: defaults, then constructor settings, then per-call
    // settings, then the fixed call arguments.
    const callSettings = Object.assign(
      {
        apiKey: this.apiKey,
        user: this.settings.isUserIdForwardingEnabled ? run?.userId : undefined,
      },
      this.settings,
      settings,
      {
        abortSignal: run?.abortSignal,
        messages,
        responseFormat,
      }
    );
    return callWithRetryAndThrottle({
      retry: callSettings.retry,
      throttle: callSettings.throttle,
      call: async () => callOpenAIChatCompletionAPI(callSettings),
    });
  }
  generateTextResponse(prompt, options) {
    return this.callAPI(prompt, {
      ...options,
      responseFormat: OpenAIChatResponseFormat.json,
    });
  }
  extractText(response) {
    return response.choices[0].message.content;
  }
  generateDeltaStreamResponse(prompt, options) {
    return this.callAPI(prompt, {
      ...options,
      responseFormat: OpenAIChatResponseFormat.deltaIterable,
    });
  }
  extractTextDelta(fullDelta) {
    return fullDelta[0]?.delta.content ?? undefined;
  }
  /**
   * JSON generation uses the OpenAI GPT function calling API.
   * It provides a single function specification and instructs the model to provide parameters for calling the function.
   * The result is returned as parsed JSON.
   *
   * @see https://platform.openai.com/docs/guides/gpt/function-calling
   */
  generateJsonResponse(prompt, options) {
    const settingsWithFunctionCall = Object.assign({}, options, {
      functionCall: prompt.functionCall,
      functions: prompt.functions,
    });
    return this.callAPI(prompt.messages, {
      responseFormat: OpenAIChatResponseFormat.json,
      functionId: options?.functionId,
      settings: settingsWithFunctionCall,
      run: options?.run,
    });
  }
  mapPrompt(promptMapping) {
    return new PromptMappingTextGenerationModel({
      model: this.withStopTokens(promptMapping.stopTokens),
      promptMapping,
    });
  }
  withSettings(additionalSettings) {
    return new OpenAIChatModel(
      Object.assign({}, this.settings, additionalSettings)
    );
  }
  get maxCompletionTokens() {
    return this.settings.maxTokens;
  }
  withMaxCompletionTokens(maxCompletionTokens) {
    return this.withSettings({ maxTokens: maxCompletionTokens });
  }
  withStopTokens(stopTokens) {
    return this.withSettings({ stop: stopTokens });
  }
}
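// Sketch of the function-calling path that generateJsonResponse wires up above.
// The prompt shape ({ messages, functions, functionCall }) mirrors what the
// method destructures; the "extractUser" function spec, the message content,
// and the manual JSON.parse of the returned arguments are illustrative
// assumptions, not part of this module.
//
//   const model = new OpenAIChatModel({ model: "gpt-3.5-turbo" });
//   const response = await model.generateJsonResponse({
//     messages: [{ role: "user", content: "John is 25 years old." }],
//     functions: [
//       {
//         name: "extractUser",
//         description: "Extract user details from the text.",
//         parameters: {
//           type: "object",
//           properties: {
//             name: { type: "string" },
//             age: { type: "number" },
//           },
//         },
//       },
//     ],
//     functionCall: { name: "extractUser" },
//   });
//   // function_call is optional in the response schema, so guard before parsing:
//   const args = response.choices[0].message.function_call
//     ? JSON.parse(response.choices[0].message.function_call.arguments)
//     : undefined;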
const openAIChatResponseSchema = z.object({
  id: z.string(),
  object: z.literal("chat.completion"),
  created: z.number(),
  model: z.string(),
  choices: z.array(
    z.object({
      message: z.object({
        role: z.literal("assistant"),
        content: z.string().nullable(),
        function_call: z
          .object({
            name: z.string(),
            arguments: z.string(),
          })
          .optional(),
      }),
      index: z.number(),
      logprobs: z.nullable(z.any()),
      finish_reason: z.string(),
    })
  ),
  usage: z.object({
    prompt_tokens: z.number(),
    completion_tokens: z.number(),
    total_tokens: z.number(),
  }),
});

async function callOpenAIChatCompletionAPI({
  baseUrl = "https://api.openai.com/v1",
  abortSignal,
  responseFormat,
  apiKey,
  model,
  messages,
  functions,
  functionCall,
  temperature,
  topP,
  n,
  stop,
  maxTokens,
  presencePenalty,
  frequencyPenalty,
  user,
}) {
  return postJsonToApi({
    url: `${baseUrl}/chat/completions`,
    apiKey,
    body: {
      stream: responseFormat.stream,
      model,
      messages,
      functions,
      function_call: functionCall,
      temperature,
      top_p: topP,
      n,
      stop,
      max_tokens: maxTokens,
      presence_penalty: presencePenalty,
      frequency_penalty: frequencyPenalty,
      user,
    },
    failedResponseHandler: failedOpenAICallResponseHandler,
    successfulResponseHandler: responseFormat.handler,
    abortSignal,
  });
}

export const OpenAIChatResponseFormat = {
  /**
   * Returns the response as a JSON object.
   */
  json: {
    stream: false,
    handler: createJsonResponseHandler(openAIChatResponseSchema),
  },
  /**
   * Returns an async iterable over the text deltas (only the text delta of the first choice).
   */
  deltaIterable: {
    stream: true,
    handler: async ({ response }) =>
      createOpenAIChatFullDeltaIterableQueue(response.body),
  },
};
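// Sketch of the streaming path. The deltaIterable format yields "full delta"
// chunks from createOpenAIChatFullDeltaIterableQueue; the exact chunk shape is
// assumed here to match what extractTextDelta reads (the first choice's delta).
//
//   const model = new OpenAIChatModel({ model: "gpt-3.5-turbo" });
//   const deltas = await model.generateDeltaStreamResponse([
//     { role: "user", content: "Write a haiku about streams." },
//   ]);
//   for await (const fullDelta of deltas) {
//     const textDelta = model.extractTextDelta(fullDelta);
//     if (textDelta != null) process.stdout.write(textDelta);
//   }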