/*
 * ai-utils.js
 * Version:
 * Build AI applications, chatbots, and agents with JavaScript and TypeScript.
 * 163 lines (162 loc) • 5.8 kB
 * JavaScript
 */
import z from "zod";
import { AbstractModel } from "../../model-function/AbstractModel.js";
import { countTokens } from "../../model-function/tokenize-text/countTokens.js";
import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
import { createJsonResponseHandler, postJsonToApi, } from "../../util/api/postToApi.js";
import { failedOpenAICallResponseHandler } from "./OpenAIError.js";
import { TikTokenTokenizer } from "./TikTokenTokenizer.js";
/**
 * Table of known OpenAI text embedding models, keyed by model name.
 *
 * Every entry carries three numbers:
 * - `contextWindowSize`
 * - `embeddingDimensions`
 * - `tokenCostInMillicents` (multiplied by a token count when costs are
 *   calculated)
 */
export const OPENAI_TEXT_EMBEDDING_MODELS = {
  "text-embedding-ada-002": {
    contextWindowSize: 8192,
    embeddingDimensions: 1536,
    tokenCostInMillicents: 0.01,
  },
};
/**
 * Check whether `model` names a known OpenAI text embedding model.
 *
 * Only own keys of OPENAI_TEXT_EMBEDDING_MODELS count. A plain `in` test
 * would also accept names inherited from Object.prototype (for example
 * "toString" or "constructor"), which are not models.
 *
 * @param {string} model - Candidate model name.
 * @returns {boolean} `true` when `model` is a key of
 *   OPENAI_TEXT_EMBEDDING_MODELS, otherwise `false`.
 */
export const isOpenAIEmbeddingModel = (model) =>
  Object.prototype.hasOwnProperty.call(OPENAI_TEXT_EMBEDDING_MODELS, model);
/**
 * Add up the cost, in millicents, of a collection of embedding responses.
 *
 * For every response, its `usage.total_tokens` is multiplied by the
 * `tokenCostInMillicents` of `model` (looked up in
 * OPENAI_TEXT_EMBEDDING_MODELS) and the products are summed.
 *
 * @param {object} params
 * @param {string} params.model - Key into OPENAI_TEXT_EMBEDDING_MODELS.
 * @param {Iterable<{usage: {total_tokens: number}}>} params.responses -
 *   Responses whose token usage should be billed.
 * @returns {number} Total cost in millicents; 0 when there are no responses.
 */
export const calculateOpenAIEmbeddingCostInMillicents = ({ model, responses }) => {
  let totalCostInMillicents = 0;
  for (const { usage } of responses) {
    const tokenCount = usage.total_tokens;
    // The table lookup stays inside the loop: with no responses the model
    // is never looked up at all.
    const costPerToken = OPENAI_TEXT_EMBEDDING_MODELS[model].tokenCostInMillicents;
    totalCostInMillicents += tokenCount * costPerToken;
  }
  return totalCostInMillicents;
};
/**
 * Create a text embedding model that calls the OpenAI embedding API.
 *
 * Each API call made by this class embeds a single text
 * (`maxTextsPerCall` is 1).
 *
 * @see https://platform.openai.com/docs/api-reference/embeddings
 *
 * @example
 * const { embeddings } = await embedTexts(
 *   new OpenAITextEmbeddingModel({ model: "text-embedding-ada-002" }),
 *   [
 *     "At first, Nox didn't know what to do with the pup.",
 *     "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
 *   ]
 * );
 */
export class OpenAITextEmbeddingModel extends AbstractModel {
  /**
   * @param {object} settings - Model settings, handed to AbstractModel as
   *   `{ settings }`. This class reads `model`, `apiKey`,
   *   `isUserIdForwardingEnabled`, `retry` and `throttle` from them.
   *   `model` must be a key of OPENAI_TEXT_EMBEDDING_MODELS; otherwise the
   *   table lookups at the end of the constructor throw.
   */
  constructor(settings) {
    super({ settings });
    // The fields are created with Object.defineProperty so that each one is
    // defined directly on the instance as an enumerable, configurable,
    // writable data property.
    Object.defineProperty(this, "provider", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: "openai",
    });
    Object.defineProperty(this, "maxTextsPerCall", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: 1,
    });
    Object.defineProperty(this, "embeddingDimensions", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: void 0,
    });
    Object.defineProperty(this, "tokenizer", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: void 0,
    });
    Object.defineProperty(this, "contextWindowSize", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: void 0,
    });
    this.tokenizer = new TikTokenTokenizer({ model: this.modelName });
    this.contextWindowSize =
      OPENAI_TEXT_EMBEDDING_MODELS[this.modelName].contextWindowSize;
    this.embeddingDimensions =
      OPENAI_TEXT_EMBEDDING_MODELS[this.modelName].embeddingDimensions;
  }

  /**
   * Name of the configured model (`settings.model`).
   * Reads `this.settings`, which is expected to be provided by
   * AbstractModel (defined outside this file).
   */
  get modelName() {
    return this.settings.model;
  }

  /**
   * API key from the model settings, falling back to the OPENAI_API_KEY
   * environment variable.
   *
   * @throws {Error} When neither source provides a key.
   */
  get apiKey() {
    const apiKey = this.settings.apiKey ?? process.env.OPENAI_API_KEY;
    if (apiKey == null) {
      throw new Error(`OpenAI API key is missing. Pass it as an argument to the constructor or set it as an environment variable named OPENAI_API_KEY.`);
    }
    return apiKey;
  }

  /**
   * Count the tokens of `input` using this model's tokenizer.
   *
   * @param {string} input - Text to tokenize.
   * @returns {Promise<number>} Result of `countTokens(this.tokenizer, input)`.
   */
  async countTokens(input) {
    return countTokens(this.tokenizer, input);
  }

  /**
   * Call the embedding API for a single text.
   *
   * The request settings are merged from, in increasing priority: the
   * derived `user` default, the model settings, the per-call
   * `options.settings`, and finally `apiKey`, `abortSignal` and `input`.
   * Retry and throttle behavior always comes from the model settings.
   *
   * @param {string} text - Text to embed; sent as `input`.
   * @param {object} [options]
   * @param {object} [options.run] - Its `userId` and `abortSignal` are read.
   * @param {object} [options.settings] - Per-call overrides.
   * @returns {Promise<object>} Whatever `callWithRetryAndThrottle` resolves to.
   * @throws {Error} When no API key is available from the per-call
   *   settings, the model settings, or the environment.
   */
  async callAPI(text, options) {
    const run = options?.run;
    const settings = options?.settings;
    const callSettings = {
      user: this.settings.isUserIdForwardingEnabled ? run?.userId : undefined,
      ...this.settings,
      ...settings,
      // Resolved after the merge and lazily: a per-call key is enough on its
      // own (the getter, which throws when no key is configured, is only
      // consulted as a fallback), and an `apiKey: undefined` entry in either
      // settings object cannot blank out the resolved key.
      apiKey: settings?.apiKey ?? this.apiKey,
      abortSignal: run?.abortSignal,
      input: text,
    };
    return callWithRetryAndThrottle({
      retry: this.settings.retry,
      throttle: this.settings.throttle,
      call: async () => callOpenAITextEmbeddingAPI(callSettings),
    });
  }

  /**
   * Request the embedding response for `texts`.
   *
   * Only the first text is sent; more than `maxTextsPerCall` texts are
   * rejected up front. The length check throws synchronously rather than
   * returning a rejected promise.
   *
   * @param {string[]} texts - Texts to embed (at most `maxTextsPerCall`).
   * @param {object} [options] - Forwarded to `callAPI`.
   * @returns {Promise<object>} The promise returned by `callAPI`.
   * @throws {Error} When `texts` has more than `maxTextsPerCall` entries.
   */
  generateEmbeddingResponse(texts, options) {
    if (texts.length > this.maxTextsPerCall) {
      throw new Error(`The OpenAI embedding API only supports ${this.maxTextsPerCall} texts per API call.`);
    }
    return this.callAPI(texts[0], options);
  }

  /**
   * Pull the embedding vectors out of an API response.
   *
   * @param {object} response - Response with a `data` array.
   * @returns {number[][]} One-element array holding `response.data[0].embedding`.
   */
  extractEmbeddings(response) {
    return [response.data[0].embedding];
  }

  /**
   * Create a new model whose settings are the current settings overlaid
   * with `additionalSettings`. The current instance is left untouched.
   *
   * @param {object} additionalSettings - Settings to add or override.
   * @returns {OpenAITextEmbeddingModel} A new model instance.
   */
  withSettings(additionalSettings) {
    return new OpenAITextEmbeddingModel(Object.assign({}, this.settings, additionalSettings));
  }
}
/** Schema for one entry of the response's `data` array. */
const openAITextEmbeddingItemSchema = z.object({
  object: z.literal("embedding"),
  embedding: z.array(z.number()),
  index: z.number(),
});

/** Schema for the `usage` object of the response. */
const openAITextEmbeddingUsageSchema = z.object({
  prompt_tokens: z.number(),
  total_tokens: z.number(),
});

/**
 * Schema used to validate an embedding API response: a "list" object whose
 * `data` array holds exactly one embedding entry, together with the model
 * name and the usage counters.
 */
const openAITextEmbeddingResponseSchema = z.object({
  object: z.literal("list"),
  data: z.array(openAITextEmbeddingItemSchema).length(1),
  model: z.string(),
  usage: openAITextEmbeddingUsageSchema,
});
/**
 * Post a single embedding request to the OpenAI API.
 *
 * The request goes to `${baseUrl}/embeddings` with a JSON body of
 * `{ model, input, user }`. Failed responses are handled by
 * `failedOpenAICallResponseHandler`; successful ones by a JSON handler built
 * from `openAITextEmbeddingResponseSchema`.
 *
 * @see https://platform.openai.com/docs/api-reference/embeddings
 *
 * @param {object} params
 * @param {string} [params.baseUrl="https://api.openai.com/v1"] - API root.
 * @param {AbortSignal} [params.abortSignal] - Forwarded to `postJsonToApi`.
 * @param {string} params.apiKey - Forwarded to `postJsonToApi`.
 * @param {string} params.model - Embedding model name.
 * @param {string} params.input - Text to embed.
 * @param {string} [params.user] - Optional user identifier sent in the body.
 * @returns {Promise<object>} Whatever `postJsonToApi` resolves to.
 *
 * @example
 * const response = await callOpenAITextEmbeddingAPI({
 *   apiKey: OPENAI_API_KEY,
 *   model: "text-embedding-ada-002",
 *   input: "At first, Nox didn't know what to do with the pup.",
 * });
 *
 * console.log(response.data[0].embedding);
 */
async function callOpenAITextEmbeddingAPI({
  baseUrl = "https://api.openai.com/v1",
  abortSignal,
  apiKey,
  model,
  input,
  user,
}) {
  const endpointUrl = `${baseUrl}/embeddings`;
  const requestBody = { model, input, user };
  const handleSuccess = createJsonResponseHandler(openAITextEmbeddingResponseSchema);
  return postJsonToApi({
    url: endpointUrl,
    apiKey,
    body: requestBody,
    failedResponseHandler: failedOpenAICallResponseHandler,
    successfulResponseHandler: handleSuccess,
    abortSignal,
  });
}