UNPKG

ai-utils.js

Version:

Build AI applications, chatbots, and agents with JavaScript and TypeScript.

166 lines (165 loc) 5.67 kB
import z from "zod";
import { AbstractModel } from "../../model-function/AbstractModel.js";
import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
import {
  createJsonResponseHandler,
  postJsonToApi,
} from "../../util/api/postToApi.js";
import { failedCohereCallResponseHandler } from "./CohereError.js";
import { CohereTokenizer } from "./CohereTokenizer.js";

/**
 * Per-model metadata for the Cohere text embedding models known to this module,
 * keyed by model name. The constructor of `CohereTextEmbeddingModel` reads
 * `contextWindowSize` and `embeddingDimensions` from the matching entry.
 */
export const COHERE_TEXT_EMBEDDING_MODELS = {
  "embed-english-light-v2.0": {
    contextWindowSize: 4096,
    embeddingDimensions: 1024,
  },
  "embed-english-v2.0": {
    contextWindowSize: 4096,
    embeddingDimensions: 4096,
  },
  "embed-multilingual-v2.0": {
    contextWindowSize: 4096,
    embeddingDimensions: 768,
  },
};

/**
 * Create a text embedding model that calls the Cohere Co.Embed API.
 *
 * @see https://docs.cohere.com/reference/embed
 *
 * @example
 * const { embeddings } = await embedTexts(
 *   new CohereTextEmbeddingModel({ model: "embed-english-light-v2.0" }),
 *   [
 *     "At first, Nox didn't know what to do with the pup.",
 *     "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
 *   ]
 * );
 */
export class CohereTextEmbeddingModel extends AbstractModel {
  /**
   * @param settings Model settings. This class reads `model` (must be a key of
   *   `COHERE_TEXT_EMBEDDING_MODELS`, otherwise the metadata lookup below throws),
   *   and optionally `apiKey`, `baseUrl`, `retry`, `throttle` and
   *   `tokenizerSettings` (`retry` / `throttle` for the tokenizer).
   */
  constructor(settings) {
    super({ settings });

    Object.defineProperty(this, "provider", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: "cohere",
    });
    // Upper bound on the number of texts accepted by a single `callAPI` call.
    Object.defineProperty(this, "maxTextsPerCall", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: 96,
    });
    Object.defineProperty(this, "embeddingDimensions", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: void 0,
    });
    Object.defineProperty(this, "contextWindowSize", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: void 0,
    });
    Object.defineProperty(this, "tokenizer", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: void 0,
    });

    this.contextWindowSize =
      COHERE_TEXT_EMBEDDING_MODELS[this.modelName].contextWindowSize;
    this.tokenizer = new CohereTokenizer({
      baseUrl: this.settings.baseUrl,
      apiKey: this.settings.apiKey,
      model: this.settings.model,
      retry: this.settings.tokenizerSettings?.retry,
      throttle: this.settings.tokenizerSettings?.throttle,
    });
    this.embeddingDimensions =
      COHERE_TEXT_EMBEDDING_MODELS[this.modelName].embeddingDimensions;
  }

  /** Name of the configured model (`settings.model`). */
  get modelName() {
    return this.settings.model;
  }

  /** Delegates to the `tokenize` method of the underlying `CohereTokenizer`. */
  async tokenize(text) {
    return this.tokenizer.tokenize(text);
  }

  /** Delegates to the `tokenizeWithTexts` method of the underlying `CohereTokenizer`. */
  async tokenizeWithTexts(text) {
    return this.tokenizer.tokenizeWithTexts(text);
  }

  /** Delegates to the `detokenize` method of the underlying `CohereTokenizer`. */
  async detokenize(tokens) {
    return this.tokenizer.detokenize(tokens);
  }

  /**
   * API key from the settings, falling back to the `COHERE_API_KEY`
   * environment variable.
   *
   * @throws {Error} When neither source provides a key.
   */
  get apiKey() {
    const apiKey = this.settings.apiKey ?? process.env.COHERE_API_KEY;
    if (apiKey == null) {
      throw new Error("No Cohere API key provided. Pass an API key to the constructor or set the COHERE_API_KEY environment variable.");
    }
    return apiKey;
  }

  /**
   * Call the embedding endpoint for a batch of texts, wrapped in the
   * retry / throttle strategies from the model settings.
   *
   * @param texts Texts to embed; at most `maxTextsPerCall` entries.
   * @param options Optional `{ run, settings }`. `settings` overrides the model
   *   settings for this call; `run.abortSignal` is forwarded to the request.
   * @returns The parsed API response.
   * @throws {Error} When more than `maxTextsPerCall` texts are passed, or when
   *   no API key can be resolved.
   */
  async callAPI(texts, options) {
    if (texts.length > this.maxTextsPerCall) {
      throw new Error(`The Cohere embedding API only supports ${this.maxTextsPerCall} texts per API call.`);
    }

    const run = options?.run;
    const settings = options?.settings;

    const callSettings = Object.assign({}, this.settings, settings, {
      // Resolve the key AFTER merging the settings objects. Object.assign copies
      // own properties even when their value is undefined/null, so an
      // `apiKey: undefined` entry in the settings would otherwise clobber the
      // key resolved from the COHERE_API_KEY environment variable. A non-nullish
      // per-call key still takes precedence.
      apiKey: settings?.apiKey ?? this.apiKey,
      abortSignal: run?.abortSignal,
      texts,
    });

    return callWithRetryAndThrottle({
      retry: this.settings.retry,
      throttle: this.settings.throttle,
      call: async () => callCohereEmbeddingAPI(callSettings),
    });
  }

  /** Produce the raw embedding response for `texts`; see `callAPI`. */
  generateEmbeddingResponse(texts, options) {
    return this.callAPI(texts, options);
  }

  /** Pull the `embeddings` array out of a response returned by `callAPI`. */
  extractEmbeddings(response) {
    return response.embeddings;
  }

  /**
   * Create a new model instance whose settings are the current settings
   * overlaid with `additionalSettings`. The current instance is not modified.
   */
  withSettings(additionalSettings) {
    return new CohereTextEmbeddingModel(
      Object.assign({}, this.settings, additionalSettings)
    );
  }
}

// Schema used to validate the JSON body of a successful embed response.
const cohereTextEmbeddingResponseSchema = z.object({
  id: z.string(),
  texts: z.array(z.string()),
  embeddings: z.array(z.array(z.number())),
  meta: z.object({
    api_version: z.object({
      version: z.string(),
    }),
  }),
});

/**
 * Call the Cohere Co.Embed API to generate an embedding for the given input.
 *
 * Posts `{ model, texts, truncate }` as JSON to `${baseUrl}/embed`. Successful
 * responses are validated against `cohereTextEmbeddingResponseSchema`; failed
 * responses are handled by `failedCohereCallResponseHandler`.
 *
 * @see https://docs.cohere.com/reference/embed
 *
 * @example
 * const response = await callCohereEmbeddingAPI({
 *   apiKey: COHERE_API_KEY,
 *   model: "embed-english-light-v2.0",
 *   texts: [
 *     "At first, Nox didn't know what to do with the pup.",
 *     "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
 *   ],
 * });
 */
async function callCohereEmbeddingAPI({
  baseUrl = "https://api.cohere.ai/v1",
  abortSignal,
  apiKey,
  model,
  texts,
  truncate,
}) {
  return postJsonToApi({
    url: `${baseUrl}/embed`,
    apiKey,
    body: {
      model,
      texts,
      truncate,
    },
    failedResponseHandler: failedCohereCallResponseHandler,
    successfulResponseHandler: createJsonResponseHandler(
      cohereTextEmbeddingResponseSchema
    ),
    abortSignal,
  });
}