ai-utils.js
Version:
Build AI applications, chatbots, and agents with JavaScript and TypeScript.
166 lines (165 loc) • 5.67 kB
JavaScript
import z from "zod";
import { AbstractModel } from "../../model-function/AbstractModel.js";
import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
import { createJsonResponseHandler, postJsonToApi, } from "../../util/api/postToApi.js";
import { failedCohereCallResponseHandler } from "./CohereError.js";
import { CohereTokenizer } from "./CohereTokenizer.js";
/**
 * Lookup table of the Cohere text embedding models known to this module.
 *
 * Keys are the model names; each value records the `contextWindowSize`
 * (presumably measured in tokens - confirm against the Cohere docs) and the
 * `embeddingDimensions` of the vectors the model returns.
 *
 * Every listed model shares the same context window size, so the table is
 * generated from a compact `[modelName, embeddingDimensions]` list.
 */
export const COHERE_TEXT_EMBEDDING_MODELS = Object.fromEntries([
    ["embed-english-light-v2.0", 1024],
    ["embed-english-v2.0", 4096],
    ["embed-multilingual-v2.0", 768],
].map(([modelName, embeddingDimensions]) => [
    modelName,
    { contextWindowSize: 4096, embeddingDimensions },
]));
/**
 * Create a text embedding model that calls the Cohere Co.Embed API.
 *
 * @see https://docs.cohere.com/reference/embed
 *
 * @example
 * const { embeddings } = await embedTexts(
 *   new CohereTextEmbeddingModel({ model: "embed-english-light-v2.0" }),
 *   [
 *     "At first, Nox didn't know what to do with the pup.",
 *     "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
 *   ]
 * );
 */
export class CohereTextEmbeddingModel extends AbstractModel {
    /**
     * @param settings - Model settings. This class reads `model`, `apiKey`,
     *   `baseUrl`, `retry`, `throttle` and `tokenizerSettings` from it; all
     *   settings are also forwarded to the embedding API call.
     * @throws {TypeError} If `settings.model` is not a key of
     *   `COHERE_TEXT_EMBEDDING_MODELS`.
     */
    constructor(settings) {
        super({ settings });
        const modelName = this.modelName;
        // Fail fast with a readable message. An own-property check is used so
        // that inherited keys such as "toString" are rejected as well instead
        // of silently producing `undefined` dimensions. TypeError is kept
        // because that is what the unguarded lookup used to throw.
        if (!Object.prototype.hasOwnProperty.call(COHERE_TEXT_EMBEDDING_MODELS, modelName)) {
            throw new TypeError(`Unknown Cohere text embedding model "${String(modelName)}". ` +
                `Supported models: ${Object.keys(COHERE_TEXT_EMBEDDING_MODELS).join(", ")}.`);
        }
        const { contextWindowSize, embeddingDimensions } = COHERE_TEXT_EMBEDDING_MODELS[modelName];
        const tokenizer = new CohereTokenizer({
            baseUrl: this.settings.baseUrl,
            apiKey: this.settings.apiKey,
            model: this.settings.model,
            retry: this.settings.tokenizerSettings?.retry,
            throttle: this.settings.tokenizerSettings?.throttle,
        });
        // Define own, enumerable, writable data properties (same descriptors
        // and same definition order as before) without going through any
        // accessor the base class might declare for these names.
        const defineField = (name, value) => Object.defineProperty(this, name, {
            enumerable: true,
            configurable: true,
            writable: true,
            value,
        });
        defineField("provider", "cohere");
        // Upper bound on the number of texts accepted by a single `callAPI` call.
        defineField("maxTextsPerCall", 96);
        defineField("embeddingDimensions", embeddingDimensions);
        defineField("contextWindowSize", contextWindowSize);
        defineField("tokenizer", tokenizer);
    }
    /** Name of the configured model (`settings.model`). */
    get modelName() {
        return this.settings.model;
    }
    /** Delegates to the `tokenize` method of the model's tokenizer. */
    async tokenize(text) {
        return this.tokenizer.tokenize(text);
    }
    /** Delegates to the `tokenizeWithTexts` method of the model's tokenizer. */
    async tokenizeWithTexts(text) {
        return this.tokenizer.tokenizeWithTexts(text);
    }
    /** Delegates to the `detokenize` method of the model's tokenizer. */
    async detokenize(tokens) {
        return this.tokenizer.detokenize(tokens);
    }
    /**
     * API key from the model settings, falling back to the `COHERE_API_KEY`
     * environment variable.
     *
     * @throws {Error} If neither source provides a key.
     */
    get apiKey() {
        const apiKey = this.settings.apiKey ?? process.env.COHERE_API_KEY;
        if (apiKey == null) {
            throw new Error("No Cohere API key provided. Pass an API key to the constructor or set the COHERE_API_KEY environment variable.");
        }
        return apiKey;
    }
    /**
     * Send `texts` to the embedding endpoint, wrapped in the retry/throttle helper.
     *
     * @param texts - Texts to embed; at most `maxTextsPerCall` entries.
     * @param options - Optional `{ run, settings }`. `settings` overrides the
     *   model settings for this call only; `run.abortSignal` is forwarded to
     *   the request.
     * @returns The result of `callCohereEmbeddingAPI`.
     * @throws {Error} If too many texts are passed or no API key can be resolved.
     */
    async callAPI(texts, options) {
        if (texts.length > this.maxTextsPerCall) {
            throw new Error(`The Cohere embedding API only supports ${this.maxTextsPerCall} texts per API call.`);
        }
        const run = options?.run;
        const settings = options?.settings;
        const callSettings = {
            ...this.settings,
            ...settings,
            // Resolved after the merge: an `apiKey: undefined` entry in either
            // settings object must not erase the environment fallback, and a
            // key given only for this call must not trip the getter's error.
            apiKey: settings?.apiKey ?? this.apiKey,
            abortSignal: run?.abortSignal,
            texts,
        };
        return callWithRetryAndThrottle({
            // Per-call overrides take precedence over the model-level policy.
            retry: settings?.retry ?? this.settings.retry,
            throttle: settings?.throttle ?? this.settings.throttle,
            call: async () => callCohereEmbeddingAPI(callSettings),
        });
    }
    /** Produce the raw API response for `texts`; thin alias for `callAPI`. */
    generateEmbeddingResponse(texts, options) {
        return this.callAPI(texts, options);
    }
    /** Return the `embeddings` field of an API response. */
    extractEmbeddings(response) {
        return response.embeddings;
    }
    /**
     * Create a new model whose settings are the current settings overlaid
     * with `additionalSettings`. The current instance is left unchanged.
     */
    withSettings(additionalSettings) {
        return new CohereTextEmbeddingModel(Object.assign({}, this.settings, additionalSettings));
    }
}
// Schema for the `meta` object of an embed response: an `api_version`
// object carrying a `version` string.
const cohereEmbeddingMetaSchema = z.object({
    api_version: z.object({ version: z.string() }),
});
/**
 * Zod schema used to validate a successful embed response: a string `id`,
 * a `texts` string array, `embeddings` as an array of number arrays, and
 * the `meta` object described above.
 */
const cohereTextEmbeddingResponseSchema = z.object({
    id: z.string(),
    texts: z.array(z.string()),
    embeddings: z.array(z.array(z.number())),
    meta: cohereEmbeddingMetaSchema,
});
/**
 * POST an embedding request for `texts` to the `/embed` endpoint below `baseUrl`.
 *
 * The JSON body carries `model`, `texts` and `truncate`. Failed responses are
 * routed to `failedCohereCallResponseHandler`; successful ones go through a
 * JSON handler built from `cohereTextEmbeddingResponseSchema`.
 *
 * @see https://docs.cohere.com/reference/embed
 *
 * @example
 * const response = await callCohereEmbeddingAPI({
 *   apiKey: COHERE_API_KEY,
 *   model: "embed-english-light-v2.0",
 *   texts: [
 *     "At first, Nox didn't know what to do with the pup.",
 *     "He keenly observed and absorbed everything around him, from the birds in the sky to the trees in the forest.",
 *   ],
 * });
 */
async function callCohereEmbeddingAPI({ baseUrl = "https://api.cohere.ai/v1", abortSignal, apiKey, model, texts, truncate, }) {
    const url = `${baseUrl}/embed`;
    const body = { model, texts, truncate };
    const successfulResponseHandler = createJsonResponseHandler(cohereTextEmbeddingResponseSchema);
    return postJsonToApi({
        url,
        apiKey,
        body,
        failedResponseHandler: failedCohereCallResponseHandler,
        successfulResponseHandler,
        abortSignal,
    });
}