
ai-utils.js


Build AI applications, chatbots, and agents with JavaScript and TypeScript.

90 lines (89 loc) 3.29 kB
import z from "zod";
import { AbstractModel } from "../../model-function/AbstractModel.js";
import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
import {
  createJsonResponseHandler,
  postJsonToApi,
} from "../../util/api/postToApi.js";
import { failedLlamaCppCallResponseHandler } from "./LlamaCppError.js";
import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";

// Text embedding model backed by a local llama.cpp server.
export class LlamaCppTextEmbeddingModel extends AbstractModel {
  constructor(settings = {}) {
    super({ settings });
    Object.defineProperty(this, "provider", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: "llamacpp",
    });
    // The llama.cpp /embedding endpoint accepts a single text per request.
    Object.defineProperty(this, "maxTextsPerCall", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: 1,
    });
    Object.defineProperty(this, "contextWindowSize", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: undefined,
    });
    Object.defineProperty(this, "embeddingDimensions", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: undefined,
    });
    Object.defineProperty(this, "tokenizer", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: void 0,
    });
    this.tokenizer = new LlamaCppTokenizer({
      baseUrl: this.settings.baseUrl,
      retry: this.settings.tokenizerSettings?.retry,
      throttle: this.settings.tokenizerSettings?.throttle,
    });
  }

  get modelName() {
    return null;
  }

  async tokenize(text) {
    return this.tokenizer.tokenize(text);
  }

  // Merges call-time settings into the model settings and invokes the
  // llama.cpp embedding API with the configured retry/throttle behavior.
  async callAPI(texts, options) {
    if (texts.length > this.maxTextsPerCall) {
      throw new Error(`The Llama.cpp embedding API only supports ${this.maxTextsPerCall} texts per API call.`);
    }
    const run = options?.run;
    const settings = options?.settings;
    const callSettings = Object.assign({}, this.settings, settings, {
      abortSignal: run?.abortSignal,
      content: texts[0],
    });
    return callWithRetryAndThrottle({
      retry: this.settings.retry,
      throttle: this.settings.throttle,
      call: async () => callLlamaCppEmbeddingAPI(callSettings),
    });
  }

  generateEmbeddingResponse(texts, options) {
    return this.callAPI(texts, options);
  }

  extractEmbeddings(response) {
    return [response.embedding];
  }

  // Returns a new model instance with the additional settings merged in;
  // the original instance is left unchanged.
  withSettings(additionalSettings) {
    return new LlamaCppTextEmbeddingModel(Object.assign({}, this.settings, additionalSettings));
  }
}

// The /embedding endpoint responds with a single embedding vector.
const llamaCppTextEmbeddingResponseSchema = z.object({
  embedding: z.array(z.number()),
});

// POSTs the text to the server's /embedding endpoint and validates
// the JSON response against the schema above.
async function callLlamaCppEmbeddingAPI({
  baseUrl = "http://127.0.0.1:8080",
  abortSignal,
  content,
}) {
  return postJsonToApi({
    url: `${baseUrl}/embedding`,
    body: { content },
    failedResponseHandler: failedLlamaCppCallResponseHandler,
    successfulResponseHandler: createJsonResponseHandler(llamaCppTextEmbeddingResponseSchema),
    abortSignal,
  });
}
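Below is a minimal usage sketch, not part of the file above. It assumes the class is re-exported from the package root (the import path is illustrative) and that a llama.cpp server with embedding support is reachable at the default base URL, http://127.0.0.1:8080.

// Usage sketch (assumptions: package-root export; llama.cpp server
// running with embeddings enabled on the default base URL).
import { LlamaCppTextEmbeddingModel } from "ai-utils.js";

const model = new LlamaCppTextEmbeddingModel({
  baseUrl: "http://127.0.0.1:8080",
});

// The model accepts at most one text per call (maxTextsPerCall = 1).
const response = await model.generateEmbeddingResponse(["A sample sentence."]);
const [embedding] = model.extractEmbeddings(response);
console.log(embedding.length); // dimensionality of the returned vector

Because withSettings returns a fresh instance, a variant with different retry or throttle settings can be derived without mutating the original model.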