// ai-utils.js: Build AI applications, chatbots, and agents with JavaScript and TypeScript.
import z from "zod";
import { AbstractModel } from "../../model-function/AbstractModel.js";
import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
import { createJsonResponseHandler, postJsonToApi, } from "../../util/api/postToApi.js";
import { failedLlamaCppCallResponseHandler } from "./LlamaCppError.js";
import { LlamaCppTokenizer } from "./LlamaCppTokenizer.js";
export class LlamaCppTextEmbeddingModel extends AbstractModel {
    provider = "llamacpp";
    // The llama.cpp embedding endpoint accepts a single text per request.
    maxTextsPerCall = 1;
    // Both depend on the model the server was started with, so they are unknown here.
    contextWindowSize = undefined;
    embeddingDimensions = undefined;
    tokenizer;

    constructor(settings = {}) {
        super({ settings });
        // The tokenizer talks to the same llama.cpp server, reusing its base URL.
        this.tokenizer = new LlamaCppTokenizer({
            baseUrl: this.settings.baseUrl,
            retry: this.settings.tokenizerSettings?.retry,
            throttle: this.settings.tokenizerSettings?.throttle,
        });
    }
    // llama.cpp serves whichever model the server was started with,
    // so there is no model name to report.
    get modelName() {
        return null;
    }

    async tokenize(text) {
        return this.tokenizer.tokenize(text);
    }
    async callAPI(texts, options) {
        if (texts.length > this.maxTextsPerCall) {
            throw new Error(`The Llama.cpp embedding API only supports ${this.maxTextsPerCall} text(s) per API call.`);
        }
        const run = options?.run;
        const settings = options?.settings;
        // Per-call settings override the model settings; the abort signal comes from the run.
        const callSettings = Object.assign({}, this.settings, settings, {
            abortSignal: run?.abortSignal,
            content: texts[0],
        });
        return callWithRetryAndThrottle({
            retry: this.settings.retry,
            throttle: this.settings.throttle,
            call: async () => callLlamaCppEmbeddingAPI(callSettings),
        });
    }
    generateEmbeddingResponse(texts, options) {
        return this.callAPI(texts, options);
    }

    extractEmbeddings(response) {
        // The API returns a single embedding; wrap it to match the texts array.
        return [response.embedding];
    }

    withSettings(additionalSettings) {
        // Returns a new model instance; the settings objects are never mutated.
        return new LlamaCppTextEmbeddingModel(Object.assign({}, this.settings, additionalSettings));
    }
}
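
// Usage sketch (a hedged example, not part of the library): because
// `maxTextsPerCall` is 1, embedding several texts means one request per text.
// Assumes a llama.cpp server is reachable at the default base URL.
//
//   const model = new LlamaCppTextEmbeddingModel();
//   const responses = await Promise.all(
//     texts.map((text) => model.generateEmbeddingResponse([text]))
//   );
//   const embeddings = responses.flatMap((r) => model.extractEmbeddings(r));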
const llamaCppTextEmbeddingResponseSchema = z.object({
    embedding: z.array(z.number()),
});
async function callLlamaCppEmbeddingAPI({ baseUrl = "http://127.0.0.1:8080", abortSignal, content, }) {
    return postJsonToApi({
        url: `${baseUrl}/embedding`,
        body: { content },
        failedResponseHandler: failedLlamaCppCallResponseHandler,
        successfulResponseHandler: createJsonResponseHandler(llamaCppTextEmbeddingResponseSchema),
        abortSignal,
    });
}
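
// Direct-call sketch (a hedged example; the server flag is an assumption that
// depends on your llama.cpp version): the helper above targets a local
// llama.cpp server started with embeddings enabled, e.g. `./server --embedding`,
// listening on the default http://127.0.0.1:8080.
//
//   const model = new LlamaCppTextEmbeddingModel({ baseUrl: "http://127.0.0.1:8080" });
//   const response = await model.callAPI(["Hello, world!"], {});
//   const [embedding] = model.extractEmbeddings(response); // number[]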