// ai-utils.js
// Build AI applications, chatbots, and agents with JavaScript and TypeScript.
;
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.LlamaCppTextEmbeddingModel = void 0;
const zod_1 = __importDefault(require("zod"));
const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
const callWithRetryAndThrottle_js_1 = require("../../util/api/callWithRetryAndThrottle.cjs");
const postToApi_js_1 = require("../../util/api/postToApi.cjs");
const LlamaCppError_js_1 = require("./LlamaCppError.cjs");
const LlamaCppTokenizer_js_1 = require("./LlamaCppTokenizer.cjs");
/**
 * Text-embedding model backed by a llama.cpp server.
 *
 * Sends each text to the server's `/embedding` endpoint (see
 * `callLlamaCppEmbeddingAPI` below) and exposes the result through the
 * `AbstractModel` embedding interface. Tokenization is delegated to a
 * `LlamaCppTokenizer` pointed at the same server.
 *
 * NOTE: this file is compiled-TypeScript output — the `Object.defineProperty`
 * calls below are how `tsc` emits class-field initializers; they create
 * plain enumerable, writable instance properties.
 */
class LlamaCppTextEmbeddingModel extends AbstractModel_js_1.AbstractModel {
    /**
     * @param {object} [settings={}] - Model settings. The visible reads are
     *   `baseUrl`, `retry`, `throttle`, and `tokenizerSettings.{retry,throttle}`;
     *   the full shape is defined by the (unseen) settings type — the whole
     *   object is also forwarded to each API call.
     */
    constructor(settings = {}) {
        super({ settings });
        // Provider identifier used by the framework to label this model.
        Object.defineProperty(this, "provider", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "llamacpp"
        });
        // The llama.cpp `/embedding` endpoint handles one text per request,
        // so batching is capped at 1 (enforced in callAPI below).
        Object.defineProperty(this, "maxTextsPerCall", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 1
        });
        // Unknown for an arbitrary local model; deliberately left undefined.
        Object.defineProperty(this, "contextWindowSize", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: undefined
        });
        // Depends on the loaded model; deliberately left undefined.
        Object.defineProperty(this, "embeddingDimensions", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: undefined
        });
        // Placeholder slot for the tokenizer field (assigned just below).
        Object.defineProperty(this, "tokenizer", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Tokenizer talks to the same server; it gets its own retry/throttle
        // configuration, separate from the embedding call's.
        this.tokenizer = new LlamaCppTokenizer_js_1.LlamaCppTokenizer({
            baseUrl: this.settings.baseUrl,
            retry: this.settings.tokenizerSettings?.retry,
            throttle: this.settings.tokenizerSettings?.throttle,
        });
    }
    /**
     * @returns {null} The served model is whatever the llama.cpp server
     *   loaded; it is not selectable per-call, so no name is reported.
     */
    get modelName() {
        return null;
    }
    /**
     * Tokenizes `text` via the server's tokenizer endpoint.
     * @param {string} text
     * @returns {Promise<number[]>} presumably token ids — determined by
     *   LlamaCppTokenizer (not visible here).
     */
    async tokenize(text) {
        return this.tokenizer.tokenize(text);
    }
    /**
     * Calls the embedding endpoint with retry/throttle applied.
     * @param {string[]} texts - Must contain at most `maxTextsPerCall` (1) items.
     * @param {object} [options] - Optional `run` (supplies `abortSignal`) and
     *   per-call `settings` overrides.
     * @throws {Error} When more texts are passed than the API supports.
     */
    async callAPI(texts, options) {
        if (texts.length > this.maxTextsPerCall) {
            throw new Error(`The Llama.cpp embedding API only supports ${this.maxTextsPerCall} texts per API call.`);
        }
        const run = options?.run;
        const settings = options?.settings;
        // Per-call settings override the instance settings; only the first
        // (and only permitted) text is sent.
        const callSettings = Object.assign({}, this.settings, settings, {
            abortSignal: run?.abortSignal,
            content: texts[0],
        });
        return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
            retry: this.settings.retry,
            throttle: this.settings.throttle,
            call: async () => callLlamaCppEmbeddingAPI(callSettings),
        });
    }
    // Framework hook: raw embedding response for a batch of texts.
    generateEmbeddingResponse(texts, options) {
        return this.callAPI(texts, options);
    }
    // Framework hook: wraps the single embedding vector in an array to
    // match the (one-per-input-text) embeddings contract.
    extractEmbeddings(response) {
        return [response.embedding];
    }
    /**
     * Returns a new model instance with `additionalSettings` merged over the
     * current settings (immutable-update pattern; `this` is not modified).
     */
    withSettings(additionalSettings) {
        return new LlamaCppTextEmbeddingModel(Object.assign({}, this.settings, additionalSettings));
    }
}
exports.LlamaCppTextEmbeddingModel = LlamaCppTextEmbeddingModel;
// A successful `/embedding` response carries a single numeric vector.
const llamaCppEmbeddingVectorSchema = zod_1.default.array(zod_1.default.number());
const llamaCppTextEmbeddingResponseSchema = zod_1.default.object({
    embedding: llamaCppEmbeddingVectorSchema,
});
/**
 * POSTs `content` to the llama.cpp server's `/embedding` endpoint.
 *
 * @param {object} params
 * @param {string} [params.baseUrl="http://127.0.0.1:8080"] - Server base URL.
 * @param {AbortSignal} [params.abortSignal] - Optional cancellation signal.
 * @param {string} params.content - The text to embed.
 * @returns {Promise<{embedding: number[]}>} Parsed response validated against
 *   `llamaCppTextEmbeddingResponseSchema`.
 */
async function callLlamaCppEmbeddingAPI({ baseUrl = "http://127.0.0.1:8080", abortSignal, content, }) {
    const url = `${baseUrl}/embedding`;
    const successfulResponseHandler = (0, postToApi_js_1.createJsonResponseHandler)(llamaCppTextEmbeddingResponseSchema);
    const failedResponseHandler = LlamaCppError_js_1.failedLlamaCppCallResponseHandler;
    return (0, postToApi_js_1.postJsonToApi)({
        url,
        body: { content },
        failedResponseHandler,
        successfulResponseHandler,
        abortSignal,
    });
}