ludmi
LU (Layer Understanding) is a lightweight framework for controlled chatbot interactions with LLMs, action orchestration, and retrieval-augmented generation (RAG).
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.knowledgeBaseByText = exports.knowledgeBaseByJSON = void 0;
const tiktoken_1 = require("tiktoken");
const openai_1 = require("./llm/openai");
const MODEL = "text-embedding-3-small";
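/**
 * Usage sketch for knowledgeBaseByJSON (illustrative, not taken from the source):
 * each element of the array is serialized with JSON.stringify and delegated to
 * knowledgeBaseByText, so every returned fragment carries the optional id field.
 * The import path and the field names ("faqId", "question", "answer") are assumptions
 * made for this example only.
 *
 * @example
 * // Inside an async function:
 * const { knowledgeBaseByJSON } = require("ludmi"); // import path is an assumption
 * const entries = await knowledgeBaseByJSON({
 *     json: [{ faqId: 1, question: "...", answer: "..." }],
 *     id: "faqId",        // optional: property whose value becomes the fragment id
 *     maxTokens: 500,
 *     overlapTokens: 50
 * });
 * // entries: [{ id: 1, fragmentIndex: 0, text: "...", embedding: [...] }, ...]
 */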
const knowledgeBaseByJSON = async ({ json, maxTokens = 1000, overlapTokens = 100, id }) => {
    if (maxTokens <= overlapTokens) {
        throw new Error('maxTokens must be greater than overlapTokens');
    }
    const data = [];
    for (const element of json) {
        // Serialize each element and split it into embedded text fragments
        const text = JSON.stringify(element);
        const fragments = await (0, exports.knowledgeBaseByText)({ text, maxTokens, overlapTokens });
        // Attach the (optional) id of the source element to every fragment
        fragments.forEach(fragment => {
            data.push({
                id: id && element[id],
                ...fragment
            });
        });
    }
    return data;
};
exports.knowledgeBaseByJSON = knowledgeBaseByJSON;
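/**
 * Usage sketch for knowledgeBaseByText (illustrative, not taken from the source):
 * the text is tokenized with tiktoken, split into overlapping windows of maxTokens
 * tokens, and each window is embedded with the "text-embedding-3-small" model via
 * getEmbeddings. The import path below is an assumption.
 *
 * @example
 * // Inside an async function:
 * const { knowledgeBaseByText } = require("ludmi"); // import path is an assumption
 * const fragments = await knowledgeBaseByText({
 *     text: longDocument,      // any string to be chunked and embedded
 *     maxTokens: 1000,         // window size in tokens
 *     overlapTokens: 100       // tokens shared between consecutive windows
 * });
 * // fragments: [{ fragmentIndex, text, embedding }, ...]
 */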
const knowledgeBaseByText = async ({ text, maxTokens = 1000, overlapTokens = 100 }) => {
    if (maxTokens <= overlapTokens) {
        throw new Error('maxTokens must be greater than overlapTokens');
    }
    const fragments = [];
    const encoder = (0, tiktoken_1.encoding_for_model)(MODEL);
    const tokens = encoder.encode(text);
    const step = maxTokens - overlapTokens;
    // Walk the token array, building fragments of maxTokens tokens with the requested overlap
    let contadorFragment = 0;
    for (let i = 0; i < tokens.length; i += step) {
        const fragmentTokens = tokens.slice(i, i + maxTokens);
        // Decode the token slice back to UTF-8 text (tiktoken's decode returns bytes)
        const fragmentText = encoder.decode(fragmentTokens);
        const content = new TextDecoder().decode(fragmentText);
        // Embed the fragment and add it to the list
        const embedding = await (0, openai_1.getEmbeddings)(content);
        fragments.push({
            fragmentIndex: contadorFragment++,
            text: content,
            embedding
        });
    }
    // Release the WASM-backed encoder to avoid leaking memory
    encoder.free();
    return fragments;
};
exports.knowledgeBaseByText = knowledgeBaseByText;