UNPKG

ludmi

Version:

LU (Layer Understanding) is a lightweight framework for controlled chatbot interactions with LLMs, action orchestration, and retrieval-augmented generation (RAG).

52 lines (51 loc) 2.06 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.knowledgeBaseByText = exports.knowledgeBaseByJSON = void 0; const tiktoken_1 = require("tiktoken"); const openai_1 = require("./llm/openai"); const MODEL = "text-embedding-3-small"; const knowledgeBaseByJSON = async ({ json, maxTokens = 1000, overlapTokens = 100, id }) => { if (maxTokens <= overlapTokens) { throw new Error('maxTokens debe ser mayor que overlapTokens'); } const data = []; for (const element of json) { const text = JSON.stringify(element); const fragments = await (0, exports.knowledgeBaseByText)({ text, maxTokens, overlapTokens }); //add fragments to the data fragments.forEach(fragment => { data.push({ id: id && element[id], ...fragment }); }); } return data; }; exports.knowledgeBaseByJSON = knowledgeBaseByJSON; const knowledgeBaseByText = async ({ text, maxTokens = 1000, overlapTokens = 100 }) => { if (maxTokens <= overlapTokens) { throw new Error('maxTokens debe ser mayor que overlapTokens'); } const fragments = []; const encoder = (0, tiktoken_1.encoding_for_model)(MODEL); const tokens = encoder.encode(text); const step = maxTokens - overlapTokens; // Recorremos los tokens y vamos creando fragmentos de tamaño maxTokens con el solapamiento indicado let contadorFragment = 0; for (let i = 0; i < tokens.length; i += step) { const fragmentTokens = tokens.slice(i, i + maxTokens); const fragmentText = encoder.decode(fragmentTokens); //Decoding the fragment const content = new TextDecoder().decode(fragmentText); const embedding = await (0, openai_1.getEmbeddings)(content); //Adding the fragment to the list fragments.push({ fragmentIndex: contadorFragment++, text: content, embedding }); } return fragments; }; exports.knowledgeBaseByText = knowledgeBaseByText;