UNPKG

@llm-tools/embedjs

Version:

A NodeJS RAG framework to easily work with LLMs and custom datasets

35 lines 1.47 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.TextLoader = void 0;
const tslib_1 = require("tslib");
const textsplitters_1 = require("@langchain/textsplitters");
const md5_1 = tslib_1.__importDefault(require("md5"));
const embedjs_interfaces_1 = require("@llm-tools/embedjs-interfaces");
const embedjs_utils_1 = require("@llm-tools/embedjs-utils");

/**
 * Loader that turns one in-memory string into a stream of text chunks.
 *
 * The loader id passed to the base class is `TextLoader_` followed by the
 * md5 of the text. The metadata passed alongside it holds the text after it
 * has gone through `truncateCenterString(text, 50)`.
 */
class TextLoader extends embedjs_interfaces_1.BaseLoader {
    /** The complete, unmodified text handed to the constructor. */
    text;

    /**
     * @param {object} options
     * @param {string} options.text - Text to be split into chunks.
     * @param {number} [options.chunkSize] - Falls back to 300 when null or undefined.
     * @param {number} [options.chunkOverlap] - Falls back to 0 when null or undefined.
     */
    constructor({ text, chunkSize, chunkOverlap }) {
        // Neither value below touches `this`, so both may be built before super().
        const loaderId = `TextLoader_${(0, md5_1.default)(text)}`;
        const loaderMetadata = { text: (0, embedjs_utils_1.truncateCenterString)(text, 50) };
        super(loaderId, loaderMetadata, chunkSize ?? 300, chunkOverlap ?? 0);
        this.text = text;
    }

    /**
     * Splits the stored text with a RecursiveCharacterTextSplitter and yields
     * one record per resulting piece.
     *
     * NOTE(review): `chunkSize`, `chunkOverlap` and `uniqueId` are read from
     * `this` and are presumably populated by BaseLoader - confirm there.
     *
     * @yields {{ pageContent: string, metadata: { type: string, source: string, textId: * } }}
     */
    async *getUnfilteredChunks() {
        // Shortened form of the text, used as the `source` of every chunk.
        const sourceLabel = (0, embedjs_utils_1.truncateCenterString)(this.text, 50);
        const { chunkSize, chunkOverlap } = this;
        const splitter = new textsplitters_1.RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap });
        // The text is passed through cleanString before it is split.
        const cleanedText = (0, embedjs_utils_1.cleanString)(this.text);
        const pieces = await splitter.splitText(cleanedText);
        for (const pageContent of pieces) {
            yield {
                pageContent,
                metadata: {
                    type: 'TextLoader',
                    source: sourceLabel,
                    textId: this.uniqueId,
                },
            };
        }
    }
}
exports.TextLoader = TextLoader;
//# sourceMappingURL=text-loader.js.map