UNPKG

@llm-tools/embedjs

Version:

A NodeJS RAG framework to easily work with LLMs and custom datasets

30 lines 1.18 kB
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import md5 from 'md5';
import { BaseLoader } from '@llm-tools/embedjs-interfaces';
import { truncateCenterString, cleanString } from '@llm-tools/embedjs-utils';

/** Length limit handed to `truncateCenterString` when building the short text preview. */
const PREVIEW_LENGTH = 50;
/** Chunk size forwarded to the base loader when the caller supplies `null`/`undefined`. */
const FALLBACK_CHUNK_SIZE = 300;
/** Chunk overlap forwarded to the base loader when the caller supplies `null`/`undefined`. */
const FALLBACK_CHUNK_OVERLAP = 0;

/**
 * Loader that turns a raw in-memory string into text chunks.
 *
 * The loader id passed to `BaseLoader` is `TextLoader_<md5 of the text>`, so
 * two loaders built from the same text share the same id.
 */
export class TextLoader extends BaseLoader {
    /** The full, unmodified text given to the constructor. */
    text;

    /**
     * @param {object} options
     * @param {string} options.text - Text to be chunked.
     * @param {number} [options.chunkSize] - Falls back to 300 when null/undefined.
     * @param {number} [options.chunkOverlap] - Falls back to 0 when null/undefined.
     */
    constructor({ text, chunkSize, chunkOverlap }) {
        // Computed up front (without touching `this`) so the super() call stays readable.
        const loaderId = `TextLoader_${md5(text)}`;
        const loaderMetadata = { text: truncateCenterString(text, PREVIEW_LENGTH) };

        super(
            loaderId,
            loaderMetadata,
            chunkSize ?? FALLBACK_CHUNK_SIZE,
            chunkOverlap ?? FALLBACK_CHUNK_OVERLAP,
        );

        this.text = text;
    }

    /**
     * Splits the cleaned text with a `RecursiveCharacterTextSplitter` and yields
     * one record per resulting chunk.
     *
     * Reads `this.chunkSize`, `this.chunkOverlap` and `this.uniqueId`, which are
     * not assigned in this class (presumably set by `BaseLoader` - confirm there).
     *
     * @yields {{ pageContent: string, metadata: { type: 'TextLoader', source: string, textId: * } }}
     *   The chunk text plus metadata; `source` is the preview produced by
     *   `truncateCenterString(this.text, 50)`.
     */
    async *getUnfilteredChunks() {
        const sourcePreview = truncateCenterString(this.text, PREVIEW_LENGTH);

        const { chunkSize, chunkOverlap } = this;
        const splitter = new RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap });

        const pieces = await splitter.splitText(cleanString(this.text));
        for (const pageContent of pieces) {
            // A fresh metadata object is built for every chunk so consumers never share one.
            yield {
                pageContent,
                metadata: {
                    type: 'TextLoader',
                    source: sourcePreview,
                    textId: this.uniqueId,
                },
            };
        }
    }
}
//# sourceMappingURL=text-loader.js.map