@llm-tools/embedjs
Version:
A NodeJS RAG framework to easily work with LLMs and custom datasets
30 lines • 1.18 kB
JavaScript
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters';
import md5 from 'md5';
import { BaseLoader } from '@llm-tools/embedjs-interfaces';
import { truncateCenterString, cleanString } from '@llm-tools/embedjs-utils';
/**
 * Loader that turns a single in-memory string into a stream of text chunks.
 *
 * The loader id handed to `BaseLoader` is `TextLoader_` followed by the md5
 * of the full text. The loader metadata and each chunk's `source` field carry
 * the result of `truncateCenterString(text, 50)` rather than the full text.
 * NOTE(review): presumably that helper shortens the string to roughly 50
 * characters by cutting out the middle — confirm against embedjs-utils.
 */
export class TextLoader extends BaseLoader {
    /** The complete, unmodified input text supplied to the constructor. */
    text;

    /**
     * @param {object} options
     * @param {string} options.text - Text to be chunked.
     * @param {number} [options.chunkSize] - Forwarded to the base class; 300 when null/undefined.
     * @param {number} [options.chunkOverlap] - Forwarded to the base class; 0 when null/undefined.
     */
    constructor({ text, chunkSize, chunkOverlap }) {
        // Built ahead of super() purely for readability; none of this touches `this`.
        const loaderId = `TextLoader_${md5(text)}`;
        const loaderMetadata = { text: truncateCenterString(text, 50) };
        super(loaderId, loaderMetadata, chunkSize ?? 300, chunkOverlap ?? 0);
        this.text = text;
    }

    /**
     * Splits the stored text with a `RecursiveCharacterTextSplitter` and
     * yields one record per resulting piece, in splitter order.
     *
     * The text is passed through `cleanString` before splitting; the `source`
     * label is derived from the stored text as-is.
     *
     * @yields {{ pageContent: string, metadata: { type: string, source: string, textId: * } }}
     */
    async *getUnfilteredChunks() {
        const sourceLabel = truncateCenterString(this.text, 50);
        const splitter = new RecursiveCharacterTextSplitter({
            chunkSize: this.chunkSize,
            chunkOverlap: this.chunkOverlap,
        });
        const pieces = await splitter.splitText(cleanString(this.text));

        // A fresh record (and fresh metadata object) is created for every piece
        // so consumers never share a mutable metadata reference.
        const toRecord = (pageContent) => ({
            pageContent,
            metadata: {
                type: 'TextLoader',
                source: sourceLabel,
                textId: this.uniqueId,
            },
        });

        for (const piece of pieces) {
            yield toRecord(piece);
        }
    }
}
//# sourceMappingURL=text-loader.js.map