@llm-tools/embedjs
Version:
A NodeJS RAG framework to easily work with LLMs and custom datasets
35 lines • 1.47 kB
JavaScript
// Flag the CommonJS exports object with a non-enumerable `__esModule: true` property.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the TextLoader export as undefined; the real class is assigned
// after its definition further down in this file.
exports.TextLoader = void 0;
const tslib_1 = require("tslib");
const textsplitters_1 = require("@langchain/textsplitters");
const md5_1 = tslib_1.__importDefault(require("md5"));
const embedjs_interfaces_1 = require("@llm-tools/embedjs-interfaces");
const embedjs_utils_1 = require("@llm-tools/embedjs-utils");
/**
 * Loader that turns an in-memory string into chunk records.
 *
 * The loader id passed to the base class is `TextLoader_<md5 of the text>`,
 * and the loader-level metadata carries a center-truncated (50) preview of
 * the text rather than the full string.
 */
class TextLoader extends embedjs_interfaces_1.BaseLoader {
    /** The original, unmodified text handed to the constructor. */
    text;
    /**
     * @param {object} options
     * @param {string} options.text - Text to split into chunks.
     * @param {number} [options.chunkSize] - Forwarded to the base class; 300 when null/undefined.
     * @param {number} [options.chunkOverlap] - Forwarded to the base class; 0 when null/undefined.
     */
    constructor({ text, chunkSize, chunkOverlap }) {
        // Build the base-class arguments up front; `this` is not touched before super().
        const loaderId = `TextLoader_${(0, md5_1.default)(text)}`;
        const loaderMetadata = { text: (0, embedjs_utils_1.truncateCenterString)(text, 50) };
        super(loaderId, loaderMetadata, chunkSize ?? 300, chunkOverlap ?? 0);
        this.text = text;
    }
    /**
     * Splits the cleaned text with a RecursiveCharacterTextSplitter and yields
     * one record per resulting chunk, in splitter order.
     *
     * Reads `this.chunkSize`, `this.chunkOverlap` and `this.uniqueId`, which are
     * not assigned in this class (presumably set by BaseLoader — confirm there).
     *
     * @yields {{ pageContent: string, metadata: { type: string, source: string, textId: * } }}
     */
    async *getUnfilteredChunks() {
        const { truncateCenterString, cleanString } = embedjs_utils_1;
        // Short preview of the text, reused as the `source` of every chunk.
        const sourcePreview = truncateCenterString(this.text, 50);
        const { chunkSize, chunkOverlap } = this;
        const splitter = new textsplitters_1.RecursiveCharacterTextSplitter({ chunkSize, chunkOverlap });
        const pieces = await splitter.splitText(cleanString(this.text));
        // Built lazily, one record per yield, so `uniqueId` is read at yield time.
        const toRecord = (pageContent) => ({
            pageContent,
            metadata: {
                type: 'TextLoader',
                source: sourcePreview,
                textId: this.uniqueId,
            },
        });
        for (const piece of pieces) {
            yield toRecord(piece);
        }
    }
}
// Publish the TextLoader class on the CommonJS exports object.
exports.TextLoader = TextLoader;
//# sourceMappingURL=text-loader.js.map
;