UNPKG

@llm-tools/embedjs

Version:

A NodeJS RAG framework to easily work with LLMs and custom datasets

72 lines 3.14 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.LocalPathLoader = void 0; const tslib_1 = require("tslib"); const stream_mime_type_1 = require("stream-mime-type"); const debug_1 = tslib_1.__importDefault(require("debug")); const node_path_1 = tslib_1.__importDefault(require("node:path")); const node_fs_1 = tslib_1.__importDefault(require("node:fs")); const md5_1 = tslib_1.__importDefault(require("md5")); const mime_js_1 = require("../util/mime.cjs"); const embedjs_interfaces_1 = require("@llm-tools/embedjs-interfaces"); class LocalPathLoader extends embedjs_interfaces_1.BaseLoader { debug = (0, debug_1.default)('embedjs:loader:LocalPathLoader'); path; constructor({ path }) { super(`LocalPathLoader_${(0, md5_1.default)(path)}`, { path }); this.path = path; } async *getUnfilteredChunks() { for await (const result of await this.recursivelyAddPath(this.path)) { yield { ...result, metadata: { ...result.metadata, type: 'LocalPathLoader', originalPath: this.path, }, }; } } async *recursivelyAddPath(currentPath) { const isDir = node_fs_1.default.lstatSync(currentPath).isDirectory(); this.debug(`Processing path '${currentPath}'. It is a ${isDir ? 'Directory!' : 'file...'}`); if (!isDir) { const stream = node_fs_1.default.createReadStream(currentPath); let { mime } = await (0, stream_mime_type_1.getMimeType)(stream); stream.destroy(); this.debug(`File '${this.path}' has mime type '${mime}'`); if (mime === 'application/octet-stream') { const extension = currentPath.split('.').pop().toLowerCase(); if (extension === 'md' || extension === 'mdx') mime = 'text/markdown'; this.debug(`File '${this.path}' mime type updated to 'text/markdown'`); } try { const loader = await (0, mime_js_1.createLoaderFromMimeType)(currentPath, mime); for await (const result of await loader.getUnfilteredChunks()) { yield { pageContent: result.pageContent, metadata: { source: currentPath, }, }; } } catch (err) { this.debug(`Error creating loader for mime type '${mime}'`, err); } } else { const files = node_fs_1.default.readdirSync(currentPath); this.debug(`Dir '${currentPath}' has ${files.length} entries inside`, files); for (const file of files) { for await (const result of await this.recursivelyAddPath(node_path_1.default.resolve(currentPath, file))) { yield result; } } } } } exports.LocalPathLoader = LocalPathLoader; //# sourceMappingURL=local-path-loader.js.map