UNPKG

@n8n/n8n-nodes-langchain

Version:

![Banner image](https://user-images.githubusercontent.com/10284570/173569848-c624317f-42b1-45a6-ab09-f0ea3c247648.png)

203 lines 8.16 kB
"use strict"; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); var N8nBinaryLoader_exports = {}; __export(N8nBinaryLoader_exports, { N8nBinaryLoader: () => N8nBinaryLoader }); module.exports = __toCommonJS(N8nBinaryLoader_exports); var import_csv = require("@langchain/community/document_loaders/fs/csv"); var import_docx = require("@langchain/community/document_loaders/fs/docx"); var import_epub = require("@langchain/community/document_loaders/fs/epub"); var import_pdf = require("@langchain/community/document_loaders/fs/pdf"); var import_fs = require("fs"); var import_json = require("@langchain/classic/document_loaders/fs/json"); var import_text = require("@langchain/classic/document_loaders/fs/text"); var import_n8n_workflow = require("n8n-workflow"); var import_promises = require("stream/promises"); var import_tmp_promise = require("tmp-promise"); var import_helpers = require("./helpers"); const SUPPORTED_MIME_TYPES = { auto: ["*/*"], pdfLoader: ["application/pdf"], csvLoader: ["text/csv"], epubLoader: ["application/epub+zip"], docxLoader: ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"], textLoader: ["text/plain", "text/mdx", "text/md", "text/markdown"], jsonLoader: ["application/json"] }; class N8nBinaryLoader { constructor(context, optionsPrefix = "", binaryDataKey = "", textSplitter) { this.context = context; this.optionsPrefix = optionsPrefix; this.binaryDataKey = binaryDataKey; this.textSplitter = textSplitter; } async processAll(items) { const docs = []; if (!items) return []; for (let itemIndex = 0; itemIndex < items.length; itemIndex++) { const processedDocuments = await this.processItem(items[itemIndex], itemIndex); docs.push(...processedDocuments); } return docs; } async validateMimeType(mimeType, selectedLoader) { if (selectedLoader !== "auto" && !SUPPORTED_MIME_TYPES[selectedLoader].includes(mimeType)) { const neededLoader = Object.keys(SUPPORTED_MIME_TYPES).find( (loader) => SUPPORTED_MIME_TYPES[loader].includes(mimeType) ); throw new import_n8n_workflow.NodeOperationError( this.context.getNode(), `Mime type doesn't match selected loader. Please select under "Loader Type": ${neededLoader}` ); } if (!Object.values(SUPPORTED_MIME_TYPES).flat().includes(mimeType)) { throw new import_n8n_workflow.NodeOperationError(this.context.getNode(), `Unsupported mime type: ${mimeType}`); } if (!SUPPORTED_MIME_TYPES[selectedLoader].includes(mimeType) && selectedLoader !== "textLoader" && selectedLoader !== "auto") { throw new import_n8n_workflow.NodeOperationError( this.context.getNode(), `Unsupported mime type: ${mimeType} for selected loader: ${selectedLoader}` ); } } async getFilePathOrBlob(binaryData, mimeType) { if (binaryData.id) { const binaryBuffer = await this.context.helpers.binaryToBuffer( await this.context.helpers.getBinaryStream(binaryData.id) ); return new Blob([binaryBuffer], { type: mimeType }); } else { return new Blob([Buffer.from(binaryData.data, import_n8n_workflow.BINARY_ENCODING)], { type: mimeType }); } } async getLoader(mimeType, filePathOrBlob, itemIndex) { switch (mimeType) { case "application/pdf": const splitPages = this.context.getNodeParameter( `${this.optionsPrefix}splitPages`, itemIndex, false ); return new import_pdf.PDFLoader(filePathOrBlob, { splitPages }); case "text/csv": const column = this.context.getNodeParameter( `${this.optionsPrefix}column`, itemIndex, null ); const separator = this.context.getNodeParameter( `${this.optionsPrefix}separator`, itemIndex, "," ); return new import_csv.CSVLoader(filePathOrBlob, { column: column ?? void 0, separator }); case "application/epub+zip": let filePath; if (filePathOrBlob instanceof Blob) { const tmpFileData = await (0, import_tmp_promise.file)({ prefix: "epub-loader-" }); const bufferData = await filePathOrBlob.arrayBuffer(); await (0, import_promises.pipeline)([new Uint8Array(bufferData)], (0, import_fs.createWriteStream)(tmpFileData.path)); return new import_epub.EPubLoader(tmpFileData.path); } else { filePath = filePathOrBlob; } return new import_epub.EPubLoader(filePath); case "application/vnd.openxmlformats-officedocument.wordprocessingml.document": return new import_docx.DocxLoader(filePathOrBlob); case "text/plain": return new import_text.TextLoader(filePathOrBlob); case "application/json": const pointers = this.context.getNodeParameter( `${this.optionsPrefix}pointers`, itemIndex, "" ); const pointersArray = pointers.split(",").map((pointer) => pointer.trim()); return new import_json.JSONLoader(filePathOrBlob, pointersArray); default: return new import_text.TextLoader(filePathOrBlob); } } async loadDocuments(loader) { return this.textSplitter ? await this.textSplitter.splitDocuments(await loader.load()) : await loader.load(); } async cleanupTmpFileIfNeeded(cleanupTmpFile) { if (cleanupTmpFile) { await cleanupTmpFile(); } } async processItem(item, itemIndex) { const docs = []; const binaryMode = this.context.getNodeParameter("binaryMode", itemIndex, "allInputData"); if (binaryMode === "allInputData") { const binaryData = this.context.getInputData(); for (const data of binaryData) { if (data.binary) { const binaryDataKeys = Object.keys(data.binary); for (const fileKey of binaryDataKeys) { const processedDocuments = await this.processItemByKey(item, itemIndex, fileKey); docs.push(...processedDocuments); } } } } else { const processedDocuments = await this.processItemByKey(item, itemIndex, this.binaryDataKey); docs.push(...processedDocuments); } return docs; } async processItemByKey(item, itemIndex, binaryKey) { const selectedLoader = this.context.getNodeParameter( "loader", itemIndex, "auto" ); const docs = []; const metadata = (0, import_helpers.getMetadataFiltersValues)(this.context, itemIndex); if (!item) return []; const binaryData = this.context.helpers.assertBinaryData(itemIndex, binaryKey); const { mimeType } = binaryData; await this.validateMimeType(mimeType, selectedLoader); const filePathOrBlob = await this.getFilePathOrBlob(binaryData, mimeType); const cleanupTmpFile = void 0; const loader = await this.getLoader(mimeType, filePathOrBlob, itemIndex); const loadedDoc = await this.loadDocuments(loader); docs.push(...loadedDoc); if (metadata) { docs.forEach((document) => { document.metadata = { ...document.metadata, ...metadata }; }); } await this.cleanupTmpFileIfNeeded(cleanupTmpFile); return docs; } } // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { N8nBinaryLoader }); //# sourceMappingURL=N8nBinaryLoader.js.map