@n8n/n8n-nodes-langchain
Version:

203 lines • 8.16 kB
JavaScript
;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
var N8nBinaryLoader_exports = {};
__export(N8nBinaryLoader_exports, {
N8nBinaryLoader: () => N8nBinaryLoader
});
module.exports = __toCommonJS(N8nBinaryLoader_exports);
var import_csv = require("@langchain/community/document_loaders/fs/csv");
var import_docx = require("@langchain/community/document_loaders/fs/docx");
var import_epub = require("@langchain/community/document_loaders/fs/epub");
var import_pdf = require("@langchain/community/document_loaders/fs/pdf");
var import_fs = require("fs");
var import_json = require("@langchain/classic/document_loaders/fs/json");
var import_text = require("@langchain/classic/document_loaders/fs/text");
var import_n8n_workflow = require("n8n-workflow");
var import_promises = require("stream/promises");
var import_tmp_promise = require("tmp-promise");
var import_helpers = require("./helpers");
const SUPPORTED_MIME_TYPES = {
auto: ["*/*"],
pdfLoader: ["application/pdf"],
csvLoader: ["text/csv"],
epubLoader: ["application/epub+zip"],
docxLoader: ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
textLoader: ["text/plain", "text/mdx", "text/md", "text/markdown"],
jsonLoader: ["application/json"]
};
class N8nBinaryLoader {
constructor(context, optionsPrefix = "", binaryDataKey = "", textSplitter) {
this.context = context;
this.optionsPrefix = optionsPrefix;
this.binaryDataKey = binaryDataKey;
this.textSplitter = textSplitter;
}
async processAll(items) {
const docs = [];
if (!items) return [];
for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
const processedDocuments = await this.processItem(items[itemIndex], itemIndex);
docs.push(...processedDocuments);
}
return docs;
}
async validateMimeType(mimeType, selectedLoader) {
if (selectedLoader !== "auto" && !SUPPORTED_MIME_TYPES[selectedLoader].includes(mimeType)) {
const neededLoader = Object.keys(SUPPORTED_MIME_TYPES).find(
(loader) => SUPPORTED_MIME_TYPES[loader].includes(mimeType)
);
throw new import_n8n_workflow.NodeOperationError(
this.context.getNode(),
`Mime type doesn't match selected loader. Please select under "Loader Type": ${neededLoader}`
);
}
if (!Object.values(SUPPORTED_MIME_TYPES).flat().includes(mimeType)) {
throw new import_n8n_workflow.NodeOperationError(this.context.getNode(), `Unsupported mime type: ${mimeType}`);
}
if (!SUPPORTED_MIME_TYPES[selectedLoader].includes(mimeType) && selectedLoader !== "textLoader" && selectedLoader !== "auto") {
throw new import_n8n_workflow.NodeOperationError(
this.context.getNode(),
`Unsupported mime type: ${mimeType} for selected loader: ${selectedLoader}`
);
}
}
async getFilePathOrBlob(binaryData, mimeType) {
if (binaryData.id) {
const binaryBuffer = await this.context.helpers.binaryToBuffer(
await this.context.helpers.getBinaryStream(binaryData.id)
);
return new Blob([binaryBuffer], {
type: mimeType
});
} else {
return new Blob([Buffer.from(binaryData.data, import_n8n_workflow.BINARY_ENCODING)], {
type: mimeType
});
}
}
async getLoader(mimeType, filePathOrBlob, itemIndex) {
switch (mimeType) {
case "application/pdf":
const splitPages = this.context.getNodeParameter(
`${this.optionsPrefix}splitPages`,
itemIndex,
false
);
return new import_pdf.PDFLoader(filePathOrBlob, { splitPages });
case "text/csv":
const column = this.context.getNodeParameter(
`${this.optionsPrefix}column`,
itemIndex,
null
);
const separator = this.context.getNodeParameter(
`${this.optionsPrefix}separator`,
itemIndex,
","
);
return new import_csv.CSVLoader(filePathOrBlob, { column: column ?? void 0, separator });
case "application/epub+zip":
let filePath;
if (filePathOrBlob instanceof Blob) {
const tmpFileData = await (0, import_tmp_promise.file)({ prefix: "epub-loader-" });
const bufferData = await filePathOrBlob.arrayBuffer();
await (0, import_promises.pipeline)([new Uint8Array(bufferData)], (0, import_fs.createWriteStream)(tmpFileData.path));
return new import_epub.EPubLoader(tmpFileData.path);
} else {
filePath = filePathOrBlob;
}
return new import_epub.EPubLoader(filePath);
case "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
return new import_docx.DocxLoader(filePathOrBlob);
case "text/plain":
return new import_text.TextLoader(filePathOrBlob);
case "application/json":
const pointers = this.context.getNodeParameter(
`${this.optionsPrefix}pointers`,
itemIndex,
""
);
const pointersArray = pointers.split(",").map((pointer) => pointer.trim());
return new import_json.JSONLoader(filePathOrBlob, pointersArray);
default:
return new import_text.TextLoader(filePathOrBlob);
}
}
async loadDocuments(loader) {
return this.textSplitter ? await this.textSplitter.splitDocuments(await loader.load()) : await loader.load();
}
async cleanupTmpFileIfNeeded(cleanupTmpFile) {
if (cleanupTmpFile) {
await cleanupTmpFile();
}
}
async processItem(item, itemIndex) {
const docs = [];
const binaryMode = this.context.getNodeParameter("binaryMode", itemIndex, "allInputData");
if (binaryMode === "allInputData") {
const binaryData = this.context.getInputData();
for (const data of binaryData) {
if (data.binary) {
const binaryDataKeys = Object.keys(data.binary);
for (const fileKey of binaryDataKeys) {
const processedDocuments = await this.processItemByKey(item, itemIndex, fileKey);
docs.push(...processedDocuments);
}
}
}
} else {
const processedDocuments = await this.processItemByKey(item, itemIndex, this.binaryDataKey);
docs.push(...processedDocuments);
}
return docs;
}
async processItemByKey(item, itemIndex, binaryKey) {
const selectedLoader = this.context.getNodeParameter(
"loader",
itemIndex,
"auto"
);
const docs = [];
const metadata = (0, import_helpers.getMetadataFiltersValues)(this.context, itemIndex);
if (!item) return [];
const binaryData = this.context.helpers.assertBinaryData(itemIndex, binaryKey);
const { mimeType } = binaryData;
await this.validateMimeType(mimeType, selectedLoader);
const filePathOrBlob = await this.getFilePathOrBlob(binaryData, mimeType);
const cleanupTmpFile = void 0;
const loader = await this.getLoader(mimeType, filePathOrBlob, itemIndex);
const loadedDoc = await this.loadDocuments(loader);
docs.push(...loadedDoc);
if (metadata) {
docs.forEach((document) => {
document.metadata = {
...document.metadata,
...metadata
};
});
}
await this.cleanupTmpFileIfNeeded(cleanupTmpFile);
return docs;
}
}
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
N8nBinaryLoader
});
//# sourceMappingURL=N8nBinaryLoader.js.map