UNPKG

@mazix/n8n-nodes-converter-documents

Version:

n8n node to convert various document formats (DOCX, DOC, XML, YML, XLSX, CSV, PDF, TXT, PPT, PPTX, HTML, JSON, ODT, ODP, ODS) to JSON or text format

209 lines 9.73 kB
"use strict"; /* * Convert File to JSON v6 * ───────────────────────────────────────────────────────── * Универсальный кастом-нод для n8n. * Поддерживает: DOC, DOCX, XML, XLS, XLSX, CSV, PDF, TXT, * PPT, PPTX, HTML / HTM, ODT, ODP, ODS, JSON. * Выход: { text: "..."} либо { sheets: {...} } + metadata. */ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.FileToJsonNode = void 0; const path_1 = __importDefault(require("path")); const file_type_1 = require("file-type"); const n8n_workflow_1 = require("n8n-workflow"); const errors_1 = require("./errors"); const utils_1 = require("./utils"); const strategies_1 = require("./strategies"); const SUPPORTED_FORMATS = [ "doc", "docx", "xml", "yml", "xlsx", "csv", "pdf", "txt", "ppt", "pptx", "html", "htm", "odt", "odp", "ods", "json", ]; /** * Custom n8n node: convert files to JSON/text * Supports DOCX, XML, YML, XLSX, CSV, PDF, TXT, PPTX, HTML */ class FileToJsonNode { constructor() { this.description = { displayName: "Convert File to JSON", name: "convertFileToJson", icon: "file:icon.svg", group: ["transform"], version: 5, description: "DOCX / XML / YML / XLSX / CSV / PDF / TXT / PPTX / HTML → JSON|text", defaults: { name: "Convert File to JSON" }, inputs: [n8n_workflow_1.NodeConnectionTypes.Main], outputs: [n8n_workflow_1.NodeConnectionTypes.Main], usableAsTool: true, properties: [ { displayName: "Binary Property", name: "binaryPropertyName", type: "string", default: "data", description: "Name of the binary property that contains the file", }, { displayName: "Max File Size (MB)", name: "maxFileSize", type: "number", default: 50, description: "Maximum file size in megabytes", typeOptions: { minValue: 1, maxValue: 100 } }, { displayName: "Max Concurrency", name: "maxConcurrency", type: "number", default: 4, description: "Maximum number of files processed concurrently", typeOptions: { minValue: 1, maxValue: 10 } }, { displayName: "Output Format (DOCX)", name: "outputFormat", type: "options", options: [ { name: "Plain Text", value: "text", description: "Extract text only (fastest, smallest output)", }, { name: "HTML", value: "html", description: "Convert to HTML (preserves tables, formatting, structure)", }, { name: "Markdown", value: "markdown", description: "Convert to Markdown with GFM tables (ideal for AI/LLM/RAG)", }, ], default: "text", description: "Choose output format for DOCX files. Markdown and HTML preserve tables and formatting for AI/LLM processing.", }, ], }; } /** * Main execution method for n8n node */ async execute() { const items = this.getInputData(); const maxFileSize = this.getNodeParameter('maxFileSize', 0, 50) * 1024 * 1024; const maxConcurrency = this.getNodeParameter('maxConcurrency', 0, 4); const processItem = async (item, i) => { const prop = this.getNodeParameter("binaryPropertyName", i, "data"); // --- Input data validation --- if (!item || typeof item !== "object") throw new errors_1.FileTypeError(`Item #${i} is not an object`); const itemObj = item; if (!itemObj.binary || typeof itemObj.binary !== "object") throw new errors_1.FileTypeError(`Item #${i} does not contain binary data`); const binary = itemObj.binary; if (!binary[prop]) throw new errors_1.FileTypeError(`Binary property "${prop}" is missing (item ${i})`); const binaryProp = binary[prop]; if (!binaryProp.fileName || typeof binaryProp.fileName !== "string") throw new errors_1.FileTypeError(`File does not contain a valid name (item ${i})`); const buf = await this.helpers.getBinaryDataBuffer(i, prop); if (!Buffer.isBuffer(buf) || buf.length === 0) throw new errors_1.EmptyFileError("File is empty or contains no data"); if (buf.length > maxFileSize) throw new errors_1.FileTooLargeError(`File is too large (maximum ${maxFileSize / 1024 / 1024} MB)`); // --- End of validation --- const name = (0, utils_1.sanitizeFileName)(binaryProp.fileName ?? ""); let ext = path_1.default.extname(name).slice(1).toLowerCase(); /* ── autodetect ── */ if (!ext || !SUPPORTED_FORMATS.includes(ext)) { try { const ft = await (0, file_type_1.fromBuffer)(buf); if (ft?.ext && SUPPORTED_FORMATS.includes(ft.ext)) { ext = ft.ext; } else { throw new errors_1.UnsupportedFormatError(`Unsupported file type: ${ext || "unknown"}`); } } catch (error) { this.logger?.warn('File type detection failed', { fileName: name, error: error instanceof Error ? error.message : String(error) }); throw new errors_1.UnsupportedFormatError(`Unsupported file type: ${ext || "unknown"}`); } } this.logger?.info("ConvertFileToJSON →", { file: name || "[no-name]", ext, size: buf.length, }); let json = {}; const startTime = performance.now(); const outputFormat = this.getNodeParameter('outputFormat', i, 'text'); try { if (!strategies_1.strategies[ext]) { throw new errors_1.UnsupportedFormatError(`Format "${ext}" is not supported`); } json = await strategies_1.strategies[ext](buf, ext, ext === 'docx' ? { outputFormat } : undefined); } catch (e) { if (e instanceof errors_1.FileTypeError || e instanceof errors_1.FileTooLargeError || e instanceof errors_1.UnsupportedFormatError || e instanceof errors_1.EmptyFileError || e instanceof errors_1.ProcessingError) { throw e; } throw new errors_1.ProcessingError(`${ext.toUpperCase()} processing error: ${e.message}`); } const processingTime = performance.now() - startTime; this.logger?.info('Processing completed', { file: name, size: buf.length, time: `${processingTime.toFixed(2)}ms`, type: ext }); if ("text" in json && (!json.text || json.text.trim().length === 0)) { throw new errors_1.EmptyFileError(`File "${name}" (${ext.toUpperCase()}, ${(buf.length / 1024).toFixed(2)} KB) contains no extractable text. ` + `Possible reasons: (1) File contains only images/graphics without text, ` + `(2) File is password-protected or encrypted, ` + `(3) File structure is corrupted, ` + `(4) File was created with a non-standard application. ` + `Try: Open file in original application and verify it contains text, then save it again.`); } json.metadata = { fileName: (0, utils_1.sanitizeFileName)(name) || null, fileSize: buf.length, fileType: ext, processedAt: new Date().toISOString(), }; return { json: json, pairedItem: { item: i }, }; }; const results = await (0, utils_1.promisePool)(items, processItem, maxConcurrency); return [[{ json: { files: results.map(result => result.json), totalFiles: results.length, processedAt: new Date().toISOString() } }]]; } } exports.FileToJsonNode = FileToJsonNode; //# sourceMappingURL=FileToJsonNode.node.js.map