UNPKG

n8n-nodes-parse-pdf

Version:

Extract text, tables, and structured data from PDF files using PDF API Hub

103 lines 3.58 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.description = void 0;
exports.execute = execute;
const helpers_1 = require("../helpers");

/**
 * Builds a `displayOptions` object that shows a field only for the
 * `parsePdf` operation, optionally narrowed by further parameter values.
 * A fresh object is created on every call so no property shares state
 * with another.
 *
 * @param {Object} [extraConditions] - Additional `show` conditions.
 * @returns {{show: Object}} The display options for one property.
 */
const showForParsePdf = (extraConditions = {}) => ({
    show: {
        operation: ['parsePdf'],
        ...extraConditions,
    },
});

/** Node properties (UI fields) for the "Parse PDF" operation. */
const parsePdfProperties = [
    {
        displayName: 'Input Type',
        name: 'parse_input_type',
        type: 'options',
        options: [
            { name: 'URL (Default)', value: 'url' },
            { name: 'File (Binary)', value: 'file' },
        ],
        default: 'url',
        description: 'How to provide the PDF to parse',
        displayOptions: showForParsePdf(),
    },
    {
        displayName: 'PDF URL',
        name: 'parse_url',
        type: 'string',
        default: '',
        description: 'URL of the PDF to parse',
        placeholder: 'https://pdfapihub.com/sample.pdf',
        displayOptions: showForParsePdf({ parse_input_type: ['url'] }),
    },
    {
        displayName: 'Binary Property Name',
        name: 'parse_file_binary_property',
        type: 'string',
        default: 'data',
        description: 'Binary property containing the PDF file',
        displayOptions: showForParsePdf({ parse_input_type: ['file'] }),
    },
    {
        displayName: 'Parse Mode',
        name: 'parse_mode',
        type: 'options',
        options: [
            { name: 'Text Only (Default)', value: 'text', description: 'Extract text only' },
            { name: 'Layout', value: 'layout', description: 'Text + text blocks with bounding boxes' },
            { name: 'Tables', value: 'tables', description: 'Text + table blocks' },
            { name: 'Full', value: 'full', description: 'Text + blocks + tables + images' },
        ],
        default: 'text',
        description: 'What to extract from the PDF',
        displayOptions: showForParsePdf(),
    },
    {
        displayName: 'Pages',
        name: 'parse_pages',
        type: 'string',
        default: 'all',
        description: 'Page selection: "all" or a range like "1-3" or single page like "2"',
        displayOptions: showForParsePdf(),
    },
];
exports.description = parsePdfProperties;

/**
 * Runs the "Parse PDF" operation for a single input item.
 *
 * Reads the operation's node parameters, POSTs them to the PDF API Hub
 * parse endpoint using the `pdfapihubApi` credentials, passes the response
 * to `checkApiResponse`, and appends the parsed response body to
 * `returnData`.
 *
 * Must be invoked with the n8n execution context as `this`; it relies on
 * `this.getNodeParameter` and `this.helpers.httpRequestWithAuthentication`.
 *
 * @param {number} index - Index of the input item being processed.
 * @param {Array} returnData - Output accumulator; one entry is pushed.
 * @returns {Promise<void>}
 */
async function execute(index, returnData) {
    const inputType = this.getNodeParameter('parse_input_type', index);
    const sourceUrl = this.getNodeParameter('parse_url', index, '');
    const parseMode = this.getNodeParameter('parse_mode', index);
    const pageSelection = this.getNodeParameter('parse_pages', index);

    const payload = { mode: parseMode, pages: pageSelection };
    if (inputType === 'url') {
        payload.url = (0, helpers_1.normalizeUrl)(sourceUrl);
    }

    // File input goes through the multipart helper; anything else is sent
    // as a plain JSON body.
    let transportOptions;
    if (inputType === 'file') {
        transportOptions = await helpers_1.createSingleFileMultipart.call(
            this,
            index,
            this.getNodeParameter('parse_file_binary_property', index),
            payload,
        );
    } else {
        transportOptions = { body: payload, json: true };
    }

    // The full response is requested and HTTP status errors are not thrown
    // by the request itself; the status code is handed to checkApiResponse.
    const request = {
        method: 'POST',
        url: 'https://pdfapihub.com/api/v1/pdf/parse',
        ...transportOptions,
        returnFullResponse: true,
        ignoreHttpStatusErrors: true,
    };
    const response = await this.helpers.httpRequestWithAuthentication.call(this, 'pdfapihubApi', request);

    (0, helpers_1.checkApiResponse)(this, response.statusCode, response.body, index);
    const parsedItem = (0, helpers_1.parseJsonResponseBody)(response.body, index);
    returnData.push(parsedItem);
}
//# sourceMappingURL=parsePdf.js.map