n8n-nodes-parse-pdf
Version:
Extract text, tables, and structured data from PDF files using PDF API Hub
103 lines • 3.58 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.description = void 0;
exports.execute = execute;
const helpers_1 = require("../helpers");
exports.description = [
{
displayName: 'Input Type',
name: 'parse_input_type',
type: 'options',
options: [
{ name: 'URL (Default)', value: 'url' },
{ name: 'File (Binary)', value: 'file' },
],
default: 'url',
description: 'How to provide the PDF to parse',
displayOptions: {
show: {
operation: ['parsePdf'],
},
},
},
{
displayName: 'PDF URL',
name: 'parse_url',
type: 'string',
default: '',
description: 'URL of the PDF to parse',
placeholder: 'https://pdfapihub.com/sample.pdf',
displayOptions: {
show: {
operation: ['parsePdf'],
parse_input_type: ['url'],
},
},
},
{
displayName: 'Binary Property Name',
name: 'parse_file_binary_property',
type: 'string',
default: 'data',
description: 'Binary property containing the PDF file',
displayOptions: {
show: {
operation: ['parsePdf'],
parse_input_type: ['file'],
},
},
},
{
displayName: 'Parse Mode',
name: 'parse_mode',
type: 'options',
options: [
{ name: 'Text Only (Default)', value: 'text', description: 'Extract text only' },
{ name: 'Layout', value: 'layout', description: 'Text + text blocks with bounding boxes' },
{ name: 'Tables', value: 'tables', description: 'Text + table blocks' },
{ name: 'Full', value: 'full', description: 'Text + blocks + tables + images' },
],
default: 'text',
description: 'What to extract from the PDF',
displayOptions: {
show: {
operation: ['parsePdf'],
},
},
},
{
displayName: 'Pages',
name: 'parse_pages',
type: 'string',
default: 'all',
description: 'Page selection: "all" or a range like "1-3" or single page like "2"',
displayOptions: {
show: {
operation: ['parsePdf'],
},
},
},
];
/**
 * Runs the `parsePdf` operation for one item.
 *
 * Reads the operation's parameters, sends them to the PDF API Hub parse
 * endpoint (as a JSON body in URL mode, or via the multipart helper in file
 * mode), passes the response through `checkApiResponse`, and appends the
 * result of `parseJsonResponseBody` to `returnData`.
 *
 * Must be called with `this` bound to the execution context, since it uses
 * `this.getNodeParameter` and `this.helpers.httpRequestWithAuthentication`.
 *
 * @param {number} index - Item index forwarded to `getNodeParameter` and the helpers.
 * @param {Array} returnData - Output accumulator; one entry is pushed per call.
 * @returns {Promise<void>}
 */
async function execute(index, returnData) {
    const { normalizeUrl, createSingleFileMultipart, checkApiResponse, parseJsonResponseBody } = helpers_1;

    const inputType = this.getNodeParameter('parse_input_type', index);
    // Read with a fallback because the URL field is hidden in file mode.
    const rawUrl = this.getNodeParameter('parse_url', index, '');
    const payload = {
        mode: this.getNodeParameter('parse_mode', index),
        pages: this.getNodeParameter('parse_pages', index),
    };
    if (inputType === 'url') {
        payload.url = normalizeUrl(rawUrl);
    }

    // File mode delegates request construction to the multipart helper;
    // every other mode sends the payload as JSON.
    let transportOptions;
    if (inputType === 'file') {
        const binaryProperty = this.getNodeParameter('parse_file_binary_property', index);
        transportOptions = await createSingleFileMultipart.call(this, index, binaryProperty, payload);
    }
    else {
        transportOptions = { body: payload, json: true };
    }

    // Later sources win: transport options may override method/url, while the
    // response-handling flags always take precedence.
    const request = Object.assign(
        { method: 'POST', url: 'https://pdfapihub.com/api/v1/pdf/parse' },
        transportOptions,
        { returnFullResponse: true, ignoreHttpStatusErrors: true },
    );
    const response = await this.helpers.httpRequestWithAuthentication.call(this, 'pdfapihubApi', request);

    // HTTP status errors are not thrown by the request (see flags above), so
    // the status code is handed to the shared response checker instead.
    checkApiResponse(this, response.statusCode, response.body, index);
    returnData.push(parseJsonResponseBody(response.body, index));
}
//# sourceMappingURL=parsePdf.js.map