UNPKG

@fin.cx/einvoice

Version:

A TypeScript module for creating, manipulating, and embedding XML data within PDF files, specifically tailored for electronic invoice (einvoice) packages.

98 lines 7.67 kB
import {
  BaseXMLExtractor,
  StandardXMLExtractor,
  AssociatedFilesExtractor,
  TextXMLExtractor,
} from './extractors/index.js';
import { FormatDetector } from '../utils/format.detector.js';
import { InvoiceFormat } from '../../interfaces/common.js';

/**
 * Error types for PDF extraction operations.
 *
 * Each value is the human-readable label that ends up in `error.type` of a
 * failed extraction result.
 */
export const PDFExtractError = {
  EXTRACT_ERROR: 'XML extraction failed',
  INVALID_INPUT: 'Invalid input parameters',
  NO_XML_FOUND: 'No XML found in PDF',
};

/**
 * Turn an arbitrary thrown value into a printable message.
 * @param {unknown} error Value caught by a `catch` clause
 * @returns {string} `error.message` for Error instances, `String(error)` otherwise
 */
function describeError(error) {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Main PDF extractor class that orchestrates the extraction process.
 * Uses multiple specialized extractors in sequence to maximize success rate.
 */
export class PDFExtractor {
  /**
   * Constructor initializes the chain of extractors.
   */
  constructor() {
    // Ordered by preference/likelihood of success; extractXml walks this
    // list front to back and stops at the first extractor that yields XML.
    this.extractors = [
      new StandardXMLExtractor(), // Standard PDF/A-3 embedded files
      new AssociatedFilesExtractor(), // Associated files (ZUGFeRD v1, some Factur-X)
      new TextXMLExtractor(), // Text-based extraction (fallback)
    ];
  }

  /**
   * Extract XML from a PDF buffer.
   * Tries multiple extraction methods in sequence.
   *
   * This method does not throw: every failure path is converted into an
   * error result via `createErrorResult`.
   *
   * @param pdfBuffer PDF buffer (Node.js `Buffer` or `Uint8Array`)
   * @returns On success `{ success: true, xml, format, extractorUsed }`;
   *          on failure `{ success: false, error: { type, message, originalError } }`
   */
  async extractXml(pdfBuffer) {
    try {
      console.log('Starting XML extraction from PDF...');

      // Validate input
      if (!pdfBuffer || pdfBuffer.length === 0) {
        return this.createErrorResult(
          PDFExtractError.INVALID_INPUT,
          'PDF buffer is empty or undefined',
        );
      }

      // Ensure the extractors receive a plain Uint8Array. The `typeof` guard
      // matters: where the `Buffer` global does not exist, touching it would
      // throw a ReferenceError and every extraction would be reported as an
      // unexpected error, even for a perfectly valid Uint8Array input.
      const isNodeBuffer = typeof Buffer !== 'undefined' && Buffer.isBuffer(pdfBuffer);
      const pdfBufferArray = isNodeBuffer ? new Uint8Array(pdfBuffer) : pdfBuffer;

      // Try each extractor in sequence
      for (const extractor of this.extractors) {
        const extractorName = extractor.constructor.name;
        console.log(`Trying extraction with ${extractorName}...`);

        try {
          const xml = await extractor.extractXml(pdfBufferArray);

          if (xml) {
            console.log(`Successfully extracted XML using ${extractorName}`);

            // Detect format of the extracted XML
            const format = FormatDetector.detectFormat(xml);

            return {
              success: true,
              xml,
              format,
              extractorUsed: extractorName,
            };
          }

          console.log(`Extraction with ${extractorName} failed, trying next method...`);
        } catch (error) {
          // Log error but continue with next extractor. Note that this also
          // covers an exception thrown by format detection above.
          console.warn(`Error using ${extractorName}: ${describeError(error)}`);
        }
      }

      // If all extractors fail, return a no XML found error
      return this.createErrorResult(
        PDFExtractError.NO_XML_FOUND,
        'All extraction methods failed, no valid XML found in PDF',
      );
    } catch (error) {
      // Handle any unexpected errors
      return this.createErrorResult(
        PDFExtractError.EXTRACT_ERROR,
        `Unexpected error during XML extraction: ${describeError(error)}`,
        error instanceof Error ? error : undefined,
      );
    }
  }

  /**
   * Create a PDF extract result with error information.
   * Logs the error (and the original error object, when given) to `console.error`.
   *
   * @param type Error type (one of the `PDFExtractError` values)
   * @param message Error message
   * @param originalError Original error object, if any
   * @returns Error result of the form `{ success: false, error: { type, message, originalError } }`
   */
  createErrorResult(type, message, originalError) {
    console.error(`PDF Extractor Error (${type}): ${message}`);
    if (originalError) {
      console.error(originalError);
    }

    return {
      success: false,
      error: {
        type,
        message,
        originalError,
      },
    };
  }
}
// NOTE: the inline `sourceMappingURL` data URL emitted by the original build
// was removed here; its mappings describe the original generated layout and
// no longer match this reformatted file.