@fin.cx/einvoice
Version:
A TypeScript module for creating, manipulating, and embedding XML data within PDF files specifically tailored for electronic invoice (einvoice) packages.
98 lines • 7.67 kB
JavaScript
import { BaseXMLExtractor, StandardXMLExtractor, AssociatedFilesExtractor, TextXMLExtractor } from './extractors/index.js';
import { FormatDetector } from '../utils/format.detector.js';
import { InvoiceFormat } from '../../interfaces/common.js';
/**
 * Failure categories reported by the PDF extraction pipeline.
 *
 * Each member's value is the human-readable description that ends up in the
 * `error.type` field of a failed extraction result.
 *
 * Declared with `var` so the exported binding keeps the same hoisting
 * semantics as the compiled enum it replaces.
 */
export var PDFExtractError = {
    /** Something unexpected went wrong while extracting. */
    EXTRACT_ERROR: "XML extraction failed",
    /** The caller passed a missing or empty PDF buffer. */
    INVALID_INPUT: "Invalid input parameters",
    /** Every extraction strategy ran, but none produced XML. */
    NO_XML_FOUND: "No XML found in PDF"
};
/**
 * Normalise caller-supplied PDF data into a Uint8Array where possible.
 *
 * - `ArrayBuffer` is wrapped in a Uint8Array view (no copy).
 * - Node `Buffer` is copied into a plain Uint8Array, as before.
 * - Anything else is returned unchanged so existing callers see no difference.
 *
 * The `typeof Buffer` guard keeps this usable in runtimes that have no
 * `Buffer` global, where a bare `Buffer.isBuffer(...)` would throw.
 *
 * @param pdfBuffer Raw input handed to `PDFExtractor#extractXml`
 * @returns A Uint8Array for ArrayBuffer/Buffer input, otherwise the input itself
 */
function toPdfByteArray(pdfBuffer) {
    if (pdfBuffer instanceof ArrayBuffer) {
        return new Uint8Array(pdfBuffer);
    }
    if (typeof Buffer !== 'undefined' && Buffer.isBuffer(pdfBuffer)) {
        return new Uint8Array(pdfBuffer);
    }
    return pdfBuffer;
}
/**
 * Main PDF extractor class that orchestrates the extraction process.
 * Runs several specialised extractors one after another and returns the
 * result of the first one that yields XML.
 */
export class PDFExtractor {
    /**
     * Builds the extractor chain.
     * Order matters: extractors are tried first-to-last and the first hit wins.
     */
    constructor() {
        this.extractors = [];
        this.extractors.push(
            new StandardXMLExtractor(), // Standard PDF/A-3 embedded files
            new AssociatedFilesExtractor(), // Associated files (ZUGFeRD v1, some Factur-X)
            new TextXMLExtractor() // Text-based extraction (fallback)
        );
    }
    /**
     * Extract XML from a PDF buffer.
     * Tries each configured extractor in sequence; an extractor that throws is
     * logged and skipped so the remaining ones still get a chance.
     *
     * This method does not reject on extraction failure: failures are reported
     * through the returned result object.
     *
     * @param pdfBuffer PDF data as Uint8Array, Node Buffer or ArrayBuffer
     * @returns On success `{ success: true, xml, format, extractorUsed }`;
     *          on failure `{ success: false, error: { type, message, originalError } }`
     */
    async extractXml(pdfBuffer) {
        try {
            console.log('Starting XML extraction from PDF...');
            // Reject missing input before touching it
            if (!pdfBuffer) {
                return this.createErrorResult(PDFExtractError.INVALID_INPUT, 'PDF buffer is empty or undefined');
            }
            // Normalise first so that an empty ArrayBuffer (which has no
            // `.length` of its own) is caught by the emptiness check below.
            const pdfBufferArray = toPdfByteArray(pdfBuffer);
            if (pdfBufferArray.length === 0) {
                return this.createErrorResult(PDFExtractError.INVALID_INPUT, 'PDF buffer is empty or undefined');
            }
            // Try each extractor in sequence
            for (const extractor of this.extractors) {
                const extractorName = extractor.constructor.name;
                console.log(`Trying extraction with ${extractorName}...`);
                try {
                    const xml = await extractor.extractXml(pdfBufferArray);
                    if (xml) {
                        console.log(`Successfully extracted XML using ${extractorName}`);
                        // Detect format of the extracted XML
                        const format = FormatDetector.detectFormat(xml);
                        return {
                            success: true,
                            xml,
                            format,
                            extractorUsed: extractorName
                        };
                    }
                    console.log(`Extraction with ${extractorName} failed, trying next method...`);
                }
                catch (error) {
                    // Best effort: log the failure and fall through to the next extractor
                    console.warn(`Error using ${extractorName}: ${error instanceof Error ? error.message : String(error)}`);
                }
            }
            // If all extractors fail, return a no XML found error
            return this.createErrorResult(PDFExtractError.NO_XML_FOUND, 'All extraction methods failed, no valid XML found in PDF');
        }
        catch (error) {
            // Anything thrown outside the per-extractor guard ends up here
            return this.createErrorResult(PDFExtractError.EXTRACT_ERROR, `Unexpected error during XML extraction: ${error instanceof Error ? error.message : String(error)}`, error instanceof Error ? error : undefined);
        }
    }
    /**
     * Create a PDF extract result with error information.
     * Also writes the error (and the original error, if any) to `console.error`.
     *
     * @param type Error type (a `PDFExtractError` value)
     * @param message Error message
     * @param originalError Original error object, if one exists
     * @returns Error result of shape `{ success: false, error: { type, message, originalError } }`
     */
    createErrorResult(type, message, originalError) {
        console.error(`PDF Extractor Error (${type}): ${message}`);
        if (originalError) {
            console.error(originalError);
        }
        return {
            success: false,
            error: {
                type,
                message,
                originalError
            }
        };
    }
}
//# sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoicGRmLmV4dHJhY3Rvci5qcyIsInNvdXJjZVJvb3QiOiIiLCJzb3VyY2VzIjpbIi4uLy4uLy4uL3RzL2Zvcm1hdHMvcGRmL3BkZi5leHRyYWN0b3IudHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IkFBQUEsT0FBTyxFQUNMLGdCQUFnQixFQUNoQixvQkFBb0IsRUFDcEIsd0JBQXdCLEVBQ3hCLGdCQUFnQixFQUNqQixNQUFNLHVCQUF1QixDQUFDO0FBQy9CLE9BQU8sRUFBRSxjQUFjLEVBQUUsTUFBTSw2QkFBNkIsQ0FBQztBQUM3RCxPQUFPLEVBQUUsYUFBYSxFQUFFLE1BQU0sNEJBQTRCLENBQUM7QUFFM0Q7O0dBRUc7QUFDSCxNQUFNLENBQU4sSUFBWSxlQUlYO0FBSkQsV0FBWSxlQUFlO0lBQ3pCLDBEQUF1QyxDQUFBO0lBQ3ZDLDZEQUEwQyxDQUFBO0lBQzFDLHVEQUFvQyxDQUFBO0FBQ3RDLENBQUMsRUFKVyxlQUFlLEtBQWYsZUFBZSxRQUkxQjtBQWlCRDs7O0dBR0c7QUFDSCxNQUFNLE9BQU8sWUFBWTtJQUd2Qjs7T0FFRztJQUNIO1FBTFEsZUFBVSxHQUF1QixFQUFFLENBQUM7UUFNMUMsOERBQThEO1FBQzlELElBQUksQ0FBQyxVQUFVLENBQUMsSUFBSSxDQUNsQixJQUFJLG9CQUFvQixFQUFFLEVBQU8sa0NBQWtDO1FBQ25FLElBQUksd0JBQXdCLEVBQUUsRUFBRywrQ0FBK0M7UUFDaEYsSUFBSSxnQkFBZ0IsRUFBRSxDQUFXLG1DQUFtQztTQUNyRSxDQUFDO0lBQ0osQ0FBQztJQUVEOzs7OztPQUtHO0lBQ0ksS0FBSyxDQUFDLFVBQVUsQ0FBQyxTQUE4QjtRQUNwRCxJQUFJLENBQUM7WUFDSCxPQUFPLENBQUMsR0FBRyxDQUFDLHFDQUFxQyxDQUFDLENBQUM7WUFFbkQsaUJBQWlCO1lBQ2pCLElBQUksQ0FBQyxTQUFTLElBQUksU0FBUyxDQUFDLE1BQU0sS0FBSyxDQUFDLEVBQUUsQ0FBQztnQkFDekMsT0FBTyxJQUFJLENBQUMsaUJBQWlCLENBQUMsZUFBZSxDQUFDLGFBQWEsRUFBRSxrQ0FBa0MsQ0FBQyxDQUFDO1lBQ25HLENBQUM7WUFFRCw4QkFBOEI7WUFDOUIsTUFBTSxjQUFjLEdBQUcsTUFBTSxDQUFDLFFBQVEsQ0FBQyxTQUFTLENBQUMsQ0FBQyxDQUFDLENBQUMsSUFBSSxVQUFVLENBQUMsU0FBUyxDQUFDLENBQUMsQ0FBQyxDQUFDLFNBQVMsQ0FBQztZQUUxRixpQ0FBaUM7WUFDakMsS0FBSyxNQUFNLFNBQVMsSUFBSSxJQUFJLENBQUMsVUFBVSxFQUFFLENBQUM7Z0JBQ3hDLE1BQU0sYUFBYSxHQUFHLFNBQVMsQ0FBQyxXQUFXLENBQUMsSUFBSSxDQUFDO2dCQUNqRCxPQUFPLENBQUMsR0FBRyxDQUFDLDBCQUEwQixhQUFhLEtBQUssQ0FBQyxDQUFDO2dCQUUxRCxJQUFJLENBQUM7b0JBQ0gsTUFBTSxHQUFHLEdBQUcsTUFBTSxTQUFTLENBQUMsVUFBVSxDQUFDLGNBQWMsQ0FBQyxDQUFDO29CQUV2RCxJQUFJLEdBQUcsRUFBRSxDQUFDO3dCQUNSLE9BQU8sQ0FBQyxHQUFHLENBQUMsb0NBQW9DLGFBQWEsRUFBRSxDQUFDLENBQUM7d0JBRWpFLHFDQUFxQzt3QkFDckMsTUFBTSxNQUFNLEdBQUcsY0FBYyxDQUFDLFlBQVksQ0FBQy
xHQUFHLENBQUMsQ0FBQzt3QkFFaEQsT0FBTzs0QkFDTCxPQUFPLEVBQUUsSUFBSTs0QkFDYixHQUFHOzRCQUNILE1BQU07NEJBQ04sYUFBYSxFQUFFLGFBQWE7eUJBQzdCLENBQUM7b0JBQ0osQ0FBQztvQkFFRCxPQUFPLENBQUMsR0FBRyxDQUFDLG1CQUFtQixhQUFhLGdDQUFnQyxDQUFDLENBQUM7Z0JBQ2hGLENBQUM7Z0JBQUMsT0FBTyxLQUFLLEVBQUUsQ0FBQztvQkFDZiw2Q0FBNkM7b0JBQzdDLE9BQU8sQ0FBQyxJQUFJLENBQUMsZUFBZSxhQUFhLEtBQUssS0FBSyxZQUFZLEtBQUssQ0FBQyxDQUFDLENBQUMsS0FBSyxDQUFDLE9BQU8sQ0FBQyxDQUFDLENBQUMsTUFBTSxDQUFDLEtBQUssQ0FBQyxFQUFFLENBQUMsQ0FBQztnQkFDMUcsQ0FBQztZQUNILENBQUM7WUFFRCxzREFBc0Q7WUFDdEQsT0FBTyxJQUFJLENBQUMsaUJBQWlCLENBQzNCLGVBQWUsQ0FBQyxZQUFZLEVBQzVCLDBEQUEwRCxDQUMzRCxDQUFDO1FBQ0osQ0FBQztRQUFDLE9BQU8sS0FBSyxFQUFFLENBQUM7WUFDZiwrQkFBK0I7WUFDL0IsT0FBTyxJQUFJLENBQUMsaUJBQWlCLENBQzNCLGVBQWUsQ0FBQyxhQUFhLEVBQzdCLDJDQUEyQyxLQUFLLFlBQVksS0FBSyxDQUFDLENBQUMsQ0FBQyxLQUFLLENBQUMsT0FBTyxDQUFDLENBQUMsQ0FBQyxNQUFNLENBQUMsS0FBSyxDQUFDLEVBQUUsRUFDbkcsS0FBSyxZQUFZLEtBQUssQ0FBQyxDQUFDLENBQUMsS0FBSyxDQUFDLENBQUMsQ0FBQyxTQUFTLENBQzNDLENBQUM7UUFDSixDQUFDO0lBQ0gsQ0FBQztJQUVEOzs7Ozs7T0FNRztJQUNLLGlCQUFpQixDQUN2QixJQUFxQixFQUNyQixPQUFlLEVBQ2YsYUFBcUI7UUFFckIsT0FBTyxDQUFDLEtBQUssQ0FBQyx3QkFBd0IsSUFBSSxNQUFNLE9BQU8sRUFBRSxDQUFDLENBQUM7UUFDM0QsSUFBSSxhQUFhLEVBQUUsQ0FBQztZQUNsQixPQUFPLENBQUMsS0FBSyxDQUFDLGFBQWEsQ0FBQyxDQUFDO1FBQy9CLENBQUM7UUFFRCxPQUFPO1lBQ0wsT0FBTyxFQUFFLEtBQUs7WUFDZCxLQUFLLEVBQUU7Z0JBQ0wsSUFBSTtnQkFDSixPQUFPO2dCQUNQLGFBQWE7YUFDZDtTQUNGLENBQUM7SUFDSixDQUFDO0NBQ0YifQ==