UNPKG

@fin.cx/einvoice

Version:

A TypeScript module for creating, manipulating, and embedding XML data within PDF files specifically tailored for electronic invoice (einvoice) packages.

74 lines 7.23 kB
import { PDFDocument, PDFDict, PDFName, PDFRawStream, PDFArray, PDFString } from '../../../plugins.js';
import { BaseXMLExtractor } from './base.extractor.js';

/** Substrings that mark an embedded file name as invoice-related even without an `.xml` suffix. */
const INVOICE_NAME_HINTS = ['zugferd', 'factur-x', 'xrechnung', 'invoice'];

/**
 * Collect the (key, value) pairs of a PDF name tree in document order.
 * A name-tree node may carry its pairs directly in a flat `/Names` array
 * and/or point at child nodes through `/Kids`; both are followed.
 * @param node Name-tree node (anything that is not a PDFDict is ignored)
 * @param entries Accumulator for the collected pairs
 * @param visited Nodes already walked, so a malformed, cyclic tree cannot loop forever
 * @returns Array of `[keyObject, valueObject]` pairs
 */
function collectNameTreeEntries(node, entries = [], visited = new Set()) {
  if (!(node instanceof PDFDict) || visited.has(node)) {
    return entries;
  }
  visited.add(node);

  const names = node.lookup(PDFName.of('Names'));
  if (names instanceof PDFArray) {
    // Pairs are laid out as [key1, value1, key2, value2, ...]; a dangling
    // trailing key without a value is skipped.
    for (let i = 0; i + 1 < names.size(); i += 2) {
      entries.push([names.lookup(i), names.lookup(i + 1)]);
    }
  }

  const kids = node.lookup(PDFName.of('Kids'));
  if (kids instanceof PDFArray) {
    for (let i = 0; i < kids.size(); i++) {
      collectNameTreeEntries(kids.lookup(i), entries, visited);
    }
  }

  return entries;
}

/**
 * Decode a name-tree key into a JavaScript string.
 * Literal strings (PDFString) are accepted, and so is any other object that
 * offers `decodeText()`. The latter covers hex-string keys, whose class is not
 * among this module's imports, so it is detected structurally.
 * @param nameObj Object found at a key position of the name tree
 * @returns Decoded file name, or null if the object is not a text string
 */
function decodeEmbeddedFileName(nameObj) {
  const isTextString = nameObj instanceof PDFString
    || (nameObj != null && typeof nameObj.decodeText === 'function');
  return isTextString ? nameObj.decodeText() : null;
}

/**
 * Standard PDF XML extractor that extracts XML from embedded files
 * Works with PDF/A-3 documents that follow the standard for embedding files
 */
export class StandardXMLExtractor extends BaseXMLExtractor {
  /**
   * Extract XML from a PDF buffer using standard PDF/A-3 embedded files.
   *
   * Walks catalog -> /Names -> /EmbeddedFiles, visits every file specification
   * in that name tree and hands the first matching embedded stream to
   * `extractXmlFromStream`. Any error raised along the way is logged and
   * reported as "not found" (null) rather than thrown.
   * @param pdfBuffer PDF buffer
   * @returns XML content or null if not found
   */
  async extractXml(pdfBuffer) {
    try {
      const pdfDoc = await PDFDocument.load(pdfBuffer);

      // The catalog's /Names dictionary is the entry point to the document's name trees
      const namesDictObj = pdfDoc.catalog.lookup(PDFName.of('Names'));
      if (!(namesDictObj instanceof PDFDict)) {
        console.warn('No Names dictionary found in PDF! This PDF does not contain embedded files.');
        return null;
      }

      // Get the embedded files name tree root
      const embeddedFilesDictObj = namesDictObj.lookup(PDFName.of('EmbeddedFiles'));
      if (!(embeddedFilesDictObj instanceof PDFDict)) {
        console.warn('No EmbeddedFiles dictionary found! This PDF does not contain embedded files.');
        return null;
      }

      // The root must offer either a flat /Names array or /Kids child nodes
      const hasNamesArray = embeddedFilesDictObj.lookup(PDFName.of('Names')) instanceof PDFArray;
      const hasKidsArray = embeddedFilesDictObj.lookup(PDFName.of('Kids')) instanceof PDFArray;
      if (!hasNamesArray && !hasKidsArray) {
        console.warn('No files specified in EmbeddedFiles dictionary!');
        return null;
      }

      // Try to find an XML file in the embedded files
      for (const [fileNameObj, fileSpecObj] of collectNameTreeEntries(embeddedFilesDictObj)) {
        const fileName = decodeEmbeddedFileName(fileNameObj);
        if (fileName === null || !(fileSpecObj instanceof PDFDict)) {
          continue;
        }

        const lowerName = fileName.toLowerCase();

        // Check if it's a known invoice XML file name
        const isKnownFileName = this.knownFileNames.some(
          (knownName) => lowerName === knownName.toLowerCase(),
        );

        // Check if it's any XML file or has invoice-related keywords
        const isXmlFile = lowerName.endsWith('.xml')
          || INVOICE_NAME_HINTS.some((hint) => lowerName.includes(hint));

        if (!isKnownFileName && !isXmlFile) {
          continue;
        }

        // /EF maps to the embedded file streams; /F holds the stream itself
        const efDictObj = fileSpecObj.lookup(PDFName.of('EF'));
        if (!(efDictObj instanceof PDFDict)) {
          continue;
        }

        const fileStream = efDictObj.lookup(PDFName.of('F'));
        if (fileStream instanceof PDFRawStream) {
          const xmlContent = await this.extractXmlFromStream(fileStream, fileName);
          if (xmlContent) {
            return xmlContent;
          }
        }
      }

      console.warn('No valid XML found in embedded files');
      return null;
    } catch (error) {
      // Best-effort extractor: report the failure and signal "not found"
      console.error('Error in standard extraction:', error);
      return null;
    }
  }
}
//# 
sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoic3RhbmRhcmQuZXh0cmFjdG9yLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsiLi4vLi4vLi4vLi4vdHMvZm9ybWF0cy9wZGYvZXh0cmFjdG9ycy9zdGFuZGFyZC5leHRyYWN0b3IudHMiXSwibmFtZXMiOltdLCJtYXBwaW5ncyI6IkFBQUEsT0FBTyxFQUFFLFdBQVcsRUFBRSxPQUFPLEVBQUUsT0FBTyxFQUFFLFlBQVksRUFBRSxRQUFRLEVBQUUsU0FBUyxFQUFFLE1BQU0scUJBQXFCLENBQUM7QUFDdkcsT0FBTyxFQUFFLGdCQUFnQixFQUFFLE1BQU0scUJBQXFCLENBQUM7QUFFdkQ7OztHQUdHO0FBQ0gsTUFBTSxPQUFPLG9CQUFxQixTQUFRLGdCQUFnQjtJQUN4RDs7OztPQUlHO0lBQ0ksS0FBSyxDQUFDLFVBQVUsQ0FBQyxTQUE4QjtRQUNwRCxJQUFJLENBQUM7WUFDSCxNQUFNLE1BQU0sR0FBRyxNQUFNLFdBQVcsQ0FBQyxJQUFJLENBQUMsU0FBUyxDQUFDLENBQUM7WUFFakQseUNBQXlDO1lBQ3pDLE1BQU0sWUFBWSxHQUFHLE1BQU0sQ0FBQyxPQUFPLENBQUMsTUFBTSxDQUFDLE9BQU8sQ0FBQyxFQUFFLENBQUMsT0FBTyxDQUFDLENBQUMsQ0FBQztZQUNoRSxJQUFJLENBQUMsQ0FBQyxZQUFZLFlBQVksT0FBTyxDQUFDLEVBQUUsQ0FBQztnQkFDdkMsT0FBTyxDQUFDLElBQUksQ0FBQyw2RUFBNkUsQ0FBQyxDQUFDO2dCQUM1RixPQUFPLElBQUksQ0FBQztZQUNkLENBQUM7WUFFRCxvQ0FBb0M7WUFDcEMsTUFBTSxvQkFBb0IsR0FBRyxZQUFZLENBQUMsTUFBTSxDQUFDLE9BQU8sQ0FBQyxFQUFFLENBQUMsZUFBZSxDQUFDLENBQUMsQ0FBQztZQUM5RSxJQUFJLENBQUMsQ0FBQyxvQkFBb0IsWUFBWSxPQUFPLENBQUMsRUFBRSxDQUFDO2dCQUMvQyxPQUFPLENBQUMsSUFBSSxDQUFDLDhFQUE4RSxDQUFDLENBQUM7Z0JBQzdGLE9BQU8sSUFBSSxDQUFDO1lBQ2QsQ0FBQztZQUVELHNCQUFzQjtZQUN0QixNQUFNLFlBQVksR0FBRyxvQkFBb0IsQ0FBQyxNQUFNLENBQUMsT0FBTyxDQUFDLEVBQUUsQ0FBQyxPQUFPLENBQUMsQ0FBQyxDQUFDO1lBQ3RFLElBQUksQ0FBQyxDQUFDLFlBQVksWUFBWSxRQUFRLENBQUMsRUFBRSxDQUFDO2dCQUN4QyxPQUFPLENBQUMsSUFBSSxDQUFDLGlEQUFpRCxDQUFDLENBQUM7Z0JBQ2hFLE9BQU8sSUFBSSxDQUFDO1lBQ2QsQ0FBQztZQUVELGdEQUFnRDtZQUNoRCxLQUFLLElBQUksQ0FBQyxHQUFHLENBQUMsRUFBRSxDQUFDLEdBQUcsWUFBWSxDQUFDLElBQUksRUFBRSxFQUFFLENBQUMsSUFBSSxDQUFDLEVBQUUsQ0FBQztnQkFDaEQsTUFBTSxXQUFXLEdBQUcsWUFBWSxDQUFDLE1BQU0sQ0FBQyxDQUFDLENBQUMsQ0FBQztnQkFDM0MsTUFBTSxXQUFXLEdBQUcsWUFBWSxDQUFDLE1BQU0sQ0FBQyxDQUFDLEdBQUcsQ0FBQyxDQUFDLENBQUM7Z0JBRS9DLElBQUksQ0FBQyxDQUFDLFdBQVcsWUFBWSxTQUFTLENBQUMsSUFBSSxDQUFDLENBQUMsV0FBVyxZQUFZLE9BQU8sQ0FBQyxFQUFFLENBQUM7b0JBQzdFLF
NBQVM7Z0JBQ1gsQ0FBQztnQkFFRCw2QkFBNkI7Z0JBQzdCLE1BQU0sUUFBUSxHQUFHLFdBQVcsQ0FBQyxVQUFVLEVBQUUsQ0FBQztnQkFFMUMsOENBQThDO2dCQUM5QyxNQUFNLGVBQWUsR0FBRyxJQUFJLENBQUMsY0FBYyxDQUFDLElBQUksQ0FDOUMsU0FBUyxDQUFDLEVBQUUsQ0FBQyxRQUFRLENBQUMsV0FBVyxFQUFFLEtBQUssU0FBUyxDQUFDLFdBQVcsRUFBRSxDQUNoRSxDQUFDO2dCQUVGLDZEQUE2RDtnQkFDN0QsTUFBTSxTQUFTLEdBQUcsUUFBUSxDQUFDLFdBQVcsRUFBRSxDQUFDLFFBQVEsQ0FBQyxNQUFNLENBQUM7b0JBQ3ZDLFFBQVEsQ0FBQyxXQUFXLEVBQUUsQ0FBQyxRQUFRLENBQUMsU0FBUyxDQUFDO29CQUMxQyxRQUFRLENBQUMsV0FBVyxFQUFFLENBQUMsUUFBUSxDQUFDLFVBQVUsQ0FBQztvQkFDM0MsUUFBUSxDQUFDLFdBQVcsRUFBRSxDQUFDLFFBQVEsQ0FBQyxXQUFXLENBQUM7b0JBQzVDLFFBQVEsQ0FBQyxXQUFXLEVBQUUsQ0FBQyxRQUFRLENBQUMsU0FBUyxDQUFDLENBQUM7Z0JBRTdELElBQUksZUFBZSxJQUFJLFNBQVMsRUFBRSxDQUFDO29CQUNqQyxNQUFNLFNBQVMsR0FBRyxXQUFXLENBQUMsTUFBTSxDQUFDLE9BQU8sQ0FBQyxFQUFFLENBQUMsSUFBSSxDQUFDLENBQUMsQ0FBQztvQkFDdkQsSUFBSSxDQUFDLENBQUMsU0FBUyxZQUFZLE9BQU8sQ0FBQyxFQUFFLENBQUM7d0JBQ3BDLFNBQVM7b0JBQ1gsQ0FBQztvQkFFRCxNQUFNLFVBQVUsR0FBRyxTQUFTLENBQUMsTUFBTSxDQUFDLE9BQU8sQ0FBQyxFQUFFLENBQUMsR0FBRyxDQUFDLENBQUMsQ0FBQztvQkFDckQsSUFBSSxVQUFVLFlBQVksWUFBWSxFQUFFLENBQUM7d0JBQ3ZDLE1BQU0sVUFBVSxHQUFHLE1BQU0sSUFBSSxDQUFDLG9CQUFvQixDQUFDLFVBQVUsRUFBRSxRQUFRLENBQUMsQ0FBQzt3QkFDekUsSUFBSSxVQUFVLEVBQUUsQ0FBQzs0QkFDZixPQUFPLFVBQVUsQ0FBQzt3QkFDcEIsQ0FBQztvQkFDSCxDQUFDO2dCQUNILENBQUM7WUFDSCxDQUFDO1lBRUQsT0FBTyxDQUFDLElBQUksQ0FBQyxzQ0FBc0MsQ0FBQyxDQUFDO1lBQ3JELE9BQU8sSUFBSSxDQUFDO1FBQ2QsQ0FBQztRQUFDLE9BQU8sS0FBSyxFQUFFLENBQUM7WUFDZixPQUFPLENBQUMsS0FBSyxDQUFDLCtCQUErQixFQUFFLEtBQUssQ0FBQyxDQUFDO1lBQ3RELE9BQU8sSUFBSSxDQUFDO1FBQ2QsQ0FBQztJQUNILENBQUM7Q0FDRiJ9