UNPKG

mindee

Version:

Mindee Client Library for Node.js

77 lines (76 loc) 3.26 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.extractInvoices = extractInvoices; const pdf_lib_1 = require("@cantoo/pdf-lib"); const errors_1 = require("../../errors"); const product_1 = require("../../product"); const extractedInvoiceSplitterImage_1 = require("./extractedInvoiceSplitterImage"); async function splitPdf(pdfDoc, invoicePageGroups) { if (invoicePageGroups.length === 0) { return []; } const generatedPdfs = []; for (let i = 0; i < invoicePageGroups.length; i++) { const subdocument = await pdf_lib_1.PDFDocument.create(); const fullIndexes = []; for (let j = invoicePageGroups[i][0]; j <= invoicePageGroups[i][invoicePageGroups[i].length - 1]; j++) { fullIndexes.push(j); } const copiedPages = await subdocument.copyPages(pdfDoc, fullIndexes); copiedPages.map((page) => { subdocument.addPage(page); }); const subdocumentBytes = await subdocument.save(); generatedPdfs.push(new extractedInvoiceSplitterImage_1.ExtractedInvoiceSplitterImage(subdocumentBytes, [invoicePageGroups[i][0], invoicePageGroups[i][invoicePageGroups[i].length - 1]])); } return generatedPdfs; } async function getPdfDoc(inputFile) { await inputFile.init(); if (!inputFile.isPdf()) { throw new errors_1.MindeeMimeTypeError("Invoice Splitter is only compatible with pdf documents."); } const pdfDoc = await pdf_lib_1.PDFDocument.load(inputFile.fileObject, { ignoreEncryption: true, password: "" }); if (pdfDoc.getPageCount() < 2) { throw new errors_1.MindeeError("Invoice Splitter is only compatible with multi-page-pdf documents."); } return pdfDoc; } /** * Extracts & cuts the pages of a main document invoice according to the provided indexes. * * @param inputFile File to extract sub-invoices from. * @param indexes List of indexes to cut the document according to. * Can be provided either as a InvoiceSplitterV1 inference, or a direct list of splits. * @param strict If set to true, doesn't cut pages where the API isn't 100% confident. * @returns A promise of extracted images, as an array of ExtractedInvoiceSplitterImage. */ async function extractInvoices(inputFile, indexes, strict = false) { if (!indexes) { throw new errors_1.MindeeError("No possible receipts candidates found for MultiReceipts extraction."); } let customIndexes = []; if (indexes instanceof product_1.InvoiceSplitterV1) { indexes.prediction.invoicePageGroups.map((invoicePageGroup) => { if (!strict || invoicePageGroup.confidence === 1) { customIndexes.push(invoicePageGroup.pageIndexes ?? []); } }); } else { customIndexes = indexes; } const pdfDoc = await getPdfDoc(inputFile); const pageCount = pdfDoc.getPageCount(); customIndexes.forEach((pageGroup) => { pageGroup.forEach((index) => { if (index >= pageCount) { throw new errors_1.MindeeError(`Given index ${index} doesn't exist in page range (0-${pageCount - 1})`); } }); }); return await splitPdf(pdfDoc, customIndexes); }