mindee
Version:
Mindee Client Library for Node.js
77 lines (76 loc) • 3.26 kB
JavaScript
Object.defineProperty(exports, "__esModule", { value: true });
exports.extractInvoices = extractInvoices;
const pdf_lib_1 = require("@cantoo/pdf-lib");
const errors_1 = require("../../errors");
const product_1 = require("../../product");
const extractedInvoiceSplitterImage_1 = require("./extractedInvoiceSplitterImage");
/**
 * Builds one sub-document per page group out of the source PDF.
 *
 * Only the first and last entries of a group are used: every page index from
 * the first entry up to and including the last entry is copied, in order,
 * into a newly created document, which is then saved.
 *
 * @param pdfDoc Source document the pages are copied from.
 * @param invoicePageGroups List of page groups, each one a list of page indexes.
 * @returns A promise of one ExtractedInvoiceSplitterImage per group, in input order.
 * Each one is constructed with the saved bytes and the `[first, last]` index pair of its group.
 */
async function splitPdf(pdfDoc, invoicePageGroups) {
    const extractedInvoices = [];
    for (const pageGroup of invoicePageGroups) {
        const invoiceDoc = await pdf_lib_1.PDFDocument.create();
        const firstPage = pageGroup[0];
        const lastPage = pageGroup[pageGroup.length - 1];
        // Expand the group into the full contiguous range between its bounds.
        const pageRange = [];
        for (let pageIndex = firstPage; pageIndex <= lastPage; pageIndex++) {
            pageRange.push(pageIndex);
        }
        const importedPages = await invoiceDoc.copyPages(pdfDoc, pageRange);
        for (const page of importedPages) {
            invoiceDoc.addPage(page);
        }
        const invoiceBytes = await invoiceDoc.save();
        extractedInvoices.push(new extractedInvoiceSplitterImage_1.ExtractedInvoiceSplitterImage(invoiceBytes, [firstPage, lastPage]));
    }
    return extractedInvoices;
}
/**
 * Initializes the input file and loads it as a PDF document.
 *
 * @param inputFile Input source; it is initialized via `init()` before being inspected.
 * @returns A promise of the loaded PDFDocument.
 * @throws MindeeMimeTypeError if the input file does not report itself as a PDF.
 * @throws MindeeError if the loaded document has fewer than two pages.
 */
async function getPdfDoc(inputFile) {
    await inputFile.init();
    if (!inputFile.isPdf()) {
        throw new errors_1.MindeeMimeTypeError("Invoice Splitter is only compatible with pdf documents.");
    }
    const { PDFDocument } = pdf_lib_1;
    // Encryption is ignored and an empty password is supplied when loading.
    const loadOptions = { ignoreEncryption: true, password: "" };
    const loadedDocument = await PDFDocument.load(inputFile.fileObject, loadOptions);
    const totalPages = loadedDocument.getPageCount();
    if (totalPages < 2) {
        throw new errors_1.MindeeError("Invoice Splitter is only compatible with multi-page-pdf documents.");
    }
    return loadedDocument;
}
/**
 * Extracts & cuts the pages of a main document invoice according to the provided indexes.
 *
 * @param inputFile File to extract sub-invoices from.
 * @param indexes List of indexes to cut the document according to.
 * Can be provided either as a InvoiceSplitterV1 inference, or a direct list of splits.
 * @param strict If set to true, doesn't cut pages where the API isn't 100% confident.
 * Only taken into account when `indexes` is an InvoiceSplitterV1 inference.
 * @returns A promise of extracted images, as an array of ExtractedInvoiceSplitterImage.
 * @throws MindeeError if no indexes are provided, or if any index is not an integer
 * within the page range of the document.
 */
async function extractInvoices(inputFile, indexes, strict = false) {
    if (!indexes) {
        throw new errors_1.MindeeError("No possible invoice candidates found for Invoice Splitter extraction.");
    }
    // An inference is reduced to its page groups (optionally keeping only the
    // fully confident ones); a direct list of splits is used as provided.
    const customIndexes = indexes instanceof product_1.InvoiceSplitterV1
        ? indexes.prediction.invoicePageGroups
            .filter((invoicePageGroup) => !strict || invoicePageGroup.confidence === 1)
            .map((invoicePageGroup) => invoicePageGroup.pageIndexes ?? [])
        : indexes;
    const pdfDoc = await getPdfDoc(inputFile);
    const pageCount = pdfDoc.getPageCount();
    for (const pageGroup of customIndexes) {
        for (const index of pageGroup) {
            // Reject negative and fractional indexes here as well, rather than
            // letting them reach the page-copying step.
            if (!Number.isInteger(index) || index < 0 || index >= pageCount) {
                throw new errors_1.MindeeError(`Given index ${index} doesn't exist in page range (0-${pageCount - 1})`);
            }
        }
    }
    return await splitPdf(pdfDoc, customIndexes);
}
;