UNPKG

mindee

Version:

Mindee Client Library for Node.js

43 lines (42 loc) 1.84 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.extractFromPage = extractFromPage; const pdf_lib_1 = require("@cantoo/pdf-lib"); const geometry_1 = require("../../geometry"); /** * Extracts elements from a page based off of a list of bounding boxes. * * @param pdfPage PDF Page to extract from. * @param polygons List of coordinates to pull the elements from. */ async function extractFromPage(pdfPage, polygons) { const { width, height } = pdfPage.getSize(); const extractedElements = []; // Manual upscale. // Fixes issues with the OCR. const qualityScale = 300 / 72; for (const polygon of polygons) { const tempPdf = await pdf_lib_1.PDFDocument.create(); const newWidth = width * ((0, geometry_1.getMinMaxX)(polygon).max - (0, geometry_1.getMinMaxX)(polygon).min); const newHeight = height * ((0, geometry_1.getMinMaxY)(polygon).max - (0, geometry_1.getMinMaxY)(polygon).min); const cropped = await tempPdf.embedPage(pdfPage, { left: (0, geometry_1.getMinMaxX)(polygon).min * width, right: (0, geometry_1.getMinMaxX)(polygon).max * width, top: height - ((0, geometry_1.getMinMaxY)(polygon).min * height), bottom: height - ((0, geometry_1.getMinMaxY)(polygon).max * height), }); const samplePage = tempPdf.addPage([newWidth * qualityScale, newHeight * qualityScale]); samplePage.drawRectangle({ x: 0, y: 0, width: newWidth * qualityScale, height: newHeight * qualityScale, }); samplePage.drawPage(cropped, { width: newWidth * qualityScale, height: newHeight * qualityScale, }); extractedElements.push(await tempPdf.save()); } return extractedElements; }