// pomljs
// Version:
// Prompt Orchestration Markup Language
// 79 lines (73 loc) • 2.56 kB
// JavaScript
;
var PDFJS = require('pdfjs-dist/legacy/build/pdf.js');
/**
 * Build a frozen, prototype-less namespace object around a module export.
 *
 * Every own enumerable key of `e` except `default` is exposed on the result:
 * accessor properties are copied with their original descriptor, and plain
 * data properties are exposed through an enumerable getter that reads from
 * `e` on each access, so later changes to `e` stay visible. The `default`
 * property of the result is always `e` itself.
 *
 * @param {*} e - The required module's export value; falsy values yield a
 *   namespace that only carries `default`.
 * @returns {Object} The frozen namespace object.
 */
function _interopNamespaceDefault(e) {
    const namespace = Object.create(null);
    const exportedKeys = e ? Object.keys(e) : [];
    for (const key of exportedKeys) {
        if (key === 'default') {
            // `default` is reserved for the export object itself (set below).
            continue;
        }
        const descriptor = Object.getOwnPropertyDescriptor(e, key);
        const forwarded = descriptor.get
            ? descriptor
            : { enumerable: true, get: () => e[key] };
        Object.defineProperty(namespace, key, forwarded);
    }
    namespace.default = e;
    return Object.freeze(namespace);
}
var PDFJS__namespace = /*#__PURE__*/_interopNamespaceDefault(PDFJS);
// Keep the real console.log so it can be restored once setup is finished.
const log = console.log;
// The pdf.js API object used by the functions in this module; resolved below.
let pdfjs;
// Temporarily drop pdf.js's "Cannot polyfill `DOMMatrix`/`Path2D`" messages
// while the library is being configured; everything else is passed through.
// NOTE(review): the `require` of pdfjs-dist at the top of the file runs before
// this filter is installed — confirm the messages are actually emitted here.
console.log = (m, ...a) => /Cannot polyfill `(DOMMatrix|Path2D)`/.test(m) ? null : log(m, ...a);
try {
    // When `GlobalWorkerOptions` is missing on the namespace itself, the API
    // lives under `.default` (ESM-shaped export); otherwise the namespace is
    // the API (CommonJS-shaped export).
    pdfjs = PDFJS__namespace.GlobalWorkerOptions === undefined
        ? PDFJS__namespace.default
        : PDFJS__namespace;
    pdfjs.GlobalWorkerOptions.workerSrc = 'pdfjs-dist/legacy/build/pdf.worker.js';
}
finally {
    // Always restore the original console.log, even if the setup above throws,
    // so a failed import never leaves the global console patched.
    console.log = log;
}
/**
 * Count the pages of a PDF document.
 *
 * @param {ArrayBuffer|ArrayBufferView|ArrayLike<number>} pdfBuffer - Raw PDF
 *   bytes; anything accepted by the `Uint8Array` constructor.
 * @returns {Promise<number>} The document's `numPages` value.
 * @throws Rejects with whatever error the pdf.js loading task rejects with.
 */
async function getNumPages(pdfBuffer) {
    // ArrayBuffer and typed-array/Buffer inputs all go through the same
    // constructor, so no type check is needed.
    const data = new Uint8Array(pdfBuffer);
    const loadingTask = pdfjs.getDocument({ data });
    try {
        const pdfDocument = await loadingTask.promise;
        return pdfDocument.numPages;
    }
    finally {
        // Release the document and its worker resources on success and failure.
        await loadingTask.destroy();
    }
}
/**
 * Extract the text of a PDF, page by page.
 *
 * Pages are read in order starting at page 1, and their texts are joined
 * with a blank line (`'\n\n'`) between consecutive pages.
 *
 * @param {ArrayBuffer|ArrayBufferView|ArrayLike<number>} pdfBuffer - Raw PDF
 *   bytes; anything accepted by the `Uint8Array` constructor.
 * @param {number} [maxPages] - Upper bound on the number of pages to read.
 *   When `null`/`undefined`, every page is read; larger values are clamped
 *   to the document's page count.
 * @returns {Promise<string>} The concatenated page texts.
 * @throws Rejects with whatever error loading the document or reading a page
 *   rejects with.
 */
async function pdfParse(pdfBuffer, maxPages) {
    // ArrayBuffer and typed-array/Buffer inputs all go through the same
    // constructor, so no type check is needed.
    const data = new Uint8Array(pdfBuffer);
    const loadingTask = pdfjs.getDocument({ data });
    try {
        const pdfDocument = await loadingTask.promise;
        const pageLimit = maxPages == null
            ? pdfDocument.numPages
            : Math.min(maxPages, pdfDocument.numPages);
        const pageTexts = [];
        // pdf.js page numbers are 1-based. Pages are read sequentially.
        for (let pageNum = 1; pageNum <= pageLimit; pageNum++) {
            const page = await pdfDocument.getPage(pageNum);
            pageTexts.push(await extractTextFromPage(page));
        }
        return pageTexts.join('\n\n');
    }
    finally {
        // Release the document and its worker resources on success and failure.
        await loadingTask.destroy();
    }
}
/**
 * Concatenate the text items of a single page into one string.
 *
 * Items are appended in the order `getTextContent()` returns them. A newline
 * is inserted whenever an item's `transform[5]` (used here as the item's Y
 * position) differs from that of the previous item.
 *
 * @param {{getTextContent: function(): Promise<{items: Array}>}} page - A
 *   page object exposing `getTextContent()`; each item must carry `str` and
 *   a `transform` array.
 * @returns {Promise<string>} The page text; `''` when the page has no items.
 */
async function extractTextFromPage(page) {
    const { items } = await page.getTextContent();
    let previousY;
    let text = '';
    for (const item of items) {
        const y = item.transform[5];
        // Compare against `undefined` explicitly: a previous Y of exactly 0 is
        // a real position and must still trigger a line break when Y changes.
        if (previousY !== undefined && previousY !== y) {
            text += '\n';
        }
        text += item.str;
        previousY = y;
    }
    return text;
}
// Public CommonJS API of this module.
exports.getNumPages = getNumPages;
exports.pdfParse = pdfParse;
//# sourceMappingURL=pdf.cjs.map