@iqx-limited/quick-pdf
Version:
Converting PDFs to images (📃 to 📸)
548 lines (539 loc) • 17.8 kB
JavaScript
;
var semver = require('semver');
var node_url = require('node:url');
var node_path = require('node:path');
var promises = require('node:fs/promises');
var puppeteer = require('puppeteer');
var imageSize = require('image-size');
var PDFDocument = require('pdfkit');
var node_fs = require('node:fs');
var node = require('html-validate/node');
// Module-wide browser handles. launchBrowser() stores the running instances
// here and they are reset to null again on disconnect / closeBrowser().
let chrome = null;
let firefox = null;

// Written by launchBrowser() on every launch: true when a WebSocket endpoint
// was supplied instead of starting a local executable.
let isRemoteBrowser = false;

// Expected executable location of each supported browser, keyed first by
// browser name and then by the OS label produced by getOS().
const BROWSER_PATHS = {
    chrome: {
        linux: "/usr/bin/google-chrome",
        mac: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        windows: node_path.join(
            "C:",
            "Program Files",
            "Google",
            "Chrome",
            "Application",
            "chrome.exe"
        )
    },
    firefox: {
        linux: "/usr/bin/firefox",
        mac: "/Applications/Firefox.app/Contents/MacOS/firefox",
        windows: node_path.join(
            "C:",
            "Program Files",
            "Mozilla Firefox",
            "firefox.exe"
        )
    }
};
/**
 * Translates `process.platform` into the OS label used as a key in
 * BROWSER_PATHS.
 *
 * @returns {"windows" | "mac" | "linux"} "windows" for win32, "mac" for
 *   darwin, and "linux" for every other platform value.
 */
const getOS = () => {
    switch (process.platform) {
        case "win32":
            return "windows";
        case "darwin":
            return "mac";
        default:
            return "linux";
    }
};
/**
 * Checks whether the executable for `browser` exists at the location listed
 * in BROWSER_PATHS for the current OS.
 *
 * @param {string} browser - Browser key, e.g. "chrome" or "firefox".
 * @returns {Promise<boolean>} true when the path is accessible; false when it
 *   is not, or when BROWSER_PATHS has no entry for this browser/OS pair.
 */
const isBrowserInstalled = async (browser) => {
    // An unknown browser key used to raise a TypeError here (outside the
    // try block); treat it as "not installed" instead.
    const browserPath = BROWSER_PATHS[browser]?.[getOS()];
    if (typeof browserPath !== "string" || browserPath.length === 0) {
        return false;
    }
    try {
        await promises.access(browserPath);
        return true;
    }
    catch {
        // access() rejects when the file is missing or not reachable.
        return false;
    }
};
// Promise of the launch currently in progress (null when idle); launchBrowser()
// uses it so that only one browser launch runs at a time.
let launching = null;
// Number of pages opened whenever a browser's pool has to be (re)filled.
const pagePoolSize = 5;
// Number of intercepted requests allowed before a page reload is forced.
// The counter below is module-wide, so the budget is shared by all pooled pages.
const RESOURCE_LIMIT = 100;
let resourceCount = 0;
let firefoxPagePool = [];
let chromePagePool = [];

/**
 * Opens one blank page with request interception enabled and the
 * resource-limit handler attached.
 *
 * @param {object} browser - Puppeteer browser the page is opened in.
 * @returns {Promise<object>} The prepared page.
 * @throws {Error} "Browser not available" when the browser is not connected,
 *   or whatever error the page set-up raised (the half-initialised page is
 *   closed first).
 */
async function createPooledPage(browser) {
    if (!browser?.connected) {
        throw new Error("Browser not available");
    }
    const page = await browser.newPage();
    try {
        await page.setRequestInterception(true);
        page.setDefaultNavigationTimeout(10000);
        await page.goto("about:blank");
    }
    catch (err) {
        // Do not leave a half-initialised tab behind.
        await Promise.resolve(page.close()).catch(() => { });
        throw err;
    }
    // Rejections of the calls below (request already handled, page closed, ...)
    // are deliberately ignored: nobody awaits them, and an unhandled rejection
    // would otherwise surface as a process-level error.
    const ignoreRejection = (pending) => {
        Promise.resolve(pending).catch(() => { });
    };
    page.on("request", (request) => {
        resourceCount++;
        if (resourceCount > RESOURCE_LIMIT) {
            resourceCount = 0;
            // With interception enabled every request must be continued,
            // responded to or aborted; previously the over-limit request was
            // simply left pending.
            ignoreRejection(request.abort());
            ignoreRejection(page.reload());
            return;
        }
        ignoreRejection(request.continue());
    });
    return page;
}

/**
 * Returns the page pool for `type`, filling it with `pagePoolSize` fresh
 * pages when it is currently empty.
 *
 * @param {object} browser - Puppeteer browser used to open new pages.
 * @param {string} type - "chrome" selects the Chrome pool; any other value
 *   selects the Firefox pool.
 * @returns {Promise<object[]>} The (possibly newly created) pool array.
 * @throws {Error} "Browser Not Launched" when `browser` is falsy, or the
 *   first page-creation error; pages that did open are closed before the
 *   error is rethrown so they do not leak.
 */
async function launchPages(browser, type) {
    if (!browser) {
        throw new Error("Browser Not Launched");
    }
    const existing = type === "chrome" ? chromePagePool : firefoxPagePool;
    if (existing.length > 0) {
        return existing;
    }
    const outcomes = await Promise.allSettled(
        Array.from({ length: pagePoolSize }, () => createPooledPage(browser))
    );
    const failure = outcomes.find((outcome) => outcome.status === "rejected");
    if (failure) {
        const opened = outcomes.filter((outcome) => outcome.status === "fulfilled");
        await Promise.allSettled(opened.map((outcome) => outcome.value.close()));
        throw failure.reason;
    }
    const pool = outcomes.map((outcome) => outcome.value);
    if (type === "chrome") {
        chromePagePool = pool;
    }
    else {
        firefoxPagePool = pool;
    }
    return pool;
}
/**
 * Takes one page out of the pool that belongs to `type`, making sure the
 * pool has been populated first.
 *
 * @param {string} type - "chrome" uses the Chrome browser and pool; any other
 *   value uses the Firefox ones.
 * @returns {Promise<object>} The page removed from the end of the pool.
 * @throws {Error} "Page pool unexpectedly empty" when no page is available
 *   after launchPages() returned.
 */
async function getPage(type) {
    const wantsChrome = type === "chrome";
    await launchPages(wantsChrome ? chrome : firefox, type);
    // Read the pool only now: launchPages() may have replaced the array.
    const borrowed = (wantsChrome ? chromePagePool : firefoxPagePool).pop();
    if (borrowed) {
        return borrowed;
    }
    throw new Error("Page pool unexpectedly empty");
}
/**
 * Resets a page's viewport to 800x600 (scale factor 1) and hands it back to
 * the pool matching `type`.
 *
 * @param {string} type - "chrome" or "firefox"; for any other value the
 *   viewport is still reset but the page is not added to a pool.
 * @param {object} page - Page previously obtained from getPage().
 * @returns {Promise<void>}
 */
async function restorePage(type, page) {
    const defaultViewport = { width: 800, height: 600, deviceScaleFactor: 1 };
    await page.setViewport(defaultViewport);
    switch (type) {
        case "chrome":
            chromePagePool.push(page);
            break;
        case "firefox":
            firefoxPagePool.push(page);
            break;
        default:
            break;
    }
}
/**
 * Returns a connected browser, launching a local one (or connecting to a
 * remote one) when none is available yet.
 *
 * @param {("firefox"|"chrome")} [browserType] - Browser to use. When omitted,
 *   an already connected Firefox or Chrome is reused (Firefox first);
 *   otherwise both are launched and Firefox is preferred.
 * @param {string} [wsURL] - WebSocket endpoint of a remote browser. When
 *   given, the module connects to it instead of starting a local executable.
 * @returns {Promise<{browser: object, type: string}>} The browser handle and
 *   its type.
 * @throws {Error} When the type is unsupported, the local executable is
 *   missing, no browser could be launched, or launching/connecting fails.
 */
async function launchBrowser(browserType, wsURL) {
    if (!browserType) {
        if (firefox?.connected) {
            return { browser: firefox, type: "firefox" };
        }
        if (chrome?.connected) {
            return { browser: chrome, type: "chrome" };
        }
        // Distinct local names: the previous code shadowed the module-level
        // `firefox` / `chrome` handles inside this branch.
        const [firefoxLaunch, chromeLaunch] = await Promise.all([
            launchBrowser("firefox", wsURL).catch(() => null),
            launchBrowser("chrome", wsURL).catch(() => null)
        ]);
        const launched = firefoxLaunch || chromeLaunch;
        if (launched) {
            return launched;
        }
        throw new Error("No browser launched yet");
    }
    if (browserType !== "firefox" && browserType !== "chrome") {
        throw new Error(`Unsupported browser type: ${browserType}`);
    }
    const current = browserType === "chrome" ? chrome : firefox;
    if (current?.connected) {
        return { browser: current, type: browserType };
    }
    // A remote endpoint does not need a local executable, so only check the
    // installation when a local browser is about to be started.
    if (!wsURL && !(await isBrowserInstalled(browserType))) {
        throw new Error(`${browserType.toUpperCase()} is not installed.`);
    }
    if (launching) {
        // Wait for the launch in progress, then retry. Its failure (possibly
        // for the other browser) must not abort this request.
        await launching.catch(() => { });
        return launchBrowser(browserType, wsURL);
    }
    const attempt = (async () => {
        isRemoteBrowser = !!wsURL;
        let browser;
        if (wsURL) {
            console.log(`Launching remote ${browserType.toUpperCase()} browser...`);
            browser = await puppeteer.connect({
                browserWSEndpoint: wsURL,
                acceptInsecureCerts: true
            });
        }
        else {
            console.log(`Launching local ${browserType.toUpperCase()} browser...`);
            browser = await puppeteer.launch({
                browser: browserType,
                headless: "shell",
                executablePath: BROWSER_PATHS[browserType][getOS()],
                args: ["--no-sandbox", "--disable-setuid-sandbox"]
            });
        }
        // Pages still sitting in the pool were opened by a previous browser
        // instance; drop them so launchPages() creates pages for this one.
        if (browserType === "chrome") {
            chromePagePool = [];
        }
        else {
            firefoxPagePool = [];
        }
        try {
            await launchPages(browser, browserType);
        }
        catch (err) {
            // Do not leave a browser running that nobody holds a handle to.
            await Promise.resolve(wsURL ? browser.disconnect() : browser.close()).catch(() => { });
            throw err;
        }
        if (browserType === "chrome") {
            chrome = browser;
            browser.on("disconnected", () => {
                // Ignore late events from an instance that was already replaced.
                if (chrome === browser) {
                    chrome = null;
                    chromePagePool = [];
                }
            });
        }
        else {
            firefox = browser;
            browser.on("disconnected", () => {
                if (firefox === browser) {
                    firefox = null;
                    firefoxPagePool = [];
                }
            });
        }
        return {
            browser,
            type: browserType
        };
    })();
    launching = attempt;
    // Clear the marker on success AND on failure. It used to be reset only on
    // success, so one failed launch made every later call re-throw that error.
    const clearLaunching = () => {
        if (launching === attempt) {
            launching = null;
        }
    };
    attempt.then(clearLaunching, clearLaunching);
    return attempt;
}
/**
 * Shuts down every browser this module holds a handle to and empties the
 * page pools.
 *
 * Browsers reached through a remote endpoint (`isRemoteBrowser`) are only
 * disconnected; locally launched browsers are closed. Failures are logged,
 * never thrown.
 *
 * @returns {Promise<void>}
 */
async function closeBrowser() {
    // Snapshot the handles first: the "disconnected" listeners registered in
    // launchBrowser() null the module variables as soon as a browser goes
    // away. That is why the old "disconnect, then close if still set"
    // sequence never reached close() for local browsers.
    const openBrowsers = [chrome, firefox].filter(Boolean);
    chrome = null;
    firefox = null;
    // Pooled pages belong to the browsers being shut down and must not be
    // handed out again.
    chromePagePool = [];
    firefoxPagePool = [];
    const outcomes = await Promise.allSettled(openBrowsers.map(async (browser) => {
        if (isRemoteBrowser) {
            if (browser.connected) {
                await browser.disconnect();
            }
            return;
        }
        await browser.close();
    }));
    const failures = outcomes.filter((outcome) => outcome.status === "rejected");
    if (failures.length === 0) {
        console.log("Browser closed successfully.");
        return;
    }
    for (const failure of failures) {
        console.error("Error closing browsers in @iqx-limited/quick-pdf:", failure.reason);
    }
}
// NOTE: Node.js runs "exit" listeners synchronously and exits right after
// them, so the asynchronous part of closeBrowser() is not waited for here;
// this hook is best-effort only.
process.on("exit", async () => {
    await closeBrowser();
});
// Installing a SIGINT/SIGTERM listener removes Node's default "terminate on
// signal" behaviour. The handlers are therefore one-shot and, once the
// browsers are closed, re-raise the signal when no other listener is left to
// decide what happens. Otherwise a host application could no longer be
// stopped with Ctrl+C or kill.
for (const signal of ["SIGINT", "SIGTERM"]) {
    process.once(signal, async () => {
        console.log(`${signal} received. Closing browsers...`);
        await closeBrowser();
        if (process.listenerCount(signal) === 0) {
            process.kill(process.pid, signal);
        }
    });
}
/**
 * Renders the pages of a PDF to images by opening it in a pooled Firefox
 * page and taking a screenshot of every page canvas.
 *
 * @param {Buffer|string|URL} input - PDF bytes, an http(s) URL, or a file
 *   path. A Buffer is written to a uniquely named temporary file in the
 *   current working directory, which is removed again afterwards.
 * @param {object} [options]
 * @param {string} [options.password] - Typed into the password prompt if one
 *   appears within 5 seconds.
 * @param {number} [options.page] - 1-based page to render; all pages when
 *   omitted.
 * @param {string} [options.type] - Screenshot type, forwarded to renderPage().
 * @param {number} [options.quality] - Screenshot quality, forwarded to
 *   renderPage().
 * @param {boolean} [options.closeBrowser] - Close the browsers when done.
 * @returns {Promise<{length: number, metadata: *, pages: Buffer[]}>} Page
 *   count, document metadata and one image buffer per rendered page.
 * @throws {Error} When no browser is available, the page number is out of
 *   bounds, or loading/rendering fails.
 */
const pdf2img = async (input, options = {}) => {
    const { browser } = await launchBrowser("firefox");
    if (!browser?.connected) {
        throw new Error("Browser not available");
    }
    const page = await getPage("firefox");
    let tempPath = null;
    // Everything after getPage() runs inside try/finally so the page always
    // goes back to the pool, even when preparing the input throws.
    try {
        let address;
        if (Buffer.isBuffer(input)) {
            // Unique per call: a fixed "temp.pdf" let concurrent conversions
            // overwrite each other (and any user file with that name).
            const uniqueName = `temp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}.pdf`;
            tempPath = node_path.resolve(process.cwd(), uniqueName);
            await promises.writeFile(tempPath, input);
            address = node_url.pathToFileURL(tempPath).toString();
        }
        else if (input instanceof URL) {
            address = input.href;
        }
        else if (typeof input === "string" && input.startsWith("http")) {
            address = input;
        }
        else {
            address = node_url.pathToFileURL(node_path.resolve(input.toString())).toString();
        }
        await page.goto(address);
        if (options.password) {
            try {
                await page.waitForSelector('input[type="password"]', {
                    visible: true,
                    timeout: 5000
                });
                console.log("Password prompt detected, entering password...");
                await page.type('input[type="password"]', options.password);
                await page.keyboard.press("Enter");
            }
            catch {
                // Best effort: no prompt showed up in time, so carry on and
                // let the canvas wait below decide whether the PDF loaded.
            }
        }
        await page.waitForSelector("canvas", { timeout: 5000 });
        const pageCount = await page.evaluate(() => {
            if (window.PDFViewerApplication) {
                return window.PDFViewerApplication.pagesCount;
            }
            return 0;
        });
        const metadata = await page.evaluate(() => {
            const app = window.PDFViewerApplication;
            if (app && app.pdfDocument) {
                return app.documentInfo ?? {};
            }
            return {};
        });
        const pdfPage = await page.evaluate(() => {
            const canvas = document.querySelector("canvas");
            const { width, height } = canvas.getBoundingClientRect();
            return { width, height };
        });
        // Size the viewport to the first canvas before taking screenshots.
        await page.setViewport({
            width: pdfPage.width,
            height: pdfPage.height,
            deviceScaleFactor: 1
        });
        const imageBuffers = [];
        // `!= null` instead of truthiness: page 0 must reach the bounds check
        // rather than silently rendering every page.
        if (options.page != null) {
            if (options.page < 1 || options.page > pageCount) {
                throw new Error(`Page number ${options.page} is out of bounds. PDF has ${pageCount} pages.`);
            }
            imageBuffers.push(await renderPage(page, options.page, options));
        }
        else {
            for (let i = 1; i <= pageCount; i++) {
                imageBuffers.push(await renderPage(page, i, options));
            }
        }
        return {
            length: pageCount,
            // NOTE(review): `documentInfo` may already be the info dictionary
            // itself, in which case `.info` is undefined. Confirm against the
            // PDF viewer used.
            metadata: metadata.info,
            pages: imageBuffers
        };
    }
    finally {
        try {
            if (tempPath) {
                // A failed unlink must not hide the real outcome.
                await promises.unlink(tempPath).catch(() => { });
            }
            await restorePage("firefox", page);
        }
        finally {
            if (options.closeBrowser) {
                await closeBrowser();
            }
        }
    }
};
/**
 * Scrolls the PDF viewer to `pageNumber`, waits until that page has finished
 * loading and returns a screenshot clipped to its canvas.
 *
 * @param {object} page - Puppeteer page showing the PDF viewer.
 * @param {number} pageNumber - 1-based page number to capture.
 * @param {object} options
 * @param {string} [options.type] - Screenshot type; "png" when omitted.
 * @param {number} [options.quality] - Quality for non-PNG types; 100 when
 *   omitted.
 * @returns {Promise<Buffer>} The encoded image.
 * @throws {Error} "Failed to render page N of the PDF" when the screenshot
 *   fails (the underlying error is attached as `cause`), or whatever error
 *   the waits and canvas lookup raise.
 */
const renderPage = async (page, pageNumber, options) => {
    await page.evaluate((pageNum) => {
        if (window.PDFViewerApplication) {
            window.PDFViewerApplication.page = pageNum;
        }
    }, pageNumber);
    await page.waitForSelector(`.page[data-page-number="${pageNumber}"]`, { timeout: 5000 });
    // Wait until the loading indicator is gone. A missing container counts as
    // "done" here; the lookup below reports that case with a proper error.
    await page.waitForFunction((pageNum) => {
        const pageContainer = document.querySelector(`.page[data-page-number="${pageNum}"]`);
        if (!pageContainer)
            return true;
        return !pageContainer.querySelector(".loadingIcon");
    }, {}, pageNumber);
    const pageBoundingBox = await page.evaluate((pageNum) => {
        const pageContainer = document.querySelector(`.page[data-page-number="${pageNum}"]`);
        if (!pageContainer)
            throw new Error(`Page container for page ${pageNum} not found`);
        const canvas = pageContainer.querySelector("canvas");
        if (!canvas)
            throw new Error(`Canvas for page ${pageNum} not found`);
        const { x, y, width, height } = canvas.getBoundingClientRect();
        return { x, y, width, height };
    }, pageNumber);
    const screenshotOptions = {
        fullPage: false,
        type: options.type ?? "png",
        clip: {
            x: pageBoundingBox.x,
            y: pageBoundingBox.y,
            width: pageBoundingBox.width,
            height: pageBoundingBox.height
        }
    };
    // Quality is only set for non-PNG output.
    if (options.type && options.type !== "png") {
        screenshotOptions.quality = options.quality ?? 100;
    }
    try {
        const uint8array = await page.screenshot(screenshotOptions);
        return Buffer.from(uint8array);
    }
    catch (err) {
        // Keep the original failure reachable for debugging.
        throw new Error(`Failed to render page ${pageNumber} of the PDF`, { cause: err });
    }
};
/**
 * Resolves `input` to a Buffer.
 *
 * A Buffer is returned as-is. Anything else is stringified and fetched; when
 * the fetch fails or answers with a non-OK status, the same string is tried
 * as a local file path.
 *
 * @param {Buffer|string|object} input - Buffer, URL or file path.
 * @returns {Promise<Buffer>} The file contents.
 * @throws {Error} "Failed to Fetch the File" when neither the fetch nor the
 *   file-system fallback produced data.
 */
async function getBuffer(input) {
    if (input instanceof Buffer) {
        return input;
    }
    try {
        const response = await fetch(input.toString());
        if (!response.ok) {
            throw new Error("Failed to Fetch the File");
        }
        const bytes = await response.arrayBuffer();
        return Buffer.from(bytes);
    }
    catch {
        // Not fetchable: fall back to treating the input as a local path.
        const location = input.toString();
        if (node_fs.existsSync(location)) {
            return node_fs.readFileSync(location);
        }
        throw new Error("Failed to Fetch the File");
    }
}
/**
 * Downloads the body of `url` as text.
 *
 * @param {string} url - Address to fetch.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the response status is not OK.
 */
const fetchHtmlFromUrl = async (url) => {
    const response = await fetch(url);
    if (response.ok) {
        return await response.text();
    }
    throw new Error(`Failed to fetch content from URL: ${url}`);
};
/**
 * Reads a file from disk as UTF-8 text (synchronous read wrapped in an async
 * function, so failures surface as a rejected promise).
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<string>} The file contents.
 */
const readHtmlFromFilePath = async (filePath) => {
    const html = node_fs.readFileSync(filePath, { encoding: "utf-8" });
    return html;
};
/**
 * Places a JPEG or PNG image on a single A4 PDF page, with an optional
 * header and footer line.
 *
 * @param {Buffer|string|object} input - Image source, resolved via getBuffer().
 * @param {object} [options]
 * @param {string} [options.header] - Text above the image.
 * @param {string} [options.footer] - Text below the image.
 * @param {number} [options.fontSize] - Font size for header/footer (10 when
 *   omitted).
 * @param {boolean} [options.closeBrowser] - Call closeBrowser() afterwards.
 * @returns {Promise<Buffer>} The generated PDF.
 * @throws {Error} When the input cannot be loaded or is neither JPEG nor PNG.
 */
const img2pdf = async (input, options = {}) => {
    const { fileTypeFromBuffer } = await import('file-type');
    return new Promise((resolve, reject) => {
        // Builds the document; the outer promise settles through the "end"
        // listener registered below, or through reject() on failure.
        const buildPdf = async (imageBuffer) => {
            const detected = await fileTypeFromBuffer(imageBuffer);
            const mime = detected?.mime;
            if (!(mime === "image/jpeg" || mime === "image/png")) {
                throw new Error("Provided File is not a JPEG or a PNG.");
            }
            const chunks = [];
            const dimensions = imageSize.imageSize(imageBuffer);
            // Landscape only when both dimensions are known and width wins.
            const isLandscape = dimensions.width && dimensions.height
                ? dimensions.width > dimensions.height
                : false;
            const doc = new PDFDocument({
                size: "a4",
                layout: isLandscape ? "landscape" : "portrait",
                margins: { top: 0, bottom: 0, left: 0, right: 0 }
            });
            doc.on("data", (chunk) => {
                chunks.push(chunk);
            });
            doc.on("end", () => {
                resolve(Buffer.concat(chunks));
            });
            doc.fontSize(options.fontSize ?? 10);
            // Reserve a 20pt band for each caption that is present.
            const topMargin = options.header ? 20 : 0;
            const bottomMargin = options.footer ? 20 : 0;
            const sidePadding = 20;
            const textWidth = doc.page.width - 2 * sidePadding;
            const imageHeight = doc.page.height - topMargin - bottomMargin;
            if (options.header) {
                doc.text(options.header, sidePadding, topMargin / 2 - 6, {
                    align: "center",
                    baseline: "top",
                    width: textWidth,
                    height: topMargin - 5,
                    ellipsis: true
                }).moveDown(0.5);
            }
            // The image fills the full page width and the remaining height.
            doc.image(imageBuffer, 0, topMargin, {
                width: doc.page.width,
                height: imageHeight
            });
            if (options.footer) {
                doc.text(options.footer, sidePadding, doc.page.height - bottomMargin / 2 - 6, {
                    align: "center",
                    width: textWidth,
                    height: bottomMargin - 5,
                    ellipsis: true
                });
            }
            doc.end();
        };
        getBuffer(input)
            .then(buildPdf)
            .catch((e) => {
                reject(e);
            })
            .finally(async () => {
                if (options.closeBrowser) {
                    await closeBrowser();
                }
            });
    });
};
/**
 * Converts HTML into an A4 PDF using a pooled browser page.
 *
 * @param {string|object} input - Raw HTML, an http(s) URL whose body is
 *   fetched, or the path of an existing file that is read from disk.
 * @param {object} [options]
 * @param {object} [options.rules] - html-validate configuration; the standard
 *   preset with "no-trailing-whitespace" disabled when omitted.
 * @param {boolean} [options.validation] - Validate before rendering (default
 *   true).
 * @param {boolean} [options.base64] - Return a base64 string instead of a
 *   Buffer.
 * @param {boolean} [options.closeBrowser] - Call closeBrowser() afterwards.
 * @returns {Promise<Buffer|string>} The PDF.
 * @throws {Error} "Browser not available" when the browser is not connected.
 * @throws {{valid: false, count: object, validation: object[]}} A plain
 *   report object when validation fails.
 */
const html2pdf = async (input, options = {}) => {
    const { browser, type } = await launchBrowser();
    const validatorConfig = options.rules ?? {
        extends: ["html-validate:standard"],
        rules: {
            "no-trailing-whitespace": "off"
        }
    };
    const validator = new node.HtmlValidate(validatorConfig);
    let htmlContent = input.toString();
    const isRemote = htmlContent.startsWith("http://") || htmlContent.startsWith("https://");
    if (isRemote) {
        htmlContent = await fetchHtmlFromUrl(htmlContent);
    }
    else if (node_fs.existsSync(input)) {
        htmlContent = await readHtmlFromFilePath(htmlContent);
    }
    if (!browser?.connected) {
        throw new Error("Browser not available");
    }
    const page = await getPage(type);
    const shouldValidate = (options.validation ?? true);
    try {
        const report = shouldValidate
            ? await validator.validateString(htmlContent)
            : { valid: true };
        if (!report.valid) {
            // Callers receive a trimmed-down copy of the validator report.
            const summarise = (result) => ({
                file: result.filePath,
                count: {
                    errors: result.errorCount,
                    warnings: result.warningCount
                },
                messages: result.messages.map(({ message, line, column, ruleId }) => ({
                    message,
                    line,
                    column,
                    ruleId
                }))
            });
            throw {
                valid: false,
                count: {
                    errors: report.errorCount,
                    warnings: report.warningCount
                },
                validation: report.results.map((result) => summarise(result))
            };
        }
        await page.setContent(htmlContent, { waitUntil: "load" });
        const rendered = await page.pdf({
            format: "A4",
            printBackground: true
        });
        const pdfBuffer = Buffer.from(rendered);
        return (options.base64 ?? false) ? pdfBuffer.toString("base64") : pdfBuffer;
    }
    finally {
        // Hand the page back first, then optionally shut the browsers down.
        await restorePage(type, page);
        if (options.closeBrowser) {
            await closeBrowser();
        }
    }
};
// Load-time guard: when the running Node.js version does not satisfy the
// range below, report it on stderr and terminate with exit code 1.
const requiredVersion = ">=20.0.0";
if (!semver.satisfies(process.version, requiredVersion)) {
    const message = `\nError: Node.js version ${requiredVersion} is required. You are using ${process.version}.\n`;
    console.error(message);
    process.exit(1);
}
// Public CommonJS API of this module: browser lifecycle helpers
// (launchBrowser / closeBrowser) and the three converters.
exports.closeBrowser = closeBrowser;
exports.html2pdf = html2pdf;
exports.img2pdf = img2pdf;
exports.launchBrowser = launchBrowser;
exports.pdf2img = pdf2img;
//# sourceMappingURL=index.cjs.map