UNPKG

@iqx-limited/quick-pdf

Version:

Converting PDFs to images (📃 to 📸)

677 lines (669 loc) • 22.3 kB
import { satisfies } from 'semver';
import { pathToFileURL } from 'node:url';
import { join, resolve } from 'node:path';
import { readFile, writeFile, access, unlink } from 'node:fs/promises';
import puppeteer from 'puppeteer';
import os, { platform } from 'node:os';
import { execSync } from 'node:child_process';
import { imageSize } from 'image-size';
import PDFDocument from 'pdfkit';
import { existsSync, readFileSync } from 'node:fs';
import { HtmlValidate } from 'html-validate';

// ---------------------------------------------------------------------------
// Module state
// ---------------------------------------------------------------------------

/** Active Firefox browser instance, or null when none is running. */
let firefox = null;
/** Active Chrome browser instance, or null when none is running. */
let chrome = null;
/** True when the most recent launch connected to a remote browser via a WebSocket URL. */
let isRemoteBrowser = false;
/** Enables verbose diagnostics and orphan cleanup (see setDevMode). */
let devMode = false;

/** JSON file (in the current working directory) recording launched browser PIDs and their owner PIDs. */
const registryPath = join(process.cwd(), ".puppeteer-launches.json");

/** Executable locations probed for each supported browser, keyed by the value returned from getOS(). */
const BROWSER_PATHS = {
    chrome: {
        linux: "/usr/bin/google-chrome",
        mac: "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        windows: join("C:", "Program Files", "Google", "Chrome", "Application", "chrome.exe")
    },
    firefox: {
        linux: "/usr/bin/firefox",
        mac: "/Applications/Firefox.app/Contents/MacOS/firefox",
        windows: join("C:", "Program Files", "Mozilla Firefox", "firefox.exe")
    }
};

/** Promise of the launch currently in flight, or null. Serialises concurrent launchBrowser calls. */
let launching = null;

/** Number of intercepted requests (shared by all pages) after which the requesting page is reloaded. */
const RESOURCE_LIMIT = 100;
let resourceCount = 0;

/** Number of pages created up-front whenever a pool is found empty. */
const PAGE_POOL_SIZE = 5;
let firefoxPagePool = [];
let chromePagePool = [];

// ---------------------------------------------------------------------------
// Process helpers
// ---------------------------------------------------------------------------

/**
 * Logs a non-fatal error, but only in dev mode, so best-effort cleanup steps
 * neither crash the caller nor spam production logs.
 * @param {string} context - Short description of what was being attempted.
 * @param {unknown} err - The error that occurred.
 */
const logDevError = (context, err) => {
    if (devMode) {
        console.error(`Quick-PDF: ${context}:`, err);
    }
};

/**
 * PIDs come from a JSON file on disk, so they are validated before being
 * passed to process.kill (where 0 and negative values address process groups)
 * or interpolated into a shell command.
 * @param {unknown} pid
 * @returns {boolean} True when pid is a positive integer.
 */
const isValidPid = (pid) => Number.isInteger(pid) && pid > 0;

/**
 * Checks whether a process exists by sending it signal 0.
 * @param {number} pid
 * @returns {boolean} True if the process exists.
 */
const isProcessAlive = (pid) => {
    if (!isValidPid(pid)) {
        return false;
    }
    try {
        process.kill(pid, 0);
        return true;
    } catch (err) {
        // EPERM means the process exists but we may not signal it: still alive.
        return err?.code === "EPERM";
    }
};

/**
 * Resolves after the given delay.
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise(done => setTimeout(done, ms));

/**
 * Asks a process to terminate (taskkill on Windows, SIGTERM/SIGKILL elsewhere).
 * @param {number} pid - Process to terminate.
 * @param {boolean} [force=false] - Use a forced kill (/F or SIGKILL).
 * @returns {boolean} True if the kill request was issued without error.
 */
const killProcess = (pid, force = false) => {
    if (!isValidPid(pid)) {
        return false;
    }
    try {
        if (platform() === "win32") {
            const signal = force ? "/F" : "";
            execSync(`taskkill ${signal} /PID ${pid}`);
        } else {
            process.kill(pid, force ? "SIGKILL" : "SIGTERM");
        }
        return true;
    } catch {
        return false;
    }
};

/**
 * Reads the launch registry. A missing, unreadable or malformed file is
 * treated as an empty registry.
 * @returns {Promise<Record<string, {pid?: number, ownerPid?: number, started?: number}>>}
 */
const readRegistry = async () => {
    try {
        const parsed = JSON.parse(await readFile(registryPath, "utf8"));
        const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
        return isRecord ? parsed : {};
    } catch {
        return {};
    }
};

/**
 * Kills browsers recorded in the registry whose owning Node process is no
 * longer alive, then rewrites the registry without those entries.
 * @returns {Promise<void>}
 */
const cleanupOrphanedBrowsers = async () => {
    const registry = await readRegistry();
    for (const launchId of Object.keys(registry)) {
        const { pid, ownerPid } = registry[launchId] ?? {};
        if (isProcessAlive(ownerPid)) {
            continue;
        }
        if (isValidPid(pid)) {
            killProcess(pid);
            // Give the browser a moment to exit gracefully before forcing it.
            await sleep(500);
            if (isProcessAlive(pid)) {
                killProcess(pid, true);
            }
        }
        delete registry[launchId];
        console.log(`🧹 Cleaned up orphaned browser PID ${pid} (launchId: ${launchId})`);
    }
    await writeFile(registryPath, JSON.stringify(registry, null, 2));
};

/**
 * Maps process.platform onto the keys used by BROWSER_PATHS.
 * @returns {"windows" | "mac" | "linux"}
 */
const getOS = () => {
    if (process.platform === "win32") {
        return "windows";
    }
    if (process.platform === "darwin") {
        return "mac";
    }
    return "linux";
};

/**
 * Turns dev mode on or off. Enabling it also starts a background cleanup of
 * orphaned browsers.
 * @param {boolean} mode
 */
const setDevMode = (mode) => {
    devMode = mode;
    if (devMode) {
        console.log("Quick-PDF: Dev Mode Enabled");
        cleanupOrphanedBrowsers().catch(err => {
            console.error("Error during cleanup of orphaned browsers:", err);
        });
    }
};

/**
 * Checks whether the browser executable exists at its expected location.
 * @param {"chrome" | "firefox"} browser
 * @returns {Promise<boolean>}
 */
const isBrowserInstalled = async (browser) => {
    const browserPath = BROWSER_PATHS[browser]?.[getOS()];
    if (!browserPath) {
        return false;
    }
    try {
        await access(browserPath);
        return true;
    } catch {
        return false;
    }
};

// ---------------------------------------------------------------------------
// Page pool
// ---------------------------------------------------------------------------

/**
 * Ensures the page pool for the given browser type is populated and returns it.
 * @param {import('puppeteer').Browser} browser
 * @param {"chrome" | "firefox"} type
 * @returns {Promise<import('puppeteer').Page[]>}
 * @throws {Error} If the browser is not connected.
 */
async function launchPages(browser, type) {
    if (!browser?.connected) {
        throw new Error("Browser Not Launched");
    }
    const existing = type === "chrome" ? chromePagePool : firefoxPagePool;
    if (existing.length > 0) {
        return existing;
    }
    const pool = [];
    // Pages are created one at a time, as in the original implementation.
    for (let i = 0; i < PAGE_POOL_SIZE; i++) {
        pool.push(await createPage(browser));
    }
    if (type === "chrome") {
        chromePagePool = pool;
    } else {
        firefoxPagePool = pool;
    }
    return pool;
}

/**
 * Prints browser diagnostics. Must never throw: it runs while another error is
 * being reported and must not replace it.
 * @param {import('puppeteer').Browser | null | undefined} browser
 */
async function logBrowserDiagnostics(browser) {
    try {
        console.log(`Trying to launch a page with browser: ${browser?.process() ? "exists" : "null"}`);
        console.log(`Browser process PID: ${browser?.process() ? browser.process()?.pid : "N/A"}`);
        console.log(`Browser contexts: ${browser?.browserContexts().length ?? 0}`);
        console.log(`Browser connected: ${browser?.connected}`);
        console.log(`Browser user agent: ${await browser?.userAgent()}`);
        console.log(`Browser version: ${await browser?.version()}`);
        for (const context of browser?.browserContexts?.() ?? []) {
            console.log(` - context ID: ${context.id || "(no id)"}`);
        }
        console.log(`Product: ${browser?.process()?.spawnargs?.join(" ") ?? "(no spawnargs)"}`);
        const targets = browser?.targets?.() ?? [];
        console.log(`Targets: ${targets.length}`);
        targets.forEach(t => {
            console.log(` - Target type: ${t.type()}, URL: ${t.url()}`);
        });
    } catch (diagnosticError) {
        console.error("Quick-PDF: failed to collect browser diagnostics:", diagnosticError);
    }
}

/**
 * Creates a blank page with request interception enabled and error logging
 * attached.
 * @param {import('puppeteer').Browser} browser
 * @returns {Promise<import('puppeteer').Page>}
 * @throws Rethrows any error raised while creating or preparing the page.
 */
async function createPage(browser) {
    try {
        const context = browser?.defaultBrowserContext() || await browser?.createBrowserContext();
        const page = await context?.newPage();
        if (!page) {
            throw new Error("Failed to create a new page");
        }
        await page.setRequestInterception(true);
        page.setDefaultNavigationTimeout(10000);
        await page.goto("about:blank");
        page.on("request", (request) => {
            resourceCount++;
            if (resourceCount > RESOURCE_LIMIT) {
                // NOTE(review): the intercepted request is intentionally left unanswered
                // here, as in the original; the reload supersedes it.
                resourceCount = 0;
                page.reload().catch(err => logDevError("page reload after resource limit failed", err));
            } else {
                // A rejection here (e.g. the page closed mid-request) must not become an
                // unhandled rejection that takes down the host process.
                request.continue().catch(err => logDevError("continuing intercepted request failed", err));
            }
        });
        page.on("error", err => {
            console.error("Page error:", err);
        });
        page.on("pageerror", err => {
            console.error("Page error:", err);
        });
        return page;
    } catch (err) {
        if (devMode) {
            await logBrowserDiagnostics(browser);
        }
        throw err;
    }
}

/**
 * Takes a page from the pool of the given browser type, refilling the pool
 * first when it is empty.
 * @param {"chrome" | "firefox"} type
 * @returns {Promise<import('puppeteer').Page>}
 */
async function getPage(type) {
    const browser = type === "chrome" ? chrome : firefox;
    await launchPages(browser, type);
    const pool = type === "chrome" ? chromePagePool : firefoxPagePool;
    const page = pool.pop();
    if (!page) {
        return await createPage(browser);
    }
    return page;
}

/**
 * Resets a page's viewport and returns it to its pool. Closed pages are
 * dropped instead of being pooled.
 * @param {"chrome" | "firefox"} type
 * @param {import('puppeteer').Page} page
 * @returns {Promise<void>}
 */
async function restorePage(type, page) {
    if (!page || page.isClosed()) {
        return;
    }
    await page.setViewport({ width: 800, height: 600, deviceScaleFactor: 1 });
    if (type === "chrome") {
        chromePagePool.push(page);
    } else if (type === "firefox") {
        firefoxPagePool.push(page);
    }
}

// ---------------------------------------------------------------------------
// Browser lifecycle
// ---------------------------------------------------------------------------

/**
 * Records a freshly launched browser in the registry so a later run can clean
 * it up if this process dies. Does nothing to the entry list when pid is
 * missing (e.g. remote browsers), but still rewrites the file.
 * @param {number | undefined} pid
 * @returns {Promise<void>}
 */
async function recordLaunch(pid) {
    const registry = await readRegistry();
    if (pid) {
        registry[pid] = { pid, ownerPid: process.pid, started: Date.now() };
    }
    await writeFile(registryPath, JSON.stringify(registry, null, 2));
}

/**
 * Returns a usable browser, launching (or connecting to) one if necessary.
 *
 * @param {"chrome" | "firefox"} [browserType] - Browser to use. When omitted,
 *   an already running browser is reused, otherwise Firefox and then Chrome
 *   are tried.
 * @param {string} [wsURL] - WebSocket endpoint of a remote browser. When
 *   given, the browser is connected to rather than launched locally.
 * @returns {Promise<{browser: import('puppeteer').Browser, type: "chrome" | "firefox"}>}
 * @throws {Error} If the browser type is unsupported, the browser is not
 *   installed (local launches only), or no browser could be launched.
 */
async function launchBrowser(browserType, wsURL) {
    const isBrowserValid = (browser) => {
        try {
            return Boolean(
                browser
                && browser.browserContexts().length
                && browser.connected
                // Browsers obtained through puppeteer.connect() have no child process.
                && (isRemoteBrowser || browser.process() !== null)
            );
        } catch {
            return false;
        }
    };

    if (!browserType) {
        if (isBrowserValid(firefox)) {
            return { browser: firefox, type: "firefox" };
        }
        if (isBrowserValid(chrome)) {
            return { browser: chrome, type: "chrome" };
        }
        for (const candidate of ["firefox", "chrome"]) {
            const launched = await launchBrowser(candidate, wsURL).catch(() => null);
            if (launched) {
                return launched;
            }
        }
        throw new Error("No browser launched yet");
    }

    if (!Object.hasOwn(BROWSER_PATHS, browserType)) {
        throw new Error(`Unsupported browser type: ${browserType}`);
    }
    if (browserType === "firefox" && isBrowserValid(firefox)) {
        return { browser: firefox, type: "firefox" };
    }
    if (browserType === "chrome" && isBrowserValid(chrome)) {
        return { browser: chrome, type: "chrome" };
    }
    // A remote browser does not need a local installation.
    if (!wsURL && !(await isBrowserInstalled(browserType))) {
        throw new Error(`${browserType.toUpperCase()} is not installed.`);
    }
    if (launching) {
        // Wait for the launch in flight whatever its outcome, then re-evaluate.
        await launching.catch(() => undefined);
        return launchBrowser(browserType, wsURL);
    }

    const attempt = (async () => {
        isRemoteBrowser = !!wsURL;
        let browser;
        if (wsURL) {
            console.log(`Launching remote ${browserType.toUpperCase()} browser...`);
            browser = await puppeteer.connect({
                browserWSEndpoint: wsURL,
                acceptInsecureCerts: true
            });
        } else {
            console.log(`Launching local ${browserType.toUpperCase()} browser...`);
            browser = await puppeteer.launch({
                browser: browserType,
                headless: "shell",
                userDataDir: undefined,
                executablePath: BROWSER_PATHS[browserType][getOS()],
                args: [
                    "--no-sandbox",
                    "--disable-setuid-sandbox",
                    `--quickpdf-launch-id=${Date.now()}`
                ]
            });
            if (devMode) {
                console.log(`${browserType.toUpperCase()} launched with PID: ${browser.process()?.pid}`);
            }
        }

        try {
            await recordLaunch(browser.process()?.pid);
            // Any pooled pages belong to a previous browser instance and are unusable.
            if (browserType === "chrome") {
                chromePagePool = [];
            } else {
                firefoxPagePool = [];
            }
            await launchPages(browser, browserType);
        } catch (err) {
            // Do not leak a browser that could not be made ready.
            await (wsURL ? browser.disconnect() : browser.close())
                .catch(closeErr => logDevError("closing browser after failed launch", closeErr));
            throw err;
        }
        console.log(`${browserType.toUpperCase()} browser is ready for usage.`);

        if (browserType === "chrome") {
            chrome = browser;
        } else {
            firefox = browser;
        }
        browser.on("targetdestroyed", target => {
            console.log(`Target destroyed: ${target.url()}`);
        });
        browser.on("disconnected", () => {
            // Only clear state that still refers to this instance; a newer browser
            // may already have taken its place.
            if (browserType === "chrome" && chrome === browser) {
                chrome = null;
                chromePagePool = [];
            } else if (browserType === "firefox" && firefox === browser) {
                firefox = null;
                firefoxPagePool = [];
            }
            console.warn("Browser disconnected");
        });
        return { browser, type: browserType };
    })();

    launching = attempt;
    // Always release the launch lock, including after a failed launch; otherwise
    // every later call would await the same rejected promise forever.
    const release = () => {
        if (launching === attempt) {
            launching = null;
        }
    };
    attempt.then(release, release);
    return attempt;
}

/**
 * Shuts down all browsers managed by this module. Local browsers are closed
 * (terminating their process); remote browsers are only disconnected from.
 * Errors are logged, never thrown.
 * @returns {Promise<void>}
 */
async function closeBrowser() {
    // Capture the instances first: disconnecting fires the "disconnected"
    // handler, which clears the module-level references.
    const browsers = [chrome, firefox].filter(Boolean);
    chrome = null;
    firefox = null;
    // Pooled pages die with their browser.
    chromePagePool = [];
    firefoxPagePool = [];

    let failed = false;
    for (const browser of browsers) {
        try {
            if (isRemoteBrowser) {
                if (browser.connected) {
                    await browser.disconnect();
                }
            } else {
                await browser.close();
            }
        } catch (err) {
            failed = true;
            console.error("Error closing browsers in @iqx-limited/quick-pdf:", err);
        }
    }
    if (!failed) {
        console.log("Browser closed successfully.");
    }
}

// ---------------------------------------------------------------------------
// PDF -> image
// ---------------------------------------------------------------------------

/**
 * Renders the pages of a PDF to images using Firefox's built-in PDF viewer.
 *
 * @param {Buffer | string | URL} input - PDF contents, an http(s) URL, or a
 *   file path.
 * @param {object} [options]
 * @param {number} [options.page] - 1-based page to render; all pages when omitted.
 * @param {string} [options.password] - Password typed into the viewer's prompt.
 * @param {"png" | "jpeg" | "webp"} [options.type="png"] - Screenshot format.
 * @param {number} [options.quality=100] - Quality for non-PNG formats.
 * @param {boolean} [options.closeBrowser] - Close the browser when done.
 * @returns {Promise<{length: number, metadata: unknown, pages: Buffer[]}>}
 * @throws {Error} If the browser is unavailable, the page number is out of
 *   bounds, or rendering fails.
 */
const pdf2img = async (input, options = {}) => {
    const { browser } = await launchBrowser("firefox");
    if (!browser?.connected) {
        throw new Error("Browser not available");
    }

    let page = null;
    let tempPath = null;
    try {
        page = await getPage("firefox");

        let address;
        if (Buffer.isBuffer(input)) {
            // A unique name keeps concurrent conversions from overwriting each other
            // (and from clobbering a user's own temp.pdf). The file stays in the
            // working directory, where the original implementation wrote it.
            const suffix = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`;
            tempPath = resolve(process.cwd(), `temp-${suffix}.pdf`);
            address = pathToFileURL(tempPath).toString();
            await writeFile(tempPath, input);
        } else if (input instanceof URL) {
            address = input.href;
        } else if (/^https?:\/\//i.test(input.toString())) {
            address = input.toString();
        } else {
            address = pathToFileURL(resolve(input.toString())).toString();
        }

        await page.goto(address);
        if (options.password) {
            try {
                await page.waitForSelector('input[type="password"]', { visible: true, timeout: 5000 });
                console.log("Password prompt detected, entering password...");
                await page.type('input[type="password"]', options.password || "");
                await page.keyboard.press("Enter");
            } catch (err) {
                // No prompt appeared: the document is presumably not protected.
                logDevError("password prompt handling", err);
            }
        }
        await page.waitForSelector("canvas", { timeout: 5000 });

        const pageCount = await page.evaluate(() => {
            if (window.PDFViewerApplication) {
                return window.PDFViewerApplication.pagesCount;
            }
            return 0;
        });
        const metadata = await page.evaluate(() => {
            const app = window.PDFViewerApplication;
            if (app && app.pdfDocument) {
                return app.documentInfo ?? {};
            }
            return {};
        });
        const pdfPage = await page.evaluate(() => {
            const canvas = document.querySelector("canvas");
            const { width, height } = canvas.getBoundingClientRect();
            return { width, height };
        });
        await page.setViewport({
            width: pdfPage.width,
            height: pdfPage.height,
            deviceScaleFactor: 1
        });

        const imageBuffers = [];
        // Explicit null check so that an invalid page number of 0 is rejected
        // instead of silently rendering every page.
        if (options.page !== undefined && options.page !== null) {
            if (options.page < 1 || options.page > pageCount) {
                throw new Error(`Page number ${options.page} is out of bounds. PDF has ${pageCount} pages.`);
            }
            imageBuffers.push(await renderPage(page, options.page, options));
        } else {
            for (let i = 1; i <= pageCount; i++) {
                imageBuffers.push(await renderPage(page, i, options));
            }
        }

        return {
            length: pageCount,
            // NOTE(review): the value read above is `documentInfo`; confirm it actually
            // carries an `info` property, otherwise this is always undefined.
            metadata: metadata.info,
            pages: imageBuffers
        };
    } finally {
        // Cleanup is best-effort so it can never replace the error being propagated.
        if (tempPath) {
            await unlink(tempPath).catch(err => logDevError("removing temporary PDF", err));
        }
        if (page) {
            await restorePage("firefox", page).catch(err => console.error("Quick-PDF: failed to restore page:", err));
        }
        if (options.closeBrowser) {
            await closeBrowser();
        }
    }
};

/**
 * Navigates the PDF viewer to one page and screenshots its canvas.
 * @param {import('puppeteer').Page} page - Page showing the PDF viewer.
 * @param {number} pageNumber - 1-based page number.
 * @param {{type?: string, quality?: number}} options - Screenshot options.
 * @returns {Promise<Buffer>} The encoded image.
 * @throws {Error} If the page container/canvas is missing or the screenshot fails.
 */
const renderPage = async (page, pageNumber, options) => {
    await page.evaluate((pageNum) => {
        if (window.PDFViewerApplication) {
            window.PDFViewerApplication.page = pageNum;
        }
    }, pageNumber);
    await page.waitForSelector(`.page[data-page-number="${pageNumber}"]`, { timeout: 5000 });
    // Wait until the viewer has removed the page's loading indicator.
    await page.waitForFunction((pageNum) => {
        const pageContainer = document.querySelector(`.page[data-page-number="${pageNum}"]`);
        if (!pageContainer) {
            return true;
        }
        return !pageContainer.querySelector(".loadingIcon");
    }, {}, pageNumber);

    const pageBoundingBox = await page.evaluate((pageNum) => {
        const pageContainer = document.querySelector(`.page[data-page-number="${pageNum}"]`);
        if (!pageContainer) {
            throw new Error(`Page container for page ${pageNum} not found`);
        }
        const canvas = pageContainer.querySelector("canvas");
        if (!canvas) {
            throw new Error(`Canvas for page ${pageNum} not found`);
        }
        const { x, y, width, height } = canvas.getBoundingClientRect();
        return { x, y, width, height };
    }, pageNumber);

    const screenshotOptions = {
        fullPage: false,
        type: options.type ?? "png",
        clip: {
            x: pageBoundingBox.x,
            y: pageBoundingBox.y,
            width: pageBoundingBox.width,
            height: pageBoundingBox.height
        }
    };
    // `quality` is only set for non-PNG formats.
    if (options.type && options.type !== "png") {
        screenshotOptions.quality = options.quality ?? 100;
    }

    try {
        const uint8array = await page.screenshot(screenshotOptions);
        return Buffer.from(uint8array);
    } catch (err) {
        throw new Error(`Failed to render page ${pageNumber} of the PDF`, { cause: err });
    }
};

// ---------------------------------------------------------------------------
// Input helpers
// ---------------------------------------------------------------------------

/**
 * Loads input as a Buffer: returned as-is when already a Buffer, otherwise
 * fetched as a URL, falling back to reading it as a local file path.
 * @param {Buffer | string | URL} input
 * @returns {Promise<Buffer>}
 * @throws {Error} If the input can be neither fetched nor read from disk.
 */
async function getBuffer(input) {
    if (input instanceof Buffer) {
        return input;
    }
    const location = input.toString();
    try {
        const res = await fetch(location);
        if (!res.ok) {
            throw new Error("Failed to Fetch the File");
        }
        return Buffer.from(await res.arrayBuffer());
    } catch (err) {
        // fetch() also rejects for plain file paths (not a valid URL): try the disk.
        if (existsSync(location)) {
            return await readFile(location);
        }
        throw new Error("Failed to Fetch the File", { cause: err });
    }
}

/**
 * Downloads a URL and returns the response body as text.
 * @param {string} url
 * @returns {Promise<string>}
 * @throws {Error} If the response status is not OK.
 */
const fetchHtmlFromUrl = async (url) => {
    const response = await fetch(url);
    if (!response.ok) {
        throw new Error(`Failed to fetch content from URL: ${url}`);
    }
    return await response.text();
};

/**
 * Reads a file as UTF-8 text.
 * @param {string} filePath
 * @returns {Promise<string>}
 */
const readHtmlFromFilePath = async (filePath) => {
    return await readFile(filePath, "utf-8");
};

// ---------------------------------------------------------------------------
// Image -> PDF
// ---------------------------------------------------------------------------

/**
 * Lays an image out on a single A4 page, with optional header and footer text,
 * and collects the generated PDF.
 * @param {Buffer} buf - JPEG or PNG data.
 * @param {{header?: string, footer?: string, fontSize?: number}} options
 * @returns {Promise<Buffer>} The PDF bytes.
 */
const renderImagePdf = (buf, options) => new Promise((resolvePdf, rejectPdf) => {
    const imgSize = imageSize(buf);
    const landscape = imgSize.width && imgSize.height ? imgSize.width > imgSize.height : false;
    const doc = new PDFDocument({
        size: "a4",
        layout: landscape ? "landscape" : "portrait",
        margins: { top: 0, bottom: 0, left: 0, right: 0 }
    });

    const pdfBuffers = [];
    doc.on("data", (data) => {
        pdfBuffers.push(data);
    });
    doc.on("end", () => {
        resolvePdf(Buffer.concat(pdfBuffers));
    });
    doc.on("error", rejectPdf);

    doc.fontSize(options.fontSize ?? 10);
    const topMargin = options.header ? 20 : 0;
    const bottomMargin = options.footer ? 20 : 0;
    const sidePadding = 20;
    const imageHeight = doc.page.height - topMargin - bottomMargin;

    if (options.header) {
        doc.text(options.header, sidePadding, topMargin / 2 - 6, {
            align: "center",
            baseline: "top",
            width: doc.page.width - 2 * sidePadding,
            height: topMargin - 5,
            ellipsis: true
        }).moveDown(0.5);
    }
    // The image fills the space left between the header and footer bands.
    doc.image(buf, 0, topMargin, { width: doc.page.width, height: imageHeight });
    if (options.footer) {
        doc.text(options.footer, sidePadding, doc.page.height - bottomMargin / 2 - 6, {
            align: "center",
            width: doc.page.width - 2 * sidePadding,
            height: bottomMargin - 5,
            ellipsis: true
        });
    }
    doc.end();
});

/**
 * Converts a JPEG or PNG image into a single-page A4 PDF.
 *
 * @param {Buffer | string | URL} input - Image data, URL, or file path.
 * @param {object} [options]
 * @param {string} [options.header] - Text printed above the image.
 * @param {string} [options.footer] - Text printed below the image.
 * @param {number} [options.fontSize=10] - Font size for header/footer.
 * @param {boolean} [options.closeBrowser] - Close any open browser when done.
 * @returns {Promise<Buffer>} The PDF bytes.
 * @throws {Error} If the input cannot be loaded or is not a JPEG/PNG.
 */
const img2pdf = async (input, options = {}) => {
    try {
        const { fileTypeFromBuffer } = await import('file-type');
        const buf = await getBuffer(input);
        const type = await fileTypeFromBuffer(buf);
        if (type?.mime !== "image/jpeg" && type?.mime !== "image/png") {
            throw new Error("Provided File is not a JPEG or a PNG.");
        }
        return await renderImagePdf(buf, options);
    } finally {
        if (options.closeBrowser) {
            await closeBrowser();
        }
    }
};

// ---------------------------------------------------------------------------
// HTML -> PDF
// ---------------------------------------------------------------------------

/**
 * Renders HTML to an A4 PDF.
 *
 * @param {string | Buffer | URL} input - HTML markup, an http(s) URL, or a
 *   path to an HTML file.
 * @param {object} [options]
 * @param {object} [options.rules] - html-validate configuration.
 * @param {boolean} [options.validation=true] - Validate the HTML first.
 * @param {boolean} [options.base64=false] - Return a base64 string instead of a Buffer.
 * @param {boolean} [options.closeBrowser] - Close the browser when done.
 * @returns {Promise<Buffer | string>} The PDF bytes, or base64 text.
 * @throws {Error} If no browser is available. When validation fails, the
 *   thrown Error carries `valid: false`, `count` and `validation` properties
 *   describing the problems.
 */
const html2pdf = async (input, options = {}) => {
    const { browser, type } = await launchBrowser();
    let page = null;
    try {
        const validator = new HtmlValidate(options.rules ?? {
            extends: ["html-validate:standard"],
            rules: {
                "no-trailing-whitespace": "off"
            }
        });

        let htmlContent = input.toString();
        if (htmlContent.startsWith("http://") || htmlContent.startsWith("https://")) {
            htmlContent = await fetchHtmlFromUrl(htmlContent);
        } else if (existsSync(htmlContent)) {
            htmlContent = await readHtmlFromFilePath(htmlContent);
        }

        if (!browser?.connected) {
            throw new Error("Browser not available");
        }
        page = await getPage(type);

        const validation = options.validation ?? true;
        const res = validation ? await validator.validateString(htmlContent) : { valid: true };
        if (!res.valid) {
            // Thrown as a real Error (stack trace, instanceof) that still exposes the
            // same enumerable properties the previous plain object had.
            throw Object.assign(new Error("HTML validation failed"), {
                valid: false,
                count: {
                    errors: res.errorCount,
                    warnings: res.warningCount
                },
                validation: res.results.map(result => ({
                    file: result.filePath,
                    count: {
                        errors: result.errorCount,
                        warnings: result.warningCount
                    },
                    messages: result.messages.map(msg => ({
                        message: msg.message,
                        line: msg.line,
                        column: msg.column,
                        ruleId: msg.ruleId
                    }))
                }))
            });
        }

        await page.setContent(htmlContent, { waitUntil: "load" });
        const pdf = await page.pdf({ format: "A4", printBackground: true });
        const pdfBuffer = Buffer.from(pdf);
        if (options.base64 ?? false) {
            return pdfBuffer.toString("base64");
        }
        return pdfBuffer;
    } finally {
        // Cleanup is best-effort so it can never replace the error being propagated.
        if (page) {
            await restorePage(type, page).catch(err => console.error("Quick-PDF: failed to restore page:", err));
        }
        if (options.closeBrowser) {
            await closeBrowser();
        }
    }
};

// Refuse to run on unsupported Node.js versions.
const requiredVersion = ">=20.0.0";
if (!satisfies(process.version, requiredVersion)) {
    console.error(`\nError: Node.js version ${requiredVersion} is required. You are using ${process.version}.\n`);
    process.exit(1);
}

export { closeBrowser, html2pdf, img2pdf, launchBrowser, pdf2img, setDevMode };
//# sourceMappingURL=index.mjs.map