UNPKG

e2pdf

Version:

A lightweight, highly efficient, and customizable Node.js library for crawling websites and converting pages into compact, AI-optimized PDFs. Ideal for data archiving, offline analysis, and feeding content to AI tools. Delivers fast performance and allows

120 lines (119 loc) 4.53 kB
import { Configuration, PlaywrightCrawlerOptions } from "crawlee"; export interface E2PdfOptions { /** * Output directory * @default process.cwd() */ out?: string; /** * callback function to run after each pdf print */ afterPrint?: (pdf: Buffer<ArrayBufferLike>, url: string, filePath: string) => void; /** * PDF options * @see https://playwright.dev/docs/api/class-page#page-pdf */ pdf?: { /** * Display header and footer. Defaults to `false`. */ displayHeaderFooter?: boolean; /** * HTML template for the print footer. Should use the same format as the * [`headerTemplate`](https://playwright.dev/docs/api/class-page#page-pdf-option-header-template). */ footerTemplate?: string; /** * Paper format. If set, takes priority over * [`width`](https://playwright.dev/docs/api/class-page#page-pdf-option-width) or * [`height`](https://playwright.dev/docs/api/class-page#page-pdf-option-height) options. Defaults to 'Letter'. */ format?: string; /** * HTML template for the print header. Should be valid HTML markup with following classes used to inject printing * values into them: * - `'date'` formatted print date * - `'title'` document title * - `'url'` document location * - `'pageNumber'` current page number * - `'totalPages'` total pages in the document */ headerTemplate?: string; /** * Paper height, accepts values labeled with units. */ height?: string | number; /** * Paper orientation. Defaults to `false`. */ landscape?: boolean; /** * Paper margins, defaults to none. */ margin?: { /** * Top margin, accepts values labeled with units. Defaults to `0`. */ top?: string | number; /** * Right margin, accepts values labeled with units. Defaults to `0`. */ right?: string | number; /** * Bottom margin, accepts values labeled with units. Defaults to `0`. */ bottom?: string | number; /** * Left margin, accepts values labeled with units. Defaults to `0`. */ left?: string | number; }; /** * Whether or not to embed the document outline into the PDF. Defaults to `false`. */ outline?: boolean; /** * Paper ranges to print, e.g., '1-5, 8, 11-13'. Defaults to the empty string, which means print all pages. */ pageRanges?: string; /** * The file path to save the PDF to. If [`path`](https://playwright.dev/docs/api/class-page#page-pdf-option-path) is a * relative path, then it is resolved relative to the current working directory. If no path is provided, the PDF won't * be saved to the disk. */ path?: string; /** * Give any CSS `@page` size declared in the page priority over what is declared in * [`width`](https://playwright.dev/docs/api/class-page#page-pdf-option-width) and * [`height`](https://playwright.dev/docs/api/class-page#page-pdf-option-height) or * [`format`](https://playwright.dev/docs/api/class-page#page-pdf-option-format) options. Defaults to `false`, which * will scale the content to fit the paper size. */ preferCSSPageSize?: boolean; /** * Print background graphics. Defaults to `false`. */ printBackground?: boolean; /** * Scale of the webpage rendering. Defaults to `1`. Scale amount must be between 0.1 and 2. */ scale?: number; /** * Whether or not to generate tagged (accessible) PDF. Defaults to `false`. */ tagged?: boolean; /** * Paper width, accepts values labeled with units. */ width?: string | number; }; /** * @see https://crawlee.dev/api/playwright-crawler/class/PlaywrightCrawler */ crawlerOptions?: PlaywrightCrawlerOptions; /** * @see https://crawlee.dev/api/playwright-crawler/class/PlaywrightCrawler */ crawlerConfig?: Configuration; } export declare const e2pdf: (url: string, config?: E2PdfOptions) => Promise<void>;