unpdf
Version:
PDF extraction and rendering across all JavaScript runtimes
91 lines (83 loc) • 3.27 kB
text/typescript
import * as _napi_rs_canvas from '@napi-rs/canvas';
import { DocumentInitParameters, PDFDocumentProxy } from './types/src/display/api';
import * as PDFJS from './types/src/pdf';
/**
* Shape of each element in the array that `extractImages` resolves to:
* a pixel buffer together with the dimensions needed to interpret it.
*/
interface ExtractedImageObject {
/** Pixel bytes of the image (the `extractImages` example feeds this to `sharp` as raw input). */
data: Uint8ClampedArray;
/** Image width — presumably in pixels; TODO confirm against the implementation. */
width: number;
/** Image height — presumably in pixels; TODO confirm against the implementation. */
height: number;
/** Number of color channels; the type restricts it to 1, 3, or 4. */
channels: 1 | 3 | 4;
/** String identifier for the image — presumably the PDF.js object key it was read from; TODO confirm. */
key: string;
}
/**
* Extracts images from a specific page of a PDF document, including necessary metadata,
* such as width, height, and calculated color channels.
*
* @param data - Either raw PDF data (anything accepted as `DocumentInitParameters['data']`)
* or an already-loaded `PDFDocumentProxy`.
* @param pageNumber - The page to extract images from (presumably 1-based, as in PDF.js —
* TODO confirm against the implementation).
* @returns A promise resolving to one `ExtractedImageObject` per extracted image.
*
* @example
* const imagesData = await extractImages(pdf, pageNum)
*
* let imageIndex = 0
* for (const imgData of imagesData) {
* imageIndex++
* await sharp(imgData.data, {
* raw: { width: imgData.width, height: imgData.height, channels: imgData.channels }
* })
* .png()
* .toFile(`${imageIndex}.png`)
* }
*/
declare function extractImages$1(data: DocumentInitParameters['data'] | PDFDocumentProxy, pageNumber: number): Promise<ExtractedImageObject[]>;
/**
* Renders a single page of a PDF document to an image and resolves with its bytes.
*
* @param data - Either raw PDF data (anything accepted as `DocumentInitParameters['data']`)
* or an already-loaded `PDFDocumentProxy`.
* @param pageNumber - The page to render (presumably 1-based, as in PDF.js — TODO confirm).
* @param options - Optional rendering settings; every field may be omitted.
* @returns A promise resolving to an `ArrayBuffer` with the rendered image
* (the encoding, e.g. PNG, is not visible from this declaration — TODO confirm).
*/
declare function renderPageAsImage$1(data: DocumentInitParameters['data'] | PDFDocumentProxy, pageNumber: number, options?: {
/**
* Async factory that resolves to the `@napi-rs/canvas` module, e.g.
* `() => import('@napi-rs/canvas')`. Presumably needed in runtimes without
* a native canvas implementation — verify against the implementation.
*/
canvasImport?: () => Promise<typeof _napi_rs_canvas>;
/**
* Scale factor applied when rendering the page.
* @default 1.0
*/
scale?: number;
/** Target width of the output. NOTE(review): how this interacts with `scale` and `height` is not visible here — confirm. */
width?: number;
/** Target height of the output. NOTE(review): how this interacts with `scale` and `width` is not visible here — confirm. */
height?: number;
}): Promise<ArrayBuffer>;
/**
* Reads the metadata of a PDF document.
*
* @param data - Either raw PDF data (anything accepted as `DocumentInitParameters['data']`)
* or an already-loaded `PDFDocumentProxy`.
* @returns A promise resolving to an object with two loosely typed records, `info` and
* `metadata`. Presumably these mirror the `info` / `metadata` parts of PDF.js's
* `getMetadata()` result — TODO confirm. Values are typed `any`, so narrow them before use.
*/
declare function getMeta$1(data: DocumentInitParameters['data'] | PDFDocumentProxy): Promise<{
info: Record<string, any>;
metadata: Record<string, any>;
}>;
/**
* Extracts the text of a PDF document.
*
* Overload 1 — `mergePages` omitted or `false`: `text` is a `string[]`
* (presumably one entry per page — TODO confirm).
*
* Note on typing: the two overloads are keyed on the literal types `false` and `true`.
* An argument whose `mergePages` is typed as plain `boolean` matches neither overload,
* so callers holding a runtime flag must branch on it before calling.
*
* @param data - Either raw PDF data (anything accepted as `DocumentInitParameters['data']`)
* or an already-loaded `PDFDocumentProxy`.
* @param options - Optional; may be omitted entirely for this overload.
* @returns A promise resolving to the page count and the extracted text as an array.
*/
declare function extractText$1(data: DocumentInitParameters['data'] | PDFDocumentProxy, options?: {
mergePages?: false;
}): Promise<{
totalPages: number;
text: string[];
}>;
/**
* Overload 2 — `mergePages: true`: `text` is a single `string`
* (presumably the text of all pages combined — TODO confirm the separator used).
*
* @param data - Either raw PDF data (anything accepted as `DocumentInitParameters['data']`)
* or an already-loaded `PDFDocumentProxy`.
* @param options - Required for this overload, with `mergePages` set to the literal `true`.
* @returns A promise resolving to the page count and the extracted text as one string.
*/
declare function extractText$1(data: DocumentInitParameters['data'] | PDFDocumentProxy, options: {
mergePages: true;
}): Promise<{
totalPages: number;
text: string;
}>;
/**
* By default, unpdf will use the latest version of PDF.js compiled for
* serverless environments. If you want to use a different version, you can
* provide a custom resolver function.
*
* @param pdfjs - Resolver function returning a promise for the PDF.js module to use,
* typically a dynamic `import()`. The resolved value is typed `any`, so it is not
* checked against the PDF.js typings at compile time.
* @returns A promise that resolves with no value; await it before calling other unpdf functions
* (as the example does).
*
* @example
* // Use the official PDF.js build (make sure to install it first)
* import { definePDFJSModule } from 'unpdf'
*
* await definePDFJSModule(() => import('pdfjs-dist'))
*/
declare function definePDFJSModule(pdfjs: () => Promise<any>): Promise<void>;
/**
* Legacy configuration entry point that takes the PDF.js resolver wrapped in an
* options object instead of as a direct argument.
*
* @param options - Configuration object.
* @param options.pdfjs - Optional resolver function returning a promise for the PDF.js
* module to use. NOTE(review): behavior when omitted is not visible here — confirm.
* @returns A promise that resolves with no value.
* @deprecated Use `definePDFJSModule` instead.
*/
declare function configureUnPDF(options: {
pdfjs?: () => Promise<any>;
}): Promise<void>;
/**
* Returns a PDFDocumentProxy instance from a given binary data.
*
* Applies the following defaults:
* - `isEvalSupported: false`
* - `useSystemFonts: true`
*
* @param data - The PDF data, in any form accepted as `DocumentInitParameters['data']`.
* @param options - Optional additional PDF.js `DocumentInitParameters`
* (presumably merged over the defaults listed above — TODO confirm precedence).
* @returns A promise resolving to the loaded `PDFDocumentProxy`, which can be passed
* to the other functions in this module in place of raw data.
*/
declare function getDocumentProxy(data: DocumentInitParameters['data'], options?: DocumentInitParameters): Promise<PDFDocumentProxy>;
/**
* Resolves to the PDF.js module namespace, typed as the bundled PDF.js typings
* (`./types/src/pdf`).
*
* NOTE(review): presumably this is the module currently in use by unpdf — the default
* build, or the one supplied through `definePDFJSModule` — confirm against the implementation.
*
* @returns A promise resolving to the PDF.js module.
*/
declare function getResolvedPDFJS(): Promise<typeof PDFJS>;
/**
* Resolves the PDF.js module, optionally through a caller-supplied resolver.
*
* @param pdfjsResolver - Optional function returning a promise for a PDF.js module.
* NOTE(review): presumably the bundled build is used when omitted — confirm.
* @param options - Optional settings object (destructured in the signature).
* @param options.reload - Optional flag; presumably forces the module to be resolved
* again even if one is already loaded — TODO confirm against the implementation.
* @returns A promise that resolves with no value once resolution has finished.
*/
declare function resolvePDFJSImport(pdfjsResolver?: () => Promise<any>, { reload }?: {
reload?: boolean | undefined;
}): Promise<void>;
// Public names for the `$1`-suffixed declarations above. Each alias is typed with
// `typeof`, so it carries the exact signature of its target — including both
// overloads of `extractText$1`.
// (The `$1` suffix is presumably added by the declaration bundler to avoid a name
// clash — not verifiable from this file.)
declare const getMeta: typeof getMeta$1;
declare const extractText: typeof extractText$1;
declare const extractImages: typeof extractImages$1;
declare const renderPageAsImage: typeof renderPageAsImage$1;
// Only values are exported here; the `ExtractedImageObject` interface is not in this
// list, so consumers who need the type must derive it, e.g.
// `Awaited<ReturnType<typeof extractImages>>[number]`.
export { configureUnPDF, definePDFJSModule, extractImages, extractText, getDocumentProxy, getMeta, getResolvedPDFJS, renderPageAsImage, resolvePDFJSImport };