UNPKG

pdf2json

Version:

PDF file parser that converts PDF binaries to JSON and text, powered by porting a fork of PDF.JS to Node.js

180 lines (157 loc) 4.61 kB
/* eslint-disable @typescript-eslint/no-explicit-any */
import { EventEmitter } from "node:events";
import { Transform, Readable, TransformOptions, TransformCallback } from "node:stream";
import fs from "node:fs";

/**
 * Transform stream exposed through `PDFParser.StringifyStream`.
 *
 * NOTE(review): the name suggests it serialises incoming objects to strings;
 * the implementation is not visible from these typings - confirm before relying on it.
 */
export declare class StringifyStream extends Transform {
  /** @param options Optional Node.js transform-stream options. */
  constructor(options?: TransformOptions);

  /**
   * Transform hook invoked for every value written to the stream.
   *
   * @param obj Incoming value (untyped).
   * @param encoding Encoding reported by the stream machinery.
   * @param callback Must be invoked once the value has been handled.
   */
  _transform(obj: any, encoding: string, callback: TransformCallback): void;
}

/**
 * Transform stream bound to a parser instance; obtained from
 * `PDFParser.prototype.createParserStream()` or `PDFParser.ParserStream`.
 */
export declare class ParserStream extends Transform {
  /**
   * Builds a readable stream from a JSON-like object.
   *
   * @param jsonObj Value to expose through the returned stream (untyped).
   */
  static createContentStream(jsonObj: any): Readable;

  /**
   * Creates a file write stream for `outputPath`.
   *
   * @param outputPath Destination file path.
   * @param resolve Success callback receiving a string.
   * @param reject Failure callback receiving an optional reason.
   *
   * NOTE(review): the `resolve`/`reject` pair looks like the executor arguments
   * of an enclosing Promise - confirm when each one fires.
   */
  static createOutputStream(
    outputPath: string,
    resolve: (value: string) => void,
    reject: (reason?: any) => void
  ): fs.WriteStream;

  /**
   * @param pdfParser Parser instance driven by this stream (untyped here;
   *   presumably a `PDFParser` - confirm).
   * @param options Optional Node.js transform-stream options.
   */
  constructor(pdfParser: any, options?: TransformOptions);

  /** Transform hook invoked for every chunk written to the stream. */
  _transform(chunk: any, enc: string, callback: TransformCallback): void;

  /** Flush hook invoked once all written chunks have been consumed. */
  _flush(callback: TransformCallback): void;

  /** Destroy hook; declared here without the `(error, callback)` parameters of the base class. */
  _destroy(): void;
}

/**
 * Main parser class. Extends `EventEmitter`; the events it is typed to emit
 * are listed in {@link EventMap}.
 */
export declare class PDFParser extends EventEmitter {
  /** Colour lookup table (shape not specified by these typings). */
  static get colorDict(): object;

  /** Font-face lookup table (shape not specified by these typings). */
  static get fontFaceDict(): object;

  /** Font-style lookup table (shape not specified by these typings). */
  static get fontStyleDict(): object;

  /** Untyped accessor; see the implementation for its shape. */
  static get PDFUnit(): any;

  /** The {@link ParserStream} constructor. */
  static get ParserStream(): typeof ParserStream;

  /** The {@link StringifyStream} constructor. */
  static get StringifyStream(): typeof StringifyStream;

  /** Package metadata fields. */
  static get pkInfo(): {
    version: string;
    name: string;
    description: string;
    author: string;
    license: string;
  };

  /** Parser signature string. */
  // eslint-disable-next-line @typescript-eslint/naming-convention
  static get _PARSER_SIG(): string;

  /**
   * @param context Optional context object, or `null`; see {@link PDFParserContext}.
   * @param needRawText Optional flag; presumably enables the raw-text getters - confirm.
   * @param password Optional password string; presumably for encrypted PDFs - confirm.
   */
  constructor(context?: PDFParserContext | null, needRawText?: boolean, password?: string);

  /**
   * Registers a listener for one of the events in {@link EventMap}, with the
   * listener signature tied to the event name.
   */
  on<K extends keyof EventMap>(eventName: K, listener: EventMap[K]): this;

  /** Read-only data object, or `null`. */
  readonly data: object | null;

  /** Read-only string key. */
  readonly binBufferKey: string;

  /** Creates a {@link ParserStream}. */
  createParserStream(): ParserStream;

  /**
   * Loads a PDF from a file path.
   *
   * @param pdfFilePath Path of the PDF file.
   * @param verbosity Optional numeric verbosity level.
   * @returns A promise with no resolved value; per {@link EventMap}, parsed
   *   output is delivered through events.
   */
  loadPDF(pdfFilePath: string, verbosity?: number): Promise<void>;

  /**
   * Parses a PDF already held in memory.
   *
   * @param pdfBuffer Buffer with the PDF bytes.
   * @param verbosity Optional numeric verbosity level.
   */
  parseBuffer(pdfBuffer: Buffer, verbosity?: number): void;

  /** Returns the raw text content as a string. */
  getRawTextContent(): string;

  /** Returns the raw text content as a readable stream. */
  getRawTextContentStream(): Readable;

  /** Returns the form field descriptors as {@link FieldType} entries. */
  getAllFieldsTypes(): FieldType[];

  /** Returns form field data as {@link FieldType} entries. */
  getAllFieldData(): FieldType[];

  /** Stream counterpart of {@link PDFParser.getAllFieldsTypes}. */
  getAllFieldsTypesStream(): Readable;

  /** Returns an object; presumably the parsed data with text blocks merged when applicable - confirm. */
  getMergedTextBlocksIfNeeded(): object;

  /** Stream counterpart of {@link PDFParser.getMergedTextBlocksIfNeeded}. */
  getMergedTextBlocksStream(): Readable;

  /**
   * Resets the underlying PDF.js state.
   *
   * @param needRawText Optional flag, same name as the constructor argument.
   */
  resetPDFJS(needRawText?: boolean): void;

  /** Tears the parser down. */
  destroy(): void;
}

/** Event names accepted by `PDFParser.prototype.on`, mapped to their listener signatures. */
export type EventMap = {
  /** Emitted when a parsing error occurs */
  "pdfParser_dataError": (errMsg: { parserError: Error } | Error) => void;
  /** Emitted when parsing is complete and data is ready */
  "pdfParser_dataReady": (pdfData: Output) => void;
  /** Emitted when PDFJS emits readable meta info */
  "readable": (meta: Output["Meta"]) => void;
  /** Emitted for each page of parsed data, or null at end */
  "data": (data: Output["Pages"][number] | null) => void;
};

/** Payload of the `pdfParser_dataReady` event. */
export interface Output {
  Transcoder: string;
  /** Free-form metadata dictionary. */
  Meta: { [key: string]: any };
  Pages: Page[];
}

/** One parsed page: its dimensions plus the element collections found on it. */
export declare interface Page {
  Width: number;
  Height: number;
  HLines: Line[];
  VLines: Line[];
  Fills: Fill[];
  Texts: Text[];
  Fields: Field[];
  Boxsets: Boxset[];
}

/** Element of `Page.Fills`. Field semantics are not described by these typings. */
export declare interface Fill {
  x: number;
  y: number;
  w: number;
  h: number;
  oc?: string;
  clr?: number;
}

/** Element of `Page.HLines` / `Page.VLines`. Field semantics are not described by these typings. */
export declare interface Line {
  x: number;
  y: number;
  w: number;
  l: number;
  oc?: string;
  clr?: number;
}

/** Element of `Page.Texts`: a positioned block containing one or more text runs. */
export declare interface Text {
  x: number;
  y: number;
  w: number;
  sw: number;
  /** Alignment. */
  A: 'left' | 'center' | 'right';
  /** Text runs. */
  R: TextRun[];
  oc?: string;
  clr?: number;
}

/** Element of `Text.R`. */
export declare interface TextRun {
  T: string;
  S: number;
  /** Four-element tuple whose last two entries are 0/1 flags. */
  TS: [number, number, 0 | 1, 0 | 1];
  RA?: number;
}

/** Element of `Page.Boxsets`: a group of boxes sharing one id. */
export declare interface Boxset {
  boxes: Box[];
  id: {
    Id: string;
    EN?: number;
  };
}

/** Element of `Page.Fields`. */
export declare interface Field {
  id: {
    Id: string;
    EN?: number;
  };
  style: number;
  TI: number;
  AM: number;
  TU: string;
  x: number;
  y: number;
  w: number;
  h: number;
  T: {
    Name: 'alpha' | 'link';
    TypeInfo: object;
  };
}

/** Entry returned by `getAllFieldsTypes()` and `getAllFieldData()`. */
export declare interface FieldType {
  id: string;
  type: 'alpha' | 'box' | 'radio' | 'date' | 'link' | 'signature';
  calc: boolean;
  value: string | boolean;
}

/** Element of `Boxset.boxes`; every member is optional. */
export declare interface Box {
  // Simple box (used in Fills, HLines, VLines, etc.)
  x?: number;
  y?: number;
  w?: number;
  h?: number;
  oc?: string;
  clr?: number;
  // Field/Boxset box (used in Boxsets, Fields)
  id?: {
    Id: string;
    EN?: number;
  };
  T?: {
    Name: string;
    TypeInfo?: object;
  };
  TI?: number;
  AM?: number;
  checked?: boolean;
  style?: number;
}

/** Optional context object accepted by the `PDFParser` constructor. */
export interface PDFParserContext {
  /** Optional teardown hook. */
  destroy?(): void;
}

// NOTE(review): TypeScript normally reports TS2309 when `export =` shares a
// module with other exported elements; this is left unchanged to preserve the
// module's export shape - confirm the file type-checks without `skipLibCheck`.
export = PDFParser;