ppu-pdf
Version:
Easily extract text from digital PDF files with coordinates and font sizes included, and optionally group text by lines or render scanned PDFs to canvas/PNG.
20 lines (19 loc) • 1.57 kB
TypeScript
import { type Canvas } from "@napi-rs/canvas";
import { type CompactPageLines, type CompactPdfLine, type CompactPdfWord, type PageLines, type PageTexts, type PdfCompactLineAlgorithm, type PdfLine, type PdfScannedThreshold, type PdfWord } from "./pdf.interface";
/**
 * Ambient declaration of `PdfReaderCommon`: a set of helpers for sorting
 * extracted PDF words, grouping them into lines (regular and "compact"
 * variants), and handling canvases.
 *
 * Only the signatures are declared here; the implementation lives elsewhere.
 * The behavioural notes below are inferred from member names and types and
 * are marked as assumptions — confirm them against the implementation.
 *
 * Two members are public (`saveCanvasToPng`, `getCompactLinesFromTextsCommon`);
 * every other member is `protected`, i.e. available only to subclasses.
 */
export declare class PdfReaderCommon {
/**
 * Presumably writes `canvas` as a PNG file called `filename` inside
 * `foldername` — TODO confirm path handling against the implementation.
 *
 * @param canvas - The `@napi-rs/canvas` canvas to save.
 * @param filename - Target file name.
 * @param foldername - Target folder name (required here, unlike in `dumpCanvasMapCommon`).
 * @returns A promise that resolves with no value.
 */
saveCanvasToPng(canvas: Canvas, filename: string, foldername: string): Promise<void>;
/**
 * Presumably saves every canvas in `canvasMap` to disk — TODO confirm.
 *
 * @param canvasMap - Canvases keyed by a number (presumably the page index or number; verify).
 * @param filename - File name passed for the dump; how it is combined with the map key is not visible here.
 * @param foldername - Optional output folder; the default is defined by the implementation.
 * @param startIndex - Optional index offset; NOTE(review): looks like the numbering base for pages — confirm.
 * @returns A promise that resolves with no value.
 */
protected dumpCanvasMapCommon(canvasMap: Map<number, Canvas>, filename: string, foldername?: string, startIndex?: number): Promise<void>;
/**
 * Takes a list of words and returns a list of words, presumably in reading
 * order. Whether the input array is mutated or copied is not visible here.
 */
protected sortTextContent(texts: PdfWord[]): PdfWord[];
/**
 * Same signature as `sortTextContent`; presumably a simpler ordering
 * strategy — TODO confirm how the two differ.
 */
protected sortTextContentSimple(texts: PdfWord[]): PdfWord[];
/**
 * Converts per-page texts into per-page lines.
 *
 * @param pageTexts - Texts grouped by page.
 * @param startIndex - Optional index offset; presumably the first page's key — verify.
 * @returns Lines grouped by page.
 */
protected getLinesFromTextsCommon(pageTexts: PageTexts, startIndex?: number): PageLines;
/**
 * Builds lines from words.
 *
 * @param words - Words to group; optional, so the method may be called without an argument.
 * @returns The resulting lines.
 */
protected getLines(words?: PdfWord[]): PdfLine[];
/**
 * Turns groups of words into lines.
 *
 * @param lines - An array of word groups; presumably each inner array holds the words of one line — confirm.
 * @returns One `PdfLine` per merged result.
 */
protected mergeLines(lines: PdfWord[][]): PdfLine[];
/**
 * Compact-type counterpart of `getLines`: builds compact lines from compact words.
 *
 * @param words - Compact words to group; optional.
 */
protected getCompactLines(words?: CompactPdfWord[]): CompactPdfLine[];
/**
 * Compact-type counterpart of `mergeLines`.
 *
 * @param lines - An array of compact word groups.
 */
protected mergeCompactLines(lines: CompactPdfWord[][]): CompactPdfLine[];
/**
 * Same signature as `getCompactLines`; per its name, an older grouping
 * algorithm kept alongside the current one.
 */
protected getCompactLinesOldAlgorithm(words?: CompactPdfWord[]): CompactPdfLine[];
/**
 * Same signature as `mergeCompactLines`; per its name, the merge step of the
 * older algorithm.
 */
protected mergeCompactLinesOldAlgorithm(lines: CompactPdfWord[][]): CompactPdfLine[];
/**
 * Public entry point that converts per-page texts into per-page compact lines.
 *
 * @param pageTexts - Texts grouped by page.
 * @param algorithm - Optional algorithm selector; presumably chooses between
 *   the current and "old" compact-line algorithms declared above — confirm.
 * @param startIndex - Optional index offset; presumably the first page's key — verify.
 * @returns Compact lines grouped by page.
 */
getCompactLinesFromTextsCommon(pageTexts: PageTexts, algorithm?: PdfCompactLineAlgorithm, startIndex?: number): CompactPageLines;
/**
 * Maps regular words to their compact representation.
 *
 * @param words - Words to convert; optional.
 */
protected mapWordsToCompactWords(words?: PdfWord[]): CompactPdfWord[];
/**
 * Returns a boolean verdict on whether the document is scanned, judging by
 * the name; the criteria are not visible here.
 *
 * @param pageTexts - Texts grouped by page.
 * @param options - Optional thresholds that tune the decision; defaults are defined by the implementation.
 * @param startIndex - Optional index offset; presumably the first page's key — verify.
 */
protected isScannedCommon(pageTexts: PageTexts, options?: PdfScannedThreshold, startIndex?: number): boolean;
/**
 * Takes a string and returns a string, presumably a normalized form of it;
 * the exact normalization rules are not visible here.
 */
protected normalizedText(str: string): string;
}