ppu-pdf
Version:
Easily extract text from digital PDF files with coordinates and font sizes included, and optionally group text by lines or render scanned PDFs to canvas/PNG.
25 lines (24 loc) • 1.79 kB
TypeScript
import { type CompactPageLines, type CompactPdfLine, type CompactPdfWord, type PageLines, type PageTexts, type PageToonLines, type PdfCompactLineAlgorithm, type PdfLine, type PdfScannedThreshold, type PdfWord } from "../pdf.interface.js";
/**
 * Platform-agnostic base class containing all shared PDF processing logic.
 * Both Node.js and web implementations extend this class.
 *
 * This is an ambient declaration: only signatures are visible here. Notes
 * below marked "presumably" / "TODO confirm" are inferred from member names
 * and must be verified against the implementation before being relied on.
 */
export declare class BasePdfReaderCommon {
/**
 * Takes a list of words and returns a list of words.
 * Presumably returns them in sorted order — TODO confirm the sort key and
 * whether the input array is mutated or copied.
 */
protected sortTextContent(texts: PdfWord[]): PdfWord[];
/**
 * Same signature as `sortTextContent`.
 * Presumably a simpler ordering strategy — TODO confirm how it differs.
 */
protected sortTextContentSimple(texts: PdfWord[]): PdfWord[];
/**
 * Takes a list of words and returns a list of words.
 * NOTE(review): presumably drops duplicated words that some PDFs use to
 * simulate bold text — inferred from the name only; confirm.
 */
protected removeFakeBold(texts: PdfWord[]): PdfWord[];
/**
 * Produces a `PageLines` result from a `PageTexts` input.
 *
 * @param pageTexts - Per-page text input.
 * @param startIndex - Optional. Its meaning and default are not visible
 *   here (presumably the index assigned to the first page — TODO confirm).
 */
protected getLinesFromTextsCommon(pageTexts: PageTexts, startIndex?: number): PageLines;
/**
 * Returns `PdfLine` entries for an optional list of words.
 * The result when `words` is omitted is not visible here — TODO confirm.
 */
protected getLines(words?: PdfWord[]): PdfLine[];
/**
 * Takes groups of words (an array of word arrays) and returns a flat list
 * of `PdfLine` entries. Presumably each inner array is one line — confirm.
 */
protected mergeLines(lines: PdfWord[][]): PdfLine[];
/**
 * Compact-type counterpart of `getLines`: accepts an optional list of
 * `CompactPdfWord` and returns `CompactPdfLine` entries.
 */
protected getCompactLines(words?: CompactPdfWord[]): CompactPdfLine[];
/**
 * Compact-type counterpart of `mergeLines`: accepts groups of
 * `CompactPdfWord` and returns `CompactPdfLine` entries.
 */
protected mergeCompactLines(lines: CompactPdfWord[][]): CompactPdfLine[];
/**
 * Same signature as `getCompactLines`.
 * Presumably the previous line-grouping algorithm, kept alongside the
 * current one — TODO confirm when it is selected.
 */
protected getCompactLinesOldAlgorithm(words?: CompactPdfWord[]): CompactPdfLine[];
/**
 * Same signature as `mergeCompactLines`.
 * Presumably the merge step paired with `getCompactLinesOldAlgorithm` —
 * TODO confirm.
 */
protected mergeCompactLinesOldAlgorithm(lines: CompactPdfWord[][]): CompactPdfLine[];
/**
 * Produces a `CompactPageLines` result from a `PageTexts` input.
 *
 * Unlike every other member of this class, this one is declared without
 * `protected`, so it is part of the public surface.
 *
 * @param pageTexts - Per-page text input.
 * @param algorithm - Optional algorithm selector; the default is not
 *   visible here (presumably chooses between the current and the
 *   "old algorithm" variants — TODO confirm).
 * @param startIndex - Optional; meaning and default not visible here.
 */
getCompactLinesFromTextsCommon(pageTexts: PageTexts, algorithm?: PdfCompactLineAlgorithm, startIndex?: number): CompactPageLines;
/**
 * Converts an optional list of `PdfWord` into a list of `CompactPdfWord`.
 * Which fields are kept or dropped is not visible here.
 */
protected mapWordsToCompactWords(words?: PdfWord[]): CompactPdfWord[];
/**
 * Returns a boolean for a `PageTexts` input.
 * Presumably `true` when the document looks scanned (image-only) —
 * TODO confirm the criteria.
 *
 * @param pageTexts - Per-page text input.
 * @param options - Optional thresholds; defaults are not visible here.
 * @param startIndex - Optional; meaning and default not visible here.
 */
protected isScannedCommon(pageTexts: PageTexts, options?: PdfScannedThreshold, startIndex?: number): boolean;
/**
 * Takes a string and returns a string.
 * Presumably a normalized form of the input — the normalization rules are
 * not visible here; confirm against the implementation.
 */
protected normalizedText(str: string): string;
/**
 * Single-page counterpart of `isScannedCommon`: takes one page's text as a
 * plain string plus optional thresholds and returns a boolean.
 */
protected isPageScannedCommon(pageText: string, options?: PdfScannedThreshold): boolean;
/**
 * Returns a string built from a list of words.
 *
 * @param pdfWords - Words to serialize.
 * @param enableToon - Flag whose effect is not visible here.
 *   NOTE(review): what "Toon" denotes is not established by this file —
 *   confirm before documenting the output format.
 */
protected getToonWords(pdfWords: PdfWord[], enableToon: boolean): string;
/**
 * Produces a `PageToonLines` result from a `PageTexts` input.
 *
 * @param pageTexts - Per-page text input.
 * @param startIndex - Optional; meaning and default not visible here.
 */
protected getLinesFromTextsInToonCommon(pageTexts: PageTexts, startIndex?: number): PageToonLines;
}