UNPKG

ppu-pdf

Version:

Easily extract text from digital PDF files, with coordinates and font size included, and optionally group text by lines or render scanned PDFs to canvas/PNG.

25 lines (24 loc) 1.79 kB
import {
  type CompactPageLines,
  type CompactPdfLine,
  type CompactPdfWord,
  type PageLines,
  type PageTexts,
  type PageToonLines,
  type PdfCompactLineAlgorithm,
  type PdfLine,
  type PdfScannedThreshold,
  type PdfWord,
} from "../pdf.interface.js";

/**
 * Shared, platform-independent foundation for the PDF readers.
 *
 * All PDF processing logic that is common across platforms is declared on this
 * class; the Node.js reader and the web reader both derive from it.
 *
 * This is an ambient declaration: it lists signatures only, so the notes on
 * individual members describe their shape and hedge anything that is merely
 * suggested by a name. Every member is `protected` except
 * `getCompactLinesFromTextsCommon`, which is public.
 */
export declare class BasePdfReaderCommon {
  /**
   * Takes a list of words and returns a list of words.
   * NOTE(review): presumably the result is the input in sorted order; the
   * ordering criteria are not visible in this declaration.
   */
  protected sortTextContent(texts: PdfWord[]): PdfWord[];

  /**
   * Same shape as `sortTextContent`.
   * NOTE(review): presumably a simpler ordering strategy; confirm against the
   * implementation.
   */
  protected sortTextContentSimple(texts: PdfWord[]): PdfWord[];

  /**
   * Takes a list of words and returns a list of words.
   * NOTE(review): the name suggests words that only simulate bold text are
   * dropped; how they are detected is not shown here.
   */
  protected removeFakeBold(texts: PdfWord[]): PdfWord[];

  /**
   * Converts per-page texts into per-page lines.
   *
   * @param pageTexts - Texts keyed/grouped by page, as defined by `PageTexts`.
   * @param startIndex - Optional number; presumably the first page index
   *   (TODO confirm, including its default).
   */
  protected getLinesFromTextsCommon(
    pageTexts: PageTexts,
    startIndex?: number,
  ): PageLines;

  /**
   * Builds `PdfLine` entries from an optional list of words.
   *
   * @param words - May be omitted.
   */
  protected getLines(words?: PdfWord[]): PdfLine[];

  /**
   * Turns groups of words (one inner array per group) into `PdfLine` entries.
   */
  protected mergeLines(lines: PdfWord[][]): PdfLine[];

  /**
   * Compact counterpart of `getLines`: optional compact words in, compact
   * lines out.
   *
   * @param words - May be omitted.
   */
  protected getCompactLines(words?: CompactPdfWord[]): CompactPdfLine[];

  /**
   * Compact counterpart of `mergeLines`: groups of compact words in, compact
   * lines out.
   */
  protected mergeCompactLines(lines: CompactPdfWord[][]): CompactPdfLine[];

  /**
   * Same signature as `getCompactLines`.
   * NOTE(review): presumably the previous grouping algorithm, kept alongside
   * the current one; confirm against the implementation.
   */
  protected getCompactLinesOldAlgorithm(
    words?: CompactPdfWord[],
  ): CompactPdfLine[];

  /**
   * Same signature as `mergeCompactLines`.
   * NOTE(review): presumably the merge step belonging to the old algorithm.
   */
  protected mergeCompactLinesOldAlgorithm(
    lines: CompactPdfWord[][],
  ): CompactPdfLine[];

  /**
   * Converts per-page texts into per-page compact lines.
   *
   * This is the only member declared without `protected`, so it is callable
   * from outside the class hierarchy.
   *
   * @param pageTexts - Texts grouped by page, as defined by `PageTexts`.
   * @param algorithm - Optional `PdfCompactLineAlgorithm` value; presumably
   *   selects between the current and old compact-line algorithms (TODO
   *   confirm, including the default).
   * @param startIndex - Optional number; presumably the first page index
   *   (TODO confirm).
   */
  getCompactLinesFromTextsCommon(
    pageTexts: PageTexts,
    algorithm?: PdfCompactLineAlgorithm,
    startIndex?: number,
  ): CompactPageLines;

  /**
   * Maps an optional list of `PdfWord` to a list of `CompactPdfWord`.
   *
   * @param words - May be omitted.
   */
  protected mapWordsToCompactWords(words?: PdfWord[]): CompactPdfWord[];

  /**
   * Yields a boolean verdict for a set of page texts.
   * NOTE(review): the name suggests `true` means the document looks scanned;
   * the decision rule is not visible here.
   *
   * @param pageTexts - Texts grouped by page, as defined by `PageTexts`.
   * @param options - Optional `PdfScannedThreshold` settings.
   * @param startIndex - Optional number; presumably the first page index
   *   (TODO confirm).
   */
  protected isScannedCommon(
    pageTexts: PageTexts,
    options?: PdfScannedThreshold,
    startIndex?: number,
  ): boolean;

  /**
   * String in, string out.
   * NOTE(review): the normalisation applied is defined in the implementation
   * and cannot be stated from this declaration.
   */
  protected normalizedText(str: string): string;

  /**
   * Single-page variant of the scanned check: takes one page's text as a
   * plain string plus optional `PdfScannedThreshold` settings and returns a
   * boolean.
   */
  protected isPageScannedCommon(
    pageText: string,
    options?: PdfScannedThreshold,
  ): boolean;

  /**
   * Produces a string from a list of words.
   *
   * @param pdfWords - Words to serialise.
   * @param enableToon - Required boolean flag; presumably switches the TOON
   *   output format on or off (TODO confirm).
   */
  protected getToonWords(pdfWords: PdfWord[], enableToon: boolean): string;

  /**
   * Converts per-page texts into `PageToonLines`.
   *
   * @param pageTexts - Texts grouped by page, as defined by `PageTexts`.
   * @param startIndex - Optional number; presumably the first page index
   *   (TODO confirm).
   */
  protected getLinesFromTextsInToonCommon(
    pageTexts: PageTexts,
    startIndex?: number,
  ): PageToonLines;
}