ppu-paddle-ocr

Version:

Lightweight and probably the fastest PaddleOCR SDK in TypeScript. Runs anywhere JavaScript runs: Node.js, Bun, Deno, React Native (mobile), web browsers, and browser extensions. Docker and CLI are supported. The official SDK is browser-only. Accurate text detection.

github.com/PT-Perkasa-Pilar-Utama/ppu-paddle-ocr

PT-Perkasa-Pilar-Utama/ppu-paddle-ocr

96 lines (95 loc) • 4.74 kB

TypeScript

View Raw

import type { InferenceSession } from "onnxruntime-common";
import type {
  BatchRecognizeOptions,
  PaddleOptions,
  RecognizeOptions,
} from "../interface.js";
import type { BaseDetectionService } from "./base-detection.service.js";
import type {
  BaseRecognitionService,
  RecognitionResult,
} from "./base-recognition.service.js";
import type { BatchItemResult } from "./batch.js";
import type { CoreCanvas, PlatformProvider } from "./platform.js";

/**
 * Line-grouped OCR output.
 *
 * `lines` holds one array per detected text line; the recognized words
 * inside each array are ordered left-to-right.
 */
export type PaddleOcrResult = {
  /** The complete extracted text, with a newline between lines. */
  text: string;
  /** Per-line recognition results, listed in reading order. */
  lines: RecognitionResult[][];
  /** Mean confidence over every recognized item (0–1). */
  confidence: number;
};

/**
 * Flat-list OCR output.
 *
 * A convenience alternative to {@link PaddleOcrResult} for callers that do
 * not need line grouping (e.g. search indexing or simple display).
 */
export type FlattenedPaddleOcrResult = {
  /** The complete extracted text as one space-separated string. */
  text: string;
  /** Every recognized item, in reading order. */
  results: RecognitionResult[];
  /** Mean confidence over every recognized item (0–1). */
  confidence: number;
};

/** One OCR result; whether it is grouped or flattened depends on `flatten`. */
export type AnyOcrResult = PaddleOcrResult | FlattenedPaddleOcrResult;

/** Source types accepted for a single image of a batch. */
export type BatchRecognizeInput = ArrayBuffer | CoreCanvas | string;

/**
 * Abstract, platform-agnostic base for the PaddleOCR service.
 *
 * Concrete implementations (`PaddleOcrService` for Node, Web, etc.) extend
 * this class and supply a {@link PlatformProvider}.
 */
export declare abstract class BasePaddleOcrService {
  protected options: PaddleOptions;
  protected detectionSession: InferenceSession | null;
  protected recognitionSession: InferenceSession | null;
  protected detector: BaseDetectionService | null;
  protected recognitor: BaseRecognitionService | null;
  protected readonly platform: PlatformProvider;

  constructor(platform: PlatformProvider, options?: PaddleOptions);

  protected log(message: string): void;

  protected abstract initSessions(): Promise<void>;

  /**
   * Execute the complete OCR pipeline (detection → recognition) on one image.
   *
   * @param image - Source image: an `ArrayBuffer`, a platform canvas, or a URL/path string.
   * @param options - Per-call settings such as `flatten`, `noCache`, and a custom `dictionary`.
   * @returns OCR results, grouped or flattened according to `options.flatten`.
   */
  recognize(
    image: ArrayBuffer | CoreCanvas | string,
    options?: RecognizeOptions
  ): Promise<AnyOcrResult>;

  /**
   * Apply {@link recognize} to many images with bounded concurrency.
   *
   * The returned array is index-aligned with the inputs, whatever order the
   * images finish in. Memory stays bounded: no more than `concurrency` images
   * are decoded and in flight at a time, so a large (or streamed) input set
   * never materializes all at once. See {@link BatchRecognizeOptions} for
   * `settle`, `signal`, and `onProgress`.
   *
   * @param images - An array or (async) iterable of image sources.
   * @param options - Per-image recognize options plus batch controls.
   */
  batchRecognize(
    images: Iterable<BatchRecognizeInput> | AsyncIterable<BatchRecognizeInput>,
    options: BatchRecognizeOptions & {
      settle: true;
    }
  ): Promise<BatchItemResult<AnyOcrResult>[]>;
  batchRecognize(
    images: Iterable<BatchRecognizeInput> | AsyncIterable<BatchRecognizeInput>,
    options?: BatchRecognizeOptions
  ): Promise<AnyOcrResult[]>;

  /**
   * Streaming counterpart of {@link batchRecognize}: each image's result is
   * yielded as soon as it finishes (completion order), so callers needn't
   * buffer the whole batch. Every item carries its input `index` so the
   * caller can reorder.
   *
   * With `settle: false` (the default) the generator throws on the first
   * image failure; with `settle: true` failures arrive as
   * `{ status: "rejected" }`.
   */
  batchRecognizeStream(
    images: Iterable<BatchRecognizeInput> | AsyncIterable<BatchRecognizeInput>,
    options?: BatchRecognizeOptions
  ): AsyncGenerator<BatchItemResult<AnyOcrResult>>;

  /**
   * Work out the effective concurrency. `"auto"` (or unset) yields `1` when
   * an accelerator execution provider is configured, else a small CPU default.
   */
  private resolveConcurrency;

  private flattenResults;

  private groupResultsByLine;
}