UNPKG

ppu-paddle-ocr

Version:

Lightweight, probably the fastest PaddleOCR SDK in TypeScript. Runs anywhere JavaScript runs: Node.js, Bun, Deno, mobile react-native, web browsers, and browser extensions. Docker & CLI supported. The official SDK is browser-only. Accurate text detection

282 lines (281 loc) 9.89 kB
import type { InferenceSession } from "onnxruntime-common";

/**
 * The image processing engine to use for preprocessing.
 *
 * - `"opencv"` – Uses OpenCV.js (`ImageProcessor` / `Contours` from `ppu-ocv`).
 *   More accurate region detection; recommended for production use. **(default)**
 * - `"canvas-native"` – Uses pure HTML Canvas operations (`CanvasProcessor` from `ppu-ocv/canvas`).
 *   No OpenCV dependency; suitable for lightweight or browser-extension environments.
 */
export type ProcessingEngine = "opencv" | "canvas-native";

/**
 * Locations of the OCR model and dictionary files.
 *
 * Every member accepts either an in-memory `ArrayBuffer` or a `string` path.
 *
 * **Preset models** (import from `"ppu-paddle-ocr"`):
 *
 * - **PP-OCRv6**: `V6_SMALL_MODEL` (default), `V6_MEDIUM_MODEL`, `V6_TINY_MODEL`
 * - **PP-OCRv5**: `V5_EN_MOBILE_MODEL`, `V5_EN_MOBILE_INT8_MODEL`, `V5_EN_SERVER_MODEL`, `V5_MOBILE_MODEL`, `V5_SERVER_MODEL`
 * - **PP-OCRv5 Languages**: `V5_ARABIC_MOBILE_MODEL`, `V5_CYRILLIC_MOBILE_MODEL`, `V5_DEVANAGARI_MOBILE_MODEL`, `V5_GREEK_MOBILE_MODEL`, `V5_ESLAV_MOBILE_MODEL`, `V5_KOREAN_MOBILE_MODEL`, `V5_LATIN_MOBILE_MODEL`, `V5_TAMIL_MOBILE_MODEL`, `V5_TELUGU_MOBILE_MODEL`, `V5_THAI_MOBILE_MODEL`
 * - **PP-OCRv4**: `V4_EN_MOBILE_MODEL`, `V4_MOBILE_MODEL`, `V4_SERVER_MODEL`, `V4_SERVER_DOC_MODEL`
 * - **PP-OCRv3**: `V3_MOBILE_MODEL`, `V3_JAPANESE_MOBILE_MODEL`
 *
 * Or provide granular custom values for `detection`, `recognition`, `charactersDictionary`.
 *
 * @example
 * ```ts
 * import { PaddleOcrService, V6_SMALL_MODEL } from "ppu-paddle-ocr";
 * const service = new PaddleOcrService({ model: V6_SMALL_MODEL });
 * ```
 */
export type ModelPathOptions = {
  /**
   * ONNX file buffer or path for the text detection model.
   * Required if not using the library's built-in default model.
   */
  detection?: ArrayBuffer | string;

  /**
   * ONNX file buffer or path for the text recognition model.
   * Required if not using the library's built-in default model.
   */
  recognition?: ArrayBuffer | string;

  /**
   * File buffer or path for the character dictionary.
   * Required if not using the library's built-in default dictionary (en_dict.txt).
   */
  charactersDictionary?: ArrayBuffer | string;
};

/**
 * Controls verbose output and image dumps for debugging OCR.
 */
export type DebuggingOptions = {
  /**
   * Enable detailed logging of each processing step.
   * @default false
   */
  verbose?: boolean;

  /**
   * Save intermediate image data to disk for inspection.
   * @default false
   */
  debug?: boolean;

  /**
   * Directory where debug images will be written.
   * Relative to the current working directory.
   * @default "out"
   */
  debugFolder?: string;
};

/**
 * Parameters for the text detection preprocessing and filtering stage.
 */
export type DetectionOptions = {
  /**
   * Per-channel mean values used to normalize input pixels [R, G, B].
   * @default [0.485, 0.456, 0.406]
   */
  mean?: [number, number, number];

  /**
   * Per-channel standard deviation values used to normalize input pixels [R, G, B].
   * @default [0.229, 0.224, 0.225]
   */
  stdDeviation?: [number, number, number];

  /**
   * Maximum dimension (longest side) for input images, in pixels.
   * Images above this size will be scaled down, maintaining aspect ratio.
   * @default 640
   */
  maxSideLength?: number;

  /**
   * Vertical padding applied to each detected box, as a fraction of its height.
   * @default 0.4
   */
  paddingVertical?: number;

  /**
   * Horizontal padding applied to each detected box, expressed as a fraction.
   *
   * NOTE(review): the previous wording here was a verbatim copy of
   * `paddingVertical`'s ("vertical as a fraction of its height"). Confirm
   * against the detection implementation whether this fraction is relative to
   * the box height or the box width.
   * @default 0.6
   */
  paddingHorizontal?: number;

  /**
   * Remove detected boxes with area below this threshold, in pixels.
   * @default 50
   */
  minimumAreaThreshold?: number;
};

/**
 * Strategy for recognizing text in detected regions.
 *
 * - `"per-box"` – Each detected box is recognized individually (most accurate, n inferences).
 * - `"per-line"` – Boxes on the same line are merged and recognized together (fewer inferences, good accuracy).
 * - `"cross-line"` – Crops are packed into uniform-width batches across lines to minimize inference count.
 *
 * @default "per-line"
 */
export type RecognitionStrategy = "per-box" | "per-line" | "cross-line";

/**
 * Parameters for the text recognition preprocessing stage.
 */
export type RecognitionOptions = {
  /**
   * Fixed height for input images, in pixels.
   * Models will resize width proportionally.
   * @default 48
   */
  imageHeight?: number;

  /**
   * Recognition strategy for processing detected text regions.
   * - `"per-box"` – Each box recognized individually (most accurate, n inferences)
   * - `"per-line"` – Same-line boxes merged per line (fewer inferences, good accuracy)
   * - `"cross-line"` – Crops packed into uniform-width batches across lines (fewest inferences)
   * @default "per-line"
   */
  strategy?: RecognitionStrategy;

  /**
   * Width multiplier for the cross-line strategy's bin-packing target.
   * The batch target width is computed as `maxLineWidth × factor`.
   * Larger values pack more lines per batch (fewer inferences, potentially
   * lower accuracy); smaller values keep lines isolated (more inferences).
   *
   * Only used when `strategy` is `"cross-line"`.
   * @default 1.0
   */
  crossLineWidthFactor?: number;

  /**
   * The loaded character dictionary, as a list of strings, used to decode
   * recognition results.
   *
   * NOTE(review): this is the only required member of an otherwise
   * all-optional options type, so any caller that sets
   * `PaddleOptions.recognition` must also supply it — confirm that is intended.
   */
  charactersDictionary: string[];
};

/**
 * Options for individual recognize() calls.
 */
export type RecognizeOptions = {
  /**
   * Return flattened results instead of grouped by lines.
   * @default false
   */
  flatten?: boolean;

  /**
   * Override the recognition strategy for this call.
   * If omitted, the strategy from the service options is used.
   */
  strategy?: RecognitionStrategy;

  /**
   * Custom character dictionary for this specific call.
   * If provided, caching will be disabled for this call.
   */
  dictionary?: string | ArrayBuffer;

  /**
   * Disable caching for this specific call.
   * @default false
   */
  noCache?: boolean;
};

/**
 * Options for `batchRecognize()` / `batchRecognizeStream()`.
 *
 * Extends {@link RecognizeOptions} (applied to every image) with controls for
 * concurrency, error handling, progress, and cancellation.
 */
export type BatchRecognizeOptions = RecognizeOptions & {
  /**
   * Maximum number of images processed concurrently.
   *
   * `"auto"` (default) picks `1` when an accelerator execution provider
   * (e.g. CUDA, WebGPU) is configured — a shared inference session serializes
   * device work anyway and parallel runs would stack VRAM — and a small CPU
   * default otherwise, to overlap JS preprocessing with native inference.
   * @default "auto"
   */
  concurrency?: number | "auto";

  /**
   * When `true`, a failing image does not abort the batch: its slot is filled
   * with a `{ status: "rejected", reason }` entry. When `false` (default), the
   * first failure rejects the whole call, matching `recognize()`.
   * @default false
   */
  settle?: boolean;

  /**
   * Cancels the batch. Pending images are not scheduled and the call rejects
   * with an `AbortError`. In-flight inferences are allowed to finish but their
   * results are discarded.
   */
  signal?: AbortSignal;

  /**
   * Invoked after each image settles with the running completed count and the
   * total. `total` is `undefined` when the input length is not known up front
   * (it is known for inputs such as an array).
   */
  onProgress?: (done: number, total: number | undefined) => void;
};

/**
 * Controls the image processing backend.
 */
export type ProcessingOptions = {
  /**
   * The image processing engine used for detection preprocessing and
   * recognition resizing.
   *
   * - `"opencv"` – Uses OpenCV.js via `ppu-ocv` (more accurate, **default**).
   * - `"canvas-native"` – Pure canvas operations via `ppu-ocv/canvas` (no OpenCV dependency).
   *
   * @default "opencv"
   */
  engine?: ProcessingEngine;
};

/**
 * Full configuration for the PaddleOCR service.
 * Combines model locations with detection, recognition, debugging, session,
 * and processing parameters. Every section is optional.
 */
export type PaddleOptions = {
  /**
   * Paths or in-memory buffers for the OCR model components.
   */
  model?: ModelPathOptions;

  /**
   * Controls parameters for text detection.
   */
  detection?: DetectionOptions;

  /**
   * Controls parameters for text recognition.
   */
  recognition?: RecognitionOptions;

  /**
   * Controls logging and image dump behavior for debugging.
   */
  debugging?: DebuggingOptions;

  /**
   * ONNX Runtime session configuration options.
   */
  session?: SessionOptions;

  /**
   * Controls the image processing backend.
   */
  processing?: ProcessingOptions;
};

/**
 * ONNX Runtime session configuration options.
 *
 * Extends the native `InferenceSession.SessionOptions` from ONNX Runtime
 * so that any valid provider configuration (e.g. WebAssembly, CUDA, CoreML)
 * is accepted without type mismatch.
 */
export type SessionOptions = InferenceSession.SessionOptions & {
  /**
   * Execution providers to use for inference (e.g., 'cpu', 'cuda', 'wasm').
   * Accepts provider name strings or provider-specific configuration objects.
   *
   * Declared with the base type's own `executionProviders` type, so this
   * member does not change what values are accepted.
   * @default ['cpu']
   */
  executionProviders?: InferenceSession.SessionOptions["executionProviders"];
};

/**
 * Simple rectangle representation.
 */
export type Box = {
  /** X-coordinate of the top-left corner. */
  x: number;
  /** Y-coordinate of the top-left corner. */
  y: number;
  /** Width of the box in pixels. */
  width: number;
  /** Height of the box in pixels. */
  height: number;
};