ppu-paddle-ocr

Version:

Blazing-fast and lightweight PaddleOCR library for Node.js and Bun. Perform accurate text detection, recognition, and image deskew with a simple, modern, and type-safe API. Ideal for document processing, data extraction, and computer vision tasks.

PT-Perkasa-Pilar-Utama/ppu-paddle-ocr

102 lines (101 loc) • 3.13 kB

TypeScript

import * as ort from "onnxruntime-node";
import { Canvas } from "ppu-ocv";
import type { Box, DebuggingOptions, RecognitionOptions } from "../interface";

/**
 * One recognized text region, as returned by {@link RecognitionService.run}.
 */
export interface RecognitionResult {
  /** Text recognized inside the region. */
  text: string;
  /** Bounding box of the region (shape is defined by `Box` in `../interface`). */
  box: Box;
  /**
   * Confidence reported for the recognized text.
   * NOTE(review): the numeric scale (e.g. 0..1) is not visible in this declaration; confirm in the implementation.
   */
  confidence: number;
}

/**
 * Service for detecting and recognizing text in images.
 *
 * This is an ambient declaration only: private members are listed without
 * types or bodies, so their exact behavior must be checked in the
 * implementation file. The only public entry points declared here are the
 * constructor and {@link RecognitionService.run}; the bounding boxes are
 * supplied by the caller rather than computed by `run` itself.
 */
export declare class RecognitionService {
  /** Recognition options held by the instance (presumably derived from the constructor's `options`; confirm). */
  private readonly options;
  /** Debugging options held by the instance (presumably derived from the constructor's `debugging`; confirm). */
  private readonly debugging;
  /** ONNX Runtime inference session (presumably the one passed to the constructor; confirm). */
  private readonly session;
  /** Internal helper object; its type and role are not visible in this declaration. */
  private readonly toolkit;
  /** NOTE(review): presumably the CTC "blank" class index; value not visible here. */
  private static readonly BLANK_INDEX;
  /** NOTE(review): presumably the placeholder emitted for unknown characters; value not visible here. */
  private static readonly UNK_TOKEN;
  /** NOTE(review): presumably the minimum accepted width of a cropped region; value and unit not visible here. */
  private static readonly MIN_CROP_WIDTH;

  /**
   * Creates a recognition service.
   * @param session ONNX Runtime inference session used for recognition
   * @param options Optional partial recognition options; omitted fields presumably fall back to defaults (confirm)
   * @param debugging Optional partial debugging options; omitted fields presumably fall back to defaults (confirm)
   */
  constructor(session: ort.InferenceSession, options?: Partial<RecognitionOptions>, debugging?: Partial<DebuggingOptions>);

  /**
   * Logs a message if verbose debugging is enabled
   */
  private log;

  /**
   * Main method to run text recognition on an image with detected regions
   * @param image The original image buffer or image in Canvas
   * @param detection Array of bounding boxes from text detection
   * @param charactersDictionary Optional custom character dictionary
   * @returns Promise resolving to an array of recognition results (text, bounding box and confidence), sorted in reading order
   */
  run(image: ArrayBuffer | Canvas, detection: Box[], charactersDictionary?: string[]): Promise<RecognitionResult[]>;

  /**
   * Filter out invalid boxes
   */
  private filterValidBoxes;

  /**
   * Process all valid boxes in parallel using Promise.all
   */
  private processBoxesInParallel;

  /**
   * Process a single text box
   */
  private processBox;

  /**
   * Sort recognition results by reading order (top to bottom, left to right)
   */
  private sortResultsByReadingOrder;

  /**
   * Validates if a bounding box has valid dimensions
   */
  private isValidBox;

  /**
   * Crops a region from the source canvas based on bounding box
   */
  private cropRegion;

  /**
   * Saves a debug image of the cropped region
   */
  private saveDebugCrop;

  /**
   * Logs details about the processing of a text region
   */
  private logProcessingDetails;

  /**
   * Recognizes text in a cropped canvas region
   */
  private recognizeText;

  /**
   * Preprocesses a cropped image for the recognition model
   */
  private preprocessImage;

  /**
   * Creates a normalized image tensor from the preprocessed canvas
   */
  private createImageTensor;

  /**
   * Runs the ONNX inference session with the prepared tensor
   */
  private runInference;

  /**
   * Decodes the results from the model output tensor
   */
  private decodeResults;

  /**
   * Performs greedy decoding on CTC model output logits
   */
  private ctcGreedyDecode;

  /**
   * Appends the appropriate character to the decoded text
   */
  private appendCharacterToText;

  /**
   * Finds the class with maximum probability for a given timestep
   */
  private findMaxProbabilityClass;

  /**
   * Checks if the predicted class index is valid for the character dictionary
   */
  private isValidDictionaryIndex;
}