UNPKG

react-native-executorch

Version:

An easy way to run AI models in React Native with ExecuTorch

150 lines (133 loc) 4.86 kB
// All imports below are consumed purely in type positions (`typeof symbols`,
// interface members), so they are declared as type-only imports and are
// erased from the emitted JavaScript.
import type { symbols } from '../constants/ocr/symbols';
import type { RnExecutorchError } from '../errors/errorUtils';
import type { Frame, PixelData, ResourceSource } from './common';

/**
 * OCRDetection represents a single detected text instance in an image,
 * including its bounding box, recognized text, and confidence score.
 * @category Types
 */
export interface OCRDetection {
  /**
   * An array of points defining the bounding box around the detected text.
   */
  bbox: Point[];

  /**
   * The recognized text within the bounding box.
   */
  text: string;

  /**
   * The confidence score of the OCR detection, ranging from 0 to 1.
   */
  score: number;
}

/**
 * Point represents a coordinate in 2D space.
 * @category Types
 */
export interface Point {
  /**
   * The x-coordinate of the point.
   */
  x: number;

  /**
   * The y-coordinate of the point.
   */
  y: number;
}

/**
 * Configuration properties for the `useOCR` hook.
 * @category Types
 */
export interface OCRProps {
  /**
   * Object containing the necessary model sources and configuration for the OCR pipeline.
   */
  model: {
    /**
     * The built-in model name, e.g. `'ocr-en'`. Used for telemetry and hook reload triggers.
     * Pass one of the pre-built OCR constants (e.g. `OCR_ENGLISH`) to populate all required fields.
     */
    modelName: OCRModelName;

    /**
     * `ResourceSource` that specifies the location of the text detector model binary.
     */
    detectorSource: ResourceSource;

    /**
     * `ResourceSource` that specifies the location of the text recognizer model binary.
     */
    recognizerSource: ResourceSource;

    /**
     * The language configuration key for the OCR model (e.g., English, Polish, etc.).
     */
    language: OCRLanguage;
  };

  /**
   * Boolean that can prevent automatic model loading (and downloading the data if loaded for the first time) after running the hook.
   * Defaults to `false`.
   */
  preventLoad?: boolean;
}

/**
 * Configuration properties for the `useVerticalOCR` hook.
 * Accepts everything `OCRProps` does, plus vertical-OCR specific options.
 * @category Types
 */
export interface VerticalOCRProps extends OCRProps {
  /**
   * Boolean indicating whether to treat each character independently during recognition.
   * Defaults to `false`.
   */
  independentCharacters?: boolean;
}

/**
 * Return type for the `useOCR` hook.
 * Manages the state and operations for Optical Character Recognition (OCR).
 * @category Types
 */
export interface OCRType {
  /**
   * Contains the error object if the models failed to load, download, or encountered a runtime error during recognition.
   * `null` when there is no error to report.
   */
  error: RnExecutorchError | null;

  /**
   * Indicates whether both detector and recognizer models are loaded and ready to process images.
   */
  isReady: boolean;

  /**
   * Indicates whether the model is currently processing an image.
   */
  isGenerating: boolean;

  /**
   * Represents the total download progress of the model binaries as a value between 0 and 1.
   */
  downloadProgress: number;

  /**
   * Executes the OCR pipeline (detection and recognition) on the provided image.
   *
   * Supports two input types:
   * 1. **String path/URI**: File path, URL, or Base64-encoded string
   * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage)
   *
   * **Note**: For VisionCamera frame processing, use `runOnFrame` instead.
   * @param input - Image source (string or PixelData object)
   * @returns A Promise that resolves to the OCR results (recognized text and bounding boxes).
   * @throws {RnExecutorchError} If the models are not loaded or are currently processing another image.
   */
  forward: (input: string | PixelData) => Promise<OCRDetection[]>;

  /**
   * Synchronous worklet function for VisionCamera frame processing.
   * Automatically handles native buffer extraction and cleanup.
   *
   * **Use this for VisionCamera frame processing in worklets.**
   * For async processing, use `forward()` instead.
   *
   * **Note**: OCR is a two-stage pipeline (detection + recognition) and may not
   * achieve real-time frame rates. Frames may be dropped if inference is still running.
   *
   * Available after model is loaded (`isReady: true`). The property is typed as
   * nullable; presumably it is `null` until then — confirm against the hook
   * implementation — so callers should null-check before invoking it.
   * @param frame - VisionCamera Frame object
   * @param isFrontCamera - Whether the front camera is active, used for mirroring corrections.
   * @returns Array of OCRDetection results for the frame.
   */
  runOnFrame: ((frame: Frame, isFrontCamera: boolean) => OCRDetection[]) | null;
}

/**
 * Union of supported OCR language keys, derived from the keys of the
 * `symbols` constant (one key per available symbol set).
 * @category Types
 */
export type OCRLanguage = keyof typeof symbols;

/**
 * Union of all built-in OCR model names.
 * Each name is derived from the language code, e.g. `'ocr-en'`, `'ocr-ja'`.
 * @category Types
 */
export type OCRModelName = `ocr-${OCRLanguage}`;