react-native-executorch
Version:
An easy way to run AI models in React Native with ExecuTorch
150 lines (133 loc) • 4.86 kB
text/typescript
import { symbols } from '../constants/ocr/symbols';
import { RnExecutorchError } from '../errors/errorUtils';
import { Frame, PixelData, ResourceSource } from './common';
/**
* OCRDetection represents a single detected text instance in an image,
* including its bounding box, recognized text, and confidence score.
* @category Types
*/
export interface OCRDetection {
/**
* An array of points defining the bounding box around the detected text.
* NOTE(review): the order of the points and their coordinate space are not
* specified in this file — confirm against the native implementation.
*/
bbox: Point[];
/**
* The recognized text within the bounding box.
*/
text: string;
/**
* The confidence score of the OCR detection, ranging from 0 to 1.
*/
score: number;
}
/**
* Point represents a coordinate in 2D space.
* NOTE(review): units and origin are not specified in this file — confirm
* before relying on them.
* @category Types
*/
export interface Point {
/**
* The x-coordinate of the point.
*/
x: number;
/**
* The y-coordinate of the point.
*/
y: number;
}
/**
* Configuration properties for the `useOCR` hook.
* @category Types
*/
export interface OCRProps {
/**
* Object containing the necessary model sources and configuration for the OCR pipeline.
* The pipeline uses two model binaries: a text detector and a text recognizer.
*/
model: {
/**
* The built-in model name, e.g. `'ocr-en'`. Used for telemetry and hook reload triggers.
* Pass one of the pre-built OCR constants (e.g. `OCR_ENGLISH`) to populate all required fields.
*/
modelName: OCRModelName;
/**
* `ResourceSource` that specifies the location of the text detector model binary.
*/
detectorSource: ResourceSource;
/**
* `ResourceSource` that specifies the location of the text recognizer model binary.
*/
recognizerSource: ResourceSource;
/**
* The language the OCR model is configured for (e.g., English, Polish, etc.),
* given as an `OCRLanguage` key.
*/
language: OCRLanguage;
};
/**
* When `true`, prevents the model from being loaded automatically (and its data
* from being downloaded on first load) after the hook runs.
* Defaults to `false`.
*/
preventLoad?: boolean;
}
/**
* Configuration properties for the `useVerticalOCR` hook.
* Extends `OCRProps`, so the `model` and `preventLoad` options apply here as well.
* @category Types
*/
export interface VerticalOCRProps extends OCRProps {
/**
* Boolean indicating whether to treat each character independently during recognition.
* Defaults to `false`.
*/
independentCharacters?: boolean;
}
/**
* Return type for the `useOCR` hook.
* Exposes the loading/processing state of the Optical Character Recognition (OCR)
* pipeline together with the functions used to run it.
* @category Types
*/
export interface OCRType {
/**
* Contains the error object if the models failed to load, download, or encountered
* a runtime error during recognition; `null` otherwise.
*/
error: RnExecutorchError | null;
/**
* Indicates whether both detector and recognizer models are loaded and ready to process images.
*/
isReady: boolean;
/**
* Indicates whether the model is currently processing an image.
*/
isGenerating: boolean;
/**
* Represents the total download progress of the model binaries as a value between 0 and 1.
*/
downloadProgress: number;
/**
* Executes the OCR pipeline (detection and recognition) on the provided image.
*
* Supports two input types:
* 1. **String path/URI**: File path, URL, or Base64-encoded string
* 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage)
*
* **Note**: For VisionCamera frame processing, use `runOnFrame` instead.
* @param input - Image source (string or PixelData object)
* @returns A Promise that resolves to the OCR results (recognized text, bounding boxes, and scores).
* @throws {RnExecutorchError} If the models are not loaded or are currently processing another image.
*/
forward: (input: string | PixelData) => Promise<OCRDetection[]>;
/**
* Synchronous worklet function for VisionCamera frame processing.
* Automatically handles native buffer extraction and cleanup.
*
* **Use this for VisionCamera frame processing in worklets.**
* For async processing, use `forward()` instead.
*
* **Note**: OCR is a two-stage pipeline (detection + recognition) and may not
* achieve real-time frame rates. Frames may be dropped if inference is still running.
*
* Available after model is loaded (`isReady: true`). The property is typed as
* nullable, so check for `null` before calling it.
* NOTE(review): presumably `null` until the model has loaded — confirm against the hook implementation.
* @param frame - VisionCamera Frame object
* @param isFrontCamera - Whether the front camera is active, used for mirroring corrections.
* @returns Array of OCRDetection results for the frame.
*/
runOnFrame: ((frame: Frame, isFrontCamera: boolean) => OCRDetection[]) | null;
}
/**
* Union of supported OCR language identifiers.
* Derived from the keys of the `symbols` table, so a language is supported
* exactly when a symbol set is defined for it there.
* @category Types
*/
export type OCRLanguage = keyof typeof symbols;
/**
* Union of all built-in OCR model names.
* Each name is an `OCRLanguage` key prefixed with `ocr-`, e.g. `'ocr-en'`, `'ocr-ja'`.
* @category Types
*/
export type OCRModelName = `ocr-${OCRLanguage}`;