// react-native-executorch
// An easy way to run AI models in React Native with ExecuTorch
import { RnExecutorchError } from '../errors/errorUtils';
import { LabelEnum, Triple, ResourceSource, PixelData, Frame } from './common';
import { CocoLabel } from '../constants/commonVision';
export { CocoLabel };
/**
* Represents a bounding box for a detected object in an image.
* @category Types
* @property {number} x1 - The x-coordinate of the top-left corner of the bounding box.
* @property {number} y1 - The y-coordinate of the top-left corner of the bounding box.
* @property {number} x2 - The x-coordinate of the bottom-right corner of the bounding box.
* @property {number} y2 - The y-coordinate of the bottom-right corner of the bounding box.
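* @example
* A minimal illustrative value; the numbers are made up for the example, and
* the coordinate units depend on the analyzed image:
* ```typescript
* const box: Bbox = { x1: 24, y1: 40, x2: 180, y2: 220 };
* ```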
*/
export interface Bbox {
  x1: number;
  y1: number;
  x2: number;
  y2: number;
}
/**
* Represents a detected object within an image, including its bounding box, label, and confidence score.
* @category Types
* @typeParam L - The label enum type for the detected object. Defaults to {@link CocoLabel}.
* @property {Bbox} bbox - The bounding box of the detected object, defined by its top-left (x1, y1) and bottom-right (x2, y2) coordinates.
* @property {keyof L} label - The class label of the detected object.
* @property {number} score - The confidence score of the detection, typically ranging from 0 to 1.
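* @example
* An illustrative detection using the default {@link CocoLabel} labels; the
* label and score are made-up sample values:
* ```typescript
* const detection: Detection = {
*   bbox: { x1: 24, y1: 40, x2: 180, y2: 220 },
*   label: 'PERSON',
*   score: 0.92,
* };
* ```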
*/
export interface Detection<L extends LabelEnum = typeof CocoLabel> {
bbox: Bbox;
label: keyof L;
score: number;
}
/**
* Options for configuring object detection inference.
* @category Types
* @typeParam L - The label enum type for filtering classes of interest.
* @property {number} [detectionThreshold] - Minimum confidence score for detections (0-1). Defaults to model-specific value.
* @property {number} [iouThreshold] - IoU threshold for non-maximum suppression (0-1). Defaults to model-specific value.
* @property {number} [inputSize] - Input size for multi-method models (e.g., 384, 512, or 640 for YOLO). If omitted, the model's default input size is used.
* @property {(keyof L)[]} [classesOfInterest] - Optional array of class labels to filter detections. Only detections matching these classes will be returned.
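* @example
* A sketch of an options object for a YOLO-family model; the threshold and
* size values are illustrative, not recommended defaults:
* ```typescript
* const options: ObjectDetectionOptions<typeof CocoLabel> = {
*   detectionThreshold: 0.6, // keep detections scoring at least 0.6
*   iouThreshold: 0.5, // non-maximum suppression overlap threshold
*   inputSize: 640, // e.g. for YOLO models (see getAvailableInputSizes)
*   classesOfInterest: ['PERSON', 'CAR'],
* };
* ```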
*/
export interface ObjectDetectionOptions<L extends LabelEnum> {
detectionThreshold?: number;
iouThreshold?: number;
inputSize?: number;
classesOfInterest?: (keyof L)[];
}
/**
* Per-model config for {@link ObjectDetectionModule.fromModelName}.
* Each model name maps to its required fields.
* @category Types
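* @example
* A sketch of a built-in model config; the `require` path is a placeholder and
* assumes {@link ResourceSource} accepts a bundled asset (or a URL string):
* ```typescript
* const source: ObjectDetectionModelSources = {
*   modelName: 'yolo26n',
*   modelSource: require('../assets/yolo26n.pte'),
* };
* ```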
*/
export type ObjectDetectionModelSources =
| { modelName: 'ssdlite-320-mobilenet-v3-large'; modelSource: ResourceSource }
| { modelName: 'rf-detr-nano'; modelSource: ResourceSource }
| { modelName: 'yolo26n'; modelSource: ResourceSource }
| { modelName: 'yolo26s'; modelSource: ResourceSource }
| { modelName: 'yolo26m'; modelSource: ResourceSource }
| { modelName: 'yolo26l'; modelSource: ResourceSource }
| { modelName: 'yolo26x'; modelSource: ResourceSource };
/**
* Union of all built-in object detection model names.
* @category Types
*/
export type ObjectDetectionModelName = ObjectDetectionModelSources['modelName'];
/**
* Configuration for a custom object detection model.
* @category Types
* @typeParam T - The label enum type for the model.
* @property {T} labelMap - The label mapping for the model.
* @property {object} [preprocessorConfig] - Optional preprocessing configuration with normalization parameters.
* @property {number} [defaultDetectionThreshold] - Default detection confidence threshold (0-1).
* @property {number} [defaultIouThreshold] - Default IoU threshold for non-maximum suppression (0-1).
* @property {readonly number[]} [availableInputSizes] - For multi-method models, the available input sizes (e.g., [384, 512, 640]).
* @property {number} [defaultInputSize] - For multi-method models, the default input size to use.
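* @example
* A sketch of a custom two-class config, assuming {@link LabelEnum} admits a
* plain string-to-number map; the normalization values shown are the common
* ImageNet statistics, not values mandated by the library:
* ```typescript
* const MyLabels = { CAT: 0, DOG: 1 } as const;
* const config: ObjectDetectionConfig<typeof MyLabels> = {
*   labelMap: MyLabels,
*   preprocessorConfig: {
*     normMean: [0.485, 0.456, 0.406],
*     normStd: [0.229, 0.224, 0.225],
*   },
*   defaultDetectionThreshold: 0.5,
*   defaultIouThreshold: 0.45,
* };
* ```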
*/
export type ObjectDetectionConfig<T extends LabelEnum> = {
labelMap: T;
preprocessorConfig?: { normMean?: Triple<number>; normStd?: Triple<number> };
defaultDetectionThreshold?: number;
defaultIouThreshold?: number;
} & (
| {
availableInputSizes: readonly number[];
defaultInputSize: number;
}
| {
availableInputSizes?: undefined;
defaultInputSize?: undefined;
}
);
/**
* Props for the `useObjectDetection` hook.
* @typeParam C - A {@link ObjectDetectionModelSources} config specifying which built-in model to load.
* @category Types
* @property {C} model - The model config containing `modelName` and `modelSource`.
* @property {boolean} [preventLoad] - When `true`, prevents the hook from automatically loading the model (and downloading its files on first use).
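* @example
* A sketch of deferring the load with `preventLoad`; the model source URL is a
* placeholder:
* ```typescript
* const detector = useObjectDetection({
*   model: { modelName: 'yolo26n', modelSource: 'https://example.com/yolo26n.pte' },
*   preventLoad: true, // skip automatic loading (and downloading) for now
* });
* ```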
*/
export interface ObjectDetectionProps<C extends ObjectDetectionModelSources> {
model: C;
preventLoad?: boolean;
}
/**
* Return type for the `useObjectDetection` hook.
* Manages the state and operations for Computer Vision object detection tasks.
* @typeParam L - The {@link LabelEnum} representing the model's class labels.
* @category Types
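* @example
* A sketch of consuming the hook state before running inference (`detector` is
* a hook instance as in {@link ObjectDetectionProps}; the image path is a
* placeholder):
* ```typescript
* const { error, isReady, downloadProgress, forward } = detector;
* if (error) {
*   console.warn('detector error:', error);
* } else if (!isReady) {
*   console.log(`downloading: ${Math.round(downloadProgress * 100)}%`);
* } else {
*   const detections = await forward('file:///path/to/image.jpg');
* }
* ```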
*/
export interface ObjectDetectionType<L extends LabelEnum> {
/**
* Contains the error object if the model failed to load or download, or if a runtime error occurred during detection.
*/
error: RnExecutorchError | null;
/**
* Indicates whether the object detection model is loaded and ready to process images.
*/
isReady: boolean;
/**
* Indicates whether the model is currently processing an image.
*/
isGenerating: boolean;
/**
* Represents the download progress of the model binary as a value between 0 and 1.
*/
downloadProgress: number;
/**
* Executes the model's forward pass with automatic input type detection.
* @param input - Image source (string path/URI or PixelData object)
* @param options - Optional configuration for detection inference
* @returns A Promise that resolves to an array of `Detection` objects.
* @throws {RnExecutorchError} If the model is not loaded or is currently processing another image.
* @example
* ```typescript
* // String path with options
* const detections1 = await model.forward('file:///path/to/image.jpg', {
* detectionThreshold: 0.7,
* inputSize: 640, // For YOLO models
* classesOfInterest: ['PERSON', 'CAR']
* });
*
* // Pixel data
* const detections2 = await model.forward({
* dataPtr: new Uint8Array(rgbPixels),
* sizes: [480, 640, 3],
* scalarType: ScalarType.BYTE
* }, { detectionThreshold: 0.5 });
* ```
*/
forward: (
input: string | PixelData,
options?: ObjectDetectionOptions<L>
) => Promise<Detection<L>[]>;
/**
* Returns the available input sizes for multi-method models (e.g., YOLO).
* Returns undefined for single-method models (e.g., RF-DETR, SSDLite).
* @returns Array of available input sizes or undefined
* @example
* ```typescript
* const sizes = model.getAvailableInputSizes(); // [384, 512, 640] for YOLO models
* ```
*/
getAvailableInputSizes: () => readonly number[] | undefined;
/**
* Synchronous worklet function for real-time VisionCamera frame processing.
* Automatically handles native buffer extraction and cleanup.
*
* **Use this for VisionCamera frame processing in worklets.**
* For async processing, use `forward()` instead.
*
* Available after model is loaded (`isReady: true`).
* @param frame - VisionCamera Frame object
* @param isFrontCamera - Whether the front camera is active, used for mirroring corrections.
* @param options - Optional configuration for detection inference
* @returns Array of Detection objects representing detected items in the frame.
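* @example
* A sketch of a VisionCamera frame processor, assuming `useFrameProcessor` from
* `react-native-vision-camera` and a hook instance `detector`; note that
* `runOnFrame` is `null` until the model is ready:
* ```typescript
* const frameProcessor = useFrameProcessor(
*   (frame) => {
*     'worklet';
*     if (detector.runOnFrame == null) return; // model not loaded yet
*     const detections = detector.runOnFrame(frame, false, {
*       detectionThreshold: 0.5,
*     });
*   },
*   [detector.runOnFrame]
* );
* ```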
*/
runOnFrame:
| ((
frame: Frame,
isFrontCamera: boolean,
options?: ObjectDetectionOptions<L>
) => Detection<L>[])
| null;
}