unocr
Version:
Unified OCR library with multi-driver support for Tesseract.js and AI models, providing structured text extraction in a hast-based output format.
34 lines (30 loc) • 1.07 kB
TypeScript
import { DataType } from 'undio';
export { DataType } from 'undio';
import { Root } from 'hast';
export { Root } from 'hast';
/** Result of a recognition call: an alias for the `Root` type imported from 'hast'. */
type OCRResult = Root;
/** Input accepted by recognition calls: an alias for the `DataType` type imported from 'undio'. */
type OCRInput = DataType;
/** Options accepted as the second argument of the batch `recognizes` call. */
interface RecognizesOptions {
/**
 * NOTE(review): presumably the maximum number of inputs processed
 * concurrently — confirm against the implementation. The default used when
 * omitted is not visible in this declaration file.
 */
parallel?: number;
}
/**
 * Contract for an OCR backend; an object of this shape is what
 * `OCRManagerOptions.driver` expects.
 *
 * Only `recognize` is required; every other member is optional.
 *
 * @typeParam OptionsT - Type of the driver's `options`; defaults to `DriverOptions`.
 */
interface Driver<OptionsT = DriverOptions> {
/** Optional name for the driver. */
name?: string;
/** Optional driver configuration, typed by `OptionsT`. */
options?: OptionsT;
/** Recognizes a single input; the result may be returned directly or as a promise. */
recognize: (input: OCRInput) => MaybePromise<OCRResult>;
/**
 * Optional batch variant: takes an array of inputs plus optional
 * `RecognizesOptions` and yields an array of results.
 * NOTE(review): result order presumably matches input order — confirm.
 */
recognizes?: (inputs: OCRInput[], options?: RecognizesOptions) => MaybePromise<OCRResult[]>;
/**
 * Optional teardown hook; may complete synchronously or via a promise.
 * NOTE(review): presumably releases resources held by the driver — confirm.
 */
dispose?: () => MaybePromise<void>;
}
/** Default options type for `Driver`: an open bag keyed by arbitrary strings. */
interface DriverOptions {
// Values are `any`, so reads from this bag are not type-checked. A driver
// can supply a precise options type through the `OptionsT` parameter of
// `Driver` instead.
[key: string]: any;
}
/** Options accepted by `createOCRManager`. */
interface OCRManagerOptions {
/**
 * The driver to use (required). Typed as `Driver` with its default type
 * argument, so `driver.options` is seen here as `DriverOptions`.
 */
driver: Driver;
}
/**
 * Either a plain `T` or a `Promise<T>`. Callers can `await` the value in both
 * cases, since awaiting a non-promise yields the value itself.
 */
type MaybePromise<T> = T | Promise<T>;
/**
 * Shape of the object returned by `createOCRManager`.
 *
 * Unlike `Driver`, where `recognizes` and `dispose` are optional, all three
 * members are always present here.
 */
interface OCRManager {
  /** Recognizes a single input; the result may be returned directly or as a promise. */
  recognize: (input: OCRInput) => MaybePromise<OCRResult>;
  /** Recognizes an array of inputs, optionally tuned by `RecognizesOptions`. */
  recognizes: (
    inputs: OCRInput[],
    options?: RecognizesOptions,
  ) => MaybePromise<OCRResult[]>;
  /** Teardown hook; may complete synchronously or via a promise. */
  dispose: () => MaybePromise<void>;
}

/**
 * Creates an OCR manager from the given options.
 *
 * NOTE(review): the manager presumably forwards calls to `options.driver` —
 * the implementation is not visible in this declaration file; confirm there.
 *
 * @param options - Manager configuration; `driver` is required.
 * @returns An object exposing `recognize`, `recognizes` and `dispose`.
 */
declare function createOCRManager(options: OCRManagerOptions): OCRManager;
// `createOCRManager` is exported as a value; the remaining names are exported
// as types only (`export type`), so they are erased from emitted JavaScript.
export { createOCRManager };
export type { Driver, DriverOptions, MaybePromise, OCRInput, OCRManagerOptions, OCRResult, RecognizesOptions };