whisper.rn

React Native binding of whisper.cpp

index.d.ts
import { NativeWhisperContext } from './NativeRNWhisper';
import type { TranscribeOptions, TranscribeResult } from './NativeRNWhisper';
import AudioSessionIos from './AudioSessionIos';
import type { AudioSessionCategoryIos, AudioSessionCategoryOptionIos, AudioSessionModeIos } from './AudioSessionIos';
export type { TranscribeOptions, TranscribeResult, AudioSessionCategoryIos, AudioSessionCategoryOptionIos, AudioSessionModeIos, };
export type TranscribeNewSegmentsResult = {
    nNew: number;
    totalNNew: number;
    result: string;
    segments: TranscribeResult['segments'];
};
export type TranscribeNewSegmentsNativeEvent = {
    contextId: number;
    jobId: number;
    result: TranscribeNewSegmentsResult;
};
export type TranscribeFileOptions = TranscribeOptions & {
    /**
     * Progress callback; the progress value is between 0 and 100
     */
    onProgress?: (progress: number) => void;
    /**
     * Callback invoked when new segments are transcribed
     */
    onNewSegments?: (result: TranscribeNewSegmentsResult) => void;
};
export type TranscribeProgressNativeEvent = {
    contextId: number;
    jobId: number;
    progress: number;
};
export type AudioSessionSettingIos = {
    category: AudioSessionCategoryIos;
    options?: AudioSessionCategoryOptionIos[];
    mode?: AudioSessionModeIos;
    active?: boolean;
};
export type TranscribeRealtimeOptions = TranscribeOptions & {
    /**
     * Realtime recording max duration in seconds.
     * Because of the whisper.cpp hard constraint (audio is processed in chunks of 30 seconds),
     * a value <= 30 seconds is recommended. (Default: 30)
     */
    realtimeAudioSec?: number;
    /**
     * Optimize transcription performance by slicing the audio samples when `realtimeAudioSec` > 30.
     * Set `realtimeAudioSliceSec` < 30 so each slice stays within the whisper.cpp 30-second chunk constraint.
     * (Default: equal to `realtimeAudioSec`)
     */
    realtimeAudioSliceSec?: number;
    /**
     * Minimum duration of audio, in seconds, required to start transcribing each slice.
     * The minimum value is 0.5 s and the maximum value is `realtimeAudioSliceSec`. (Default: 1)
     */
    realtimeAudioMinSec?: number;
    /**
     * Output path for the recorded audio file. If not set, the audio file will not be saved.
     * (Default: undefined)
     */
    audioOutputPath?: string;
    /**
     * Start transcribing during recording only when the audio volume exceeds the threshold,
     * using VAD (Voice Activity Detection).
     * The first VAD check is triggered after 2 seconds of recording.
     * (Default: false)
     */
    useVad?: boolean;
    /**
     * Length of the collected audio used for VAD, in milliseconds; cannot be less than 2000 ms. (Default: 2000)
     */
    vadMs?: number;
    /**
     * VAD threshold. (Default: 0.6)
     */
    vadThold?: number;
    /**
     * Frequency threshold of the high-pass filter applied in VAD.
     * (Default: 100.0)
     */
    vadFreqThold?: number;
    /**
     * iOS: Audio session settings applied when transcription starts.
     * Keep empty to use the current audio session state.
     */
    audioSessionOnStartIos?: AudioSessionSettingIos;
    /**
     * iOS: Audio session settings applied when transcription stops.
     * - Keep empty to keep the last audio session state.
     * - Use `restore` to restore the audio session state from before transcription started.
     */
    audioSessionOnStopIos?: string | AudioSessionSettingIos;
};
export type TranscribeRealtimeEvent = {
    contextId: number;
    jobId: number;
    /** Whether audio is being captured; when false, the event carries the final result */
    isCapturing: boolean;
    isStoppedByAction?: boolean;
    code: number;
    data?: TranscribeResult;
    error?: string;
    processTime: number;
    recordingTime: number;
    slices?: Array<{
        code: number;
        error?: string;
        data?: TranscribeResult;
        processTime: number;
        recordingTime: number;
    }>;
};
export type TranscribeRealtimeNativePayload = {
    /** Whether audio is being captured; when false, the event carries the final result */
    isCapturing: boolean;
    isStoppedByAction?: boolean;
    code: number;
    processTime: number;
    recordingTime: number;
    isUseSlices: boolean;
    sliceIndex: number;
    data?: TranscribeResult;
    error?: string;
};
export type TranscribeRealtimeNativeEvent = {
    contextId: number;
    jobId: number;
    payload: TranscribeRealtimeNativePayload;
};
export type BenchResult = {
    config: string;
    nThreads: number;
    encodeMs: number;
    decodeMs: number;
    batchMs: number;
    promptMs: number;
};
export declare class WhisperContext {
    id: number;
    gpu: boolean;
    reasonNoGPU: string;
    constructor({ contextId, gpu, reasonNoGPU, }: NativeWhisperContext);
    private transcribeWithNativeMethod;
    /**
     * Transcribe an audio file (file path or base64-encoded wav file)
     * base64: the `data:audio/wav;base64,` prefix is required
     */
    transcribe(filePathOrBase64: string | number, options?: TranscribeFileOptions): {
        /** Stop the transcription */
        stop: () => Promise<void>;
        /** Transcription result promise */
        promise: Promise<TranscribeResult>;
    };
    /**
     * Transcribe audio data (base64-encoded float32 PCM data)
     */
    transcribeData(data: string, options?: TranscribeFileOptions): {
        stop: () => Promise<void>;
        promise: Promise<TranscribeResult>;
    };
    /** Transcribe the microphone audio stream; microphone permission is required */
    transcribeRealtime(options?: TranscribeRealtimeOptions): Promise<{
        /** Stop the realtime transcription */
        stop: () => Promise<void>;
        /** Subscribe to realtime transcription events */
        subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void;
    }>;
    bench(maxThreads: number): Promise<BenchResult>;
    release(): Promise<void>;
}
export type ContextOptions = {
    filePath: string | number;
    /**
     * Core ML model assets. If you're using `require` for filePath,
     * this option is required to enable Core ML;
     * you will need to bundle weights/weight.bin, model.mil, and coremldata.bin into the app via `require`
     */
    coreMLModelAsset?: {
        filename: string;
        assets: string[] | number[];
    };
    /** Whether the file path is a bundle asset (for a plain string filePath) */
    isBundleAsset?: boolean;
    /** Prefer the Core ML model if it exists. If set to false, the Core ML model will not be used even if it exists. */
    useCoreMLIos?: boolean;
    /** Use GPU if available. Currently iOS only; if enabled, the Core ML option will be ignored. */
    useGpu?: boolean;
    /** Use Flash Attention; only recommended if a GPU is available */
    useFlashAttn?: boolean;
};
export declare function initWhisper({ filePath, coreMLModelAsset, isBundleAsset, useGpu, useCoreMLIos, useFlashAttn, }: ContextOptions): Promise<WhisperContext>;
export declare function releaseAllWhisper(): Promise<void>;
/** Current version of whisper.cpp */
export declare const libVersion: string;
/** Whether Core ML models are used on iOS */
export declare const isUseCoreML: boolean;
/** Whether falling back to CPU is allowed if loading the Core ML model fails */
export declare const isCoreMLAllowFallback: boolean;
export { AudioSessionIos };
//# sourceMappingURL=index.d.ts.map
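
For reference, a minimal usage sketch based only on the declarations above: load a model with initWhisper, transcribe an audio file with WhisperContext.transcribe, then release the context. The file paths are placeholders, and the `language` option and the `result` field are assumed to be part of TranscribeOptions / TranscribeResult from ./NativeRNWhisper, which are not shown in this file.

import { initWhisper } from 'whisper.rn';

async function transcribeSample(): Promise<string> {
  // Load a ggml model file; `filePath` may also be a `require(...)` asset id.
  const whisperContext = await initWhisper({
    filePath: 'file:///path/to/ggml-tiny.en.bin', // placeholder path
  });

  // `transcribe` returns a stop handle and a result promise; only the promise is used here.
  const { promise } = whisperContext.transcribe('file:///path/to/sample.wav', {
    language: 'en', // assumed TranscribeOptions field (declared in NativeRNWhisper)
    onProgress: (progress) => console.log(`progress: ${progress}%`),
    onNewSegments: ({ nNew, segments }) =>
      console.log(`received ${nNew} new segment(s)`, segments),
  });

  const transcription = await promise;
  // `result` is assumed to be the full-text field of TranscribeResult.
  console.log(transcription.result);

  // Free native resources once the context is no longer needed.
  await whisperContext.release();
  return transcription.result;
}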
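
A sketch of realtime transcription with transcribeRealtime, again derived only from the types above; microphone permission handling is omitted and the option values are illustrative, not recommendations.

import { initWhisper } from 'whisper.rn';
import type { TranscribeRealtimeEvent } from 'whisper.rn';

async function startRealtime() {
  const whisperContext = await initWhisper({
    filePath: 'file:///path/to/ggml-tiny.en.bin', // placeholder path
  });

  // Record for up to 60 s, sliced into <= 25 s chunks, gated by VAD.
  const { stop, subscribe } = await whisperContext.transcribeRealtime({
    realtimeAudioSec: 60,
    realtimeAudioSliceSec: 25,
    useVad: true,
  });

  subscribe((event: TranscribeRealtimeEvent) => {
    const { isCapturing, data, processTime, recordingTime } = event;
    console.log(
      `capturing=${isCapturing} process=${processTime}ms recorded=${recordingTime}ms\n` +
        (data?.result ?? ''), // `result` assumed to be TranscribeResult's text field
    );
    if (!isCapturing) {
      console.log('realtime transcription finished'); // final event for this job
    }
  });

  // Call the returned `stop` handle (e.g. from a button press) to end early.
  return stop;
}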
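
transcribeData accepts base64-encoded float32 PCM instead of a file. A sketch, assuming 16 kHz mono samples (the rate whisper.cpp expects) and the `buffer` polyfill package for base64 encoding in React Native:

import { Buffer } from 'buffer'; // assumes the `buffer` polyfill package is installed
import { initWhisper } from 'whisper.rn';

async function transcribeRawPcm(samples: Float32Array) {
  const whisperContext = await initWhisper({
    filePath: 'file:///path/to/ggml-tiny.en.bin', // placeholder path
  });

  // Encode the raw float32 samples (16 kHz mono) as base64.
  const base64Pcm = Buffer.from(
    samples.buffer,
    samples.byteOffset,
    samples.byteLength,
  ).toString('base64');

  const { promise } = whisperContext.transcribeData(base64Pcm);
  const result = await promise;
  await whisperContext.release();
  return result;
}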