whisper.rn

React Native binding of whisper.cpp

import { NativeWhisperContext, NativeWhisperVadContext } from './NativeRNWhisper';
import type { TranscribeOptions, TranscribeResult, VadOptions, VadSegment } from './NativeRNWhisper';
import AudioSessionIos from './AudioSessionIos';
import type { AudioSessionCategoryIos, AudioSessionCategoryOptionIos, AudioSessionModeIos } from './AudioSessionIos';
export type { TranscribeOptions, TranscribeResult, AudioSessionCategoryIos, AudioSessionCategoryOptionIos, AudioSessionModeIos, VadOptions, VadSegment, };
export type TranscribeNewSegmentsResult = {
    nNew: number;
    totalNNew: number;
    result: string;
    segments: TranscribeResult['segments'];
};
export type TranscribeNewSegmentsNativeEvent = {
    contextId: number;
    jobId: number;
    result: TranscribeNewSegmentsResult;
};
export type TranscribeFileOptions = TranscribeOptions & {
    /**
     * Progress callback; the progress is between 0 and 100
     */
    onProgress?: (progress: number) => void;
    /**
     * Callback invoked when new segments are transcribed
     */
    onNewSegments?: (result: TranscribeNewSegmentsResult) => void;
};
export type TranscribeProgressNativeEvent = {
    contextId: number;
    jobId: number;
    progress: number;
};
export type AudioSessionSettingIos = {
    category: AudioSessionCategoryIos;
    options?: AudioSessionCategoryOptionIos[];
    mode?: AudioSessionModeIos;
    active?: boolean;
};
export type TranscribeRealtimeOptions = TranscribeOptions & {
    /**
     * Maximum realtime recording duration in seconds.
     * Because whisper.cpp processes audio in chunks of 30 seconds,
     * the recommended value is <= 30. (Default: 30)
     */
    realtimeAudioSec?: number;
    /**
     * Improve transcription performance by slicing the audio samples when `realtimeAudioSec` > 30.
     * Set `realtimeAudioSliceSec` < 30 so each slice stays within the whisper.cpp constraint
     * of processing audio in 30-second chunks.
     * (Default: equal to `realtimeAudioSec`)
     */
    realtimeAudioSliceSec?: number;
    /**
     * Minimum duration of audio, in seconds, required to start transcribing each slice.
     * The minimum value is 0.5 and the maximum is `realtimeAudioSliceSec`. (Default: 1)
     */
    realtimeAudioMinSec?: number;
    /**
     * Output path for the recorded audio file. If not set, the audio file will not be saved.
     * (Default: undefined)
     */
    audioOutputPath?: string;
    /**
     * Use VAD (Voice Activity Detection) to start transcribing only when the recorded
     * audio volume is greater than the threshold.
     * The first VAD check is triggered after 2 seconds of recording.
     * (Default: false)
     */
    useVad?: boolean;
    /**
     * Length of the collected audio used for VAD, in milliseconds; cannot be less than 2000 ms. (Default: 2000)
     */
    vadMs?: number;
    /**
     * VAD threshold. (Default: 0.6)
     */
    vadThold?: number;
    /**
     * High-pass filter frequency used in VAD. (Default: 100.0)
     */
    vadFreqThold?: number;
    /**
     * iOS: Audio session settings applied when transcription starts.
     * Keep empty to use the current audio session state.
     */
    audioSessionOnStartIos?: AudioSessionSettingIos;
    /**
     * iOS: Audio session settings applied when transcription stops.
     * - Keep empty to use the last audio session state.
     * - Use `restore` to restore the audio session state from before transcription started.
     */
    audioSessionOnStopIos?: string | AudioSessionSettingIos;
};
export type TranscribeRealtimeEvent = {
    contextId: number;
    jobId: number;
    /** Whether audio is still being captured; when false, the event is the final result */
    isCapturing: boolean;
    isStoppedByAction?: boolean;
    code: number;
    data?: TranscribeResult;
    error?: string;
    processTime: number;
    recordingTime: number;
    slices?: Array<{
        code: number;
        error?: string;
        data?: TranscribeResult;
        processTime: number;
        recordingTime: number;
    }>;
};
export type TranscribeRealtimeNativePayload = {
    /** Whether audio is still being captured; when false, the event is the final result */
    isCapturing: boolean;
    isStoppedByAction?: boolean;
    code: number;
    processTime: number;
    recordingTime: number;
    isUseSlices: boolean;
    sliceIndex: number;
    data?: TranscribeResult;
    error?: string;
};
export type TranscribeRealtimeNativeEvent = {
    contextId: number;
    jobId: number;
    payload: TranscribeRealtimeNativePayload;
};
export type BenchResult = {
    config: string;
    nThreads: number;
    encodeMs: number;
    decodeMs: number;
    batchMs: number;
    promptMs: number;
};
export declare class WhisperContext {
    id: number;
    gpu: boolean;
    reasonNoGPU: string;
    constructor({ contextId, gpu, reasonNoGPU, }: NativeWhisperContext);
    private transcribeWithNativeMethod;
    /**
     * Transcribe an audio file (path or base64-encoded WAV file)
     * base64: the `data:audio/wav;base64,` prefix is required
     */
    transcribe(filePathOrBase64: string | number, options?: TranscribeFileOptions): {
        /** Stop the transcription */
        stop: () => Promise<void>;
        /** Transcription result promise */
        promise: Promise<TranscribeResult>;
    };
    /**
     * Transcribe audio data (base64-encoded float32 PCM data)
     */
    transcribeData(data: string, options?: TranscribeFileOptions): {
        stop: () => Promise<void>;
        promise: Promise<TranscribeResult>;
    };
    /** Transcribe the microphone audio stream; microphone permission is required */
    transcribeRealtime(options?: TranscribeRealtimeOptions): Promise<{
        /** Stop the realtime transcription */
        stop: () => Promise<void>;
        /** Subscribe to realtime transcription events */
        subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void;
    }>;
    bench(maxThreads: number): Promise<BenchResult>;
    release(): Promise<void>;
}
export type ContextOptions = {
    filePath: string | number;
    /**
     * Core ML model assets. If you're using `require` for `filePath`,
     * this option is required to enable Core ML;
     * bundle weights/weight.bin, model.mil, and coremldata.bin into the app via `require`.
     */
    coreMLModelAsset?: {
        filename: string;
        assets: string[] | number[];
    };
    /** Whether the file path is a bundle asset (for pure string filePath) */
    isBundleAsset?: boolean;
    /** Prefer the Core ML model if it exists. If set to false, the Core ML model will not be used even if it exists. */
    useCoreMLIos?: boolean;
    /** Use GPU if available. Currently iOS only; if enabled, the Core ML option is ignored. */
    useGpu?: boolean;
    /** Use Flash Attention; only recommended if a GPU is available */
    useFlashAttn?: boolean;
};
export declare function initWhisper({ filePath, coreMLModelAsset, isBundleAsset, useGpu, useCoreMLIos, useFlashAttn, }: ContextOptions): Promise<WhisperContext>;
export declare function releaseAllWhisper(): Promise<void>;
/** Current version of whisper.cpp */
export declare const libVersion: string;
/** Whether Core ML models are used on iOS */
export declare const isUseCoreML: boolean;
/** Whether fallback to CPU is allowed if loading the Core ML model fails */
export declare const isCoreMLAllowFallback: boolean;
export { AudioSessionIos };
export type VadContextOptions = {
    filePath: string | number;
    /** Whether the file path is a bundle asset (for pure string filePath) */
    isBundleAsset?: boolean;
    /** Use GPU if available. Currently iOS only */
    useGpu?: boolean;
    /** Number of threads to use during computation (Default: 2 for 4-core devices, 4 for more cores) */
    nThreads?: number;
};
export declare class WhisperVadContext {
    id: number;
    gpu: boolean;
    reasonNoGPU: string;
    constructor({ contextId, gpu, reasonNoGPU, }: NativeWhisperVadContext);
    /**
     * Detect speech segments in an audio file (path or base64-encoded WAV file)
     * base64: the `data:audio/wav;base64,` prefix is required
     */
    detectSpeech(filePathOrBase64: string | number, options?: VadOptions): Promise<VadSegment[]>;
    /**
     * Detect speech segments in raw audio data (base64-encoded float32 PCM data)
     */
    detectSpeechData(audioData: string, options?: VadOptions): Promise<VadSegment[]>;
    release(): Promise<void>;
}
/**
 * Initialize a VAD context for voice activity detection
 * @param options VAD context options
 * @returns Promise resolving to a WhisperVadContext instance
 */
export declare function initWhisperVad({ filePath, isBundleAsset, useGpu, nThreads, }: VadContextOptions): Promise<WhisperVadContext>;
/**
 * Release all VAD contexts and free their memory
 * @returns Promise resolving when all contexts are released
 */
export declare function releaseAllWhisperVad(): Promise<void>;
//# sourceMappingURL=index.d.ts.map
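
A minimal file-transcription sketch based on the declarations above. The model and audio paths are placeholders, and the full shape of TranscribeResult is declared in './NativeRNWhisper' rather than in this file, so the example only logs the resolved result.

import { initWhisper } from 'whisper.rn';

async function transcribeFile() {
  // Placeholder paths; supply your own whisper.cpp model and WAV file.
  const whisperContext = await initWhisper({ filePath: 'path/to/ggml-base.bin' });

  const { stop, promise } = whisperContext.transcribe('path/to/audio.wav', {
    // Callbacks declared in TranscribeFileOptions above
    onProgress: (progress) => console.log(`progress: ${progress}%`),
    onNewSegments: ({ nNew, result }) => console.log(`new segments: ${nNew}`, result),
  });
  // stop() could be called here to cancel the job early.

  const result = await promise; // TranscribeResult
  console.log(result);

  await whisperContext.release();
}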
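
A realtime-transcription sketch using transcribeRealtime and the TranscribeRealtimeEvent type above. The model path and option values are illustrative; requesting microphone permission is platform-specific and omitted here.

import { initWhisper } from 'whisper.rn';

async function transcribeMicrophone() {
  const whisperContext = await initWhisper({ filePath: 'path/to/ggml-base.bin' }); // placeholder path

  // Microphone permission must already be granted (see the transcribeRealtime doc comment).
  const { stop, subscribe } = await whisperContext.transcribeRealtime({
    realtimeAudioSec: 60,      // record up to 60 seconds...
    realtimeAudioSliceSec: 25, // ...sliced below the 30-second whisper.cpp chunk size
    useVad: true,              // only start transcribing when voice activity is detected
  });

  subscribe((event) => {
    const { isCapturing, data, processTime, recordingTime } = event;
    console.log('capturing:', isCapturing, 'process:', processTime, 'recording:', recordingTime);
    if (data) console.log(data);
    if (!isCapturing) console.log('Realtime transcription finished');
  });

  // Call stop() (e.g. from a button handler) to end recording early.
  return stop;
}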
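
A voice-activity-detection sketch using initWhisperVad and WhisperVadContext. The VAD model path is a placeholder, and the fields of each VadSegment are declared in './NativeRNWhisper', so the example only logs the segments it receives.

import { initWhisperVad } from 'whisper.rn';

async function detectSpeechSegments() {
  const vadContext = await initWhisperVad({
    filePath: 'path/to/vad-model.bin', // placeholder VAD model path
    nThreads: 4,
  });

  // Returns VadSegment[] for a file path or a base64 WAV (with the data:audio/wav;base64, prefix).
  const segments = await vadContext.detectSpeech('path/to/audio.wav');
  segments.forEach((segment) => console.log(segment));

  await vadContext.release();
}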