whisper.rn
React Native binding of whisper.cpp
import { NativeWhisperContext } from './NativeRNWhisper';
import type { TranscribeOptions, TranscribeResult } from './NativeRNWhisper';
import AudioSessionIos from './AudioSessionIos';
import type { AudioSessionCategoryIos, AudioSessionCategoryOptionIos, AudioSessionModeIos } from './AudioSessionIos';
export type { TranscribeOptions, TranscribeResult, AudioSessionCategoryIos, AudioSessionCategoryOptionIos, AudioSessionModeIos, };
export type TranscribeNewSegmentsResult = {
nNew: number;
totalNNew: number;
result: string;
segments: TranscribeResult['segments'];
};
export type TranscribeNewSegmentsNativeEvent = {
contextId: number;
jobId: number;
result: TranscribeNewSegmentsResult;
};
export type TranscribeFileOptions = TranscribeOptions & {
/**
* Progress callback; the reported progress is between 0 and 100
*/
onProgress?: (progress: number) => void;
/**
* Callback when new segments are transcribed
*/
onNewSegments?: (result: TranscribeNewSegmentsResult) => void;
};
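// Usage sketch (not part of the declarations): passing the callbacks above to
// `WhisperContext.transcribe`, declared further below. `whisperContext` and the
// file path are assumptions made for this example.
//
//   const { stop, promise } = whisperContext.transcribe('/path/to/audio.wav', {
//     onProgress: (progress) => console.log(`Transcribing: ${progress}%`),
//     onNewSegments: ({ nNew, totalNNew, result }) =>
//       console.log(`+${nNew} new segments (total ${totalNNew}): ${result}`),
//   })
//   const transcript = await promise // resolves to a TranscribeResult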
export type TranscribeProgressNativeEvent = {
contextId: number;
jobId: number;
progress: number;
};
export type AudioSessionSettingIos = {
category: AudioSessionCategoryIos;
options?: AudioSessionCategoryOptionIos[];
mode?: AudioSessionModeIos;
active?: boolean;
};
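// Example value (a sketch; the `Category`, `CategoryOption`, and `Mode` enum
// members shown are assumed to exist on the `AudioSessionIos` module
// re-exported at the bottom of this file):
//
//   const audioSession: AudioSessionSettingIos = {
//     category: AudioSessionIos.Category.PlayAndRecord,
//     options: [AudioSessionIos.CategoryOption.MixWithOthers],
//     mode: AudioSessionIos.Mode.Default,
//     active: true,
//   }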
export type TranscribeRealtimeOptions = TranscribeOptions & {
/**
* Maximum duration of realtime recording, in seconds.
* Due to the whisper.cpp hard constraint (audio is processed in chunks of 30 seconds),
* the recommended value is <= 30 seconds. (Default: 30)
*/
realtimeAudioSec?: number;
/**
* Optimize transcription performance by slicing the audio samples when `realtimeAudioSec` > 30.
* Set `realtimeAudioSliceSec` < 30 so each slice fits within the whisper.cpp hard constraint (audio is processed in chunks of 30 seconds).
* (Default: equal to `realtimeAudioSec`)
*/
realtimeAudioSliceSec?: number;
/**
* Minimum duration of audio, in seconds, required to start transcribing each slice.
* The minimum value is 0.5 seconds and the maximum is `realtimeAudioSliceSec`. (Default: 1)
*/
realtimeAudioMinSec?: number;
/**
* Output path for the recorded audio file. If not set, the audio file will not be saved.
* (Default: undefined)
*/
audioOutputPath?: string;
/**
* Use VAD (Voice Activity Detection) to start transcribing only when the audio volume during recording exceeds the threshold.
* The first VAD check is triggered after 2 seconds of recording.
* (Default: false)
*/
useVad?: boolean;
/**
* Length of collected audio used for VAD, in milliseconds; cannot be less than 2000 ms. (Default: 2000)
*/
vadMs?: number;
/**
* VAD threshold. (Default: 0.6)
*/
vadThold?: number;
/**
* Cutoff frequency of the high-pass filter applied in VAD. (Default: 100.0)
*/
vadFreqThold?: number;
/**
* iOS: Audio session settings applied when transcription starts.
* Leave empty to keep the current audio session state.
*/
audioSessionOnStartIos?: AudioSessionSettingIos;
/**
* iOS: Audio session settings applied when transcription stops.
* - Leave empty to keep the last audio session state.
* - Use `restore` to restore the audio session state from before transcription started.
*/
audioSessionOnStopIos?: string | AudioSessionSettingIos;
};
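// Options sketch (illustrative values, not library defaults): a 60-second
// realtime session sliced into 25-second chunks with VAD enabled, restoring
// the iOS audio session on stop.
//
//   const realtimeOptions: TranscribeRealtimeOptions = {
//     realtimeAudioSec: 60,
//     realtimeAudioSliceSec: 25,
//     realtimeAudioMinSec: 1,
//     useVad: true,
//     vadMs: 2000,
//     vadThold: 0.6,
//     audioSessionOnStartIos: audioSession, // see the AudioSessionSettingIos example above
//     audioSessionOnStopIos: 'restore',
//   }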
export type TranscribeRealtimeEvent = {
contextId: number;
jobId: number;
/** Whether audio is being captured; when false, the event carries the final result */
isCapturing: boolean;
isStoppedByAction?: boolean;
code: number;
data?: TranscribeResult;
error?: string;
processTime: number;
recordingTime: number;
slices?: Array<{
code: number;
error?: string;
data?: TranscribeResult;
processTime: number;
recordingTime: number;
}>;
};
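// Handling sketch (illustrative only): consuming events emitted by
// `WhisperContext.transcribeRealtime`, declared below. `whisperContext` and
// `realtimeOptions` are assumed from the examples above.
//
//   const { stop, subscribe } = await whisperContext.transcribeRealtime(realtimeOptions)
//   subscribe((event: TranscribeRealtimeEvent) => {
//     if (event.error) console.warn(`code ${event.code}: ${event.error}`)
//     if (event.data) console.log(event.data) // partial or final TranscribeResult
//     if (!event.isCapturing) console.log('Capture finished; this is the final result')
//   })
//   // later: await stop()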
export type TranscribeRealtimeNativePayload = {
/** Whether audio is being captured; when false, the event carries the final result */
isCapturing: boolean;
isStoppedByAction?: boolean;
code: number;
processTime: number;
recordingTime: number;
isUseSlices: boolean;
sliceIndex: number;
data?: TranscribeResult;
error?: string;
};
export type TranscribeRealtimeNativeEvent = {
contextId: number;
jobId: number;
payload: TranscribeRealtimeNativePayload;
};
export type BenchResult = {
config: string;
nThreads: number;
encodeMs: number;
decodeMs: number;
batchMs: number;
promptMs: number;
};
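// Benchmark sketch (illustrative only): `WhisperContext.bench`, declared below,
// resolves to a BenchResult. `whisperContext` is an assumed, initialized context.
//
//   const bench: BenchResult = await whisperContext.bench(4) // run with 4 threads
//   console.log(`${bench.config}: encode ${bench.encodeMs} ms, decode ${bench.decodeMs} ms`)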
export declare class WhisperContext {
id: number;
gpu: boolean;
reasonNoGPU: string;
constructor({ contextId, gpu, reasonNoGPU, }: NativeWhisperContext);
private transcribeWithNativeMethod;
/**
* Transcribe an audio file (file path or base64-encoded WAV file).
* For base64 input, the `data:audio/wav;base64,` prefix is required.
*/
transcribe(filePathOrBase64: string | number, options?: TranscribeFileOptions): {
/** Stop the transcription */
stop: () => Promise<void>;
/** Transcribe result promise */
promise: Promise<TranscribeResult>;
};
/**
* Transcribe audio data (base64 encoded float32 PCM data)
*/
transcribeData(data: string, options?: TranscribeFileOptions): {
stop: () => Promise<void>;
promise: Promise<TranscribeResult>;
};
/** Transcribe the microphone audio stream; microphone permission is required */
transcribeRealtime(options?: TranscribeRealtimeOptions): Promise<{
/** Stop the realtime transcription */
stop: () => Promise<void>;
/** Subscribe to realtime transcription events */
subscribe: (callback: (event: TranscribeRealtimeEvent) => void) => void;
}>;
bench(maxThreads: number): Promise<BenchResult>;
release(): Promise<void>;
}
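// Usage sketch (illustrative only): transcribing base64-encoded float32 PCM
// data and releasing the context afterwards. `base64Pcm` is an assumed variable
// holding 16 kHz mono samples, which is what whisper.cpp expects.
//
//   const { promise } = whisperContext.transcribeData(base64Pcm)
//   const result = await promise
//   await whisperContext.release()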
export type ContextOptions = {
filePath: string | number;
/**
* Core ML model assets. If you're using `require` for `filePath`,
* this option is required to enable Core ML;
* you will need to bundle weights/weight.bin, model.mil, and coremldata.bin into the app via `require`.
*/
coreMLModelAsset?: {
filename: string;
assets: string[] | number[];
};
/** Whether the file path is a bundle asset (applies only when `filePath` is a plain string) */
isBundleAsset?: boolean;
/** Prefer the Core ML model if it exists. If set to false, the Core ML model will not be used even if it exists. */
useCoreMLIos?: boolean;
/** Use GPU if available. Currently iOS only; if enabled, the Core ML option will be ignored. */
useGpu?: boolean;
/** Use Flash Attention; recommended only if a GPU is available */
useFlashAttn?: boolean;
};
export declare function initWhisper({ filePath, coreMLModelAsset, isBundleAsset, useGpu, useCoreMLIos, useFlashAttn, }: ContextOptions): Promise<WhisperContext>;
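// Initialization sketch (illustrative only): the asset paths and the Core ML
// model filename below are assumptions, not files shipped with the library.
//
//   import { initWhisper } from 'whisper.rn'
//
//   const whisperContext = await initWhisper({
//     filePath: require('../assets/ggml-tiny.en.bin'),
//     useGpu: true, // iOS only; the Core ML option is ignored when enabled
//   })
//   console.log(whisperContext.gpu, whisperContext.reasonNoGPU)
//
//   // With a bundled Core ML model (per the ContextOptions comment above):
//   const coreMLContext = await initWhisper({
//     filePath: require('../assets/ggml-tiny.en.bin'),
//     coreMLModelAsset: {
//       filename: 'ggml-tiny.en-encoder.mlmodelc',
//       assets: [
//         require('../assets/ggml-tiny.en-encoder.mlmodelc/weights/weight.bin'),
//         require('../assets/ggml-tiny.en-encoder.mlmodelc/model.mil'),
//         require('../assets/ggml-tiny.en-encoder.mlmodelc/coremldata.bin'),
//       ],
//     },
//   })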
export declare function releaseAllWhisper(): Promise<void>;
/** Current version of whisper.cpp */
export declare const libVersion: string;
/** Whether Core ML models are used on iOS */
export declare const isUseCoreML: boolean;
/** Whether fallback to CPU is allowed if loading the Core ML model fails */
export declare const isCoreMLAllowFallback: boolean;
export { AudioSessionIos };
//# sourceMappingURL=index.d.ts.map