@picovoice/cheetah-web
Version:
Cheetah Speech-to-Text engine for web browsers (via WebAssembly)
123 lines • 5.95 kB
TypeScript
import { CheetahModel, CheetahOptions, CheetahTranscript } from './types';
/**
* Type declarations for the web binding of the Picovoice Cheetah Speech-to-Text engine.
*
* The constructor is private, so instances are obtained through the static, asynchronous
* `create` factory. Transcripts are handed to the `transcriptCallback` supplied to `create`;
* `process` and `flush` themselves resolve to `void`.
*
* Private members appear below without types because TypeScript strips type information from
* private members when emitting declaration files; they are listed only to reserve their names.
*/
export declare class Cheetah {
// Per-instance private state (types elided in declaration output).
private _module?;
private readonly _pv_cheetah_process;
private readonly _pv_cheetah_flush;
private readonly _pv_cheetah_delete;
private readonly _version;
private readonly _sampleRate;
private readonly _frameLength;
private readonly _processMutex;
private readonly _objectAddress;
private readonly _inputBufferAddress;
private readonly _isEndpointAddress;
private readonly _transcriptAddressAddress;
private readonly _messageStackAddressAddressAddress;
private readonly _messageStackDepthAddress;
// Class-level private state. NOTE(review): presumably populated by the static `setWasm*` /
// `setSdk` setters declared below — confirm against the implementation.
private static _wasmSimd;
private static _wasmSimdLib;
private static _wasmPThread;
private static _wasmPThreadLib;
private static _sdk;
private static _cheetahMutex;
// Callbacks. NOTE(review): presumably the ones passed to `create` (`transcriptCallback` and
// `options.processErrorCallback`) — confirm against the implementation.
private readonly _transcriptCallback;
private readonly _processErrorCallback?;
// Private: use the static `create` factory to obtain an instance.
private constructor();
/**
* Get Cheetah engine version.
*/
get version(): string;
/**
* Get frame length, i.e. the number of audio samples per frame expected by `process`.
*/
get frameLength(): number;
/**
* Get sample rate required for the audio passed to `process`.
*/
get sampleRate(): number;
/**
* Set base64 wasm file with SIMD feature.
* @param wasmSimd Base64'd wasm file to use to initialize wasm.
*/
static setWasmSimd(wasmSimd: string): void;
/**
* Set base64 SIMD wasm file in text format.
* @param wasmSimdLib Base64'd SIMD wasm file in text format.
*/
static setWasmSimdLib(wasmSimdLib: string): void;
/**
* Set base64 wasm file with SIMD and pthread feature.
* @param wasmPThread Base64'd wasm file to use to initialize wasm.
*/
static setWasmPThread(wasmPThread: string): void;
/**
* Set base64 SIMD and thread wasm file in text format.
* @param wasmPThreadLib Base64'd wasm file in text format.
*/
static setWasmPThreadLib(wasmPThreadLib: string): void;
/**
* Set the SDK string.
* NOTE(review): the purpose of this value is not visible from the declaration; presumably it
* identifies the SDK wrapping this binding — confirm against the implementation.
* @param sdk SDK string to store.
*/
static setSdk(sdk: string): void;
/**
* Creates an instance of the Picovoice Cheetah Speech-to-Text engine.
* Behind the scenes, it requires the WebAssembly code to load and initialize before
* it can create an instance.
*
* @param accessKey AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)
* @param transcriptCallback User-defined callback to run after receiving transcript result.
* @param model Cheetah model options.
* @param model.base64 The model in base64 string to initialize Cheetah.
* @param model.publicPath The model path relative to the public directory.
* @param model.customWritePath Custom path to save the model in storage.
* Set to a different name to use multiple models across `cheetah` instances.
* @param model.forceWrite Flag to overwrite the model in storage even if it exists.
* @param model.version Version of the model file. Increment to update the model file in storage.
* @param options Optional configuration arguments.
* @param options.device String representation of the device (e.g., CPU or GPU) to use. If set to `best`, the most
* suitable device is selected automatically. If set to `gpu`, the engine uses the first available GPU device. To
* select a specific GPU device, set this argument to `gpu:${GPU_INDEX}`, where `${GPU_INDEX}` is the index of the
* target GPU. If set to `cpu`, the engine will run on the CPU with the default number of threads. To specify the
* number of threads, set this argument to `cpu:${NUM_THREADS}`, where `${NUM_THREADS}` is the desired number of
* threads.
* @param options.endpointDurationSec Duration of endpoint in seconds. A speech endpoint is detected when there is a
* chunk of audio (with a duration specified herein) after an utterance without any speech in it. Set to `0`
* to disable endpoint detection.
* @param options.enableAutomaticPunctuation Flag to enable automatic punctuation insertion.
* @param options.processErrorCallback User-defined callback invoked if any error happens
* while processing the audio stream. Its only input argument is the error message.
*
* @returns A promise that resolves to an instance of the Cheetah engine.
*/
static create(accessKey: string, transcriptCallback: (cheetahTranscript: CheetahTranscript) => void, model: CheetahModel, options?: CheetahOptions): Promise<Cheetah>;
/**
* Variant of `create` that takes a model path string instead of a `CheetahModel` object.
* NOTE(review): the underscore prefix suggests this is an internal entry point (presumably used
* by `create` once the model has been stored) even though it is not declared `private` —
* confirm before calling it directly.
*
* @param accessKey AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)
* @param transcriptCallback User-defined callback to run after receiving transcript result.
* @param modelPath Path of the model file.
* @param options Optional configuration arguments (same shape as in `create`).
*
* @returns A promise that resolves to an instance of the Cheetah engine.
*/
static _init(accessKey: string, transcriptCallback: (cheetahTranscript: CheetahTranscript) => void, modelPath: string, options?: CheetahOptions): Promise<Cheetah>;
/**
* Processes a frame of audio. The required sample rate can be retrieved from '.sampleRate' and the length
* of frame (number of audio samples per frame) can be retrieved from '.frameLength'. The audio needs to be
* 16-bit linearly-encoded. Furthermore, the engine operates on single-channel audio.
*
* The returned promise carries no value. NOTE(review): presumably any transcript produced is
* delivered through the `transcriptCallback` given to `create` — confirm.
*
* @param pcm A frame of audio with properties described above.
*/
process(pcm: Int16Array): Promise<void>;
/**
* Marks the end of the audio stream and flushes internal state of the object.
*
* The declared return type is `Promise<void>`, so the remaining transcribed text is NOT returned
* to the caller. NOTE(review): presumably it is delivered through the `transcriptCallback` given
* to `create` — confirm against the implementation.
*
* @returns A promise that carries no value.
*/
flush(): Promise<void>;
private cheetahFlush;
/**
* Releases resources acquired by WebAssembly module.
*/
release(): Promise<void>;
/**
* Message-event handler.
* NOTE(review): the declaration does not show who dispatches these events or what `e.data` is
* expected to contain; presumably used for worker-style messaging — confirm.
*
* @param e The received message event.
*/
onmessage(e: MessageEvent): Promise<void>;
private static initWasm;
/**
* Lists all available devices that Cheetah can use for inference.
* Each entry in the list can be used as the `device` argument for the `.create` method.
*
* @returns List of all available devices that Cheetah can use for inference.
*/
static listAvailableDevices(): Promise<string[]>;
private static getMessageStack;
private static wrapAsyncFunction;
}
//# sourceMappingURL=cheetah.d.ts.map