smart-whisper-electron

import EventEmitter from 'node:events';

/**
 * Sampling strategies accepted by {@link TranscribeParams.strategy}.
 */
declare enum WhisperSamplingStrategy {
    WHISPER_SAMPLING_GREEDY = 0,
    WHISPER_SAMPLING_BEAM_SEARCH = 1
}

/**
 * Output format selector; it decides which result shape {@link TranscribeResult} resolves to.
 */
type TranscribeFormat = "simple" | "detail";

/**
 * Transcription parameters.
 *
 * For the meaning of the individual fields, refer to
 * {@link https://github.com/ggerganov/whisper.cpp/blob/00b7a4be02ca82d53ac69dd2dd438c16e2af7658/whisper.h#L433C19-L433C19}.
 */
interface TranscribeParams<
    Format extends TranscribeFormat = TranscribeFormat,
    TokenTimestamp extends boolean = false
> {
    strategy: WhisperSamplingStrategy;
    n_threads: number;
    n_max_text_ctx: number;
    offset_ms: number;
    duration_ms: number;
    translate: boolean;
    no_context: boolean;
    no_timestamps: boolean;
    single_segment: boolean;
    print_special: boolean;
    print_progress: boolean;
    print_realtime: boolean;
    print_timestamps: boolean;
    /** Drives whether per-token `from` / `to` are typed as `number` in {@link TranscribeDetailedResult}. */
    token_timestamps: TokenTimestamp;
    thold_pt: number;
    thold_ptsum: number;
    max_len: number;
    split_on_word: boolean;
    max_tokens: number;
    speed_up: boolean;
    debug_mode: boolean;
    audio_ctx: number;
    tdrz_enable: boolean;
    initial_prompt: string;
    /**
     * Language code, for example
     * "en", "de", "fr", "es", "it", "nl", "pt", "ru", "tr", "uk", "pl", "sv", "cs", "zh", "ja", "ko".
     */
    language: string;
    suppress_blank: boolean;
    suppress_non_speech_tokens: boolean;
    temperature: number;
    max_initial_ts: number;
    length_penalty: number;
    temperature_inc: number;
    entropy_thold: number;
    logprob_thold: number;
    no_speech_thold: number;
    best_of: number;
    beam_size: number;
    /** Selects the result shape, see {@link TranscribeResult}. */
    format: Format;
}

/**
 * Result shape used when the format is "simple".
 */
interface TranscribeSimpleResult {
    from: number;
    to: number;
    text: string;
}

/**
 * Detailed transcription result: everything in {@link TranscribeSimpleResult}
 * plus language, confidence and token-level data.
 */
interface TranscribeDetailedResult<TokenTimestamp extends boolean> extends TranscribeSimpleResult {
    /** Spoken language that was detected. */
    lang: string;
    /** Confidence of the transcription, computed as the average probability of the tokens. */
    confidence: number;
    /** Tokens produced while transcribing. */
    tokens: Array<{
        /** Token text. For CJK languages it may be unreadable because of the BPE encoding. */
        text: string;
        /** Token ID. */
        id: number;
        /** Token probability. */
        p: number;
        /**
         * Token start timestamp in milliseconds.
         * Only present when `token_timestamps` of {@link TranscribeParams} is `true`.
         */
        from: TokenTimestamp extends true ? number : undefined;
        /**
         * Token end timestamp in milliseconds.
         * Only present when `token_timestamps` of {@link TranscribeParams} is `true`.
         */
        to: TokenTimestamp extends true ? number : undefined;
    }>;
}

/**
 * Maps a format to its result shape: "simple" yields {@link TranscribeSimpleResult},
 * anything else yields {@link TranscribeDetailedResult}.
 */
type TranscribeResult<
    Format extends TranscribeFormat = TranscribeFormat,
    TokenTimestamp extends boolean = boolean
> = Format extends "simple"
    ? TranscribeSimpleResult
    : TranscribeDetailedResult<TokenTimestamp>;

/**
 * An external handle to a model.
 */
type Handle = {
    readonly "": unique symbol;
};

/**
 * Presets accepted by {@link WhisperContextParams.dtw_aheads_preset}.
 *
 * NOTE: the spelling "Aligment" is part of the exported API and must stay as is.
 */
declare enum WhisperAligmentHeadsPreset {
    NONE = 0
}

/**
 * Advanced context parameters; every field is optional.
 */
interface WhisperContextParams {
    use_gpu?: boolean;
    flash_attn?: boolean;
    gpu_device?: number;
    dtw_token_timestamps?: boolean;
    dtw_aheads_preset?: WhisperAligmentHeadsPreset;
    dtw_n_top?: number;
    dtw_mem_size?: number;
    offload?: number;
}

/**
 * Model configuration accepted by `Binding.WhisperModel.load`.
 * This is the interface the module exports under the name `WhisperConfig`.
 */
interface WhisperConfig$1 {
    /**
     * Use GPU acceleration when it is available.
     * @default true
     */
    gpu?: boolean;
    /**
     * Number of seconds after which the model is freed from memory.
     * @default 0 (disabled)
     */
    offload?: number;
    /**
     * Advanced configuration parameters.
     */
    params?: WhisperContextParams;
}

declare namespace Binding {
    /**
     * Loads a model from a whisper weights file.
     * @param file Path to the whisper weights file.
     * @param gpu Whether the GPU should be used.
     * @param callback Invoked with the handle to the model.
     */
    function load(
        file: string,
        gpu: boolean,
        callback: (handle: Handle) => void
    ): void;

    /**
     * Releases the memory held by the model; the model is unusable afterwards.
     * @param handle Handle to the model.
     * @param callback Invoked once the model has been freed.
     */
    function free(handle: Handle, callback: () => void): void;

    /**
     * Transcribes a PCM buffer.
     * @param handle Handle to the model.
     * @param pcm The PCM buffer.
     * @param params Parameters used for the transcription.
     * @param finish Invoked when the transcription has finished.
     * @param progress Invoked whenever a new result becomes available.
     */
    function transcribe<Format extends TranscribeFormat, TokenTimestamp extends boolean>(
        handle: Handle,
        pcm: Float32Array,
        params: Partial<TranscribeParams<Format, TokenTimestamp>>,
        finish: (results: Array<TranscribeResult<Format, TokenTimestamp>>) => void,
        progress: (result: TranscribeResult<Format, TokenTimestamp>) => void
    ): void;

    class WhisperModel {
        private _ctx;

        constructor(handle: Handle);

        get handle(): Handle | null;

        get freed(): boolean;

        /**
         * Releases the memory held by the model; the model is unusable afterwards.
         * Calling this more than once is safe, the model is only freed once.
         */
        free(): Promise<void>;

        /**
         * Loads a model from a whisper weights file.
         * @param file Path to the whisper weights file.
         * @param config Either a model configuration or a boolean toggling GPU usage.
         */
        static load(file: string, config?: WhisperConfig$1 | boolean): Promise<WhisperModel>;
    }
}

/**
 * Native binding of the underlying C++ addon.
 */
declare const binding: typeof Binding;

declare class WhisperModel extends binding.WhisperModel {
}

/**
 * A transcription task bound to a model. It extends `EventEmitter` and declares
 * typed listeners for the "finish" and "transcribed" events.
 */
declare class TranscribeTask<
    Format extends TranscribeFormat,
    TokenTimestamp extends boolean
> extends EventEmitter {
    private _model;
    private _result;

    /**
     * Do not construct this class yourself; go through {@link TranscribeTask.run}.
     */
    constructor(model: WhisperModel);

    get model(): WhisperModel;

    /**
     * Promise resolving to the result of the transcription task.
     */
    get result(): Promise<Array<TranscribeResult<Format, TokenTimestamp>>>;

    private _run;

    static run<Format extends TranscribeFormat, TokenTimestamp extends boolean>(
        model: WhisperModel,
        pcm: Float32Array,
        params: Partial<TranscribeParams<Format, TokenTimestamp>>
    ): Promise<TranscribeTask<Format, TokenTimestamp>>;

    on(
        event: "finish",
        listener: (results: Array<TranscribeResult<Format, TokenTimestamp>>) => void
    ): this;
    on(
        event: "transcribed",
        listener: (result: TranscribeResult<Format, TokenTimestamp>) => void
    ): this;

    once(
        event: "finish",
        listener: (results: Array<TranscribeResult<Format, TokenTimestamp>>) => void
    ): this;
    once(
        event: "transcribed",
        listener: (result: TranscribeResult<Format, TokenTimestamp>) => void
    ): this;

    off(
        event: "finish",
        listener: (results: Array<TranscribeResult<Format, TokenTimestamp>>) => void
    ): this;
    off(
        event: "transcribed",
        listener: (result: TranscribeResult<Format, TokenTimestamp>) => void
    ): this;
}

/**
 * Configuration held by a {@link Whisper} instance.
 * The constructor accepts a partial version of it; this interface is not part of the module's exports.
 */
interface WhisperConfig {
    /**
     * Seconds to wait before offloading the model when it is not in use.
     */
    offload: number;
    /**
     * Whether the GPU should be used.
     */
    gpu: boolean;
    /**
     * Advanced configuration parameters.
     */
    params?: WhisperContextParams;
}

/**
 * Manages the lifecycle and operations of a whisper model: loading and offloading it,
 * keeping track of transcription tasks, and configuring model parameters.
 */
declare class Whisper {
    private _file;
    private _available;
    private _loading;
    private _tasks;
    private _config;
    private _offload_timer;

    /**
     * Creates a Whisper instance for the given model file and configuration.
     * @param file - Path to the Whisper model file.
     * @param config - Optional configuration for the instance.
     */
    constructor(file: string, config?: Partial<WhisperConfig>);

    get file(): string;
    set file(file: string);

    get config(): WhisperConfig;

    get tasks(): Array<Promise<TranscribeResult[]>>;

    reset_offload_timer(): void;

    private clear_offload_timer;

    model(): Promise<WhisperModel>;

    /**
     * Asynchronously loads the whisper model.
     * When a load is already in progress, the existing one is returned.
     *
     * Calling this directly is not required: {@link Whisper.transcribe} invokes it
     * automatically when necessary.
     *
     * @returns A Promise resolving to the loaded model.
     */
    load(): Promise<WhisperModel>;

    /**
     * Transcribes the supplied PCM audio data with the Whisper model.
     * @param pcm - Mono 16k PCM audio data to transcribe.
     * @param params - Optional transcription parameters.
     * @returns A promise resolving to the transcription task.
     */
    transcribe<Format extends TranscribeFormat, TokenTimestamp extends boolean>(
        pcm: Float32Array,
        params?: Partial<TranscribeParams<Format, TokenTimestamp>>
    ): Promise<TranscribeTask<Format, TokenTimestamp>>;

    free(): Promise<void>;
}

/**
 * URLs of the available ggml whisper models, keyed by model name.
 */
declare const MODELS: {
    readonly tiny: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin";
    readonly "tiny.en": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin";
    readonly small: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin";
    readonly "small.en": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin";
    readonly base: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin";
    readonly "base.en": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin";
    readonly medium: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin";
    readonly "medium.en": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en.bin";
    readonly "large-v1": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v1.bin";
    readonly "large-v2": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v2.bin";
    readonly "large-v3": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin";
    readonly "large-v3-turbo": "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin";
};

/**
 * A model name: one of the {@link MODELS} keys or any other string.
 * The `string & {}` arm keeps the known keys visible to editor completion.
 */
type ModelName = keyof typeof MODELS | (string & {});

/**
 * Downloads a ggml whisper model given a URL or a shorthand.
 *
 * @param model - Model to download: either a key of {@link MODELS} or a URL.
 * @returns A promise resolving to the name of the downloaded model.
 * @throws An error when the URL or shorthand is invalid, or when the download fails.
 */
declare function download(model: ModelName): Promise<string>;

/**
 * Removes a model that was downloaded locally.
 * @param model - Name of the model to remove.
 */
declare function remove(model: ModelName): void;

/**
 * Lists the names of the models available locally.
 * @returns An array of model names.
 */
declare function list(): ModelName[];

/**
 * Tells whether a model exists.
 * @param model - Name of the model.
 * @returns True when the model exists, false otherwise.
 */
declare function check(model: ModelName): boolean;

/**
 * Resolves the absolute path of a model.
 * @param model - Name of the model.
 * @returns The resolved path of the model.
 * @throws Error when the model is not found.
 */
declare function resolve(model: ModelName): string;

declare const dir: {
    root: string;
    models: string;
};

// Aliases used to assemble the `index` namespace, which is exported as `manager`.
declare const index_MODELS: typeof MODELS;
type index_ModelName = ModelName;
declare const index_check: typeof check;
declare const index_dir: typeof dir;
declare const index_download: typeof download;
declare const index_list: typeof list;
declare const index_remove: typeof remove;
declare const index_resolve: typeof resolve;

declare namespace index {
    export {
        index_MODELS as MODELS,
        type index_ModelName as ModelName,
        index_check as check,
        index_dir as dir,
        index_download as download,
        index_list as list,
        index_remove as remove,
        index_resolve as resolve
    };
}

export {
    Binding,
    type Handle,
    type TranscribeDetailedResult,
    type TranscribeFormat,
    type TranscribeParams,
    type TranscribeResult,
    type TranscribeSimpleResult,
    TranscribeTask,
    Whisper,
    WhisperAligmentHeadsPreset,
    type WhisperConfig$1 as WhisperConfig,
    type WhisperContextParams,
    WhisperModel,
    WhisperSamplingStrategy,
    binding,
    index as manager
};