@soniox/speech-to-text-web

JavaScript client library for the Soniox Speech-to-Text WebSocket API

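The package exports a SonioxClient class, declared in full below. As a minimal construction sketch, the following assumes a hypothetical backend endpoint /api/soniox-temporary-key returning { apiKey: string }, so the real API key is never shipped to the browser; per the declarations, a plain API key string is also accepted:

import { SonioxClient } from '@soniox/speech-to-text-web';

if (!SonioxClient.isSupported) {
    throw new Error('This browser does not support the APIs required for recording.');
}

// Hypothetical helper: fetches a temporary API key from your own backend.
// The endpoint name and response shape are placeholders, not part of this package.
async function getTemporaryApiKey(): Promise<string> {
    const response = await fetch('/api/soniox-temporary-key', { method: 'POST' });
    const body = await response.json();
    return body.apiKey as string;
}

const sonioxClient = new SonioxClient({
    apiKey: getTemporaryApiKey, // a plain API key string also works
    onStateChange: ({ oldState, newState }) => {
        console.log('state changed', oldState, '->', newState);
    },
    onError: (status, message, errorCode) => {
        console.error('transcription error', status, message, errorCode);
    },
});

The package's type declarations follow.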
import { ErrorStatus } from './errors';
import { RecorderState } from './state';
import { SpeechToTextAPIResponse, TranslationConfig } from './types';
type ApiKeyGetter = () => Promise<string>;
type Callbacks = {
    onStateChange?: (update: {
        oldState: RecorderState;
        newState: RecorderState;
    }) => void;
    onStarted?: () => void;
    onPartialResult?: (result: SpeechToTextAPIResponse) => void;
    onFinished?: () => void;
    /**
     * Called when an error occurs.
     *
     * @param status - The error status.
     * @param message - More descriptive error message.
     * @param errorCode - The error code. Returned only if status is `api_error`.
     */
    onError?: (status: ErrorStatus, message: string, errorCode: number | undefined) => void;
};
type SonioxClientOptions = {
    /**
     * WebSocket URI. If not provided, the default URI will be used.
     */
    webSocketUri?: string;
    /**
     * Either a string or an async function that returns an API key.
     * A function can be used to generate a temporary API key, which is useful if you want to avoid exposing your API key to the client.
     */
    apiKey: string | ApiKeyGetter;
    /**
     * How many messages to queue before the websocket is opened. If the queue is full, an error will be thrown.
     * (Opening the websocket might take some time, especially if the API key fetching is slow or
     * the user has a slow connection.)
     */
    bufferQueueSize?: number;
} & Callbacks;
type AudioOptions = {
    /**
     * One of the available Speech-to-Text models.
     */
    model: string;
    /**
     * List of language codes to hint to the API which languages to transcribe.
     *
     * Example: `['en', 'fr', 'de']`
     */
    languageHints?: string[];
    /**
     * Context string to pass to the API.
     */
    context?: string;
    /**
     * When true, speakers are identified and separated in the transcription output.
     */
    enableSpeakerDiarization?: boolean;
    /** When true, language identification is enabled. */
    enableLanguageIdentification?: boolean;
    /** When true, endpoint detection is enabled. */
    enableEndpointDetection?: boolean;
    /**
     * Translation configuration. Can be one-way or two-way translation.
     */
    translation?: TranslationConfig;
    /**
     * The format of the streamed audio (e.g., "auto", "s16le").
     */
    audioFormat?: string;
    /**
     * Required for raw PCM formats.
     */
    sampleRate?: number;
    /**
     * Required for raw PCM formats. Typically 1 for mono audio, 2 for stereo.
     */
    numChannels?: number;
    /**
     * A client-defined identifier to track this stream. Can be any string. If not provided, it will be auto-generated.
     */
    clientReferenceId?: string;
    /**
     * Can be used to set the `echoCancellation` and `noiseSuppression` properties of the MediaTrackConstraints object.
     * See https://developer.mozilla.org/en-US/docs/Web/API/MediaTrackConstraints for more details.
     */
    audioConstraints?: MediaTrackConstraints;
    /**
     * MediaRecorder options: https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder/MediaRecorder
     */
    mediaRecorderOptions?: Record<string, any>;
    /**
     * If you don't want to transcribe audio from the microphone, you can pass a MediaStream to the `stream` option.
     * This can be useful if you want to transcribe audio from a file or a custom source.
     */
    stream?: MediaStream;
} & Callbacks;
export declare class SonioxClient {
    static isSupported: boolean;
    _state: RecorderState;
    _options: SonioxClientOptions;
    _audioOptions: AudioOptions | null;
    _websocket: WebSocket | null;
    _mediaRecorder: MediaRecorder | null;
    _queuedMessages: (Blob | string)[];
    /**
     * SonioxClient connects to the Soniox Speech-to-Text API for real-time speech-to-text transcription and translation.
     * It provides a simple API for starting and stopping the transcription, as well as handling the transcription results.
     *
     * @example
     * const sonioxClient = new SonioxClient({
     *     apiKey: '<SONIOX_API_KEY>',
     *     onPartialResult: (result) => {
     *         console.log('partial result', result.text);
     *     },
     * });
     * sonioxClient.start();
     */
    constructor(options?: SonioxClientOptions);
    _hasCallback: <T extends keyof Callbacks>(name: T) => boolean;
    _callback: <T extends keyof Callbacks>(name: T, ...args: Parameters<NonNullable<Callbacks[T]>>) => void;
    _setState(newState: RecorderState): void;
    get state(): RecorderState;
    /**
     * Start transcription. You can pass options to configure the transcription settings, source and callbacks.
     */
    start: (audioOptions: AudioOptions) => Promise<void>;
    /**
     * Stop transcription. Stopping sends a stop signal to the API and waits for the final results to be received.
     * Only after the final results are received is the transcription finished. If you want to cancel the transcription immediately
     * (for example, on component unmount), you should probably use the `cancel()` method instead.
     */
    stop: () => void;
    /**
     * Cancel transcription. Cancelling stops the transcription immediately and closes the resources.
     * For user-initiated cancellation, you should probably use the `stop()` method instead.
     */
    cancel: () => void;
    /**
     * Trigger finalize. This will finalize all non-final tokens.
     */
    finalize: () => void;
    _onMediaRecorderData: (event: BlobEvent) => Promise<void>;
    _onMediaRecorderError: (event: Event) => void;
    _onMediaRecorderPause: (_event: Event) => void;
    _onMediaRecorderStop: (_event: Event) => void;
    _onWebSocketOpen: (event: Event) => void;
    _onWebSocketOpenAsync: (_event: Event) => Promise<void>;
    _onWebSocketError: (_event: Event) => void;
    _onWebSocketMessage: (event: MessageEvent<any>) => void;
    _onError: (status: ErrorStatus, message: string | undefined, errorCode?: number | undefined) => void;
    _closeSource: () => void;
    _closeResources: () => void;
    _handleFinished(): void;
}
/**
 * @deprecated Use SonioxClient instead.
 */
export declare const RecordTranscribe: typeof SonioxClient;
export {};
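For reference, a usage sketch assembled only from the declarations above and assumed to run inside an async context; the model name, language hints, and diarization/endpointing values are illustrative placeholders rather than values confirmed by this file. Note that callbacks can be supplied either to the constructor or per call to start(), since both option types extend Callbacks.

// Begin transcribing microphone audio. Every option name below is declared
// in AudioOptions; the concrete values are placeholders.
await sonioxClient.start({
    model: '<SONIOX_STT_MODEL>',
    languageHints: ['en', 'de'],
    enableSpeakerDiarization: true,
    enableEndpointDetection: true,
    onPartialResult: (result) => {
        console.log('partial result', result.text);
    },
    onFinished: () => {
        console.log('final results received');
    },
});

// Later, to finalize all non-final tokens without ending the stream:
sonioxClient.finalize();

// Graceful shutdown: sends a stop signal and waits for final results.
sonioxClient.stop();

// Immediate teardown (e.g. on component unmount), skipping final results:
// sonioxClient.cancel();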