@soniox/speech-to-text-web
JavaScript client library for the Soniox Speech-to-Text WebSocket API
TypeScript
import { ErrorStatus } from './errors';
import { RecorderState } from './state';
import { SpeechToTextAPIResponse, TranslationConfig } from './types';
type ApiKeyGetter = () => Promise<string>;
type Callbacks = {
onStateChange?: (update: {
oldState: RecorderState;
newState: RecorderState;
}) => void;
onStarted?: () => void;
onPartialResult?: (result: SpeechToTextAPIResponse) => void;
onFinished?: () => void;
/**
* Called when an error occurs.
*
* @param status - The error status.
* @param message - More descriptive error message.
* @param errorCode - The error code. Returned only if status is `api_error`.
*/
onError?: (status: ErrorStatus, message: string, errorCode: number | undefined) => void;
};
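// A minimal sketch of wiring these callbacks when constructing the client; the handler
// bodies are illustrative and not part of the library. Note that `errorCode` is only
// provided when `status` is `api_error` (this assumes ErrorStatus is a string union, as
// the `api_error` value in the docs suggests).
//
// const sonioxClient = new SonioxClient({
//   apiKey: '<SONIOX_API_KEY>',
//   onStateChange: ({ oldState, newState }) => console.log(oldState, '->', newState),
//   onStarted: () => console.log('recording started'),
//   onPartialResult: (result) => console.log('partial result', result),
//   onFinished: () => console.log('final results received'),
//   onError: (status, message, errorCode) => {
//     if (status === 'api_error') console.error('API error', errorCode, message);
//     else console.error(status, message);
//   },
// });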
type SonioxClientOptions = {
/**
* WebSocket URI. If not provided, the default URI will be used.
*/
webSocketUri?: string;
/**
* Either a string or an async function that returns an API key.
* The function can be used to fetch a temporary API key, which is useful if you want to avoid exposing your API key to the client.
*/
apiKey: string | ApiKeyGetter;
/**
* How many messages to queue before the WebSocket is opened. If the queue is full, an error is thrown.
* (Opening the WebSocket might take some time, especially if fetching the API key is slow or
* the user has a slow connection.)
*/
bufferQueueSize?: number;
} & Callbacks;
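// A sketch of constructing the client with an async API key getter instead of a raw key.
// The `/api/soniox-temporary-key` endpoint is a hypothetical backend route you would
// implement yourself to return a short-lived key, so the real API key never reaches the
// browser. The `bufferQueueSize` value is likewise only illustrative.
//
// const sonioxClient = new SonioxClient({
//   apiKey: async () => {
//     const response = await fetch('/api/soniox-temporary-key');
//     const { apiKey } = await response.json();
//     return apiKey;
//   },
//   bufferQueueSize: 100, // messages queued while the WebSocket is still opening
//   onError: (status, message) => console.error(status, message),
// });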
type AudioOptions = {
/**
* One of the available Speech-to-Text models.
*/
model: string;
/**
* List of language codes that hint the API which languages to transcribe.
*
* Example: `['en', 'fr', 'de']`
*/
languageHints?: string[];
/**
* Context string to pass to the API.
*/
context?: string;
/**
* When true, speakers are identified and separated in the transcription output.
*/
enableSpeakerDiarization?: boolean;
/** When true, language identification is enabled. */
enableLanguageIdentification?: boolean;
/** When true, endpoint detection is enabled. */
enableEndpointDetection?: boolean;
/**
* Translation configuration. Can be one-way or two-way translation.
*/
translation?: TranslationConfig;
/**
* The format of the streamed audio (e.g., "auto", "s16le").
*/
audioFormat?: string;
/**
* Required for raw PCM formats.
*/
sampleRate?: number;
/**
* Required for raw PCM formats. Typically 1 for mono audio, 2 for stereo.
*/
numChannels?: number;
/**
* A client-defined identifier to track this stream. Can be any string. If not provided, it will be auto-generated.
*/
clientReferenceId?: string;
/**
* Can be used to set the `echoCancellation` and `noiseSuppression` properties of the MediaTrackConstraints object.
* See https://developer.mozilla.org/en-US/docs/Web/API/MediaTrackConstraints for more details.
*/
audioConstraints?: MediaTrackConstraints;
/**
* MediaRecorder options: https://developer.mozilla.org/en-US/docs/Web/API/MediaRecorder/MediaRecorder
*/
mediaRecorderOptions?: Record<string, any>;
/**
* If you don't want to transcribe audio from the microphone, you can pass a MediaStream to the `stream` option.
* This is useful if you want to transcribe audio from a file or a custom source.
*/
stream?: MediaStream;
} & Callbacks;
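// A sketch of start() options for microphone transcription with speaker diarization;
// the model name is illustrative (check the Soniox docs for available models), and
// `audioConstraints` is passed through as MediaTrackConstraints as described above.
// To transcribe a file or another custom source instead, pass a MediaStream via the
// `stream` option.
//
// await sonioxClient.start({
//   model: 'stt-rt-preview', // illustrative model name
//   languageHints: ['en', 'fr'],
//   enableSpeakerDiarization: true,
//   audioConstraints: { echoCancellation: true, noiseSuppression: true },
//   onPartialResult: (result) => console.log('partial result', result),
// });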
export declare class SonioxClient {
static isSupported: boolean;
_state: RecorderState;
_options: SonioxClientOptions;
_audioOptions: AudioOptions | null;
_websocket: WebSocket | null;
_mediaRecorder: MediaRecorder | null;
_queuedMessages: (Blob | string)[];
/**
* SonioxClient connects to the Soniox Speech-to-Text API for real-time speech-to-text transcription and translation.
* It provides a simple API for starting and stopping the transcription, as well as handling the transcription results.
*
* @example
* const sonioxClient = new SonioxClient({
* apiKey: '<SONIOX_API_KEY>',
* onPartialResult: (result) => {
* console.log('partial result', result.text);
* },
* });
* sonioxClient.start({ model: '<model>' });
*/
constructor(options?: SonioxClientOptions);
_hasCallback: <T extends keyof Callbacks>(name: T) => boolean;
_callback: <T extends keyof Callbacks>(name: T, ...args: Parameters<NonNullable<Callbacks[T]>>) => void;
_setState(newState: RecorderState): void;
get state(): RecorderState;
/**
* Start transcription. You can pass options to configure the transcription settings, audio source, and callbacks.
*/
start: (audioOptions: AudioOptions) => Promise<void>;
/**
* Stop transcription. Stopping sends a stop signal to the API and waits for the final results to be received.
* The transcription finishes only after the final results have arrived. If you want to cancel the transcription immediately
* (for example, on component unmount), use the `cancel()` method instead.
*/
stop: () => void;
/**
* Cancel transcription. Cancelling stops the transcription immediately and releases its resources.
* For user-initiated stops, prefer the `stop()` method, which waits for the final results.
*/
cancel: () => void;
/**
* Trigger finalization. This finalizes all non-final tokens.
*/
finalize: () => void;
_onMediaRecorderData: (event: BlobEvent) => Promise<void>;
_onMediaRecorderError: (event: Event) => void;
_onMediaRecorderPause: (_event: Event) => void;
_onMediaRecorderStop: (_event: Event) => void;
_onWebSocketOpen: (event: Event) => void;
_onWebSocketOpenAsync: (_event: Event) => Promise<void>;
_onWebSocketError: (_event: Event) => void;
_onWebSocketMessage: (event: MessageEvent<any>) => void;
_onError: (status: ErrorStatus, message: string | undefined, errorCode?: number | undefined) => void;
_closeSource: () => void;
_closeResources: () => void;
_handleFinished(): void;
}
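// A sketch of a typical lifecycle with the class above: start on user action, finalize()
// to flush non-final tokens mid-stream, stop() for a graceful finish that waits for final
// results, and cancel() for immediate teardown. The button elements and model name are
// hypothetical.
//
// startButton.onclick = () => sonioxClient.start({ model: 'stt-rt-preview' });
// flushButton.onclick = () => sonioxClient.finalize(); // finalize all non-final tokens
// stopButton.onclick = () => sonioxClient.stop();      // onFinished fires after final results
// window.addEventListener('beforeunload', () => sonioxClient.cancel()); // tear down immediately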
/**
* @deprecated Use SonioxClient instead.
*/
export declare const RecordTranscribe: typeof SonioxClient;
export {};