UNPKG

@speechmatics/real-time-client

Version:

Client for the Speechmatics real-time API

331 lines (279 loc) 9.76 kB
import { TypedEventTarget } from 'typescript-event-target';

// ---------------------------------------------------------------------------
// Audio format and recognition configuration
// ---------------------------------------------------------------------------

/** Encoding names allowed in {@link AudioFormatRaw.encoding}. */
type RawAudioEncodingEnum =
  | 'pcm_f32le'
  | 'pcm_s16le'
  | 'mulaw';

/** Audio format variant tagged `type: 'raw'`; carries an explicit encoding and sample rate. */
interface AudioFormatRaw {
  type: 'raw';
  encoding: RawAudioEncodingEnum;
  // NOTE(review): unit is not stated in this file (presumably Hz) — confirm.
  sample_rate: number;
}

/** Audio format variant tagged `type: 'file'`; has no further fields. */
interface AudioFormatFile {
  type: 'file';
}

/** Object form of an `additional_vocab` entry (the other accepted form is a plain string). */
interface AdditionalVocabObject {
  content: string;
  sounds_like?: string[];
}

/** Allowed values for {@link TranscriptionConfig.diarization}. */
type DiarizationConfig =
  | 'none'
  | 'speaker';

/** Allowed values for {@link TranscriptionConfig.max_delay_mode}. */
type MaxDelayModeConfig =
  | 'flexible'
  | 'fixed';

/** Optional tuning fields nested under `speaker_diarization_config`. */
interface SpeakerDiarizationConfig {
  max_speakers?: number;
  prefer_current_speaker?: boolean;
  speaker_sensitivity?: number;
}

/** Optional fields nested under `audio_filtering_config`. */
interface AudioFilteringConfig {
  volume_threshold?: number;
}

/** A single `from` → `to` string pair used by {@link TranscriptFilteringConfig.replacements}. */
interface WordReplacementItem {
  from: string;
  to: string;
}

/** Optional fields nested under `transcript_filtering_config`. */
interface TranscriptFilteringConfig {
  remove_disfluencies?: boolean;
  // Declared as an array of arrays of replacement items.
  replacements?: WordReplacementItem[][];
}

/** Allowed values for {@link TranscriptionConfig.operating_point}. */
type OperatingPoint =
  | 'standard'
  | 'enhanced';

/** Optional fields nested under `punctuation_overrides`. */
interface PunctuationOverrides {
  permitted_marks?: string[];
  sensitivity?: number;
}

/** Optional fields nested under `conversation_config`. */
interface ConversationConfig {
  // NOTE(review): unit is not stated in this file (presumably seconds) — confirm.
  end_of_utterance_silence_trigger?: number;
}

/**
 * Transcription settings carried by {@link StartRecognition} and
 * {@link SetRecognitionConfig}. Only `language` is required.
 */
interface TranscriptionConfig {
  language: string;
  domain?: string;
  output_locale?: string;
  // Each entry is either a plain string or an AdditionalVocabObject.
  additional_vocab?: Array<string | AdditionalVocabObject>;
  diarization?: DiarizationConfig;
  max_delay?: number;
  max_delay_mode?: MaxDelayModeConfig;
  speaker_diarization_config?: SpeakerDiarizationConfig;
  audio_filtering_config?: AudioFilteringConfig;
  transcript_filtering_config?: TranscriptFilteringConfig;
  enable_partials?: boolean;
  enable_entities?: boolean;
  operating_point?: OperatingPoint;
  punctuation_overrides?: PunctuationOverrides;
  conversation_config?: ConversationConfig;
}

/** Translation settings optionally carried by {@link StartRecognition}. */
interface TranslationConfig {
  target_languages: string[];
  enable_partials?: boolean;
}

/** Audio-event settings optionally carried by {@link StartRecognition}. */
interface AudioEventsConfig {
  types?: string[];
}

// ---------------------------------------------------------------------------
// Client messages (members of RealtimeClientMessage)
// ---------------------------------------------------------------------------

/** Message tagged `'StartRecognition'`: audio format plus transcription and optional extras. */
interface StartRecognition {
  message: 'StartRecognition';
  audio_format: AudioFormatRaw | AudioFormatFile;
  transcription_config: TranscriptionConfig;
  translation_config?: TranslationConfig;
  audio_events_config?: AudioEventsConfig;
}

/** Message tagged `'EndOfStream'`. */
interface EndOfStream {
  message: 'EndOfStream';
  // NOTE(review): presumably the sequence number of the last audio chunk sent — confirm.
  last_seq_no: number;
}

/** Message tagged `'SetRecognitionConfig'` carrying a full {@link TranscriptionConfig}. */
interface SetRecognitionConfig {
  message: 'SetRecognitionConfig';
  transcription_config: TranscriptionConfig;
}

/** Union of the client message shapes; this is the payload type of {@link SendMessageEvent}. */
type RealtimeClientMessage =
  | StartRecognition
  | EndOfStream
  | SetRecognitionConfig;

// ---------------------------------------------------------------------------
// Server messages (members of RealtimeServerMessage)
// ---------------------------------------------------------------------------

/** Message tagged `'RecognitionStarted'`; also the resolved value of `RealtimeClient.start`. */
interface RecognitionStarted {
  message: 'RecognitionStarted';
  orchestrator_version?: string;
  id?: string;
}

/** Message tagged `'AudioAdded'` with a sequence number. */
interface AudioAdded {
  message: 'AudioAdded';
  seq_no: number;
}

/** Metadata attached to transcript messages: a time span and the transcript text. */
interface RecognitionMetadata {
  start_time: number;
  end_time: number;
  transcript: string;
}

/** Allowed values for {@link RecognitionResult."type"}. */
type RecognitionResultTypeEnum =
  | 'word'
  | 'punctuation';

/** Allowed values for {@link RecognitionResult.attaches_to}. */
type AttachesToEnum =
  | 'next'
  | 'previous'
  | 'none'
  | 'both';

/** Allowed values for {@link RecognitionDisplay.direction}. */
type DirectionEnum =
  | 'ltr'
  | 'rtl';

/** Display information attached to a {@link RecognitionAlternative}. */
interface RecognitionDisplay {
  direction: DirectionEnum;
}

/** One alternative for a recognition result: content and confidence, plus optional details. */
interface RecognitionAlternative {
  content: string;
  confidence: number;
  language?: string;
  display?: RecognitionDisplay;
  speaker?: string;
}

/** One entry of the `results` array in transcript messages. */
interface RecognitionResult {
  type: RecognitionResultTypeEnum;
  start_time: number;
  end_time: number;
  channel?: string;
  attaches_to?: AttachesToEnum;
  is_eos?: boolean;
  alternatives?: RecognitionAlternative[];
  score?: number;
  volume?: number;
}

/** Message tagged `'AddPartialTranscript'`; same fields as {@link AddTranscript} apart from the tag. */
interface AddPartialTranscript {
  message: 'AddPartialTranscript';
  format?: string;
  metadata: RecognitionMetadata;
  results: RecognitionResult[];
}

/** Message tagged `'AddTranscript'`. */
interface AddTranscript {
  message: 'AddTranscript';
  format?: string;
  metadata: RecognitionMetadata;
  results: RecognitionResult[];
}

/** One entry of the `results` array in translation messages. */
interface TranslatedSentence {
  content: string;
  start_time: number;
  end_time: number;
  speaker?: string;
}

/** Message tagged `'AddPartialTranslation'`; same fields as {@link AddTranslation} apart from the tag. */
interface AddPartialTranslation {
  message: 'AddPartialTranslation';
  format?: string;
  language: string;
  results: TranslatedSentence[];
}

/** Message tagged `'AddTranslation'`. */
interface AddTranslation {
  message: 'AddTranslation';
  format?: string;
  language: string;
  results: TranslatedSentence[];
}

/** Message tagged `'EndOfTranscript'`; carries no other fields. */
interface EndOfTranscript {
  message: 'EndOfTranscript';
}

/** Payload of {@link AudioEventStarted}. */
interface AudioEventStartData {
  type: string;
  start_time: number;
  confidence: number;
}

/** Message tagged `'AudioEventStarted'`. */
interface AudioEventStarted {
  message: 'AudioEventStarted';
  event: AudioEventStartData;
}

/** Payload of {@link AudioEventEnded}. */
interface AudioEventEndData {
  type: string;
  end_time: number;
}

/** Message tagged `'AudioEventEnded'`. */
interface AudioEventEnded {
  message: 'AudioEventEnded';
  event: AudioEventEndData;
}

/** Metadata of {@link EndOfUtterance}; both time fields are optional. */
interface EndOfUtteranceMetadata {
  start_time?: number;
  end_time?: number;
}

/** Message tagged `'EndOfUtterance'`. */
interface EndOfUtterance {
  message: 'EndOfUtterance';
  metadata: EndOfUtteranceMetadata;
}

/** Allowed values for {@link Info."type"}. */
type InfoTypeEnum =
  | 'recognition_quality'
  | 'model_redirect'
  | 'deprecated'
  | 'concurrent_session_usage';

/** Message tagged `'Info'`; `type` and `reason` are required, the rest optional. */
interface Info {
  message: 'Info';
  type: InfoTypeEnum;
  reason: string;
  code?: number;
  seq_no?: number;
  quality?: string;
  usage?: number;
  quota?: number;
  last_updated?: string;
}

/** Allowed values for {@link Warning."type"} (currently a single literal). */
type WarningTypeEnum = 'duration_limit_exceeded';

/** Message tagged `'Warning'`. */
interface Warning {
  message: 'Warning';
  type: WarningTypeEnum;
  reason: string;
  code?: number;
  seq_no?: number;
  duration_limit?: number;
}

/** Allowed values for {@link ErrorType."type"}. */
type ErrorTypeEnum =
  | 'invalid_message'
  | 'invalid_model'
  | 'invalid_config'
  | 'invalid_audio_type'
  | 'not_authorised'
  | 'insufficient_funds'
  | 'not_allowed'
  | 'job_error'
  | 'data_error'
  | 'buffer_error'
  | 'protocol_error'
  | 'timelimit_exceeded'
  | 'quota_exceeded'
  | 'unknown_error';

/** Message tagged `'Error'`. */
interface ErrorType {
  message: 'Error';
  type: ErrorTypeEnum;
  reason: string;
  code?: number;
  seq_no?: number;
}

/**
 * Union of the server message shapes, discriminated by the `message` literal.
 * This is the payload type of {@link ReceiveMessageEvent}.
 */
type RealtimeServerMessage =
  | RecognitionStarted
  | AudioAdded
  | AddPartialTranscript
  | AddTranscript
  | AddPartialTranslation
  | AddTranslation
  | EndOfTranscript
  | AudioEventStarted
  | AudioEventEnded
  | EndOfUtterance
  | Info
  | Warning
  | ErrorType;

// ---------------------------------------------------------------------------
// Events
// ---------------------------------------------------------------------------

/** Event carrying a value of the same type as `RealtimeClient.socketState`. */
declare class SocketStateChangeEvent extends Event {
  readonly socketState: RealtimeClient['socketState'];
  constructor(socketState: RealtimeClient['socketState']);
}

/** Event carrying a {@link RealtimeServerMessage} in `data`. */
declare class ReceiveMessageEvent extends Event {
  readonly data: RealtimeServerMessage;
  constructor(data: RealtimeServerMessage);
}

/** Event carrying a {@link RealtimeClientMessage} in `data`. */
declare class SendMessageEvent extends Event {
  readonly data: RealtimeClientMessage;
  constructor(data: RealtimeClientMessage);
}

/** Maps each event name of {@link RealtimeClient} to its event class. */
interface RealtimeClientEventMap {
  sendMessage: SendMessageEvent;
  receiveMessage: ReceiveMessageEvent;
  socketStateChange: SocketStateChangeEvent;
}

// ---------------------------------------------------------------------------
// Client
// ---------------------------------------------------------------------------

/** Whatever `WebSocket.send` accepts as its first argument. */
type AddAudio = Parameters<WebSocket['send']>[0];

/** Constructor options for {@link RealtimeClient}; every field is optional. */
interface RealtimeClientOptions {
  url?: string;
  appId?: string;
  enableLegacy?: boolean;
}

/**
 * The {@link StartRecognition} shape without `message`, and with
 * `audio_format` made optional.
 */
type RealtimeTranscriptionConfig =
  Omit<StartRecognition, 'message' | 'audio_format'>
  & Partial<Pick<StartRecognition, 'audio_format'>>;

/**
 * Client for the Speechmatics real-time API, exposed as a typed event target
 * over {@link RealtimeClientEventMap}.
 */
declare class RealtimeClient extends TypedEventTarget<RealtimeClientEventMap> {
  constructor(config?: RealtimeClientOptions);

  readonly url: string;
  private readonly appId?;
  private readonly enableLegacy;
  private socket?;

  /** One of the four listed state strings, or `undefined`. */
  get socketState(): 'connecting' | 'open' | 'closing' | 'closed' | undefined;

  private lastAudioAddedSeqNo;
  private connect;
  private sendMessage;

  /** Accepts any value that `WebSocket.send` accepts. */
  sendAudio(data: AddAudio): void;

  /**
   * @param jwt - NOTE(review): presumably a JSON Web Token used for authentication — confirm.
   * @param config - Recognition settings; `audio_format` may be omitted.
   * @returns A promise of the {@link RecognitionStarted} message.
   */
  start(jwt: string, config: RealtimeTranscriptionConfig): Promise<RecognitionStarted>;

  /** The options object is optional; `noTimeout`, when present, can only be `true`. */
  stopRecognition({ noTimeout }?: {
    noTimeout?: true;
  }): Promise<unknown>;

  setRecognitionConfig(config: TranscriptionConfig): void;
}

/** Error subclass exported by this package. */
declare class SpeechmaticsRealtimeError extends Error {
  constructor(message: string, options?: ErrorOptions);
}

// ---------------------------------------------------------------------------
// Feature discovery
// ---------------------------------------------------------------------------

/**
 * @param region - Optional {@link Region} identifier.
 * @returns A promise of a {@link FeatureResponse}.
 */
declare function getFeatures(region?: Region): Promise<FeatureResponse>;

/** Region identifiers accepted by {@link getFeatures}. Not part of the export list. */
type Region =
  | 'eu2'
  | 'neu'
  | 'wus';

/** Shape of the value resolved by {@link getFeatures}. */
interface FeatureResponse {
  metadata: {
    language_pack_info: Record<
      string,
      {
        language_description: string;
        locales?: Record<
          string,
          {
            name: string;
          }
        >;
      }
    >;
  };
  realtime: {
    // Declared as a one-element tuple whose entry has version 'latest'.
    transcription: [
      {
        version: 'latest';
        languages: string[];
        locales: Record<string, string[]>;
        domains: Record<string, string[]>;
      },
    ];
    // Declared as a one-element tuple whose entry has version 'latest'.
    translation: [
      {
        version: 'latest';
        languages: Record<string, string[]>;
      },
    ];
  };
}

export {
  type AddAudio,
  type AddPartialTranscript,
  type AddPartialTranslation,
  type AddTranscript,
  type AddTranslation,
  type AdditionalVocabObject,
  type AttachesToEnum,
  type AudioAdded,
  type AudioEventEndData,
  type AudioEventEnded,
  type AudioEventStartData,
  type AudioEventStarted,
  type AudioEventsConfig,
  type AudioFilteringConfig,
  type AudioFormatFile,
  type AudioFormatRaw,
  type ConversationConfig,
  type DiarizationConfig,
  type DirectionEnum,
  type EndOfStream,
  type EndOfTranscript,
  type EndOfUtterance,
  type EndOfUtteranceMetadata,
  type ErrorType,
  type ErrorTypeEnum,
  type FeatureResponse,
  type Info,
  type InfoTypeEnum,
  type MaxDelayModeConfig,
  type OperatingPoint,
  type PunctuationOverrides,
  type RawAudioEncodingEnum,
  RealtimeClient,
  type RealtimeClientEventMap,
  type RealtimeClientMessage,
  type RealtimeClientOptions,
  type RealtimeServerMessage,
  type RealtimeTranscriptionConfig,
  ReceiveMessageEvent,
  type RecognitionAlternative,
  type RecognitionDisplay,
  type RecognitionMetadata,
  type RecognitionResult,
  type RecognitionResultTypeEnum,
  type RecognitionStarted,
  SendMessageEvent,
  type SetRecognitionConfig,
  SocketStateChangeEvent,
  type SpeakerDiarizationConfig,
  SpeechmaticsRealtimeError,
  type StartRecognition,
  type TranscriptFilteringConfig,
  type TranscriptionConfig,
  type TranslatedSentence,
  type TranslationConfig,
  type Warning,
  type WarningTypeEnum,
  type WordReplacementItem,
  getFeatures,
};