@volley/recognition-client-sdk
Recognition Service TypeScript/Node.js Client SDK
import { z } from 'zod';
/**
* Provider types and enums for recognition services
* NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
*/
/**
* Supported speech recognition providers
*/
declare enum RecognitionProvider {
ASSEMBLYAI = "assemblyai",
DEEPGRAM = "deepgram",
ELEVENLABS = "elevenlabs",
FIREWORKS = "fireworks",
GOOGLE = "google",
GEMINI_BATCH = "gemini-batch",
OPENAI_BATCH = "openai-batch",
OPENAI_REALTIME = "openai-realtime"
}
/**
* ASR API type - distinguishes between streaming and file-based transcription APIs
* - STREAMING: Real-time streaming APIs (Deepgram, AssemblyAI, Google)
* - FILE_BASED: File upload/batch APIs (OpenAI Batch, Gemini Batch)
*/
declare enum ASRApiType {
STREAMING = "streaming",
FILE_BASED = "file-based"
}
/**
* Deepgram model names
*/
declare enum DeepgramModel {
NOVA_2 = "nova-2",
NOVA_3 = "nova-3",
FLUX_GENERAL_EN = "flux-general-en"
}
/**
* Google Cloud Speech models
* @see https://cloud.google.com/speech-to-text/docs/transcription-model
* @see https://cloud.google.com/speech-to-text/v2/docs/chirp_3-model
*/
declare enum GoogleModel {
CHIRP_3 = "chirp_3",
CHIRP_2 = "chirp_2",
CHIRP = "chirp",
LATEST_LONG = "latest_long",
LATEST_SHORT = "latest_short",
TELEPHONY = "telephony",
TELEPHONY_SHORT = "telephony_short",
DEFAULT = "default",
COMMAND_AND_SEARCH = "command_and_search",
PHONE_CALL = "phone_call",
VIDEO = "video"
}
/**
* Fireworks AI models for ASR
* @see https://docs.fireworks.ai/guides/querying-asr-models
* @see https://fireworks.ai/models/fireworks/fireworks-asr-large
*/
declare enum FireworksModel {
ASR_V1 = "fireworks-asr-large",
ASR_V2 = "fireworks-asr-v2",
WHISPER_V3 = "whisper-v3",
WHISPER_V3_TURBO = "whisper-v3-turbo"
}
/**
* ElevenLabs Scribe models for speech-to-text
* @see https://elevenlabs.io/blog/introducing-scribe-v2-realtime
* @see https://elevenlabs.io/docs/cookbooks/speech-to-text/streaming
* @see https://elevenlabs.io/docs/api-reference/speech-to-text/convert
*/
declare enum ElevenLabsModel {
SCRIBE_V2_REALTIME = "scribe_v2_realtime",
SCRIBE_V1 = "scribe_v1"
}
/**
* OpenAI Realtime API transcription models
* These are the verified `input_audio_transcription.model` values.
* @see https://platform.openai.com/docs/guides/realtime
*/
declare enum OpenAIRealtimeModel {
GPT_4O_MINI_TRANSCRIBE = "gpt-4o-mini-transcribe"
}
/**
* Type alias for any model from any provider
*/
type RecognitionModel = DeepgramModel | GoogleModel | FireworksModel | ElevenLabsModel | OpenAIRealtimeModel | string;
/**
* Audio encoding types
*/
declare enum AudioEncoding {
ENCODING_UNSPECIFIED = 0,
LINEAR16 = 1,
OGG_OPUS = 2,
FLAC = 3,
MULAW = 4,
ALAW = 5
}
declare namespace AudioEncoding {
/**
* Convert numeric ID to AudioEncoding enum
* @param id - Numeric encoding identifier (0-5)
* @returns AudioEncoding enum value or undefined if invalid
*/
function fromId(id: number): AudioEncoding | undefined;
/**
* Convert string name to AudioEncoding enum
* @param nameStr - String name like "linear16", "LINEAR16", "ogg_opus", "OGG_OPUS", etc. (case insensitive)
* @returns AudioEncoding enum value or undefined if invalid
*/
function fromName(nameStr: string): AudioEncoding | undefined;
/**
* Convert AudioEncoding enum to numeric ID
* @param encoding - AudioEncoding enum value
* @returns Numeric ID (0-5)
*/
function toId(encoding: AudioEncoding): number;
/**
* Convert AudioEncoding enum to string name
* @param encoding - AudioEncoding enum value
* @returns String name like "LINEAR16", "MULAW", etc.
*/
function toName(encoding: AudioEncoding): string;
/**
* Check if a numeric ID is a valid encoding
* @param id - Numeric identifier to validate
* @returns true if valid encoding ID
*/
function isIdValid(id: number): boolean;
/**
* Check if a string name is a valid encoding
* @param nameStr - String name to validate
* @returns true if valid encoding name
*/
function isNameValid(nameStr: string): boolean;
}
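/**
 * Usage sketch (illustrative, not part of the generated declarations):
 * round-tripping an encoding between its numeric wire ID, its string name,
 * and the enum, using the AudioEncoding helpers declared above. The import
 * path assumes the package root re-exports these symbols.
 *
 * ```typescript
 * import { AudioEncoding } from '@volley/recognition-client-sdk';
 *
 * const enc = AudioEncoding.fromName('ogg_opus'); // case-insensitive lookup
 * if (enc !== undefined) {
 *   AudioEncoding.toId(enc);   // 2
 *   AudioEncoding.toName(enc); // "OGG_OPUS"
 * }
 * AudioEncoding.isIdValid(7);  // false - only IDs 0-5 are defined
 * ```
 */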
/**
* Common sample rates (in Hz)
*/
declare enum SampleRate {
RATE_8000 = 8000,
RATE_16000 = 16000,
RATE_22050 = 22050,
RATE_24000 = 24000,
RATE_32000 = 32000,
RATE_44100 = 44100,
RATE_48000 = 48000
}
declare namespace SampleRate {
/**
* Convert Hz value to SampleRate enum
* @param hz - Sample rate in Hz (8000, 16000, etc.)
* @returns SampleRate enum value or undefined if invalid
*/
function fromHz(hz: number): SampleRate | undefined;
/**
* Convert string name to SampleRate enum
* @param nameStr - String name like "rate_8000", "RATE_16000", etc. (case insensitive)
* @returns SampleRate enum value or undefined if invalid
*/
function fromName(nameStr: string): SampleRate | undefined;
/**
* Convert SampleRate enum to Hz value
* @param rate - SampleRate enum value
* @returns Hz value (8000, 16000, etc.)
*/
function toHz(rate: SampleRate): number;
/**
* Convert SampleRate enum to string name
* @param rate - SampleRate enum value
* @returns String name like "RATE_8000", "RATE_16000", etc.
*/
function toName(rate: SampleRate): string;
/**
* Check if a numeric Hz value is a valid sample rate
* @param hz - Hz value to validate
* @returns true if valid sample rate
*/
function isHzValid(hz: number): boolean;
/**
* Check if a string name is a valid sample rate
* @param nameStr - String name to validate
* @returns true if valid sample rate name
*/
function isNameValid(nameStr: string): boolean;
}
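/**
 * Usage sketch (illustrative): validating a caller-supplied Hz value with
 * the SampleRate helpers above before building a request.
 *
 * ```typescript
 * import { SampleRate } from '@volley/recognition-client-sdk';
 *
 * const hz = 16000;
 * const rate = SampleRate.fromHz(hz); // SampleRate.RATE_16000
 * if (rate === undefined) {
 *   throw new Error(`Unsupported sample rate: ${hz} Hz`);
 * }
 * SampleRate.toName(rate); // "RATE_16000"
 * ```
 */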
/**
* Supported languages for recognition
* Using BCP-47 language tags
*/
declare enum Language {
ENGLISH_US = "en-US",
ENGLISH_GB = "en-GB",
SPANISH_ES = "es-ES",
SPANISH_MX = "es-MX",
FRENCH_FR = "fr-FR",
GERMAN_DE = "de-DE",
ITALIAN_IT = "it-IT",
PORTUGUESE_BR = "pt-BR",
JAPANESE_JP = "ja-JP",
KOREAN_KR = "ko-KR",
CHINESE_CN = "zh-CN",
CHINESE_TW = "zh-TW"
}
/**
* Recognition Result Types V1
* NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
* Types and schemas for recognition results sent to SDK clients
*/
/**
* Message type discriminator for recognition results V1
*/
declare enum RecognitionResultTypeV1 {
TRANSCRIPTION = "Transcription",
FUNCTION_CALL = "FunctionCall",
METADATA = "Metadata",
ERROR = "Error",
CLIENT_CONTROL_MESSAGE = "ClientControlMessage"
}
/**
* Transcription result V1 - contains transcript message
* In the long run, the game side should not need to know about this; in the short run it is sent back to the client.
* NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
*/
declare const TranscriptionResultSchemaV1: z.ZodObject<{
type: z.ZodLiteral<RecognitionResultTypeV1.TRANSCRIPTION>;
audioUtteranceId: z.ZodString;
finalTranscript: z.ZodString;
finalTranscriptConfidence: z.ZodOptional<z.ZodNumber>;
pendingTranscript: z.ZodOptional<z.ZodString>;
pendingTranscriptConfidence: z.ZodOptional<z.ZodNumber>;
is_finished: z.ZodBoolean;
voiceStart: z.ZodOptional<z.ZodNumber>;
voiceDuration: z.ZodOptional<z.ZodNumber>;
voiceEnd: z.ZodOptional<z.ZodNumber>;
startTimestamp: z.ZodOptional<z.ZodNumber>;
endTimestamp: z.ZodOptional<z.ZodNumber>;
receivedAtMs: z.ZodOptional<z.ZodNumber>;
accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
}, "strip", z.ZodTypeAny, {
type: RecognitionResultTypeV1.TRANSCRIPTION;
audioUtteranceId: string;
finalTranscript: string;
is_finished: boolean;
finalTranscriptConfidence?: number | undefined;
pendingTranscript?: string | undefined;
pendingTranscriptConfidence?: number | undefined;
voiceStart?: number | undefined;
voiceDuration?: number | undefined;
voiceEnd?: number | undefined;
startTimestamp?: number | undefined;
endTimestamp?: number | undefined;
receivedAtMs?: number | undefined;
accumulatedAudioTimeMs?: number | undefined;
}, {
type: RecognitionResultTypeV1.TRANSCRIPTION;
audioUtteranceId: string;
finalTranscript: string;
is_finished: boolean;
finalTranscriptConfidence?: number | undefined;
pendingTranscript?: string | undefined;
pendingTranscriptConfidence?: number | undefined;
voiceStart?: number | undefined;
voiceDuration?: number | undefined;
voiceEnd?: number | undefined;
startTimestamp?: number | undefined;
endTimestamp?: number | undefined;
receivedAtMs?: number | undefined;
accumulatedAudioTimeMs?: number | undefined;
}>;
type TranscriptionResultV1 = z.infer<typeof TranscriptionResultSchemaV1>;
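/**
 * Parsing sketch (illustrative): validating an incoming message against the
 * schema above. `raw` stands in for whatever JSON the transport delivered;
 * the logging is hypothetical, but the field accesses follow the schema.
 *
 * ```typescript
 * import { TranscriptionResultSchemaV1 } from '@volley/recognition-client-sdk';
 *
 * function handleTranscription(raw: unknown): void {
 *   const parsed = TranscriptionResultSchemaV1.safeParse(raw);
 *   if (!parsed.success) return; // not a Transcription message
 *   const { finalTranscript, pendingTranscript, is_finished } = parsed.data;
 *   if (is_finished) {
 *     console.log('final:', finalTranscript);
 *   } else {
 *     console.log('partial:', pendingTranscript ?? finalTranscript);
 *   }
 * }
 * ```
 */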
/**
* Function call result V1 - similar to an LLM function call
* In the long run, the game server should consume this, rather than the TV or client.
*/
declare const FunctionCallResultSchemaV1: z.ZodObject<{
type: z.ZodLiteral<RecognitionResultTypeV1.FUNCTION_CALL>;
audioUtteranceId: z.ZodString;
functionName: z.ZodString;
functionArgJson: z.ZodString;
}, "strip", z.ZodTypeAny, {
type: RecognitionResultTypeV1.FUNCTION_CALL;
audioUtteranceId: string;
functionName: string;
functionArgJson: string;
}, {
type: RecognitionResultTypeV1.FUNCTION_CALL;
audioUtteranceId: string;
functionName: string;
functionArgJson: string;
}>;
type FunctionCallResultV1 = z.infer<typeof FunctionCallResultSchemaV1>;
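/**
 * Handling sketch (illustrative): a function-call result carries its
 * arguments as a JSON string, so the consumer parses `functionArgJson`
 * itself. The dispatch shown here is hypothetical.
 *
 * ```typescript
 * import type { FunctionCallResultV1 } from '@volley/recognition-client-sdk';
 *
 * function dispatch(result: FunctionCallResultV1): void {
 *   const args = JSON.parse(result.functionArgJson) as Record<string, unknown>;
 *   console.log(`call ${result.functionName} for ${result.audioUtteranceId}`, args);
 * }
 * ```
 */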
/**
* Metadata result V1 - contains metadata, timing information, and ASR config
* Sent when the provider connection closes to provide final timing metrics and config
* In the long run, the game server should consume this, rather than the TV or client.
*/
declare const MetadataResultSchemaV1: z.ZodObject<{
type: z.ZodLiteral<RecognitionResultTypeV1.METADATA>;
audioUtteranceId: z.ZodString;
recordingStartMs: z.ZodOptional<z.ZodNumber>;
recordingEndMs: z.ZodOptional<z.ZodNumber>;
transcriptEndMs: z.ZodOptional<z.ZodNumber>;
socketCloseAtMs: z.ZodOptional<z.ZodNumber>;
duration: z.ZodOptional<z.ZodNumber>;
volume: z.ZodOptional<z.ZodNumber>;
accumulatedAudioTimeMs: z.ZodOptional<z.ZodNumber>;
costInUSD: z.ZodOptional<z.ZodDefault<z.ZodNumber>>;
apiType: z.ZodOptional<z.ZodNativeEnum<typeof ASRApiType>>;
asrConfig: z.ZodOptional<z.ZodString>;
rawAsrMetadata: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
type: RecognitionResultTypeV1.METADATA;
audioUtteranceId: string;
recordingStartMs?: number | undefined;
recordingEndMs?: number | undefined;
transcriptEndMs?: number | undefined;
socketCloseAtMs?: number | undefined;
duration?: number | undefined;
volume?: number | undefined;
accumulatedAudioTimeMs?: number | undefined;
costInUSD?: number | undefined;
apiType?: ASRApiType | undefined;
asrConfig?: string | undefined;
rawAsrMetadata?: string | undefined;
}, {
type: RecognitionResultTypeV1.METADATA;
audioUtteranceId: string;
recordingStartMs?: number | undefined;
recordingEndMs?: number | undefined;
transcriptEndMs?: number | undefined;
socketCloseAtMs?: number | undefined;
duration?: number | undefined;
volume?: number | undefined;
accumulatedAudioTimeMs?: number | undefined;
costInUSD?: number | undefined;
apiType?: ASRApiType | undefined;
asrConfig?: string | undefined;
rawAsrMetadata?: string | undefined;
}>;
type MetadataResultV1 = z.infer<typeof MetadataResultSchemaV1>;
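/**
 * Timing sketch (illustrative): all timing fields are optional, so guard
 * each access. Interpreting transcriptEndMs - recordingEndMs as tail
 * latency is an assumption, not documented behavior.
 *
 * ```typescript
 * import type { MetadataResultV1 } from '@volley/recognition-client-sdk';
 *
 * function tailLatencyMs(meta: MetadataResultV1): number | undefined {
 *   if (meta.recordingEndMs === undefined || meta.transcriptEndMs === undefined) {
 *     return undefined;
 *   }
 *   return meta.transcriptEndMs - meta.recordingEndMs;
 * }
 * ```
 */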
/**
* Error type enum V1 - categorizes different types of errors
*/
declare enum ErrorTypeV1 {
AUTHENTICATION_ERROR = "authentication_error",
VALIDATION_ERROR = "validation_error",
PROVIDER_ERROR = "provider_error",
TIMEOUT_ERROR = "timeout_error",
QUOTA_EXCEEDED = "quota_exceeded",
CONNECTION_ERROR = "connection_error",
UNKNOWN_ERROR = "unknown_error"
}
/**
* Error result V1 - contains error message
* In the long run, the game server should consume this, rather than the TV or client.
*/
declare const ErrorResultSchemaV1: z.ZodObject<{
type: z.ZodLiteral<RecognitionResultTypeV1.ERROR>;
audioUtteranceId: z.ZodString;
errorType: z.ZodOptional<z.ZodNativeEnum<typeof ErrorTypeV1>>;
message: z.ZodOptional<z.ZodString>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
description: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
type: RecognitionResultTypeV1.ERROR;
audioUtteranceId: string;
errorType?: ErrorTypeV1 | undefined;
message?: string | undefined;
code?: string | number | undefined;
description?: string | undefined;
}, {
type: RecognitionResultTypeV1.ERROR;
audioUtteranceId: string;
errorType?: ErrorTypeV1 | undefined;
message?: string | undefined;
code?: string | number | undefined;
description?: string | undefined;
}>;
type ErrorResultV1 = z.infer<typeof ErrorResultSchemaV1>;
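/**
 * Handling sketch (illustrative): branching on the optional errorType. The
 * retry policy below is hypothetical, not prescribed by the SDK.
 *
 * ```typescript
 * import { ErrorTypeV1, type ErrorResultV1 } from '@volley/recognition-client-sdk';
 *
 * function shouldRetry(err: ErrorResultV1): boolean {
 *   switch (err.errorType) {
 *     case ErrorTypeV1.TIMEOUT_ERROR:
 *     case ErrorTypeV1.CONNECTION_ERROR:
 *       return true; // likely transient
 *     default:
 *       return false; // auth/validation/quota need other handling
 *   }
 * }
 * ```
 */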
/**
* Client control actions enum V1
* Actions that can be sent from server to client to control the recognition stream
* In the long run, the audio client (mic) should consume this, rather than the servers.
*/
declare enum ClientControlActionV1 {
READY_FOR_UPLOADING_RECORDING = "ready_for_uploading_recording",
STOP_RECORDING = "stop_recording"
}
/**
* Error Exception Types
*
* Defines structured exception types for each ErrorTypeV1 category.
* Each exception type has metadata indicating whether it is immediately available
* (can be shown to the user right away) or needs investigation/retry.
*/
/**
* Authentication/Authorization Error
* isImmediatelyAvailable: false
* These are system configuration issues, not user-facing
*/
declare const AuthenticationExceptionSchema: z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.AUTHENTICATION_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<false>;
service: z.ZodOptional<z.ZodString>;
authMethod: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
isImmediatelyAvailable: false;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
service?: string | undefined;
authMethod?: string | undefined;
}, {
message: string;
errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
isImmediatelyAvailable: false;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
service?: string | undefined;
authMethod?: string | undefined;
}>;
type AuthenticationException = z.infer<typeof AuthenticationExceptionSchema>;
/**
* Validation Error
* isImmediatelyAvailable: true
* User provided invalid input - can show them what's wrong
*/
declare const ValidationExceptionSchema: z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.VALIDATION_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<true>;
field: z.ZodOptional<z.ZodString>;
expected: z.ZodOptional<z.ZodString>;
received: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.VALIDATION_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
field?: string | undefined;
expected?: string | undefined;
received?: string | undefined;
}, {
message: string;
errorType: ErrorTypeV1.VALIDATION_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
field?: string | undefined;
expected?: string | undefined;
received?: string | undefined;
}>;
type ValidationException = z.infer<typeof ValidationExceptionSchema>;
/**
* Provider Error
* isImmediatelyAvailable: false
* Error from ASR provider - usually transient or needs investigation
*/
declare const ProviderExceptionSchema: z.ZodObject<{
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.PROVIDER_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<false>;
provider: z.ZodOptional<z.ZodString>;
providerErrorCode: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
isTransient: z.ZodOptional<z.ZodBoolean>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.PROVIDER_ERROR;
isImmediatelyAvailable: false;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
provider?: string | undefined;
providerErrorCode?: string | number | undefined;
isTransient?: boolean | undefined;
}, {
message: string;
errorType: ErrorTypeV1.PROVIDER_ERROR;
isImmediatelyAvailable: false;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
provider?: string | undefined;
providerErrorCode?: string | number | undefined;
isTransient?: boolean | undefined;
}>;
type ProviderException = z.infer<typeof ProviderExceptionSchema>;
/**
* Timeout Error
* isImmediatelyAvailable: true
* Request took too long - user should try again
*/
declare const TimeoutExceptionSchema: z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.TIMEOUT_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<true>;
timeoutMs: z.ZodOptional<z.ZodNumber>;
operation: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.TIMEOUT_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
timeoutMs?: number | undefined;
operation?: string | undefined;
}, {
message: string;
errorType: ErrorTypeV1.TIMEOUT_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
timeoutMs?: number | undefined;
operation?: string | undefined;
}>;
type TimeoutException = z.infer<typeof TimeoutExceptionSchema>;
/**
* Quota Exceeded Error
* isImmediatelyAvailable: true
* Rate limit or quota exceeded - user should wait
*/
declare const QuotaExceededExceptionSchema: z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.QUOTA_EXCEEDED>;
isImmediatelyAvailable: z.ZodLiteral<true>;
quotaType: z.ZodOptional<z.ZodString>;
resetAt: z.ZodOptional<z.ZodNumber>;
retryAfterSeconds: z.ZodOptional<z.ZodNumber>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.QUOTA_EXCEEDED;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
quotaType?: string | undefined;
resetAt?: number | undefined;
retryAfterSeconds?: number | undefined;
}, {
message: string;
errorType: ErrorTypeV1.QUOTA_EXCEEDED;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
quotaType?: string | undefined;
resetAt?: number | undefined;
retryAfterSeconds?: number | undefined;
}>;
type QuotaExceededException = z.infer<typeof QuotaExceededExceptionSchema>;
/**
* Connection Error
* isImmediatelyAvailable: true
* Connection establishment or network failure - user should check network or retry
*/
declare const ConnectionExceptionSchema: z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.CONNECTION_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<true>;
attempts: z.ZodOptional<z.ZodNumber>;
url: z.ZodOptional<z.ZodString>;
underlyingError: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.CONNECTION_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
attempts?: number | undefined;
url?: string | undefined;
underlyingError?: string | undefined;
}, {
message: string;
errorType: ErrorTypeV1.CONNECTION_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
attempts?: number | undefined;
url?: string | undefined;
underlyingError?: string | undefined;
}>;
type ConnectionException = z.infer<typeof ConnectionExceptionSchema>;
/**
* Unknown Error
* isImmediatelyAvailable: false
* Unexpected error - needs investigation
*/
declare const UnknownExceptionSchema: z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.UNKNOWN_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<false>;
stack: z.ZodOptional<z.ZodString>;
context: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.UNKNOWN_ERROR;
isImmediatelyAvailable: false;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
stack?: string | undefined;
context?: Record<string, unknown> | undefined;
}, {
message: string;
errorType: ErrorTypeV1.UNKNOWN_ERROR;
isImmediatelyAvailable: false;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
stack?: string | undefined;
context?: Record<string, unknown> | undefined;
}>;
type UnknownException = z.infer<typeof UnknownExceptionSchema>;
/**
* Discriminated union of all exception types
* Use this for type-safe error handling
*/
declare const RecognitionExceptionSchema: z.ZodDiscriminatedUnion<"errorType", [z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.AUTHENTICATION_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<false>;
service: z.ZodOptional<z.ZodString>;
authMethod: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
isImmediatelyAvailable: false;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
service?: string | undefined;
authMethod?: string | undefined;
}, {
message: string;
errorType: ErrorTypeV1.AUTHENTICATION_ERROR;
isImmediatelyAvailable: false;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
service?: string | undefined;
authMethod?: string | undefined;
}>, z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.VALIDATION_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<true>;
field: z.ZodOptional<z.ZodString>;
expected: z.ZodOptional<z.ZodString>;
received: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.VALIDATION_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
field?: string | undefined;
expected?: string | undefined;
received?: string | undefined;
}, {
message: string;
errorType: ErrorTypeV1.VALIDATION_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
field?: string | undefined;
expected?: string | undefined;
received?: string | undefined;
}>, z.ZodObject<{
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.PROVIDER_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<false>;
provider: z.ZodOptional<z.ZodString>;
providerErrorCode: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
isTransient: z.ZodOptional<z.ZodBoolean>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.PROVIDER_ERROR;
isImmediatelyAvailable: false;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
provider?: string | undefined;
providerErrorCode?: string | number | undefined;
isTransient?: boolean | undefined;
}, {
message: string;
errorType: ErrorTypeV1.PROVIDER_ERROR;
isImmediatelyAvailable: false;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
provider?: string | undefined;
providerErrorCode?: string | number | undefined;
isTransient?: boolean | undefined;
}>, z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.TIMEOUT_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<true>;
timeoutMs: z.ZodOptional<z.ZodNumber>;
operation: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.TIMEOUT_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
timeoutMs?: number | undefined;
operation?: string | undefined;
}, {
message: string;
errorType: ErrorTypeV1.TIMEOUT_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
timeoutMs?: number | undefined;
operation?: string | undefined;
}>, z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.QUOTA_EXCEEDED>;
isImmediatelyAvailable: z.ZodLiteral<true>;
quotaType: z.ZodOptional<z.ZodString>;
resetAt: z.ZodOptional<z.ZodNumber>;
retryAfterSeconds: z.ZodOptional<z.ZodNumber>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.QUOTA_EXCEEDED;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
quotaType?: string | undefined;
resetAt?: number | undefined;
retryAfterSeconds?: number | undefined;
}, {
message: string;
errorType: ErrorTypeV1.QUOTA_EXCEEDED;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
quotaType?: string | undefined;
resetAt?: number | undefined;
retryAfterSeconds?: number | undefined;
}>, z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.CONNECTION_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<true>;
attempts: z.ZodOptional<z.ZodNumber>;
url: z.ZodOptional<z.ZodString>;
underlyingError: z.ZodOptional<z.ZodString>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.CONNECTION_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
attempts?: number | undefined;
url?: string | undefined;
underlyingError?: string | undefined;
}, {
message: string;
errorType: ErrorTypeV1.CONNECTION_ERROR;
isImmediatelyAvailable: true;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
attempts?: number | undefined;
url?: string | undefined;
underlyingError?: string | undefined;
}>, z.ZodObject<{
provider: z.ZodOptional<z.ZodNativeEnum<typeof RecognitionProvider>>;
code: z.ZodOptional<z.ZodUnion<[z.ZodString, z.ZodNumber]>>;
message: z.ZodString;
audioUtteranceId: z.ZodOptional<z.ZodString>;
description: z.ZodOptional<z.ZodString>;
timestamp: z.ZodOptional<z.ZodNumber>;
errorType: z.ZodLiteral<ErrorTypeV1.UNKNOWN_ERROR>;
isImmediatelyAvailable: z.ZodLiteral<false>;
stack: z.ZodOptional<z.ZodString>;
context: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
}, "strip", z.ZodTypeAny, {
message: string;
errorType: ErrorTypeV1.UNKNOWN_ERROR;
isImmediatelyAvailable: false;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
stack?: string | undefined;
context?: Record<string, unknown> | undefined;
}, {
message: string;
errorType: ErrorTypeV1.UNKNOWN_ERROR;
isImmediatelyAvailable: false;
provider?: RecognitionProvider | undefined;
code?: string | number | undefined;
audioUtteranceId?: string | undefined;
description?: string | undefined;
timestamp?: number | undefined;
stack?: string | undefined;
context?: Record<string, unknown> | undefined;
}>]>;
type RecognitionException = z.infer<typeof RecognitionExceptionSchema>;
/**
* Check if an exception should be shown to the user immediately
*/
declare function isExceptionImmediatelyAvailable(exception: RecognitionException): boolean;
/**
* Get user-friendly error message for exceptions
*/
declare function getUserFriendlyMessage(exception: RecognitionException): string;
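/**
 * Handling sketch (illustrative): because RecognitionExceptionSchema is a
 * discriminated union on errorType, a comparison narrows the type and the
 * variant-specific fields (e.g. timeoutMs) become available without casts.
 *
 * ```typescript
 * import {
 *   ErrorTypeV1,
 *   RecognitionExceptionSchema,
 *   isExceptionImmediatelyAvailable,
 *   getUserFriendlyMessage,
 * } from '@volley/recognition-client-sdk';
 *
 * function report(raw: unknown): void {
 *   const ex = RecognitionExceptionSchema.parse(raw);
 *   if (isExceptionImmediatelyAvailable(ex)) {
 *     console.log(getUserFriendlyMessage(ex));
 *   }
 *   if (ex.errorType === ErrorTypeV1.TIMEOUT_ERROR) {
 *     console.warn(`timed out after ${ex.timeoutMs ?? '?'} ms`);
 *   }
 * }
 * ```
 */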
/**
* Recognition Context Types V1
* NOTE_TO_AI: DO NOT CHANGE THIS UNLESS EXPLICITLY ASKED. Always ask before making any changes.
* Types and schemas for recognition context data
*/
/**
* Message type discriminator for recognition context V1
*/
declare enum RecognitionContextTypeV1 {
GAME_CONTEXT = "GameContext",
CONTROL_SIGNAL = "ControlSignal",
ASR_REQUEST = "ASRRequest"
}
/**
* Control signal types for recognition V1
*/
declare enum ControlSignalTypeV1 {
START_RECORDING = "start_recording",
STOP_RECORDING = "stop_recording"
}
/**
* SlotMap - A strongly typed map from slot names to lists of values
* Used for entity extraction and slot filling in voice interactions
*/
declare const SlotMapSchema: z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>;
type SlotMap = z.infer<typeof SlotMapSchema>;
/**
* Game context V1 - contains game state information
*/
declare const GameContextSchemaV1: z.ZodObject<{
type: z.ZodLiteral<RecognitionContextTypeV1.GAME_CONTEXT>;
gameId: z.ZodString;
gamePhase: z.ZodString;
promptSTT: z.ZodOptional<z.ZodString>;
promptSTF: z.ZodOptional<z.ZodString>;
promptTTF: z.ZodOptional<z.ZodString>;
slotMap: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodArray<z.ZodString, "many">>>;
}, "strip", z.ZodTypeAny, {
type: RecognitionContextTypeV1.GAME_CONTEXT;
gameId: string;
gamePhase: string;
promptSTT?: string | undefined;
promptSTF?: string | undefined;
promptTTF?: string | undefined;
slotMap?: Record<string, string[]> | undefined;
}, {
type: RecognitionContextTypeV1.GAME_CONTEXT;
gameId: string;
gamePhase: string;
promptSTT?: string | undefined;
promptSTF?: string | undefined;
promptTTF?: string | undefined;
slotMap?: Record<string, string[]> | undefined;
}>;
type GameContextV1 = z.infer<typeof GameContextSchemaV1>;
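/**
 * Construction sketch (illustrative): a minimal game context; the gameId,
 * gamePhase, and slot values are hypothetical.
 *
 * ```typescript
 * import {
 *   GameContextSchemaV1,
 *   RecognitionContextTypeV1,
 * } from '@volley/recognition-client-sdk';
 *
 * const ctx = GameContextSchemaV1.parse({
 *   type: RecognitionContextTypeV1.GAME_CONTEXT,
 *   gameId: 'song-quiz',
 *   gamePhase: 'guessing',
 *   slotMap: { song_title: ['Bohemian Rhapsody', 'Hey Jude'] },
 * });
 * ```
 */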
/**
* ASR Request V1 - contains complete ASR setup information
* Sent once at connection start to configure the session
*/
declare const ASRRequestSchemaV1: z.ZodObject<{
type: z.ZodLiteral<RecognitionContextTypeV1.ASR_REQUEST>;
audioUtteranceId: z.ZodOptional<z.ZodString>;
provider: z.ZodString;
model: z.ZodOptional<z.ZodString>;
language: z.ZodString;
sampleRate: z.ZodNumber;
encoding: z.ZodNumber;
interimResults: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
useContext: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
finalTranscriptStability: z.ZodOptional<z.ZodString>;
debugCommand: z.ZodOptional<z.ZodObject<{
enableDebugLog: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
enableAudioStorage: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
enableSongQuizSessionIdCheck: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
enablePilotModels: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
}, "strip", z.ZodTypeAny, {
enableDebugLog: boolean;
enableAudioStorage: boolean;
enableSongQuizSessionIdCheck: boolean;
enablePilotModels: boolean;
}, {
enableDebugLog?: boolean | undefined;
enableAudioStorage?: boolean | undefined;
enableSongQuizSessionIdCheck?: boolean | undefined;
enablePilotModels?: boolean | undefined;
}>>;
}, "strip", z.ZodTypeAny, {
provider: string;
language: string;
sampleRate: number;
encoding: number;
interimResults: boolean;
useContext: boolean;
type: RecognitionContextTypeV1.ASR_REQUEST;
audioUtteranceId?: string | undefined;
model?: string | undefined;
finalTranscriptStability?: string | undefined;
debugCommand?: {
enableDebugLog: boolean;
enableAudioStorage: boolean;
enableSongQuizSessionIdCheck: boolean;
enablePilotModels: boolean;
} | undefined;
}, {
provider: string;
language: string;
sampleRate: number;
encoding: number;
type: RecognitionContextTypeV1.ASR_REQUEST;
audioUtteranceId?: string | undefined;
model?: string | undefined;
interimResults?: boolean | undefined;
useContext?: boolean | undefined;
finalTranscriptStability?: string | undefined;
debugCommand?: {
enableDebugLog?: boolean | undefined;
enableAudioStorage?: boolean | undefined;
enableSongQuizSessionIdCheck?: boolean | undefined;
enablePilotModels?: boolean | undefined;
} | undefined;
}>;
type ASRRequestV1 = z.infer<typeof ASRRequestSchemaV1>;
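/**
 * Wire sketch (illustrative): the wire-format ASR request uses plain
 * strings and numbers (provider, sampleRate, encoding), unlike the
 * enum-typed ASRRequestConfig defined below.
 *
 * ```typescript
 * import {
 *   ASRRequestSchemaV1,
 *   RecognitionContextTypeV1,
 * } from '@volley/recognition-client-sdk';
 *
 * const req = ASRRequestSchemaV1.parse({
 *   type: RecognitionContextTypeV1.ASR_REQUEST,
 *   provider: 'deepgram',
 *   language: 'en-US',
 *   sampleRate: 16000,
 *   encoding: 1, // AudioEncoding.LINEAR16 as its numeric ID
 * });
 * req.interimResults; // boolean - the schema fills in its default
 * ```
 */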
/**
* Unified ASR Request Configuration
*
* Provider-agnostic configuration for ASR (Automatic Speech Recognition) requests.
* This interface provides a consistent API for clients regardless of the underlying provider.
*
* All fields use library-defined enums for type safety and consistency.
* Provider-specific mappers will convert these to provider-native formats.
*/
/**
* Final transcript stability modes
*
* Controls timeout duration for fallback final transcript after stopRecording().
* Similar to AssemblyAI's turn detection confidence modes but applied to our
* internal timeout mechanism when vendors don't respond with is_final=true.
*
* @see https://www.assemblyai.com/docs/speech-to-text/universal-streaming/turn-detection
*/
declare enum FinalTranscriptStability {
/**
* Aggressive mode: 100ms timeout
* Fast response, optimized for short utterances and quick back-and-forth
* Use cases: IVR, quick commands, retail confirmations
*/
AGGRESSIVE = "aggressive",
/**
* Balanced mode: 200ms timeout (default)
* Natural middle ground for most conversational scenarios
* Use cases: General customer support, tech support, typical voice interactions
*/
BALANCED = "balanced",
/**
* Conservative mode: 400ms timeout
* Wait longer for providers, optimized for complex/reflective speech
* Use cases: Healthcare, complex queries, careful thought processes
*/
CONSERVATIVE = "conservative",
/**
* Experimental mode: 10000ms (10 seconds) timeout
* Very long wait for batch/async providers that need significant processing time
* Use cases: Batch processing (Gemini, OpenAI Whisper), complex audio analysis
* Note: Should be cancelled immediately when transcript is received
*/
EXPERIMENTAL = "experimental"
}
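/**
 * Selection sketch (illustrative): choosing a stability mode by scenario,
 * mirroring the use cases documented above. The scenario labels are
 * hypothetical.
 *
 * ```typescript
 * import { FinalTranscriptStability } from '@volley/recognition-client-sdk';
 *
 * function stabilityFor(scenario: 'quick-command' | 'conversation' | 'batch'): FinalTranscriptStability {
 *   switch (scenario) {
 *     case 'quick-command': return FinalTranscriptStability.AGGRESSIVE;    // 100ms
 *     case 'conversation':  return FinalTranscriptStability.BALANCED;     // 200ms (default)
 *     case 'batch':         return FinalTranscriptStability.EXPERIMENTAL; // 10s
 *   }
 * }
 * ```
 */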
/**
* Unified ASR request configuration
*
* This configuration is used by:
* - Client SDKs to specify recognition parameters
* - Demo applications for user input
* - Service layer to configure provider sessions
*
* Core fields only - all provider-specific options go in providerOptions
*
* @example
* ```typescript
* const config: ASRRequestConfig = {
* provider: RecognitionProvider.GOOGLE,
* model: GoogleModel.LATEST_LONG,
* language: Language.ENGLISH_US,
* sampleRate: SampleRate.RATE_16000, // or just 16000
* encoding: AudioEncoding.LINEAR16,
* providerOptions: {
* google: {
* enableAutomaticPunctuation: true,
* interimResults: true,
* singleUtterance: false
* }
* }
* };
* ```
*/
interface ASRRequestConfig {
/**
* The ASR provider to use
* Must be one of the supported providers in RecognitionProvider enum
*/
provider: RecognitionProvider | string;
/**
* Optional model specification for the provider
* Can be provider-specific model enum or string
* If not specified, provider's default model will be used
*/
model?: RecognitionModel;
/**
* Language/locale for recognition
* Use Language enum for common languages
* Can also accept BCP-47 language tags as strings
*/
language: Language | string;
/**
* Audio sample rate in Hz
* Prefer using SampleRate enum values for standard rates
* Can also accept numeric Hz values (e.g., 16000)
*/
sampleRate: SampleRate | number;
/**
* Audio encoding format
* Must match the actual audio data being sent
* Use AudioEncoding enum for standard formats
*/
encoding: AudioEncoding | string;
/**
* Enable interim (partial) results during recognition
* When true, receive real-time updates before finalization
* When false, only receive final results
* Default: false
*/
interimResults?: boolean;
/**
* Require GameContext (such as song titles) before starting recognition
* When true, server waits for GameContext message before processing audio
* When false, recognition starts immediately
* Default: false
*/
useContext?: boolean;
/**
* Final transcript stability mode
*
* Controls timeout duration for fallback final transcript when provider
* doesn't respond with is_final=true after stopRecording().
*
* - aggressive: 100ms - fast response, may cut off slow providers
* - balanced: 200ms - current default, good for most cases
* - conservative: 400ms - wait longer for complex utterances
*
* @default 'balanced'
* @see FinalTranscriptStability enum for detailed descriptions
*/
finalTranscriptStability?: FinalTranscriptStability | string;
/**
* Additional provider-specific options
*
* Common options per provider:
* - Deepgram: punctuate, smart_format, diarize, utterances
* - Google: enableAutomaticPunctuation, singleUtterance, enableWordTimeOffsets
* - AssemblyAI: formatTurns, filter_profanity, word_boost
*
* Note: interimResults is now a top-level field, but can still be overridden per provider
*
* @example
* ```typescript
* providerOptions: {
* google: {
* enableAutomaticPunctuation: true,
* singleUtterance: false,
* enableWordTimeOffsets: false
* }
* }
* ```
*/
providerOptions?: Record<string, any>;
/**
* Optional fallback ASR configurations
*
* List of alternative ASR configurations to use if the primary fails.
* Each fallback config is a complete ASRRequestConfig that will be tried
* in order until one succeeds.
*
* @example
* ```typescript
* fallbackModels: [
* {
* provider: RecognitionProvider.DEEPGRAM,
* model: DeepgramModel.NOVA_2,
* language: Language.ENGLISH_US,
* sampleRate: 16000,
* encoding: AudioEncoding.LINEAR16
* },
* {
* provider: RecognitionProvider.GOOGLE,
* model: GoogleModel.LATEST_SHORT,
* language: Language.ENGLISH_US,
* sampleRate: 16000,
* encoding: AudioEncoding.LINEAR16
* }
* ]
* ```
*/
fallbackModels?: ASRRequestConfig[];
}
/**
* Partial ASR config for updates
* All fields are optional for partial updates
*/
type PartialASRRequestConfig = Partial<ASRRequestConfig>;
/**
* Helper function to create a default ASR config
*/
declare function createDefaultASRConfig(overrides?: PartialASRRequestConfig): ASRRequestConfig;
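/**
 * Usage sketch (illustrative): the defaults this helper fills in are not
 * visible in these declarations, so only the override mechanics are shown.
 *
 * ```typescript
 * import {
 *   createDefaultASRConfig,
 *   RecognitionProvider,
 *   DeepgramModel,
 * } from '@volley/recognition-client-sdk';
 *
 * const config = createDefaultASRConfig({
 *   provider: RecognitionProvider.DEEPGRAM,
 *   model: DeepgramModel.NOVA_3,
 *   interimResults: true,
 * });
 * ```
 */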
/**
* Gemini Model Types
* Based on available models as of January 2025
*
* API Version Notes:
* - Gemini 2.5+ models: Use v1beta API (early access features)
* - Gemini 2.0 models: Use v1beta API (early access features)
* - Gemini 1.5 models: Use v1 API (stable, production-ready)
*
* @see https://ai.google.dev/gemini-api/docs/models
* @see https://ai.google.dev/gemini-api/docs/api-versions
*/
declare enum GeminiModel {
GEMINI_2_5_PRO = "gemini-2.5-pro",
GEMINI_2_5_FLASH = "gemini-2.5-flash",
GEMINI_2_5_FLASH_LITE = "gemini-2.5-flash-lite",
GEMINI_2_0_FLASH_LATEST = "gemini-2.0-flash-latest",
GEMINI_2_0_FLASH_EXP = "gemini-2.0-flash-exp"
}
/**
* OpenAI Model Types
*/
declare enum OpenAIModel {
WHISPER_1 = "whisper-1"
}
/**
* Standard stage/environment constants used across all services
*/
declare const STAGES: {
readonly LOCAL: "local";
readonly DEV: "dev";
readonly STAGING: "staging";
readonly PRODUCTION: "production";
};
type Stage = typeof STAGES[keyof typeof STAGES];
/**
* Generic WebSocket protocol types and utilities
* Supports flexible versioning and message types
* Used by both client and server implementations
*/
/**
* Base message structure - completely flexible
* @template V - Version type (number, string, etc.)
*/
interface Message<V = number> {
v: V;
type: string;
data?: unknown;
}
/**
* Version serializer interface
* Converts between version type V and byte representation
*/
interface VersionSerializer<V> {
serialize: (v: V) => number;
deserialize: (byte: number) => V;
}
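/**
 * Implementation sketch (illustrative): with the default numeric version a
 * pass-through serializer suffices; a string-versioned protocol could map
 * labels to bytes as below. The label set is hypothetical.
 *
 * ```typescript
 * import type { VersionSerializer } from '@volley/recognition-client-sdk';
 *
 * const numericVersion: VersionSerializer<number> = {
 *   serialize: (v) => v,
 *   deserialize: (byte) => byte,
 * };
 *
 * const namedVersion: VersionSerializer<'v1' | 'v2'> = {
 *   serialize: (v) => (v === 'v1' ? 1 : 2),
 *   deserialize: (byte) => (byte === 1 ? 'v1' : 'v2'),
 * };
 * ```
 */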
/**
* WebSocketAudioClient - Abstract base class for WebSocket clients
* Sends audio and control messages, receives responses from server
*
* Features:
* - Generic version type support (number, string, etc.)
* - Type-safe upward/downward message data
* - Client-side backpressure monitoring
* - Abstract hooks for application-specific logic
* - Format-agnostic audio protocol (supports any encoding)
*/
type ClientConfig = {
url: string;
highWM?: number;
lowWM?: number;
};
/**
* WebSocketAudioClient - Abstract base class for WebSocket clients
* that send audio frames and JSON messages
*
* @template V - Version type (number, string, object, etc.)
* @template TUpward - Type of upward message data (Client -> Server)
* @template TDownward - Type of downward message data (Server -> Client)
*
* @example
* ```typescript
* class MyClient extends WebSocketAudioClient<number, MyUpMsg, MyDownMsg> {
* protected onConnected() {
* console.log('Connected!');
* }
*
* protected onMessage(msg) {
* console.log('Received:', msg.type, msg.data);
* }
*
* protected onDisconnected(code, reason) {
* console.log('Disconnected:', code, reason);
* }
*
* protected onError(error) {
* console.error('Error:', error);
* }
* }
*
* const client = new MyClient({ url: 'ws://localhost:8080' });
* client.connect();
* client.sendMessage(1, 'configure', { language: 'en' });
* client.sendAudio(audioData);
* ```
*/
declare abstract class WebSocketAudioClient<
V = number, // Version type (default: number)
TUpward = unknown, // Upward message data type (Client -> Server)
TDownward = unknown // Downward message data type (Server -> Client)
> {
private cfg;
protected versionSerializer: VersionSerializer<V>;
private ws;
private seq;
private HWM;
private LWM;
constructor(cfg: ClientConfig, versionSerializer?: VersionSerializer<V>);
/**
* Hook: Called when WebSocket connection is established
*/
protected abstract onConnected(): void;
/**
* Hook: Called when WebSocket connection closes
* @param code - Close code (see WebSocketCloseCode enum)
* @param reason - Human-readable close reason
*/
protected abstract onDisconnected(code: number, reason: string): void;
/**
* Hook: Called when WebSocket error occurs
*/
protected abstract onError(error: Event): void;
/**
* Hook: Called when downward message arrives from server
* Override this to handle