/**
 * @volley/recognition-client-sdk
 * Recognition Service TypeScript/Node.js Client SDK
 *
 * Recognition Client Types
 *
 * Type definitions and interfaces for the recognition client SDK.
 * These interfaces enable dependency injection, testing, and alternative implementations.
 */
import { TranscriptionResultV1, FunctionCallResultV1, MetadataResultV1, ErrorResultV1, ASRRequestConfig, GameContextV1, Stage } from '@recog/shared-types';
/**
* Client connection state enum
* Represents the various states a recognition client can be in during its lifecycle
*/
export declare enum ClientState {
/** Initial state, no connection established */
INITIAL = "initial",
/** Actively establishing WebSocket connection */
CONNECTING = "connecting",
/** WebSocket connected but waiting for server ready signal */
CONNECTED = "connected",
/** Server ready, can send audio */
READY = "ready",
/** Sent stop signal, waiting for final transcript */
STOPPING = "stopping",
/** Connection closed normally after stop */
STOPPED = "stopped",
/** Connection failed or lost unexpectedly */
FAILED = "failed"
}
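/**
 * Example: reacting to the client lifecycle (a sketch; `client` is a
 * hypothetical `IRecognitionClient` instance, and `showRetryPrompt` is an
 * illustrative UI hook, neither defined in this file):
 * ```typescript
 * switch (client.getState()) {
 *   case ClientState.READY:
 *     client.sendAudio(chunk); // safe to stream; `chunk` is caller-provided PCM audio
 *     break;
 *   case ClientState.FAILED:
 *     showRetryPrompt(); // connection failed or was lost unexpectedly
 *     break;
 * }
 * ```
 */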
/**
* Callback URL configuration with message type filtering
*/
export interface RecognitionCallbackUrl {
/** The callback URL endpoint */
url: string;
/** Array of message types to send to this URL. If empty/undefined, all types are sent */
messageTypes?: Array<string | number>;
}
/** Alias of {@link RecognitionCallbackUrl}. */
export type IRecognitionCallbackUrl = RecognitionCallbackUrl;
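/**
 * Example: filtering which message types each callback endpoint receives (a
 * sketch; the URLs and the 'transcription' type name are illustrative
 * assumptions, not values defined by this SDK):
 * ```typescript
 * const callbackUrls: RecognitionCallbackUrl[] = [
 *   // messageTypes omitted: this endpoint receives all message types.
 *   { url: 'https://hooks.example.com/recognition/all' },
 *   // Only the listed message types are delivered here.
 *   { url: 'https://hooks.example.com/recognition/transcripts', messageTypes: ['transcription'] },
 * ];
 * ```
 */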
export interface IRecognitionClientConfig {
/**
* WebSocket endpoint URL (optional)
 * Either `url` or `stage` can be provided; if neither is, the production stage is used.
* If both are provided, `url` takes precedence.
*
* Example with explicit URL:
* ```typescript
* { url: 'wss://custom-endpoint.example.com/ws/v1/recognize' }
* ```
*/
url?: string;
/**
* Stage for recognition service (recommended)
 * Either `url` or `stage` can be provided.
* If both are provided, `url` takes precedence.
* Defaults to production if neither is provided.
*
* Example with STAGES enum (recommended):
* ```typescript
* import { STAGES } from '@recog/shared-types';
* { stage: STAGES.STAGING }
* ```
*
* String values also accepted:
* ```typescript
* { stage: 'staging' } // STAGES.LOCAL | STAGES.DEV | STAGES.STAGING | STAGES.PRODUCTION
* ```
*/
stage?: Stage | string;
/** ASR configuration (provider, model, language, etc.) - optional */
asrRequestConfig?: ASRRequestConfig;
/** Game context for improved recognition accuracy */
gameContext?: GameContextV1;
/** Audio utterance ID (optional) - if not provided, a UUID v4 will be generated */
audioUtteranceId?: string;
/** Callback URLs for server-side notifications, with optional message type filtering (optional).
 * The game side only needs to set this if another service needs to be notified of transcription results.
 */
callbackUrls?: RecognitionCallbackUrl[];
/** User identification (optional) */
userId?: string;
/** Game session identification (optional). Called 'sessionId' in Platform and most games. */
gameSessionId?: string;
/** Device identification (optional) */
deviceId?: string;
/** Account identification (optional) */
accountId?: string;
/** Question answer identifier for tracking Q&A sessions (optional; for tracking purposes only) */
questionAnswerId?: string;
/** Platform for audio recording device (optional, e.g., 'ios', 'android', 'web', 'unity') */
platform?: string;
/** Callback when transcript is received */
onTranscript?: (result: TranscriptionResultV1) => void;
/**
* Callback when function call is received
 * Note: Not supported as of 2025. P2 feature for a future speech-to-function-call capability.
*/
onFunctionCall?: (result: FunctionCallResultV1) => void;
/** Callback when metadata is received. Invoked only once, after transcription is complete. */
onMetadata?: (metadata: MetadataResultV1) => void;
/** Callback when error occurs */
onError?: (error: ErrorResultV1) => void;
/** Callback when connected to WebSocket */
onConnected?: () => void;
/**
* Callback when WebSocket disconnects
* @param code - WebSocket close code (1000 = normal, 1006 = abnormal, etc.)
* @param reason - Close reason string
*/
onDisconnected?: (code: number, reason: string) => void;
/** High water mark for backpressure control (bytes) */
highWaterMark?: number;
/** Low water mark for backpressure control (bytes) */
lowWaterMark?: number;
/** Maximum buffer duration in seconds (default: 60s) */
maxBufferDurationSec?: number;
/** Expected chunks per second for ring buffer sizing (default: 100) */
chunksPerSecond?: number;
/**
* Connection retry configuration (optional)
* Only applies to initial connection establishment, not mid-stream interruptions.
*
 * Default: { maxAttempts: 4, delayMs: 200 } (one initial attempt plus 3 retries = 4 total attempts)
*
* Timing: Attempt 1 → FAIL → wait 200ms → Attempt 2 → FAIL → wait 200ms → Attempt 3 → FAIL → wait 200ms → Attempt 4
*
* Example:
* ```typescript
* {
* connectionRetry: {
* maxAttempts: 2, // Try connecting up to 2 times (1 retry)
* delayMs: 500 // Wait 500ms between attempts
* }
* }
* ```
*/
connectionRetry?: {
/** Maximum number of connection attempts (default: 4, min: 1, max: 5) */
maxAttempts?: number;
/** Delay in milliseconds between retry attempts (default: 200ms) */
delayMs?: number;
};
/**
* Optional logger function for debugging
* If not provided, no logging will occur
* @param level - Log level: 'debug', 'info', 'warn', 'error'
* @param message - Log message
* @param data - Optional additional data
*/
logger?: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void;
}
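/**
 * Example: a minimal configuration sketch using `stage` plus a few callbacks
 * (the identifier values, handler bodies, and console-based logger are
 * illustrative only):
 * ```typescript
 * import { STAGES } from '@recog/shared-types';
 *
 * const config: IRecognitionClientConfig = {
 *   stage: STAGES.STAGING,
 *   userId: 'user-123',
 *   gameSessionId: 'session-456',
 *   maxBufferDurationSec: 30, // cap local buffering below the 60s default
 *   connectionRetry: { maxAttempts: 3, delayMs: 300 },
 *   onTranscript: (result) => console.log('transcript:', result),
 *   onError: (error) => console.error('recognition error:', error),
 *   onDisconnected: (code, reason) => console.warn('closed:', code, reason),
 *   logger: (level, message, data) => console[level](message, data ?? ''),
 * };
 * ```
 */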
/**
* Recognition Client Interface
*
* Main interface for real-time speech recognition clients.
* Provides methods for connection management, audio streaming, and session control.
*/
export interface IRecognitionClient {
/**
* Connect to the WebSocket endpoint
* @returns Promise that resolves when connected
* @throws Error if connection fails or times out
*/
connect(): Promise<void>;
/**
* Send audio data to the recognition service
 * Audio is buffered locally and sent when the connection is ready.
* @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
*/
sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
/**
* Stop recording and wait for final transcript
* The server will close the connection after sending the final transcript.
* @returns Promise that resolves when final transcript is received
*/
stopRecording(): Promise<void>;
/**
* Force stop and immediately close connection without waiting for server
*
* WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
* - Does NOT wait for server to process remaining audio
* - Does NOT receive final transcript from server
* - Immediately closes WebSocket connection
* - Cleans up resources (buffers, listeners)
*
* Use Cases:
* - User explicitly cancels/abandons session
* - Timeout scenarios where waiting is not acceptable
* - Need immediate cleanup and can't wait for server
*
* RECOMMENDED: Use stopRecording() for normal shutdown.
* Only use this when immediate disconnection is required.
*/
stopAbnormally(): void;
/**
* Get the audio utterance ID for this session
* Available immediately after client construction.
* @returns UUID v4 string identifying this recognition session
*/
getAudioUtteranceId(): string;
/**
* Get the current state of the client
* @returns Current ClientState value
*/
getState(): ClientState;
/**
* Check if WebSocket connection is open
* @returns true if connected and ready to communicate
*/
isConnected(): boolean;
/**
* Check if client is currently connecting
* @returns true if connection is in progress
*/
isConnecting(): boolean;
/**
* Check if client is currently stopping
* @returns true if stopRecording() is in progress
*/
isStopping(): boolean;
/**
* Check if transcription has finished
* @returns true if the transcription is complete
*/
isTranscriptionFinished(): boolean;
/**
* Check if the audio buffer has overflowed
* @returns true if the ring buffer has wrapped around
*/
isBufferOverflowing(): boolean;
/**
* Get client statistics
* @returns Statistics about audio transmission and buffering
*/
getStats(): IRecognitionClientStats;
/**
* Get the WebSocket URL being used by this client
* Available immediately after client construction.
* @returns WebSocket URL string
*/
getUrl(): string;
}
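/**
 * Example: a typical session lifecycle against this interface (a sketch;
 * `createClient` stands in for whatever factory or constructor the SDK
 * actually exports, which is not declared in this file):
 * ```typescript
 * declare function createClient(config: IRecognitionClientConfig): IRecognitionClient;
 *
 * async function runSession(config: IRecognitionClientConfig, chunks: ArrayBuffer[]): Promise<void> {
 *   const client = createClient(config);
 *   try {
 *     await client.connect();
 *     for (const chunk of chunks) {
 *       client.sendAudio(chunk); // buffered locally until the socket is ready
 *     }
 *     await client.stopRecording(); // graceful stop: waits for the final transcript
 *   } catch (err) {
 *     client.stopAbnormally(); // abnormal shutdown: no final transcript will arrive
 *     throw err;
 *   }
 * }
 * ```
 */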
/**
* Client statistics interface
*/
export interface IRecognitionClientStats {
/** Total audio bytes sent to server */
audioBytesSent: number;
/** Total number of audio chunks sent */
audioChunksSent: number;
/** Total number of audio chunks buffered */
audioChunksBuffered: number;
/** Number of times the ring buffer overflowed */
bufferOverflowCount: number;
/** Current number of chunks in buffer */
currentBufferedChunks: number;
/** Whether the ring buffer has wrapped (overwritten old data) */
hasWrapped: boolean;
}
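/**
 * Example: monitoring buffer health via stats (a sketch; `client` is any
 * `IRecognitionClient` instance):
 * ```typescript
 * const stats = client.getStats();
 * if (stats.hasWrapped) {
 *   // The ring buffer overwrote unsent audio; some speech may have been dropped.
 *   console.warn(`buffer overflowed ${stats.bufferOverflowCount} time(s)`);
 * }
 * ```
 */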
/**
* Configuration for RealTimeTwoWayWebSocketRecognitionClient
* This extends IRecognitionClientConfig and is the main configuration interface
* for creating a new RealTimeTwoWayWebSocketRecognitionClient instance.
*/
export interface RealTimeTwoWayWebSocketRecognitionClientConfig extends IRecognitionClientConfig {
}
//# sourceMappingURL=recognition-client.types.d.ts.map