@volley/recognition-client-sdk
Recognition Service TypeScript/Node.js Client SDK
/**
* RealTimeTwoWayWebSocketRecognitionClient - Clean, compact SDK for real-time speech recognition
*
* Features:
* - Ring buffer-based audio storage with fixed memory footprint
* - Automatic buffering when disconnected, immediate send when connected
* - Buffer persists after flush (for future retry/reconnection scenarios)
* - Built on WebSocketAudioClient for robust protocol handling
* - Simple API: connect() → sendAudio() → stopRecording()
* - Type-safe message handling with callbacks
* - Automatic backpressure management
* - Overflow detection with buffer state tracking
*
* Example:
* ```typescript
* const client = new RealTimeTwoWayWebSocketRecognitionClient({
* url: 'ws://localhost:3101/ws/v1/recognize',
* onTranscript: (result) => console.log(result.finalTranscript),
* onError: (error) => console.error(error),
* maxBufferDurationSec: 60 // Ring buffer for 60 seconds
* });
*
* await client.connect();
*
* // Send audio chunks - always stored in ring buffer, sent if connected
* micStream.on('data', (chunk) => client.sendAudio(chunk));
*
* // Signal end of audio and wait for final results
* await client.stopRecording();
*
* // Server will close connection after sending finals
* // No manual cleanup needed - browser handles it
* ```
*/
import { WebSocketAudioClient } from '@recog/websocket';
import {
AudioEncoding,
RecognitionResultTypeV1,
ClientControlActionV1,
RecognitionContextTypeV1,
ControlSignalTypeV1,
type TranscriptionResultV1,
type FunctionCallResultV1,
type MetadataResultV1,
type ErrorResultV1,
type ClientControlMessageV1,
type ASRRequestConfig,
type ASRRequestV1,
type GameContextV1,
SampleRate
} from '@recog/shared-types';
import { v4 as uuidv4 } from 'uuid';
import { ClientState } from './recognition-client.types.js';
import type {
IRecognitionClient,
IRecognitionClientStats,
RealTimeTwoWayWebSocketRecognitionClientConfig,
RecognitionCallbackUrl
} from './recognition-client.types.js';
import { buildWebSocketUrl } from './utils/url-builder.js';
import { AudioRingBuffer } from './utils/audio-ring-buffer.js';
import { MessageHandler } from './utils/message-handler.js';
import { ConnectionError } from './errors.js';
// ============================================================================
// UTILITIES
// ============================================================================
/**
* Check if a WebSocket close code indicates normal closure
* @param code - WebSocket close code
* @returns true if the disconnection was normal/expected, false if it was an error
*/
export function isNormalDisconnection(code: number): boolean {
return code === 1000; // 1000 is the only "normal" close code
}
/**
* Convert Blob to ArrayBuffer with Smart TV compatibility
*
* Browser Compatibility:
* - blob.arrayBuffer(): Newer browsers and Smart TVs (Chrome 76+, Safari 14+, Tizen 2020+, webOS 5.0+)
* - FileReader: All browsers, including older Smart TVs
*
* @see https://developer.samsung.com/smarttv/develop/specifications/web-engine-specifications.html
* @param blob - Blob to convert
* @returns Promise resolving to ArrayBuffer
*/
async function blobToArrayBuffer(blob: Blob): Promise<ArrayBuffer> {
// Modern approach (Chrome 76+, Safari 14+, Tizen 2020+, webOS 5.0+)
if (typeof blob.arrayBuffer === 'function') {
return await blob.arrayBuffer();
}
// Fallback for older Smart TVs (Tizen 2018-2019, webOS 3.0-4.x)
return new Promise((resolve, reject) => {
const reader = new FileReader();
reader.onload = (): void => resolve(reader.result as ArrayBuffer);
reader.onerror = (): void => reject(reader.error);
reader.readAsArrayBuffer(blob);
});
}
// ============================================================================
// TYPE DEFINITIONS
// ============================================================================
/**
* Re-export TranscriptionResultV1 as TranscriptionResult for backward compatibility
*/
export type TranscriptionResult = TranscriptionResultV1;
// Re-export config interface from types file for backward compatibility
export type { RealTimeTwoWayWebSocketRecognitionClientConfig } from './recognition-client.types.js';
/**
* Internal config with processed values and defaults
*/
interface InternalConfig {
url: string;
readonly audioUtteranceId: string; // Immutable - ensures one audio session per client instance
asrRequestConfig?: ASRRequestConfig;
gameContext?: GameContextV1;
callbackUrls?: RecognitionCallbackUrl[];
onTranscript: (result: TranscriptionResultV1) => void;
onFunctionCall: (result: FunctionCallResultV1) => void;
onMetadata: (metadata: MetadataResultV1) => void;
onError: (error: ErrorResultV1) => void;
onConnected: () => void;
onDisconnected: (code: number, reason: string) => void;
highWaterMark: number;
lowWaterMark: number;
maxBufferDurationSec: number;
chunksPerSecond: number;
connectionRetry: {
maxAttempts: number;
delayMs: number;
};
logger?: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void;
}
// ============================================================================
// RECOGNITION CLIENT
// ============================================================================
/**
* RealTimeTwoWayWebSocketRecognitionClient - SDK-level client for real-time speech recognition
*
* Implements IRecognitionClient interface for dependency injection and testing.
* Extends WebSocketAudioClient with local audio buffering and simple callback-based API.
*/
export class RealTimeTwoWayWebSocketRecognitionClient
extends WebSocketAudioClient<number, any, any>
implements IRecognitionClient
{
private static readonly PROTOCOL_VERSION = 1;
private config: InternalConfig;
private audioBuffer: AudioRingBuffer;
private messageHandler: MessageHandler;
private state: ClientState = ClientState.INITIAL;
private connectionPromise: Promise<void> | undefined;
// Debug control (internal state, controlled by debugCommand in ASRRequest)
private isDebugLogEnabled = false;
// Stats
private audioBytesSent = 0;
private audioChunksSent = 0;
private audioStatsLogInterval = 100;
private lastAudioStatsLog = 0;
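/**
 * Create a new recognition client.
 *
 * A hedged sketch of a fuller configuration than the file-level example; the
 * provider/model strings and the logger body are illustrative assumptions,
 * not required values (see ASRRequestConfig in @recog/shared-types for the
 * accepted options):
 * ```typescript
 * const client = new RealTimeTwoWayWebSocketRecognitionClient({
 *   url: 'ws://localhost:3101/ws/v1/recognize',
 *   asrRequestConfig: {
 *     provider: 'deepgram',              // assumption: provider identifier accepted by the service
 *     model: 'nova-2',                   // assumption: provider-specific model name
 *     sampleRate: SampleRate.RATE_16000,
 *     encoding: AudioEncoding.LINEAR16,
 *     interimResults: true
 *   },
 *   connectionRetry: { maxAttempts: 4, delayMs: 200 },
 *   onTranscript: (result) => console.log(result),
 *   onError: (error) => console.error(error),
 *   logger: (level, message, data) => console.log(level, message, data)
 * });
 * ```
 */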
constructor(config: RealTimeTwoWayWebSocketRecognitionClientConfig) {
// Generate UUID v4 for audioUtteranceId if not provided
const audioUtteranceId = config.audioUtteranceId || uuidv4();
// Build WebSocket URL with query parameters
// Precedence: url > stage > default production
const url = buildWebSocketUrl({
audioUtteranceId,
...(config.url && { url: config.url }),
...(config.stage && { stage: config.stage }),
...(config.callbackUrls && { callbackUrls: config.callbackUrls }),
...(config.userId && { userId: config.userId }),
...(config.gameSessionId && { gameSessionId: config.gameSessionId }),
...(config.deviceId && { deviceId: config.deviceId }),
...(config.accountId && { accountId: config.accountId }),
...(config.questionAnswerId && { questionAnswerId: config.questionAnswerId }),
...(config.platform && { platform: config.platform }),
...(config.gameContext && { gameContext: config.gameContext })
});
// Initialize base WebSocketAudioClient
super({
url: url,
highWM: config.highWaterMark ?? 512_000,
lowWM: config.lowWaterMark ?? 128_000
});
// Process retry config with defaults and validation
const retryConfig = config.connectionRetry || {};
const maxAttempts = Math.max(1, Math.min(5, retryConfig.maxAttempts ?? 4)); // Default: 4 attempts (3 retries), clamp 1-5
const delayMs = retryConfig.delayMs ?? 200; // Fast retry for short audio sessions
// Process config with defaults
this.config = {
url,
audioUtteranceId,
...(config.asrRequestConfig && { asrRequestConfig: config.asrRequestConfig }),
...(config.gameContext && { gameContext: config.gameContext }),
...(config.callbackUrls && { callbackUrls: config.callbackUrls }),
onTranscript: config.onTranscript || (() => {}),
onFunctionCall: config.onFunctionCall || (() => {}),
onMetadata: config.onMetadata || (() => {}),
onError: config.onError || (() => {}),
onConnected: config.onConnected || (() => {}),
onDisconnected: config.onDisconnected || (() => {}),
highWaterMark: config.highWaterMark ?? 512_000,
lowWaterMark: config.lowWaterMark ?? 128_000,
maxBufferDurationSec: config.maxBufferDurationSec ?? 60,
chunksPerSecond: config.chunksPerSecond ?? 100,
connectionRetry: {
maxAttempts,
delayMs
},
...(config.logger && { logger: config.logger })
};
// Initialize audio buffer
this.audioBuffer = new AudioRingBuffer({
maxBufferDurationSec: this.config.maxBufferDurationSec,
chunksPerSecond: this.config.chunksPerSecond,
...(this.config.logger && { logger: this.config.logger })
});
// Initialize message handler
this.messageHandler = new MessageHandler({
onTranscript: this.config.onTranscript,
onFunctionCall: this.config.onFunctionCall,
onMetadata: this.config.onMetadata,
onError: this.config.onError,
onControlMessage: this.handleControlMessage.bind(this),
...(this.config.logger && { logger: this.config.logger })
});
}
// ==========================================================================
// PRIVATE HELPERS
// ==========================================================================
/**
* Internal logging helper - only logs if a logger was provided in config
* Debug logs are additionally gated by isDebugLogEnabled flag
* @param level - Log level: debug, info, warn, or error
* @param message - Message to log
* @param data - Optional additional data to log
*/
private log(level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any): void {
// Skip debug logs if debug logging is not enabled
if (level === 'debug' && !this.isDebugLogEnabled) {
return;
}
if (this.config.logger) {
this.config.logger(level, `[SDK] ${message}`, data);
}
}
/**
* Clean up internal resources to free memory
* Called when connection closes (normally or abnormally)
*/
private cleanup(): void {
this.log('debug', 'Cleaning up resources');
// Clear audio buffer to free memory
this.audioBuffer.clear();
// Reset stats
this.audioBytesSent = 0;
this.audioChunksSent = 0;
this.lastAudioStatsLog = 0;
// Clear connection promise so new connections can be made
this.connectionPromise = undefined;
}
// ==========================================================================
// PUBLIC API
// ==========================================================================
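/**
 * Open the WebSocket connection, retrying failed attempts per connectionRetry.
 *
 * Concurrent calls while an attempt is in flight return the same promise, and
 * calls in a connected state resolve immediately. A minimal sketch of
 * caller-side error handling (ConnectionError comes from this SDK's errors
 * module):
 * ```typescript
 * try {
 *   await client.connect();
 * } catch (err) {
 *   if (err instanceof ConnectionError) {
 *     // All retry attempts were exhausted - surface to the user or fall back
 *     console.error('Could not reach recognition service', err);
 *   }
 * }
 * ```
 */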
override async connect(): Promise<void> {
// FIRST: Prevent concurrent connection attempts - return existing promise if connecting
if (this.connectionPromise) {
this.log('debug', 'Returning existing connection promise (already connecting)', {
state: this.state
});
return this.connectionPromise;
}
// SECOND: Check state machine - prevent connections in wrong states
if (
this.state !== ClientState.INITIAL &&
this.state !== ClientState.FAILED &&
this.state !== ClientState.STOPPED
) {
this.log('debug', 'Already connected or in wrong state', {
state: this.state
});
// If we're already connected/ready, return resolved promise
return Promise.resolve();
}
// THIRD: Create connection promise with retry logic
// Store the promise IMMEDIATELY to prevent concurrent attempts
this.connectionPromise = this.connectWithRetry();
return this.connectionPromise;
}
/**
* Attempt to connect with retry logic
* Only retries on initial connection establishment, not mid-stream interruptions
*/
private async connectWithRetry(): Promise<void> {
const { maxAttempts, delayMs } = this.config.connectionRetry;
const connectionTimeout = 10000; // 10 second timeout per attempt
// TODO: Consider implementing error-code-based retry strategy
// - Retry on 503 (Service Unavailable) with longer delays
// - Don't retry on 401 (Unauthorized) or 400 (Bad Request)
// - Requires extracting HTTP status from WebSocket connection error
// For now: Simple retry for all connection failures
let lastError: Error | undefined;
// Store original handlers once (not per-attempt to avoid nested wrappers)
// IMPORTANT: Save the bound protected methods (they contain the real logic)!
// The protected onConnected() method sends ASRRequest and then calls config callback
// The protected onError() method converts Event to ErrorResultV1 and calls config callback
const originalOnConnected = this.onConnected.bind(this);
const originalOnError = this.onError.bind(this);
try {
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
// Use debug for first attempt (usually succeeds), info for retries
const attemptLogLevel = attempt === 1 ? 'debug' : 'info';
this.log(attemptLogLevel, `Connection attempt ${attempt}/${maxAttempts}`, {
url: this.config.url,
delayMs: attempt > 1 ? delayMs : 0
});
this.state = ClientState.CONNECTING;
const connectionStartTime = Date.now();
try {
// Create promise for this single attempt with timeout
await new Promise<void>((resolve, reject) => {
let settled = false; // Guard against late callbacks for this attempt
const timeout = setTimeout(() => {
if (settled) return;
settled = true;
this.log('warn', 'Connection timeout', { timeout: connectionTimeout, attempt });
this.state = ClientState.FAILED;
reject(new Error(`Connection timeout after ${connectionTimeout}ms`));
}, connectionTimeout);
// One-shot handlers for this attempt
this.onConnected = (): void => {
if (settled) return; // Ignore late callback
settled = true;
clearTimeout(timeout);
const connectionTime = Date.now() - connectionStartTime;
this.log('debug', 'Connection established successfully', {
connectionTimeMs: connectionTime,
url: this.config.url,
attempt
});
this.state = ClientState.CONNECTED;
// Call original handler
originalOnConnected();
resolve();
};
this.onError = (error): void => {
if (settled) return; // Ignore late callback
settled = true;
clearTimeout(timeout);
this.log('warn', 'Connection error', { error, attempt });
this.state = ClientState.FAILED;
// Don't call originalOnError - it expects ErrorResultV1, not WebSocket Event
// Connection errors are handled by throwing ConnectionError after retry exhaustion
reject(error);
};
// Start the connection attempt
super.connect();
});
// Success! Connection established
const successLogLevel = attempt === 1 ? 'debug' : 'info';
this.log(successLogLevel, `Connection successful on attempt ${attempt}`, {
totalAttempts: attempt
});
return; // Success - exit retry loop
} catch (error) {
lastError = error as Error;
if (attempt < maxAttempts) {
// Not the last attempt - wait before retry
// Log at info for the first two failed attempts, escalate to warn from the third failure on
const logLevel = attempt < 3 ? 'info' : 'warn';
this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms`, {
error: lastError.message,
nextAttempt: attempt + 1
});
// Reset state to allow retry (but DON'T clear connectionPromise - maintains concurrency guard)
this.state = ClientState.INITIAL;
// Wait before next attempt
await new Promise(resolve => setTimeout(resolve, delayMs));
} else {
// Last attempt failed - all retries exhausted
this.log('warn', `All ${maxAttempts} connection attempts failed`, {
error: lastError.message
});
}
}
}
// All retries exhausted - throw typed ConnectionError
throw new ConnectionError(
`Failed to establish connection after ${maxAttempts} attempts`,
maxAttempts,
this.config.url,
lastError
);
} finally {
// Restore original protected method handlers
this.onConnected = originalOnConnected;
this.onError = originalOnError;
// Clear connectionPromise only after entire retry sequence completes (success or failure)
this.connectionPromise = undefined;
}
}
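/**
 * Queue an audio chunk for recognition.
 *
 * Chunks are always written to the ring buffer; they are forwarded
 * immediately when the client is READY and not backpressured, and flushed
 * otherwise once the server signals readiness. Blobs are converted to
 * ArrayBuffer asynchronously (with a FileReader fallback for older Smart TVs).
 *
 * A hedged browser sketch using MediaRecorder - the mimeType and timeslice
 * are assumptions, and the captured encoding must match what asrRequestConfig
 * declares:
 * ```typescript
 * const recorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
 * recorder.ondataavailable = (event) => {
 *   if (client.isBufferOverflowing()) {
 *     return; // ring buffer is full - consider pausing capture until it drains
 *   }
 *   client.sendAudio(event.data); // Blob is accepted directly
 * };
 * recorder.start(100); // emit ~100ms chunks
 * ```
 */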
override sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void {
// Handle Blob by converting to ArrayBuffer asynchronously
if (audioData instanceof Blob) {
blobToArrayBuffer(audioData)
.then((arrayBuffer) => {
this.sendAudioInternal(arrayBuffer);
})
.catch((error) => {
this.log('error', 'Failed to convert Blob to ArrayBuffer', error);
});
return;
}
// Handle ArrayBuffer and ArrayBufferView synchronously
this.sendAudioInternal(audioData);
}
private sendAudioInternal(audioData: ArrayBuffer | ArrayBufferView): void {
const bytes = audioData.byteLength; // Both ArrayBuffer and ArrayBufferView expose byteLength
if (bytes === 0) return;
// BACKPRESSURE NOTE: overflow from audioBuffer.write() is not surfaced to the caller yet
// (a future version could return false or throw here). Callers should poll
// isBufferOverflowing() and pause audio capture until the buffer has space.
// Always write to ring buffer
this.audioBuffer.write(audioData);
// Send immediately if ready and not backpressured
if (this.state === ClientState.READY && !super.isLocalBackpressured()) {
this.log('debug', 'Sending audio immediately', { bytes });
this.sendAudioNow(audioData);
this.audioBuffer.read(); // Remove from buffer since we sent it
} else {
this.log('debug', 'Buffering audio', {
bytes,
state: this.state,
backpressured: super.isLocalBackpressured()
});
}
// Log audio stats periodically (only if debug logging is enabled)
if (this.isDebugLogEnabled) {
const totalChunks = this.audioChunksSent + this.audioBuffer.getStats().chunksBuffered;
if (totalChunks - this.lastAudioStatsLog >= this.audioStatsLogInterval) {
const stats = this.audioBuffer.getStats();
this.log('debug', 'Audio statistics', {
totalBytesSent: this.audioBytesSent,
totalChunksSent: this.audioChunksSent,
...stats
});
this.lastAudioStatsLog = totalChunks;
}
}
}
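/**
 * Signal end of audio and wait for the final transcript.
 *
 * Sends a STOP_RECORDING control signal and resolves once a transcript with
 * is_finished arrives, or after a 5 second safety timeout. A hedged usage
 * sketch:
 * ```typescript
 * // Stop the audio source first (implementation-specific), then:
 * await client.stopRecording();  // resolves on final transcript or 5s timeout
 * console.log(client.isTranscriptionFinished()); // true once STOPPED
 * ```
 */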
async stopRecording(): Promise<void> {
if (this.state !== ClientState.READY) {
this.log('debug', 'stopRecording called but not in READY state', { state: this.state });
return;
}
this.log('debug', 'Stopping recording');
this.state = ClientState.STOPPING;
super.sendMessage(RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION, 'message', {
type: RecognitionContextTypeV1.CONTROL_SIGNAL,
signal: ControlSignalTypeV1.STOP_RECORDING
});
return new Promise((resolve) => {
const timeout = setTimeout(() => {
this.state = ClientState.STOPPED;
resolve();
}, 5000);
const original = this.config.onTranscript;
this.config.onTranscript = (result): void => {
original(result);
if (result.is_finished) {
clearTimeout(timeout);
this.state = ClientState.STOPPED;
resolve();
}
};
// CRITICAL: Update MessageHandler's callback to use the wrapped version
// Otherwise it will keep calling the original and never detect is_finished
(this.messageHandler as any).callbacks.onTranscript = this.config.onTranscript;
});
}
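/**
 * Abort the session immediately without waiting for final results.
 *
 * Skips the STOPPING handshake, clears the ring buffer and closes the socket
 * with code 1000. Intended for teardown paths such as navigating away or user
 * cancellation, e.g. (illustrative):
 * ```typescript
 * window.addEventListener('pagehide', () => client.stopAbnormally());
 * ```
 */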
stopAbnormally(): void {
// Guard: If already in terminal state, do nothing
if (this.state === ClientState.STOPPED || this.state === ClientState.FAILED) {
this.log('debug', 'stopAbnormally called but already in terminal state', { state: this.state });
return;
}
this.log('warn', 'Abnormal stop requested - closing connection immediately', { state: this.state });
// Update state to STOPPED (skip STOPPING)
this.state = ClientState.STOPPED;
// Clean up resources
this.cleanup();
// Close WebSocket connection immediately
// Code 1000 = Normal Closure (the stop is abnormal at the application level, but it is a clean close per the WebSocket spec)
// Type assertion needed because closeConnection is a newly added protected method
(this as any).closeConnection(1000, 'Client abnormal stop');
// Note: onDisconnected will be called by WebSocket close event
// which will call cleanup again (idempotent) and trigger onDisconnected callback
}
getAudioUtteranceId(): string {
return this.config.audioUtteranceId;
}
getUrl(): string {
return this.config.url;
}
getState(): ClientState {
return this.state;
}
isConnected(): boolean {
return this.state === ClientState.READY;
}
isConnecting(): boolean {
return this.state === ClientState.CONNECTING;
}
isStopping(): boolean {
return this.state === ClientState.STOPPING;
}
isTranscriptionFinished(): boolean {
return this.state === ClientState.STOPPED;
}
isBufferOverflowing(): boolean {
return this.audioBuffer.isOverflowing();
}
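/**
 * Snapshot of send and buffer statistics: cumulative bytes/chunks sent plus
 * ring buffer state (buffered chunks, overflow count, wrap flag). Useful for
 * periodic health logging, e.g. (illustrative):
 * ```typescript
 * setInterval(() => console.log(client.getStats()), 5000);
 * ```
 */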
getStats(): IRecognitionClientStats {
const bufferStats = this.audioBuffer.getStats();
return {
audioBytesSent: this.audioBytesSent,
audioChunksSent: this.audioChunksSent,
audioChunksBuffered: bufferStats.chunksBuffered,
bufferOverflowCount: bufferStats.overflowCount,
currentBufferedChunks: bufferStats.currentBufferedChunks,
hasWrapped: bufferStats.hasWrapped
};
}
// ==========================================================================
// WEBSOCKET HOOKS (from WebSocketAudioClient)
// ==========================================================================
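/**
 * Invoked by WebSocketAudioClient once the socket is open.
 * Sends the ASRRequest (if asrRequestConfig was provided) followed by the
 * GameContext (if provided), then waits for the server's
 * READY_FOR_UPLOADING_RECORDING control message before buffered audio is
 * flushed (see handleControlMessage).
 */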
protected onConnected(): void {
this.log('debug', 'WebSocket onConnected callback');
// Send ASRRequest with configuration (if provided)
if (this.config.asrRequestConfig) {
// Extract debugCommand if present (cast to any until the field is added to the ASRRequestConfig type)
const debugCommand = (this.config.asrRequestConfig as any).debugCommand;
if (debugCommand?.enableDebugLog) {
this.isDebugLogEnabled = true;
this.log('debug', 'Debug logging enabled via debugCommand');
}
// Only generate debug log data if debug logging is enabled
if (this.isDebugLogEnabled) {
this.log('debug', 'Sending ASR request', this.config.asrRequestConfig);
}
const asrRequest: ASRRequestV1 = {
type: RecognitionContextTypeV1.ASR_REQUEST,
audioUtteranceId: this.config.audioUtteranceId,
provider: this.config.asrRequestConfig.provider.toString(),
model: this.config.asrRequestConfig.model,
language: this.config.asrRequestConfig.language?.toString() || 'en',
sampleRate:
typeof this.config.asrRequestConfig.sampleRate === 'number'
? this.config.asrRequestConfig.sampleRate
: SampleRate.RATE_16000,
encoding:
typeof this.config.asrRequestConfig.encoding === 'number'
? this.config.asrRequestConfig.encoding
: AudioEncoding.LINEAR16,
interimResults: this.config.asrRequestConfig.interimResults ?? false,
// Auto-enable useContext if gameContext is provided, or use explicit value if set
useContext: this.config.asrRequestConfig.useContext ?? !!this.config.gameContext,
// Include finalTranscriptStability if provided (it's already a string enum)
...(this.config.asrRequestConfig.finalTranscriptStability && {
finalTranscriptStability: this.config.asrRequestConfig.finalTranscriptStability
}),
...(debugCommand && { debugCommand })
};
super.sendMessage(
RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
'message',
asrRequest
);
}
// Send GameContext if provided
if (this.config.gameContext) {
// Only pass gameContext object to log if debug logging is enabled
if (this.isDebugLogEnabled) {
this.log('debug', 'Sending game context', this.config.gameContext);
}
super.sendMessage(
RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
'message',
this.config.gameContext
);
}
this.log('debug', 'Waiting for server ready signal');
this.config.onConnected();
}
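/**
 * Invoked when the socket closes. Maps the disconnection onto the state
 * machine (STOPPING becomes STOPPED; unexpected drops from
 * CONNECTING/CONNECTED/READY become FAILED), cleans up the ring buffer and
 * stats, then forwards (code, reason) to the onDisconnected callback.
 */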
protected onDisconnected(code: number, reason: string): void {
// DIAGNOSTIC: Enhanced logging for disconnections
const closeCodeDescription = this.getCloseCodeDescription(code);
const is1006 = code === 1006;
this.log('debug', '[DIAGNOSTIC] WebSocket disconnected', {
code,
codeDescription: closeCodeDescription,
reason: reason || '(empty)',
previousState: this.state,
is1006Abnormal: is1006,
audioChunksSent: this.audioChunksSent,
audioBytesSent: this.audioBytesSent,
bufferStats: this.audioBuffer.getStats()
});
// Update state based on disconnection type
if (this.state === ClientState.STOPPING) {
this.state = ClientState.STOPPED;
} else if (
this.state === ClientState.CONNECTED ||
this.state === ClientState.READY ||
this.state === ClientState.CONNECTING
) {
this.log('error', '[DIAGNOSTIC] Unexpected disconnection', {
code,
codeDescription: closeCodeDescription,
reason: reason || '(empty)',
is1006: is1006,
possibleCauses: is1006 ? [
'Network connection lost',
'Server process crashed',
'Provider (Deepgram/AssemblyAI) WebSocket closed abnormally',
'Firewall/proxy terminated connection',
'Browser/tab suspended (mobile)'
] : []
});
this.state = ClientState.FAILED;
}
// Clean up memory proactively when connection closes
this.cleanup();
this.config.onDisconnected(code, reason);
}
/**
* Get human-readable description for WebSocket close code
*/
private getCloseCodeDescription(code: number): string {
const descriptions: Record<number, string> = {
1000: 'Normal Closure',
1001: 'Going Away',
1002: 'Protocol Error',
1003: 'Unsupported Data',
1005: 'No Status Received',
1006: 'Abnormal Closure (no close frame received)',
1007: 'Invalid Frame Payload',
1008: 'Policy Violation',
1009: 'Message Too Big',
1010: 'Mandatory Extension',
1011: 'Internal Server Error',
1012: 'Service Restart',
1013: 'Try Again Later',
4000: 'Auth Required',
4001: 'Auth Failed',
4002: 'Rate Limit Exceeded',
4003: 'Invalid Session',
4004: 'Session Expired'
};
return descriptions[code] || `Unknown (${code})`;
}
protected onError(error: Event): void {
this.state = ClientState.FAILED;
const errorResult: ErrorResultV1 = {
type: RecognitionResultTypeV1.ERROR,
audioUtteranceId: this.config.audioUtteranceId,
message: 'WebSocket error',
description: error.type || 'Connection error'
};
this.config.onError(errorResult);
}
protected override onMessage(msg: { v: number; type: string; data: any }): void {
this.messageHandler.handleMessage(msg);
}
// ==========================================================================
// INTERNAL HELPERS
// ==========================================================================
/**
* Handle control messages from server
* @param msg - Control message containing server actions
*/
private handleControlMessage(msg: ClientControlMessageV1): void {
switch (msg.action) {
case ClientControlActionV1.READY_FOR_UPLOADING_RECORDING: {
this.log('debug', 'Server ready for audio upload');
this.state = ClientState.READY;
this.messageHandler.setSessionStartTime(Date.now());
// Flush buffered audio now that server is ready
const bufferedChunks = this.audioBuffer.flush();
if (bufferedChunks.length > 0) {
this.log('debug', 'Flushing buffered audio', { chunks: bufferedChunks.length });
bufferedChunks.forEach((chunk) => this.sendAudioNow(chunk.data));
}
break;
}
case ClientControlActionV1.STOP_RECORDING:
this.log('debug', 'Received stop recording signal from server');
break;
default:
this.log('warn', 'Unknown control action', { action: msg.action });
}
}
/**
* Send audio immediately to the server (without buffering)
* @param audioData - Audio data to send
*/
private sendAudioNow(audioData: ArrayBuffer | ArrayBufferView): void {
const byteLength = audioData.byteLength; // Both ArrayBuffer and ArrayBufferView expose byteLength
const encodingId = (this.config.asrRequestConfig?.encoding ||
AudioEncoding.LINEAR16) as AudioEncoding;
const sampleRate =
typeof this.config.asrRequestConfig?.sampleRate === 'number'
? this.config.asrRequestConfig.sampleRate
: SampleRate.RATE_16000;
super.sendAudio(
audioData,
RealTimeTwoWayWebSocketRecognitionClient.PROTOCOL_VERSION,
encodingId,
sampleRate
);
this.audioBytesSent += byteLength;
this.audioChunksSent++;
}
}