UNPKG

@volley/recognition-client-sdk

Version:

Recognition Service TypeScript/Node.js Client SDK

480 lines (419 loc) 17.6 kB
/**
 * Simplified VGF Recognition Client
 *
 * A thin wrapper around RealTimeTwoWayWebSocketRecognitionClient that maintains
 * a VGF RecognitionState as a pure sink/output of recognition events.
 *
 * The VGF state is updated based on events but never influences client behavior.
 * All functionality is delegated to the underlying client.
 */
import { RecognitionState, TranscriptionStatus, RecordingStatus, RecognitionActionProcessingState } from './vgf-recognition-state.js';
import { IRecognitionClient, IRecognitionClientConfig, ClientState } from './recognition-client.types.js';
import { RealTimeTwoWayWebSocketRecognitionClient } from './recognition-client.js';
import { createVGFStateFromConfig, mapTranscriptionResultToState, mapErrorToState, updateStateOnStop } from './vgf-recognition-mapper.js';
import { RecognitionContextTypeV1 } from '@recog/shared-types';
import { v4 as uuidv4 } from 'uuid';

/**
 * Configuration for SimplifiedVGFRecognitionClient
 */
export interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
  /**
   * Callback invoked whenever the VGF state changes.
   * Use this to update your UI or React state.
   */
  onStateChange?: (state: RecognitionState) => void;

  /**
   * Optional initial state to restore from a previous session.
   * If provided, its audioUtteranceId will be extracted and used
   * (unless the state is terminal, in which case a fresh UUID is generated).
   */
  initialState?: RecognitionState;
}

/**
 * Interface for SimplifiedVGFRecognitionClient
 *
 * A simplified client that maintains VGF state for game developers.
 * All methods from the underlying client are available, plus VGF state management.
 */
export interface ISimplifiedVGFRecognitionClient {
  // ============= Core Connection Methods =============

  /**
   * Connect to the recognition service WebSocket
   * @returns Promise that resolves when connected and ready
   */
  connect(): Promise<void>;

  /**
   * Send audio data for transcription
   * @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
   */
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;

  /**
   * Stop recording and wait for final transcription
   * @returns Promise that resolves when transcription is complete
   */
  stopRecording(): Promise<void>;

  /**
   * Force stop and immediately close connection without waiting for server
   *
   * WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
   * - Does NOT wait for server to process remaining audio
   * - Does NOT receive final transcript from server (VGF state set to empty)
   * - Immediately closes WebSocket connection
   * - Cleans up resources (buffers, listeners)
   *
   * Use Cases:
   * - User explicitly cancels/abandons the session
   * - Timeout scenarios where waiting is not acceptable
   * - Need immediate cleanup and can't wait for server
   *
   * RECOMMENDED: Use stopRecording() for normal shutdown.
   * Only use this when immediate disconnection is required.
   */
  stopAbnormally(): void;

  // ============= VGF State Methods =============

  /**
   * Get the current VGF recognition state
   * @returns Current RecognitionState with all transcription data
   */
  getVGFState(): RecognitionState;

  // ============= Status Check Methods =============

  /** Check if connected to the WebSocket */
  isConnected(): boolean;

  /** Check if currently connecting */
  isConnecting(): boolean;

  /** Check if currently stopping */
  isStopping(): boolean;

  /** Check if transcription has finished */
  isTranscriptionFinished(): boolean;

  /** Check if the audio buffer has overflowed */
  isBufferOverflowing(): boolean;

  // ============= Utility Methods =============

  /** Get the audio utterance ID for this session */
  getAudioUtteranceId(): string;

  /** Get the WebSocket URL being used */
  getUrl(): string;

  /** Get the underlying client state (for advanced usage) */
  getState(): ClientState;
}

/**
 * This wrapper ONLY maintains VGF state as a sink.
 * All actual functionality is delegated to the underlying client.
*/ export class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitionClient { private client: IRecognitionClient; private state: RecognitionState; private isRecordingAudio: boolean = false; private stateChangeCallback: ((state: RecognitionState) => void) | undefined; private expectedUuid: string; private logger: IRecognitionClientConfig['logger']; constructor(config: SimplifiedVGFClientConfig) { const { onStateChange, initialState, ...clientConfig } = config; this.stateChangeCallback = onStateChange; this.logger = clientConfig.logger; // Use provided initial state or create from config if (initialState) { // Check if initial state has a valid UUID const needsNewUuid = !initialState.audioUtteranceId || initialState.audioUtteranceId === '' || initialState.transcriptionStatus === TranscriptionStatus.ABORTED || initialState.transcriptionStatus === TranscriptionStatus.FINALIZED || initialState.transcriptionStatus === TranscriptionStatus.ERROR || (initialState.recognitionActionProcessingState !== undefined && initialState.recognitionActionProcessingState !== RecognitionActionProcessingState.COMPLETED); if (needsNewUuid) { // Generate new UUID for fresh session const newUUID = uuidv4(); if (clientConfig.logger) { const reason = !initialState.audioUtteranceId ? 'Missing UUID' : initialState.audioUtteranceId === '' ? 
'Empty UUID' : `Terminal session (${initialState.transcriptionStatus})`; clientConfig.logger('info', `${reason} detected, generating new UUID: ${newUUID}`); } // Update state with new UUID and reset session-specific fields this.state = { ...initialState, audioUtteranceId: newUUID, transcriptionStatus: TranscriptionStatus.NOT_STARTED, startRecordingStatus: RecordingStatus.READY, recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED, finalTranscript: undefined }; // Use new UUID in client config clientConfig.audioUtteranceId = newUUID; // Notify state change immediately so app can update if (onStateChange) { onStateChange(this.state); } } else { // Non-terminal state with valid UUID - safe to reuse (e.g., reconnecting to IN_PROGRESS session) this.state = initialState; // Override audioUtteranceId in config if state has one if (initialState.audioUtteranceId && !clientConfig.audioUtteranceId) { clientConfig.audioUtteranceId = initialState.audioUtteranceId; } } } else { // Initialize VGF state from config this.state = createVGFStateFromConfig(clientConfig); } // Client is immediately ready to accept audio (will buffer if not connected) this.state = { ...this.state, startRecordingStatus: 'READY' }; // Track the expected UUID for this session this.expectedUuid = this.state.audioUtteranceId; // If VGF state has promptSlotMap, configure gameContext to use it if (this.state.promptSlotMap) { // Set useContext=true in ASR config to enable context processing if (clientConfig.asrRequestConfig) { clientConfig.asrRequestConfig.useContext = true; } // Add promptSlotMap to gameContext if (!clientConfig.gameContext) { // Only create gameContext if we have gameId and gamePhase // These should come from the game's configuration if (clientConfig.logger) { clientConfig.logger('warn', '[VGF] promptSlotMap found but no gameContext provided. 
SlotMap will not be sent.'); } } else { // Merge promptSlotMap into existing gameContext clientConfig.gameContext.slotMap = this.state.promptSlotMap; } } // Create underlying client with callbacks that ONLY update VGF state this.client = new RealTimeTwoWayWebSocketRecognitionClient({ ...clientConfig, // These callbacks ONLY update the VGF state sink onTranscript: (result) => { // Skip update if UUID doesn't match (stale callback from previous session) if (result.audioUtteranceId && result.audioUtteranceId !== this.expectedUuid) { if (this.logger) { this.logger('warn', `[VGF] Skipping transcript update: UUID mismatch (expected: ${this.expectedUuid}, got: ${result.audioUtteranceId})` ); } // Still call original callback if provided if (clientConfig.onTranscript) { clientConfig.onTranscript(result); } return; } // Update VGF state based on transcript this.state = mapTranscriptionResultToState(this.state, result, this.isRecordingAudio); this.notifyStateChange(); // Call original callback if provided if (clientConfig.onTranscript) { clientConfig.onTranscript(result); } }, onMetadata: (metadata) => { // Skip update if UUID doesn't match (stale callback from previous session) if (metadata.audioUtteranceId && metadata.audioUtteranceId !== this.expectedUuid) { if (this.logger) { this.logger('warn', `[VGF] Skipping metadata update: UUID mismatch (expected: ${this.expectedUuid}, got: ${metadata.audioUtteranceId})` ); } return; } if (clientConfig.onMetadata) { clientConfig.onMetadata(metadata); } }, onFunctionCall: (result) => { // Pass through function call - no VGF state changes needed for P2 feature if (clientConfig.onFunctionCall) { clientConfig.onFunctionCall(result); } }, onError: (error) => { // Skip update if UUID doesn't match (stale callback from previous session) if (error.audioUtteranceId && error.audioUtteranceId !== this.expectedUuid) { if (this.logger) { this.logger('warn', `[VGF] Skipping error update: UUID mismatch (expected: ${this.expectedUuid}, got: 
${error.audioUtteranceId})` ); } return; } this.isRecordingAudio = false; // Reset on error this.state = mapErrorToState(this.state, error); this.notifyStateChange(); if (clientConfig.onError) { clientConfig.onError(error); } }, onConnected: () => { // Don't update READY here - client can accept audio before connection if (clientConfig.onConnected) { clientConfig.onConnected(); } }, onDisconnected: (code, reason) => { this.isRecordingAudio = false; // Reset on disconnect if (clientConfig.onDisconnected) { clientConfig.onDisconnected(code, reason); } } }); } // DELEGATE ALL METHODS TO UNDERLYING CLIENT // The wrapper ONLY updates VGF state, doesn't use it for decisions async connect(): Promise<void> { await this.client.connect(); // State will be updated via onConnected callback } sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void { // Track recording for state updates if (!this.isRecordingAudio) { this.isRecordingAudio = true; this.state = { ...this.state, startRecordingStatus: 'RECORDING', startRecordingTimestamp: new Date().toISOString() }; this.notifyStateChange(); } this.client.sendAudio(audioData); } async stopRecording(): Promise<void> { this.isRecordingAudio = false; this.state = updateStateOnStop(this.state); this.notifyStateChange(); await this.client.stopRecording(); } stopAbnormally(): void { const clientState = this.client.getState(); // Guard: Block if graceful shutdown in progress or already in terminal state // This prevents stopAbnormally from disrupting stopRecording's graceful finalization if (clientState === ClientState.STOPPING || clientState === ClientState.STOPPED || clientState === ClientState.FAILED) { // Already stopping/stopped - do nothing to avoid disrupting graceful shutdown return; } this.isRecordingAudio = false; // Set state to ABORTED - preserve any partial transcript received so far // This clearly indicates the session was cancelled/abandoned by user if (this.state.transcriptionStatus !== TranscriptionStatus.ABORTED 
&& this.state.transcriptionStatus !== TranscriptionStatus.FINALIZED) { this.state = { ...this.state, transcriptionStatus: TranscriptionStatus.ABORTED, startRecordingStatus: RecordingStatus.FINISHED, finalRecordingTimestamp: new Date().toISOString(), finalTranscriptionTimestamp: new Date().toISOString() }; this.notifyStateChange(); } // Delegate to underlying client for actual WebSocket cleanup this.client.stopAbnormally(); } // Pure delegation methods - no state logic getAudioUtteranceId(): string { return this.client.getAudioUtteranceId(); } getUrl(): string { return this.client.getUrl(); } getState(): ClientState { return this.client.getState(); } isConnected(): boolean { return this.client.isConnected(); } isConnecting(): boolean { return this.client.isConnecting(); } isStopping(): boolean { return this.client.isStopping(); } isTranscriptionFinished(): boolean { return this.client.isTranscriptionFinished(); } isBufferOverflowing(): boolean { return this.client.isBufferOverflowing(); } // VGF State access (read-only for consumers) getVGFState(): RecognitionState { return { ...this.state }; } private notifyStateChange(): void { // State has already been validated for correct UUID before this is called if (this.stateChangeCallback) { this.stateChangeCallback({ ...this.state }); } } } /** * Factory function for creating simplified client * Usage examples: * * // Basic usage * const client = createSimplifiedVGFClient({ * asrRequestConfig: { provider: 'deepgram', language: 'en' }, * onStateChange: (state) => { * console.log('VGF State updated:', state); * // Update React state, game UI, etc. 
* } * }); * * // With initial state (e.g., restoring from previous session) * const client = createSimplifiedVGFClient({ * asrRequestConfig: { provider: 'deepgram', language: 'en' }, * initialState: previousState, // Will use audioUtteranceId from state * onStateChange: (state) => setVGFState(state) * }); * * // With initial state containing promptSlotMap for enhanced recognition * const stateWithSlots: RecognitionState = { * audioUtteranceId: 'session-123', * promptSlotMap: { * 'song_title': ['one time', 'baby'], * 'artists': ['justin bieber'] * } * }; * const client = createSimplifiedVGFClient({ * asrRequestConfig: { provider: 'deepgram', language: 'en' }, * gameContext: { * type: RecognitionContextTypeV1.GAME_CONTEXT, * gameId: 'music-quiz', // Your game's ID * gamePhase: 'song-guessing' // Current game phase * }, * initialState: stateWithSlots, // promptSlotMap will be added to gameContext * onStateChange: (state) => setVGFState(state) * }); * * await client.connect(); * client.sendAudio(audioData); * // VGF state automatically updates based on transcription results */ export function createSimplifiedVGFClient(config: SimplifiedVGFClientConfig): ISimplifiedVGFRecognitionClient { return new SimplifiedVGFRecognitionClient(config); }