UNPKG

@volley/recognition-client-sdk

Version: (not captured in this page extract — see the package registry for the published version)

Recognition Service TypeScript/Node.js Client SDK

1,327 lines (1,098 loc) 50.2 kB
/** * Unit tests for SimplifiedVGFRecognitionClient */ import { SimplifiedVGFRecognitionClient, createSimplifiedVGFClient } from './simplified-vgf-recognition-client.js'; import { RealTimeTwoWayWebSocketRecognitionClient } from './recognition-client.js'; import { ClientState } from './recognition-client.types.js'; import { AudioEncoding, RecognitionContextTypeV1 } from '@recog/shared-types'; import { RecordingStatus, TranscriptionStatus, type RecognitionState } from './vgf-recognition-state.js'; // Mock the underlying client jest.mock('./recognition-client'); describe('SimplifiedVGFRecognitionClient', () => { let mockClient: jest.Mocked<RealTimeTwoWayWebSocketRecognitionClient>; let simplifiedClient: SimplifiedVGFRecognitionClient; let stateChangeCallback: jest.Mock; beforeEach(() => { // Reset mocks jest.clearAllMocks(); // Create mock for underlying client mockClient = { connect: jest.fn().mockResolvedValue(undefined), sendAudio: jest.fn(), stopRecording: jest.fn().mockResolvedValue(undefined), stopAbnormally: jest.fn(), getAudioUtteranceId: jest.fn().mockReturnValue('test-uuid'), getState: jest.fn().mockReturnValue(ClientState.INITIAL), isConnected: jest.fn().mockReturnValue(false), isConnecting: jest.fn().mockReturnValue(false), isStopping: jest.fn().mockReturnValue(false), isTranscriptionFinished: jest.fn().mockReturnValue(false), isBufferOverflowing: jest.fn().mockReturnValue(false), } as any; // Mock the constructor (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>) .mockImplementation(() => mockClient as any); stateChangeCallback = jest.fn(); }); describe('Constructor', () => { it('should initialize with correct default VGF state', () => { simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); const state = simplifiedClient.getVGFState(); 
expect(state.audioUtteranceId).toBeDefined(); expect(state.startRecordingStatus).toBe(RecordingStatus.READY); expect(state.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED); expect(state.pendingTranscript).toBe(''); }); it('should generate new UUID when initial state has no audioUtteranceId', () => { const initialState: RecognitionState = { // No audioUtteranceId provided startRecordingStatus: RecordingStatus.READY, transcriptionStatus: TranscriptionStatus.NOT_STARTED, pendingTranscript: '' } as RecognitionState; simplifiedClient = new SimplifiedVGFRecognitionClient({ initialState, asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); const state = simplifiedClient.getVGFState(); // Should have generated a new UUID expect(state.audioUtteranceId).toBeDefined(); expect(state.audioUtteranceId).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i); // Should preserve other fields expect(state.startRecordingStatus).toBe(RecordingStatus.READY); expect(state.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED); // onStateChange should be called with the new UUID expect(stateChangeCallback).toHaveBeenCalledTimes(1); const callbackState = stateChangeCallback.mock.calls[0][0]; expect(callbackState.audioUtteranceId).toBe(state.audioUtteranceId); }); it('should generate new UUID when initial state has empty audioUtteranceId', () => { const initialState: RecognitionState = { audioUtteranceId: '', // Empty UUID startRecordingStatus: RecordingStatus.READY, transcriptionStatus: TranscriptionStatus.NOT_STARTED, pendingTranscript: '' }; simplifiedClient = new SimplifiedVGFRecognitionClient({ initialState, asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); const state = simplifiedClient.getVGFState(); // Should have generated a new UUID 
expect(state.audioUtteranceId).toBeDefined(); expect(state.audioUtteranceId).not.toBe(''); expect(state.audioUtteranceId).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i); }); it('should accept initial state and use its audioUtteranceId', () => { const initialState: RecognitionState = { audioUtteranceId: 'existing-session-id', startRecordingStatus: RecordingStatus.FINISHED, finalTranscript: 'Previous transcript', pendingTranscript: '', // Required field transcriptionStatus: TranscriptionStatus.FINALIZED }; simplifiedClient = new SimplifiedVGFRecognitionClient({ initialState, onStateChange: stateChangeCallback }); const state = simplifiedClient.getVGFState(); // FINALIZED session gets new UUID to prevent server session reuse expect(state.audioUtteranceId).not.toBe('existing-session-id'); expect(state.audioUtteranceId).toBeDefined(); // finalTranscript is cleared for fresh session expect(state.finalTranscript).toBeUndefined(); // Statuses reset for fresh session expect(state.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED); expect(state.startRecordingStatus).toBe(RecordingStatus.READY); // Verify NEW audioUtteranceId was passed to underlying client const constructorCalls = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls; expect(constructorCalls[0]?.[0]?.audioUtteranceId).not.toBe('existing-session-id'); expect(constructorCalls[0]?.[0]?.audioUtteranceId).toBe(state.audioUtteranceId); }); it('should store ASR config as JSON string', () => { const asrConfig = { provider: 'deepgram' as const, language: 'en', model: 'nova-2', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }; simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: asrConfig, onStateChange: stateChangeCallback }); const state = simplifiedClient.getVGFState(); expect(state.asrConfig).toBe(JSON.stringify(asrConfig)); }); }); describe('State Management', () => { beforeEach(() => 
{ simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); }); it('should update state to RECORDING when sendAudio is called', () => { const audioData = Buffer.from([1, 2, 3, 4]); simplifiedClient.sendAudio(audioData); expect(stateChangeCallback).toHaveBeenCalled(); const updatedState = stateChangeCallback.mock.calls[0][0]; expect(updatedState.startRecordingStatus).toBe(RecordingStatus.RECORDING); expect(updatedState.startRecordingTimestamp).toBeDefined(); }); it('should only set recording timestamp once', () => { const audioData = Buffer.from([1, 2, 3, 4]); simplifiedClient.sendAudio(audioData); const firstTimestamp = stateChangeCallback.mock.calls[0][0].startRecordingTimestamp; // Clear mock to verify no additional state changes stateChangeCallback.mockClear(); // Second sendAudio should not trigger state change since already recording simplifiedClient.sendAudio(audioData); expect(stateChangeCallback).not.toHaveBeenCalled(); // Verify timestamp hasn't changed in internal state const currentState = simplifiedClient.getVGFState(); expect(currentState.startRecordingTimestamp).toBe(firstTimestamp); }); it('should update state to FINISHED when stopRecording is called', async () => { await simplifiedClient.stopRecording(); expect(stateChangeCallback).toHaveBeenCalled(); const updatedState = stateChangeCallback.mock.calls[0][0]; expect(updatedState.startRecordingStatus).toBe(RecordingStatus.FINISHED); expect(updatedState.finalRecordingTimestamp).toBeDefined(); }); }); describe('Transcript Callbacks', () => { let onTranscriptCallback: (result: any) => void; let onMetadataCallback: (metadata: any) => void; let onErrorCallback: (error: any) => void; beforeEach(() => { // Capture the callbacks passed to underlying client simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', 
language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); const constructorCall = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls[0]?.[0]; onTranscriptCallback = constructorCall?.onTranscript ?? jest.fn(); onMetadataCallback = constructorCall?.onMetadata ?? jest.fn(); onErrorCallback = constructorCall?.onError ?? jest.fn(); }); it('should directly copy pending transcript without combining', () => { const transcriptResult = { finalTranscript: 'Hello', pendingTranscript: ' world', pendingTranscriptConfidence: 0.85, finalTranscriptConfidence: 0.95, is_finished: false }; onTranscriptCallback(transcriptResult); expect(stateChangeCallback).toHaveBeenCalled(); const updatedState = stateChangeCallback.mock.calls[0][0]; expect(updatedState.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS); // Should be direct copy, NOT combined expect(updatedState.pendingTranscript).toBe(' world'); expect(updatedState.pendingConfidence).toBe(0.85); // Final should also be copied when present expect(updatedState.finalTranscript).toBe('Hello'); expect(updatedState.finalConfidence).toBe(0.95); }); it('should update VGF state with final transcript', () => { const transcriptResult = { finalTranscript: 'Hello world', finalTranscriptConfidence: 0.98, is_finished: true }; onTranscriptCallback(transcriptResult); expect(stateChangeCallback).toHaveBeenCalled(); const updatedState = stateChangeCallback.mock.calls[0][0]; expect(updatedState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED); expect(updatedState.finalTranscript).toBe('Hello world'); expect(updatedState.finalConfidence).toBe(0.98); expect(updatedState.pendingTranscript).toBe(''); expect(updatedState.finalTranscriptionTimestamp).toBeDefined(); }); it('should handle metadata and mark recording as finished', () => { // Get the actual UUID from the client const actualUuid = 
simplifiedClient.getVGFState().audioUtteranceId; const metadata = { audioUtteranceId: actualUuid, duration: 5000 }; onMetadataCallback(metadata); expect(stateChangeCallback).toHaveBeenCalled(); const updatedState = stateChangeCallback.mock.calls[0][0]; expect(updatedState.startRecordingStatus).toBe(RecordingStatus.FINISHED); expect(updatedState.finalRecordingTimestamp).toBeDefined(); }); it('should handle errors and update state', () => { const error = { message: 'Recognition failed', code: 'RECOGNITION_ERROR' }; onErrorCallback(error); expect(stateChangeCallback).toHaveBeenCalled(); const updatedState = stateChangeCallback.mock.calls[0][0]; expect(updatedState.transcriptionStatus).toBe(TranscriptionStatus.ERROR); expect(updatedState.startRecordingStatus).toBe(RecordingStatus.FINISHED); }); it('should reset isRecordingAudio on error', () => { // First start recording simplifiedClient.sendAudio(Buffer.from([1, 2, 3])); // Then error occurs onErrorCallback({ message: 'Error' }); // Send audio again should restart recording stateChangeCallback.mockClear(); simplifiedClient.sendAudio(Buffer.from([4, 5, 6])); const updatedState = stateChangeCallback.mock.calls[0][0]; expect(updatedState.startRecordingStatus).toBe(RecordingStatus.RECORDING); expect(updatedState.startRecordingTimestamp).toBeDefined(); }); }); describe('Method Delegation', () => { beforeEach(() => { simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 } }); }); it('should delegate connect() to underlying client', async () => { await simplifiedClient.connect(); expect(mockClient.connect).toHaveBeenCalled(); }); it('should delegate sendAudio() to underlying client', () => { const audioData = Buffer.from([1, 2, 3, 4]); simplifiedClient.sendAudio(audioData); expect(mockClient.sendAudio).toHaveBeenCalledWith(audioData); }); it('should delegate sendAudio() with Blob to underlying client', () => { 
const blob = new Blob([new Uint8Array([1, 2, 3, 4])]); simplifiedClient.sendAudio(blob); expect(mockClient.sendAudio).toHaveBeenCalledWith(blob); }); it('should delegate stopRecording() to underlying client', async () => { await simplifiedClient.stopRecording(); expect(mockClient.stopRecording).toHaveBeenCalled(); }); it('should delegate status check methods', () => { simplifiedClient.isConnected(); expect(mockClient.isConnected).toHaveBeenCalled(); simplifiedClient.isConnecting(); expect(mockClient.isConnecting).toHaveBeenCalled(); simplifiedClient.isStopping(); expect(mockClient.isStopping).toHaveBeenCalled(); simplifiedClient.isTranscriptionFinished(); expect(mockClient.isTranscriptionFinished).toHaveBeenCalled(); simplifiedClient.isBufferOverflowing(); expect(mockClient.isBufferOverflowing).toHaveBeenCalled(); }); it('should delegate getAudioUtteranceId()', () => { const id = simplifiedClient.getAudioUtteranceId(); expect(mockClient.getAudioUtteranceId).toHaveBeenCalled(); expect(id).toBe('test-uuid'); }); it('should delegate getState()', () => { const state = simplifiedClient.getState(); expect(mockClient.getState).toHaveBeenCalled(); expect(state).toBe(ClientState.INITIAL); }); }); describe('Original Callbacks', () => { it('should call original onTranscript callback if provided', () => { const originalOnTranscript = jest.fn(); simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onTranscript: originalOnTranscript }); const constructorCall = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls[0]?.[0]; const wrappedCallback = constructorCall?.onTranscript; if (!wrappedCallback) throw new Error('onTranscript callback not found'); const result = { finalTranscript: 'test', is_finished: false }; wrappedCallback(result as any); 
expect(originalOnTranscript).toHaveBeenCalledWith(result); }); it('should call original onError callback if provided', () => { const originalOnError = jest.fn(); simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onError: originalOnError }); const constructorCall = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls[0]?.[0]; const wrappedCallback = constructorCall?.onError; if (!wrappedCallback) throw new Error('onError callback not found'); const error = { message: 'test error' }; wrappedCallback(error as any); expect(originalOnError).toHaveBeenCalledWith(error); }); }); describe('Thin Layer Verification', () => { it('should pass transcript result directly to mapper without modification', () => { simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); // Get actual UUID from client const actualUuid = simplifiedClient.getVGFState().audioUtteranceId; const transcriptResult = { type: 'Transcription', audioUtteranceId: actualUuid, finalTranscript: 'Final text', pendingTranscript: 'Pending text', finalTranscriptConfidence: 0.99, pendingTranscriptConfidence: 0.88, is_finished: false, voiceStart: 100, voiceDuration: 500, voiceEnd: 600, extraField: 'should be ignored by VGF state' }; const constructorCall = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls[0]?.[0]; const wrappedCallback = constructorCall?.onTranscript; if (!wrappedCallback) throw new Error('onTranscript callback not found'); wrappedCallback(transcriptResult as any); const updatedState = stateChangeCallback.mock.calls[0][0]; // Verify direct copy without modification 
expect(updatedState.pendingTranscript).toBe('Pending text'); expect(updatedState.finalTranscript).toBe('Final text'); expect(updatedState.pendingConfidence).toBe(0.88); expect(updatedState.finalConfidence).toBe(0.99); }); it('should handle only pending transcript correctly', () => { const transcriptResult = { pendingTranscript: 'Just pending', pendingTranscriptConfidence: 0.75, is_finished: false }; simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); const constructorCall = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls[0]?.[0]; const wrappedCallback = constructorCall?.onTranscript; if (!wrappedCallback) throw new Error('onTranscript callback not found'); wrappedCallback(transcriptResult as any); const updatedState = stateChangeCallback.mock.calls[0][0]; expect(updatedState.pendingTranscript).toBe('Just pending'); expect(updatedState.pendingConfidence).toBe(0.75); expect(updatedState.finalTranscript).toBeUndefined(); expect(updatedState.finalConfidence).toBeUndefined(); }); it('should handle only final transcript correctly', () => { const transcriptResult = { finalTranscript: 'Just final', finalTranscriptConfidence: 0.92, is_finished: false }; simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); const constructorCall = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls[0]?.[0]; const wrappedCallback = constructorCall?.onTranscript; if (!wrappedCallback) throw new Error('onTranscript callback not found'); wrappedCallback(transcriptResult as any); const updatedState = stateChangeCallback.mock.calls[0][0]; 
expect(updatedState.pendingTranscript).toBe(''); // Empty string when undefined expect(updatedState.pendingConfidence).toBeUndefined(); expect(updatedState.finalTranscript).toBe('Just final'); expect(updatedState.finalConfidence).toBe(0.92); }); it('should clear pending when is_finished is true', () => { const transcriptResult = { finalTranscript: 'Complete transcript', pendingTranscript: 'Should be ignored', finalTranscriptConfidence: 0.98, pendingTranscriptConfidence: 0.77, is_finished: true }; simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); const constructorCall = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls[0]?.[0]; const wrappedCallback = constructorCall?.onTranscript; if (!wrappedCallback) throw new Error('onTranscript callback not found'); wrappedCallback(transcriptResult as any); const updatedState = stateChangeCallback.mock.calls[0][0]; expect(updatedState.finalTranscript).toBe('Complete transcript'); expect(updatedState.finalConfidence).toBe(0.98); // Pending should be cleared when finished expect(updatedState.pendingTranscript).toBe(''); expect(updatedState.pendingConfidence).toBeUndefined(); expect(updatedState.transcriptionStatus).toBe(TranscriptionStatus.FINALIZED); }); }); describe('Factory Function', () => { it('should create SimplifiedVGFRecognitionClient instance', () => { const client = createSimplifiedVGFClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 } }); expect(client).toBeInstanceOf(SimplifiedVGFRecognitionClient); expect(client.getVGFState).toBeDefined(); expect(client.connect).toBeDefined(); }); it('should auto-generate new UUID for ABORTED session and reset fields', () => { const stateChangeCallback = jest.fn(); const abortedState: 
RecognitionState = { audioUtteranceId: 'old-aborted-uuid', transcriptionStatus: TranscriptionStatus.ABORTED, startRecordingStatus: RecordingStatus.FINISHED, pendingTranscript: '', finalTranscript: 'old transcript from aborted session' }; const client = createSimplifiedVGFClient({ initialState: abortedState, onStateChange: stateChangeCallback, asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 } }); // Should have called callback with new UUID expect(stateChangeCallback).toHaveBeenCalledTimes(1); const newState = stateChangeCallback.mock.calls[0][0]; // New UUID should be different expect(newState.audioUtteranceId).not.toBe('old-aborted-uuid'); expect(newState.audioUtteranceId).toBeDefined(); // Status fields should be reset for fresh session expect(newState.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED); expect(newState.startRecordingStatus).toBe(RecordingStatus.READY); // Previous transcript should be cleared expect(newState.finalTranscript).toBeUndefined(); // Client should use the new UUID expect(client.getVGFState().audioUtteranceId).toBe(newState.audioUtteranceId); }); it('should auto-generate new UUID for FINALIZED session and reset fields', () => { const stateChangeCallback = jest.fn(); const finalizedState: RecognitionState = { audioUtteranceId: 'old-finalized-uuid', transcriptionStatus: TranscriptionStatus.FINALIZED, startRecordingStatus: RecordingStatus.FINISHED, pendingTranscript: '', finalTranscript: 'completed transcript from previous session' }; const client = createSimplifiedVGFClient({ initialState: finalizedState, onStateChange: stateChangeCallback, asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 } }); // Should have generated new UUID expect(stateChangeCallback).toHaveBeenCalledTimes(1); const newState = stateChangeCallback.mock.calls[0][0]; expect(newState.audioUtteranceId).not.toBe('old-finalized-uuid'); 
expect(newState.transcriptionStatus).toBe(TranscriptionStatus.NOT_STARTED); expect(newState.startRecordingStatus).toBe(RecordingStatus.READY); expect(newState.finalTranscript).toBeUndefined(); }); it('should preserve UUID for IN_PROGRESS session (valid resumption)', () => { const stateChangeCallback = jest.fn(); const inProgressState: RecognitionState = { audioUtteranceId: 'in-progress-uuid', transcriptionStatus: TranscriptionStatus.IN_PROGRESS, startRecordingStatus: RecordingStatus.RECORDING, pendingTranscript: 'partial text' }; const client = createSimplifiedVGFClient({ initialState: inProgressState, onStateChange: stateChangeCallback, asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 } }); // Should NOT generate new UUID (valid reconnection) const currentState = client.getVGFState(); expect(currentState.audioUtteranceId).toBe('in-progress-uuid'); expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.IN_PROGRESS); }); }); describe('PromptSlotMap Integration', () => { it('should pass promptSlotMap from initial state to gameContext', () => { const initialState: RecognitionState = { audioUtteranceId: 'test-123', pendingTranscript: '', // Required field promptSlotMap: { 'entity1': ['value1', 'value2'], 'entity2': ['value3'] } }; const gameContext = { type: RecognitionContextTypeV1.GAME_CONTEXT, gameId: 'test-game', gamePhase: 'test-phase' } as const; simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, gameContext, initialState }); const constructorCall = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls[0]?.[0]; // Verify useContext was set to true expect(constructorCall?.asrRequestConfig?.useContext).toBe(true); // Verify slotMap was added to gameContext expect(constructorCall?.gameContext?.slotMap).toEqual({ 
'entity1': ['value1', 'value2'], 'entity2': ['value3'] }); }); it('should warn if promptSlotMap exists but no gameContext provided', () => { const logger = jest.fn(); const initialState: RecognitionState = { audioUtteranceId: 'test-123', pendingTranscript: '', // Required field promptSlotMap: { 'entity1': ['value1'] } }; simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, initialState, logger }); expect(logger).toHaveBeenCalledWith( 'warn', '[VGF] promptSlotMap found but no gameContext provided. SlotMap will not be sent.' ); }); it('should preserve promptSlotMap throughout state changes', () => { const initialState: RecognitionState = { audioUtteranceId: 'test-123', pendingTranscript: '', // Required field promptSlotMap: { 'slots': ['test'] } }; simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, initialState, onStateChange: stateChangeCallback }); // Send audio and verify promptSlotMap is preserved simplifiedClient.sendAudio(Buffer.from([1, 2, 3])); let state = stateChangeCallback.mock.calls[0][0]; expect(state.promptSlotMap).toEqual({ 'slots': ['test'] }); // Simulate transcript and verify preservation const constructorCall = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls[0]?.[0]; const onTranscriptCallback = constructorCall?.onTranscript; if (!onTranscriptCallback) throw new Error('onTranscript callback not found'); onTranscriptCallback({ finalTranscript: 'test', is_finished: false } as any); state = stateChangeCallback.mock.calls[1][0]; expect(state.promptSlotMap).toEqual({ 'slots': ['test'] }); }); }); describe('State Immutability', () => { it('should return a copy of VGF state, not a reference', () => { simplifiedClient = new 
SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 } }); const state1 = simplifiedClient.getVGFState(); const state2 = simplifiedClient.getVGFState(); expect(state1).not.toBe(state2); // Different object references expect(state1).toEqual(state2); // But same content }); it('should pass a copy of state to onStateChange callback', () => { simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); simplifiedClient.sendAudio(Buffer.from([1, 2, 3])); const callbackState = stateChangeCallback.mock.calls[0][0]; const currentState = simplifiedClient.getVGFState(); expect(callbackState).not.toBe(currentState); // Different references }); }); describe('stopAbnormally', () => { beforeEach(() => { simplifiedClient = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); }); it('should immediately set state to ABORTED and preserve partial transcript', () => { // Start recording first simplifiedClient.sendAudio(Buffer.from([1, 2, 3])); jest.clearAllMocks(); // Call stopAbnormally simplifiedClient.stopAbnormally(); // Verify state was updated to ABORTED (not FINALIZED) expect(stateChangeCallback).toHaveBeenCalledTimes(1); const finalState = stateChangeCallback.mock.calls[0][0]; expect(finalState.transcriptionStatus).toBe(TranscriptionStatus.ABORTED); // finalTranscript is preserved (not overridden to empty string) expect(finalState.startRecordingStatus).toBe(RecordingStatus.FINISHED); expect(finalState.finalRecordingTimestamp).toBeDefined(); expect(finalState.finalTranscriptionTimestamp).toBeDefined(); }); it('should stop recording audio flag', () => { // Start recording 
simplifiedClient.sendAudio(Buffer.from([1, 2, 3])); // Call stopAbnormally simplifiedClient.stopAbnormally(); // Send more audio - should not update recording status again jest.clearAllMocks(); simplifiedClient.sendAudio(Buffer.from([4, 5, 6])); // Verify recording status was set in sendAudio const state = simplifiedClient.getVGFState(); expect(state.startRecordingStatus).toBe(RecordingStatus.RECORDING); }); it('should be idempotent - calling twice does not change state again', () => { // Start recording simplifiedClient.sendAudio(Buffer.from([1, 2, 3])); jest.clearAllMocks(); // Call stopAbnormally first time simplifiedClient.stopAbnormally(); expect(stateChangeCallback).toHaveBeenCalledTimes(1); const firstCallState = stateChangeCallback.mock.calls[0][0]; const firstTranscript = firstCallState.finalTranscript; jest.clearAllMocks(); // Call stopAbnormally second time simplifiedClient.stopAbnormally(); // Should not trigger state change callback again (already aborted) expect(stateChangeCallback).toHaveBeenCalledTimes(0); const currentState = simplifiedClient.getVGFState(); expect(currentState.transcriptionStatus).toBe(TranscriptionStatus.ABORTED); expect(currentState.finalTranscript).toBe(firstTranscript); // Unchanged }); it('should work even if called before any recording', () => { // Call stopAbnormally without ever recording simplifiedClient.stopAbnormally(); const state = simplifiedClient.getVGFState(); expect(state.transcriptionStatus).toBe(TranscriptionStatus.ABORTED); expect(state.finalTranscript).toBeUndefined(); // No transcript was ever received expect(state.startRecordingStatus).toBe(RecordingStatus.FINISHED); }); it('should preserve existing state fields except for overridden ones', () => { // Set up some initial state by sending audio simplifiedClient.sendAudio(Buffer.from([1, 2, 3])); const initialState = simplifiedClient.getVGFState(); const audioUtteranceId = initialState.audioUtteranceId; const initialTranscript = initialState.finalTranscript; // 
Call stopAbnormally simplifiedClient.stopAbnormally(); const finalState = simplifiedClient.getVGFState(); // Should preserve audioUtteranceId, finalTranscript and other non-overridden fields expect(finalState.audioUtteranceId).toBe(audioUtteranceId); expect(finalState.finalTranscript).toBe(initialTranscript); // Preserved // Should override these fields expect(finalState.transcriptionStatus).toBe(TranscriptionStatus.ABORTED); expect(finalState.startRecordingStatus).toBe(RecordingStatus.FINISHED); }); it('should set both recording and transcription timestamps', () => { const beforeTime = new Date().toISOString(); simplifiedClient.stopAbnormally(); const state = simplifiedClient.getVGFState(); const afterTime = new Date().toISOString(); // Timestamps should be set and within reasonable range expect(state.finalRecordingTimestamp).toBeDefined(); expect(state.finalTranscriptionTimestamp).toBeDefined(); // Basic sanity check that timestamps are ISO strings expect(state.finalRecordingTimestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/); expect(state.finalTranscriptionTimestamp).toMatch(/^\d{4}-\d{2}-\d{2}T/); // Timestamps should be close to current time if (state.finalRecordingTimestamp) { expect(state.finalRecordingTimestamp >= beforeTime).toBe(true); expect(state.finalRecordingTimestamp <= afterTime).toBe(true); } }); it('should call underlying client stopAbnormally for cleanup', () => { simplifiedClient.stopAbnormally(); // stopAbnormally on underlying client SHOULD be called for WebSocket cleanup expect(mockClient.stopAbnormally).toHaveBeenCalled(); // stopRecording on underlying client should NOT be called expect(mockClient.stopRecording).not.toHaveBeenCalled(); }); it('should differ from stopRecording behavior', async () => { // Test that stopAbnormally and stopRecording behave differently jest.clearAllMocks(); // Use the existing simplifiedClient for testing simplifiedClient.sendAudio(Buffer.from([1, 2, 3])); // Test stopAbnormally - should NOT call underlying client 
simplifiedClient.stopAbnormally(); expect(mockClient.stopRecording).not.toHaveBeenCalled(); // Create new client to test stopRecording const client2 = new SimplifiedVGFRecognitionClient({ asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: jest.fn() }); // Clear mocks to isolate client2's behavior jest.clearAllMocks(); // Test stopRecording - SHOULD call underlying client await client2.stopRecording(); expect(mockClient.stopRecording).toHaveBeenCalled(); }); it('should use ABORTED status to distinguish from normal completion', () => { // Test that stopAbnormally uses ABORTED, not FINALIZED simplifiedClient.sendAudio(Buffer.from([1, 2, 3])); // Abnormal stop - should set to ABORTED simplifiedClient.stopAbnormally(); const abortedState = simplifiedClient.getVGFState(); // Verify ABORTED is used (not FINALIZED) expect(abortedState.transcriptionStatus).toBe(TranscriptionStatus.ABORTED); expect(abortedState.transcriptionStatus).not.toBe(TranscriptionStatus.FINALIZED); // finalTranscript is preserved (whatever partial transcript was received) // ABORTED clearly indicates user cancelled, vs FINALIZED which means completed normally }); describe('state guards', () => { it('should do nothing if already fully stopped', () => { // Setup: finalize state and mark underlying client as stopped mockClient.getState.mockReturnValue(ClientState.STOPPED); simplifiedClient.stopAbnormally(); // Clear mocks to test second call jest.clearAllMocks(); // Call again - should return early and not call anything simplifiedClient.stopAbnormally(); expect(stateChangeCallback).not.toHaveBeenCalled(); expect(mockClient.stopAbnormally).not.toHaveBeenCalled(); }); it('should not call underlying client if already in STOPPED state', () => { // Mock underlying client as already stopped mockClient.getState.mockReturnValue(ClientState.STOPPED); // But VGF state not finalized yet simplifiedClient.sendAudio(Buffer.from([1, 2, 3])); 
jest.clearAllMocks(); simplifiedClient.stopAbnormally(); // Should be blocked completely - no state change, no underlying call expect(stateChangeCallback).not.toHaveBeenCalled(); expect(mockClient.stopAbnormally).not.toHaveBeenCalled(); }); it('should not call underlying client if already in FAILED state', () => { // Mock underlying client as failed mockClient.getState.mockReturnValue(ClientState.FAILED); simplifiedClient.stopAbnormally(); // Should NOT update VGF state or call underlying client expect(stateChangeCallback).not.toHaveBeenCalled(); expect(mockClient.stopAbnormally).not.toHaveBeenCalled(); }); it('should block if client is in STOPPING state (graceful shutdown in progress)', () => { // Start recording first simplifiedClient.sendAudio(Buffer.from([1, 2, 3])); // Get initial state before attempting stopAbnormally const initialState = simplifiedClient.getVGFState(); const initialStatus = initialState.transcriptionStatus; // Mock underlying client as STOPPING (stopRecording was called) mockClient.getState.mockReturnValue(ClientState.STOPPING); jest.clearAllMocks(); // Try to call stopAbnormally while graceful shutdown in progress simplifiedClient.stopAbnormally(); // Should be blocked - no state change, no underlying call expect(stateChangeCallback).not.toHaveBeenCalled(); expect(mockClient.stopAbnormally).not.toHaveBeenCalled(); // VGF state should remain unchanged (not changed to ABORTED) const state = simplifiedClient.getVGFState(); expect(state.transcriptionStatus).toBe(initialStatus); expect(state.transcriptionStatus).not.toBe(TranscriptionStatus.ABORTED); }); it('should only update VGF state if already finalized but client not stopped', () => { // First call - fully stop simplifiedClient.stopAbnormally(); const firstCallCount = stateChangeCallback.mock.calls.length; // Mock underlying client reconnects (edge case) mockClient.getState.mockReturnValue(ClientState.READY); jest.clearAllMocks(); // Second call - VGF already finalized but client not 
stopped simplifiedClient.stopAbnormally(); // Should NOT update VGF state (already finalized) expect(stateChangeCallback).not.toHaveBeenCalled(); // But SHOULD call underlying client (not stopped) expect(mockClient.stopAbnormally).toHaveBeenCalled(); }); }); }); describe('UUID Change Detection', () => { it('should skip onStateChange callback when UUID changes by default', () => { // Create client with initial state const initialState: RecognitionState = { audioUtteranceId: 'session-123', startRecordingStatus: RecordingStatus.READY, transcriptionStatus: TranscriptionStatus.NOT_STARTED, pendingTranscript: '' }; simplifiedClient = new SimplifiedVGFRecognitionClient({ initialState, asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); // Get the callbacks that were passed to the underlying client const constructorCalls = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls; const clientConfig = constructorCalls[0]?.[0]; const onTranscriptCallback = clientConfig?.onTranscript; // Simulate transcript with a different UUID (stale callback from previous session) onTranscriptCallback?.({ type: 'transcript', is_finished: false, pendingTranscript: 'test transcript', audioUtteranceId: 'different-uuid-456' // Different UUID } as any); // State should NOT be updated - callback should be skipped expect(stateChangeCallback).not.toHaveBeenCalled(); // Internal state should still have original UUID const state = simplifiedClient.getVGFState(); expect(state.audioUtteranceId).toBe('session-123'); expect(state.pendingTranscript).toBe(''); // Not updated }); it('should process callbacks with matching UUID', () => { // Create client with initial state const initialState: RecognitionState = { audioUtteranceId: 'session-123', startRecordingStatus: RecordingStatus.READY, transcriptionStatus: TranscriptionStatus.NOT_STARTED, 
pendingTranscript: '' }; simplifiedClient = new SimplifiedVGFRecognitionClient({ initialState, asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); // Get the callbacks that were passed to the underlying client const constructorCalls = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls; const clientConfig = constructorCalls[0]?.[0]; const onTranscriptCallback = clientConfig?.onTranscript; // Simulate transcript with matching UUID onTranscriptCallback?.({ type: 'transcript', is_finished: false, pendingTranscript: 'test transcript', audioUtteranceId: 'session-123' // Same UUID } as any); // State should be updated normally expect(stateChangeCallback).toHaveBeenCalledTimes(1); const updatedState = stateChangeCallback.mock.calls[0][0]; expect(updatedState.audioUtteranceId).toBe('session-123'); expect(updatedState.pendingTranscript).toBe('test transcript'); }); it('should skip metadata callback with different UUID', () => { // Create client with initial state const initialState: RecognitionState = { audioUtteranceId: 'session-123', startRecordingStatus: RecordingStatus.READY, transcriptionStatus: TranscriptionStatus.NOT_STARTED, pendingTranscript: '' }; simplifiedClient = new SimplifiedVGFRecognitionClient({ initialState, asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); // Get the metadata callback const constructorCalls = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls; const clientConfig = constructorCalls[0]?.[0]; const onMetadataCallback = clientConfig?.onMetadata; // Simulate metadata with different UUID onMetadataCallback?.({ type: 'metadata', event: 'recording_stopped', audioUtteranceId: 'different-uuid-456' } as any); 
// Callback should be skipped expect(stateChangeCallback).not.toHaveBeenCalled(); }); it('should skip error callback with different UUID', () => { // Create client with initial state const initialState: RecognitionState = { audioUtteranceId: 'session-123', startRecordingStatus: RecordingStatus.READY, transcriptionStatus: TranscriptionStatus.NOT_STARTED, pendingTranscript: '' }; simplifiedClient = new SimplifiedVGFRecognitionClient({ initialState, asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); // Get the error callback const constructorCalls = (RealTimeTwoWayWebSocketRecognitionClient as jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls; const clientConfig = constructorCalls[0]?.[0]; const onErrorCallback = clientConfig?.onError; // Simulate error with different UUID onErrorCallback?.({ type: 'error', error: 'test error', audioUtteranceId: 'different-uuid-456' } as any); // Callback should be skipped expect(stateChangeCallback).not.toHaveBeenCalled(); }); it('should track UUID after terminal state regeneration', () => { // Create client with terminal initial state (forces UUID regeneration) const initialState: RecognitionState = { audioUtteranceId: 'old-session-123', startRecordingStatus: RecordingStatus.FINISHED, transcriptionStatus: TranscriptionStatus.FINALIZED, // Terminal state pendingTranscript: '', finalTranscript: 'Previous transcript' }; simplifiedClient = new SimplifiedVGFRecognitionClient({ initialState, asrRequestConfig: { provider: 'deepgram', language: 'en', sampleRate: 16000, encoding: AudioEncoding.LINEAR16 }, onStateChange: stateChangeCallback }); // Get the new UUID that was generated const newState = simplifiedClient.getVGFState(); const newUuid = newState.audioUtteranceId; expect(newUuid).not.toBe('old-session-123'); // Get the transcript callback const constructorCalls = (RealTimeTwoWayWebSocketRecognitionClient as 
jest.MockedClass<typeof RealTimeTwoWayWebSocketRecognitionClient>).mock.calls; const clientConfig = constructorCalls[0]?.[0]; const onTranscriptCallback = clientConfig?.onTranscript; // Clear initial state change callback from UUID regeneration jest.clearAllMocks(); // Simulate transcript with the NEW UUID onTranscriptCallback?.({ type: 'transcript', is_finished: false, pendingTranscript: 'new transcript', audioUtteranceId: newUuid // New UUID } as any); // Should process normally with new UUID expect(stateChangeCallback).toHaveBeenCalledTimes(1); const updatedState = stateChangeCallback.mock.calls[0][0]; expect(updatedState.pendingTranscript).toBe('new transcript'); // Simulate transcript with OLD UUID (stale callback) jest.clearAllMocks(); onTranscriptCallback?.({ type: 'transcript', is_finished: false, pendingTranscript: 'stale transcript',