/*
 * @volley/recognition-client-sdk
 * Recognition Service TypeScript/Node.js Client SDK
 * (registry metadata: 480 lines (419 loc), 17.6 kB, text/typescript)
 */
/**
* Simplified VGF Recognition Client
*
* A thin wrapper around RealTimeTwoWayWebSocketRecognitionClient that maintains
* a VGF RecognitionState as a pure sink/output of recognition events.
*
* The VGF state is updated based on events but never influences client behavior.
* All functionality is delegated to the underlying client.
*/
import {
RecognitionState,
TranscriptionStatus,
RecordingStatus,
RecognitionActionProcessingState
} from './vgf-recognition-state.js';
import {
IRecognitionClient,
IRecognitionClientConfig,
ClientState
} from './recognition-client.types.js';
import { RealTimeTwoWayWebSocketRecognitionClient } from './recognition-client.js';
import {
createVGFStateFromConfig,
mapTranscriptionResultToState,
mapErrorToState,
updateStateOnStop
} from './vgf-recognition-mapper.js';
import { RecognitionContextTypeV1 } from '@recog/shared-types';
import { v4 as uuidv4 } from 'uuid';
/**
 * Configuration for SimplifiedVGFRecognitionClient.
 *
 * Extends the underlying client configuration with VGF-specific options;
 * all remaining properties are forwarded unchanged to the wrapped
 * RealTimeTwoWayWebSocketRecognitionClient.
 */
export interface SimplifiedVGFClientConfig extends IRecognitionClientConfig {
  /**
   * Callback invoked whenever the VGF state changes.
   * Use this to update your UI or React state.
   */
  onStateChange?: (state: RecognitionState) => void;
  /**
   * Optional initial state to restore from a previous session.
   * If provided, its audioUtteranceId will be extracted and reused when the
   * session is resumable; a fresh UUID is generated instead when the UUID is
   * missing/empty or the state is terminal (ABORTED/FINALIZED/ERROR) or its
   * action processing did not complete.
   */
  initialState?: RecognitionState;
}
/**
 * Interface for SimplifiedVGFRecognitionClient.
 *
 * A simplified client that maintains VGF state for game developers.
 * All methods from the underlying client are available, plus VGF state
 * management. The VGF state is a pure sink: it is updated from recognition
 * events but never influences client behavior.
 */
export interface ISimplifiedVGFRecognitionClient {
  // ============= Core Connection Methods =============
  /**
   * Connect to the recognition service WebSocket.
   * @returns Promise that resolves when connected and ready
   */
  connect(): Promise<void>;
  /**
   * Send audio data for transcription. Audio may be sent before connect()
   * resolves; the underlying client buffers it until connected.
   * @param audioData - PCM audio data as ArrayBuffer, typed array, or Blob
   */
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
  /**
   * Stop recording and wait for final transcription.
   * @returns Promise that resolves when transcription is complete
   */
  stopRecording(): Promise<void>;
  /**
   * Force stop and immediately close connection without waiting for server.
   *
   * WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
   * - Does NOT wait for server to process remaining audio
   * - Does NOT receive final transcript from server (VGF state set to empty)
   * - Immediately closes WebSocket connection
   * - Cleans up resources (buffers, listeners)
   *
   * Use Cases:
   * - User explicitly cancels/abandons the session
   * - Timeout scenarios where waiting is not acceptable
   * - Need immediate cleanup and can't wait for server
   *
   * RECOMMENDED: Use stopRecording() for normal shutdown.
   * Only use this when immediate disconnection is required.
   */
  stopAbnormally(): void;
  // ============= VGF State Methods =============
  /**
   * Get the current VGF recognition state.
   * @returns Current RecognitionState with all transcription data
   */
  getVGFState(): RecognitionState;
  // ============= Status Check Methods =============
  /**
   * Check if connected to the WebSocket.
   */
  isConnected(): boolean;
  /**
   * Check if currently connecting.
   */
  isConnecting(): boolean;
  /**
   * Check if currently stopping (graceful shutdown in progress).
   */
  isStopping(): boolean;
  /**
   * Check if transcription has finished.
   */
  isTranscriptionFinished(): boolean;
  /**
   * Check if the audio buffer has overflowed.
   */
  isBufferOverflowing(): boolean;
  // ============= Utility Methods =============
  /**
   * Get the audio utterance ID for this session.
   */
  getAudioUtteranceId(): string;
  /**
   * Get the WebSocket URL being used.
   */
  getUrl(): string;
  /**
   * Get the underlying client state (for advanced usage).
   */
  getState(): ClientState;
}
/**
 * This wrapper ONLY maintains VGF state as a sink.
 * All actual functionality is delegated to the underlying client.
 */
export class SimplifiedVGFRecognitionClient implements ISimplifiedVGFRecognitionClient {
  // Underlying WebSocket client — all real work is delegated here.
  private client: IRecognitionClient;
  // VGF state sink; updated from recognition events, never read for decisions.
  private state: RecognitionState;
  // True from the first sendAudio() until stop/error/disconnect.
  private isRecordingAudio: boolean = false;
  // Consumer callback fired after every state mutation.
  private stateChangeCallback: ((state: RecognitionState) => void) | undefined;
  // UUID of the current session; events carrying a different UUID are stale.
  private expectedUuid: string;
  private logger: IRecognitionClientConfig['logger'];

  constructor(config: SimplifiedVGFClientConfig) {
    const { onStateChange, initialState, ...clientConfig } = config;
    this.stateChangeCallback = onStateChange;
    this.logger = clientConfig.logger;
    // Use provided initial state or create from config
    if (initialState) {
      // A restored state needs a fresh UUID when it has no usable UUID, is in
      // a terminal transcription status, or its action processing never completed.
      const needsNewUuid = !initialState.audioUtteranceId ||
        initialState.audioUtteranceId === '' ||
        initialState.transcriptionStatus === TranscriptionStatus.ABORTED ||
        initialState.transcriptionStatus === TranscriptionStatus.FINALIZED ||
        initialState.transcriptionStatus === TranscriptionStatus.ERROR ||
        (initialState.recognitionActionProcessingState !== undefined && initialState.recognitionActionProcessingState !== RecognitionActionProcessingState.COMPLETED);
      if (needsNewUuid) {
        // Generate new UUID for fresh session
        const newUUID = uuidv4();
        if (clientConfig.logger) {
          // Test the empty-string case first: '' is falsy, so checking
          // !audioUtteranceId first would make 'Empty UUID' unreachable.
          const reason = initialState.audioUtteranceId === '' ? 'Empty UUID' :
            !initialState.audioUtteranceId ? 'Missing UUID' :
            `Terminal session (${initialState.transcriptionStatus})`;
          clientConfig.logger('info', `${reason} detected, generating new UUID: ${newUUID}`);
        }
        // Update state with new UUID and reset session-specific fields
        this.state = {
          ...initialState,
          audioUtteranceId: newUUID,
          transcriptionStatus: TranscriptionStatus.NOT_STARTED,
          startRecordingStatus: RecordingStatus.READY,
          recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED,
          finalTranscript: undefined
        };
        // Use new UUID in client config
        clientConfig.audioUtteranceId = newUUID;
        // Notify state change immediately so app can update
        if (onStateChange) {
          onStateChange(this.state);
        }
      } else {
        // Non-terminal state with valid UUID - safe to reuse (e.g., reconnecting to IN_PROGRESS session)
        this.state = initialState;
        // Override audioUtteranceId in config if state has one
        if (initialState.audioUtteranceId && !clientConfig.audioUtteranceId) {
          clientConfig.audioUtteranceId = initialState.audioUtteranceId;
        }
      }
    } else {
      // Initialize VGF state from config
      this.state = createVGFStateFromConfig(clientConfig);
    }
    // Client is immediately ready to accept audio (will buffer if not connected).
    // Use the enum member rather than the raw 'READY' string literal for
    // consistency with the other RecordingStatus assignments in this file.
    this.state = { ...this.state, startRecordingStatus: RecordingStatus.READY };
    // Track the expected UUID for this session
    this.expectedUuid = this.state.audioUtteranceId;
    // If VGF state has promptSlotMap, configure gameContext to use it
    if (this.state.promptSlotMap) {
      // Set useContext=true in ASR config to enable context processing
      if (clientConfig.asrRequestConfig) {
        clientConfig.asrRequestConfig.useContext = true;
      }
      // Add promptSlotMap to gameContext
      if (!clientConfig.gameContext) {
        // Only create gameContext if we have gameId and gamePhase
        // These should come from the game's configuration
        if (clientConfig.logger) {
          clientConfig.logger('warn', '[VGF] promptSlotMap found but no gameContext provided. SlotMap will not be sent.');
        }
      } else {
        // Merge promptSlotMap into existing gameContext
        clientConfig.gameContext.slotMap = this.state.promptSlotMap;
      }
    }
    // Create underlying client with callbacks that ONLY update VGF state
    this.client = new RealTimeTwoWayWebSocketRecognitionClient({
      ...clientConfig,
      // These callbacks ONLY update the VGF state sink
      onTranscript: (result) => {
        // Skip update if UUID doesn't match (stale callback from previous session)
        if (result.audioUtteranceId && result.audioUtteranceId !== this.expectedUuid) {
          if (this.logger) {
            this.logger('warn',
              `[VGF] Skipping transcript update: UUID mismatch (expected: ${this.expectedUuid}, got: ${result.audioUtteranceId})`
            );
          }
          // Still call original callback if provided
          if (clientConfig.onTranscript) {
            clientConfig.onTranscript(result);
          }
          return;
        }
        // Update VGF state based on transcript
        this.state = mapTranscriptionResultToState(this.state, result, this.isRecordingAudio);
        this.notifyStateChange();
        // Call original callback if provided
        if (clientConfig.onTranscript) {
          clientConfig.onTranscript(result);
        }
      },
      onMetadata: (metadata) => {
        // Skip update if UUID doesn't match (stale callback from previous session)
        if (metadata.audioUtteranceId && metadata.audioUtteranceId !== this.expectedUuid) {
          if (this.logger) {
            this.logger('warn',
              `[VGF] Skipping metadata update: UUID mismatch (expected: ${this.expectedUuid}, got: ${metadata.audioUtteranceId})`
            );
          }
          return;
        }
        if (clientConfig.onMetadata) {
          clientConfig.onMetadata(metadata);
        }
      },
      onFunctionCall: (result) => {
        // Pass through function call - no VGF state changes needed for P2 feature
        if (clientConfig.onFunctionCall) {
          clientConfig.onFunctionCall(result);
        }
      },
      onError: (error) => {
        // Skip update if UUID doesn't match (stale callback from previous session)
        if (error.audioUtteranceId && error.audioUtteranceId !== this.expectedUuid) {
          if (this.logger) {
            this.logger('warn',
              `[VGF] Skipping error update: UUID mismatch (expected: ${this.expectedUuid}, got: ${error.audioUtteranceId})`
            );
          }
          return;
        }
        this.isRecordingAudio = false; // Reset on error
        this.state = mapErrorToState(this.state, error);
        this.notifyStateChange();
        if (clientConfig.onError) {
          clientConfig.onError(error);
        }
      },
      onConnected: () => {
        // Don't update READY here - client can accept audio before connection
        if (clientConfig.onConnected) {
          clientConfig.onConnected();
        }
      },
      onDisconnected: (code, reason) => {
        this.isRecordingAudio = false; // Reset on disconnect
        if (clientConfig.onDisconnected) {
          clientConfig.onDisconnected(code, reason);
        }
      }
    });
  }

  // DELEGATE ALL METHODS TO UNDERLYING CLIENT
  // The wrapper ONLY updates VGF state, doesn't use it for decisions

  /** Connect to the recognition service WebSocket. */
  async connect(): Promise<void> {
    await this.client.connect();
    // State will be updated via onConnected callback
  }

  /**
   * Send audio data for transcription. The first chunk of a session flips the
   * VGF state to RECORDING and stamps the recording start time.
   */
  sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void {
    // Track recording for state updates
    if (!this.isRecordingAudio) {
      this.isRecordingAudio = true;
      this.state = {
        ...this.state,
        // Enum member instead of raw 'RECORDING' literal, matching the
        // RecordingStatus usage elsewhere in this class.
        startRecordingStatus: RecordingStatus.RECORDING,
        startRecordingTimestamp: new Date().toISOString()
      };
      this.notifyStateChange();
    }
    this.client.sendAudio(audioData);
  }

  /** Stop recording gracefully and wait for the final transcription. */
  async stopRecording(): Promise<void> {
    this.isRecordingAudio = false;
    this.state = updateStateOnStop(this.state);
    this.notifyStateChange();
    await this.client.stopRecording();
  }

  /**
   * Force stop and immediately close the connection without waiting for the
   * server. No-op while a graceful shutdown is in progress or the client is
   * already in a terminal state.
   */
  stopAbnormally(): void {
    const clientState = this.client.getState();
    // Guard: Block if graceful shutdown in progress or already in terminal state
    // This prevents stopAbnormally from disrupting stopRecording's graceful finalization
    if (clientState === ClientState.STOPPING ||
        clientState === ClientState.STOPPED ||
        clientState === ClientState.FAILED) {
      // Already stopping/stopped - do nothing to avoid disrupting graceful shutdown
      return;
    }
    this.isRecordingAudio = false;
    // Set state to ABORTED - preserve any partial transcript received so far
    // This clearly indicates the session was cancelled/abandoned by user
    if (this.state.transcriptionStatus !== TranscriptionStatus.ABORTED &&
        this.state.transcriptionStatus !== TranscriptionStatus.FINALIZED) {
      this.state = {
        ...this.state,
        transcriptionStatus: TranscriptionStatus.ABORTED,
        startRecordingStatus: RecordingStatus.FINISHED,
        finalRecordingTimestamp: new Date().toISOString(),
        finalTranscriptionTimestamp: new Date().toISOString()
      };
      this.notifyStateChange();
    }
    // Delegate to underlying client for actual WebSocket cleanup
    this.client.stopAbnormally();
  }

  // Pure delegation methods - no state logic

  getAudioUtteranceId(): string {
    return this.client.getAudioUtteranceId();
  }

  getUrl(): string {
    return this.client.getUrl();
  }

  getState(): ClientState {
    return this.client.getState();
  }

  isConnected(): boolean {
    return this.client.isConnected();
  }

  isConnecting(): boolean {
    return this.client.isConnecting();
  }

  isStopping(): boolean {
    return this.client.isStopping();
  }

  isTranscriptionFinished(): boolean {
    return this.client.isTranscriptionFinished();
  }

  isBufferOverflowing(): boolean {
    return this.client.isBufferOverflowing();
  }

  // VGF State access (read-only for consumers)

  /** Returns a shallow copy of the current VGF state. */
  getVGFState(): RecognitionState {
    return { ...this.state };
  }

  /** Pushes a shallow copy of the current state to the consumer callback. */
  private notifyStateChange(): void {
    // State has already been validated for correct UUID before this is called
    if (this.stateChangeCallback) {
      this.stateChangeCallback({ ...this.state });
    }
  }
}
/**
 * Factory for the simplified VGF client.
 *
 * Usage examples:
 *
 * // Basic usage
 * const client = createSimplifiedVGFClient({
 *   asrRequestConfig: { provider: 'deepgram', language: 'en' },
 *   onStateChange: (state) => {
 *     console.log('VGF State updated:', state);
 *     // Update React state, game UI, etc.
 *   }
 * });
 *
 * // Restoring a previous session (audioUtteranceId is taken from the state)
 * const client = createSimplifiedVGFClient({
 *   asrRequestConfig: { provider: 'deepgram', language: 'en' },
 *   initialState: previousState,
 *   onStateChange: (state) => setVGFState(state)
 * });
 *
 * // Initial state carrying a promptSlotMap for enhanced recognition
 * const stateWithSlots: RecognitionState = {
 *   audioUtteranceId: 'session-123',
 *   promptSlotMap: {
 *     'song_title': ['one time', 'baby'],
 *     'artists': ['justin bieber']
 *   }
 * };
 * const client = createSimplifiedVGFClient({
 *   asrRequestConfig: { provider: 'deepgram', language: 'en' },
 *   gameContext: {
 *     type: RecognitionContextTypeV1.GAME_CONTEXT,
 *     gameId: 'music-quiz',        // Your game's ID
 *     gamePhase: 'song-guessing'   // Current game phase
 *   },
 *   initialState: stateWithSlots,  // promptSlotMap is merged into gameContext
 *   onStateChange: (state) => setVGFState(state)
 * });
 *
 * await client.connect();
 * client.sendAudio(audioData);
 * // VGF state automatically updates based on transcription results
 *
 * @param config - client configuration plus VGF options
 * @returns a ready-to-connect simplified client
 */
export function createSimplifiedVGFClient(config: SimplifiedVGFClientConfig): ISimplifiedVGFRecognitionClient {
  const simplifiedClient = new SimplifiedVGFRecognitionClient(config);
  return simplifiedClient;
}