@volley/recognition-client-sdk

Recognition Service TypeScript/Node.js Client SDK

/**
 * Recognition Client Types
 *
 * Type definitions and interfaces for the recognition client SDK.
 * These interfaces enable dependency injection, testing, and alternative implementations.
 */
import { TranscriptionResultV1, FunctionCallResultV1, MetadataResultV1, ErrorResultV1, ASRRequestConfig, GameContextV1, Stage } from '@recog/shared-types';
/**
 * Client connection state enum
 * Represents the various states a recognition client can be in during its lifecycle
 */
export declare enum ClientState {
    /** Initial state, no connection established */
    INITIAL = "initial",
    /** Actively establishing WebSocket connection */
    CONNECTING = "connecting",
    /** WebSocket connected but waiting for server ready signal */
    CONNECTED = "connected",
    /** Server ready, can send audio */
    READY = "ready",
    /** Sent stop signal, waiting for final transcript */
    STOPPING = "stopping",
    /** Connection closed normally after stop */
    STOPPED = "stopped",
    /** Connection failed or lost unexpectedly */
    FAILED = "failed"
}
/**
 * Callback URL configuration with message type filtering
 */
export interface RecognitionCallbackUrl {
    /** The callback URL endpoint */
    url: string;
    /** Array of message types to send to this URL. If empty/undefined, all types are sent */
    messageTypes?: Array<string | number>;
}
export type IRecognitionCallbackUrl = RecognitionCallbackUrl;
export interface IRecognitionClientConfig {
    /**
     * WebSocket endpoint URL (optional)
     * Either `url` or `stage` must be provided.
     * If both are provided, `url` takes precedence.
     *
     * Example with explicit URL:
     * ```typescript
     * { url: 'wss://custom-endpoint.example.com/ws/v1/recognize' }
     * ```
     */
    url?: string;
    /**
     * Stage for recognition service (recommended)
     * Either `url` or `stage` must be provided.
     * If both are provided, `url` takes precedence.
     * Defaults to production if neither is provided.
     *
     * Example with STAGES enum (recommended):
     * ```typescript
     * import { STAGES } from '@recog/shared-types';
     * { stage: STAGES.STAGING }
     * ```
     *
     * String values are also accepted:
     * ```typescript
     * { stage: 'staging' } // STAGES.LOCAL | STAGES.DEV | STAGES.STAGING | STAGES.PRODUCTION
     * ```
     */
    stage?: Stage | string;
    /** ASR configuration (provider, model, language, etc.) - optional */
    asrRequestConfig?: ASRRequestConfig;
    /** Game context for improved recognition accuracy */
    gameContext?: GameContextV1;
    /** Audio utterance ID (optional) - if not provided, a UUID v4 will be generated */
    audioUtteranceId?: string;
    /**
     * Callback URLs for server-side notifications with optional message type filtering (optional)
     * Games only need to set this when another service must be notified of the transcription results.
     */
    callbackUrls?: RecognitionCallbackUrl[];
    /** User identification (optional) */
    userId?: string;
    /** Game session identification (optional). Called 'sessionId' in Platform and most games. */
    gameSessionId?: string;
    /** Device identification (optional) */
    deviceId?: string;
    /** Account identification (optional) */
    accountId?: string;
    /** Question answer identifier for tracking Q&A sessions (optional; for tracking purposes only) */
    questionAnswerId?: string;
    /** Platform of the audio recording device (optional, e.g., 'ios', 'android', 'web', 'unity') */
    platform?: string;
    /** Callback when a transcript is received */
    onTranscript?: (result: TranscriptionResultV1) => void;
    /**
     * Callback when a function call is received
     * Note: Not supported in 2025. P2 feature for future speech-to-function-call capability.
     */
    onFunctionCall?: (result: FunctionCallResultV1) => void;
    /** Callback when metadata is received. Fired only once, after transcription is complete. */
    onMetadata?: (metadata: MetadataResultV1) => void;
    /** Callback when an error occurs */
    onError?: (error: ErrorResultV1) => void;
    /** Callback when connected to the WebSocket */
    onConnected?: () => void;
    /**
     * Callback when the WebSocket disconnects
     * @param code - WebSocket close code (1000 = normal, 1006 = abnormal, etc.)
     * @param reason - Close reason string
     */
    onDisconnected?: (code: number, reason: string) => void;
    /** High water mark for backpressure control (bytes) */
    highWaterMark?: number;
    /** Low water mark for backpressure control (bytes) */
    lowWaterMark?: number;
    /** Maximum buffer duration in seconds (default: 60s) */
    maxBufferDurationSec?: number;
    /** Expected chunks per second for ring buffer sizing (default: 100) */
    chunksPerSecond?: number;
    /**
     * Connection retry configuration (optional)
     * Only applies to initial connection establishment, not mid-stream interruptions.
     *
     * Default: { maxAttempts: 4, delayMs: 200 } (try once, retry 3 times = 4 total attempts)
     *
     * Timing: Attempt 1 → FAIL → wait 200ms → Attempt 2 → FAIL → wait 200ms → Attempt 3 → FAIL → wait 200ms → Attempt 4
     *
     * Example:
     * ```typescript
     * {
     *     connectionRetry: {
     *         maxAttempts: 2, // Try connecting up to 2 times (1 retry)
     *         delayMs: 500    // Wait 500ms between attempts
     *     }
     * }
     * ```
     */
    connectionRetry?: {
        /** Maximum number of connection attempts (default: 4, min: 1, max: 5) */
        maxAttempts?: number;
        /** Delay in milliseconds between retry attempts (default: 200ms) */
        delayMs?: number;
    };
    /**
     * Optional logger function for debugging
     * If not provided, no logging will occur
     * @param level - Log level: 'debug', 'info', 'warn', 'error'
     * @param message - Log message
     * @param data - Optional additional data
     */
    logger?: (level: 'debug' | 'info' | 'warn' | 'error', message: string, data?: any) => void;
}
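/*
 * Illustrative usage (not part of the declarations): a minimal config sketch.
 * Only the shape comes from IRecognitionClientConfig above; the field values are
 * hypothetical, and STAGES is assumed to be exported by '@recog/shared-types'
 * as shown in the `stage` docs.
 *
 * ```typescript
 * import { STAGES } from '@recog/shared-types';
 *
 * const config: IRecognitionClientConfig = {
 *     stage: STAGES.STAGING,              // resolved to a WebSocket URL by the SDK
 *     platform: 'web',
 *     gameSessionId: 'session-1234',      // hypothetical identifier
 *     onTranscript: (result) => console.log('transcript:', result),
 *     onError: (error) => console.error('recognition error:', error),
 *     logger: (level, message, data) => console.log(`[${level}] ${message}`, data ?? ''),
 * };
 * ```
 */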
/**
 * Recognition Client Interface
 *
 * Main interface for real-time speech recognition clients.
 * Provides methods for connection management, audio streaming, and session control.
 */
export interface IRecognitionClient {
    /**
     * Connect to the WebSocket endpoint
     * @returns Promise that resolves when connected
     * @throws Error if connection fails or times out
     */
    connect(): Promise<void>;
    /**
     * Send audio data to the recognition service
     * Audio is buffered locally and sent when the connection is ready.
     * @param audioData - PCM audio data as ArrayBuffer, typed array view, or Blob
     */
    sendAudio(audioData: ArrayBuffer | ArrayBufferView | Blob): void;
    /**
     * Stop recording and wait for the final transcript
     * The server will close the connection after sending the final transcript.
     * @returns Promise that resolves when the final transcript is received
     */
    stopRecording(): Promise<void>;
    /**
     * Force stop and immediately close the connection without waiting for the server
     *
     * WARNING: This is an abnormal shutdown that bypasses the graceful stop flow:
     * - Does NOT wait for the server to process remaining audio
     * - Does NOT receive the final transcript from the server
     * - Immediately closes the WebSocket connection
     * - Cleans up resources (buffers, listeners)
     *
     * Use Cases:
     * - User explicitly cancels/abandons the session
     * - Timeout scenarios where waiting is not acceptable
     * - Immediate cleanup is needed and waiting for the server is not an option
     *
     * RECOMMENDED: Use stopRecording() for normal shutdown.
     * Only use this when immediate disconnection is required.
     */
    stopAbnormally(): void;
    /**
     * Get the audio utterance ID for this session
     * Available immediately after client construction.
     * @returns UUID v4 string identifying this recognition session
     */
    getAudioUtteranceId(): string;
    /**
     * Get the current state of the client
     * @returns Current ClientState value
     */
    getState(): ClientState;
    /**
     * Check if the WebSocket connection is open
     * @returns true if connected and ready to communicate
     */
    isConnected(): boolean;
    /**
     * Check if the client is currently connecting
     * @returns true if connection is in progress
     */
    isConnecting(): boolean;
    /**
     * Check if the client is currently stopping
     * @returns true if stopRecording() is in progress
     */
    isStopping(): boolean;
    /**
     * Check if transcription has finished
     * @returns true if the transcription is complete
     */
    isTranscriptionFinished(): boolean;
    /**
     * Check if the audio buffer has overflowed
     * @returns true if the ring buffer has wrapped around
     */
    isBufferOverflowing(): boolean;
    /**
     * Get client statistics
     * @returns Statistics about audio transmission and buffering
     */
    getStats(): IRecognitionClientStats;
    /**
     * Get the WebSocket URL being used by this client
     * Available immediately after client construction.
     * @returns WebSocket URL string
     */
    getUrl(): string;
}
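/*
 * Illustrative lifecycle sketch against IRecognitionClient (any implementation).
 * The `chunks` audio source is hypothetical; the call order follows the method
 * docs above: connect, stream audio, then stop gracefully, falling back to
 * stopAbnormally() only when the session is abandoned.
 *
 * ```typescript
 * async function runSession(client: IRecognitionClient, chunks: ArrayBuffer[]): Promise<void> {
 *     try {
 *         await client.connect();        // resolves once the WebSocket is established
 *         for (const chunk of chunks) {
 *             client.sendAudio(chunk);   // buffered locally until the server is READY
 *         }
 *         await client.stopRecording();  // graceful stop; waits for the final transcript
 *     } catch (err) {
 *         client.stopAbnormally();       // abandon: no final transcript will arrive
 *         throw err;
 *     }
 * }
 * ```
 */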
/**
 * Client statistics interface
 */
export interface IRecognitionClientStats {
    /** Total audio bytes sent to server */
    audioBytesSent: number;
    /** Total number of audio chunks sent */
    audioChunksSent: number;
    /** Total number of audio chunks buffered */
    audioChunksBuffered: number;
    /** Number of times the ring buffer overflowed */
    bufferOverflowCount: number;
    /** Current number of chunks in buffer */
    currentBufferedChunks: number;
    /** Whether the ring buffer has wrapped (overwritten old data) */
    hasWrapped: boolean;
}
/**
 * Configuration for RealTimeTwoWayWebSocketRecognitionClient
 * This extends IRecognitionClientConfig and is the main configuration interface
 * for creating a new RealTimeTwoWayWebSocketRecognitionClient instance.
 */
export interface RealTimeTwoWayWebSocketRecognitionClientConfig extends IRecognitionClientConfig {
}
//# sourceMappingURL=recognition-client.types.d.ts.map
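/*
 * Illustrative monitoring sketch: how the stats above might be polled to detect
 * ring-buffer overflow (old audio overwritten before it could be sent). The
 * function name and log shape are hypothetical, not part of the SDK.
 *
 * ```typescript
 * function logBufferHealth(client: IRecognitionClient): void {
 *     const stats: IRecognitionClientStats = client.getStats();
 *     if (client.isBufferOverflowing() || stats.hasWrapped) {
 *         console.warn('audio ring buffer overflowed', {
 *             overflowCount: stats.bufferOverflowCount,
 *             buffered: stats.currentBufferedChunks,
 *         });
 *     }
 * }
 * ```
 */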