@volley/recognition-client-sdk

Version:

Recognition Service TypeScript/Node.js Client SDK

102 lines (85 loc) • 4.69 kB

text/typescript

import { z } from "zod"

/**
 * VGF-style state schema for game-side recognition state/results management.
 *
 * This schema provides a standardized way for game developers to manage
 * voice recognition state and results in their applications. It supports:
 *
 * STEP 1: Basic transcription flow
 * STEP 2: Mic auto-stop upon correct answer (using partial transcripts)
 * STEP 3: Semantic/function-call outcomes for game actions
 *
 * Ideally this should be part of a more centralized shared type library to free
 * game developers and provide helper functions (VGF? Platform SDK?).
 */
export const RecognitionVGFStateSchema = z.object({
  // ---- Core STT state ----
  audioUtteranceId: z.string(),
  // "NOT_READY", "READY", "RECORDING", "FINISHED". States follow this order.
  // Streaming should only start when "READY". Other states control mic UI and recording.
  startRecordingStatus: z.string().optional(),
  // "NOT_STARTED", "IN_PROGRESS", "FINALIZED", "ABORTED", "ERROR"
  transcriptionStatus: z.string().optional(),
  // Full finalized transcript for the utterance. Will not change.
  finalTranscript: z.string().optional(),
  finalConfidence: z.number().optional(),

  // ---- Tracking-only metadata ----
  // JSON format of the ASR config.
  asrConfig: z.string().optional(),
  // Start of recording. Immutable after set.
  startRecordingTimestamp: z.string().optional(),
  // End of recording. Immutable after set. Transcription may still be in progress.
  finalRecordingTimestamp: z.string().optional(),
  // When the final transcript was produced. Immutable after set.
  finalTranscriptionTimestamp: z.string().optional(),

  // ---- STEP 2: Support for mic auto-stop upon correct answer ----
  // Non-final transcript that may change (matches existing naming).
  // Falls back to "" when the field is absent from the parsed input.
  pendingTranscript: z.string().optional().default(""),
  pendingConfidence: z.number().optional(),

  // ---- STEP 3: Support for semantic/function-call outcomes ----
  // Function call metadata in JSON, e.g. "{artist: true, title: true}"
  functionCallMetadata: z.string().optional(),
  // Confidence score for the function call.
  functionCallConfidence: z.number().optional(),
  // When the final action after interpreting the transcript was taken. Immutable.
  finalFunctionCallTimestamp: z.string().optional(),

  // ---- Support for prompt slot mapping - passed to recognition context when present ----
  // Optional map of slot names to prompt values for recognition context.
  promptSlotMap: z.record(z.string(), z.array(z.string())).optional(),

  // ---- Recognition action processing state ----
  // Managed externally; the SDK preserves but never modifies it.
  // "NOT_STARTED", "IN_PROGRESS", "COMPLETED"
  recognitionActionProcessingState: z.string().optional(),
})

/** Parsed (output) shape of {@link RecognitionVGFStateSchema}. */
export type RecognitionState = z.infer<typeof RecognitionVGFStateSchema>

// Status constants for better type safety and consistency

/** Allowed values for `startRecordingStatus`, listed in their progression order. */
export const RecordingStatus = {
  NOT_READY: "NOT_READY",
  READY: "READY",
  RECORDING: "RECORDING",
  FINISHED: "FINISHED",
} as const

/** Union of the string values in {@link RecordingStatus}. */
export type RecordingStatusType = (typeof RecordingStatus)[keyof typeof RecordingStatus]

/** Allowed values for `transcriptionStatus`. */
export const TranscriptionStatus = {
  NOT_STARTED: "NOT_STARTED",
  IN_PROGRESS: "IN_PROGRESS",
  FINALIZED: "FINALIZED",
  ABORTED: "ABORTED", // Session was cancelled/abandoned by user
  ERROR: "ERROR",
} as const

/** Union of the string values in {@link TranscriptionStatus}. */
export type TranscriptionStatusType = (typeof TranscriptionStatus)[keyof typeof TranscriptionStatus]

/** Allowed values for `recognitionActionProcessingState`. */
export const RecognitionActionProcessingState = {
  NOT_STARTED: "NOT_STARTED",
  IN_PROGRESS: "IN_PROGRESS",
  COMPLETED: "COMPLETED",
} as const

/** Union of the string values in {@link RecognitionActionProcessingState}. */
export type RecognitionActionProcessingStateType =
  (typeof RecognitionActionProcessingState)[keyof typeof RecognitionActionProcessingState]

/**
 * Creates the initial recognition state for a new utterance.
 *
 * @param audioUtteranceId - Identifier stored verbatim in the returned state.
 * @returns A state with recording status `NOT_READY`, transcription status
 *   `NOT_STARTED`, an empty pending transcript, and action processing state
 *   `NOT_STARTED`. All other fields are left unset.
 */
export function createInitialRecognitionState(audioUtteranceId: string): RecognitionState {
  return {
    audioUtteranceId,
    startRecordingStatus: RecordingStatus.NOT_READY,
    transcriptionStatus: TranscriptionStatus.NOT_STARTED,
    pendingTranscript: "",
    recognitionActionProcessingState: RecognitionActionProcessingState.NOT_STARTED,
  }
}

/**
 * Checks whether a recording-status change moves strictly forward in the order
 * `NOT_READY` -> `READY` -> `RECORDING` -> `FINISHED`.
 *
 * Forward jumps that skip intermediate statuses are accepted; staying on the
 * same status or moving backwards is rejected.
 *
 * @param from - Current status. `undefined`, an empty string, or a value that is
 *   not a known recording status is treated as "before the first status", so any
 *   known `to` is accepted in that case.
 * @param to - Requested next status. Must be a known recording status.
 * @returns `true` when `to` is a known status positioned after `from`.
 */
export function isValidRecordingStatusTransition(from: string | undefined, to: string): boolean {
  // Typed as plain strings so arbitrary input can be looked up without type assertions.
  const statusOrder: readonly string[] = [
    RecordingStatus.NOT_READY,
    RecordingStatus.READY,
    RecordingStatus.RECORDING,
    RecordingStatus.FINISHED,
  ]

  // NOTE(review): an unrecognized non-empty `from` also yields -1 here, i.e. it is
  // handled like a missing status rather than rejected. Kept for compatibility.
  const fromIndex = from ? statusOrder.indexOf(from) : -1
  const toIndex = statusOrder.indexOf(to)

  // Can only move forward in the status order, and only to a known status.
  return toIndex !== -1 && toIndex > fromIndex
}