UNPKG

web-asr-core

Version:

WebASR Core - Browser-based speech processing with VAD, WakeWord and Whisper - Unified all-in-one version

252 lines 7.91 kB
/** * VadService - VAD 服務類別(Event Architecture v2) * * 提供事件驅動的語音活動檢測服務 * 包裝無狀態的 VAD 處理函數並提供事件發射功能 */ import { EventEmitter } from '../core/EventEmitter'; import { AudioChunker } from '../utils/AudioChunker'; import { AudioRingBuffer } from '../utils/AudioRingBuffer'; import { ConfigManager } from '../utils/config-manager'; import { loadVadSession, createVadState, createDefaultVadParams, processVad } from './vad'; /** * VadService - 事件驅動的 VAD 服務 * * @example * ```typescript * const vad = new VadService(); * // 或使用自訂選項 * const vad = new VadService({ threshold: 0.6 }); * * // 訂閱事件 * vad.on('speechStart', ({ timestamp, score }) => { * console.log('Speech started:', timestamp, score); * }); * * vad.on('speechEnd', ({ duration }) => { * console.log('Speech duration:', duration); * }); * * // 初始化 * await vad.initialize(); * * // 處理音訊 * let state = vad.createState(); * const params = vad.createParams(); * * const result = await vad.process(state, audioChunk, params); * state = result.state; * ``` */ export class VadService extends EventEmitter { session = null; chunker = null; lastSpeechStart = null; config = ConfigManager.getInstance(); options; // 統計資料 stats = { chunksProcessed: 0, totalProcessingTime: 0, speechDuration: 0, silenceDuration: 0, lastStatsEmit: Date.now() }; constructor(options) { super(); this.options = options || {}; } /** * 初始化 VAD 服務 * @param modelUrl VAD 模型 URL(可選) * @returns Promise<void> */ async initialize(modelUrl) { try { // 載入模型 this.session = await loadVadSession(modelUrl); // 創建 chunker this.chunker = AudioChunker.forVAD(); // 發射 ready 事件 this.emit('ready', { config: { sampleRate: this.config.audio.sampleRate, windowSize: this.options.windowSize ?? this.config.vad.windowSize, threshold: this.options.threshold ?? this.config.vad.threshold }, timestamp: Date.now() }); } catch (error) { this.emit('error', { error: error, context: 'initialize', timestamp: Date.now() }); throw error; } } /** * 處理單個音訊塊 * @param state VAD 狀態 * @param audio 音訊資料(512 樣本 @ 16kHz) * @param params VAD 參數 * @returns VAD 結果 */ async process(state, audio, params) { if (!this.session) { throw new Error('VAD service not initialized. Call initialize() first.'); } const startTime = performance.now(); try { // 呼叫核心無狀態處理函數 const result = await processVad(this.session, state, audio, params); // 更新統計 const processingTime = performance.now() - startTime; this.updateStatistics(processingTime, result.detected); // 發射處理事件 this.emit('process', { result: { detected: result.detected, score: result.score }, timestamp: Date.now() }); // 檢測語音狀態變化 if (!state.isSpeechActive && result.state.isSpeechActive) { // 語音開始 this.lastSpeechStart = Date.now(); this.emit('speechStart', { timestamp: this.lastSpeechStart, score: result.score }); } else if (state.isSpeechActive && !result.state.isSpeechActive) { // 語音結束 const now = Date.now(); const duration = this.lastSpeechStart ? now - this.lastSpeechStart : 0; this.emit('speechEnd', { timestamp: now, duration }); this.lastSpeechStart = null; } return result; } catch (error) { this.emit('error', { error: error, context: 'process', timestamp: Date.now() }); throw error; } } /** * 處理連續音訊流 * @param ringBuffer 環形緩衝區 * @param state VAD 狀態 * @param params VAD 參數 * @returns 更新後的狀態 */ async processStream(ringBuffer, state, params) { if (!this.chunker) { throw new Error('VAD service not initialized'); } // 從環形緩衝區讀取可用資料 const available = ringBuffer.available(); if (available < 512) { return state; // 資料不足,返回原狀態 } const audio = ringBuffer.read(available); if (!audio) return state; // 使用 chunker 切割成適當大小 const chunks = this.chunker.chunk(audio); let currentState = state; for (const chunk of chunks) { const result = await this.process(currentState, chunk, params); currentState = result.state; } return currentState; } /** * 創建 VAD 狀態 * @returns 新的 VAD 狀態 */ createState() { return createVadState(); } /** * 創建 VAD 參數 * @param overrides 參數覆蓋 * @returns VAD 參數 */ createParams(overrides) { const defaults = createDefaultVadParams(); // 應用服務選項覆蓋 if (this.options.threshold !== undefined) { defaults.threshold = this.options.threshold; } // Note: minSpeechFrames and speechEndFrames are service-level options, // not part of the core VadParams that go to the stateless function return { ...defaults, ...overrides }; } /** * 重置服務狀態 */ reset() { this.lastSpeechStart = null; this.chunker?.reset(); this.resetStatistics(); } /** * 更新統計資料 */ updateStatistics(processingTime, detected) { this.stats.chunksProcessed++; this.stats.totalProcessingTime += processingTime; const chunkDuration = 32; // 512 samples @ 16kHz = 32ms if (detected) { this.stats.speechDuration += chunkDuration; } else { this.stats.silenceDuration += chunkDuration; } // 每秒發射一次統計事件 const now = Date.now(); if (now - this.stats.lastStatsEmit > 1000) { this.emit('statistics', { chunksProcessed: this.stats.chunksProcessed, averageProcessingTime: this.stats.totalProcessingTime / this.stats.chunksProcessed, speechDuration: this.stats.speechDuration, silenceDuration: this.stats.silenceDuration }); this.stats.lastStatsEmit = now; } } /** * 重置統計資料 */ resetStatistics() { this.stats = { chunksProcessed: 0, totalProcessingTime: 0, speechDuration: 0, silenceDuration: 0, lastStatsEmit: Date.now() }; } /** * 清理資源 */ dispose() { this.removeAllListeners(); this.session = null; this.chunker = null; this.lastSpeechStart = null; this.resetStatistics(); } } export default VadService; //# sourceMappingURL=VadService.js.map