UNPKG

speaktome-api

Version:

JavaScript modules for Mozilla's cloud speech recognition API

257 lines (236 loc) 7.12 kB
// Webrtc_Vad integration

/**
 * Creates a voice-activity detector that consumes audio-processing events and
 * reports, through a listener callback, when the speaker has finished.
 *
 * Requires the Emscripten `Module` (webrtc_vad.js) to be loaded and to expose
 * `cwrap`, `_malloc`, `_free` and `HEAPU8` at the time this function is called.
 *
 * @param {Object} [options]
 * @param {function(string)} [options.listener] Called with one of
 *     "finishedvoice", "timeout" or "novoice" when a capture completes.
 *     Defaults to a function that logs an error.
 * @param {number} [options.maxSilence=500] Milliseconds of silence after
 *     voice that mark end-of-speech.
 * @returns {{reset: function(), onAudioProcessingEvent: function(Object)}}
 */
function SpeakToMeVAD(options) {
  var config = {
    listener: function() {
      console.error('SpeakToMeVAD: No listener configured!');
    },
    maxSilence: 500
  };

  if (options) {
    // Loose comparison on purpose: treats both null and undefined as "not set".
    if (options['listener'] != undefined) {
      config.listener = options.listener;
    }
    if (options['maxSilence'] != undefined) {
      config.maxSilence = options.maxSilence;
    }
  }

  var webrtc_main = Module.cwrap("main");
  webrtc_main();

  var webrtc_setmode = Module.cwrap("setmode", "number", ["number"]);
  // set_mode defines the aggressiveness degree of the voice activity detection algorithm
  // for more info see: https://github.com/mozilla/gecko/blob/central/media/webrtc/trunk/webrtc/common_audio/vad/vad_core.h#L68
  webrtc_setmode(3);

  var webrtc_process_data = Module.cwrap("process_data", "number", [
    "number",
    "number",
    "number",
    "number",
    "number",
    "number"
  ]);

  // frame length that should be passed to the vad engine. Depends on audio sample rate
  // https://github.com/mozilla/gecko/blob/central/media/webrtc/trunk/webrtc/common_audio/vad/vad_core.h#L106
  // (480 samples at the 48000 Hz rate passed to process_data is 10 ms of audio.)
  var sizeBufferVad = 480;
  // minimum of voice (in milliseconds) that should be captured to be considered voice
  var minvoice = 250;
  // max amount of silence (in milliseconds) that should be captured to be considered end-of-speech
  var maxsilence = config.maxSilence;
  // max amount of capturing time (in seconds)
  var maxtime = 6;

  // Per-instance detector state. These are declared here so that they live in
  // this closure; without the declarations reset() would create implicit
  // globals (shared between instances, and a ReferenceError in strict mode).
  var buffer_vad;      // Int16Array frame being assembled for the VAD engine
  var leftovers;       // number of samples already stored in buffer_vad
  var samplesvoice;    // accumulated milliseconds classified as voice
  var samplessilence;  // accumulated milliseconds of silence after voice
  var touchedvoice;    // true once samplesvoice exceeded minvoice
  var touchedsilence;  // true once samplessilence exceeded maxsilence
  var dtantes;         // timestamp (ms) at which the current capture started
  var dtantesmili;     // timestamp (ms) of the previously classified frame
  var done;            // set just before the listener is notified

  /** Clears all detector state so that a new capture can start. */
  function reset() {
    buffer_vad = new Int16Array(sizeBufferVad);
    leftovers = 0;
    samplesvoice = 0;
    samplessilence = 0;
    touchedvoice = false;
    touchedsilence = false;
    dtantes = Date.now();
    dtantesmili = Date.now();
    done = false;
  }

  /**
   * Runs the VAD engine on one frame of 16-bit PCM.
   *
   * @param {Int16Array} buffer_pcm Frame to classify.
   * @returns {number} The value returned by the native `process_data`
   *     function; callers treat 0 as silence and anything else as voice.
   */
  function isSilence(buffer_pcm) {
    // Get data byte size, allocate memory on Emscripten heap, and get pointer
    var nDataBytes = buffer_pcm.length * buffer_pcm.BYTES_PER_ELEMENT;
    var dataPtr = Module._malloc(nDataBytes);
    try {
      // Copy data to Emscripten heap (directly accessed from Module.HEAPU8).
      // byteOffset/nDataBytes keep the copy correct even if buffer_pcm is a
      // view onto a larger ArrayBuffer.
      var dataHeap = new Uint8Array(Module.HEAPU8.buffer, dataPtr, nDataBytes);
      dataHeap.set(
        new Uint8Array(buffer_pcm.buffer, buffer_pcm.byteOffset, nDataBytes)
      );
      // The three trailing sample arguments are forwarded unchanged from the
      // original call. NOTE(review): index 2000 is past the end of a
      // 480-sample frame, so that argument is undefined - confirm what the
      // native side expects.
      return webrtc_process_data(
        dataPtr,
        buffer_pcm.length,
        48000,
        buffer_pcm[0],
        buffer_pcm[100],
        buffer_pcm[2000]
      );
    } finally {
      // Always release the heap block, even when the native call throws.
      Module._free(dataPtr);
    }
  }

  /**
   * Converts float samples to 16-bit PCM, clamping each sample to [-1, 1].
   *
   * @param {Int16Array} output Destination, at least as long as `input`.
   * @param {Float32Array} input Source samples.
   */
  function floatTo16BitPCM(output, input) {
    for (var i = 0; i < input.length; i++) {
      var s = Math.max(-1, Math.min(1, input[i]));
      output[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
    }
  }

  /**
   * Feeds channel 0 of an audio-processing event to the detector.
   *
   * Samples are cut into frames of `sizeBufferVad` samples. A trailing partial
   * frame is kept and completed with the first samples of the next event, so
   * the engine sees one contiguous stream.
   *
   * Durations are wall-clock deltas (Date.now()) between classified frames,
   * not sample counts.
   *
   * @param {Object} e Event whose `inputBuffer.getChannelData(0)` returns the
   *     float samples to analyse.
   */
  function onAudioProcessingEvent(e) {
    var samples = e.inputBuffer.getChannelData(0);
    var buffer_pcm = new Int16Array(samples.length);
    floatTo16BitPCM(buffer_pcm, samples);

    // algorithm used to determine if the user stopped speaking or not
    var offset = 0;
    while (offset < buffer_pcm.length && !done) {
      var needed = sizeBufferVad - leftovers;
      var available = buffer_pcm.length - offset;

      if (available < needed) {
        // Not enough for a full frame: store for the next buffer.
        buffer_vad.set(buffer_pcm.subarray(offset), leftovers);
        leftovers += available;
        break;
      }

      // Complete the frame (after any samples carried over) and classify it.
      buffer_vad.set(buffer_pcm.subarray(offset, offset + needed), leftovers);
      offset += needed;
      leftovers = 0;

      var vad = isSilence(buffer_vad);
      var dtdepois = Date.now();

      if (vad === 0) {
        // Silence only counts once enough voice has been heard.
        if (touchedvoice) {
          samplessilence += dtdepois - dtantesmili;
          if (samplessilence > maxsilence) {
            touchedsilence = true;
          }
        }
      } else {
        samplesvoice += dtdepois - dtantesmili;
        if (samplesvoice > minvoice) {
          touchedvoice = true;
        }
      }
      dtantesmili = dtdepois;

      // onComplete() resets the state (including `done`), so any remaining
      // samples of this buffer are processed as the start of a new capture.
      if (touchedvoice && touchedsilence) {
        done = true;
        onComplete("finishedvoice");
      } else if ((dtdepois - dtantes) / 1000 > maxtime) {
        done = true;
        if (touchedvoice) {
          onComplete("timeout");
        } else {
          onComplete("novoice");
        }
      }
    }
  }

  /**
   * Notifies the configured listener and re-arms the detector.
   *
   * @param {string} why Completion reason passed to the listener.
   */
  function onComplete(why) {
    try {
      config.listener(why);
    } catch (ex) {
      // A failing listener must not break audio processing; log and carry on.
      console.log('SpeakToMe_VAD: onCompleteCallback exception', ex);
    }
    // Auto-reset for next input
    reset();
  }

  // Establish the initial state before the first audio event arrives.
  reset();

  // Public
  return {
    reset: reset,
    onAudioProcessingEvent: onAudioProcessingEvent
  };
}

// Export the factory defined in this file for CommonJS consumers.
if (typeof module !== "undefined") {
  module.exports = SpeakToMeVAD;
}

// TODO modularize and protect the emscripten stuff
// because it seems to depend on window globals being exposed.
//
// Also, maybe just put this and webrtc_vad.js all in one file...

// Creation of the configuration object
// that will be picked up by the emscripten module
var Module = {
  preRun: [],
  postRun: [],
  print: function(text) {
    console.log("[webrtc_vad.js print]", text);
  },
  printErr: function(text) {
    console.error("[webrtc_vad.js error]", text);
  },
  // Present but undefined, as in the original configuration.
  canvas: undefined,
  setStatus: function(text) {
    console.log("[webrtc_vad.js status] ", text);
  },
  totalDependencies: 0,
  monitorRunDependencies: function(left) {
    this.totalDependencies = Math.max(this.totalDependencies, left);
    Module.setStatus(
      left
        ? "Preparing... (" +
          (this.totalDependencies - left) +
          "/" +
          this.totalDependencies +
          ")"
        : "All downloads complete."
    );
  }
};
Module.setStatus("Loading webrtc_vad...");

// Only install the error hook where a `window` global exists, so that loading
// this file outside a browser does not throw.
if (typeof window !== "undefined") {
  window.onerror = function(event) {
    // TODO: do not warn on ok events like simulating an infinite loop or exitStatus
    Module.setStatus("Exception thrown, see JavaScript console");
    // After the first exception, route further status messages to printErr.
    Module.setStatus = function(text) {
      if (text) {
        Module.printErr("[post-exception status] " + text);
      }
    };
  };
}

Module.noInitialRun = true;
Module["onRuntimeInitialized"] = function() {
  Module.setStatus("Webrtc_vad and SpeakToMeVad loaded");
};