UNPKG

watson-speech

Version:

IBM Watson Speech to Text and Text to Speech SDK for web browsers.

241 lines (199 loc) 8.02 kB
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>JSDoc: Source: webaudio-l16-stream.js</title>

    <script src="scripts/prettify/prettify.js"> </script>
    <script src="scripts/prettify/lang-css.js"> </script>
    <!--[if lt IE 9]>
      <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script>
    <![endif]-->
    <link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css">
    <link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css">
</head>

<body>

<div id="main">

    <h1 class="page-title">Source: webaudio-l16-stream.js</h1>

    <section>
        <article>
            <pre class="prettyprint source linenums"><code>'use strict';

var Transform = require('stream').Transform;
var util = require('util');

var TARGET_SAMPLE_RATE = 16000;

/**
 * Transforms Buffers or AudioBuffers into a binary stream of l16 (raw wav) audio, downsampling in the process.
 *
 * The watson speech-to-text service works on 16 kHz audio and internally downsamples audio received at higher sample rates.
 * WebAudio is usually 48 kHz, so downsampling here reduces bandwidth usage by 2/3.
 *
 * Format event + stream can be combined with https://www.npmjs.com/package/wav to generate a wav file with a proper header
 *
 * Todo: support multi-channel audio (for use with &lt;audio>/&lt;video> elements) - will require interleaving audio channels
 *
 * @param {Object} [opts]
 * @param {Number} [opts.sourceSampleRate=48000] - sample rate of the incoming audio; in object mode it is overwritten with the sampleRate of the first AudioBuffer
 * @param {Boolean} [opts.writableObjectMode=true] - true to accept AudioBuffers, false to accept binary Buffers of 32-bit float samples
 * @param {Boolean} [opts.downsample=true] - set to false to skip downsampling and emit audio at the source sample rate
 * @constructor
 */
function WebAudioL16Stream(opts) {
  opts = this.opts = util._extend({
    sourceSampleRate: 48000,
    writableObjectMode: true,
    downsample: true
  }, opts);

  Transform.call(this, opts);

  // samples left over from the previous chunk that the downsampler has not consumed yet
  this.bufferUnusedSamples = [];

  if (opts.writableObjectMode) {
    // the real sample rate is only known once the first AudioBuffer arrives, so 'format' is emitted then
    this.formatEmitted = false;
    this._transform = this.handleFirstAudioBuffer;
  } else {
    this._transform = this.transformBuffer;
    // deferred so that callers can attach a 'format' listener after constructing the stream
    process.nextTick(this.emitFormat.bind(this));
  }
}
util.inherits(WebAudioL16Stream, Transform);

/**
 * Emits the 'format' event describing the output audio: mono, 16-bit signed integer samples,
 * at 16 kHz when downsampling is enabled or at the source sample rate otherwise.
 */
WebAudioL16Stream.prototype.emitFormat = function emitFormat() {
  this.formatEmitted = true;
  this.emit('format', {
    channels: 1,
    bitDepth: 16,
    sampleRate: this.opts.downsample ? TARGET_SAMPLE_RATE : this.opts.sourceSampleRate,
    signed: true,
    float: false
  });
};

/**
 * Downsamples WebAudio to 16 kHz.
 *
 * Browsers can downsample WebAudio natively with OfflineAudioContexts, but that API was designed for non-streaming use and
 * requires a new context for each AudioBuffer. Firefox can handle this, but chrome (v47) crashes after a few minutes.
 * So, we'll do it in JS for now.
 *
 * This really belongs in its own stream, but there's no way to create new AudioBuffer instances from JS, so it's
 * fairly coupled to the wav conversion code.
 *
 * Input samples that could not be consumed yet (the filter needs a full window of input) are kept in
 * this.bufferUnusedSamples and prepended to the next chunk, so consecutive chunks are processed as one continuous signal.
 *
 * @param {Float32Array} bufferNewSamples Microphone/MediaElement audio samples at opts.sourceSampleRate
 * @return {Float32Array} samples at 16 kHz; empty when not enough input has accumulated yet
 */
WebAudioL16Stream.prototype.downsample = function downsample(bufferNewSamples) {
  var buffer = null,
    newSamples = bufferNewSamples.length,
    unusedSamples = this.bufferUnusedSamples.length;

  if (unusedSamples > 0) {
    // prepend the leftovers from the previous chunk
    buffer = new Float32Array(unusedSamples + newSamples);
    buffer.set(this.bufferUnusedSamples, 0);
    buffer.set(bufferNewSamples, unusedSamples);
  } else {
    buffer = bufferNewSamples;
  }

  // downsampling variables
  var filter = [
      -0.037935, -0.00089024, 0.040173, 0.019989, 0.0047792, -0.058675, -0.056487,
      -0.0040653, 0.14527, 0.26927, 0.33913, 0.26927, 0.14527, -0.0040653, -0.056487,
      -0.058675, 0.0047792, 0.019989, 0.040173, -0.00089024, -0.037935
    ],
    samplingRateRatio = this.opts.sourceSampleRate / TARGET_SAMPLE_RATE,
    // clamped to 0: a chunk much shorter than the filter would otherwise yield a negative length and make
    // the Float32Array constructor throw; such a chunk is simply carried over to the next call
    nOutputSamples = Math.max(0, Math.floor((buffer.length - filter.length) / samplingRateRatio) + 1),
    outputBuffer = new Float32Array(nOutputSamples);

  // Produce exactly nOutputSamples outputs. Bounding the loop by the output count (instead of by the input length)
  // keeps every read of buffer[offset + j] inside the buffer and makes the leftover computation below correct.
  for (var i2 = 0; i2 &lt; nOutputSamples; i2++) {
    var offset = Math.round(samplingRateRatio * i2);
    var sample = 0;
    for (var j = 0; j &lt; filter.length; ++j) {
      sample += buffer[offset + j] * filter[j];
    }
    outputBuffer[i2] = sample;
  }

  // first input sample the next output sample will start from; everything from here on is kept for the next chunk
  var indexSampleAfterLastUsed = Math.round(samplingRateRatio * nOutputSamples);
  var remaining = buffer.length - indexSampleAfterLastUsed;
  if (remaining > 0) {
    // copy (rather than keep a view) because the input array belongs to the caller
    this.bufferUnusedSamples = new Float32Array(buffer.subarray(indexSampleAfterLastUsed));
  } else {
    this.bufferUnusedSamples = new Float32Array(0);
  }

  return outputBuffer;
};

/**
 * Accepts a Float32Array of audio data and converts it to a Buffer of l16 audio data (raw wav)
 *
 * Explanation for the math: The raw values captured from the Web Audio API are
 * in 32-bit Floating Point, between -1 and 1 (per the specification).
 * The values for 16-bit PCM range between -32768 and +32767 (16-bit signed integer).
 * Filter &amp; combine samples to reduce frequency, then multiply by 0x7FFF (32767), or by 0x8000 (32768) for
 * negative values, to convert. Store in little endian.
 *
 * @param {Float32Array} input
 * @returns {Buffer}
 */
WebAudioL16Stream.prototype.floatTo16BitPCM = function(input) {
  var output = new DataView(new ArrayBuffer(input.length * 2)); // length is in bytes (8-bit), so *2 to get 16-bit length
  for (var i = 0; i &lt; input.length; i++) {
    // Clamp first: values outside [-1, 1] (e.g. filter overshoot on loud input) would otherwise
    // wrap around when written as a 16-bit integer and produce loud clicks.
    var sample = Math.max(-1, Math.min(1, input[i]));
    var multiplier = sample &lt; 0 ? 0x8000 : 0x7FFF; // 16-bit signed range is -32768 to 32767
    output.setInt16(i * 2, (sample * multiplier) | 0, true); // index, value, little endian
  }
  return new Buffer(output.buffer);
};

/**
 * Does some one-time setup to grab sampleRate and emit format, then sets _transform to the actual audio buffer handler and calls it.
 * @param audioBuffer
 * @param encoding
 * @param next
 */
WebAudioL16Stream.prototype.handleFirstAudioBuffer = function handleFirstAudioBuffer(audioBuffer, encoding, next) {
  this.opts.sourceSampleRate = audioBuffer.sampleRate;
  if (!this.formatEmitted) {
    this.emitFormat();
  }
  this._transform = this.transformAudioBuffer;
  this._transform(audioBuffer, encoding, next);
};

/**
 * Accepts an AudioBuffer (for objectMode), then downsamples to 16000 and converts to a 16-bit pcm
 *
 * Only the first channel of the AudioBuffer is used.
 *
 * @param audioBuffer
 * @param encoding
 * @param next
 */
WebAudioL16Stream.prototype.transformAudioBuffer = function(audioBuffer, encoding, next) {
  var source = audioBuffer.getChannelData(0);
  if (this.opts.downsample) {
    source = this.downsample(source);
  }
  this.push(this.floatTo16BitPCM(source));
  next();
};

/**
 * Accepts a Buffer (for binary mode), then downsamples to 16000 and converts to a 16-bit pcm
 *
 * The Buffer's bytes are interpreted as 32-bit float samples; trailing bytes that do not make up
 * a whole sample are ignored.
 *
 * @param nodebuffer
 * @param encoding
 * @param next
 */
WebAudioL16Stream.prototype.transformBuffer = function(nodebuffer, encoding, next) {
  // A Buffer can be a view onto a larger, shared ArrayBuffer, so nodebuffer.buffer may contain unrelated bytes
  // and the view may start at an offset that is not 4-byte aligned. Copy exactly this chunk's bytes into a
  // fresh ArrayBuffer before reinterpreting them as floats.
  var byteLength = nodebuffer.length - (nodebuffer.length % 4);
  var bytes = new Uint8Array(byteLength);
  for (var i = 0; i &lt; byteLength; i++) {
    bytes[i] = nodebuffer[i];
  }
  var source = new Float32Array(bytes.buffer);
  if (this.opts.downsample) {
    source = this.downsample(source);
  }
  this.push(this.floatTo16BitPCM(source));
  next();
};

module.exports = WebAudioL16Stream;
</code></pre>
        </article>
    </section>

</div>

<nav>
    <h2><a href="index.html">Home</a></h2><h3>Classes</h3><ul><li><a href="FormatStream.html">FormatStream</a></li><li><a href="MediaElementAudioStream.html">MediaElementAudioStream</a></li><li><a href="RecognizeStream.html">RecognizeStream</a></li><li><a href="TimingStream.html">TimingStream</a></li><li><a href="WebAudioL16Stream.html">WebAudioL16Stream</a></li></ul><h3>Events</h3><ul><li><a href="RecognizeStream.html#event:connection-close">connection-close</a></li><li><a href="RecognizeStream.html#event:data">data</a></li><li><a href="RecognizeStream.html#event:error">error</a></li><li><a href="RecognizeStream.html#event:results">results</a></li></ul>
</nav>

<br class="clear">

<footer>
    Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.4.0</a> on Mon Feb 08 2016 19:56:04 GMT+0000 (UTC)
</footer>

<script> prettyPrint(); </script>
<script src="scripts/linenumber.js"> </script>
</body>
</html>