UNPKG

watson-speech

Version:

IBM Watson Speech to Text and Text to Speech SDK for web browsers.

355 lines (292 loc) 12.3 kB
<!DOCTYPE html> <html lang="en"> <head> <meta charset="utf-8"> <title>JSDoc: Source: recognize-stream.js</title> <script src="scripts/prettify/prettify.js"> </script> <script src="scripts/prettify/lang-css.js"> </script> <!--[if lt IE 9]> <script src="//html5shiv.googlecode.com/svn/trunk/html5.js"></script> <![endif]--> <link type="text/css" rel="stylesheet" href="styles/prettify-tomorrow.css"> <link type="text/css" rel="stylesheet" href="styles/jsdoc-default.css"> </head> <body> <div id="main"> <h1 class="page-title">Source: recognize-stream.js</h1> <section> <article> <pre class="prettyprint source linenums"><code>/** * Copyright 2014 IBM Corp. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ 'use strict'; var Duplex = require('stream').Duplex; var util = require('util'); var pick = require('object.pick'); var W3CWebSocket = require('websocket').w3cwebsocket; var contentType = require('./content-type'); var OPENING_MESSAGE_PARAMS_ALLOWED = ['continuous', 'max_alternatives', 'timestamps', 'word_confidence', 'inactivity_timeout', 'content-type', 'interim_results', 'keywords', 'keywords_threshold', 'word_alternatives_threshold']; var QUERY_PARAMS_ALLOWED = ['model', 'watson-token']; //, 'X-Watson-Learning-Opt-Out' - should be allowed but currently isn't due to a service bug /** * pipe()-able Node.js Readable/Writable stream - accepts binary audio and emits text in its `data` events. * Also emits `results` events with interim results and other data. 
* * Cannot be instantiated directly, instead created by calling #createRecognizeStream() * * Uses WebSockets under the hood. For audio with no recognizable speech, no `data` events are emitted. * @param options * @param {String} [options.model='en-US_BroadbandModel'] - voice model to use. Microphone streaming only supports broadband models. * @param {String} [options.url='wss://stream.watsonplatform.net/speech-to-text/api'] base URL for service * @param {String} [options.content-type='audio/wav'] - content type of audio; should be automatically determined in most cases * @param {Boolean} [options.interim_results=true] - Send back non-final previews of each "sentence" as it is being processed * @param {Boolean} [options.continuous=true] - set to false to automatically stop the transcription after the first "sentence" * @param {Boolean} [options.word_confidence=true] - include confidence scores with results * @param {Boolean} [options.timestamps=true] - include timestamps with results * @param {Number} [options.max_alternatives=3] - maximum number of alternative transcriptions to include * @param {Number} [options.inactivity_timeout=30] - how many seconds of silence before automatically closing the stream (even if continuous is true). use -1 for infinity * //todo: investigate other options at http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/apis/#!/speech-to-text/recognizeSessionless * * @constructor */ function RecognizeStream(options) { Duplex.call(this, options); this.options = options; this.listening = false; this.initialized = false; this.finished = false; var self = this; // listening for `results` events should put the stream in flowing mode just like `data` events function flowForResults(event) { if (event == 'results' || event == 'result') { self.removeListener('newListener', flowForResults); process.nextTick(function () { self.on('data', function () { }); // todo: is there a better way to put a stream in flowing mode? 
}); } } this.on('newListener', flowForResults); } util.inherits(RecognizeStream, Duplex); RecognizeStream.prototype.initialize = function () { var options = this.options; // todo: apply these corrections to other methods (?) if (options.token &amp;&amp; !options['watson-token']) { options['watson-token'] = options.token; } if (options.content_type &amp;&amp; !options['content-type']) { options['content-type'] = options.content_type; } if (options['X-WDC-PL-OPT-OUT'] &amp;&amp; !options['X-Watson-Learning-Opt-Out']) { options['X-Watson-Learning-Opt-Out'] = options['X-WDC-PL-OPT-OUT']; } var queryParams = util._extend({model: 'en-US_BroadbandModel'}, pick(options, QUERY_PARAMS_ALLOWED)); var queryString = Object.keys(queryParams).map(function (key) { return key + '=' + (key == 'watson-token' ? queryParams[key] : encodeURIComponent(queryParams[key])); // the server chokes if the token is correctly url-encoded }).join('&amp;'); var url = (options.url || "wss://stream.watsonplatform.net/speech-to-text/api").replace(/^http/, 'ws') + '/v1/recognize?' 
+ queryString; var openingMessage = util._extend({ action: 'start', 'content-type': 'audio/wav', continuous: true, interim_results: true, word_confidence: true, timestamps: true, max_alternatives: 3, inactivity_timeout: 30 }, pick(options, OPENING_MESSAGE_PARAMS_ALLOWED)); var self = this; //node params: requestUrl, protocols, origin, headers, extraRequestOptions // browser params: requestUrl, protocols (all others ignored) var socket = this.socket = new W3CWebSocket(url, null, null, options.headers, null); // when the input stops, let the service know that we're done self.on('finish', self.finish.bind(self)); socket.onerror = function (error) { self.listening = false; self.emit('error', error); }; this.socket.onopen = function () { self.sendJSON(openingMessage); self.emit('connect'); }; this.socket.onclose = function (e) { if (self.listening) { self.listening = false; self.push(null); } /** * @event RecognizeStream#connection-close * @param {Number} reasonCode * @param {String} description */ self.emit('close', e.code, e.reason); }; /** * @event RecognizeStream#error */ function emitError(msg, frame, err) { if (err) { err.message = msg + ' ' + err.message; } else { err = new Error(msg); } err.raw = frame; self.emit('error', err); } socket.onmessage = function (frame) { if (typeof frame.data !== 'string') { return emitError('Unexpected binary data received from server', frame); } var data; try { data = JSON.parse(frame.data); } catch (jsonEx) { return emitError('Invalid JSON received from service:', frame, jsonEx); } self.emit('message', data); if (data.error) { emitError(data.error, frame); } else if (data.state === 'listening') { // this is emitted both when the server is ready for audio, and after we send the close message to indicate that it's done processing if (!self.listening) { self.listening = true; self.emit('listening'); } else { self.listening = false; self.push(null); socket.close(); } } else if (data.results) { /** * Object with interim or final 
results, including possible alternatives. May have no results at all for empty audio files. * @event RecognizeStream#results * @param {Object} results * @deprecated - use the 'result' event (singular) instead */ self.emit('results', data.results); // note: currently there is always either 0 or 1 entries in the results array. However, this may change in the future. data.results.forEach(function (result) { /** * Object with interim or final results, including possible alternatives. May have no results at all for empty audio files. * @event RecognizeStream#result * @param {Object} result */ result.index = data.result_index; self.emit('result', result); if (result.final &amp;&amp; result.alternatives) { /** * Finalized text * @event RecognizeStream#data * @param {String} transcript */ self.push(result.alternatives[0].transcript, 'utf8'); // this is the "data" event that can be easily piped to other streams } }); } else { emitError('Unrecognised message from server', frame); } }; //this.messages = []; //var send = socket.send; //socket.send = function(msg) { // self.messages.push(msg); // return send.apply(socket, arguments); //}; this.initialized = true; }; RecognizeStream.prototype.sendJSON = function sendJSON(msg) { this.emit('send-json', msg); return this.socket.send(JSON.stringify(msg)); }; RecognizeStream.prototype.sendData = function sendData(data) { this.emit('send-data', data); return this.socket.send(data); }; RecognizeStream.prototype._read = function (size) { // there's no easy way to control reads from the underlying library // so, the best we can do here is a no-op }; RecognizeStream.prototype._write = function (chunk, encoding, callback) { var self = this; if (self.finished) { // can't send any more data after the stop message (although this shouldn't happen normally...) 
return; } if (self.listening) { self.sendData(chunk); this.afterSend(callback); } else { if (!this.initialized) { if (!this.options['content-type']) { this.options['content-type'] = RecognizeStream.getContentType(chunk); } this.initialize(); } this.once('listening', function () { self.sendData(chunk); this.afterSend(callback); }); } }; // flow control - don't ask for more data until we've finished what we have // todo: see if this can be improved RecognizeStream.prototype.afterSend = function afterSend(next) { if (this.socket.bufferedAmount &lt;= this._writableState.highWaterMark || 0) { next(); } else { setTimeout(this.afterSend.bind(this, next), 10); } }; RecognizeStream.prototype.stop = function () { this.emit('stop'); this.finish(); }; RecognizeStream.prototype.finish = function finish() { // this is called both when the source stream finishes, and when .stop() is fired, but we only want to send the stop message once. if (this.finished) { return; } this.finished = true; var self = this; var closingMessage = {action: 'stop'}; if (self.socket) { self.sendJSON(closingMessage); } else { this.once('connect', function () { self.sendJSON(closingMessage); }); } }; RecognizeStream.prototype.promise = require('./promise'); RecognizeStream.getContentType = function (buffer) { return contentType(buffer.slice(0, 4).toString()); }; module.exports = RecognizeStream; </code></pre> </article> </section> </div> <nav> <h2><a href="index.html">Home</a></h2><h3>Classes</h3><ul><li><a href="FormatStream.html">FormatStream</a></li><li><a href="MediaElementAudioStream.html">MediaElementAudioStream</a></li><li><a href="RecognizeStream.html">RecognizeStream</a></li><li><a href="TimingStream.html">TimingStream</a></li></ul><h3>Events</h3><ul><li><a href="MicrophoneStream.html#event:data">data</a></li><li><a href="MicrophoneStream.html#event:raw">raw</a></li><li><a href="RecognizeStream.html#event:connection-close">connection-close</a></li><li><a 
href="RecognizeStream.html#event:data">data</a></li><li><a href="RecognizeStream.html#event:error">error</a></li><li><a href="RecognizeStream.html#event:results">results</a></li></ul><h3>Global</h3><ul><li><a href="global.html#MAX_WAV">MAX_WAV</a></li></ul> </nav> <br class="clear"> <footer> Documentation generated by <a href="https://github.com/jsdoc3/jsdoc">JSDoc 3.4.0</a> on Mon Feb 08 2016 16:11:17 GMT+0000 (UTC) </footer> <script> prettyPrint(); </script> <script src="scripts/linenumber.js"> </script> </body> </html>