UNPKG

watson-developer-cloud

Version:

Client library to use the IBM Watson Services and AlchemyAPI

240 lines (205 loc) 7.69 kB
/** * Copyright 2014 IBM Corp. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ 'use strict'; var Duplex = require('stream').Duplex; var util = require('util'); var extend = require('extend'); var pick = require('object.pick'); var W3CWebSocket = require('websocket').w3cwebsocket; var OPENING_MESSAGE_PARAMS_ALLOWED = ['continuous', 'max_alternatives', 'timestamps', 'word_confidence', 'inactivity_timeout', 'content-type', 'interim_results', 'keywords', 'keywords_threshold', 'word_alternatives_threshold', 'smart_formatting' ]; var QUERY_PARAMS_ALLOWED = ['model', 'X-Watson-Learning-Opt-Out', 'watson-token']; /** * pipe()-able Node.js Readable/Writeable stream - accepts binary audio and emits text in it's `data` events. * Also emits `results` events with interim results and other data. * * Cannot be instantiated directly, instead reated by calling #createRecognizeStream() * * Uses WebSockets under the hood. For audio with no recognizable speech, no `data` events are emitted. * @param options * @constructor */ function RecognizeStream(options){ Duplex.call(this, options); this.options = options; this.listening = false; this.initialized = false; } util.inherits(RecognizeStream, Duplex); RecognizeStream.prototype.initialize = function() { var options = this.options; // todo: apply these corrections to other methods (?) if (options.token && !options['watson-token']) { options['watson-token'] = options.token; } if (options.content_type && !options['content-type']) { options['content-type'] = options.content_type; } if (options['X-WDC-PL-OPT-OUT'] && !options['X-Watson-Learning-Opt-Out']) { options['X-Watson-Learning-Opt-Out'] = options['X-WDC-PL-OPT-OUT']; } var queryParams = extend({model: 'en-US_BroadbandModel'}, pick(options, QUERY_PARAMS_ALLOWED)); var queryString = Object.keys(queryParams).map(function(key) { return key + '=' + (key === 'watson-token' ? queryParams[key] : encodeURIComponent(queryParams[key])); // our server chokes if the token is correctly url-encoded }).join('&'); var url = (options.url || 'wss://stream.watsonplatform.net/speech-to-text/api').replace(/^http/, 'ws') + '/v1/recognize?' + queryString; var openingMessage = extend({ action: 'start', 'content-type': 'audio/wav', continuous: true, interim_results: true, word_confidence: true, timestamps: true, max_alternatives: 3, inactivity_timeout: 600 }, pick(options, OPENING_MESSAGE_PARAMS_ALLOWED)); var closingMessage = {action: 'stop'}; var self = this; //node params: requestUrl, protocols, origin, headers, extraRequestOptions // browser params: requestUrl, protocols (all others ignored) var socket = this.socket = new W3CWebSocket(url, null, null, options.headers, null); // when the input stops, let the service know that we're done self.on('finish', function() { if (self.socket) { self.socket.send(JSON.stringify(closingMessage)); } else { this.once('connect', function () { self.socket.send(JSON.stringify(closingMessage)); }); } }); socket.onerror = function(error) { self.listening = false; self.emit('error', error); }; this.socket.onopen = function() { socket.send(JSON.stringify(openingMessage)); self.emit('connect'); }; this.socket.onclose = function(e) { self.listening = false; self.push(null); /** * @event RecognizeStream#close * @param {Number} reasonCode * @param {String} description */ self.emit('close', e.code, e.reason); }; /** * @event RecognizeStream#error */ function emitError(msg, frame, err) { if (err) { err.message = msg + ' ' + err.message; } else { err = new Error(msg); } err.raw = frame; self.emit('error', err); } socket.onmessage = function(frame) { if (typeof frame.data !== 'string') { return emitError('Unexpected binary data received from server', frame); } var data; try { data = JSON.parse(frame.data); } catch (jsonEx) { return emitError('Invalid JSON received from service:', frame, jsonEx); } if (data.error) { emitError(data.error, frame); } else if (data.state === 'listening') { // this is emitted both when the server is ready for audio, and after we send the close message to indicate that it's done processing if (!self.listening) { self.listening = true; self.emit('listening'); } else { self.listening = false; socket.close(); } } else if (data.results) { /** * Object with interim or final results, including possible alternatives. May have no results at all for empty audio files. * @event RecognizeStream#results * @param {Object} results */ self.emit('results', data); // note: currently there is always either no entries or exactly 1 entry in the results array. However, this may change in the future. if (data.results[0] && data.results[0].final && data.results[0].alternatives) { /** * Finalized text * @event RecognizeStream#data * @param {String} transcript */ self.push(data.results[0].alternatives[0].transcript, 'utf8'); // this is the "data" event that can be easily piped to other streams } } else { emitError('Unrecognised message from server', frame); } }; this.initialized = true; }; RecognizeStream.prototype._read = function(/*size*/) { // there's no easy way to control reads from the underlying library // so, the best we can do here is a no-op }; RecognizeStream.prototype._write = function(chunk, encoding, callback) { var self = this; if (self.listening) { self.socket.send(chunk); this.afterSend(callback); } else { if (!this.initialized) { if (!this.options['content-type']) { this.options['content-type'] = RecognizeStream.getContentType(chunk); } this.initialize(); } this.once('listening', function() { self.socket.send(chunk); this.afterSend(callback); }); } }; // flow control - don't ask for more data until we've finished what we have // todo: see if this can be improved RecognizeStream.prototype.afterSend = function afterSend(next) { // note: bufferedAmount is currently always 0 // see https://github.com/theturtle32/WebSocket-Node/issues/243 if (this.socket.bufferedAmount <= (this._writableState.highWaterMark || 0)) { process.nextTick(next); } else { setTimeout(this.afterSend.bind(this, next), 10); } }; RecognizeStream.prototype.stop = function() { this.emit('stopping'); this.listening = false; this.socket.close(); }; // quick/dumb way to determine content type from a supported file format var headerToContentType = { 'fLaC': 'audio/flac', 'RIFF': 'audio/wav', 'OggS': 'audio/ogg; codecs=opus' }; RecognizeStream.getContentType = function(buffer) { var header = buffer.slice(0,4).toString(); return headerToContentType[header]; }; module.exports = RecognizeStream;