UNPKG

node-red-node-watson

Version:
667 lines (579 loc) 25.5 kB
<!--
  Copyright 2016, 2022 IBM Corp.

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->

<!--
  Edit dialog template for the watson-speech-to-text node.
  Element ids follow the node-input-<property> pattern and match the keys of
  the `defaults` object registered in the script at the end of this file.
  The three <select> elements are left empty here; the script fills them once
  the list of service models has been fetched. The *hidden inputs keep the
  chosen option values so they can be restored after the lists are rebuilt.
  The script shows and hides rows through each control's parent element, so
  every control must stay a direct child of its own row <div>.
-->
<script type="text/x-red" data-template-name="watson-speech-to-text">
  <div id="credentials-check" class="form-row">
    <div class="form-tips">
      <i class="fa fa-question-circle"></i><b> Please wait: </b> Checking for bound service credentials...
    </div>
  </div>
  <div>
    <label id="node-label-message"><i class="fa fa-exclamation-triangle"></i></label>
  </div>
  <div class="form-row">
    <label for="node-input-name"><i class="fa fa-tag"></i> Name</label>
    <input type="text" id="node-input-name" placeholder="Name">
  </div>
  <div class="form-row credentials" style="display: none;">
    <label for="node-input-apikey"><i class="fa fa-key"></i> API Key</label>
    <input type="password" id="node-input-apikey" placeholder="API Key">
  </div>
  <div class="form-row credentials">
    <label for="node-input-service-endpoint"><i class="fa fa-tag"></i> Service Endpoint</label>
    <input type="text" id="node-input-service-endpoint" placeholder="https://stream.watsonplatform.net/speech-to-text/api">
  </div>
  <div class="form-row">
    <label for="node-input-lang"><i class="fa fa-language"></i> Language</label>
    <select id="node-input-lang" style="display: inline-block; width: 70%;"></select>
  </div>
  <div class="form-row">
    <input type="hidden" id="node-input-langhidden">
  </div>
  <div class="form-row">
    <label for="node-input-langcustom"><i class="fa fa-language"></i> Language Customization</label>
    <select id="node-input-langcustom" style="display: inline-block; width: 70%;"></select>
  </div>
  <div class="form-row">
    <input type="hidden" id="node-input-langcustomhidden">
  </div>
  <div class="form-row">
    <label for="node-input-custom-weight"><i class="fa fa-tag"></i> Customization Weight</label>
    <input type="number" id="node-input-custom-weight" min="0" max="1" step="0.01">
  </div>
  <div class="form-row">
    <label for="node-input-band"><i class="fa fa-audio-o"></i> Quality</label>
    <select id="node-input-band" style="display: inline-block; width: 70%;"></select>
  </div>
  <div class="form-row">
    <input type="hidden" id="node-input-bandhidden">
  </div>
  <div class="form-row">
    <label for="node-input-alternatives">Max Alternative Transcripts</label>
    <input type="number" id="node-input-alternatives" min="1" max="5">
  </div>
  <div class="form-row">
    <label for="node-input-keywords"><i class="fa fa-tag"></i> Keywords</label>
    <input type="text" id="node-input-keywords">
  </div>
  <div class="form-row">
    <label for="node-input-keywords-threshold"><i class="fa fa-tag"></i> Keywords Threshold</label>
    <input type="number" id="node-input-keywords-threshold" min="0" max="1" step="0.01">
  </div>
  <div class="form-row">
    <label>&nbsp;</label>
    <input type="checkbox" id="node-input-word-confidence" style="display: inline-block; width: auto; vertical-align: top;">
    <label for="node-input-word-confidence" style="width: 70%;"> Word Confidence</label>
  </div>
  <div class="form-row">
    <label>&nbsp;</label>
    <input type="checkbox" id="node-input-speakerlabels" style="display: inline-block; width: auto; vertical-align: top;">
    <label for="node-input-speakerlabels" style="width: 70%;"><i class="fa fa-audio-o"></i> Speaker Labels</label>
  </div>
  <div class="form-row">
    <label>&nbsp;</label>
    <input type="checkbox" id="node-input-smartformatting" style="display: inline-block; width: auto; vertical-align: top;">
    <label for="node-input-smartformatting" style="width: 70%;"><i class="fa fa-audio-o"></i> Smart Formatting</label>
  </div>
  <div class="form-row">
    <label>&nbsp;</label>
    <input type="checkbox" id="node-input-payload-response" style="display: inline-block; width: auto; vertical-align: top;">
    <label for="node-input-payload-response" style="width: 70%;"> Place output on msg.payload</label>
  </div>
  <div class="form-row">
    <label>&nbsp;</label>
    <input type="checkbox" id="node-input-disable-precheck" style="display: inline-block; width: auto; vertical-align: top;">
    <label for="node-input-disable-precheck" style="width: 70%;"> Disable Audio format pre-check</label>
  </div>
  <div class="form-row">
    <label>&nbsp;</label>
    <input type="checkbox" id="node-input-streaming-mode" style="display: inline-block; width: auto; vertical-align: top;">
    <label for="node-input-streaming-mode" style="width: 70%;"> Streaming Mode</label>
  </div>
  <div class="form-row">
    <label>&nbsp;</label>
    <input type="checkbox" id="node-input-streaming-mute" style="display: inline-block; width: auto; vertical-align: top;">
    <label for="node-input-streaming-mute" style="width: 70%;"> Mute in Streaming Mode</label>
  </div>
  <div class="form-row">
    <label>&nbsp;</label>
    <input type="checkbox" id="node-input-discard-listening" style="display: inline-block; width: auto; vertical-align: top;">
    <label for="node-input-discard-listening" style="width: 70%;"> Discard Listening events from STT</label>
  </div>
  <div class="form-row">
    <label>&nbsp;</label>
    <input type="checkbox" id="node-input-auto-connect" style="display: inline-block; width: auto; vertical-align: top;">
    <label for="node-input-auto-connect" style="width: 70%;"> Attempt auto connect on connection close</label>
  </div>
  <div class="form-row">
    <label>&nbsp;</label>
    <input type="button" id="node-input-flushcache" value="Flush Config Cache" style="display: inline-block; width: auto; vertical-align: top;">
  </div>
</script>

<script type="text/x-red" data-help-name="watson-speech-to-text">
<p>The Speech To Text converts the human voice into the written word.
This service uses machine intelligence to combine information about grammar and language structure with knowledge of the composition of the audio signal to generate a more accurate transcription.</p> <p>The audio file to be analysed should be passed in on <code>msg.payload</code>.</p> <p>Supported <code>msg.payload</code> types:</p> <ul> <li><b>String</b> URL to audio</li> <li><b>Buffer</b> Raw Audio Bytes</li> </ul> <p>Audio must be a <b>WAV, FLAC or OGG encoded file</b>.</p> <p>Speech To Text supports decoding speech from several languages, including <b>English, Spanish, Japanese, Brazilian Portuguese and Mandarin Chinese</b>. This option can be modified in the configuration panel.</p> <p>Along with the language, you can also configure the source audio quality. The service supports two different source sampling rates, <b>Narrowband (8kHz) and Broadband (16kHz)</b>. Higher sample rates will be downsampled automatically, but the reverse is not true.</p> <p>Use the speaker label property to identify which individuals spoke which words in a multi-participant exchange.</p> <p>Source language parameters can be configured through the editor panel or set dynamically using the language code in <code>msg.srclang</code>. Please see the documentation linked below for the currently supported language codes. You can use the two character code from Language Identification, but the regional variant will default, e.g. <code>en</code> will default to <code>en-US</code>.</p> <p>Keywords to identify should be entered as a comma separated list. If keywords are specified then keyword threshold must also be set.</p> <p>Check the word confidence setting if you want confidence scores for each word.</p> 
<p></p>
<p>The audio transcription will be returned on <code>msg.transcription</code>.</p>
<p>The full response, including alternative transcriptions, can be found on <code>msg.fullresult</code>.</p>
<p>When streaming mode is selected the node makes use of a websocket connection to communicate with the Speech to Text service. Input to the node will be from a Node-RED websocket node. Output will be from a Node-RED websocket. WebSocket input is as per the WebSocket input for Speech To Text, with an action of either <code>start</code> or <code>stop</code> or an audio blob. No token is needed as the node takes care of that step.</p>
<p>The Mute option allows for the suppression of session timeout messages.</p>
<p>The Discard Listening option allows for listening events from STT to be discarded.</p>
<p>For more information about the Speech To Text service, read the <a href="https://www.ibm.com/watson/services/speech-to-text/">documentation</a>.</p>
</script>

<script type="text/javascript">
// Need to simulate a namespace, as some of the variables had started to leak across nodes.
// STT is an empty constructor: the single instance created from it is used purely
// as a container object for this node's editor state and helper functions.
function STT () {
}

// This is the namespace for stt. Currently only contains models, but more vars and functions may need to be
// moved in if there is a clash with other nodes.
var stt = new STT();

// Data fetched from the server and cached between dialog openings.
// null means "not fetched yet"; stt.flushCache resets these.
stt.models = null;
stt.languages = null;
stt.customisations = null;
stt.bands = null;

// Current choice in each of the three dynamically populated select fields.
stt.language_selected = '';
stt.custom_selected = '';
stt.band_selected = '';

// Display names for the language codes reported by the service models.
// Codes that are not listed here are shown as the raw code.
stt.LANGUAGES = {
  'en-US': 'US English',
  'pt-BR': 'Brazilian Portuguese',
  'en-UK': 'UK English',
  'en-GB': 'UK English',
  'en-AU': 'Australian English',
  'en-IN': 'Indian English',
  'cs-CZ': 'Czech',
  'de-DE': 'German',
  'fr-CA': 'French (Canadian)',
  'fr-FR': 'French',
  'hi-IN': 'Hindi',
  'nl-NL': 'Dutch',
  'nl-BE': 'Dutch (Belgian)',
  'it-IT': 'Italian',
  'zh-CN': 'Mandarin',
  'es-ES': 'Spanish',
  'es-CO': 'Spanish (Colombian)',
  'es-MX': 'Spanish (Mexican)',
  'es-CL': 'Spanish (Chilean)',
  'es-AR': 'Spanish (Argentinian)',
  'es-PE': 'Spanish (Peruvian)',
  'es-LA': 'Latin American Spanish',
  'ar-AR': 'Arabic',
  'ar-MS': 'Arabic (MS)',
  'ko-KR': 'Korean',
  'ja-JP': 'Japanese'
};

// Array.prototype.filter callback that keeps only the first occurrence of each value.
stt.onlyUnique = function (value, index, self) {
  return self.indexOf(value) === index;
}

// Show the parent element (the form row) of every element matched by each
// selector in `fields`. `fields` is an array of jQuery selector strings.
stt.showSelectedFields = function(fields) {
  // `var` keeps the counter local; it was previously an implicit global.
  for (var i = 0; i < fields.length; i++) {
    $(fields[i]).parent().show();
  }
}

// Hide the parent element (the form row) of every element matched by each
// selector in `fields`. `fields` is an array of jQuery selector strings.
stt.hideSelectedFields = function(fields) {
  for (var i = 0; i < fields.length; i++) {
    $(fields[i]).parent().hide();
  }
}

// Function to be used at the start, as don't want to expose any fields, unless the models are
// available. The models can only be fetched if the credentials are available.
stt.hideEverything = function () {
  if (!stt.models) {
    var fields = [];
    // NOTE(review): no element with id "credentials-not-found" is present in the
    // edit template of this file, so this call matches nothing there - confirm
    // whether the element was removed or lives elsewhere.
    $('#credentials-not-found').show();
    fields.push('#node-label-message'
                    + ', #node-input-alternatives'
                    + ', #node-input-keywords'
                    + ', #node-input-keywords-threshold'
                    + ', #node-input-word-confidence'
                    + ', #node-input-speakerlabels'
                    + ', #node-input-smartformatting'
                    + ', #node-input-lang'
                    + ', #node-input-band'
                    + ', #node-input-langcustom'
                    + ', #node-input-custom-weight');
    stt.hideSelectedFields(fields);
  }
}

// Check if there is a model, in which case the fields can be shown.
// The models can only be fetched if the credentials are available.
// Show the model-dependent rows when models are cached, otherwise hide them
// all. The customisation rows are never shown from here; stt.populateCustoms
// decides their visibility.
stt.VisibilityCheck = function () {
  var showFields = [];
  var hideFields = [];
  if (stt.models) {
    showFields.push('#node-input-alternatives'
                    + ', #node-input-keywords'
                    + ', #node-input-keywords-threshold'
                    + ', #node-input-word-confidence'
                    + ', #node-input-speakerlabels'
                    + ', #node-input-smartformatting'
                    + ', #node-input-lang'
                    + ', #node-input-band');
    hideFields.push('#node-label-message');
  } else {
    hideFields.push('#node-label-message'
                    + ', #node-input-alternatives'
                    + ', #node-input-keywords'
                    + ', #node-input-keywords-threshold'
                    + ', #node-input-word-confidence'
                    + ', #node-input-speakerlabels'
                    + ', #node-input-smartformatting'
                    + ', #node-input-langcustom'
                    + ', #node-input-custom-weight'
                    + ', #node-input-lang'
                    + ', #node-input-band');
  }
  stt.hideSelectedFields(hideFields);
  stt.showSelectedFields(showFields);
}

// Simple check that is only invoked if the service is not bound into bluemix. In this case the
// user has to provide credentials. Once there are credentials, then the stt.models are retrieved.
stt.checkCredentials = function () {
  var k = $('#node-input-apikey').val();
  if (k && k.length) {
    if (!stt.models) {
      stt.getModels();
    }
  }
}

// Populate the quality select field.
// The band is the part of each model name after the first '_'. The list of
// unique bands is computed once from stt.models and cached in stt.bands.
stt.populateBands = function () {
  if (!stt.bands && stt.models) {
    stt.bands = stt.models
      .map(function (m) {
        // A model without a string name, or a name without '_', has no band.
        return (m && typeof m.name === 'string') ? m.name.split('_')[1] : undefined;
      })
      .filter(function (band) {
        // Drop missing bands so no "undefined" entry is offered.
        return band !== undefined;
      })
      .filter(stt.onlyUnique);
  }
  if (stt.bands) {
    var bandSelect = $('select#node-input-band');
    bandSelect.empty();
    stt.bands.forEach(function (b) {
      // Build the option through the DOM API rather than by concatenating
      // markup, so a name received from the server cannot inject HTML.
      $('<option>')
        .attr('value', String(b))
        .text(String(b))
        .prop('selected', stt.band_selected === b)
        .appendTo(bandSelect);
    });
  }
}

// Populate the customisations selection field
// Need to check if we have customisations and if the
// selected language and model have an entry.
// Private helper: create a detached <option> element through the DOM API so
// that values and labels received from the server are never parsed as HTML.
// String() keeps jQuery's .attr()/.text() in setter mode even for undefined.
stt.buildOption = function (value, label, selected) {
  return $('<option>')
    .attr('value', String(value))
    .text(String(label))
    .prop('selected', !!selected);
}

// Rebuild the customisation select. Only customisations whose base model name
// equals "<selected language>_<selected band>" are listed, followed by a
// "None" entry. The customisation and weight rows are shown only when at
// least one customisation matched.
stt.populateCustoms = function() {
  var showit = false;
  var customSelect = $('select#node-input-langcustom');
  customSelect.empty();

  if (stt.customisations) {
    // Fall back to the values in the form when the cached selections are not set.
    stt.language_selected = stt.language_selected ? stt.language_selected : $('#node-input-langhidden').val();
    stt.band_selected = stt.band_selected ? stt.band_selected : $('#node-input-band').val();

    var compareValue = stt.language_selected + '_' + stt.band_selected;

    stt.customisations.forEach(function(c) {
      if (compareValue === c['base_model_name']) {
        showit = true;
        stt.buildOption(c['customization_id'],
                        c['name'],
                        stt.custom_selected === c['customization_id'])
          .appendTo(customSelect);
      }
    });
  }

  // Add an option to not select. It is marked selected when "None" was the
  // explicit earlier choice; otherwise the browser would display the first
  // customisation while the cached selection still said "None".
  stt.buildOption('NoCustomisationSetting',
                  'None',
                  stt.custom_selected === 'NoCustomisationSetting')
    .appendTo(customSelect);

  if (showit) {
    customSelect.parent().show();
    $('#node-input-custom-weight').parent().show();
  } else {
    customSelect.parent().hide();
    $('#node-input-custom-weight').parent().hide();
  }
}

// Drop everything cached from the server and run the dialog preparation again.
stt.flushCache = function () {
  stt.models = null;
  stt.languages = null;
  stt.customisations = null;
  stt.bands = null;
  sttoneditprepare();
}

// Register the handlers for the fields.
// This runs on every sttoneditprepare call, including the one made by
// stt.flushCache while the dialog is still open. The handlers are therefore
// bound under the ".stt" event namespace and any earlier binding is removed
// first, otherwise each flush would double the number of handlers.
stt.handlersUI = function () {
  $('#node-input-apikey').off('change.stt').on('change.stt', function () {
    stt.checkCredentials();
  });

  $('#node-input-lang').off('change.stt').on('change.stt', function () {
    stt.language_selected = $('#node-input-lang').val();
    stt.populateBands();
    stt.populateCustoms();
  });

  $('#node-input-langcustom').off('change.stt').on('change.stt', function () {
    stt.custom_selected = $('#node-input-langcustom').val();
  });

  $('#node-input-band').off('change.stt').on('change.stt', function () {
    stt.band_selected = $('#node-input-band').val();
    stt.populateCustoms();
  });

  // The streaming sub-options follow the state of the streaming mode checkbox.
  $('#node-input-streaming-mode').off('change.stt').on('change.stt', function () {
    var fields = [];
    fields.push('#node-input-streaming-mute'
                  + ', #node-input-discard-listening'
                  + ', #node-input-auto-connect');
    var checked = $('#node-input-streaming-mode').prop('checked');
    if (checked) {
      stt.showSelectedFields(fields);
    } else {
      stt.hideSelectedFields(fields);
    }
  });

  $('#node-input-flushcache').off('click.stt').on('click.stt', function () {
    stt.flushCache();
  });
}

// Function to fetch the customisations, should only be done once.
// The API key and service endpoint from the form are sent as query parameters.
// Errors are reported in the message label.
stt.fetchCustomisations = function() {
  if (stt.customisations) {
    return;
  }

  var k = $('#node-input-apikey').val();
  var e = $('#node-input-service-endpoint').val();
  var creds = {key: k};
  creds.e = e;

  $.getJSON('watson-speech-to-text/customs', creds)
    .done(function (data) {
      $('label#node-label-message').parent().hide();
      if (data && data.error) {
        $('label#node-label-message').parent().show();
        $('label#node-label-message').text(data.error);
      } else {
        // An absent list is cached as empty so the fetch is not repeated.
        stt.customisations = (data && data.customizations) || [];
        stt.populateCustoms();
      }
    }).fail(function (err) {
      var txt = 'Got server error ';
      if (err.status) {
        txt += '(' + err.status + ') '
      }
      txt += 'when trying to retrieve available customisations';
      $('label#node-label-message').parent().show();
      $('label#node-label-message').text(txt);
    });
}

// Function called when either when the models have been retrieved, or
// on dialog load, if the model has already been retrieved
stt.postModelCheck = function () {
  stt.processModels();
  stt.VisibilityCheck();
  stt.fetchCustomisations();
}

// Retrieve the available models from the server, if data is returned, then
// can enable the dynamic selection fields.
// `haveCredentials` is accepted for the existing callers but is not used.
stt.getModels = function (haveCredentials) {
  var k = $('#node-input-apikey').val();
  var e = $('#node-input-service-endpoint').val();
  var creds = {key: k};
  creds.e = e;

  $.getJSON('watson-speech-to-text/models/', creds)
    .done(function (data) {
      if (data && data.error) {
        $('label#node-label-message').parent().show();
        $('label#node-label-message').text(data.error);
      } else if (data && data.models) {
        stt.models = data.models;
        stt.postModelCheck();
      }
    }).fail(function (err) {
      $('label#node-label-message').parent().show();
      $('label#node-label-message').text('Error trying to determine available service models');
    });
}

// Called to complete the languages selection table.
// The language list is derived once from stt.models and cached in
// stt.languages; the select is rebuilt, sorted and de-duplicated, on every call.
stt.processSTTLanguages = function () {
  if (!stt.languages && stt.models) {
    stt.languages = stt.models.map(function(m) {
      return m.language;
    });
  }
  if (stt.languages) {
    var langSelect = $('select#node-input-lang');
    langSelect.empty();
    var unique_langs = stt.languages.filter(stt.onlyUnique);
    unique_langs.sort();
    unique_langs.forEach(function(l) {
      // Prefer the friendly name, fall back to the raw language code.
      var label = stt.LANGUAGES[l] ? stt.LANGUAGES[l] : l;
      stt.buildOption(l, label, stt.language_selected === l)
        .appendTo(langSelect);
    });
  }
}

// Called to work through the models, completing the dynamic selection fields.
// Fill the language and quality selects; does nothing until models are cached.
stt.processModels = function () {
  if (!stt.models) {
    return;
  }
  stt.processSTTLanguages();
  stt.populateBands();
}

// The dynamic nature of the selection fields in this node has caused problems.
// Whenever there is a fetch for the models, on a page refresh or application
// restart, the settings for the dynamic fields are lost.
// So hidden (text) fields are being used to squirrel away the values, so that
// they can be restored: each hidden value is copied into the matching cached
// selection and pushed into the matching select.
stt.restoreFromHidden = function () {
  var savedLanguage = $('#node-input-langhidden').val();
  stt.language_selected = savedLanguage;
  $('select#node-input-lang').val(savedLanguage);

  var savedBand = $('#node-input-bandhidden').val();
  stt.band_selected = savedBand;
  $('select#node-input-band').val(savedBand);

  var savedCustom = $('#node-input-langcustomhidden').val();
  stt.custom_selected = savedCustom;
  $('select#node-input-langcustom').val(savedCustom);
}

// This is the on edit prepare function, which will be invoked every time the dialog
// is shown.
function sttoneditprepare() {
  stt.hideEverything();
  stt.restoreFromHidden();
  stt.handlersUI();

  var vcapRequest = $.getJSON('watson-speech-to-text/vcap/');

  vcapRequest.done(function (service) {
    // for some reason the getJSON resets the vars so need to restoreFromHidden
    // again here.
    stt.restoreFromHidden();

    // The credential rows are only shown when the response is falsy.
    $('.credentials').toggle(!service);

    if (stt.models) {
      stt.postModelCheck();
    } else {
      stt.getModels(service);
    }
  });

  vcapRequest.fail(function () {
    $('.credentials').show();
  });

  vcapRequest.always(function () {
    $('#credentials-check').hide();
  });
}

// Save the values in the dynamic lists to the hidden fields.
function sttoneditsave(){ $('#node-input-langhidden').val(stt.language_selected); $('#node-input-bandhidden').val(stt.band_selected); $('#node-input-langcustomhidden').val(stt.custom_selected); } (function() { RED.nodes.registerType('watson-speech-to-text', { category: 'IBM Watson', defaults: { name: {value: ''}, alternatives: {value: 1}, speakerlabels: {value: true}, smartformatting: {value: false}, lang: {value: ''}, langhidden: {value: ''}, langcustom: {value: ''}, langcustomhidden: {value: ''}, 'custom-weight': {value: '0.5'}, band: {value: ''}, bandhidden: {value: ''}, keywords: {value: ''}, 'keywords-threshold': {value: '0.5'}, 'word-confidence': {value: false}, password: {value: ''}, apikey: {value: ''}, 'payload-response' :{value: false}, 'streaming-mode' :{value: false}, 'streaming-mute' :{value: true}, 'auto-connect' :{value: false}, 'discard-listening' :{value: false}, 'disable-precheck' :{value: false}, 'service-endpoint' :{value: ''} }, credentials: { // password: {type: 'password'} - // Taken out because, was not being restored on dialog open. }, color: 'rgb(140, 198, 63)', inputs: 1, outputs: 1, icon: 'speech_to_text.png', paletteLabel: 'speech to text', label: function() { return this.name || 'speech to text'; }, labelStyle: function() { return this.name ? 'node_label_italic' : ''; }, oneditprepare: sttoneditprepare, oneditsave: sttoneditsave }); })(); </script>