node-red-contrib-ms-cognitive-services
Version:
Node Red module for making use of Microsoft Cognitive Services, including Text-To-Speech.
299 lines (260 loc) • 10.5 kB
HTML
<!--- CONFIG NODE --->
<script type="text/javascript">
// Registers the shared configuration node that holds the Azure region and the API key.
RED.nodes.registerType("ms-cognitive-services-config", {
    category: "config",

    // Regular properties, stored as part of the flow.
    defaults: {
        name: {
            value: "",
            required: true
        },
        region: {
            value: "westeurope",
            required: true
        }
    },

    // Stored separately from the flow as a credential.
    // NOTE(review): declared as "text" although the form renders a password input;
    // presumably so the editor-side voice lookup can read the key - confirm.
    credentials: {
        apiKey: { type: "text" }
    },

    // Config nodes are listed by their (required) name.
    label: function () {
        return this.name;
    },

    paletteLabel: "MS cognitive services config"
});
</script>
<!-- Edit form of the config node. Each label's "for" must match the id of its input
     ("node-config-input-<property>") so that the label is associated with the control. -->
<script type="text/html" data-template-name="ms-cognitive-services-config">
    <div class="form-row">
        <label for="node-config-input-name"><i class="fa fa-tag"></i> Name</label>
        <input type="text" id="node-config-input-name" placeholder="Name">
    </div>
    <div class="form-row">
        <label for="node-config-input-apiKey"><i class="fa fa-tag"></i> ApiKey</label>
        <input type="password" id="node-config-input-apiKey" placeholder="ApiKey">
    </div>
    <div class="form-row">
        <label for="node-config-input-region"><i class="fa fa-tag"></i> Region</label>
        <input type="text" id="node-config-input-region" placeholder="Region">
    </div>
</script>
<!-- Sidebar help of the config node: tells the user where the ApiKey can be found. -->
<script type="text/html" data-help-name="ms-cognitive-services-config">
    <p>
        Stores config data for ms-cognitive-services.
        You can find your ApiKey in the resource within your
        <a href="https://portal.azure.com" title="Azure Portal" target="_blank">Azure Portal</a>.
        Browse to your resource and open "Resource Management > Keys and Endpoint".
    </p>
</script>
<!--- TEXT TO SPEECH NODE --->
<script type="text/javascript">
// Allowed ranges (in percent) for the pitch and rate settings of the text-to-speech node.
const pitchMin = -50;
const pitchMax = 50;
const rateMin = -100;
const rateMax = 200;

/**
 * Checks whether a value is a number within the inclusive range [min, max].
 *
 * Accepts numbers and numeric strings (form fields deliver strings). Empty or
 * whitespace-only strings and non-number/non-string values are rejected; a plain
 * relational comparison would coerce "" or null to 0 and wrongly accept them.
 *
 * @param {number|string} value - The value to check.
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {boolean} true if value is numeric and min <= value <= max.
 */
function isInRange(value, min, max) {
    if (typeof value === "string") {
        if (value.trim() === "") {
            return false;
        }
    } else if (typeof value !== "number") {
        return false;
    }
    // Non-numeric text becomes NaN, and every comparison with NaN is false.
    var numeric = Number(value);
    return numeric >= min && numeric <= max;
}
// Registers the text-to-speech node and wires up its edit dialog.
RED.nodes.registerType('ms-cognitive-services-tts', {
    category: 'Microsoft Cognitive Services',
    color: '#a6bbcf',
    defaults: {
        name: { value: "" },
        apiConfig: { value: "", type: "ms-cognitive-services-config" },
        voice: { value: "de-DE-KatjaNeural" },
        expression: { value: "General" },
        rate: { value: 0, validate: function(v) { return isInRange(v, rateMin, rateMax); } },
        pitch: { value: 0, validate: function(v) { return isInRange(v, pitchMin, pitchMax); } },
        text: { value: "Text to synthesize" },
        storeAndReuse: { value: true }
    },
    inputs: 1,
    outputs: 1,
    icon: "text-to-speech-icon.png",
    label: function() { return this.name || "Text to speech"; },
    paletteLabel: "Text to speech",

    /**
     * Prepares the edit dialog: turns rate/pitch into numeric typed inputs with live
     * range feedback and attaches the voice picker to the search button.
     */
    oneditprepare: function() {
        // Makes the input a numeric typedInput and marks its container with
        // 'input-error' whenever the entered value leaves [min, max].
        function setupRangedNumberInput(selector, min, max) {
            var input = $(selector);
            input.typedInput({
                type: "num",
                types: ["num"],
                typeField: selector + "-type"
            });
            input.on("change", function() {
                var outOfRange = !isInRange($(this).val(), min, max);
                $(this).parent().find('.red-ui-typedInput-container').toggleClass('input-error', outOfRange);
            });
        }

        setupRangedNumberInput("#node-input-rate", rateMin, rateMax);
        setupRangedNumberInput("#node-input-pitch", pitchMin, pitchMax);

        $('#voiceSelectButton').on("click", function() {
            // A second click on the button closes an already open list.
            if ($('#voiceSelect').length !== 0) {
                $('#voiceSelect').remove();
                return;
            }
            fetchBearerToken(function(bearerToken) {
                var selectedVoice = $('#node-input-voice').val();
                fetchVoicesList(bearerToken, function(availableVoices) {
                    // Response is expected to be an array of objects. Object structure:
                    // {
                    //   "Name": "Microsoft Server Speech Text to Speech Voice (ar-EG, Hoda)",
                    //   "DisplayName": "Hoda",
                    //   "LocalName": "هدى",
                    //   "ShortName": "ar-EG-Hoda",
                    //   "Gender": "Female",
                    //   "Locale": "ar-EG",
                    //   "SampleRateHertz": "16000",
                    //   "VoiceType": "Standard"
                    // }
                    if (!Array.isArray(availableVoices)) {
                        alert("Unexpected response while loading the voices list!");
                        return;
                    }
                    // Several clicks may have started several requests; only the first
                    // response may create the list, otherwise the id would be duplicated.
                    if ($('#voiceSelect').length !== 0) {
                        return;
                    }

                    // Static markup only; the option texts are set via .text() below so
                    // that response data is never interpreted as HTML.
                    var voiceSelect = $("<select id=\"voiceSelect\" size=\"9\" style=\"width:100%; position:absolute; top:45px; margin: 0 0 40px 0; z-index:9999;\"></select>");
                    availableVoices.forEach(function(voice) {
                        var option = $('<option></option>').text(voice.ShortName);
                        if (voice.ShortName === selectedVoice) {
                            option.attr('selected', 'selected');
                        }
                        voiceSelect.append(option);
                    });

                    // Picking an entry copies it into the voice field and closes the list.
                    voiceSelect.on("change", function() {
                        $('#node-input-voice').val($(this).val());
                        $('#voiceSelect').remove();
                    });

                    $('#voiceSelectRow').append(voiceSelect);
                });
            });
        });
    }
});
/**
 * Requests an access token from the issueToken endpoint of the region configured in
 * the currently selected config node and hands the response body to the callback.
 *
 * Shows an alert and does not call the callback if no config node is selected, no
 * API key is available, or the request fails.
 *
 * @param {function(*)} callback - Called with the response of the token request.
 */
function fetchBearerToken(callback){
    var config = RED.nodes.node($("#node-input-apiConfig").val());
    if (config === undefined || config === null){
        alert("No config selected!");
        return;
    }
    // Within the editor, credential values of type "text" are exposed under
    // `credentials`. The former top-level lookup is kept as a fallback.
    var apiKey = (config.credentials && config.credentials.apiKey) || config.apiKey;
    if (!apiKey) {
        alert("No ApiKey available for the selected config!");
        return;
    }
    var settings = {
        "url": "https://" + config.region + ".api.cognitive.microsoft.com/sts/v1.0/issueToken",
        "method": "POST",
        "timeout": 0,
        // No "Host" header: it is a forbidden request header that browsers set
        // themselves and refuse to take from scripts.
        "headers": {
            "Ocp-Apim-Subscription-Key": apiKey
        }
    };
    $.ajax(settings).done(function (response) {
        callback(response);
    }).fail(function (jqXHR, textStatus, errorThrown) {
        alert("Fetching the access token failed: " + (errorThrown || textStatus));
    });
}
/**
 * Loads the list of available voices from the text-to-speech endpoint of the region
 * configured in the currently selected config node.
 *
 * Shows an alert and does not call the callback if no config node is selected or
 * the request fails.
 *
 * @param {string} bearerToken - Token sent as "Authorization: Bearer <token>".
 * @param {function(*)} callback - Called with the response of the voices request.
 */
function fetchVoicesList(bearerToken, callback){
    var config = RED.nodes.node($("#node-input-apiConfig").val());
    if (config === undefined || config === null){
        alert("No config selected!");
        return;
    }
    var settings = {
        "url": "https://" + config.region + ".tts.speech.microsoft.com/cognitiveservices/voices/list",
        "method": "GET",
        "timeout": 0,
        // No "Host" header: it is a forbidden request header that browsers set
        // themselves and refuse to take from scripts.
        "headers": {
            "Authorization": "Bearer " + bearerToken
        }
    };
    $.ajax(settings).done(function (response) {
        callback(response);
    }).fail(function (jqXHR, textStatus, errorThrown) {
        alert("Fetching the voices list failed: " + (errorThrown || textStatus));
    });
}
</script>
<!-- Edit form of the text-to-speech node. Input ids follow the "node-input-<property>"
     pattern; #voiceSelectRow and #voiceSelectButton are used by the voice picker script. -->
<script type="text/html" data-template-name="ms-cognitive-services-tts">
    <div class="form-row">
        <label for="node-input-name"><i class="fa fa-tag"></i> Name</label>
        <input type="text" id="node-input-name" placeholder="Name">
    </div>
    <div class="form-row">
        <label for="node-input-apiConfig"><i class="fa fa-tag"></i> ApiConfig</label>
        <input type="text" id="node-input-apiConfig" placeholder="ApiConfig">
    </div>
    <div class="form-row">
        <label for="node-input-text"><i class="fa fa-tag"></i> Text</label>
        <input type="text" id="node-input-text" placeholder="Text">
    </div>
    <div class="form-row">
        <label for="node-input-voice"><i class="fa fa-tag"></i> Voice</label>
        <div style="display: inline-block; position: relative; width: 70%; height: 20px;" id="voiceSelectRow">
            <div style="position: absolute; left: 0px; right: 40px;">
                <input type="text" id="node-input-voice" placeholder="Voice" style="width:100%">
            </div>
            <button type="button" class="red-ui-button" id="voiceSelectButton" title="Select voice" aria-label="Select voice" style="position: absolute; right: 0px; top: 0px;"><i class="fa fa-search" aria-hidden="true"></i></button>
        </div>
    </div>
    <div class="form-row">
        <label for="node-input-expression"><i class="fa fa-tag"></i> Expression</label>
        <select id="node-input-expression">
            <option>General</option>
            <option>Cheerful</option>
            <option>CustomerService</option>
            <option>Empathy</option>
            <option>Newscast</option>
            <option>Chat</option>
        </select>
    </div>
    <div class="form-row">
        <label for="node-input-rate"><i class="fa fa-tag"></i> Rate (%)</label>
        <input type="text" id="node-input-rate">
        <input type="hidden" id="node-input-rate-type">
    </div>
    <div class="form-row">
        <label for="node-input-pitch"><i class="fa fa-tag"></i> Pitch (%)</label>
        <input type="text" id="node-input-pitch">
        <input type="hidden" id="node-input-pitch-type">
    </div>
    <div class="form-row">
        <label for="node-input-storeAndReuse"><i class="fa fa-tag"></i> Store and reuse audiofile</label>
        <input type="checkbox" id="node-input-storeAndReuse">
    </div>
</script>
<!-- Sidebar help of the text-to-speech node. Markup that should be shown literally
     (such as the SSML break tag) must be entity-escaped, because this text is rendered as HTML. -->
<script type="text/html" data-help-name="ms-cognitive-services-tts">
    <h3>Input message</h3>
    <dl class="message-properties">
        <dt class="optional">textToSynthesize <span class="property-type">string</span></dt>
        <dd>
            The text which should be synthesized. Can be enhanced with additional information,
            like adding short breaks within your text: <code>&lt;break time="600ms"/&gt;</code>.
            See the <a href="https://speech.microsoft.com/audiocontentcreation" target="_blank" rel="noopener noreferrer">Audio Content Creation tool</a> for more.
        </dd>
        <dt class="optional">voice <span class="property-type">enum</span></dt>
        <dd>
            The voice with which the text should be spoken. Select one from the
            <a href="https://docs.microsoft.com/de-de/azure/cognitive-services/speech-service/language-support#standard-voices" target="_blank" rel="noopener noreferrer">list of supported voices</a>.
        </dd>
        <dt class="optional">expression <span class="property-type">enum</span></dt>
        <dd>
            The expression of the voice. Select one from
            <ul>
                <li>General</li>
                <li>Cheerful</li>
                <li>CustomerService</li>
                <li>Empathy</li>
                <li>Newscast</li>
                <li>Chat</li>
            </ul>
        </dd>
        <dt class="optional">rate <span class="property-type">number</span></dt>
        <dd>The speaking rate. Allowed range from -100% up to 200%.</dd>
        <dt class="optional">pitch <span class="property-type">number</span></dt>
        <dd>The pitch of the voice. Allowed range from -50% to 50%.</dd>
        <dt class="optional">storeAndReuse <span class="property-type">boolean</span></dt>
        <dd>
            If true, on each request the node checks whether the same request (regarding text, voice, rate, pitch, ...) has
            already been downloaded and reuses the file if so.
        </dd>
    </dl>
    <h3>Output message</h3>
    <dl class="message-properties">
        <dt>payload <span class="property-type">buffer</span></dt>
        <dd>The audio file in a buffer, can e.g. be used for writing into a file.</dd>
        <dt>tempFilename <span class="property-type">string</span></dt>
        <dd>The name of the temporary file in which the audio is stored in the data folder.</dd>
    </dl>
    <h3>References</h3>
    <ul>
        <li><a href="https://github.com/mdhom/node-red-contrib-ms-cognitive-services" target="_blank" rel="noopener noreferrer">GitHub</a> - the node's GitHub repository</li>
    </ul>
</script>