warmaitech-mstts
Version:
A library for accessing the free MSTTS (Microsoft text-to-speech) service.
187 lines (158 loc) • 6.53 kB
JavaScript
const axios = require("axios");
const {v4: uuidv4} = require('uuid');
const ws = require("nodejs-websocket");
/**
 * Downloads the Azure text-to-speech product page and extracts the token
 * embedded in it as `token: "..."`.
 *
 * @returns {Promise<string|undefined>} The captured token, or `undefined` when
 *   the page does not contain a `token: "..."` fragment.
 */
async function getAuthToken() {
  // Alternative (zh-cn) page:
  // https://azure.microsoft.com/zh-cn/products/cognitive-services/text-to-speech/
  const res = await axios.get("https://azure.microsoft.com/en-gb/services/cognitive-services/text-to-speech/");
  // Read the capture from the match itself: the RegExp.$1 static is deprecated
  // and is process-wide state that any other regex use overwrites.
  const match = /token: \"(.*?)\"/.exec(res.data);
  return match ? match[1] : undefined;
}
/**
 * Produces the current time as an ISO-8601 UTC string; getTTSData places it
 * in the X-Timestamp header of each outgoing message.
 *
 * @returns {string} e.g. "2024-01-31T12:34:56.789Z"
 */
function getXTime() {
  const now = new Date();
  return now.toISOString();
}
/**
 * Promise wrapper around `connect.send(msg, callback)`.
 *
 * @param {object} connect - Connection object exposing `send(data, callback)`.
 * @param {string|Buffer} msg - Payload to send.
 * @returns {Promise<void>} Resolves once the send callback fires without an
 *   error; rejects when the callback receives an error or `send` throws.
 */
function wssSend(connect, msg) {
  return new Promise((resolve, reject) => {
    connect.send(msg, (err) => {
      // The callback may be invoked with an error as its first argument;
      // surface it as a rejection instead of resolving with it.
      if (err) {
        reject(err);
        return;
      }
      resolve();
    });
  });
}
/**
 * Opens a websocket connection to `url`, sending an Origin header of
 * https://azure.microsoft.com with the handshake.
 *
 * @param {string} url - Websocket URL to connect to.
 * @returns {Promise<object>} Resolves with the connection once the connect
 *   callback fires; rejects if the connection emits 'error' before that.
 */
function wssConnect(url) {
  return new Promise((resolve, reject) => {
    const onError = (err) => reject(err);
    const connect = ws.connect(url, {
      'extraHeaders': {
        'Origin': 'https://azure.microsoft.com'
      }
    }, () => {
      // Connected: hand error handling back to the caller so that later
      // errors are not silently absorbed by this (already settled) promise.
      connect.removeListener('error', onError);
      resolve(connect);
    });
    // Without a listener a failed handshake would leave the promise pending
    // and raise an unhandled 'error' event.
    connect.once('error', onError);
  });
}
/**
 * Synthesizes `text` to MP3 audio over the speech websocket endpoint.
 *
 * Builds an SSML document, opens a websocket via wssConnect, sends three
 * messages (speech.config, synthesis.context, ssml) and then collects the
 * payload of every binary frame until a text frame containing
 * "Path:turn.end" arrives.
 *
 * @param {string} text - Inserted verbatim into the SSML body. It is NOT
 *   XML-escaped here, so callers must escape characters such as `&` and `<`.
 * @param {string} [voice='zh-CN-YunxiNeural'] - Value of the SSML voice name.
 * @param {string} [express='general'] - Speaking style; falsy values fall
 *   back to 'general'.
 * @param {string} [role=''] - Optional role attribute; omitted when empty.
 * @param {number} [rate=0] - Prosody rate, emitted as a percentage.
 * @param {number} [pitch=0] - Prosody pitch, emitted as a percentage.
 * @returns {Promise<Buffer>} The concatenated audio payloads. Rejects if the
 *   connection errors or closes before "Path:turn.end" is received.
 */
async function getTTSData(text, voice = 'zh-CN-YunxiNeural', express = 'general', role = '', rate = 0, pitch = 0) {
  const style = express || 'general';
  // Only emit the attribute for a real value; a null/undefined role must not
  // become role="null" / role="undefined".
  const roleAttr = role ? `role="${role}"` : '';
  const SSML = `
<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US">
<voice name="${voice}">
<mstts:express-as style="${style}" ${roleAttr}>
<prosody rate="${rate}%" pitch="${pitch}%">
${text}
</prosody>
</mstts:express-as>
</voice>
</speak>
`
  console.log(SSML);
  console.log("获取Token...");
  // The getAuthToken() lookup is disabled; a fixed placeholder value is sent.
  const Authorization = 'bearer%20undefined';
  const XConnectionId = uuidv4().toUpperCase();
  console.log(`Authorization:${Authorization} XConnectionId:${XConnectionId}`);
  console.log("创建webscoket连接...");
  const connect = await wssConnect(`wss://eastus.api.speech.microsoft.com/cognitiveservices/websocket/v1?TrafficType=AzureDemo&Authorization=${Authorization}&X-ConnectionId=${XConnectionId}`);

  console.log("第1次上报...");
  const message_1 = `Path: speech.config\r\nX-RequestId: ${XConnectionId}\r\nX-Timestamp: ${getXTime()}\r\nContent-Type: application/json\r\n\r\n{"context":{"system":{"name":"SpeechSDK","version":"1.19.0","build":"JavaScript","lang":"JavaScript","os":{"platform":"Browser/Linux x86_64","name":"Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0","version":"5.0 (X11)"}}}}`;
  await wssSend(connect, message_1);

  console.log("第2次上报...");
  const message_2 = `Path: synthesis.context\r\nX-RequestId: ${XConnectionId}\r\nX-Timestamp: ${getXTime()}\r\nContent-Type: application/json\r\n\r\n{"synthesis":{"audio":{"metadataOptions":{"sentenceBoundaryEnabled":false,"wordBoundaryEnabled":false},"outputFormat":"audio-24khz-160kbitrate-mono-mp3"}}}`;
  await wssSend(connect, message_2);

  console.log("第3次上报...");
  const message_3 = `Path: ssml\r\nX-RequestId: ${XConnectionId}\r\nX-Timestamp: ${getXTime()}\r\nContent-Type: application/ssml+xml\r\n\r\n${SSML}`
  await wssSend(connect, message_3);

  return new Promise((resolve, reject) => {
    const AUDIO_MARKER = "Path:audio";
    // Payload of each audio frame, in arrival order.
    const audioParts = [];

    connect.on("text", (data) => {
      if (data.includes("Path:turn.end")) {
        console.log("已完成");
        connect.close();
        resolve(Buffer.concat(audioParts));
      }
    });

    connect.on("binary", (response) => {
      console.log("正在接收数据...");
      const chunks = [];
      response.on("readable", () => {
        let chunk;
        while ((chunk = response.read()) !== null) {
          chunks.push(chunk);
        }
      });
      response.on("end", () => {
        const frame = Buffer.concat(chunks);
        // Search the raw bytes: an index into the decoded string is not a
        // byte offset, and decoding the audio payload is wasted work.
        const markerAt = frame.indexOf(AUDIO_MARKER);
        if (markerAt === -1) {
          // Not an audio frame; appending it would corrupt the output.
          return;
        }
        // Skip the marker and the "\r\n" that terminates the header.
        audioParts.push(frame.subarray(markerAt + AUDIO_MARKER.length + 2));
      });
    });

    // If the socket goes away before "Path:turn.end", fail instead of leaving
    // the caller waiting forever. After a successful resolve these calls are
    // no-ops because the promise is already settled.
    connect.on("close", (code, reason) => {
      reject(new Error(`websocket closed before synthesis finished (code: ${code}, reason: ${reason})`));
    });
    connect.on("error", reject);
  });
}
/**
 * Placeholder for retrieving the list of available voices.
 * Not implemented: the body is empty, so the returned promise resolves to
 * `undefined` and no request is made.
 */
async function getVoiceList() {
// Candidate endpoint noted by the author (presumably the voice-list API — TODO confirm):
// https://eastus.tts.speech.microsoft.com/cognitiveservices/voices/list?Authorization=token
// TODO: implement.
}
// Voice catalogue used by showMenu.
// Outer keys are the region codes that showMenu's language choices map to
// ("CN", "TW", "HK"); each inner object maps the display name shown in the
// prompt to the voice-name fragment that showMenu combines with the region
// code to form the voice argument for getTTSData.
const voices = {
// Offered for the "中文普通话" (Mandarin) menu choice.
"CN": {
"晓晓": "Xiaoxiao",
"晓辰": "Xiaochen",
"晓涵": "Xiaohan",
"晓墨": "Xiaomo",
"晓秋": "Xiaoqiu",
"晓睿": "Xiaorui",
"晓双": "Xiaoshuang",
"晓萱": "Xiaoxuan",
"晓颜": "Xiaoyan",
"晓悠": "Xiaoyou",
"云扬": "Yunyang",
"云希": "Yunxi",
"云野": "Yunye",
"辽宁晓北": "LN-Xiaobei",
"四川云希": "SC-Yunxi",
"云皓": "Yunhao",
"云健": "Yunjian"
},
// Offered for the "中国台湾-国语" menu choice.
"TW": {
"曉臻": "HsiaoChen",
"曉雨": "HsiaoYu",
"雲哲": "YunJhe"
},
// Offered for the "中国香港-粤语" (Cantonese) menu choice.
"HK": {
"曉曼": "HiuMaan",
"曉佳": "HiuGaai",
"雲龍": "WanLung"
}
}
async function showMenu() {
const fs = require("fs");
const inquirer = require('inquirer');
const argv = require('minimist')(process.argv.slice(2));
let text = argv.i || 'WarmAITech';
let langChoices = {
"中文普通话": "CN",
"中国台湾-国语": "TW",
"中国香港-粤语": "HK"
};
res = await inquirer.prompt([
{
name: "请选择语言",
type: "list",
choices: Object.keys(langChoices),
required: true,
}
])
let lang = langChoices[res['请选择语言']];
res = await inquirer.prompt([
{
name: "请选择语音",
type: "list",
choices: Object.keys(voices[lang]),
required: true,
}
])
let voice = voices[lang][res['请选择语音']];
const mp3buffer = await getTTSData(text, lang + "-" + voice);
let output = argv.o || "./" + lang + "-" + voice + "-" + (new Date().getTime()) + ".mp3"
fs.writeFileSync(output, mp3buffer);
}
// Public API of this module: getTTSData is the only export.
exports.getTTSData = getTTSData;
// Entry-point guard for running this file directly rather than requiring it.
// The interactive menu call below is disabled, so direct execution currently
// does nothing.
if (require.main === module) {
// showMenu();
}