node-edge-tts
Version:
node-edge-tts is a module that using Microsoft Edge's online TTS (Text-to-Speech) service on the Node.js
131 lines (130 loc) • 5.87 kB
JavaScript
Object.defineProperty(exports, "__esModule", { value: true });
exports.EdgeTTS = void 0;
const node_crypto_1 = require("node:crypto");
const node_fs_1 = require("node:fs");
const ws_1 = require("ws");
const https_proxy_agent_1 = require("https-proxy-agent");
const drm_1 = require("./drm");
class EdgeTTS {
constructor({ voice = 'zh-CN-XiaoyiNeural', lang = 'zh-CN', outputFormat = 'audio-24khz-48kbitrate-mono-mp3', saveSubtitles = false, proxy, rate = 'default', pitch = 'default', volume = 'default', timeout = 10000 } = {}) {
this.voice = voice;
this.lang = lang;
this.outputFormat = outputFormat;
this.saveSubtitles = saveSubtitles;
this.proxy = proxy;
this.rate = rate;
this.pitch = pitch;
this.volume = volume;
this.timeout = timeout;
}
async _connectWebSocket() {
const wsConnect = new ws_1.WebSocket(`wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=${drm_1.TRUSTED_CLIENT_TOKEN}&Sec-MS-GEC=${(0, drm_1.generateSecMsGecToken)()}&Sec-MS-GEC-Version=1-${drm_1.CHROMIUM_FULL_VERSION}`, {
host: 'speech.platform.bing.com',
origin: 'chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold',
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36 Edg/130.0.0.0',
},
agent: this.proxy ? new https_proxy_agent_1.HttpsProxyAgent(this.proxy) : undefined
});
return new Promise((resolve, reject) => {
wsConnect.on('open', () => {
wsConnect.send(`Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n
{
"context": {
"synthesis": {
"audio": {
"metadataoptions": {
"sentenceBoundaryEnabled": "false",
"wordBoundaryEnabled": "true"
},
"outputFormat": "${this.outputFormat}"
}
}
}
}
`);
resolve(wsConnect);
});
wsConnect.on('error', (err) => {
reject(err);
});
});
}
_saveSubFile(subFile, text, audioPath) {
let subPath = audioPath + '.json';
let subChars = text.split('');
let subCharIndex = 0;
subFile.forEach((cue, index) => {
let fullPart = '';
let stepIndex = 0;
for (let sci = subCharIndex; sci < subChars.length; sci++) {
if (subChars[sci] === cue.part[stepIndex]) {
fullPart = fullPart + subChars[sci];
stepIndex += 1;
}
else if (subChars[sci] === subFile?.[index + 1]?.part?.[0]) {
subCharIndex = sci;
break;
}
else {
fullPart = fullPart + subChars[sci];
}
}
cue.part = fullPart;
});
(0, node_fs_1.writeFileSync)(subPath, JSON.stringify(subFile, null, ' '), { encoding: 'utf-8' });
}
async ttsPromise(text, audioPath) {
const _wsConnect = await this._connectWebSocket();
return new Promise((resolve, reject) => {
let audioStream = (0, node_fs_1.createWriteStream)(audioPath);
let subFile = [];
let timeout = setTimeout(() => reject('Timed out'), this.timeout);
_wsConnect.on('message', async (data, isBinary) => {
if (isBinary) {
let separator = 'Path:audio\r\n';
let index = data.indexOf(separator) + separator.length;
let audioData = data.subarray(index);
audioStream.write(audioData);
}
else {
let message = data.toString();
if (message.includes('Path:turn.end')) {
audioStream.end();
_wsConnect.close();
if (this.saveSubtitles) {
this._saveSubFile(subFile, text, audioPath);
}
clearTimeout(timeout);
resolve();
}
else if (message.includes('Path:audio.metadata')) {
let splitTexts = message.split('\r\n');
try {
let metadata = JSON.parse(splitTexts[splitTexts.length - 1]);
metadata['Metadata'].forEach((element) => {
subFile.push({
part: element['Data']['text']['Text'],
start: Math.floor(element['Data']['Offset'] / 10000),
end: Math.floor((element['Data']['Offset'] + element['Data']['Duration']) / 10000)
});
});
}
catch { }
}
}
});
let requestId = (0, node_crypto_1.randomBytes)(16).toString('hex');
_wsConnect.send(`X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n
` + `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="${this.lang}">
<voice name="${this.voice}">
<prosody rate="${this.rate}" pitch="${this.pitch}" volume="${this.volume}">
${text}
</prosody>
</voice>
</speak>`);
});
}
}
exports.EdgeTTS = EdgeTTS;
;