@lobehub/tts
Version:
A high-quality & reliable TTS React Hooks library
96 lines (94 loc) • 3.47 kB
JavaScript
import { genSSML } from "../utils/genSSML.mjs";
import { genSendContent } from "../utils/genSendContent.mjs";
import { getHeadersAndData } from "../utils/getHeadersAndData.mjs";
import qs from "query-string";
import { v4 } from "uuid";
//#region src/core/EdgeSpeechTTS/createEdgeSpeech.ts
// Default WebSocket endpoint; createEdgeSpeech uses it when no proxyUrl is given.
const EDGE_SPEECH_URL = "wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1";
// Sent as the TrustedClientToken query parameter and appended to the value
// hashed by generateSecMsGecToken.
const EDGE_API_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
// Sent as the Sec-MS-GEC-Version query parameter.
const EDGE_VERSION = "1-132.0.2957.140";
// Offset in seconds added to the Unix time in generateSecMsGecToken
// (presumably the gap between 1601-01-01 and 1970-01-01 — confirm).
const WINDOWS_FILE_TIME_EPOCH = BigInt("11644473600");
// Audio settings for the synthesis request. Key order is kept stable because
// the object is serialized verbatim below.
const audioSynthesisConfig = {
  metadataoptions: {
    sentenceBoundaryEnabled: false,
    wordBoundaryEnabled: true
  },
  outputFormat: "audio-24khz-48kbitrate-mono-mp3"
};
// JSON body sent together with the "speech.config" header.
const configContent = JSON.stringify({
  context: {
    synthesis: {
      audio: audioSynthesisConfig
    }
  }
});
/**
 * Builds the two header maps sent over the socket: one for the
 * "speech.config" message and one for the "ssml" message.
 *
 * Both maps share a single timestamp, taken once via `Date#toString()`.
 *
 * @param connectId Request identifier placed in the content header's
 *   "X-RequestId" field.
 * @returns An object with `configHeader` and `contentHeader`.
 */
const genHeader = (connectId) => {
  const timestamp = new Date().toString();
  const configHeader = {
    "Content-Type": "application/json; charset=utf-8",
    "Path": "speech.config",
    "X-Timestamp": timestamp
  };
  const contentHeader = {
    "Content-Type": "application/ssml+xml",
    "Path": "ssml",
    "X-RequestId": connectId,
    "X-Timestamp": timestamp
  };
  return { configHeader, contentHeader };
};
/**
 * Computes the value sent as the "Sec-MS-GEC" query parameter.
 *
 * The current Unix time in seconds is shifted by WINDOWS_FILE_TIME_EPOCH,
 * scaled to 10,000,000 ticks per second, and rounded down to a multiple of
 * 3,000,000,000 ticks (300 seconds' worth). The rounded tick count is then
 * concatenated with EDGE_API_TOKEN, hashed with the project's SHA-256 helper,
 * and upper-cased.
 *
 * @returns A promise resolving to the upper-cased hash string.
 */
async function generateSecMsGecToken() {
  const ticksPerSecond = BigInt("10000000");
  const roundingWindow = BigInt("3000000000");
  const shiftedSeconds = Math.floor(Date.now() / 1e3 + Number(WINDOWS_FILE_TIME_EPOCH));
  const ticks = BigInt(shiftedSeconds) * ticksPerSecond;
  const roundedTicks = ticks - (ticks % roundingWindow);
  const strToHash = `${roundedTicks}${EDGE_API_TOKEN}`;
  // Loaded lazily so the hashing helper is only pulled in when a token is needed.
  const sha256Module = await import("../utils/getSHA256.mjs");
  return sha256Module.hash(strToHash).toUpperCase();
}
/**
 * Synthesizes speech for `payload.input` over a WebSocket and resolves with
 * the collected audio wrapped in a `Response`.
 *
 * Flow: open the socket, send the "speech.config" message followed by the
 * SSML message, accumulate the audio body of every binary frame, and finish
 * when a text frame whose "Path" header is "turn.end" arrives.
 *
 * @param request Object whose `payload` carries `input` and `options`; both
 *   are forwarded to `genSSML`.
 * @param config Optional settings.
 * @param config.proxyUrl Endpoint to use instead of EDGE_SPEECH_URL.
 * @param config.token When truthy, replaces the generated/default values of
 *   the "Sec-MS-GEC", "Sec-MS-GEC-Version" and "TrustedClientToken" query
 *   parameters.
 * @returns A promise resolving to a `Response` whose body is the audio bytes.
 * @throws Rejects with an `Error` when the socket reports an error, closes
 *   before "turn.end", finishes without any audio, or a frame cannot be
 *   processed.
 */
const createEdgeSpeech = async ({ payload }, { proxyUrl, token } = {}) => {
  const { input, options } = payload;
  const connectId = v4().replaceAll("-", "");
  const url = qs.stringifyUrl({
    query: {
      ConnectionId: connectId,
      "Sec-MS-GEC": token ? token : await generateSecMsGecToken(),
      // NOTE(review): a caller-supplied token also replaces the version
      // string here — kept as-is for compatibility; confirm it is intended.
      "Sec-MS-GEC-Version": token ? token : EDGE_VERSION,
      TrustedClientToken: token ? token : EDGE_API_TOKEN
    },
    url: proxyUrl ? proxyUrl : EDGE_SPEECH_URL
  });
  const { configHeader, contentHeader } = genHeader(connectId);
  const config = genSendContent(configHeader, configContent);
  const content = genSendContent(contentHeader, genSSML(input, options));
  return new Promise((resolve, reject) => {
    const ws = new WebSocket(url);
    ws.binaryType = "arraybuffer";
    // Audio bodies are collected as views and merged once at the end, instead
    // of re-copying the whole buffer for every incoming frame.
    const audioChunks = [];
    let audioByteLength = 0;

    // Rejects the promise and tears the socket down.
    const fail = (error) => {
      reject(error);
      ws.close();
    };

    // Closes the socket and settles the promise with the merged audio.
    const finish = () => {
      ws.close();
      if (!audioByteLength) {
        // Without this the promise would never settle when no audio arrived.
        reject(new Error("No audio data received."));
        return;
      }
      const merged = new Uint8Array(audioByteLength);
      let offset = 0;
      for (const chunk of audioChunks) {
        merged.set(chunk, offset);
        offset += chunk.byteLength;
      }
      resolve(new Response(merged.buffer));
    };

    const onOpen = () => {
      ws.send(config);
      ws.send(content);
    };

    const onMessage = (event) => {
      try {
        if (typeof event.data === "string") {
          const { headers } = getHeadersAndData(event.data);
          if (headers["Path"] === "turn.end") finish();
          return;
        }
        if (!(event.data instanceof ArrayBuffer)) return;
        // A binary frame starts with a 2-byte big-endian header length;
        // anything shorter cannot carry audio.
        if (event.data.byteLength < 2) return;
        // Read as unsigned so lengths >= 0x8000 do not turn negative.
        const headerLength = new DataView(event.data).getUint16(0);
        if (event.data.byteLength > headerLength + 2) {
          const body = new Uint8Array(event.data, 2 + headerLength);
          audioChunks.push(body);
          audioByteLength += body.byteLength;
        }
      } catch (error) {
        fail(error instanceof Error ? error : new Error(String(error)));
      }
    };

    const onError = () => {
      fail(new Error("WebSocket error occurred."));
    };

    // If the socket closes before "turn.end" the promise must still settle.
    // After a successful finish or an earlier failure this reject is a no-op,
    // because a promise can only settle once.
    const onClose = () => {
      reject(new Error("WebSocket closed before the synthesis finished."));
    };

    ws.addEventListener("open", onOpen);
    ws.addEventListener("message", onMessage);
    ws.addEventListener("error", onError);
    ws.addEventListener("close", onClose);
  });
};
//#endregion
export { createEdgeSpeech };