UNPKG

@lobehub/tts

Version:

A high-quality & reliable TTS React Hooks library

96 lines (94 loc) 3.47 kB
import { genSSML } from "../utils/genSSML.mjs";
import { genSendContent } from "../utils/genSendContent.mjs";
import { getHeadersAndData } from "../utils/getHeadersAndData.mjs";
import qs from "query-string";
import { v4 } from "uuid";

//#region src/core/EdgeSpeechTTS/createEdgeSpeech.ts
const EDGE_SPEECH_URL = "wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1";
const EDGE_API_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4";
const EDGE_VERSION = "1-132.0.2957.140";

// Seconds between 1601-01-01 (Windows file-time epoch) and 1970-01-01 (Unix epoch).
const WINDOWS_FILE_TIME_EPOCH = BigInt("11644473600");
// Ticks are 100 ns units, so one second is 10,000,000 ticks.
const TICKS_PER_SECOND = BigInt("10000000");
// 3,000,000,000 ticks = 300 s: the token input is rounded down to a 5-minute window.
const TOKEN_WINDOW_TICKS = BigInt("3000000000");

// Size in bytes of the length prefix at the start of every binary frame.
const HEADER_LENGTH_PREFIX_BYTES = 2;

// Payload of the "speech.config" message sent right after the socket opens.
const configContent = JSON.stringify({
  context: {
    synthesis: {
      audio: {
        metadataoptions: {
          sentenceBoundaryEnabled: false,
          wordBoundaryEnabled: true,
        },
        outputFormat: "audio-24khz-48kbitrate-mono-mp3",
      },
    },
  },
});

/**
 * Build the header sets for the two messages sent on every connection.
 *
 * @param {string} connectId - Request id placed in the SSML message's `X-RequestId`.
 * @returns {{ configHeader: object, contentHeader: object }} Headers for the
 *   `speech.config` message and for the `ssml` message; both share one timestamp.
 */
const genHeader = (connectId) => {
  const date = new Date().toString();
  return {
    configHeader: {
      "Content-Type": "application/json; charset=utf-8",
      "Path": "speech.config",
      "X-Timestamp": date,
    },
    contentHeader: {
      "Content-Type": "application/ssml+xml",
      "Path": "ssml",
      "X-RequestId": connectId,
      "X-Timestamp": date,
    },
  };
};

/**
 * Derive the `Sec-MS-GEC` query value from the current time.
 *
 * The current time is converted to Windows file-time ticks, rounded down to a
 * 5-minute window, concatenated with `EDGE_API_TOKEN`, hashed with the
 * project's `hash` helper and upper-cased.
 *
 * @returns {Promise<string>} Upper-cased hash string.
 */
async function generateSecMsGecToken() {
  const unixSeconds = BigInt(Math.floor(Date.now() / 1e3));
  const ticks = (unixSeconds + WINDOWS_FILE_TIME_EPOCH) * TICKS_PER_SECOND;
  const roundedTicks = ticks - (ticks % TOKEN_WINDOW_TICKS);
  const strToHash = `${roundedTicks}${EDGE_API_TOKEN}`;
  // Loaded lazily so the hashing helper is only pulled in when no token is supplied.
  const { hash } = await import("../utils/getSHA256.mjs");
  return hash(strToHash).toUpperCase();
}

/**
 * Concatenate byte chunks into a single ArrayBuffer with one allocation.
 *
 * @param {Uint8Array[]} chunks - Chunks in arrival order.
 * @param {number} totalLength - Sum of all chunk byte lengths.
 * @returns {ArrayBuffer} Buffer holding every chunk back to back.
 */
const concatChunks = (chunks, totalLength) => {
  const merged = new Uint8Array(totalLength);
  let offset = 0;
  for (const chunk of chunks) {
    merged.set(chunk, offset);
    offset += chunk.byteLength;
  }
  return merged.buffer;
};

/**
 * Synthesize speech over the Edge read-aloud WebSocket endpoint.
 *
 * Opens a WebSocket, sends the config and SSML messages, collects the audio
 * bodies of the binary frames and resolves once a `turn.end` text message
 * arrives.
 *
 * @param {object} request
 * @param {{ input: *, options: * }} request.payload - `input` and `options` are
 *   forwarded to `genSSML` unchanged.
 * @param {object} [config]
 * @param {string} [config.proxyUrl] - Endpoint used instead of the default Edge URL.
 * @param {string} [config.token] - When truthy, sent instead of the generated/default
 *   credentials.
 * @returns {Promise<Response>} Response whose body is the concatenated audio bytes.
 * @throws {Error} Rejects on a WebSocket error, when the socket closes before
 *   `turn.end`, when `turn.end` arrives without any audio, or when an incoming
 *   message cannot be processed.
 */
const createEdgeSpeech = async ({ payload }, { proxyUrl, token } = {}) => {
  const { input, options } = payload;
  const connectId = v4().replaceAll("-", "");

  // NOTE(review): a caller-supplied token is sent as Sec-MS-GEC,
  // Sec-MS-GEC-Version and TrustedClientToken alike. Kept as-is because a proxy
  // may rely on it -- confirm whether the version field should stay EDGE_VERSION.
  const url = qs.stringifyUrl({
    query: {
      ConnectionId: connectId,
      "Sec-MS-GEC": token || (await generateSecMsGecToken()),
      "Sec-MS-GEC-Version": token || EDGE_VERSION,
      TrustedClientToken: token || EDGE_API_TOKEN,
    },
    url: proxyUrl || EDGE_SPEECH_URL,
  });

  const { configHeader, contentHeader } = genHeader(connectId);
  const config = genSendContent(configHeader, configContent);
  const content = genSendContent(contentHeader, genSSML(input, options));

  return new Promise((resolve, reject) => {
    const ws = new WebSocket(url);
    ws.binaryType = "arraybuffer";

    // Chunks are gathered and merged once at the end instead of re-copying the
    // whole buffer on every frame.
    const audioChunks = [];
    let audioByteLength = 0;

    // Detach every listener, close the socket and settle the promise exactly once.
    function settle(action, value) {
      ws.removeEventListener("open", onOpen);
      ws.removeEventListener("message", onMessage);
      ws.removeEventListener("error", onError);
      ws.removeEventListener("close", onClose);
      ws.close();
      action(value);
    }

    function onOpen() {
      ws.send(config);
      ws.send(content);
    }

    function onTextMessage(text) {
      const { headers } = getHeadersAndData(text);
      if (headers["Path"] !== "turn.end") return;
      if (audioByteLength === 0) {
        // Previously this path returned without settling, leaving callers hanging.
        settle(reject, new Error("WebSocket turn ended without any audio data."));
        return;
      }
      settle(resolve, new Response(concatChunks(audioChunks, audioByteLength)));
    }

    function onBinaryMessage(buffer) {
      // Frames too short to carry the length prefix hold no audio.
      if (buffer.byteLength < HEADER_LENGTH_PREFIX_BYTES) return;
      // The prefix is a big-endian length, so it is read unsigned.
      const headerLength = new DataView(buffer).getUint16(0);
      const bodyOffset = HEADER_LENGTH_PREFIX_BYTES + headerLength;
      if (buffer.byteLength <= bodyOffset) return;
      const chunk = new Uint8Array(buffer, bodyOffset);
      audioChunks.push(chunk);
      audioByteLength += chunk.byteLength;
    }

    function onMessage(event) {
      try {
        if (typeof event.data === "string") {
          onTextMessage(event.data);
        } else if (event.data instanceof ArrayBuffer) {
          onBinaryMessage(event.data);
        }
      } catch (error) {
        // Surface processing failures to the caller instead of leaving the promise pending.
        settle(reject, error);
      }
    }

    function onError() {
      settle(reject, new Error("WebSocket error occurred."));
    }

    function onClose() {
      // Only reachable while still pending: settle() detaches this listener.
      settle(reject, new Error("WebSocket closed before the audio stream completed."));
    }

    ws.addEventListener("open", onOpen);
    ws.addEventListener("message", onMessage);
    ws.addEventListener("error", onError);
    ws.addEventListener("close", onClose);
  });
};
//#endregion

export { createEdgeSpeech };