// volcengine-tts
// TTS module for Volcano Engine (火山引擎); also supports other TTS services.
// Source listing: 1,092 lines (1,085 loc), 26.1 kB, JavaScript.
import { Readable, PassThrough } from 'stream';
import { randomUUID } from 'crypto';
import WebSocket2 from 'ws';
import { fetch } from 'node-fetch-native/proxy';
import * as zlib from 'zlib';
// src/index.ts
// src/common/const.ts
// Sentence used in place of an empty `text` option ("Hello, nice to meet you!").
const kTTSDefaultText = "\u4F60\u597D\uFF0C\u5F88\u9AD8\u5174\u8BA4\u8BC6\u4F60\uFF01";
// src/common/speaker.ts
/**
 * Resolve a speaker display name or speaker id to its provider.
 *
 * Providers are searched in `kTTSProviders` order and the first speaker whose
 * `name` or `speaker` equals `speakerNameOrId` wins.
 *
 * @param {string} [speakerNameOrId] - Display name or id to look up.
 * @param {string} [defaultSpeaker] - Forwarded to `initDefaultSpeaker`.
 * @returns {{tts: Function, speaker: string}} The matching provider's `tts`
 *   function and the resolved speaker id, or the shared default when nothing
 *   matches.
 */
const findTTSProvider = (speakerNameOrId, defaultSpeaker) => {
  // Make sure the module-level fallback exists before it may be returned.
  initDefaultSpeaker(defaultSpeaker);
  for (const provider of kTTSProviders) {
    const match = provider.speakers.find(
      (candidate) =>
        candidate.name === speakerNameOrId || candidate.speaker === speakerNameOrId
    );
    if (match) {
      return { tts: provider.tts, speaker: match.speaker };
    }
  }
  return kDefaultSpeaker;
};
// Module-level fallback `{ tts, speaker }`; assigned once by initDefaultSpeaker.
let kDefaultSpeaker;

/**
 * Populate `kDefaultSpeaker` on first use; later calls are no-ops.
 *
 * If `defaultSpeaker` matches a speaker name or id of any provider, that
 * provider and speaker become the default. Otherwise the first speaker of the
 * first provider in `kTTSProviders` is used.
 *
 * @param {string} [defaultSpeaker] - Preferred default speaker name or id.
 * @returns {void}
 */
const initDefaultSpeaker = (defaultSpeaker) => {
  if (kDefaultSpeaker) {
    return;
  }
  if (defaultSpeaker) {
    for (const provider of kTTSProviders) {
      const hit = provider.speakers.find(
        (candidate) =>
          candidate.name === defaultSpeaker || candidate.speaker === defaultSpeaker
      );
      if (hit) {
        kDefaultSpeaker = { tts: provider.tts, speaker: hit.speaker };
        break;
      }
    }
  }
  if (kDefaultSpeaker == null) {
    const firstProvider = kTTSProviders[0];
    kDefaultSpeaker = {
      tts: firstProvider.tts,
      speaker: firstProvider.speakers[0].speaker
    };
  }
};
/**
 * Build the push/end/error callbacks shared by the TTS back ends.
 *
 * Every audio chunk is forwarded to `responseStream` and also recorded so the
 * complete audio can be delivered through `result` when synthesis finishes.
 *
 * The handler settles exactly once. After `end()` or `error()` has run, later
 * `push`/`end`/`error` calls are ignored, so an `error` followed by the
 * socket's `close` event neither reports success nor writes to a stream that
 * has already been ended.
 *
 * @param {{push: Function}} responseStream - Sink receiving each chunk, the
 *   literal string "error" on failure, and `null` as end marker.
 * @returns {{requestId: string, result: Promise<Uint8Array|null>,
 *   push: Function, end: Function, error: Function}}
 *   `result` resolves with the concatenated audio (or `null` when fewer than
 *   1000 bytes were received) and rejects with an `Error` whose `cause` is the
 *   original failure.
 */
const createStreamHandler = (responseStream) => {
  // Chunks are kept by reference and joined once in end(); re-copying the
  // whole buffer on every push would be quadratic in the audio size.
  const chunks = [];
  let totalLength = 0;
  let settled = false;
  const requestId = randomUUID().substring(0, 8);
  let resolve;
  let reject;
  const result = new Promise((res, rej) => {
    resolve = res;
    reject = rej;
  });
  const push = (audioData) => {
    if (settled) {
      return;
    }
    responseStream.push(audioData);
    chunks.push(audioData);
    totalLength += audioData.length;
  };
  const end = () => {
    if (settled) {
      return;
    }
    settled = true;
    console.log(requestId, "\u2705 Done: ", totalLength);
    responseStream.push(null);
    // Anything shorter than 1000 bytes is treated as "no audio".
    if (totalLength < 1e3) {
      resolve(null);
      return;
    }
    const audioBuffer = new Uint8Array(totalLength);
    let offset = 0;
    for (const chunk of chunks) {
      audioBuffer.set(chunk, offset);
      offset += chunk.length;
    }
    resolve(audioBuffer);
  };
  const error = (err, msg = "Something went wrong") => {
    if (settled) {
      // Keep late failures visible without touching the finished stream.
      console.log(requestId, "\u274C " + msg + " (ignored: already finished)", err);
      return;
    }
    settled = true;
    console.log(requestId, "\u274C " + msg, err);
    responseStream.push("error");
    responseStream.push(null);
    reject(new Error(msg, { cause: err }));
  };
  return { requestId, result, push, end, error };
};
// src/tts/edge.ts
// Voices offered through the Microsoft Edge "Read Aloud" endpoint.
// `name` is the display name, `speaker` the voice id sent in the SSML,
// `gender` is "\u7537" (male) or "\u5973" (female).
const kEdgeTTSSpeakers = [
  { name: "\u4E91\u5E0C", speaker: "zh-CN-YunxiNeural", gender: "\u7537" },
  { name: "\u9655\u897F\u5C0F\u59AE", speaker: "zh-CN-shaanxi-XiaoniNeural", gender: "\u5973" },
  { name: "\u5C0F\u5C0F", speaker: "zh-CN-XiaoxiaoNeural", gender: "\u5973" },
  { name: "\u5C0F\u827A", speaker: "zh-CN-XiaoyiNeural", gender: "\u5973" },
  { name: "\u4E91\u5065", speaker: "zh-CN-YunjianNeural", gender: "\u7537" },
  { name: "\u4E91\u590F", speaker: "zh-CN-YunxiaNeural", gender: "\u7537" },
  { name: "\u4E91\u626C", speaker: "zh-CN-YunyangNeural", gender: "\u7537" },
  { name: "\u8FBD\u5B81\u5C0F\u5317", speaker: "zh-CN-liaoning-XiaobeiNeural", gender: "\u5973" },
  { name: "\u5C0F\u73CD", speaker: "zh-TW-HsiaoChenNeural", gender: "\u5973" },
  { name: "\u4E91\u54F2", speaker: "zh-TW-YunJheNeural", gender: "\u7537" },
  // zh-TW-HsiaoYuNeural is a female voice; it was previously tagged male.
  { name: "\u5C0F\u5B87", speaker: "zh-TW-HsiaoYuNeural", gender: "\u5973" }
];
// WebSocket endpoint of the Microsoft Edge "Read Aloud" synthesis service.
const kAPI = "wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1";

/**
 * Synthesize `text` through the Edge Read Aloud WebSocket API.
 *
 * Audio chunks are forwarded to `stream` as they arrive; the returned promise
 * settles through the stream handler once the socket closes or fails.
 *
 * @param {object} options
 * @param {{trustedToken?: string}} [options.edge] - Edge credentials.
 * @param {string} options.text - Text to speak.
 * @param {string} [options.speaker] - Voice id.
 * @param {object} options.stream - Sink passed to `createStreamHandler`.
 * @returns {Promise<Uint8Array|null>} `null` immediately when no token is
 *   configured, otherwise the stream handler's `result`.
 */
const edgeTTS = async ({
  edge,
  text,
  speaker,
  stream: responseStream
}) => {
  const token = edge?.trustedToken;
  if (!token) {
    console.log("\u274C \u627E\u4E0D\u5230\u5FAE\u8F6F\u5FC5\u5E94 TTS \u73AF\u5883\u53D8\u91CF\uFF1AEDGE_TTS_TRUSTED_TOKEN");
    return null;
  }
  const streamHandler = createStreamHandler(responseStream);
  // Once a failure was reported, the following "close" event must not be
  // reported as a successful end.
  let failed = false;
  const fail = (err, msg) => {
    if (failed) {
      return;
    }
    failed = true;
    streamHandler.error(err, msg);
  };
  try {
    const ws = new WebSocket2(
      `${kAPI}?TrustedClientToken=${encodeURIComponent(token)}&ConnectionId=${randomUUID()}`,
      {
        host: "speech.platform.bing.com",
        headers: {
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.66 Safari/537.36 Edg/103.0.1264.44"
        }
      }
    );
    ws.on("message", (rawData, isBinary) => {
      if (!isBinary) {
        // Text frames carry control messages; "turn.end" marks completion.
        if (rawData.toString("utf8").includes("turn.end")) {
          ws.close();
        }
        return;
      }
      const separator = "Path:audio\r\n";
      const headerStart = rawData.indexOf(separator);
      if (headerStart < 0) {
        // Not an audio frame; slicing at -1 + length would emit header bytes.
        return;
      }
      const audioData = rawData.subarray(headerStart + separator.length);
      if (audioData.length > 0) {
        streamHandler.push(audioData);
      }
    });
    ws.on("error", (err) => {
      fail(err, "Edge | WebSocket error");
    });
    ws.on("close", () => {
      if (!failed) {
        streamHandler.end();
      }
    });
    ws.on("open", () => {
      const request = getEdgeTTSMessages({ text, speaker });
      ws.send(request.config, { compress: true }, (configError) => {
        if (configError) {
          fail(configError, "Edge | Send config msg failed!");
          // Nothing more will be sent; do not leave the socket open.
          ws.close();
          return;
        }
        ws.send(request.ssml, { compress: true }, (ssmlError) => {
          if (ssmlError) {
            fail(ssmlError, "Edge | Send ssml msg failed!");
            ws.close();
          }
        });
      });
    });
  } catch (err) {
    fail(err, "Edge | Unknown error");
  }
  return streamHandler.result;
};
/**
 * Build the two text frames sent to the Edge Read Aloud socket: the speech
 * configuration and the SSML synthesis request.
 *
 * The text and all attribute values are XML-escaped, so input such as
 * "R&D" or "a < b" produces well-formed SSML instead of a broken request.
 *
 * @param {object} options
 * @param {string} options.text - Plain text to speak.
 * @param {string} [options.speaker="zh-CN-YunxiNeural"] - Voice id.
 * @param {string} [options.volume="+0%"]
 * @param {string} [options.rate="+0%"]
 * @param {string} [options.pitch="+0Hz"]
 * @returns {{config: string, ssml: string}} CRLF-delimited header blocks
 *   followed by a blank line and the body.
 */
function getEdgeTTSMessages(options) {
  const {
    text,
    speaker = "zh-CN-YunxiNeural",
    volume = "+0%",
    rate = "+0%",
    pitch = "+0Hz"
  } = options;
  const xmlEntities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;"
  };
  const escapeXml = (value) =>
    String(value).replace(/[&<>"']/g, (ch) => xmlEntities[ch]);
  const speechConfig = JSON.stringify({
    context: {
      synthesis: {
        audio: {
          outputFormat: "audio-24khz-48kbitrate-mono-mp3",
          metadataoptions: {
            sentenceBoundaryEnabled: false,
            wordBoundaryEnabled: false
          }
        }
      }
    }
  });
  const configMessage = [
    `X-Timestamp:${Date()}`,
    "Content-Type:application/json; charset=utf-8",
    "Path:speech.config",
    "",
    speechConfig
  ].join("\r\n");
  const ssmlBody =
    "<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'>" +
    `<voice name='${escapeXml(speaker)}'>` +
    `<prosody pitch='${escapeXml(pitch)}' rate='${escapeXml(rate)}' volume='${escapeXml(volume)}'>` +
    escapeXml(text) +
    "</prosody></voice></speak>";
  const ssmlMessage = [
    `X-RequestId:${randomUUID()}`,
    "Content-Type:application/ssml+xml",
    // The trailing "Z" after the local Date() string is kept as-is.
    `X-Timestamp:${Date()}Z`,
    "Path:ssml",
    "",
    ssmlBody
  ].join("\r\n");
  return {
    config: configMessage,
    ssml: ssmlMessage
  };
}
// Provider descriptor for Microsoft Bing (Edge Read Aloud) TTS:
// display name, synthesis function and the voices it offers.
const kEdgeTTS = { name: "\u5FAE\u8F6F\u5FC5\u5E94 TTS", tts: edgeTTS, speakers: kEdgeTTSSpeakers };
// Voices offered by the OpenAI speech endpoint, written as
// [display name, gender label, voice id] rows and expanded into objects.
const kOpenAISpeakers = [
  ["Alloy", "\u7537", "alloy"],
  ["Echo", "\u7537", "echo"],
  ["Fable", "\u7537", "fable"],
  ["Onyx", "\u7537", "onyx"],
  ["Nova", "\u5973", "nova"],
  ["Shimmer", "\u5973", "shimmer"]
].map(([name, gender, speaker]) => ({ name, gender, speaker }));
/**
 * Synthesize `text` with the OpenAI `/audio/speech` endpoint.
 *
 * The response body is streamed into `stream` chunk by chunk. A failed
 * request or a non-2xx response is reported as an error instead of being
 * streamed as if the error body were audio.
 *
 * @param {object} options
 * @param {{apiKey?: string, model?: string, baseUrl?: string}} [options.openai]
 *   Credentials and overrides; `model` defaults to "tts-1" and `baseUrl` to
 *   "https://api.openai.com/v1".
 * @param {string} options.text - Text to speak.
 * @param {string} options.speaker - Voice id sent as `voice`.
 * @param {object} options.stream - Sink passed to `createStreamHandler`.
 * @returns {Promise<Uint8Array|null>} `null` immediately when no API key is
 *   configured, otherwise the stream handler's `result`.
 */
const openaiTTS = async ({
  openai,
  text,
  speaker,
  stream: responseStream
}) => {
  const key = openai?.apiKey;
  const model = openai?.model ?? "tts-1";
  const baseUrl = openai?.baseUrl ?? "https://api.openai.com/v1";
  if (!key) {
    console.log("\u274C \u627E\u4E0D\u5230 OpenAI TTS \u73AF\u5883\u53D8\u91CF\uFF1AOPENAI_API_KEY");
    return null;
  }
  const streamHandler = createStreamHandler(responseStream);
  const pump = async () => {
    let resp;
    try {
      resp = await fetch(`${baseUrl}/audio/speech`, {
        method: "post",
        headers: {
          Authorization: `Bearer ${key}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify({
          model,
          input: text,
          voice: speaker
        })
      });
    } catch (err) {
      streamHandler.error(err, "OpenAI | Request failed!");
      return;
    }
    if (!resp.ok) {
      // The body of an error response is an error document, not audio.
      const detail = await resp.text().catch(() => "");
      streamHandler.error(detail, `OpenAI | HTTP ${resp.status}`);
      return;
    }
    const body = resp.body;
    if (!body) {
      streamHandler.error(resp, "OpenAI | Get stream body failed!");
      return;
    }
    const reader = body.getReader();
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (value) {
          streamHandler.push(value);
        }
        if (done) {
          streamHandler.end();
          break;
        }
      }
    } catch (err) {
      streamHandler.error(err, "OpenAI | Read stream failed!");
    } finally {
      reader.releaseLock();
    }
  };
  // Streaming runs in the background; the caller awaits `result`.
  pump().catch((err) => streamHandler.error(err, "OpenAI | Unknown error"));
  return streamHandler.result;
};
// Provider descriptor for OpenAI TTS: display name, synthesis function and
// the voices it offers.
const kOpenAI = { name: "OpenAI TTS", tts: openaiTTS, speakers: kOpenAISpeakers };
// Volcano Engine voices, written as [display name, gender label, voice id]
// rows grouped by scenario and expanded into { name, gender, speaker } objects.
// NOTE(review): BV120_streaming is listed twice (audiobook and video dubbing)
// and two entries share the same display name (BV019 / BV423) — confirm
// whether that is intended.
const kVolcanoTTSSpeakers = (() => {
  const FEMALE = "\u5973";
  const MALE = "\u7537";
  const rows = [
    // General purpose
    ["\u707F\u707F", FEMALE, "BV700_streaming"],
    ["\u75C5\u5A07\u59D0\u59D0", FEMALE, "ICL_zh_female_bingjiaojiejie_tob"],
    ["\u707F\u707F\u4E8C", FEMALE, "BV700_V2_streaming"],
    ["\u6893\u6893", FEMALE, "BV406_streaming"],
    ["\u6893\u6893\u4E8C", FEMALE, "BV406_V2_streaming"],
    ["\u71C3\u71C3", FEMALE, "BV407_streaming"],
    ["\u71C3\u71C3\u4E8C", FEMALE, "BV407_V2_streaming"],
    ["\u7080\u7080", FEMALE, "BV705_streaming"],
    ["\u64CE\u82CD", FEMALE, "BV701_streaming"],
    ["\u64CE\u82CD\u4E8C", FEMALE, "BV701_V2_streaming"],
    ["\u901A\u7528\u5973\u58F0", FEMALE, "BV001_streaming"],
    ["\u901A\u7528\u5973\u58F0\u4E8C", FEMALE, "BV001_V2_streaming"],
    ["\u901A\u7528\u7537\u58F0", MALE, "BV002_streaming"],
    // Audiobook reading
    ["\u9633\u5149\u9752\u5E74", MALE, "BV123_streaming"],
    ["\u53CD\u5377\u9752\u5E74", MALE, "BV120_streaming"],
    ["\u901A\u7528\u8D58\u5A7F", MALE, "BV119_streaming"],
    ["\u53E4\u98CE\u5C11\u5FA1", FEMALE, "BV115_streaming"],
    ["\u9738\u6C14\u9752\u53D4", MALE, "BV107_streaming"],
    ["\u8D28\u6734\u9752\u5E74", MALE, "BV100_streaming"],
    ["\u6E29\u67D4\u6DD1\u5973", FEMALE, "BV104_streaming"],
    ["\u5F00\u6717\u9752\u5E74", MALE, "BV004_streaming"],
    ["\u751C\u5BA0\u5C11\u5FA1", FEMALE, "BV113_streaming"],
    ["\u5112\u96C5\u9752\u5E74", MALE, "BV102_streaming"],
    // Smart assistant
    ["\u751C\u7F8E\u5C0F\u6E90", FEMALE, "BV405_streaming"],
    ["\u4EB2\u5207\u5973\u58F0", FEMALE, "BV007_streaming"],
    ["\u77E5\u6027\u5973\u58F0", FEMALE, "BV009_streaming"],
    ["\u8BDA\u8BDA", FEMALE, "BV419_streaming"],
    ["\u7AE5\u7AE5", FEMALE, "BV415_streaming"],
    ["\u4EB2\u5207\u7537\u58F0", MALE, "BV008_streaming"],
    // Video dubbing
    ["\u8BD1\u5236\u7247\u7537\u58F0", MALE, "BV408_streaming"],
    ["\u61D2\u5C0F\u7F8A", MALE, "BV426_streaming"],
    ["\u6E05\u65B0\u6587\u827A\u5973\u58F0", FEMALE, "BV428_streaming"],
    ["\u9E21\u6C64\u5973\u58F0", FEMALE, "BV403_streaming"],
    ["\u667A\u6167\u8001\u8005", MALE, "BV158_streaming"],
    ["\u6148\u7231\u59E5\u59E5", FEMALE, "BV157_streaming"],
    ["\u8BF4\u5531\u5C0F\u54E5", MALE, "BR001_streaming"],
    ["\u6D3B\u529B\u89E3\u8BF4\u7537", MALE, "BV410_streaming"],
    ["\u5C0F\u5E05", MALE, "BV411_streaming"],
    ["\u5C0F\u5E05\u591A\u60C5\u611F", MALE, "BV437_streaming"],
    ["\u5C0F\u7F8E", FEMALE, "BV412_streaming"],
    ["\u7EA8\u7ED4\u9752\u5E74", MALE, "BV159_streaming"],
    ["\u76F4\u64AD\u4E00\u59D0", MALE, "BV418_streaming"],
    ["\u53CD\u5377\u9752\u5E74", MALE, "BV120_streaming"],
    ["\u6C89\u7A33\u89E3\u8BF4\u7537", MALE, "BV142_streaming"],
    ["\u6F47\u6D12\u9752\u5E74", MALE, "BV143_streaming"],
    ["\u9633\u5149\u7537\u58F0", MALE, "BV056_streaming"],
    ["\u6D3B\u6CFC\u5973\u58F0", FEMALE, "BV005_streaming"],
    ["\u5C0F\u841D\u8389", FEMALE, "BV064_streaming"],
    // Character voices
    ["\u5976\u6C14\u840C\u5A03", MALE, "BV051_streaming"],
    ["\u52A8\u6F2B\u6D77\u7EF5", MALE, "BV063_streaming"],
    ["\u52A8\u6F2B\u6D77\u661F", MALE, "BV417_streaming"],
    ["\u52A8\u6F2B\u5C0F\u65B0", MALE, "BV050_streaming"],
    ["\u5929\u624D\u7AE5\u58F0", MALE, "BV061_streaming"],
    // Advertising
    ["\u4FC3\u9500\u7537\u58F0", MALE, "BV401_streaming"],
    ["\u4FC3\u9500\u5973\u58F0", FEMALE, "BV402_streaming"],
    ["\u78C1\u6027\u7537\u58F0", MALE, "BV006_streaming"],
    // News broadcast
    ["\u65B0\u95FB\u5973\u58F0", FEMALE, "BV011_streaming"],
    ["\u65B0\u95FB\u7537\u58F0", MALE, "BV012_streaming"],
    // Education
    ["\u77E5\u6027\u59D0\u59D0", FEMALE, "BV034_streaming"],
    ["\u6E29\u67D4\u5C0F\u54E5", MALE, "BV033_streaming"],
    // Dialects
    ["\u4E1C\u5317\u8001\u94C1", MALE, "BV021_streaming"],
    ["\u4E1C\u5317\u4E2B\u5934", FEMALE, "BV020_streaming"],
    ["\u65B9\u8A00\u707F\u707F", FEMALE, "BV704_streaming"],
    ["\u4F5F\u638C\u67DC", FEMALE, "BV210_streaming"],
    ["\u6CAA\u4E0A\u963F\u59E8", FEMALE, "BV217_streaming"],
    ["\u5E7F\u897F\u8001\u8868", MALE, "BV213_streaming"],
    ["\u751C\u7F8E\u53F0\u59B9", FEMALE, "BV025_streaming"],
    ["\u53F0\u666E\u7537\u58F0", MALE, "BV227_streaming"],
    ["\u6E2F\u5267\u7537\u795E", MALE, "BV026_streaming"],
    ["\u5E7F\u4E1C\u8BDD", FEMALE, "BV424_streaming"],
    ["\u5929\u6D25\u8BDD", MALE, "BV212_streaming"],
    ["\u90D1\u5DDE\u8BDD", MALE, "BV214_streaming"],
    ["\u91CD\u5E86\u8BDD", MALE, "BV019_streaming"],
    ["\u56DB\u5DDD\u8BDD", FEMALE, "BV221_streaming"],
    ["\u91CD\u5E86\u8BDD", FEMALE, "BV423_streaming"],
    ["\u6E56\u5357\u8BDD", FEMALE, "BV226_streaming"],
    ["\u957F\u6C99\u8BDD", FEMALE, "BV216_streaming"]
  ];
  return rows.map(([name, gender, speaker]) => ({ name, gender, speaker }));
})();
// WebSocket endpoint of the Volcano Engine binary TTS protocol.
const kAPI2 = "wss://openspeech.bytedance.com/api/v1/tts/ws_binary";
// 4-byte header prepended to every client request (0x11 0x10 0x11 0x00).
const kDefaultHeader = Buffer.from([17, 16, 17, 0]);

/**
 * Synthesize `text` through the Volcano Engine WebSocket API.
 *
 * The request is JSON, gzip-compressed and framed as
 * header + 4-byte big-endian payload length + payload. Audio frames are
 * forwarded to `stream`; the socket is closed on the final frame or as soon
 * as the server reports an error.
 *
 * @param {object} options
 * @param {object} [options.volcano] - Credentials read by `getVolcanoConfig`.
 * @param {string} options.text - Text to speak.
 * @param {string} options.speaker - Voice id sent as `audio.voice_type`.
 * @param {object} [options.stream] - Sink for audio chunks; a no-op Readable
 *   is created when omitted.
 * @returns {Promise<Uint8Array|null>} `null` when credentials are missing,
 *   otherwise the stream handler's `result`.
 */
const volcanoTTS = async ({
  volcano,
  text,
  speaker,
  stream: responseStream
}) => {
  const request = getVolcanoConfig(volcano);
  if (!request) {
    return null;
  }
  request.request.text = text;
  request.request.reqid = randomUUID();
  request.audio.voice_type = speaker;
  const payloadBytes = zlib.gzipSync(Buffer.from(JSON.stringify(request)));
  const payloadLength = Buffer.alloc(4, 0);
  payloadLength.writeUInt32BE(payloadBytes.length, 0);
  const fullClientRequest = Buffer.concat([
    kDefaultHeader,
    payloadLength,
    payloadBytes
  ]);
  const streamHandler = createStreamHandler(responseStream || new Readable({ read() {
  } }));
  // Set once a failure was reported so the "close" event is not treated as a
  // successful end and later frames are ignored.
  let failed = false;
  try {
    const ws = new WebSocket2(kAPI2, {
      headers: { Authorization: `Bearer; ${request.app.token}` }
    });
    ws.on("message", (data) => {
      if (failed) {
        return;
      }
      const responseBuffer = Buffer.from(data);
      const messageSpecificFlags = responseBuffer[1] & 15;
      const audioData = parseAudioData(streamHandler, responseBuffer);
      if (!audioData) {
        // parseAudioData already reported the error; release the socket.
        failed = true;
        ws.close();
        return;
      }
      if (audioData === "started") {
        return;
      }
      if (audioData.length > 0) {
        streamHandler.push(audioData);
      }
      // Flags 2 and 3 both mark the last server frame (negative sequence
      // number); close even when that frame carries no audio bytes.
      if (messageSpecificFlags === 2 || messageSpecificFlags === 3) {
        ws.close();
      }
    });
    ws.on("error", (err) => {
      if (failed) {
        return;
      }
      failed = true;
      streamHandler.error(err, "Volcano | WebSocket error");
    });
    ws.on("close", () => {
      if (!failed) {
        streamHandler.end();
      }
    });
    ws.on("open", () => {
      ws.send(fullClientRequest);
    });
  } catch (err) {
    if (!failed) {
      failed = true;
      streamHandler.error(err, "Volcano | Unknown error");
    }
  }
  return streamHandler.result;
};
/**
 * Decode one server frame of the Volcano binary TTS protocol.
 *
 * Frame layout as read here: the low nibble of byte 0 is the header size in
 * 4-byte words, byte 1 holds the message type (high nibble) and the
 * message-specific flags (low nibble), and the low nibble of byte 2 is the
 * compression method. The payload starts after the header.
 *
 * Malformed or truncated frames are reported through `streamHandler.error`
 * instead of throwing inside the socket's message handler.
 *
 * @param {{error: Function}} streamHandler - Receives protocol errors.
 * @param {Buffer} responseBuffer - Raw frame.
 * @returns {"started"|Buffer|null} "started" for an acknowledgement without
 *   sequence number, the audio bytes for an audio frame, or `null` after an
 *   error has been reported.
 */
function parseAudioData(streamHandler, responseBuffer) {
  if (responseBuffer.length < 4) {
    streamHandler.error("Truncated frame header", "Volcano | Malformed response");
    return null;
  }
  const headerSize = responseBuffer[0] & 15;
  const messageType = responseBuffer[1] >> 4;
  const messageSpecificFlags = responseBuffer[1] & 15;
  const messageCompression = responseBuffer[2] & 15;
  const payload = responseBuffer.subarray(headerSize * 4);
  if (messageType === 11) {
    if (messageSpecificFlags === 0) {
      return "started";
    }
    // Skip the 4-byte sequence number and 4-byte payload size.
    return payload.subarray(8);
  }
  if (messageType === 15) {
    if (payload.length < 8) {
      streamHandler.error("Truncated error frame", "Volcano | Malformed response");
      return null;
    }
    const errorCode = payload.readInt32BE(0);
    let errorMessage = payload.subarray(8);
    if (messageCompression === 1) {
      try {
        errorMessage = zlib.gunzipSync(errorMessage);
      } catch {
        // Keep the raw bytes; the error code is still worth reporting.
      }
    }
    streamHandler.error(
      String(errorMessage),
      `Volcano | Error code: ${errorCode}`
    );
    return null;
  }
  streamHandler.error("Unknown", `Message type: ${messageType}`);
  return null;
}
/**
 * Build the base request object for the Volcano Engine TTS API.
 *
 * @param {{appId?: string, accessToken?: string, userId?: string,
 *   encoding?: string}} [volcano] - Credentials and optional overrides.
 * @returns {object|undefined} Request skeleton with `app`, `user`, `audio`
 *   and `request` sections, or `undefined` (after logging) when the app id
 *   or access token is missing. `uid` defaults to "666" and `encoding` to
 *   "pcm".
 */
const getVolcanoConfig = (volcano) => {
  const appid = volcano?.appId;
  const token = volcano?.accessToken;
  const uid = volcano?.userId ?? "666";
  const encoding = volcano?.encoding ?? "pcm";
  const hasCredentials = Boolean(appid && token);
  if (!hasCredentials) {
    console.log(
      "\u274C \u627E\u4E0D\u5230\u706B\u5C71\u5F15\u64CE TTS \u73AF\u5883\u53D8\u91CF\uFF1AVOLCANO_TTS_APP_ID\u3001VOLCANO_TTS_ACCESS_TOKEN"
    );
    return undefined;
  }
  const app = { appid, token, cluster: "volcano_tts" };
  const user = { uid };
  const audio = { encoding };
  const request = { text_type: "plain", operation: "submit" };
  return { app, user, audio, request };
};
// Provider descriptor for Volcano Engine TTS: display name, synthesis
// function and the voices it offers.
const kVolcanoTTS = { name: "\u706B\u5C71\u5F15\u64CE TTS", tts: volcanoTTS, speakers: kVolcanoTTSSpeakers };
/**
 * Decode one server frame of the Volcano binary TTS protocol, keeping the
 * sequence number so the caller can detect the final chunk.
 *
 * Header fields are read the same way as in `parseAudioData`. For audio
 * frames the payload starts with a signed 4-byte sequence number and an
 * unsigned 4-byte payload size, followed by the audio bytes; a negative
 * sequence number marks the last chunk.
 *
 * Malformed or truncated frames are reported through `streamHandler.error`
 * instead of throwing inside the socket's message handler.
 *
 * @param {{error: Function}} streamHandler - Receives protocol errors.
 * @param {Buffer} responseBuffer - Raw frame.
 * @returns {"started"|{audio: Buffer, isDone: boolean, sequenceNumber: number}|null}
 *   "started" for an acknowledgement without sequence number, the decoded
 *   audio chunk, or `null` after an error has been reported.
 */
function parseAudioData_bigModel(streamHandler, responseBuffer) {
  if (responseBuffer.length < 4) {
    streamHandler.error("Truncated frame header", "Volcano | Malformed response");
    return null;
  }
  const headerSize = responseBuffer[0] & 15;
  const messageType = responseBuffer[1] >> 4;
  const messageSpecificFlags = responseBuffer[1] & 15;
  const messageCompression = responseBuffer[2] & 15;
  const payload = responseBuffer.subarray(headerSize * 4);
  if (messageType === 11) {
    if (messageSpecificFlags === 0) {
      console.log("Server ACK received (no sequence number)");
      return "started";
    }
    if (payload.length < 8) {
      streamHandler.error("Truncated audio frame", "Volcano | Malformed response");
      return null;
    }
    const sequenceNumber = payload.readInt32BE(0);
    const payloadSize = payload.readUInt32BE(4);
    const audioData = payload.subarray(8);
    console.log(
      `Sequence number: ${sequenceNumber}, Payload size: ${payloadSize}`
    );
    return {
      audio: audioData,
      isDone: sequenceNumber < 0,
      sequenceNumber
    };
  }
  if (messageType === 15) {
    if (payload.length < 8) {
      streamHandler.error("Truncated error frame", "Volcano | Malformed response");
      return null;
    }
    const errorCode = payload.readInt32BE(0);
    let errorMessage = payload.subarray(8);
    if (messageCompression === 1) {
      try {
        errorMessage = zlib.gunzipSync(errorMessage);
      } catch {
        // Keep the raw bytes; the error code is still worth reporting.
      }
    }
    streamHandler.error(
      String(errorMessage),
      `Volcano | Error code: ${errorCode}`
    );
    return null;
  }
  streamHandler.error("Unknown", `Message type: ${messageType}`);
  return null;
}
/**
 * Synthesize `text` through the Volcano Engine WebSocket API with a
 * caller-selected `operation`, using the sequence number of each frame to
 * detect the final chunk.
 *
 * The socket is closed on the final chunk (even when it carries no audio
 * bytes) and as soon as the server reports an error.
 *
 * @param {object} options
 * @param {object} [options.volcano] - Credentials read by `getVolcanoConfig`.
 * @param {string} options.text - Text to speak.
 * @param {string} options.speaker - Voice id sent as `audio.voice_type`.
 * @param {object} options.stream - Sink passed to `createStreamHandler`.
 * @param {string} [options.operation="submit"] - Value of `request.operation`.
 * @returns {Promise<Uint8Array|null>} `null` when credentials are missing,
 *   otherwise the stream handler's `result`.
 */
async function volcanoWebSocketTTS({
  volcano,
  text,
  speaker,
  stream: responseStream,
  operation = "submit"
}) {
  const request = getVolcanoConfig(volcano);
  if (!request) {
    return null;
  }
  request.request.text = text;
  request.request.reqid = randomUUID();
  request.audio.voice_type = speaker;
  request.request.operation = operation;
  // Frame: shared 4-byte header + big-endian payload length + gzip(JSON).
  const payloadBytes = zlib.gzipSync(Buffer.from(JSON.stringify(request)));
  const payloadLength = Buffer.alloc(4);
  payloadLength.writeUInt32BE(payloadBytes.length, 0);
  const fullClientRequest = Buffer.concat([
    kDefaultHeader,
    payloadLength,
    payloadBytes
  ]);
  const streamHandler = createStreamHandler(responseStream);
  // Set once a failure was reported so the "close" event is not treated as a
  // successful end and later frames are ignored.
  let failed = false;
  try {
    const ws = new WebSocket2(kAPI2, {
      headers: { Authorization: `Bearer; ${request.app.token}` }
    });
    ws.on("message", (data) => {
      if (failed) {
        return;
      }
      const audioResult = parseAudioData_bigModel(
        streamHandler,
        Buffer.from(data)
      );
      if (!audioResult) {
        // The parser already reported the error; release the socket.
        failed = true;
        ws.close();
        return;
      }
      if (audioResult === "started") {
        console.log("TTS started, waiting for audio data...");
        return;
      }
      if (audioResult.audio.length > 0) {
        streamHandler.push(audioResult.audio);
      }
      if (audioResult.isDone) {
        console.log("Final audio chunk received, closing connection");
        ws.close();
      }
    });
    ws.on("error", (err) => {
      if (failed) {
        return;
      }
      failed = true;
      streamHandler.error(err, "Volcano WebSocket | Error");
    });
    ws.on("close", () => {
      if (!failed) {
        streamHandler.end();
      }
    });
    ws.on("open", () => {
      ws.send(fullClientRequest);
    });
  } catch (err) {
    if (!failed) {
      failed = true;
      streamHandler.error(err, "Volcano WebSocket | Unknown error");
    }
  }
  return streamHandler.result;
}
// src/index.ts
// Registered TTS back ends. Order matters: speaker lookups take the first
// provider that matches, and the first entry supplies the fallback speaker.
const kTTSProviders = [
  kVolcanoTTS, // Volcano Engine
  kEdgeTTS, // Microsoft Bing Read Aloud
  kOpenAI // OpenAI TTS
];
// Every speaker of every provider in one flat list, in provider order.
const kTTSSpeakers = kTTSProviders.flatMap((provider) => provider.speakers);
/**
 * Synthesize speech with the provider that owns the requested speaker.
 *
 * With `protocol: "websocket"` and a `volcano` config the request goes to
 * `volcanoWebSocketTTS` with the speaker passed through unchanged; otherwise
 * the provider is resolved with `findTTSProvider`. Any rejection from the
 * back end is turned into a `null` result.
 *
 * @param {object} options
 * @param {string} [options.text] - Text to speak; empty text falls back to
 *   `kTTSDefaultText`.
 * @param {string} [options.speaker] - Speaker display name or id.
 * @param {object} [options.stream] - Sink for audio chunks; a no-op Readable
 *   is created when omitted.
 * @param {string} [options.defaultSpeaker] - Fallback speaker.
 * @param {string} [options.protocol="default"]
 * @param {string} [options.operation="submit"] - WebSocket path only.
 * @param {AbortSignal} [options.signal] - If already aborted, nothing is
 *   started and `null` is returned; a later abort is only logged.
 * @returns {Promise<Uint8Array|null>}
 */
async function tts(options) {
  const {
    text,
    speaker,
    stream,
    defaultSpeaker,
    protocol = "default",
    operation = "submit",
    signal,
    ...providerOptions
  } = options;
  if (signal) {
    if (signal.aborted) {
      console.log("TTS operation was aborted before it started");
      return null;
    }
    signal.addEventListener("abort", () => {
      console.log("TTS operation aborted");
    });
  }
  const toNull = () => null;
  const input = text || kTTSDefaultText;
  const sink = stream || new Readable({ read() {
  } });
  const useWebSocket = protocol === "websocket" && Boolean(providerOptions.volcano);
  if (useWebSocket) {
    const pending = volcanoWebSocketTTS({
      volcano: providerOptions.volcano,
      text: input,
      speaker: speaker || defaultSpeaker,
      stream: sink,
      operation
    });
    return pending.catch(toNull);
  }
  const provider = findTTSProvider(speaker, defaultSpeaker);
  const pending = provider.tts({
    ...providerOptions,
    speaker: provider.speaker,
    text: input,
    stream: sink
  });
  return pending.catch(toNull);
}
/**
 * Bind a base configuration to `tts`.
 *
 * @param {object} config - Options applied to every call.
 * @returns {Function} A function taking per-call options; per-call keys
 *   override those of `config`.
 */
function createTTS(config) {
  const boundTTS = (options) => {
    const merged = { ...config, ...options };
    return tts(merged);
  };
  return boundTTS;
}
/**
 * Variant of `tts` with optional text preprocessing.
 *
 * Processing order:
 * 1. If `signal` is already aborted, return `null` before doing any work
 *    (this now also skips `customTextProcessor`).
 * 2. If `customTextProcessor` is given and `text` is non-empty, hand the
 *    whole job to it and return `null`; it receives
 *    `{ text, outputStream, tts, options }`.
 * 3. Otherwise — or when the custom processor throws — apply `textFilter`
 *    (when it is a function) and synthesize as `tts` does.
 *
 * @param {object} options - Same options as `tts`, plus:
 * @param {Function} [options.textFilter] - Maps the input text to the text
 *   that is synthesized.
 * @param {Function} [options.customTextProcessor] - Fully custom handler.
 * @returns {Promise<Uint8Array|null>}
 */
async function tts2(options) {
  const {
    text: rawText,
    speaker,
    stream,
    defaultSpeaker,
    protocol = "default",
    operation = "submit",
    signal,
    textFilter,
    customTextProcessor,
    ...rest
  } = options;
  if (signal) {
    if (signal.aborted) {
      console.log("TTS operation was aborted before it started");
      return null;
    }
    signal.addEventListener(
      "abort",
      () => {
        console.log("TTS operation aborted");
      },
      { once: true }
    );
  }
  const outputStream = stream || new PassThrough();
  if (rawText && customTextProcessor) {
    try {
      await customTextProcessor({
        text: rawText,
        outputStream,
        tts,
        options: { ...options, text: void 0, stream: void 0 }
      });
      return null;
    } catch (error) {
      // Fall through to the built-in pipeline below.
      console.error("\u81EA\u5B9A\u4E49\u5904\u7406\u5668\u6267\u884C\u5931\u8D25:", error);
    }
  }
  // The filter also applies when falling back after a processor failure, so
  // the fallback never speaks unfiltered text.
  const text = rawText && typeof textFilter === "function" ? textFilter(rawText) : rawText;
  if (protocol === "websocket" && rest.volcano) {
    const { volcano } = rest;
    return volcanoWebSocketTTS({
      volcano,
      text: text || kTTSDefaultText,
      speaker: speaker || defaultSpeaker,
      stream: outputStream,
      operation
    }).catch(() => null);
  }
  const provider = findTTSProvider(speaker, defaultSpeaker);
  return provider.tts({
    ...rest,
    speaker: provider.speaker,
    text: text || kTTSDefaultText,
    stream: outputStream
  }).catch(() => null);
}
/**
 * Bind a base configuration to `tts2`.
 *
 * @param {object} config - Options applied to every call.
 * @returns {Function} A function taking per-call options; per-call keys
 *   override those of `config`.
 */
function createTTS2(config) {
  const boundTTS2 = (options) => {
    const merged = { ...config, ...options };
    return tts2(merged);
  };
  return boundTTS2;
}
export { createTTS, createTTS2, kTTSProviders, kTTSSpeakers, tts, tts2 };