UNPKG

esp-ai

Version:

Provide a complete set of AI dialogue solutions for your development board, including but not limited to the IAT+LLM+TTS integration solution for the ESP32 series development board. | 为你的开发板提供全套的AI对话方案，包括但不限于 `ESP32` 系列开发板的 `IAT+LLM+TTS` 集成方案。

github.com/wangzongming/esp-ai

wangzongming/esp-ai

276 lines (254 loc) • 11.7 kB

JavaScript

/* * MIT License * * Copyright (c) 2025-至今小明IO * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * @author 小明IO * @email 1746809408@qq.com * @github https://github.com/wangzongming/esp-ai * @websit https://espai.fun */ const log = require("../../utils/log"); const createUUID = require("../../utils/createUUID"); const Audio_sender = require("../../utils/audio_sender"); /** * @param {Buffer} is_over 是否完毕 * @param {Buffer} audio 音频流 * @param {WebSocket} tts_task_id WebSocket 连接key * @param {WebSocket} ws WebSocket 连接 */ async function cb({ device_id, is_over, audio, ws, tts_task_id, session_id, text_is_over, need_record, frameOnTTScb, is_create_cache, audio_sender }) { try { const { devLog, onTTScb } = G_config; if (!G_devices.get(device_id)) return; const { ws: ws_client, tts_list, session_id: now_session_id } = G_devices.get(device_id); if (!ws_client) return; if ( !is_create_cache && session_id && now_session_id && session_id !== now_session_id && !([G_session_ids["tts_fn"]].includes(session_id)) ) return; !is_create_cache && onTTScb && onTTScb({ device_id, is_over, audio, ws: ws_client, instance: G_Instance, sendToClient: () => ws_client && ws_client.send(JSON.stringify({ type: "instruct", command_id: "on_tts_cb", data: audio.toString('base64') })) }); !is_create_cache && frameOnTTScb && frameOnTTScb(audio, is_over); ws_client.isAlive = true; audio.length && audio_sender.sendAudio(audio); // 告诉客户端本 TTS chunk 播放完毕 if (is_over) { devLog && log.tts_info('-> TTS 片段转换完毕'); ws.close && ws.close(); tts_list.delete(tts_task_id); function sendEndBuffer() { audio_sender.sendAudio(null, G_session_ids["tts_all_end"]); } function sendEndAlignBuffer() { audio_sender.sendAudio(null, G_session_ids["tts_all_end_align"]); } if (text_is_over) { /** * 意图推理中时候并不知道是否还需要重新采集音频 * 所以需要等待推理完毕后才进行最后一帧音频流的发送 */ G_Instance.awaitIntention(device_id, () => { if (!G_devices.get(device_id)) return; const { stop_next_session } = G_devices.get(device_id); if (!stop_next_session) { if (need_record) { sendEndAlignBuffer(); } else { sendEndBuffer(); } } else { sendEndBuffer(); } }) } else { // 文本没有结束，发送 chunk 标识 audio_sender.sendAudio(null, G_session_ids["tts_chunk_end"]); } } } catch (err) { console.log(err); log.error(`[${device_id}] TTS 回调错误： ${err}`) } } /** * TTS 模块 * @param {String} device_id 设备id * @param {String} text 待播报的文本 * @param {Boolean} session_id 会话id(这里绝不是从设备信息中取，设备信息会实时更新) * @param {Boolean} text_is_over 文本是否完整，或者文本是否是最后一段 * @param {Boolean} need_record 是否需要重新识别，由客户端控制 * @param {Boolean} frameOnTTScb 上层要进行流监听时提供的回调 * @param {Boolean} is_create_cache 创造缓存数据二执行的函数 * @return {Function} (pcm)=> Promise<Boolean> * */ function TTSFN(device_id, opts) { try { return new Promise((resolve) => { try { if (!G_devices.get(device_id)) return; const { devLog, plugins = [], tts_params_set, onTTS } = G_config; const { ws: ws_client, error_catch, tts_list, user_config: { iat_server, llm_server, tts_server, tts_config } } = G_devices.get(device_id); const { text, is_pre_connect, session_id, text_is_over = true, need_record = false, frameOnTTScb, is_cache, is_create_cache, tts_task_id = createUUID() } = opts; const plugin = plugins.find(item => item.name === tts_server && item.type === "TTS")?.main; const TTS_FN = plugin || require(`./${tts_server}`); if (!is_pre_connect) { if (!text || !(`${text}`.replace(/\s/g, ''))) return true; // 只有一个符号时放弃 const punctuationRegex = /[。，！？!?；;！？…~～」]|(?<![A-Za-z0-9])\.(?![A-Za-z0-9])|(?<![A-Za-z])'(?![A-Za-z])/g; if(text.replace(punctuationRegex, "") === "")return true; } if (is_create_cache) { devLog && log.tts_info('-> 开始缓存TTS: ', text); } else { devLog && log.tts_info('-> 开始请求TTS: ', text); } const audio_sender = new Audio_sender(ws_client, device_id); !is_create_cache && onTTS && onTTS({ device_id, tts_task_id, ws: ws_client, text, text_is_over, instance: G_Instance, sendToClient: (_text) => ws_client && ws_client.send(JSON.stringify({ type: "instruct", command_id: "on_tts", data: _text || text })) }); /** * 记录 tts 服务对象 */ const logWSServer = (wsServer) => { tts_list && typeof tts_list.set === 'function' && tts_list.set(tts_task_id, wsServer) } /** * 开始连接 tts 服务的回调 */ const connectServerBeforeCb = () => { if (!G_devices.get(device_id)) return; G_devices.set(device_id, { ...G_devices.get(device_id), tts_server_connect_ing: true, // 记录下来，如果设备打断时，需要框架调用，否则本任务将永远无法结束 resolve_tts_task: resolve }) } /** * 连接 tts 服务后的回调 */ const connectServerCb = async (connected) => { if (connected) { if (!G_devices.get(device_id)) return; devLog && log.tts_info("-> TTS 服务连接成功！") G_devices.set(device_id, { ...G_devices.get(device_id), audio_sender: audio_sender, tts_server_connected: true, tts_server_connect_ing: false, }) !is_create_cache && ws_client && ws_client.send(JSON.stringify({ type: "session_status", status: "tts_chunk_start", })); // 启动音频发送任务 audio_sender.startSend(tts_task_id === "connected_reply" ? "0001" : session_id, () => { G_devices.set(device_id, { ...G_devices.get(device_id), resolve_tts_task: null, audio_sender: null, }) resolve(true); }); } else { if (!G_devices.get(device_id)) { G_devices.set(device_id, { ...G_devices.get(device_id), resolve_tts_task: null }) return resolve(true); } G_devices.set(device_id, { ...G_devices.get(device_id), tts_server_connected: false, tts_server_connect_ing: false, }) } } /** * tts 服务发生错误时调用 */ const ttsServerErrorCb = (err, code) => { error_catch("TTS", code || "302", err); tts_list.delete(tts_task_id) log.error(err) G_devices.set(device_id, { ...G_devices.get(device_id), resolve_tts_task: null }) resolve(true); } ws_client && ws_client.send(JSON.stringify({ type: "play_audio", tts_task_id })); TTS_FN({ text, device_id, session_id, is_pre_connect, devLog, tts_config, tts_params_set, log, iat_server, llm_server, tts_server, text_is_over, cb: (arg) => cb({ ...arg, tts_task_id, device_id, session_id, text_is_over, need_record, frameOnTTScb, is_cache, is_create_cache, audio_sender }), logWSServer, ttsServerErrorCb, connectServerBeforeCb, connectServerCb, }) } catch (err) { console.log(err); log.error(`TTS 错误： ${err}`) } }) } catch (err) { console.error(`tts/index.js 错误：`, err) } }; module.exports = TTSFN;