UNPKG

@minto-ai/huoshan-tts

Version:

借助“火山引擎在线语音合成API”实现浏览器端“文本转语音”

143 lines (124 loc) 4.58 kB
import type { ParallelTaskExecuteContext } from '../../handler'
import { ParallelHandler } from '../../handler'

/**
 * Pipeline stage that turns Base64-encoded raw PCM chunks into decoded
 * `AudioBuffer`s.
 *
 * Each chunk is wrapped in a 44-byte WAV header so that the browser's
 * `decodeAudioData` can parse it; the resulting buffer is forwarded to the
 * next handler and the task is then reported as completed.
 */
class DecodeData extends ParallelHandler<string, AudioBuffer> {
  /** Sample rate (Hz) of the decoding context; used for every context this class creates. */
  private static readonly CONTEXT_SAMPLE_RATE = 44100

  /** Payloads shorter than this many bytes are treated as too small to be valid audio. */
  private static readonly MIN_PCM_BYTES = 100

  /** Largest slice handed to `String.fromCharCode` at once, to stay below engine argument limits. */
  private static readonly BINARY_CHUNK_SIZE = 0x8000

  private audioContext: AudioContext = DecodeData.createAudioContext()

  /** Creates a decoding context with the sample rate shared by all sessions. */
  private static createAudioContext(): AudioContext {
    return new AudioContext({
      sampleRate: DecodeData.CONTEXT_SAMPLE_RATE,
    })
  }

  /**
   * Decodes one task item and forwards the resulting `AudioBuffer`.
   *
   * The task is always reported through `taskCompletedCallback`, including
   * when it is skipped (last execution marker, empty payload, payload shorter
   * than the minimum) and when the browser fails to decode the audio.
   *
   * @param context - Execution context carrying the Base64 PCM payload in `taskItem.original`.
   */
  public execute(
    context: ParallelTaskExecuteContext<string, AudioBuffer>,
  ): void {
    const { uuid, original } = context.taskItem
    const completeTask = (): void => this.taskCompletedCallback(uuid)

    if (context.isLastExecute || !original) {
      completeTask()
      return
    }

    // Decode the payload once and reuse the bytes for both the size check and
    // the WAV assembly, instead of round-tripping through Base64 repeatedly.
    const pcmBytes = this.decodeBase64(original)
    if (pcmBytes.length < DecodeData.MIN_PCM_BYTES) {
      completeTask()
      return
    }

    const wavBytes = this.buildWAV(pcmBytes, 24000, 1, 16)

    const pending = this.audioContext.decodeAudioData(
      wavBytes.buffer,
      (audioData) => {
        this.forwardToHandler(audioData)
        completeTask()
      },
      (error) => {
        // Without this callback a failed decode would leave the task
        // unfinished forever.
        console.error('音频解码失败:', error)
        completeTask()
      },
    )
    // The returned promise rejects in addition to the error callback firing;
    // the failure is already handled above. Legacy callback-only
    // implementations may return nothing, hence the optional call.
    pending?.catch(() => {})
  }

  /**
   * Creates a 44-byte canonical WAV (RIFF) header for uncompressed PCM data.
   *
   * @param sampleRate - Sample rate in Hz.
   * @param numChannels - Number of channels.
   * @param bitsPerSample - Bit depth of each sample.
   * @param dataLength - Length of the PCM payload in bytes.
   * @returns The WAV header.
   */
  createWAVHeader(sampleRate: number, numChannels: number, bitsPerSample: number, dataLength: number): ArrayBuffer {
    const buffer = new ArrayBuffer(44)
    const view = new DataView(buffer)
    const bytesPerFrame = numChannels * bitsPerSample / 8

    // Chunk identifiers are written big-endian so the bytes spell the ASCII
    // tag; every numeric field is little-endian.
    view.setUint32(0, 0x52494646, false) // "RIFF"
    view.setUint32(4, 36 + dataLength, true) // size of everything after this field
    view.setUint32(8, 0x57415645, false) // "WAVE"
    view.setUint32(12, 0x666D7420, false) // "fmt "
    view.setUint32(16, 16, true) // size of the fmt sub-chunk
    view.setUint16(20, 1, true) // audio format: 1 = PCM
    view.setUint16(22, numChannels, true) // channel count
    view.setUint32(24, sampleRate, true) // sample rate
    view.setUint32(28, sampleRate * bytesPerFrame, true) // byte rate
    view.setUint16(32, bytesPerFrame, true) // block align
    view.setUint16(34, bitsPerSample, true) // bit depth
    view.setUint32(36, 0x64617461, false) // "data"
    view.setUint32(40, dataLength, true) // size of the PCM payload

    return buffer
  }

  /**
   * Converts raw PCM data into a complete WAV file.
   *
   * @param base64PCMData - Base64-encoded PCM data.
   * @param sampleRate - Sample rate in Hz (default 24000).
   * @param numChannels - Number of channels (default 1).
   * @param bitsPerSample - Bit depth (default 16).
   * @returns Base64-encoded WAV data.
   * @throws Rethrows any error raised during conversion (for example invalid Base64) after logging it.
   */
  convertPCMToWAV(base64PCMData: string, sampleRate: number = 24000, numChannels: number = 1, bitsPerSample: number = 16): string {
    try {
      const pcmBytes = this.decodeBase64(base64PCMData)
      const wavBytes = this.buildWAV(pcmBytes, sampleRate, numChannels, bitsPerSample)
      return this.encodeBase64(wavBytes)
    }
    catch (error) {
      console.error('PCM转WAV失败:', error)
      throw error
    }
  }

  /**
   * Converts Base64-encoded data into an `ArrayBuffer`.
   *
   * @param base64Data - Base64-encoded data (WAV in this pipeline).
   * @returns The decoded bytes.
   */
  base64ToArrayBuffer(base64Data: string): ArrayBuffer {
    return this.decodeBase64(base64Data).buffer
  }

  /**
   * Releases the decoding context used for the finished session, prepares a
   * fresh one for the next session and announces `_decodeDataFinish`.
   */
  protected onFinish(): void {
    const finishedContext = this.audioContext
    // Recreate with the same sample rate so every session decodes identically.
    this.audioContext = DecodeData.createAudioContext()

    // Close (rather than merely suspend) the old context so its audio
    // resources are released instead of accumulating across sessions.
    if (finishedContext.state !== 'closed') {
      finishedContext.close().catch((error) => {
        console.error('关闭AudioContext失败:', error)
      })
    }

    this.executeController?.$bus.emit('_decodeDataFinish')
  }

  /**
   * Decodes a Base64 string into its raw bytes.
   * The return type is left inferred so `.buffer` stays a plain `ArrayBuffer`
   * across TypeScript versions.
   */
  private decodeBase64(base64: string) {
    const binary = atob(base64)
    const bytes = new Uint8Array(binary.length)
    for (let i = 0; i < binary.length; i++) {
      bytes[i] = binary.charCodeAt(i)
    }
    return bytes
  }

  /** Encodes raw bytes as Base64, converting in chunks to avoid per-byte string concatenation. */
  private encodeBase64(bytes: Uint8Array): string {
    let binary = ''
    for (let offset = 0; offset < bytes.length; offset += DecodeData.BINARY_CHUNK_SIZE) {
      binary += String.fromCharCode(...bytes.subarray(offset, offset + DecodeData.BINARY_CHUNK_SIZE))
    }
    return btoa(binary)
  }

  /**
   * Prepends a WAV header to the PCM payload.
   * The return type is left inferred so `.buffer` stays a plain `ArrayBuffer`
   * across TypeScript versions.
   */
  private buildWAV(pcmBytes: Uint8Array, sampleRate: number, numChannels: number, bitsPerSample: number) {
    const header = new Uint8Array(this.createWAVHeader(sampleRate, numChannels, bitsPerSample, pcmBytes.length))
    const wavBytes = new Uint8Array(header.length + pcmBytes.length)
    wavBytes.set(header, 0)
    wavBytes.set(pcmBytes, header.length)
    return wavBytes
  }
}

export default DecodeData