@minto-ai/huoshan-tts
Version:
借助“火山引擎在线语音合成API”实现浏览器端“文本转语音”
143 lines (124 loc) • 4.58 kB
text/typescript
import type { ParallelTaskExecuteContext } from '../../handler'
import { ParallelHandler } from '../../handler'
/**
 * Pipeline stage that turns Base64-encoded raw PCM chunks into decoded `AudioBuffer`s.
 *
 * Each chunk is prefixed with a 44-byte WAV header so that the browser's
 * `decodeAudioData` can parse it. The decoded buffer is passed to the inherited
 * `forwardToHandler`, and the task is then reported through the inherited
 * `taskCompletedCallback`. Every code path reports the task exactly once, including
 * the failure paths (malformed Base64, undecodable audio).
 */
class DecodeData extends ParallelHandler<string, AudioBuffer> {
  /** Options used for every `AudioContext` created by this handler, so all runs decode at the same rate. */
  private static readonly AUDIO_CONTEXT_OPTIONS: AudioContextOptions = { sampleRate: 44100 }

  /** Chunks with fewer decoded PCM bytes than this are skipped as too short to be valid audio. */
  private static readonly MIN_PCM_BYTES = 100

  private audioContext: AudioContext = new AudioContext(DecodeData.AUDIO_CONTEXT_OPTIONS)

  /**
   * Decodes one task's Base64 PCM payload and forwards the resulting `AudioBuffer`.
   *
   * The task is completed without forwarding anything when this is the last
   * execution, when the payload is empty, when it is shorter than
   * `MIN_PCM_BYTES`, or when it cannot be converted or decoded.
   *
   * @param context - Execution context carrying the task item (`uuid`, `original`) and the `isLastExecute` flag.
   */
  public execute(
    context: ParallelTaskExecuteContext<string, AudioBuffer>,
  ): void {
    const { uuid, original } = context.taskItem
    const completeTask = (): void => {
      this.taskCompletedCallback(uuid)
    }

    if (context.isLastExecute || !original) {
      completeTask()
      return
    }

    const wavBuffer = this.buildDecodableWAV(original)
    if (wavBuffer === null) {
      completeTask()
      return
    }

    const pending = this.audioContext.decodeAudioData(
      wavBuffer,
      (audioData) => {
        this.forwardToHandler(audioData)
        completeTask()
      },
      (error) => {
        // Without this callback a failed decode would never complete the task.
        console.error('音频解码失败:', error)
        completeTask()
      },
    )
    // Browsers that return a promise reject it on failure even when an error callback
    // is supplied; the failure is already handled above, so silence the duplicate.
    // Optional chaining covers legacy implementations that return nothing.
    pending?.catch(() => {})
  }

  /**
   * Builds the 44-byte header of a canonical PCM WAV file.
   *
   * Chunk identifiers are written big-endian so the bytes spell the ASCII tags;
   * all numeric fields are little-endian, as the RIFF format requires.
   *
   * @param sampleRate - Samples per second.
   * @param numChannels - Number of audio channels.
   * @param bitsPerSample - Bit depth of one sample.
   * @param dataLength - Length of the PCM payload in bytes.
   * @returns The WAV header.
   */
  createWAVHeader(sampleRate: number, numChannels: number, bitsPerSample: number, dataLength: number): ArrayBuffer {
    const buffer = new ArrayBuffer(44)
    const view = new DataView(buffer)
    const bytesPerFrame = numChannels * bitsPerSample / 8

    view.setUint32(0, 0x52494646, false) // "RIFF"
    view.setUint32(4, 36 + dataLength, true) // size of everything after this field
    view.setUint32(8, 0x57415645, false) // "WAVE"
    view.setUint32(12, 0x666D7420, false) // "fmt "
    view.setUint32(16, 16, true) // size of the fmt sub-chunk
    view.setUint16(20, 1, true) // audio format: 1 = PCM
    view.setUint16(22, numChannels, true)
    view.setUint32(24, sampleRate, true)
    view.setUint32(28, sampleRate * bytesPerFrame, true) // byte rate
    view.setUint16(32, bytesPerFrame, true) // block align
    view.setUint16(34, bitsPerSample, true)
    view.setUint32(36, 0x64617461, false) // "data"
    view.setUint32(40, dataLength, true)
    return buffer
  }

  /**
   * Wraps raw PCM data in a WAV container.
   *
   * @param base64PCMData - Base64-encoded PCM data.
   * @param sampleRate - Sample rate (default 24000).
   * @param numChannels - Number of channels (default 1).
   * @param bitsPerSample - Bit depth (default 16).
   * @returns Base64-encoded WAV data.
   * @throws Rethrows any conversion error (e.g. malformed Base64) after logging it.
   */
  convertPCMToWAV(base64PCMData: string, sampleRate: number = 24000, numChannels: number = 1, bitsPerSample: number = 16): string {
    try {
      const pcmData = this.base64ToBytes(base64PCMData)
      const wavData = new Uint8Array(this.assembleWAV(pcmData, sampleRate, numChannels, bitsPerSample))
      return btoa(this.bytesToBinaryString(wavData))
    }
    catch (error) {
      console.error('PCM转WAV失败:', error)
      throw error
    }
  }

  /**
   * Decodes Base64 data into an `ArrayBuffer`.
   *
   * @param base64Data - Base64-encoded data (WAV data in this class's usage).
   * @returns The decoded bytes.
   */
  base64ToArrayBuffer(base64Data: string): ArrayBuffer {
    return this.base64ToBytes(base64Data).buffer
  }

  /**
   * Releases the current `AudioContext`, prepares a fresh one for the next run and
   * emits `_decodeDataFinish` on the controller's bus.
   */
  protected onFinish(): void {
    if (this.audioContext) {
      // close() releases the context's audio resources; suspend() would only pause
      // it, leaving one more context alive after every run.
      // A rejection (e.g. already closed) is harmless here.
      this.audioContext.close().catch(() => {})
      this.audioContext = new AudioContext(DecodeData.AUDIO_CONTEXT_OPTIONS)
    }
    this.executeController?.$bus.emit('_decodeDataFinish')
  }

  /** Converts one Base64 PCM chunk into WAV bytes, or returns null if it is too short or malformed. */
  private buildDecodableWAV(base64PCMData: string): ArrayBuffer | null {
    try {
      const pcmData = this.base64ToBytes(base64PCMData)
      if (pcmData.length < DecodeData.MIN_PCM_BYTES) {
        return null
      }
      // Assemble the WAV bytes directly instead of going PCM -> Base64 WAV -> bytes.
      return this.assembleWAV(pcmData, 24000, 1, 16)
    }
    catch (error) {
      // atob throws on malformed Base64; skip the chunk rather than abort the run.
      console.error('PCM转WAV失败:', error)
      return null
    }
  }

  /** Concatenates a WAV header and the given PCM bytes into one buffer. */
  private assembleWAV(pcmData: Uint8Array, sampleRate: number, numChannels: number, bitsPerSample: number): ArrayBuffer {
    const wavHeader = this.createWAVHeader(sampleRate, numChannels, bitsPerSample, pcmData.length)
    const wavData = new Uint8Array(wavHeader.byteLength + pcmData.length)
    wavData.set(new Uint8Array(wavHeader), 0)
    wavData.set(pcmData, wavHeader.byteLength)
    return wavData.buffer
  }

  /** Decodes a Base64 string into its raw bytes. */
  private base64ToBytes(base64Data: string) {
    const binaryString = atob(base64Data)
    const bytes = new Uint8Array(binaryString.length)
    for (let i = 0; i < binaryString.length; i++) {
      bytes[i] = binaryString.charCodeAt(i)
    }
    return bytes
  }

  /** Turns bytes into a binary string for `btoa`, in chunks to stay within argument-count limits. */
  private bytesToBinaryString(bytes: Uint8Array): string {
    const chunkSize = 0x8000
    let binaryStr = ''
    for (let offset = 0; offset < bytes.length; offset += chunkSize) {
      binaryStr += String.fromCharCode(...bytes.subarray(offset, offset + chunkSize))
    }
    return binaryStr
  }
}
export default DecodeData