UNPKG

@aituber-onair/voice

Version:

Voice synthesis library for AITuber OnAir

206 lines (205 loc) 7.58 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.AivisSpeechEngine = void 0; const voiceEngine_1 = require("../constants/voiceEngine"); /** * AivisSpeech voice synthesis engine */ class AivisSpeechEngine { constructor() { this.apiEndpoint = voiceEngine_1.AIVIS_SPEECH_API_URL; this.queryOverrides = {}; } async fetchAudio(input, speaker) { const talk = input; // Get emotion from talk.style const emotion = talk.style || 'neutral'; const text = talk.message.trim(); const queryUrl = this.buildUrl('/audio_query', { speaker: String(speaker), text: text, }); const ttsQueryResponse = await fetch(queryUrl, { method: 'POST' }); if (!ttsQueryResponse.ok) { throw new Error('Failed to fetch TTS query from AivisSpeech Engine.'); } const ttsQueryJson = await ttsQueryResponse.json(); // adjust parameters according to emotion this.adjustEmotionParameters(ttsQueryJson, emotion); this.applyQueryOverrides(ttsQueryJson); const synthesisUrl = this.buildUrl('/synthesis', { speaker: String(speaker), }); const synthesisResponse = await fetch(synthesisUrl, { method: 'POST', headers: { 'Content-Type': 'application/json', }, body: JSON.stringify(ttsQueryJson), }); if (!synthesisResponse.ok) { throw new Error('Failed to fetch TTS synthesis result from AivisSpeech Engine.'); } const blob = await synthesisResponse.blob(); return await blob.arrayBuffer(); } adjustEmotionParameters(ttsQueryJson, emotion) { // default values ttsQueryJson.speedScale = 1.0; ttsQueryJson.pitchScale = 0.0; ttsQueryJson.intonationScale = 1.0; ttsQueryJson.tempoDynamicsScale = 1.0; ttsQueryJson.volumeScale = 1.0; switch (emotion.toLowerCase()) { case 'happy': ttsQueryJson.speedScale = 1.1; ttsQueryJson.pitchScale = 0.05; ttsQueryJson.intonationScale = 1.2; ttsQueryJson.tempoDynamicsScale = 1.1; ttsQueryJson.volumeScale = 1.05; break; case 'sad': ttsQueryJson.speedScale = 0.9; ttsQueryJson.pitchScale = -0.03; ttsQueryJson.intonationScale = 0.8; ttsQueryJson.tempoDynamicsScale = 0.9; ttsQueryJson.volumeScale = 0.95; break; case 'angry': ttsQueryJson.speedScale = 1.0; ttsQueryJson.pitchScale = 0.0; ttsQueryJson.intonationScale = 1.4; ttsQueryJson.tempoDynamicsScale = 1.2; ttsQueryJson.volumeScale = 1.1; break; case 'surprised': ttsQueryJson.speedScale = 1.2; ttsQueryJson.pitchScale = 0.07; ttsQueryJson.intonationScale = 1.3; ttsQueryJson.tempoDynamicsScale = 1.0; ttsQueryJson.volumeScale = 1.05; break; // default: "neutral" etc. other than default values } } /** * Apply user-provided overrides to audio query payload */ applyQueryOverrides(ttsQueryJson) { if (this.hasOverride('speedScale')) { ttsQueryJson.speedScale = this.queryOverrides.speedScale; } if (this.hasOverride('pitchScale')) { ttsQueryJson.pitchScale = this.queryOverrides.pitchScale; } if (this.hasOverride('intonationScale')) { ttsQueryJson.intonationScale = this.queryOverrides.intonationScale; } if (this.hasOverride('tempoDynamicsScale')) { ttsQueryJson.tempoDynamicsScale = this.queryOverrides.tempoDynamicsScale; } if (this.hasOverride('volumeScale')) { ttsQueryJson.volumeScale = this.queryOverrides.volumeScale; } if (this.hasOverride('prePhonemeLength')) { ttsQueryJson.prePhonemeLength = this.queryOverrides.prePhonemeLength; } if (this.hasOverride('postPhonemeLength')) { ttsQueryJson.postPhonemeLength = this.queryOverrides.postPhonemeLength; } if (this.hasOverride('pauseLength')) { ttsQueryJson.pauseLength = this.queryOverrides.pauseLength; } if (this.hasOverride('pauseLengthScale')) { ttsQueryJson.pauseLengthScale = this.queryOverrides.pauseLengthScale; } if (this.hasOverride('outputSamplingRate')) { ttsQueryJson.outputSamplingRate = this.queryOverrides.outputSamplingRate; } if (this.hasOverride('outputStereo')) { ttsQueryJson.outputStereo = this.queryOverrides.outputStereo; } } /** * Update override map while allowing undefined to reset values */ updateQueryOverrides(overrides) { for (const [key, value] of Object.entries(overrides)) { if (value === undefined) { delete this.queryOverrides[key]; } else { this.queryOverrides[key] = value; } } } hasOverride(key) { return Object.prototype.hasOwnProperty.call(this.queryOverrides, key); } getTestMessage(textVoiceText) { return textVoiceText || 'アイビススピーチを使用します'; } /** * Set custom API endpoint URL * @param apiUrl custom API endpoint URL */ setApiEndpoint(apiUrl) { this.apiEndpoint = apiUrl; } /** * Set query parameter overrides in batch * @param overrides Audio query parameter overrides */ setQueryParameters(overrides) { this.queryOverrides = {}; this.updateQueryOverrides(overrides); } setSpeedScale(speedScale) { this.updateQueryOverrides({ speedScale }); } setPitchScale(pitchScale) { this.updateQueryOverrides({ pitchScale }); } setIntonationScale(intonationScale) { this.updateQueryOverrides({ intonationScale }); } setTempoDynamicsScale(tempoDynamicsScale) { this.updateQueryOverrides({ tempoDynamicsScale }); } setVolumeScale(volumeScale) { this.updateQueryOverrides({ volumeScale }); } setPrePhonemeLength(prePhonemeLength) { this.updateQueryOverrides({ prePhonemeLength }); } setPostPhonemeLength(postPhonemeLength) { this.updateQueryOverrides({ postPhonemeLength }); } setPauseLength(pauseLength) { this.updateQueryOverrides({ pauseLength }); } setPauseLengthScale(pauseLengthScale) { this.updateQueryOverrides({ pauseLengthScale }); } setOutputSamplingRate(outputSamplingRate) { this.updateQueryOverrides({ outputSamplingRate }); } setOutputStereo(outputStereo) { this.updateQueryOverrides({ outputStereo }); } /** * Build endpoint URL with optional query parameters */ buildUrl(path, params) { const base = this.apiEndpoint.replace(/\/$/, ''); const url = new URL(`${base}${path}`); for (const [key, value] of Object.entries(params)) { if (value !== undefined) { url.searchParams.set(key, value); } } return url.toString(); } } exports.AivisSpeechEngine = AivisSpeechEngine;