UNPKG

@aituber-onair/voice

Version:

Voice synthesis library for AITuber OnAir

210 lines (209 loc) 7.54 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.VoiceVoxEngine = void 0;
const voiceEngine_1 = require("../constants/voiceEngine");

/**
 * Audio query fields that can be overridden by the user, listed in the
 * order in which they are copied onto the audio query payload.
 */
const QUERY_OVERRIDE_KEYS = Object.freeze([
    'speedScale',
    'pitchScale',
    'intonationScale',
    'volumeScale',
    'prePhonemeLength',
    'postPhonemeLength',
    'pauseLength',
    'pauseLengthScale',
    'outputSamplingRate',
    'outputStereo',
]);

/**
 * Values applied when the emotion is not one of the named presets
 * (for example "neutral").
 */
const DEFAULT_EMOTION_PRESET = Object.freeze({
    speedScale: 1.16,
    pitchScale: -0.02,
    intonationScale: 1.26,
});

/**
 * Per-emotion presets, keyed by lower-cased emotion name.
 * A Map is used so that names such as "constructor" can never resolve to
 * something inherited from Object.prototype.
 */
const EMOTION_PRESETS = new Map([
    ['happy', { speedScale: 1.25, pitchScale: 0.05, intonationScale: 1.4 }],
    ['sad', { speedScale: 1.0, pitchScale: -0.1, intonationScale: 1.0 }],
    ['angry', { speedScale: 1.2, pitchScale: -0.05, intonationScale: 1.5 }],
    ['surprised', { speedScale: 1.3, pitchScale: 0.1, intonationScale: 1.4 }],
]);

/**
 * VoiceVox voice synthesis engine
 */
class VoiceVoxEngine {
    constructor() {
        this.apiEndpoint = voiceEngine_1.VOICE_VOX_API_URL;
        this.queryOverrides = {};
    }

    /**
     * Synthesize speech for a talk item.
     *
     * Issues two POST requests against the configured endpoint: first
     * `/audio_query` to obtain the query payload, then `/synthesis` with
     * that payload (after emotion presets and user overrides are applied).
     *
     * @param input talk object; `input.message` is the text to speak and
     *   `input.style` is the optional emotion name
     * @param speaker speaker identifier, sent as the `speaker` query parameter
     * @returns {Promise<ArrayBuffer>} body of the synthesis response
     * @throws {Error} when either request responds with a non-OK status
     */
    async fetchAudio(input, speaker) {
        const talk = input;
        // A missing or empty style is treated as "neutral".
        const emotion = talk.style || 'neutral';
        const speakerId = String(speaker);

        const ttsQueryUrl = this.buildUrl('/audio_query', {
            speaker: speakerId,
            text: talk.message,
            // An unset (undefined or null) flag is omitted from the URL
            // instead of being sent as the literal string "null".
            enable_katakana_english: this.enableKatakanaEnglish == null
                ? undefined
                : String(this.enableKatakanaEnglish),
            core_version: this.coreVersion,
        });
        const ttsQueryResponse = await fetch(ttsQueryUrl, { method: 'POST' });
        if (!ttsQueryResponse.ok) {
            throw new Error('Failed to fetch TTS query.');
        }
        const ttsQueryJson = await ttsQueryResponse.json();

        // Emotion presets go first so that explicit user overrides win.
        this.adjustEmotionParameters(ttsQueryJson, emotion);
        this.applyQueryOverrides(ttsQueryJson);

        const synthesisUrl = this.buildUrl('/synthesis', {
            speaker: speakerId,
            enable_interrogative_upspeak: this.enableInterrogativeUpspeak == null
                ? undefined
                : String(this.enableInterrogativeUpspeak),
            core_version: this.coreVersion,
        });
        const synthesisResponse = await fetch(synthesisUrl, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(ttsQueryJson),
        });
        if (!synthesisResponse.ok) {
            throw new Error('Failed to fetch TTS synthesis result.');
        }
        const blob = await synthesisResponse.blob();
        return blob.arrayBuffer();
    }

    /**
     * Adjust parameters according to emotion.
     *
     * Always overwrites `speedScale`, `pitchScale` and `intonationScale` on
     * the given payload. Matching is case-insensitive; unknown, missing or
     * non-string emotions receive the default values.
     *
     * @param ttsQueryJson audio query payload, mutated in place
     * @param emotion emotion name such as "happy", "sad", "angry", "surprised"
     */
    adjustEmotionParameters(ttsQueryJson, emotion) {
        const emotionKey = typeof emotion === 'string' ? emotion.toLowerCase() : '';
        const preset = EMOTION_PRESETS.get(emotionKey) ?? DEFAULT_EMOTION_PRESET;
        ttsQueryJson.speedScale = preset.speedScale;
        ttsQueryJson.pitchScale = preset.pitchScale;
        ttsQueryJson.intonationScale = preset.intonationScale;
    }

    /**
     * Apply user-provided overrides to audio query payload.
     *
     * Only the supported audio query fields are copied, and only when an
     * override is currently set for them.
     *
     * @param ttsQueryJson audio query payload, mutated in place
     */
    applyQueryOverrides(ttsQueryJson) {
        for (const key of QUERY_OVERRIDE_KEYS) {
            if (this.hasOverride(key)) {
                ttsQueryJson[key] = this.queryOverrides[key];
            }
        }
    }

    /**
     * Update override map while allowing undefined to reset values.
     *
     * @param overrides partial map of overrides; an entry whose value is
     *   `undefined` removes that override. A null/undefined argument is a
     *   no-op.
     */
    updateQueryOverrides(overrides) {
        if (overrides === undefined || overrides === null) {
            return;
        }
        for (const [key, value] of Object.entries(overrides)) {
            if (value === undefined) {
                delete this.queryOverrides[key];
            }
            else {
                this.queryOverrides[key] = value;
            }
        }
    }

    /**
     * Check whether an override is currently set for the given key.
     * Uses an own-property check so inherited properties never count.
     *
     * @param key override name
     * @returns {boolean} true when an override is set
     */
    hasOverride(key) {
        return Object.prototype.hasOwnProperty.call(this.queryOverrides, key);
    }

    /**
     * Get the message used for a test utterance.
     *
     * @param textVoiceText custom text; when empty or missing the built-in
     *   default message is used
     * @returns {string} text to speak
     */
    getTestMessage(textVoiceText) {
        return textVoiceText || 'ボイスボックスを使用します';
    }

    /**
     * Set custom API endpoint URL
     * @param apiUrl custom API endpoint URL
     */
    setApiEndpoint(apiUrl) {
        this.apiEndpoint = apiUrl;
    }

    /**
     * Set query parameter overrides in batch.
     * Existing overrides are discarded before the new ones are stored.
     * @param overrides Audio query parameter overrides
     */
    setQueryParameters(overrides) {
        this.queryOverrides = {};
        this.updateQueryOverrides(overrides);
    }

    /** Override `speedScale`; pass undefined to clear the override. */
    setSpeedScale(speedScale) {
        this.updateQueryOverrides({ speedScale });
    }

    /** Override `pitchScale`; pass undefined to clear the override. */
    setPitchScale(pitchScale) {
        this.updateQueryOverrides({ pitchScale });
    }

    /** Override `intonationScale`; pass undefined to clear the override. */
    setIntonationScale(intonationScale) {
        this.updateQueryOverrides({ intonationScale });
    }

    /** Override `volumeScale`; pass undefined to clear the override. */
    setVolumeScale(volumeScale) {
        this.updateQueryOverrides({ volumeScale });
    }

    /** Override `prePhonemeLength`; pass undefined to clear the override. */
    setPrePhonemeLength(prePhonemeLength) {
        this.updateQueryOverrides({ prePhonemeLength });
    }

    /** Override `postPhonemeLength`; pass undefined to clear the override. */
    setPostPhonemeLength(postPhonemeLength) {
        this.updateQueryOverrides({ postPhonemeLength });
    }

    /** Override `pauseLength`; pass undefined to clear the override. */
    setPauseLength(pauseLength) {
        this.updateQueryOverrides({ pauseLength });
    }

    /** Override `pauseLengthScale`; pass undefined to clear the override. */
    setPauseLengthScale(pauseLengthScale) {
        this.updateQueryOverrides({ pauseLengthScale });
    }

    /** Override `outputSamplingRate`; pass undefined to clear the override. */
    setOutputSamplingRate(outputSamplingRate) {
        this.updateQueryOverrides({ outputSamplingRate });
    }

    /** Override `outputStereo`; pass undefined to clear the override. */
    setOutputStereo(outputStereo) {
        this.updateQueryOverrides({ outputStereo });
    }

    /**
     * Set the flag sent as `enable_katakana_english` on `/audio_query`.
     * When unset, the parameter is left out of the request.
     */
    setEnableKatakanaEnglish(enable) {
        this.enableKatakanaEnglish = enable;
    }

    /**
     * Set the flag sent as `enable_interrogative_upspeak` on `/synthesis`.
     * When unset, the parameter is left out of the request.
     */
    setEnableInterrogativeUpspeak(enable) {
        this.enableInterrogativeUpspeak = enable;
    }

    /**
     * Set the value sent as `core_version` on both requests.
     * The value is trimmed; an empty or missing value clears the setting.
     */
    setCoreVersion(coreVersion) {
        const trimmed = coreVersion?.trim();
        this.coreVersion = trimmed ? trimmed : undefined;
    }

    /**
     * Build endpoint URL with optional query parameters.
     *
     * @param path endpoint path beginning with "/" (e.g. "/synthesis")
     * @param params query parameters; undefined/null values are skipped
     * @returns {string} absolute URL
     */
    buildUrl(path, params) {
        // Drop every trailing slash so "http://host//" + "/x" cannot
        // produce a doubled slash in the resulting path.
        const base = this.apiEndpoint.replace(/\/+$/, '');
        const url = new URL(`${base}${path}`);
        for (const [key, value] of Object.entries(params)) {
            if (value !== undefined && value !== null) {
                url.searchParams.set(key, value);
            }
        }
        return url.toString();
    }
}
exports.VoiceVoxEngine = VoiceVoxEngine;