@aituber-onair/voice
Version:
Voice synthesis library for AITuber OnAir
210 lines (209 loc) • 7.54 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.VoiceVoxEngine = void 0;
const voiceEngine_1 = require("../constants/voiceEngine");
/**
 * VoiceVox voice synthesis engine.
 *
 * Speech is produced with two HTTP requests against the configured endpoint:
 * `POST /audio_query` builds a query payload for the text, then
 * `POST /synthesis` sends that payload back (after the emotion preset and
 * any user overrides have been applied) and yields the response body.
 */
class VoiceVoxEngine {
    constructor() {
        // Base URL of the engine; replaceable through setApiEndpoint().
        this.apiEndpoint = voiceEngine_1.VOICE_VOX_API_URL;
        // User-supplied audio query values; applied after the emotion preset.
        this.queryOverrides = {};
    }
    /**
     * Synthesize speech for a message.
     * @param input object carrying the text in `message` and an optional emotion in `style`
     * @param speaker speaker id; converted to a string for the query parameter
     * @returns Promise resolving to the synthesis response body as an ArrayBuffer
     * @throws Error when either HTTP request answers with a non-OK status
     */
    async fetchAudio(input, speaker) {
        // A missing/empty style falls back to the neutral preset.
        const emotion = input.style || 'neutral';
        const speakerId = String(speaker);
        const ttsQueryUrl = this.buildUrl('/audio_query', {
            speaker: speakerId,
            text: input.message,
            // Unset flags stay undefined so buildUrl() omits them entirely.
            enable_katakana_english: this.enableKatakanaEnglish === undefined
                ? undefined
                : String(this.enableKatakanaEnglish),
            core_version: this.coreVersion,
        });
        const ttsQueryResponse = await fetch(ttsQueryUrl, { method: 'POST' });
        if (!ttsQueryResponse.ok) {
            throw new Error('Failed to fetch TTS query.');
        }
        const ttsQueryJson = await ttsQueryResponse.json();
        // Order matters: the emotion preset is written first, then explicit
        // user overrides replace whatever the preset chose.
        this.adjustEmotionParameters(ttsQueryJson, emotion);
        this.applyQueryOverrides(ttsQueryJson);
        const synthesisUrl = this.buildUrl('/synthesis', {
            speaker: speakerId,
            enable_interrogative_upspeak: this.enableInterrogativeUpspeak === undefined
                ? undefined
                : String(this.enableInterrogativeUpspeak),
            core_version: this.coreVersion,
        });
        const synthesisResponse = await fetch(synthesisUrl, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify(ttsQueryJson),
        });
        if (!synthesisResponse.ok) {
            throw new Error('Failed to fetch TTS synthesis result.');
        }
        const blob = await synthesisResponse.blob();
        return await blob.arrayBuffer();
    }
    /**
     * Write the speed/pitch/intonation preset for an emotion into the query
     * payload (mutates `ttsQueryJson`).
     * @param ttsQueryJson audio query payload to modify in place
     * @param emotion emotion name, matched case-insensitively; unknown or
     * non-string values use the neutral preset
     */
    adjustEmotionParameters(ttsQueryJson, emotion) {
        // Non-string input (undefined, null, numbers, ...) must not throw on
        // toLowerCase(); treat it like an unknown emotion instead.
        const emotionKey = typeof emotion === 'string' ? emotion.toLowerCase() : 'neutral';
        // Neutral preset; also used for every emotion not listed below.
        ttsQueryJson.speedScale = 1.16;
        ttsQueryJson.pitchScale = -0.02;
        ttsQueryJson.intonationScale = 1.26;
        switch (emotionKey) {
            case 'happy':
                ttsQueryJson.speedScale = 1.25;
                ttsQueryJson.pitchScale = 0.05;
                ttsQueryJson.intonationScale = 1.4;
                break;
            case 'sad':
                ttsQueryJson.speedScale = 1.0;
                ttsQueryJson.pitchScale = -0.1;
                ttsQueryJson.intonationScale = 1.0;
                break;
            case 'angry':
                ttsQueryJson.speedScale = 1.2;
                ttsQueryJson.pitchScale = -0.05;
                ttsQueryJson.intonationScale = 1.5;
                break;
            case 'surprised':
                ttsQueryJson.speedScale = 1.3;
                ttsQueryJson.pitchScale = 0.1;
                ttsQueryJson.intonationScale = 1.4;
                break;
            // no default: anything else keeps the neutral preset set above
        }
    }
    /**
     * Apply user-provided overrides to the audio query payload (mutates
     * `ttsQueryJson`).
     * @param ttsQueryJson audio query payload to modify in place
     */
    applyQueryOverrides(ttsQueryJson) {
        // Only these payload fields may be overridden; any other key stored
        // in queryOverrides is ignored here.
        const overridableKeys = [
            'speedScale',
            'pitchScale',
            'intonationScale',
            'volumeScale',
            'prePhonemeLength',
            'postPhonemeLength',
            'pauseLength',
            'pauseLengthScale',
            'outputSamplingRate',
            'outputStereo',
        ];
        for (const key of overridableKeys) {
            if (this.hasOverride(key)) {
                ttsQueryJson[key] = this.queryOverrides[key];
            }
        }
    }
    /**
     * Merge entries into the override map. An `undefined` value removes the
     * stored override for that key; every other value (including `null`) is
     * stored as-is.
     * @param overrides partial override map; `undefined`/`null` is a no-op
     */
    updateQueryOverrides(overrides) {
        // Object.entries() throws on undefined/null, so bail out early.
        if (overrides === undefined || overrides === null) {
            return;
        }
        for (const [key, value] of Object.entries(overrides)) {
            if (value === undefined) {
                delete this.queryOverrides[key];
            }
            else {
                this.queryOverrides[key] = value;
            }
        }
    }
    /**
     * Check whether an override is stored for the given key.
     * @param key override name
     * @returns true when `queryOverrides` has `key` as an own property
     */
    hasOverride(key) {
        // Own-property check so inherited Object.prototype members never
        // count as overrides.
        return Object.prototype.hasOwnProperty.call(this.queryOverrides, key);
    }
    /**
     * Get the message used for a test utterance.
     * @param textVoiceText custom text; the built-in message is used when falsy
     * @returns the text to speak
     */
    getTestMessage(textVoiceText) {
        return textVoiceText || 'ボイスボックスを使用します';
    }
    /**
     * Set custom API endpoint URL
     * @param apiUrl custom API endpoint URL
     */
    setApiEndpoint(apiUrl) {
        this.apiEndpoint = apiUrl;
    }
    /**
     * Set query parameter overrides in batch. Replaces every previously
     * stored override.
     * @param overrides Audio query parameter overrides; `undefined`/`null`
     * simply clears all overrides
     */
    setQueryParameters(overrides) {
        this.queryOverrides = {};
        this.updateQueryOverrides(overrides);
    }
    /** Override `speedScale`; pass undefined to remove the override. */
    setSpeedScale(speedScale) {
        this.updateQueryOverrides({ speedScale });
    }
    /** Override `pitchScale`; pass undefined to remove the override. */
    setPitchScale(pitchScale) {
        this.updateQueryOverrides({ pitchScale });
    }
    /** Override `intonationScale`; pass undefined to remove the override. */
    setIntonationScale(intonationScale) {
        this.updateQueryOverrides({ intonationScale });
    }
    /** Override `volumeScale`; pass undefined to remove the override. */
    setVolumeScale(volumeScale) {
        this.updateQueryOverrides({ volumeScale });
    }
    /** Override `prePhonemeLength`; pass undefined to remove the override. */
    setPrePhonemeLength(prePhonemeLength) {
        this.updateQueryOverrides({ prePhonemeLength });
    }
    /** Override `postPhonemeLength`; pass undefined to remove the override. */
    setPostPhonemeLength(postPhonemeLength) {
        this.updateQueryOverrides({ postPhonemeLength });
    }
    /** Override `pauseLength`; pass undefined to remove the override. */
    setPauseLength(pauseLength) {
        this.updateQueryOverrides({ pauseLength });
    }
    /** Override `pauseLengthScale`; pass undefined to remove the override. */
    setPauseLengthScale(pauseLengthScale) {
        this.updateQueryOverrides({ pauseLengthScale });
    }
    /** Override `outputSamplingRate`; pass undefined to remove the override. */
    setOutputSamplingRate(outputSamplingRate) {
        this.updateQueryOverrides({ outputSamplingRate });
    }
    /** Override `outputStereo`; pass undefined to remove the override. */
    setOutputStereo(outputStereo) {
        this.updateQueryOverrides({ outputStereo });
    }
    /**
     * Set the value sent as `enable_katakana_english` on `/audio_query`.
     * @param enable flag value; undefined omits the parameter
     */
    setEnableKatakanaEnglish(enable) {
        this.enableKatakanaEnglish = enable;
    }
    /**
     * Set the value sent as `enable_interrogative_upspeak` on `/synthesis`.
     * @param enable flag value; undefined omits the parameter
     */
    setEnableInterrogativeUpspeak(enable) {
        this.enableInterrogativeUpspeak = enable;
    }
    /**
     * Set the value sent as `core_version` on both requests.
     * @param coreVersion version string; blank, empty, or nullish values
     * clear the setting so the parameter is omitted
     */
    setCoreVersion(coreVersion) {
        const trimmed = coreVersion?.trim();
        this.coreVersion = trimmed || undefined;
    }
    /**
     * Build endpoint URL with optional query parameters
     * @param path endpoint path starting with `/`
     * @param params query parameters; entries whose value is undefined are skipped
     * @returns the absolute URL as a string
     */
    buildUrl(path, params) {
        // Drop every trailing slash so `${base}${path}` never contains an
        // accidental `//` when the endpoint was configured as "host/" or "host//".
        const base = this.apiEndpoint.replace(/\/+$/, '');
        const url = new URL(`${base}${path}`);
        for (const [key, value] of Object.entries(params)) {
            if (value !== undefined) {
                url.searchParams.set(key, value);
            }
        }
        return url.toString();
    }
}
exports.VoiceVoxEngine = VoiceVoxEngine;