@aituber-onair/voice
Version:
Voice synthesis library for AITuber OnAir
282 lines (281 loc) • 10.1 kB
JavaScript
import { AIVIS_CLOUD_API_URL } from '../constants/voiceEngine';
/**
 * Aivis Cloud API voice synthesis engine.
 *
 * Holds the synthesis settings (model/speaker/style selection, prosody,
 * silence padding and output encoding) and sends them as a JSON POST to
 * AIVIS_CLOUD_API_URL from fetchAudio().
 */
export class AivisCloudEngine {
  /**
   * Initialise the settings that are always sent with a request.
   * Model UUID, speaker UUID, style and output bitrate stay undefined until
   * their setters are called; fetchAudio() omits the optional ones when unset.
   */
  constructor() {
    this.useSSML = true;
    this.speakingRate = 1.0;
    this.emotionalIntensity = 1.0;
    this.tempoDynamics = 1.0;
    this.pitch = 0.0;
    this.volume = 1.0;
    this.leadingSilence = 0.1;
    this.trailingSilence = 0.1;
    this.lineBreakSilence = 0.4;
    this.outputFormat = 'mp3';
    this.outputSamplingRate = 44100;
    this.outputChannels = 'mono';
    this.enableBillingLogs = false;
  }

  /**
   * Set model UUID
   * @param modelUuid Aivis Cloud model UUID
   */
  setModelUuid(modelUuid) {
    this.modelUuid = modelUuid;
  }

  /**
   * Set speaker UUID
   * @param speakerUuid Aivis Cloud speaker UUID
   */
  setSpeakerUuid(speakerUuid) {
    this.speakerUuid = speakerUuid;
  }

  /**
   * Set style ID
   * @param styleId Style ID (0-31)
   */
  setStyleId(styleId) {
    this.styleId = styleId;
    this.styleName = undefined; // Style ID and style name are mutually exclusive
  }

  /**
   * Set style name
   * @param styleName Style name
   */
  setStyleName(styleName) {
    this.styleName = styleName;
    this.styleId = undefined; // Style ID and style name are mutually exclusive
  }

  /**
   * Set SSML usage
   * @param useSSML Enable SSML interpretation
   */
  setUseSSML(useSSML) {
    this.useSSML = useSSML;
  }

  /**
   * Set speaking rate. Out-of-range values are clamped; a value that is not
   * a number leaves the current rate unchanged.
   * @param rate Speaking rate (0.5-2.0)
   */
  setSpeakingRate(rate) {
    this.speakingRate = clampOrKeep(rate, 0.5, 2.0, this.speakingRate);
  }

  /**
   * Set emotional intensity. Out-of-range values are clamped; a value that
   * is not a number leaves the current intensity unchanged.
   * @param intensity Emotional intensity (0.0-2.0)
   */
  setEmotionalIntensity(intensity) {
    this.emotionalIntensity = clampOrKeep(intensity, 0.0, 2.0, this.emotionalIntensity);
  }

  /**
   * Set tempo dynamics. Out-of-range values are clamped; a value that is not
   * a number leaves the current setting unchanged.
   * @param dynamics Tempo dynamics (0.0-2.0)
   */
  setTempoDynamics(dynamics) {
    this.tempoDynamics = clampOrKeep(dynamics, 0.0, 2.0, this.tempoDynamics);
  }

  /**
   * Set pitch. Out-of-range values are clamped; a value that is not a number
   * leaves the current pitch unchanged.
   * @param pitch Pitch (-1.0-1.0)
   */
  setPitch(pitch) {
    this.pitch = clampOrKeep(pitch, -1.0, 1.0, this.pitch);
  }

  /**
   * Set volume. Out-of-range values are clamped; a value that is not a
   * number leaves the current volume unchanged.
   * @param volume Volume (0.0-2.0)
   */
  setVolume(volume) {
    this.volume = clampOrKeep(volume, 0.0, 2.0, this.volume);
  }

  /**
   * Set silence durations. Negative values are raised to 0. An argument that
   * is omitted or not a number leaves the corresponding duration unchanged,
   * so a partial call does not overwrite the other durations with NaN.
   * @param leading Leading silence in seconds
   * @param trailing Trailing silence in seconds
   * @param lineBreak Line break silence in seconds
   */
  setSilenceDurations(leading, trailing, lineBreak) {
    this.leadingSilence = clampOrKeep(leading, 0, Infinity, this.leadingSilence);
    this.trailingSilence = clampOrKeep(trailing, 0, Infinity, this.trailingSilence);
    this.lineBreakSilence = clampOrKeep(lineBreak, 0, Infinity, this.lineBreakSilence);
  }

  /**
   * Set output format
   * @param format Output format
   */
  setOutputFormat(format) {
    this.outputFormat = format;
  }

  /**
   * Set output bitrate. Out-of-range values are clamped; a value that is not
   * a number leaves the current bitrate (possibly unset) unchanged.
   * @param bitrate Output bitrate in kbps (8-320)
   */
  setOutputBitrate(bitrate) {
    this.outputBitrate = clampOrKeep(bitrate, 8, 320, this.outputBitrate);
  }

  /**
   * Set output sampling rate
   * @param rate Output sampling rate in Hz
   */
  setOutputSamplingRate(rate) {
    this.outputSamplingRate = rate;
  }

  /**
   * Set output channels
   * @param channels Output channels (mono or stereo)
   */
  setOutputChannels(channels) {
    this.outputChannels = channels;
  }

  /**
   * Enable or disable billing/usage information logs
   * @param enable Whether to enable billing logs (default: false)
   */
  setEnableBillingLogs(enable) {
    this.enableBillingLogs = enable;
  }

  /**
   * Synthesize speech for a talk object and return the audio bytes.
   * @param input Talk object; `message` is the text to speak (trimmed before
   *   sending) and `style` is an optional emotion name (defaults to 'talk')
   * @param speaker Used as the model UUID only when none was set via setModelUuid()
   * @param apiKey Aivis Cloud API key, sent as a Bearer token
   * @returns Promise resolving to an ArrayBuffer with the response body
   * @throws Error when the API key or model UUID is missing, or when the
   *   response status is not OK (message depends on the HTTP status)
   */
  async fetchAudio(input, speaker, apiKey) {
    if (!apiKey) {
      throw new Error('Aivis Cloud API key is required');
    }
    if (!this.modelUuid && !speaker) {
      throw new Error('Aivis Cloud model UUID is required. Set it using setModelUuid() or pass as speaker parameter');
    }
    const talk = input;
    const text = talk.message.trim();
    // The configured model UUID wins; the speaker argument is only a fallback
    const actualModelUuid = this.modelUuid || speaker;
    // The talk style may scale the configured emotional intensity
    const emotionSettings = this.getEmotionSettings(talk.style || 'talk');
    const requestBody = {
      model_uuid: actualModelUuid,
      text: text,
      use_ssml: this.useSSML,
      speaking_rate: this.speakingRate,
      emotional_intensity: emotionSettings.emotionalIntensity,
      tempo_dynamics: this.tempoDynamics,
      pitch: this.pitch,
      volume: this.volume,
      leading_silence_seconds: this.leadingSilence,
      trailing_silence_seconds: this.trailingSilence,
      line_break_silence_seconds: this.lineBreakSilence,
      output_format: this.outputFormat,
      output_sampling_rate: this.outputSamplingRate,
      output_audio_channels: this.outputChannels,
    };
    // Optional fields are only sent when configured
    if (this.speakerUuid) {
      requestBody.speaker_uuid = this.speakerUuid;
    }
    if (this.styleId !== undefined) {
      requestBody.style_id = this.styleId;
    } else if (this.styleName) {
      requestBody.style_name = this.styleName;
    }
    // Bitrate is not sent for the wav and flac output formats
    if (this.outputBitrate &&
      this.outputFormat !== 'wav' &&
      this.outputFormat !== 'flac') {
      requestBody.output_bitrate = this.outputBitrate;
    }
    const response = await fetch(AIVIS_CLOUD_API_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify(requestBody),
    });
    if (!response.ok) {
      // Reading the body is best-effort: a failed read must not hide the
      // status-specific message, so the status is mapped separately below.
      let errorText = null;
      try {
        errorText = await response.text();
        console.error('Failed to fetch TTS from Aivis Cloud:', response.status, errorText);
      } catch (e) {
        console.error('Failed to parse error response:', e);
      }
      throw new Error(describeHttpError(response.status, errorText, actualModelUuid));
    }
    // Log billing/usage information from response headers (if enabled)
    if (this.enableBillingLogs) {
      const billingMode = response.headers.get('X-Aivis-Billing-Mode');
      const characterCount = response.headers.get('X-Aivis-Character-Count');
      const creditsUsed = response.headers.get('X-Aivis-Credits-Used');
      const creditsRemaining = response.headers.get('X-Aivis-Credits-Remaining');
      const rateLimitRemaining = response.headers.get('X-Aivis-Rate-Limit-Remaining');
      // The remaining headers are only reported when a billing mode is present
      if (billingMode) {
        console.log(`Aivis Cloud billing mode: ${billingMode}`);
        if (characterCount) {
          console.log(`Characters synthesized: ${characterCount}`);
        }
        if (creditsUsed) {
          console.log(`Credits used: ${creditsUsed}`);
        }
        if (creditsRemaining) {
          console.log(`Credits remaining: ${creditsRemaining}`);
        }
        if (rateLimitRemaining) {
          console.log(`Rate limit remaining: ${rateLimitRemaining}/min`);
        }
      }
    }
    const blob = await response.blob();
    return await blob.arrayBuffer();
  }

  /**
   * Get emotion settings based on emotion type
   * @param emotion Emotion type (matched case-insensitively)
   * @returns Object whose `emotionalIntensity` is the configured intensity,
   *   scaled up for 'happy'/'surprised' (x1.1) and 'angry' (x1.05) and capped at 2.0
   */
  getEmotionSettings(emotion) {
    let emotionalIntensity = this.emotionalIntensity;
    switch (emotion.toLowerCase()) {
      case 'happy':
      case 'surprised':
        // Slightly increase emotional intensity for more expressive emotions
        emotionalIntensity = Math.min(2.0, emotionalIntensity * 1.1);
        break;
      case 'angry':
        // Slightly increase for angry expression
        emotionalIntensity = Math.min(2.0, emotionalIntensity * 1.05);
        break;
      case 'sad':
      default:
        // Keep the configured emotional intensity as-is
        break;
    }
    return { emotionalIntensity };
  }

  /**
   * Get the text used for a test synthesis.
   * @param textVoiceText Caller-supplied text; used when truthy
   * @returns The supplied text, or a built-in default message
   */
  getTestMessage(textVoiceText) {
    return textVoiceText || 'Aivis Cloud APIを使用します';
  }
}

/**
 * Clamp a value into [min, max], keeping `current` when the value does not
 * coerce to a number. Without this guard Math.max/Math.min would yield NaN,
 * which JSON.stringify serializes as null in the request body.
 */
function clampOrKeep(value, min, max, current) {
  const numeric = Number(value);
  if (Number.isNaN(numeric)) {
    return current;
  }
  return Math.max(min, Math.min(max, numeric));
}

/**
 * Build the error message for a non-OK response.
 * `errorText` is the response body, or null when the body could not be read;
 * messages that embed the body fall back to a generic one in that case.
 */
function describeHttpError(status, errorText, modelUuid) {
  // These messages do not depend on the response body
  switch (status) {
    case 401:
      return 'Invalid API key for Aivis Cloud';
    case 402:
      return 'Insufficient credit balance in Aivis Cloud account';
    case 404:
      return `Model UUID not found: ${modelUuid}`;
    case 429:
      return 'Rate limit exceeded for Aivis Cloud API';
    default:
      break;
  }
  if (errorText === null) {
    return `HTTP error ${status}`;
  }
  switch (status) {
    case 422:
      return `Invalid request parameters: ${errorText}`;
    case 500:
    case 503:
    case 504:
      return `Aivis Cloud server error: ${errorText}`;
    default:
      return `Failed to fetch TTS from Aivis Cloud: ${status} - ${errorText}`;
  }
}