/*
 * @aituber-onair/voice
 * Version:
 * Voice synthesis library for AITuber OnAir
 * 514 lines (513 loc) • 18.3 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.MinimaxEngine = void 0;
const voiceEngine_1 = require("../constants/voiceEngine");
/**
 * MiniMax TTS voice synthesis engine.
 *
 * Holds MiniMax-specific configuration (model, endpoint region, GroupId,
 * language boost, voice/audio overrides) and converts text to audio by
 * POSTing to the MiniMax text-to-speech endpoint, whose JSON response carries
 * the audio as a hex string that is decoded into an ArrayBuffer.
 */
class MinimaxEngine {
    constructor() {
        this.model = 'speech-2.6-hd';
        this.defaultVoiceId = 'male-qn-qingse';
        this.language = 'Japanese';
        this.endpoint = 'global';
        // Only explicitly configured keys are present in these maps; anything
        // missing falls back to emotion defaults (voice) or fixed defaults (audio).
        this.voiceOverrides = {};
        this.audioOverrides = {};
    }
    /**
     * Set GroupId for MiniMax API
     *
     * GroupId is a unique identifier for the user group in MiniMax's system.
     * Unlike other TTS engines that only require an API key, MiniMax requires
     * both an API key and a GroupId for authentication and usage tracking
     * (user group management, usage statistics, billing and quota).
     *
     * You must obtain this pre-generated value from your MiniMax account dashboard.
     *
     * @param groupId GroupId for MiniMax API (required for production synthesis)
     */
    setGroupId(groupId) {
        this.groupId = groupId;
    }
    /**
     * Set endpoint region for MiniMax API
     * @param endpoint Endpoint region ('global' or 'china')
     */
    setEndpoint(endpoint) {
        this.endpoint = endpoint;
    }
    /**
     * Set model for MiniMax TTS
     * Available models:
     * - speech-2.6-hd: Latest flagship HD model with highest fidelity
     * - speech-2.6-turbo: Low-latency Turbo model from 2.6 generation
     * - speech-2.5-hd-preview: Latest high-quality model (preview)
     * - speech-2.5-turbo-preview: Latest fast model (preview)
     * - speech-02-hd: High-quality model
     * - speech-02-turbo: Fast model
     * - speech-01-hd: Previous generation high-quality model
     * - speech-01-turbo: Previous generation fast model
     * @param model Model name
     */
    setModel(model) {
        this.model = model;
    }
    /**
     * Set language boost
     * @param language Value sent as `language_boost` with every synthesis request
     */
    setLanguage(language) {
        this.language = language;
    }
    /**
     * Set voice setting overrides (speed, volume, pitch)
     * @param settings Voice setting overrides; null/undefined values clear an override
     */
    setVoiceSettings(settings) {
        this.updateVoiceOverrides(settings);
    }
    /**
     * Set speech speed multiplier
     * @param speed Speed multiplier (null/undefined clears the override)
     */
    setSpeed(speed) {
        this.updateVoiceOverrides({ speed });
    }
    /**
     * Set output volume multiplier
     * @param vol Volume multiplier (null/undefined clears the override)
     */
    setVolume(vol) {
        this.updateVoiceOverrides({ vol });
    }
    /**
     * Set pitch adjustment in semitones
     * @param pitch Pitch adjustment (null/undefined clears the override)
     */
    setPitch(pitch) {
        this.updateVoiceOverrides({ pitch });
    }
    /**
     * Set audio encoding overrides (sample rate, bitrate, format, channel)
     * @param settings Audio setting overrides; null/undefined values clear an override
     */
    setAudioSettings(settings) {
        this.updateAudioOverrides(settings);
    }
    /**
     * Set audio sampling rate (Hz)
     * @param sampleRate Sampling rate
     */
    setSampleRate(sampleRate) {
        this.updateAudioOverrides({ sampleRate });
    }
    /**
     * Set audio bitrate (bps)
     * @param bitrate Bitrate
     */
    setBitrate(bitrate) {
        this.updateAudioOverrides({ bitrate });
    }
    /**
     * Set audio output format
     * @param format Audio format
     */
    setAudioFormat(format) {
        this.updateAudioOverrides({ format });
    }
    /**
     * Set audio channel count
     * @param channel Number of channels
     */
    setAudioChannel(channel) {
        this.updateAudioOverrides({ channel });
    }
    /**
     * Alias for setLanguage to emphasize MiniMax terminology
     * @param language Language boost string
     */
    setLanguageBoost(language) {
        this.setLanguage(language);
    }
    /**
     * Get current API endpoint URL based on selected endpoint
     * @returns API endpoint URL
     */
    getTtsApiUrl() {
        return this.endpoint === 'china'
            ? voiceEngine_1.MINIMAX_CHINA_API_URL
            : voiceEngine_1.MINIMAX_GLOBAL_API_URL;
    }
    /**
     * Get current voice list API endpoint URL based on selected endpoint
     * @returns Voice list API endpoint URL
     */
    getVoiceListApiUrl() {
        return this.endpoint === 'china'
            ? voiceEngine_1.MINIMAX_CHINA_VOICE_LIST_URL
            : voiceEngine_1.MINIMAX_GLOBAL_VOICE_LIST_URL;
    }
    /**
     * Get available voice speakers list
     * Requires only API key
     * @param apiKey MiniMax API key
     * @returns Promise<MinimaxVoiceSpeaker[]> (empty array when the response has no speakers)
     * @throws Error when the API key is missing, the HTTP status is not OK,
     *         or the response reports a non-zero base_resp.status_code
     */
    async getVoiceList(apiKey) {
        if (!apiKey) {
            throw new Error('MiniMax API key is required');
        }
        const response = await fetch(this.getVoiceListApiUrl(), {
            method: 'GET',
            headers: {
                Authorization: `Bearer ${apiKey}`,
                'Content-Type': 'application/json',
            },
        });
        if (!response.ok) {
            throw await this.buildHttpError(response, 'Failed to fetch voice list from MiniMax', 'Failed to fetch voice list');
        }
        const result = await response.json();
        this.assertApiSuccess(result);
        return result?.data?.speakers || [];
    }
    /**
     * Build MiniMax voice settings by merging emotion defaults with overrides
     * @param voiceId Target voice ID
     * @param defaults Default emotion-based values ({ speed, vol, pitch })
     * @returns Object in the `voice_setting` request shape
     */
    buildVoiceSetting(voiceId, defaults) {
        const { speed, vol, pitch } = this.voiceOverrides;
        return {
            voice_id: voiceId,
            speed: speed ?? defaults.speed,
            vol: vol ?? defaults.vol,
            pitch: pitch ?? defaults.pitch,
        };
    }
    /**
     * Build MiniMax audio settings from overrides
     * @returns Object in the `audio_setting` request shape; unset values
     *          default to 32000 Hz, 128000 bps, 'mp3', 1 channel
     */
    buildAudioSetting() {
        const { sampleRate, bitrate, format, channel } = this.audioOverrides;
        return {
            sample_rate: sampleRate ?? 32000,
            bitrate: bitrate ?? 128000,
            format: format ?? 'mp3',
            channel: channel ?? 1,
        };
    }
    /**
     * Test voice synthesis with minimal requirements
     * Requires API key and voice ID, but not GroupId
     * @param text Text to synthesize (truncated to 100 characters plus '...')
     * @param voiceId Voice ID to test
     * @param apiKey MiniMax API key
     * @returns Promise<ArrayBuffer>
     * @throws Error when apiKey/voiceId is missing or the request fails
     */
    async testVoice(text, voiceId, apiKey) {
        if (!apiKey) {
            throw new Error('MiniMax API key is required');
        }
        if (!voiceId) {
            throw new Error('Voice ID is required');
        }
        // Limit test text length to avoid quota waste
        const testText = text.length > 100 ? `${text.substring(0, 100)}...` : text;
        const requestBody = {
            model: this.model,
            text: testText,
            stream: false,
            // Tests always start from neutral defaults rather than an emotion preset.
            voice_setting: this.buildVoiceSetting(voiceId, {
                speed: 1.0,
                vol: 1.0,
                pitch: 0,
            }),
            audio_setting: this.buildAudioSetting(),
            language_boost: this.language,
        };
        // Fall back to a placeholder GroupId when none has been configured.
        const tempGroupId = this.groupId || '1';
        return this.requestSpeech(requestBody, tempGroupId, apiKey, {
            log: 'Failed to test voice from MiniMax',
            message: 'Failed to test voice',
        });
    }
    /**
     * Full production audio synthesis
     * Requires API key, voice ID, and GroupId
     * @param input Talk object
     * @param speaker Voice ID
     * @param apiKey MiniMax API key
     * @param voiceActor Not used for MiniMax (for interface compatibility)
     * @returns Promise<ArrayBuffer>
     */
    async fetchAudio(input, speaker, apiKey, voiceActor) {
        return this.fetchAudioWithOptions(input, speaker, apiKey, true);
    }
    /**
     * Audio synthesis with flexible GroupId requirement
     * @param input Talk object; `message` is the text, optional `style` selects the emotion preset
     * @param speaker Voice ID (falls back to the engine's default voice when falsy)
     * @param apiKey MiniMax API key
     * @param requireGroupId Whether to require GroupId (default: true)
     * @returns Promise<ArrayBuffer>
     * @throws Error when the API key or a required GroupId is missing, the
     *         trimmed text exceeds 5000 characters, or the request fails
     */
    async fetchAudioWithOptions(input, speaker, apiKey, requireGroupId = true) {
        if (!apiKey) {
            throw new Error('MiniMax API key is required');
        }
        if (requireGroupId && !this.groupId) {
            throw new Error('MiniMax GroupId is required for production synthesis. Please set it using setGroupId(), or use testVoice() for testing.');
        }
        const text = input.message.trim();
        // Validate text length (max 5000 characters)
        if (text.length > 5000) {
            throw new Error('Text exceeds maximum length of 5000 characters');
        }
        // Get emotion from the talk style and adjust voice settings
        const emotionVoiceSettings = this.getVoiceSettings(input.style || 'talk');
        const requestBody = {
            model: this.model,
            text,
            stream: false,
            voice_setting: this.buildVoiceSetting(speaker || this.defaultVoiceId, emotionVoiceSettings),
            audio_setting: this.buildAudioSetting(),
            language_boost: this.language,
        };
        // Use provided GroupId or a placeholder one when GroupId is not required
        const groupIdToUse = this.groupId || '1';
        return this.requestSpeech(requestBody, groupIdToUse, apiKey, {
            log: 'Failed to fetch TTS from MiniMax',
            message: 'Failed to fetch TTS from MiniMax',
        });
    }
    /**
     * Internal helper: POST a synthesis request and decode the returned audio.
     * Shared by testVoice() and fetchAudioWithOptions() so both paths validate
     * the response identically.
     * @param requestBody JSON-serializable request payload
     * @param groupId GroupId placed in the query string (URL-encoded here)
     * @param apiKey MiniMax API key used as the Bearer token
     * @param labels `{ log, message }` prefixes for the console log and the thrown Error on HTTP failure
     * @returns Promise<ArrayBuffer> decoded audio bytes
     * @throws Error on HTTP failure, API-level failure, missing audio, or invalid hex
     */
    async requestSpeech(requestBody, groupId, apiKey, labels) {
        // Encode the GroupId so characters such as '&', '#' or spaces cannot
        // alter the query string.
        const url = `${this.getTtsApiUrl()}?GroupId=${encodeURIComponent(groupId)}`;
        const response = await fetch(url, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                Authorization: `Bearer ${apiKey}`,
            },
            body: JSON.stringify(requestBody),
        });
        if (!response.ok) {
            throw await this.buildHttpError(response, labels.log, labels.message);
        }
        const result = await response.json();
        this.assertApiSuccess(result);
        // Get audio data from response
        if (!result?.data?.audio) {
            console.error('Invalid response structure:', result);
            throw new Error('Audio data not found in MiniMax response');
        }
        // Convert hex string to ArrayBuffer
        try {
            return this.hexToArrayBuffer(result.data.audio);
        }
        catch (error) {
            console.error('Failed to convert hex audio data:', error);
            // Keep the original failure reachable through `cause`.
            throw new Error(`Failed to process audio data: ${error}`, { cause: error });
        }
    }
    /**
     * Internal helper: log a non-OK HTTP response and build the Error for it.
     * Uses the response body as detail, or the status text if the body cannot be read.
     * @param response fetch Response whose `ok` flag is false
     * @param logLabel Prefix for the console.error line (a ':' is appended)
     * @param messagePrefix Prefix for the Error message
     * @returns Promise<Error> the error to throw
     */
    async buildHttpError(response, logLabel, messagePrefix) {
        let detail;
        try {
            detail = await response.text();
        }
        catch {
            detail = response.statusText;
        }
        console.error(`${logLabel}:`, response.status, detail);
        return new Error(`${messagePrefix}: ${response.status} - ${detail}`);
    }
    /**
     * Internal helper: throw when the parsed response carries a `base_resp`
     * whose status_code is not 0. Responses without `base_resp` pass.
     * @param result Parsed JSON response
     * @throws Error with the API status code and message
     */
    assertApiSuccess(result) {
        const baseResp = result?.base_resp;
        if (baseResp && baseResp.status_code !== 0) {
            const errorMsg = baseResp.status_msg || 'Unknown error';
            throw new Error(`MiniMax API error: ${baseResp.status_code} - ${errorMsg}`);
        }
    }
    /**
     * Check if GroupId is configured
     * @returns boolean
     */
    hasGroupId() {
        return !!this.groupId;
    }
    /**
     * Get current endpoint setting
     * @returns MinimaxEndpoint
     */
    getEndpoint() {
        return this.endpoint;
    }
    /**
     * Set custom API endpoint URL (VoiceEngine interface compatibility)
     *
     * The URL itself is not stored: it only selects a region. URLs containing
     * 'minimaxi.com' select 'china'; anything else (including a non-string
     * value) selects 'global'.
     * @param apiUrl custom API endpoint URL
     */
    setApiEndpoint(apiUrl) {
        const isChinaUrl = typeof apiUrl === 'string' && apiUrl.includes('minimaxi.com');
        this.endpoint = isChinaUrl ? 'china' : 'global';
    }
    /**
     * Get voice settings based on emotion
     * @param emotion Emotion type (case-insensitive); unknown values yield neutral settings
     * @returns Voice settings ({ speed, vol, pitch })
     */
    getVoiceSettings(emotion) {
        switch (emotion.toLowerCase()) {
            case 'happy':
                return { speed: 1.1, vol: 1.0, pitch: 1 };
            case 'sad':
                return { speed: 0.9, vol: 1.0, pitch: -1 };
            case 'angry':
                return { speed: 1.0, vol: 1.1, pitch: 0 };
            case 'surprised':
                return { speed: 1.2, vol: 1.0, pitch: 2 };
            default:
                // Neutral defaults
                return { speed: 1.0, vol: 1.0, pitch: 0 };
        }
    }
    /**
     * Merge incoming voice overrides into the current override map
     * Passing undefined or null removes the override and falls back to defaults.
     * Keys other than speed/vol/pitch are ignored.
     * @param settings Voice setting overrides
     */
    updateVoiceOverrides(settings) {
        this.mergeOverrides(this.voiceOverrides, settings, ['speed', 'vol', 'pitch']);
    }
    /**
     * Merge incoming audio overrides into the current override map
     * Passing undefined or null removes the override and falls back to defaults.
     * Keys other than sampleRate/bitrate/format/channel are ignored.
     * @param settings Audio setting overrides
     */
    updateAudioOverrides(settings) {
        this.mergeOverrides(this.audioOverrides, settings, ['sampleRate', 'bitrate', 'format', 'channel']);
    }
    /**
     * Internal helper: copy whitelisted keys from `settings` into `target`.
     * Whitelisting keeps unexpected keys (for example '__proto__') out of the
     * override maps.
     * @param target Override map to mutate
     * @param settings Incoming overrides (null/undefined is treated as empty)
     * @param allowedKeys Keys that may be stored in `target`
     */
    mergeOverrides(target, settings, allowedKeys) {
        for (const [key, value] of Object.entries(settings ?? {})) {
            if (!allowedKeys.includes(key)) {
                continue;
            }
            if (value === undefined || value === null) {
                delete target[key];
            }
            else {
                target[key] = value;
            }
        }
    }
    /**
     * Convert hex string to ArrayBuffer
     * @param hex Hex string (whitespace is ignored)
     * @returns ArrayBuffer with one byte per pair of hex digits
     * @throws Error when the digit count is odd or a non-hex character is present
     */
    hexToArrayBuffer(hex) {
        // Remove any whitespace or newlines
        const cleanHex = hex.replace(/\s/g, '');
        // Ensure even number of characters
        if (cleanHex.length % 2 !== 0) {
            throw new Error('Invalid hex string: odd number of characters');
        }
        // Validate hex string
        if (!/^[0-9a-fA-F]*$/.test(cleanHex)) {
            throw new Error('Invalid hex string: contains non-hex characters');
        }
        const bytes = new Uint8Array(cleanHex.length / 2);
        for (let i = 0; i < bytes.length; i += 1) {
            const offset = i * 2;
            bytes[i] = Number.parseInt(cleanHex.slice(offset, offset + 2), 16);
        }
        return bytes.buffer;
    }
    /**
     * Get the message used for voice tests
     * @param textVoiceText Caller-supplied text
     * @returns The supplied text, or a built-in Japanese sample sentence when it is falsy
     */
    getTestMessage(textVoiceText) {
        return textVoiceText || 'MiniMax Audioを使用します';
    }
}
exports.MinimaxEngine = MinimaxEngine;