UNPKG

@aituber-onair/voice

Version:

Voice synthesis library for AITuber OnAir

514 lines (513 loc) 18.3 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.MinimaxEngine = void 0;
const voiceEngine_1 = require("../constants/voiceEngine");

/** GroupId sent in the query string when no GroupId has been configured. */
const FALLBACK_GROUP_ID = '1';
/** Upper bound on `text.length` accepted by fetchAudioWithOptions(). */
const MAX_TEXT_LENGTH = 5000;
/** Upper bound on `text.length` sent by testVoice() before truncation. */
const MAX_TEST_TEXT_LENGTH = 100;
/** Voice override keys read by buildVoiceSetting(). */
const VOICE_OVERRIDE_KEYS = new Set(['speed', 'vol', 'pitch']);
/** Audio override keys read by buildAudioSetting(). */
const AUDIO_OVERRIDE_KEYS = new Set(['sampleRate', 'bitrate', 'format', 'channel']);
/** Voice values used when the emotion is unknown and for test requests. */
const NEUTRAL_VOICE = Object.freeze({ speed: 1.0, vol: 1.0, pitch: 0 });
/** Emotion name (lower case) -> voice values. A Map avoids prototype-key hits. */
const EMOTION_VOICE_PRESETS = new Map([
    ['happy', Object.freeze({ speed: 1.1, vol: 1.0, pitch: 1 })],
    ['sad', Object.freeze({ speed: 0.9, vol: 1.0, pitch: -1 })],
    ['angry', Object.freeze({ speed: 1.0, vol: 1.1, pitch: 0 })],
    ['surprised', Object.freeze({ speed: 1.2, vol: 1.0, pitch: 2 })],
]);

/**
 * Merge `settings` into `target`.
 * A null/undefined value removes the key; any other value is stored only when
 * the key is in `allowedKeys`, so unknown keys (including `__proto__`) are ignored.
 * @param target Override map to mutate
 * @param settings Incoming overrides
 * @param allowedKeys Set of keys that may be stored
 */
function applyOverrides(target, settings, allowedKeys) {
    for (const [key, value] of Object.entries(settings)) {
        if (value === undefined || value === null) {
            delete target[key];
        }
        else if (allowedKeys.has(key)) {
            target[key] = value;
        }
    }
}

/**
 * Log a non-OK HTTP response and build the Error describing it.
 * The response body is used as the detail; if reading it fails, the status
 * text is used instead.
 * @param response Response whose `ok` flag is false
 * @param logPrefix First argument passed to console.error
 * @param messagePrefix Text placed before ": <status> - <detail>" in the message
 * @returns Promise<Error>
 */
async function createHttpError(response, logPrefix, messagePrefix) {
    let detail;
    try {
        detail = await response.text();
    }
    catch (e) {
        detail = response.statusText;
    }
    console.error(logPrefix, response.status, detail);
    return new Error(`${messagePrefix}: ${response.status} - ${detail}`);
}

/**
 * Throw when the parsed body carries a `base_resp` with a non-zero status code.
 * @param result Parsed JSON body of a MiniMax response
 */
function assertApiSuccess(result) {
    const baseResp = result.base_resp;
    if (baseResp && baseResp.status_code !== 0) {
        const errorMsg = baseResp.status_msg || 'Unknown error';
        throw new Error(`MiniMax API error: ${baseResp.status_code} - ${errorMsg}`);
    }
}

/**
 * POST a synthesis request and decode the hex audio payload of the response.
 * Shared by testVoice() and fetchAudioWithOptions(), which differ only in the
 * request body and in the wording of their HTTP failure messages.
 * @param engine Engine supplying groupId, getTtsApiUrl() and hexToArrayBuffer()
 * @param requestBody JSON-serializable request payload
 * @param apiKey MiniMax API key
 * @param logPrefix console.error prefix for HTTP failures
 * @param messagePrefix Error message prefix for HTTP failures
 * @returns Promise<ArrayBuffer>
 */
async function requestAudio(engine, requestBody, apiKey, logPrefix, messagePrefix) {
    // Encode the GroupId so reserved characters cannot break the query string.
    const groupId = encodeURIComponent(engine.groupId || FALLBACK_GROUP_ID);
    const response = await fetch(`${engine.getTtsApiUrl()}?GroupId=${groupId}`, {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json',
            Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(requestBody),
    });
    if (!response.ok) {
        throw await createHttpError(response, logPrefix, messagePrefix);
    }
    const result = await response.json();
    assertApiSuccess(result);
    if (!result.data || !result.data.audio) {
        console.error('Invalid response structure:', result);
        throw new Error('Audio data not found in MiniMax response');
    }
    try {
        return engine.hexToArrayBuffer(result.data.audio);
    }
    catch (error) {
        console.error('Failed to convert hex audio data:', error);
        // Keep the original failure reachable through `cause`.
        throw new Error(`Failed to process audio data: ${error}`, { cause: error });
    }
}

/**
 * MiniMax TTS voice synthesis engine
 */
class MinimaxEngine {
    constructor() {
        this.model = 'speech-2.6-hd';
        this.defaultVoiceId = 'male-qn-qingse';
        this.language = 'Japanese';
        this.endpoint = 'global';
        this.voiceOverrides = {};
        this.audioOverrides = {};
    }

    /**
     * Set GroupId for MiniMax API
     *
     * GroupId is a unique identifier for the user group in MiniMax's system.
     * Unlike other TTS engines that only require an API key, MiniMax requires both
     * an API key and a GroupId for authentication and usage tracking.
     *
     * This GroupId is used by MiniMax for:
     * - User group management
     * - Usage statistics tracking
     * - Billing and quota management
     *
     * You must obtain this pre-generated value from your MiniMax account dashboard.
     *
     * @param groupId GroupId for MiniMax API (required for production synthesis)
     */
    setGroupId(groupId) {
        this.groupId = groupId;
    }

    /**
     * Set endpoint region for MiniMax API
     * @param endpoint Endpoint region ('global' or 'china')
     */
    setEndpoint(endpoint) {
        this.endpoint = endpoint;
    }

    /**
     * Set model for MiniMax TTS
     * Available models:
     * - speech-2.6-hd: Latest flagship HD model with highest fidelity
     * - speech-2.6-turbo: Low-latency Turbo model from 2.6 generation
     * - speech-2.5-hd-preview: Latest high-quality model (preview)
     * - speech-2.5-turbo-preview: Latest fast model (preview)
     * - speech-02-hd: High-quality model
     * - speech-02-turbo: Fast model
     * - speech-01-hd: Previous generation high-quality model
     * - speech-01-turbo: Previous generation fast model
     * @param model Model name
     */
    setModel(model) {
        this.model = model;
    }

    /**
     * Set language boost (sent as `language_boost` in synthesis requests)
     * @param language Language boost value
     */
    setLanguage(language) {
        this.language = language;
    }

    /**
     * Set voice setting overrides (speed, volume, pitch)
     * @param settings Voice setting overrides
     */
    setVoiceSettings(settings) {
        this.updateVoiceOverrides(settings);
    }

    /**
     * Set speech speed multiplier
     * @param speed Speed multiplier
     */
    setSpeed(speed) {
        this.updateVoiceOverrides({ speed });
    }

    /**
     * Set output volume multiplier
     * @param vol Volume multiplier
     */
    setVolume(vol) {
        this.updateVoiceOverrides({ vol });
    }

    /**
     * Set pitch adjustment in semitones
     * @param pitch Pitch adjustment
     */
    setPitch(pitch) {
        this.updateVoiceOverrides({ pitch });
    }

    /**
     * Set audio encoding overrides (sample rate, bitrate, format, channel)
     * @param settings Audio setting overrides
     */
    setAudioSettings(settings) {
        this.updateAudioOverrides(settings);
    }

    /**
     * Set audio sampling rate (Hz)
     * @param sampleRate Sampling rate
     */
    setSampleRate(sampleRate) {
        this.updateAudioOverrides({ sampleRate });
    }

    /**
     * Set audio bitrate (bps)
     * @param bitrate Bitrate
     */
    setBitrate(bitrate) {
        this.updateAudioOverrides({ bitrate });
    }

    /**
     * Set audio output format
     * @param format Audio format
     */
    setAudioFormat(format) {
        this.updateAudioOverrides({ format });
    }

    /**
     * Set audio channel count
     * @param channel Number of channels
     */
    setAudioChannel(channel) {
        this.updateAudioOverrides({ channel });
    }

    /**
     * Alias for setLanguage to emphasize MiniMax terminology
     * @param language Language boost string
     */
    setLanguageBoost(language) {
        this.setLanguage(language);
    }

    /**
     * Get current API endpoint URL based on selected endpoint
     * @returns API endpoint URL
     */
    getTtsApiUrl() {
        return this.endpoint === 'china'
            ? voiceEngine_1.MINIMAX_CHINA_API_URL
            : voiceEngine_1.MINIMAX_GLOBAL_API_URL;
    }

    /**
     * Get current voice list API endpoint URL based on selected endpoint
     * @returns Voice list API endpoint URL
     */
    getVoiceListApiUrl() {
        return this.endpoint === 'china'
            ? voiceEngine_1.MINIMAX_CHINA_VOICE_LIST_URL
            : voiceEngine_1.MINIMAX_GLOBAL_VOICE_LIST_URL;
    }

    /**
     * Get available voice speakers list
     * Requires only API key
     * @param apiKey MiniMax API key
     * @returns Promise<MinimaxVoiceSpeaker[]> `data.speakers` of the response, or [] when absent
     * @throws Error when the key is missing, the HTTP status is not OK, or
     *         `base_resp.status_code` is non-zero
     */
    async getVoiceList(apiKey) {
        if (!apiKey) {
            throw new Error('MiniMax API key is required');
        }
        const response = await fetch(this.getVoiceListApiUrl(), {
            method: 'GET',
            headers: {
                Authorization: `Bearer ${apiKey}`,
                'Content-Type': 'application/json',
            },
        });
        if (!response.ok) {
            throw await createHttpError(response, 'Failed to fetch voice list from MiniMax:', 'Failed to fetch voice list');
        }
        const result = await response.json();
        assertApiSuccess(result);
        return result.data?.speakers || [];
    }

    /**
     * Build MiniMax voice settings by merging emotion defaults with overrides
     * An override wins over the default whenever it is neither null nor undefined.
     * @param voiceId Target voice ID
     * @param defaults Default emotion-based values (speed, vol, pitch)
     * @returns Object with voice_id, speed, vol and pitch
     */
    buildVoiceSetting(voiceId, defaults) {
        const { speed, vol, pitch } = this.voiceOverrides;
        return {
            voice_id: voiceId,
            speed: speed ?? defaults.speed,
            vol: vol ?? defaults.vol,
            pitch: pitch ?? defaults.pitch,
        };
    }

    /**
     * Build MiniMax audio settings from overrides
     * Missing overrides fall back to 32000 / 128000 / 'mp3' / 1.
     * @returns Object with sample_rate, bitrate, format and channel
     */
    buildAudioSetting() {
        const { sampleRate, bitrate, format, channel } = this.audioOverrides;
        return {
            sample_rate: sampleRate ?? 32000,
            bitrate: bitrate ?? 128000,
            format: format ?? 'mp3',
            channel: channel ?? 1,
        };
    }

    /**
     * Test voice synthesis with minimal requirements
     * Requires API key and voice ID, but not GroupId
     * @param text Text to synthesize (shorter text recommended for testing)
     * @param voiceId Voice ID to test
     * @param apiKey MiniMax API key
     * @returns Promise<ArrayBuffer>
     * @throws Error when the key or voice ID is missing, or the request fails
     */
    async testVoice(text, voiceId, apiKey) {
        if (!apiKey) {
            throw new Error('MiniMax API key is required');
        }
        if (!voiceId) {
            throw new Error('Voice ID is required');
        }
        // Limit test text length to avoid quota waste
        const testText = text.length > MAX_TEST_TEXT_LENGTH
            ? `${text.substring(0, MAX_TEST_TEXT_LENGTH)}...`
            : text;
        const requestBody = {
            model: this.model,
            text: testText,
            stream: false,
            voice_setting: this.buildVoiceSetting(voiceId, NEUTRAL_VOICE),
            audio_setting: this.buildAudioSetting(),
            language_boost: this.language,
        };
        return requestAudio(this, requestBody, apiKey, 'Failed to test voice from MiniMax:', 'Failed to test voice');
    }

    /**
     * Full production audio synthesis
     * Requires API key, voice ID, and GroupId
     * @param input Talk object
     * @param speaker Voice ID
     * @param apiKey MiniMax API key
     * @param voiceActor Not used for MiniMax (for interface compatibility)
     * @returns Promise<ArrayBuffer>
     */
    async fetchAudio(input, speaker, apiKey, voiceActor) {
        return this.fetchAudioWithOptions(input, speaker, apiKey, true);
    }

    /**
     * Audio synthesis with flexible GroupId requirement
     * @param input Talk object; `message` is trimmed and sent, `style` selects the emotion preset
     * @param speaker Voice ID (falls back to the engine's default voice when falsy)
     * @param apiKey MiniMax API key
     * @param requireGroupId Whether to require GroupId (default: true)
     * @returns Promise<ArrayBuffer>
     * @throws Error when the key or a required GroupId is missing, the text is
     *         too long, or the request fails
     */
    async fetchAudioWithOptions(input, speaker, apiKey, requireGroupId = true) {
        if (!apiKey) {
            throw new Error('MiniMax API key is required');
        }
        if (requireGroupId && !this.groupId) {
            throw new Error('MiniMax GroupId is required for production synthesis. Please set it using setGroupId(), or use testVoice() for testing.');
        }
        const text = input.message.trim();
        if (text.length > MAX_TEXT_LENGTH) {
            throw new Error('Text exceeds maximum length of 5000 characters');
        }
        // Emotion comes from the talk style; 'talk' maps to the neutral preset.
        const emotionVoiceSettings = this.getVoiceSettings(input.style || 'talk');
        const requestBody = {
            model: this.model,
            text,
            stream: false,
            voice_setting: this.buildVoiceSetting(speaker || this.defaultVoiceId, emotionVoiceSettings),
            audio_setting: this.buildAudioSetting(),
            language_boost: this.language,
        };
        return requestAudio(this, requestBody, apiKey, 'Failed to fetch TTS from MiniMax:', 'Failed to fetch TTS from MiniMax');
    }

    /**
     * Check if GroupId is configured
     * @returns boolean
     */
    hasGroupId() {
        return !!this.groupId;
    }

    /**
     * Get current endpoint setting
     * @returns MinimaxEndpoint
     */
    getEndpoint() {
        return this.endpoint;
    }

    /**
     * Set custom API endpoint URL (VoiceEngine interface compatibility)
     * The URL itself is not stored: it only selects the region. A URL containing
     * 'minimaxi.com' selects 'china'; any other URL selects 'global'.
     * @param apiUrl custom API endpoint URL
     */
    setApiEndpoint(apiUrl) {
        this.endpoint = apiUrl.includes('minimaxi.com') ? 'china' : 'global';
    }

    /**
     * Get voice settings based on emotion
     * Matching is case-insensitive; unknown emotions yield the neutral values.
     * @param emotion Emotion type
     * @returns Voice settings ({ speed, vol, pitch }) as a fresh object
     */
    getVoiceSettings(emotion) {
        const preset = EMOTION_VOICE_PRESETS.get(emotion.toLowerCase()) ?? NEUTRAL_VOICE;
        // Copy so callers can never mutate the shared preset tables.
        return { ...preset };
    }

    /**
     * Merge incoming voice overrides into the current override map
     * Passing undefined or null removes the override and falls back to defaults.
     * Keys other than speed, vol and pitch are ignored.
     * @param settings Voice setting overrides
     */
    updateVoiceOverrides(settings) {
        applyOverrides(this.voiceOverrides, settings, VOICE_OVERRIDE_KEYS);
    }

    /**
     * Merge incoming audio overrides into the current override map
     * Passing undefined or null removes the override and falls back to defaults.
     * Keys other than sampleRate, bitrate, format and channel are ignored.
     * @param settings Audio setting overrides
     */
    updateAudioOverrides(settings) {
        applyOverrides(this.audioOverrides, settings, AUDIO_OVERRIDE_KEYS);
    }

    /**
     * Convert hex string to ArrayBuffer
     * Whitespace is stripped before decoding; each pair of hex digits becomes one byte.
     * @param hex Hex string
     * @returns ArrayBuffer
     * @throws Error when the cleaned string has odd length or non-hex characters
     */
    hexToArrayBuffer(hex) {
        const cleanHex = hex.replace(/\s/g, '');
        if (cleanHex.length % 2 !== 0) {
            throw new Error('Invalid hex string: odd number of characters');
        }
        if (!/^[0-9a-fA-F]*$/.test(cleanHex)) {
            throw new Error('Invalid hex string: contains non-hex characters');
        }
        const buffer = new ArrayBuffer(cleanHex.length / 2);
        const view = new Uint8Array(buffer);
        for (let i = 0; i < view.length; i += 1) {
            view[i] = Number.parseInt(cleanHex.slice(i * 2, i * 2 + 2), 16);
        }
        return buffer;
    }

    /**
     * Get the message used for a test utterance
     * @param textVoiceText Caller-provided text; used when truthy
     * @returns The provided text, or the built-in default message
     */
    getTestMessage(textVoiceText) {
        return textVoiceText || 'MiniMax Audioを使用します';
    }
}
exports.MinimaxEngine = MinimaxEngine;