UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

161 lines 5.15 kB
import { request } from 'gaxios';
import * as FFMpegTranscoder from '../codecs/FFMpegTranscoder.js';
import { Logger } from '../utilities/Logger.js';
import { logToStderr } from '../utilities/Utilities.js';
import { extendDeep } from '../utilities/ObjectUtilities.js';
import { decodeBase64 } from '../encodings/Base64.js';
import { isWordOrSymbolWord, splitToWords } from '../nlp/Segmentation.js';

const log = logToStderr;

/**
 * Requests speech synthesis from the ElevenLabs "with-timestamps" endpoint,
 * decodes the returned audio, and, when character alignment data is present
 * in the response, derives a word-level timeline from it.
 *
 * @param {string} text - Text to synthesize.
 * @param {string} voiceId - ElevenLabs voice identifier, interpolated into the request URL.
 * @param {string} language - Language passed to `splitToWords` when segmenting the aligned text.
 * @param {object} options - Overrides merged over `defaultElevenLabsTTSOptions` via `extendDeep`.
 * @returns {Promise<{ rawAudio: any, timeline: Array<object> | undefined }>}
 *   `rawAudio` is the result of `FFMpegTranscoder.decodeToChannels`; `timeline` is
 *   `undefined` when the response carries no usable alignment data.
 * @throws Rethrows any error raised by the HTTP request, after logging the
 *   response status and body when the error carries a response.
 */
export async function synthesize(text, voiceId, language, options) {
    const logger = new Logger();

    logger.start('Request synthesis from ElevenLabs');

    const resolvedOptions = extendDeep(defaultElevenLabsTTSOptions, options);

    let response;

    try {
        response = await request({
            url: `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}/with-timestamps`,
            method: 'POST',

            headers: {
                // NOTE(review): the response is parsed as JSON; 'audio/mpeg' is kept
                // as-is since the endpoint is known to work with it - confirm whether
                // 'application/json' would be more appropriate.
                'accept': 'audio/mpeg',
                'xi-api-key': resolvedOptions.apiKey,
            },

            params: {
                output_format: 'mp3_44100_64',
            },

            data: {
                text,
                model_id: resolvedOptions.modelId,

                // The API field is `voice_settings` (plural). The previous
                // `voice_setting` key is not part of the request schema.
                voice_settings: {
                    stability: resolvedOptions.stability,
                    similarity_boost: resolvedOptions.similarityBoost,
                    style: resolvedOptions.style,
                    use_speaker_boost: resolvedOptions.useSpeakerBoost,
                },

                seed: resolvedOptions.seed,
            },

            responseType: 'json',
        });
    } catch (e) {
        const errorResponse = e.response;

        if (errorResponse) {
            logger.log(`Request failed with status code ${errorResponse.status}`);

            if (errorResponse.data) {
                logger.log(`Server responded with:`);
                logger.log(errorResponse.data);
            }
        }

        throw e;
    }

    logger.start('Decode synthesized audio');

    const audioData = decodeBase64(response.data.audio_base64);
    const rawAudio = await FFMpegTranscoder.decodeToChannels(audioData);

    let timeline;

    const alignment = response.data.alignment;
    const characters = alignment?.characters;
    const characterStartTimes = alignment?.character_start_times_seconds;
    const characterEndTimes = alignment?.character_end_times_seconds;

    if (characters && characterStartTimes && characterEndTimes) {
        logger.start('Create timeline from returned character timings');

        const referenceText = characters.join('');
        const segmentedWords = await splitToWords(referenceText, language);
        const words = segmentedWords.filter(w => isWordOrSymbolWord(w));

        timeline = createWordTimeline(referenceText, words, characterStartTimes, characterEndTimes);
    }

    logger.end();

    return { rawAudio, timeline };
}

/**
 * Builds word timeline entries by locating each word in the reference text
 * (searching forward from the end of the previous match) and reading the
 * per-character timings at the matched positions.
 *
 * @param {string} referenceText - The aligned characters joined into one string.
 * @param {string[]} words - Words to locate, in order of appearance.
 * @param {number[]} characterStartTimes - Start time for each character index.
 * @param {number[]} characterEndTimes - End time for each character index.
 * @returns {Array<{ type: 'word', text: string, startTime: number, endTime: number }>}
 */
function createWordTimeline(referenceText, words, characterStartTimes, characterEndTimes) {
    const timeline = [];
    let offset = 0;

    for (const word of words) {
        const wordStartIndex = referenceText.indexOf(word, offset);

        // A word that can't be found verbatim would otherwise yield an entry with
        // an undefined start time and move the search offset backwards.
        if (wordStartIndex === -1) {
            continue;
        }

        const wordEndIndex = wordStartIndex + word.length;

        timeline.push({
            type: 'word',
            text: word,
            startTime: characterStartTimes[wordStartIndex],
            // NOTE(review): this reads the end time of the character *following* the
            // word, falling back to the word's last character at the end of the text.
            // Kept as in the original - confirm this is intended rather than an off-by-one.
            endTime: characterEndTimes[wordEndIndex] ?? characterEndTimes[wordEndIndex - 1],
        });

        offset = wordEndIndex;
    }

    return timeline;
}

/**
 * Fetches the voices available to the given API key and maps each one to an
 * object with `name`, `languages`, `gender` and `elevenLabsVoiceId`.
 *
 * @param {string} apiKey - Sent as the `xi-api-key` request header.
 * @returns {Promise<Array<{ name: string, languages: string[], gender: string, elevenLabsVoiceId: string }>>}
 */
export async function getVoiceList(apiKey) {
    const response = await request({
        method: 'GET',
        url: 'https://api.elevenlabs.io/v1/voices',

        headers: {
            // The header value is the media type only; the previous value repeated
            // the header name inside it ('accept: application/json').
            'accept': 'application/json',
            'xi-api-key': apiKey,
        },

        responseType: 'json',
    });

    const elevenLabsVoices = response.data.voices;

    const voices = elevenLabsVoices.map(elevenLabsVoice => {
        const gender = elevenLabsVoice?.labels?.gender ?? 'unknown';

        // The accent-derived English variant comes first, followed by every
        // language listed for the multilingual models.
        const supportedLanguages = [
            languageForAccent(elevenLabsVoice?.labels?.accent),
            ...supportedLanguagesInMultilingualModels,
        ];

        return {
            name: elevenLabsVoice.name,
            languages: supportedLanguages,
            gender,
            elevenLabsVoiceId: elevenLabsVoice.voice_id,
        };
    });

    return voices;
}

/**
 * Maps a voice's accent label to an English language code, defaulting to
 * plain 'en' when the accent is missing or not one of the recognized values.
 *
 * @param {string | undefined} accent - Accent label, compared case-insensitively.
 * @returns {string} One of 'en-US', 'en-GB', 'en-IE', 'en-AU' or 'en'.
 */
function languageForAccent(accent) {
    const normalizedAccent = accent?.toLowerCase() ?? '';

    if (normalizedAccent.startsWith('american')) {
        return 'en-US';
    }

    if (normalizedAccent.startsWith('british')) {
        return 'en-GB';
    }

    if (normalizedAccent === 'irish') {
        return 'en-IE';
    }

    if (normalizedAccent === 'australian') {
        return 'en-AU';
    }

    return 'en';
}

/** Default option values; caller-supplied options are merged over these in `synthesize`. */
export const defaultElevenLabsTTSOptions = {
    apiKey: undefined,
    modelId: 'eleven_multilingual_v2',
    stability: 0.5,
    similarityBoost: 0.5,
    style: 0,
    useSpeakerBoost: true,
    seed: undefined,
};

/** Language codes appended to every voice's `languages` list by `getVoiceList`. */
export const supportedLanguagesInMultilingualModels = [
    'ja', 'zh', 'de', 'hi', 'fr', 'ko', 'pt', 'it', 'es', 'id', 'nl', 'tr', 'fil', 'pl', 'sv', 'bg',
    'ro', 'ar', 'cs', 'el', 'fi', 'hr', 'ms', 'sk', 'da', 'ta', 'uk', 'ru', 'hu', 'no', 'vi',
];
//# sourceMappingURL=ElevenLabsTTS.js.map