UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

140 lines 6.04 kB
import * as EspeakTTS from '../synthesis/EspeakTTS.js';
import { logToStderr } from '../utilities/Utilities.js';
import * as Segmentation from './Segmentation.js';

const log = logToStderr;

// IPA primary (ˈ) and secondary (ˌ) stress marks. They are split off into their own tokens.
const stressMarks = ['ˈ', 'ˌ'];

// Phrase breaker tokens that terminate a sentence.
const sentenceEnders = ['.', '?', '!'];

/**
 * Separates a leading or trailing stress mark from a phoneme token.
 *
 * A leading mark is checked first, so a token that both starts and ends with a
 * stress mark is split at its start only.
 *
 * @param {string} token - Non-empty phoneme token.
 * @returns {string[] | undefined} Two-element array (mark and remainder, in their
 *   original order), or `undefined` when the token carries no stress mark.
 */
function splitStressMark(token) {
	if (stressMarks.some(mark => token.startsWith(mark))) {
		return [token[0], token.substring(1)];
	}

	if (stressMarks.some(mark => token.endsWith(mark))) {
		const lastIndex = token.length - 1;

		return [token.substring(0, lastIndex), token[lastIndex]];
	}

	return undefined;
}

/**
 * Expands a single raw phoneme token into zero or more output phonemes.
 *
 * Empty tokens and tokens starting with `(` are dropped. Tokens carrying a stress
 * mark are split and are NOT looked up in the substitution map. Any other token is
 * replaced by its substitution map entry when one exists.
 *
 * @param {string} token - Raw phoneme token.
 * @param {Map<string, string[]>} [substitutionMap] - Optional phoneme substitutions.
 * @returns {string[]} The resulting phonemes.
 */
function expandPhonemeToken(token, substitutionMap) {
	if (!token || token.startsWith('(')) {
		return [];
	}

	return splitStressMark(token) ?? (substitutionMap?.get(token) || [token]);
}

/**
 * Chooses the breaker token appended after a segmented phrase.
 *
 * @param {{ text: string, isSentenceFinalizer: boolean }} phrase - Phrase entry produced by the segmenter.
 * @returns {string} `?`, `!` or `.` for a sentence-final phrase; otherwise the
 *   phrase's trailing `:` or `;`, falling back to `,`.
 */
function getPhraseBreaker(phrase) {
	const trimmedPhraseText = phrase.text.trim();

	if (phrase.isSentenceFinalizer) {
		if (trimmedPhraseText.endsWith('?') || trimmedPhraseText.endsWith(`?"`)) {
			return '?';
		}

		if (trimmedPhraseText.endsWith('!') || trimmedPhraseText.endsWith(`!"`)) {
			return '!';
		}

		return '.';
	}

	const lastChar = trimmedPhraseText[trimmedPhraseText.length - 1];

	return lastChar === ':' || lastChar === ';' ? lastChar : ',';
}

/**
 * Phonemizes a single sentence.
 *
 * The phoneme string returned by `EspeakTTS.textToPhonemes` is split into phrases
 * on `' | '`, into words on runs of spaces and into phoneme tokens on `_`.
 * Words and phrases that end up empty are omitted.
 *
 * @param {string} sentence - Sentence text.
 * @param {string} espeakVoice - Voice identifier passed through to eSpeak.
 * @param {Map<string, string[]>} [substitutionMap] - Optional substitutions, applied
 *   only to tokens that carry no stress mark.
 * @param {boolean} [useIpa=true] - Passed through to `EspeakTTS.textToPhonemes`.
 * @returns {Promise<string[][][]>} Phrases, each a list of words, each a list of phonemes.
 */
export async function phonemizeSentence(sentence, espeakVoice, substitutionMap, useIpa = true) {
	const ipaString = await EspeakTTS.textToPhonemes(sentence, espeakVoice, useIpa);

	const phrases = [];

	for (const phraseString of ipaString.split(' | ')) {
		const words = [];

		for (const wordString of phraseString.trim().split(/ +/g)) {
			const wordPhonemes = wordString
				.split('_')
				.flatMap(token => expandPhonemeToken(token, substitutionMap));

			if (wordPhonemes.length > 0) {
				words.push(wordPhonemes);
			}
		}

		if (words.length > 0) {
			phrases.push(words);
		}
	}

	return phrases;
}

/**
 * Segments a text into phrases and phonemizes them.
 *
 * Some CJK and bracket punctuation is first normalized to ASCII commas and periods.
 * For every phrase, only the words accepted by `Segmentation.isWordOrSymbolWord`
 * are kept (with `.` inside a word replaced by a space), and a breaker token is
 * derived from the phrase's trailing punctuation.
 *
 * @param {string} text - Input text.
 * @param {string} voice - Voice identifier passed to the segmenter and to eSpeak.
 * @param {Map<string, string[]>} [substitutionMap] - Optional phoneme substitutions.
 * @returns {Promise<string[][][]>} Result of `phonemizePhrases` for the prepared phrases.
 */
export async function phonemizeText(text, voice, substitutionMap) {
	const normalizedText = text
		// U+FF0C FULLWIDTH COMMA. Written as an escape so the character cannot be
		// silently folded to an ASCII comma, which would turn this into a no-op.
		.replaceAll('\uFF0C', ',')
		.replaceAll('、', ',')
		.replaceAll('。', '.')
		.replaceAll('(', ', ')
		.replaceAll(')', ', ')
		.replaceAll('«', ', ')
		.replaceAll('»', ', ');

	const segmentedText = await Segmentation.parse(normalizedText, voice);

	const preparedPhrases = [];
	const phraseBreakers = [];

	for (const sentence of segmentedText) {
		for (const phrase of sentence.phrases) {
			const preparedPhraseText = phrase.words
				.filter(wordObject => Segmentation.isWordOrSymbolWord(wordObject.text))
				.map(wordObject => wordObject.text.replace(/\./g, ' '))
				.join(' ');

			preparedPhrases.push(preparedPhraseText);
			phraseBreakers.push(getPhraseBreaker(phrase));
		}
	}

	return phonemizePhrases(preparedPhrases, voice, phraseBreakers, substitutionMap);
}

/**
 * Phonemizes a list of phrases in a single eSpeak call.
 *
 * The phrases are joined with blank lines and passed to `EspeakTTS.textToIPA`. The
 * result is split on newlines and must yield exactly one line per phrase breaker.
 * Each line is split into words and phonemes, and the matching breaker is appended
 * as the final token of the line's last word.
 *
 * @param {string[]} phrases - Phrase texts.
 * @param {string} voice - Voice identifier passed through to eSpeak.
 * @param {string[]} phraseBreakers - One breaker token per phrase.
 * @param {Map<string, string[]>} [substitutionMap] - Optional substitutions, applied to
 *   every resulting phoneme (including split-off stress marks).
 * @returns {Promise<string[][][]>} One entry per phrase: a list of words, each a list of phonemes.
 * @throws {Error} When the number of IPA lines differs from the number of phrase breakers.
 */
export async function phonemizePhrases(phrases, voice, phraseBreakers, substitutionMap) {
	if (phrases.length === 0) {
		return [];
	}

	const preparedText = phrases.join('\n\n');

	const ipaString = await EspeakTTS.textToIPA(preparedText, voice);

	const ipaLines = ipaString.split('\n');

	if (ipaLines.length !== phraseBreakers.length) {
		log(phrases);
		log(ipaLines);
		log(phraseBreakers);

		throw new Error(`Unexpected: IPA lines count (${ipaLines.length}) is not equal to phrase breakers count (${phraseBreakers.length})`);
	}

	const phonemeLines = ipaLines.map(line => {
		// Collapse repeated separators so that splitting does not yield spurious empty tokens
		const words = line.replace(/_+/g, '_').replace(/ +/g, ' ').split(' ');

		return words.map(word => {
			const wordPhonemes = word
				.replaceAll('_', ' ')
				.trim()
				.split(' ')
				.flatMap(token => expandPhonemeToken(token));

			if (!substitutionMap) {
				return wordPhonemes;
			}

			return wordPhonemes.flatMap(phoneme => substitutionMap.get(phoneme) || [phoneme]);
		});
	});

	for (const [lineIndex, line] of phonemeLines.entries()) {
		// Splitting a string always yields at least one entry, so every line has a last word
		const lastWordInLine = line[line.length - 1];

		lastWordInLine.push(phraseBreakers[lineIndex]);
	}

	return phonemeLines;
}

/**
 * Groups phonemized phrases into sentences.
 *
 * A sentence ends after a phrase whose last word ends with `.`, `?` or `!`.
 * Trailing phrases without a sentence-ending token form a final sentence.
 * Phrases containing no words are skipped.
 *
 * @param {string[][][]} phonemizedPhrases - Phrases, each a list of words, each a list of phonemes.
 * @returns {string[][][]} Sentences, each the concatenated word lists of its phrases.
 */
export function phonemizedPhrasesToSentences(phonemizedPhrases) {
	const phonemizedSentences = [];
	let currentSentence = [];

	for (const phonemizedPhrase of phonemizedPhrases) {
		// An empty phrase has no last word to inspect; skip it instead of throwing
		if (phonemizedPhrase.length === 0) {
			continue;
		}

		currentSentence.push(...phonemizedPhrase);

		const lastWord = phonemizedPhrase[phonemizedPhrase.length - 1];
		const lastPhoneme = lastWord[lastWord.length - 1];

		if (sentenceEnders.includes(lastPhoneme)) {
			phonemizedSentences.push(currentSentence);
			currentSentence = [];
		}
	}

	if (currentSentence.length > 0) {
		phonemizedSentences.push(currentSentence);
	}

	return phonemizedSentences;
}
//# sourceMappingURL=EspeakPhonemizer.js.map