echogarden
Version:
An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.
72 lines • 3.27 kB
JavaScript
import { readAndParseJsonFile } from '../utilities/FileSystem.js';
import { getShortLanguageCode } from '../utilities/Locale.js';
import { resolveToModuleRootDir } from '../utilities/PathUtilities.js';
/**
 * Searches a list of lexicons for a pronunciation substitution of one word.
 *
 * The lexicons are consulted from the last element to the first, so entries
 * in lexicons that appear later in the array take priority over earlier ones.
 * The result of the first lexicon that yields a truthy match is returned.
 *
 * @param {string[]} sentenceWords - Words of the sentence being processed.
 * @param {number} wordIndex - Index in `sentenceWords` of the word to look up.
 * @param {object[]} lexicons - Lexicon objects, lowest priority first.
 * @param {string} languageCode - Language code forwarded to `tryGetLexiconSubstitution`.
 * @returns {string[] | undefined} The match returned by `tryGetLexiconSubstitution`,
 *   or `undefined` when no lexicon produced one.
 */
export function tryGetFirstLexiconSubstitution(sentenceWords, wordIndex, lexicons, languageCode) {
    let remaining = lexicons.length;

    // Count down so the most recently added lexicon is tried first.
    while (remaining > 0) {
        remaining -= 1;

        const substitution = tryGetLexiconSubstitution(sentenceWords, wordIndex, lexicons[remaining], languageCode);

        if (substitution) {
            return substitution;
        }
    }

    return undefined;
}
/**
 * Looks up a pronunciation substitution for one word of a sentence in a single lexicon.
 *
 * The lexicon is indexed first by the short language code (the result of
 * `getShortLanguageCode(languageCode)`) and then by the exact word text. The entry
 * found there may be a single candidate object or an array of candidates. The
 * following candidate properties are read:
 *   - `pronunciation.espeak[languageCode]`: space-separated phonemes. Note that this
 *     is keyed by `languageCode` exactly as passed in, not by the short code.
 *   - `precededBy` / `notPrecededBy`: lists compared against the previous word.
 *   - `followedBy` (or `succeededBy`) / `notFollowedBy` (or `notSucceededBy`):
 *     lists compared against the next word.
 * A missing neighbouring word (sentence start or end) is treated as the empty string.
 *
 * Candidates are tried in order. Candidates without a usable pronunciation for
 * `languageCode` are skipped. A candidate is selected when at least one positive
 * pattern matches and no negative pattern matches; the last candidate in the entry
 * is selected unconditionally (it serves as the fallback).
 *
 * @param {string[]} sentenceWords - Words of the sentence being processed.
 * @param {number} wordIndex - Index in `sentenceWords` of the word to look up.
 * @param {object} lexicon - Lexicon object: short language code -> word -> entry.
 * @param {string} languageCode - Language code used to select the pronunciation.
 * @returns {string[] | undefined} The non-empty phonemes of the selected candidate,
 *   or `undefined` when no substitution applies.
 */
export function tryGetLexiconSubstitution(sentenceWords, wordIndex, lexicon, languageCode) {
    const word = sentenceWords[wordIndex];

    if (!word) {
        return undefined;
    }

    const lexiconForLanguage = lexicon[getShortLanguageCode(languageCode)];

    if (!lexiconForLanguage) {
        return undefined;
    }

    const lexiconEntry = lexiconForLanguage[word];

    if (!lexiconEntry) {
        return undefined;
    }

    // An entry may be a single candidate or a list of candidates.
    const candidateEntries = Array.isArray(lexiconEntry) ? lexiconEntry : [lexiconEntry];

    // The neighbouring words do not depend on the candidate, so resolve them once.
    const precedingWord = sentenceWords[wordIndex - 1] ?? '';
    const followingWord = sentenceWords[wordIndex + 1] ?? '';

    const phonemeSeparator = / +/;

    for (let i = 0; i < candidateEntries.length; i++) {
        const candidateEntry = candidateEntries[i];
        const pronunciationPhonemesText = candidateEntry?.pronunciation?.espeak?.[languageCode];

        // Skip candidates with no pronunciation text for this language code.
        if (typeof pronunciationPhonemesText !== 'string') {
            continue;
        }

        // Leading or trailing spaces would otherwise produce empty-string phonemes.
        const substitutionPhonemes = pronunciationPhonemesText
            .split(phonemeSeparator)
            .filter((phoneme) => phoneme !== '');

        // An empty or whitespace-only pronunciation is treated as missing.
        if (substitutionPhonemes.length === 0) {
            continue;
        }

        const precededBy = candidateEntry?.precededBy ?? [];
        const notPrecededBy = candidateEntry?.notPrecededBy ?? [];
        const followedBy = candidateEntry?.followedBy ?? candidateEntry?.succeededBy ?? [];
        const notFollowedBy = candidateEntry?.notFollowedBy ?? candidateEntry?.notSucceededBy ?? [];

        const hasNegativePattern = notPrecededBy.includes(precedingWord) || notFollowedBy.includes(followingWord);
        const hasPositivePattern = precededBy.includes(precedingWord) || followedBy.includes(followingWord);

        // The last candidate is the fallback and is accepted regardless of context.
        const isLastCandidate = i === candidateEntries.length - 1;

        if (isLastCandidate || (hasPositivePattern && !hasNegativePattern)) {
            return substitutionPhonemes;
        }
    }

    return undefined;
}
/**
 * Loads a lexicon from a JSON file.
 *
 * Delegates to `readAndParseJsonFile` and resolves to whatever it produces.
 *
 * @param {string} jsonFilePath - Path of the lexicon JSON file.
 * @returns {Promise<object>} The parsed lexicon.
 */
export async function loadLexiconFile(jsonFilePath) {
    return await readAndParseJsonFile(jsonFilePath);
}
/**
 * Loads every lexicon that applies to a language.
 *
 * When the short code of `language` is `'en'`, the bundled `words.en.json` and
 * `heteronyms.en.json` lexicons are included, in that order. Any custom lexicons
 * follow, in the order given. The files are independent of each other, so they
 * are loaded concurrently; the returned array keeps the order described above.
 *
 * @param {string} language - Language code; only its short form is inspected.
 * @param {string[]} [customLexiconPaths] - Paths of additional lexicon JSON files.
 * @returns {Promise<object[]>} The loaded lexicons: bundled ones first, then custom ones.
 */
export async function loadLexiconsForLanguage(language, customLexiconPaths) {
    const lexiconPaths = [];

    if (getShortLanguageCode(language) === 'en') {
        lexiconPaths.push(
            resolveToModuleRootDir('data/lexicons/words.en.json'),
            resolveToModuleRootDir('data/lexicons/heteronyms.en.json'),
        );
    }

    if (customLexiconPaths && customLexiconPaths.length > 0) {
        lexiconPaths.push(...customLexiconPaths);
    }

    // Promise.all resolves to results in input order, whichever file finishes first.
    return await Promise.all(lexiconPaths.map((lexiconPath) => loadLexiconFile(lexiconPath)));
}
//# sourceMappingURL=Lexicon.js.map