echogarden
Version:
An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.
76 lines • 2.89 kB
JavaScript
import { getShortLanguageCode } from '../utilities/Locale.js';
import { logToStderr } from '../utilities/Utilities.js';
// Local shorthand for the imported `logToStderr` helper.
const log = logToStderr;
/**
 * Splits `text` into sentences and terms using the `compromise` library
 * (loaded lazily on each call) and annotates every term with its Penn tag.
 *
 * Each returned term has the shape:
 * `{ text, pos, tags, preText, postText, startOffset, endOffset }`, where
 * `endOffset` is `offset.start + offset.length` as reported by compromise.
 *
 * Post-processing applied per sentence:
 * - A term whose text is empty is not emitted; its surrounding text
 *   (`pre` + `post`) is appended to the previous term's `postText`. If there
 *   is no previous term in the sentence, that text is discarded.
 * - A term tagged `Abbreviation` whose `postText` starts with `.` gets the
 *   period appended to its `text`, and its `endOffset` is advanced by one.
 *
 * @param {string} text - Text to analyze.
 * @returns {Promise<object[][]>} One array of parsed terms per sentence.
 */
export async function parse(text) {
    const compromiseModule = await import('compromise');
    const nlp = compromiseModule.default;

    const document = nlp(text);
    document.compute('penn');

    const sentences = document.json({ offset: true });
    // Debug aid: log(sentences)

    const parsedSentences = sentences.map(({ terms }) => {
        const parsedTerms = [];

        for (const term of terms) {
            const { offset } = term;

            const parsedTerm = {
                text: term.text,
                pos: term.penn,
                tags: term.tags,
                preText: term.pre,
                postText: term.post,
                startOffset: offset.start,
                endOffset: offset.start + offset.length
            };

            // NOTE(review): loose `==` retained on purpose so behaviour is unchanged;
            // switch to `===` once term.text is confirmed to always be a string.
            if (parsedTerm.text == '') {
                // Text-less term: fold its surrounding text into the previous term, if any.
                const lastIndex = parsedTerms.length - 1;

                if (lastIndex >= 0) {
                    parsedTerms[lastIndex].postText += parsedTerm.preText + parsedTerm.postText;
                }

                continue;
            }

            const isDottedAbbreviation =
                parsedTerm.tags.includes('Abbreviation') && parsedTerm.postText.startsWith('.');

            if (isDottedAbbreviation) {
                // Make the trailing period part of the abbreviation itself.
                // NOTE(review): postText still begins with that same '.' - confirm this is intended.
                parsedTerm.text += '.';
                parsedTerm.endOffset += 1;
            }

            parsedTerms.push(parsedTerm);
        }

        return parsedTerms;
    });
    // Debug aid: log(parsedSentences)

    return parsedSentences;
}
/**
 * Looks `term` up in several lexicons and returns the first match found.
 *
 * Lexicons are consulted from the last one to the first, so an entry in a
 * later lexicon overrides the same entry in an earlier one. The `lexicons`
 * argument itself is not modified.
 *
 * @param {object} term - Parsed term, forwarded unchanged to `tryMatchInLexicon`.
 * @param {Iterable<object>} lexicons - Lexicons in ascending order of precedence.
 * @param {string} espeakVoice - Voice identifier, forwarded to `tryMatchInLexicon`.
 * @returns {*} The first truthy result of `tryMatchInLexicon`, or `undefined` if none matched.
 */
export function tryMatchInLexicons(term, lexicons, espeakVoice) {
    const lexiconList = [...lexicons];

    // Walk backwards: the last lexicon has the highest precedence.
    for (let index = lexiconList.length - 1; index >= 0; index--) {
        const phonemes = tryMatchInLexicon(term, lexiconList[index], espeakVoice);

        if (phonemes) {
            return phonemes;
        }
    }

    return undefined;
}
/**
 * Looks up a pronunciation override for `term` in a single lexicon.
 *
 * The lexicon is read as
 * `lexicon[shortLanguageCode][lowerCasedTermText] -> iterable of entries`,
 * where the short language code comes from `getShortLanguageCode(espeakVoice)`.
 * Entries are tried in order; an entry applies when it has no `pos` or when
 * its `pos` includes `term.pos`. The first applicable entry that defines a
 * non-empty `pronunciation.espeak[espeakVoice]` string wins.
 *
 * @param {{ text: string, pos: * }} term - Term to look up (matched case-insensitively).
 * @param {object} lexicon - Lexicon keyed by short language code.
 * @param {string} espeakVoice - Voice identifier used to select the language and the pronunciation.
 * @returns {string[] | undefined} The pronunciation text split on runs of spaces,
 *   or `undefined` when nothing matches.
 */
export function tryMatchInLexicon(term, lexicon, espeakVoice) {
    const shortLanguageCode = getShortLanguageCode(espeakVoice);
    const lexiconForLanguage = lexicon[shortLanguageCode];

    if (!lexiconForLanguage) {
        return undefined;
    }

    const lowerCaseTermText = term.text.toLocaleLowerCase();

    // Only accept the lexicon's own keys. A plain property read would also find
    // inherited members, so ordinary words such as "constructor" would resolve to
    // a non-iterable value and crash the loop below.
    if (!Object.prototype.hasOwnProperty.call(lexiconForLanguage, lowerCaseTermText)) {
        return undefined;
    }

    const entry = lexiconForLanguage[lowerCaseTermText];

    if (!entry) {
        return undefined;
    }

    for (const substitutionEntry of entry) {
        // An entry that declares parts of speech applies only to terms tagged with one of them.
        if (substitutionEntry.pos && !substitutionEntry.pos.includes(term.pos)) {
            continue;
        }

        const substitutionPhonemesText = substitutionEntry.pronunciation?.espeak?.[espeakVoice];

        if (substitutionPhonemesText) {
            return substitutionPhonemesText.split(/ +/g);
        }
    }

    return undefined;
}
//# sourceMappingURL=CompromiseNLP.js.map