/*
 * taipa
 * Version: (not given)
 * Taiwanese morphological parsing library
 * 163 lines • 5.65 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.getSyllablesEnd = exports.getSyllablesStart = exports.getSyllablesInclude = exports.getDesinence = exports.getStem = exports.pairsToString = exports.getLemmas = exports.getStandaloneForms = exports.analyzeIntoSequence = exports.analyzeIntoSyllables = exports.getLetterSoundPairsSyllabic = exports.getLetterSoundPairsSequential = void 0;
const client_1 = require("./client");
const analyzer_1 = require("./unchange/analyzer");
const metaplasm_1 = require("./unchange/metaplasm");
const lemmatizer_1 = require("./unchange/lemmatizer");
const syllablelists_1 = require("./tonal/syllablelists");
/**
 * Turn sounds into one flat sequence of letter-sound pairs.
 *
 * The outer list is flattened by one level, then every sound becomes a
 * two-element array: its `toString()` text followed by its `name` property.
 *
 * @param {Array<Array<{name: *}>>} soundSeqs - lists of sound objects.
 * @returns {Array<Array>} `[letter, name]` pairs in input order.
 */
function getLetterSoundPairsSequential(soundSeqs) {
  const toPair = (sound) => [sound.toString(), sound.name];
  return soundSeqs.flat().map(toPair);
}
exports.getLetterSoundPairsSequential = getLetterSoundPairsSequential;
/**
 * Turn sounds into letter-sound pairs, keeping the per-syllable grouping.
 *
 * Each inner list is mapped independently; every sound becomes a two-element
 * array of its `toString()` text and its `name` property.
 *
 * @param {Array<Array<{name: *}>>} soundSeqs - lists of sound objects.
 * @returns {Array<Array<Array>>} one list of `[letter, name]` pairs per
 *     input list, in input order.
 */
function getLetterSoundPairsSyllabic(soundSeqs) {
  const toPair = (sound) => [sound.toString(), sound.name];
  return soundSeqs.map((sounds) => sounds.map((sound) => toPair(sound)));
}
exports.getLetterSoundPairsSyllabic = getLetterSoundPairsSyllabic;
/**
 * Analyze an input into syllabic letter-sound pairs.
 *
 * The input is run through `Client#processTonal`; the resulting word literal
 * is morph-analyzed with the tonal lemmatization analyzer, and the `sounds`
 * of every morpheme are handed to `getLetterSoundPairsSyllabic`.
 *
 * @param {string} input - text to analyze.
 * @returns {Array} the grouped pairs, or `[]` when `input` is falsy.
 */
function analyzeIntoSyllables(input) {
  if (!input) {
    return [];
  }
  const client = new client_1.Client();
  const analyzer = analyzer_1.tonalLemmatizationAnalyzer;
  const { word } = client.processTonal(input);
  const morphemes = analyzer.morphAnalyze(
    word.literal,
    new metaplasm_1.TonalStandaloneForms([]),
  );
  const soundSeqs = morphemes.map((morpheme) => morpheme.sounds);
  return getLetterSoundPairsSyllabic(soundSeqs);
}
exports.analyzeIntoSyllables = analyzeIntoSyllables;
/**
 * Analyze an input into a flat sequence of letter-sound pairs.
 *
 * Same pipeline as the syllabic variant (`Client#processTonal`, then
 * `morphAnalyze` on the word literal), but the collected `sounds` are passed
 * to `getLetterSoundPairsSequential`.
 *
 * @param {string} input - text to analyze.
 * @returns {Array} the flat list of pairs, or `[]` when `input` is falsy.
 */
function analyzeIntoSequence(input) {
  if (!input) {
    return [];
  }
  const tonalClient = new client_1.Client();
  const lemmaAnalyzer = analyzer_1.tonalLemmatizationAnalyzer;
  const analysis = tonalClient.processTonal(input);
  const literal = analysis.word.literal;
  const soundSeqs = lemmaAnalyzer
    .morphAnalyze(literal, new metaplasm_1.TonalStandaloneForms([]))
    .map((morpheme) => morpheme.sounds);
  return getLetterSoundPairsSequential(soundSeqs);
}
exports.analyzeIntoSequence = analyzeIntoSequence;
/**
 * Get the standalone forms of a syllable.
 *
 * The syllable is run through `Client#processTonal`, its word literal is
 * morph-analyzed, and the `literal` of every form returned by each
 * morpheme's `getForms()` is collected into one flat list.
 *
 * @param {string} syl - syllable to look up.
 * @returns {Array} form literals, or `[]` when `syl` is falsy.
 */
function getStandaloneForms(syl) {
  if (!syl) {
    return [];
  }
  const client = new client_1.Client();
  const analyzer = analyzer_1.tonalLemmatizationAnalyzer;
  const literal = client.processTonal(syl).word.literal;
  const morphemes = analyzer.morphAnalyze(
    literal,
    new metaplasm_1.TonalStandaloneForms([]),
  );
  return morphemes.flatMap((morpheme) =>
    morpheme.getForms().map((form) => form.literal),
  );
}
exports.getStandaloneForms = getStandaloneForms;
/**
 * Get the lemmas of a word.
 *
 * Lemmatizes the input and returns the `literal` of every entry produced by
 * the resulting lexeme's `getLemmas()`.
 *
 * @param {string} input - word to lemmatize.
 * @returns {Array} lemma literals, or `[]` when `input` is falsy.
 */
function getLemmas(input) {
  if (!input) {
    return [];
  }
  // Pull the function off the module object so it is invoked without a
  // `this` binding, the same way the `(0, fn)(...)` call form does.
  const { lemmatize } = lemmatizer_1;
  const lexeme = lemmatize(input);
  return lexeme.getLemmas().map((lemma) => lemma.literal);
}
exports.getLemmas = getLemmas;
/**
 * Join the letters in each tuple.
 *
 * Takes element `0` of every pair and concatenates them with no separator.
 *
 * @param {Array<Array>} pairs - tuples whose first element is the letter.
 * @returns {string} the concatenated letters (`''` for an empty list).
 */
function pairsToString(pairs) {
  return pairs.map((pair) => pair[0]).join('');
}
exports.pairsToString = pairsToString;
/**
 * Get the stem of a word.
 *
 * The stem is the word literal with the inflectional ending (as reported by
 * the lemmatizer) removed from its end.
 *
 * @param {string} input - word to analyze.
 * @returns {string} the stem; `''` when `input` is falsy or when the ending
 *     is as long as the whole literal.
 */
function getStem(input) {
  // Same falsy-input guard as the sibling helpers: skip the lemmatizer
  // entirely when there is nothing to analyze.
  if (!input) {
    return '';
  }
  const lexeme = (0, lemmatizer_1.lemmatize)(input);
  const literal = lexeme.word.literal;
  const ending = lexeme.getInflectionalEnding();
  // `substring` yields '' when the computed end index is 0 (and clamps
  // negative values to 0), so no separate "nothing left" branch is required.
  return literal.substring(0, literal.length - ending.length);
}
exports.getStem = getStem;
/**
 * Get the inflectional suffix of a word.
 *
 * @param {string} input - word to analyze.
 * @returns {string} whatever the lemmatized lexeme reports from
 *     `getInflectionalEnding()`; `''` when `input` is falsy.
 */
function getDesinence(input) {
  // Same falsy-input guard as the sibling helpers: skip the lemmatizer
  // entirely when there is nothing to analyze.
  if (!input) {
    return '';
  }
  const lexeme = (0, lemmatizer_1.lemmatize)(input);
  return lexeme.getInflectionalEnding();
}
exports.getDesinence = getDesinence;
/**
 * Get the syllables that contain the given substring.
 *
 * Scans `basicSyllables`, `missingSyllables` and `syllabicLoanwords` (in that
 * order) and keeps every entry for which `includes(input)` is true.
 *
 * @param {string} input - substring to search for.
 * @returns {Array} matching entries, in scan order.
 */
function getSyllablesInclude(input) {
  const matches = [];
  const collect = (syllable) => {
    if (syllable.includes(input)) {
      matches.push(syllable);
    }
  };
  syllablelists_1.basicSyllables.forEach(collect);
  syllablelists_1.missingSyllables.forEach(collect);
  syllablelists_1.syllabicLoanwords.forEach(collect);
  return matches;
}
exports.getSyllablesInclude = getSyllablesInclude;
/**
 * Get the syllables that begin with the given prefix.
 *
 * Scans `basicSyllables`, `missingSyllables` and `syllabicLoanwords` (in that
 * order) and keeps every entry for which `startsWith(input)` is true.
 *
 * @param {string} input - prefix to match.
 * @returns {Array} matching entries, in scan order.
 */
function getSyllablesStart(input) {
  const sources = [
    syllablelists_1.basicSyllables,
    syllablelists_1.missingSyllables,
    syllablelists_1.syllabicLoanwords,
  ];
  const matches = [];
  for (const source of sources) {
    source.forEach((syllable) => {
      // Matching starts at position 0, which is `startsWith`'s default.
      if (syllable.startsWith(input)) {
        matches.push(syllable);
      }
    });
  }
  return matches;
}
exports.getSyllablesStart = getSyllablesStart;
/**
 * Get the syllables that end with the given suffix.
 *
 * Scans `basicSyllables`, `missingSyllables` and `syllabicLoanwords` (in that
 * order) and keeps every entry for which `endsWith(input)` is true.
 *
 * @param {string} input - suffix to match.
 * @returns {Array} matching entries, in scan order.
 */
function getSyllablesEnd(input) {
  const matches = [];
  // The end position defaults to the full string length, so it is omitted.
  const hasSuffix = (syllable) => syllable.endsWith(input);
  const keepIfMatching = (syllable) => {
    if (hasSuffix(syllable)) {
      matches.push(syllable);
    }
  };
  syllablelists_1.basicSyllables.forEach(keepIfMatching);
  syllablelists_1.missingSyllables.forEach(keepIfMatching);
  syllablelists_1.syllabicLoanwords.forEach(keepIfMatching);
  return matches;
}
exports.getSyllablesEnd = getSyllablesEnd;
//# sourceMappingURL=util.js.map