UNPKG

taipa

Version:

Taiwanese morphological parsing library

163 lines 5.65 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.getSyllablesEnd = exports.getSyllablesStart = exports.getSyllablesInclude = exports.getDesinence = exports.getStem = exports.pairsToString = exports.getLemmas = exports.getStandaloneForms = exports.analyzeIntoSequence = exports.analyzeIntoSyllables = exports.getLetterSoundPairsSyllabic = exports.getLetterSoundPairsSequential = void 0; const client_1 = require("./client"); const analyzer_1 = require("./unchange/analyzer"); const metaplasm_1 = require("./unchange/metaplasm"); const lemmatizer_1 = require("./unchange/lemmatizer"); const syllablelists_1 = require("./tonal/syllablelists"); /** Turn sounds into a sequence of letter-sound pairs */ function getLetterSoundPairsSequential(soundSeqs) { return soundSeqs .flatMap((v) => { return v; }) .map((v) => [v.toString(), v.name]); } exports.getLetterSoundPairsSequential = getLetterSoundPairsSequential; /** Turn sounds into syllabic letter-sound pairs. */ function getLetterSoundPairsSyllabic(soundSeqs) { return soundSeqs.map((v) => { return v.map((v) => [v.toString(), v.name]); }); } exports.getLetterSoundPairsSyllabic = getLetterSoundPairsSyllabic; /** Analyze an input into syllabic letter-sound pairs. */ function analyzeIntoSyllables(input) { if (!input) return []; const cli = new client_1.Client(); const tla = analyzer_1.tonalLemmatizationAnalyzer; const ta = cli.processTonal(input); const wrd = ta.word; const pairs = getLetterSoundPairsSyllabic(tla .morphAnalyze(wrd.literal, new metaplasm_1.TonalStandaloneForms([])) .map((x) => x.sounds)); return pairs; } exports.analyzeIntoSyllables = analyzeIntoSyllables; /** Analyze an input into a sequence of letter-sound-pairs. */ function analyzeIntoSequence(input) { if (!input) return []; const cli = new client_1.Client(); const tla = analyzer_1.tonalLemmatizationAnalyzer; const ta = cli.processTonal(input); const wrd = ta.word; const pairs = getLetterSoundPairsSequential(tla .morphAnalyze(wrd.literal, new metaplasm_1.TonalStandaloneForms([])) .map((x) => x.sounds)); return pairs; } exports.analyzeIntoSequence = analyzeIntoSequence; /** Get the standalone forms of a syllable. */ function getStandaloneForms(syl) { if (!syl) return []; const cli = new client_1.Client(); const tla = analyzer_1.tonalLemmatizationAnalyzer; const ta = cli.processTonal(syl); const wrd = ta.word; return tla .morphAnalyze(wrd.literal, new metaplasm_1.TonalStandaloneForms([])) .flatMap((mrfm) => mrfm.getForms().map((frm) => frm.literal)); } exports.getStandaloneForms = getStandaloneForms; /** Get lemmas of the word. */ function getLemmas(input) { if (!input) return []; const lxLemma = (0, lemmatizer_1.lemmatize)(input); return lxLemma.getLemmas().map((x) => x.literal); } exports.getLemmas = getLemmas; /** Join the letters in each tuple. */ function pairsToString(pairs) { const chars = pairs.map((pair) => { return pair[0]; }); const syl = chars.join(''); return syl; } exports.pairsToString = pairsToString; /** Get the stem of a word. */ function getStem(input) { const lxLemma = (0, lemmatizer_1.lemmatize)(input); const literal = lxLemma.word.literal; const ending = lxLemma.getInflectionalEnding(); if (literal.length - ending.length != 0) { return literal.substring(0, literal.length - ending.length); } return ''; } exports.getStem = getStem; /** Get the inflectional suffix of a word. */ function getDesinence(input) { const lxLemma = (0, lemmatizer_1.lemmatize)(input); return lxLemma.getInflectionalEnding(); } exports.getDesinence = getDesinence; /** Get the syllables whose substring is matched. */ function getSyllablesInclude(input) { const syls = []; syllablelists_1.basicSyllables.forEach((it) => { if (it.includes(input)) { syls.push(it); } }); syllablelists_1.missingSyllables.forEach((it) => { if (it.includes(input)) { syls.push(it); } }); syllablelists_1.syllabicLoanwords.forEach((it) => { if (it.includes(input)) { syls.push(it); } }); return syls; } exports.getSyllablesInclude = getSyllablesInclude; /** Get the syllables that matched from the beginning. */ function getSyllablesStart(input) { const syls = []; syllablelists_1.basicSyllables.forEach((it) => { if (it.startsWith(input, 0)) { syls.push(it); } }); syllablelists_1.missingSyllables.forEach((it) => { if (it.startsWith(input, 0)) { syls.push(it); } }); syllablelists_1.syllabicLoanwords.forEach((it) => { if (it.startsWith(input, 0)) { syls.push(it); } }); return syls; } exports.getSyllablesStart = getSyllablesStart; /** Get the syllables that matched to the end. */ function getSyllablesEnd(input) { const syls = []; syllablelists_1.basicSyllables.forEach((it) => { if (it.endsWith(input, it.length)) { syls.push(it); } }); syllablelists_1.missingSyllables.forEach((it) => { if (it.endsWith(input, it.length)) { syls.push(it); } }); syllablelists_1.syllabicLoanwords.forEach((it) => { if (it.endsWith(input, it.length)) { syls.push(it); } }); return syls; } exports.getSyllablesEnd = getSyllablesEnd; //# sourceMappingURL=util.js.map