UNPKG

taipa

Version:

Taiwanese morphological parsing library

456 lines 23.3 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.TonalLemmatization = exports.StandaloneFormsIetfIetwToEkEkk = exports.TransfixStandalone = exports.LastSyllableForms = exports.PrecedingExStandalone = exports.PrecedingAyStandalone = exports.PhrasalVerbParticleStandalone = exports.TonalStandaloneForms = void 0;
const metaplasm_1 = require("../metaplasm");
const unit_1 = require("./unit");
const tonalres_1 = require("../tonal/tonalres");
const unit_2 = require("../unit");
const metaplasm_2 = require("../metaplasm");
const collections_1 = require("../tonal/collections");
const syllablelists_1 = require("../tonal/syllablelists");

/**
 * Builds a fresh TonalSyllable from a list of sounds/letters.
 * Every element is copied into a new AlphabeticLetter via its `characters`,
 * so the returned syllable does not reuse the letter objects passed in.
 *
 * @param {Array<{characters: *}>} sounds - sounds or letters to copy.
 * @returns {TonalSyllable} a new syllable made of the copied letters.
 */
function toSyllable(sounds) {
    return new unit_1.TonalSyllable(sounds.map((it) => new unit_2.AlphabeticLetter(it.characters)));
}

/** Returns the standalone forms of a syllable. */
class TonalStandaloneForms extends metaplasm_1.TonalStandaloneMetaplasm {
    soundsFollowing;

    /**
     * @param {Array} soundsFollowing - sounds of the following syllable; only
     *   the first element is inspected (by `apply`).
     */
    constructor(soundsFollowing) {
        super();
        this.soundsFollowing = soundsFollowing;
    }

    /**
     * Produces one candidate per lemma final consonant registered for the
     * key `<last letter of syllable> + toneLetter`.
     *
     * Each candidate is created with `Object.create(syllable)` and then has its
     * last letter replaced.
     * NOTE(review): whether `replaceLetter` on such a prototype-linked clone
     * also affects `syllable` depends on TonalSyllable internals - confirm.
     *
     * @param {TonalSyllable} syllable - syllable whose last letter is the assimilated final.
     * @param {string} toneLetter - tone letter appended to the lookup key ('' for none).
     * @returns {Array} the candidate syllables, or [] when no entry is found.
     */
    handleAssimilatedFinal(syllable, toneLetter) {
        const key = syllable.lastLetter.literal + toneLetter;
        const fnlsOfLemma = collections_1.finalConsonantsForBgjlsFw.get(key) ||
            collections_1.finalConsonantsForBglX.get(key);
        if (!fnlsOfLemma) {
            return [];
        }
        return fnlsOfLemma.map((it) => {
            const clone = Object.create(syllable);
            clone.replaceLetter(syllable.letters.length - 1, tonalres_1.lowerLettersTonal.get(it.toString()));
            return clone;
        });
    }

    /**
     * @param {Array} sounds - sounds of the syllable in its combining form.
     * @param {*} allomorph - the tonal allomorph of the syllable; falsy yields [].
     * @returns {Array} the standalone forms; [] when the allomorph is neither
     *   free nor checked, or when an assimilated final has no known lemma final.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }

        if (allomorph instanceof tonalres_1.FreeAllomorph) {
            if (allomorph instanceof tonalres_1.ZeroAllomorph) {
                // 1 to 2
                // push y to make tone 2
                const s = toSyllable(sounds);
                const tnltrs = tonalres_1.freeAllomorphStandaloneRules.get('zero');
                if (tnltrs) {
                    s.pushLetter(new unit_2.AlphabeticLetter(tnltrs[0].characters));
                }
                return [s];
            }

            // the 7th tone has two baseforms
            const ret = [];
            const tnltrs = tonalres_1.freeAllomorphStandaloneRules.get(allomorph.toString()) || [];
            for (const tnltr of tnltrs) {
                const s = toSyllable(sounds);
                // the current tone letter is always dropped
                s.popLetter();
                if (!(tnltr instanceof tonalres_1.ZeroAllomorph)) {
                    // 2 to 3. 3 to 7. 7 to 5. 3 to 5.
                    // replace z with f or x
                    s.pushLetter(new unit_2.AlphabeticLetter(tnltr.characters));
                }
                // otherwise 7 to 1: z is just popped
                ret.push(s);
            }
            return ret;
        }

        if (!(allomorph instanceof tonalres_1.CheckedAllomorph)) {
            return [];
        }

        const s = toSyllable(sounds);
        if (allomorph.tonal.toString() === '') {
            // when the final is assimilated, an empty array will be returned
            return this.handleAssimilatedFinal(s, '');
        }

        // pop the tone letter
        // 1 to 4. 3 to 8. 2 to 4. 5 to 8.
        const tnl = s.letters[s.letters.length - 1].literal;
        const hasNasalFinal = sounds.some((it) => it.name === tonalres_1.TonalSpellingTags.nasalFinalConsonant);
        s.popLetter(); // pop out the tone letter

        if (!hasNasalFinal &&
            (tnl === tonalres_1.TonalLetterTags.w || tnl === tonalres_1.TonalLetterTags.x) &&
            collections_1.fourthToEighthFinalConsonants.has(s.lastLetter.literal)) {
            // in case of no internal sandhi
            const fnl = s.lastLetter.literal;
            s.popLetter(); // pop the 4th final consonant
            const got = collections_1.fourthToEighthFinalConsonants.get(fnl);
            if (got) {
                if ((0, syllablelists_1.isInSyllableTable)(s.literal + tonalres_1.lowerLettersTonal.get(got).literal)) {
                    // push the 8th final consonant if it is present in syllable table
                    s.pushLetter(tonalres_1.lowerLettersTonal.get(got));
                }
                else {
                    // restore the popped-out final consonant.
                    // a syllable is just returned with its tone letter popped out.
                    // (combining forms 'tietw' and 'tietf' are handled in another function)
                    s.pushLetter(tonalres_1.lowerLettersTonal.get(fnl));
                }
            }
        }
        else if (collections_1.finalConsonantsForBgjlsbbggjjllss.has(s.lastLetter.literal)) {
            // in case of internal or external sandhi
            const ret = this.handleAssimilatedFinal(s, tnl);
            if (ret && ret.length > 0) {
                return ret;
            }
        }
        else if (sounds.some((it) => it.name === tonalres_1.TonalSpellingTags.vowel) &&
            collections_1.nasalFinalConsonants.includes(s.lastSecondLetter.literal) &&
            tonalres_1.neutralFinalConsonantsTonal.includes(s.lastLetter.literal)) {
            // in case of internal sandhi of p or t
            // if there is no medials, e.g. hmhh, hngh, just bypass this block
            // mhh, mh, nhh, nh, nghh, ngh
            const following = this.soundsFollowing[0];
            // there has to be a following syllable for this syllable to change form
            if (following) {
                // evaluated before the pops below, while the nasal is still
                // the second-to-last letter
                const nasalMatchesFollowingInitial = following.name === tonalres_1.TonalSpellingTags.initialConsonant &&
                    s.lastSecondLetter.literal === following.toString();
                s.popLetter(); // pop the neutral
                s.popLetter(); // pop the nasal
                const clone = Object.create(s);
                let finalTag;
                if (nasalMatchesFollowingInitial) {
                    // unchange to -tt or -t
                    finalTag = tnl === tonalres_1.TonalLetterTags.w
                        ? tonalres_1.TonalLetterTags.tt
                        : tonalres_1.TonalLetterTags.t;
                }
                else {
                    // unchange to -pp or -p
                    finalTag = tnl === tonalres_1.TonalLetterTags.w
                        ? tonalres_1.TonalLetterTags.pp
                        : tonalres_1.TonalLetterTags.p;
                }
                clone.pushLetter(tonalres_1.lowerLettersTonal.get(finalTag));
                return [clone];
            }
        }

        // a syllable is just returned with the tone letter popped out
        // e.g. tnghw's w is popped and tngh is returned
        return [s];
    }
}
exports.TonalStandaloneForms = TonalStandaloneForms;

/** Returns the standalone forms of a phrasal verb particle syllable. */
class PhrasalVerbParticleStandalone extends metaplasm_1.TonalStandaloneMetaplasm {
    /**
     * @param {Array} sounds - sounds of the particle syllable.
     * @param {*} allomorph - the tonal allomorph of the syllable; falsy yields [].
     * @returns {Array} a single-element array with the standalone syllable, or
     *   [] when the allomorph is neither free nor checked.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }

        if (allomorph instanceof tonalres_1.FreeAllomorph) {
            // 7 to 4
            const s = toSyllable(sounds);
            const tnl = sounds.filter((it) => it.name === tonalres_1.TonalSpellingTags.freeTone);
            // filter() always returns an array, so test its length: a syllable
            // without a free tone sound is returned unchanged instead of throwing
            if (tnl.length > 0 && tnl[0].toString() === tonalres_1.TonalLetterTags.z) {
                s.popLetter(); // pop the tonal
                s.pushLetter(tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.h)); // push neutral final
            }
            return [s];
        }

        if (allomorph instanceof tonalres_1.CheckedAllomorph) {
            // 1 to 4. 3 to 4.
            const s = toSyllable(sounds);
            const tnl = sounds.filter((it) => it.name === tonalres_1.TonalSpellingTags.checkedTone);
            if (tnl.length > 0) {
                const tone = tnl[0].toString();
                if (tone === tonalres_1.TonalLetterTags.f || tone === tonalres_1.TonalLetterTags.w) {
                    s.popLetter(); // pop the tonal
                }
            }
            return [s];
        }

        return [];
    }
}
exports.PhrasalVerbParticleStandalone = PhrasalVerbParticleStandalone;

/** Returns the standalone forms of the syllable preceding ay */
class PrecedingAyStandalone extends metaplasm_1.TonalStandaloneMetaplasm {
    /**
     * Replaces, in place, the last letter of `syllable` with its standalone
     * final consonant when one is registered.
     *
     * @param {TonalSyllable} syllable - syllable whose tone letter has already been popped.
     * @param {Array} letters - the original sounds; the last element is read as
     *   the tone letter and the second-to-last as the final consonant.
     */
    getStandaloneForms(syllable, letters) {
        const finalOfLetters = letters[letters.length - 2].toString();
        const toneOfLetters = letters[letters.length - 1].toString();
        let fnl;
        if (collections_1.voicedVoicelessFinalConsonants.has(finalOfLetters)) {
            // in case of sandhi finals
            fnl = collections_1.voicedVoicelessFinalConsonants.get(syllable.lastLetter.literal + toneOfLetters);
        }
        else if (collections_1.fourthToEighthFinalConsonants.has(finalOfLetters) &&
            toneOfLetters === tonalres_1.TonalLetterTags.x) {
            fnl = collections_1.fourthToEighthFinalConsonants.get(syllable.lastLetter.literal);
        }
        if (fnl) {
            syllable.replaceLetter(syllable.letters.length - 1, tonalres_1.lowerLettersTonal.get(fnl));
        }
    }

    /**
     * @param {Array} sounds - sounds of the syllable preceding ay.
     * @param {*} allomorph - the tonal allomorph of the syllable; falsy yields [].
     * @returns {Array} the standalone forms; [] for any tonal other than f or x.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }

        const tonal = allomorph.tonal.toString();
        const isF = tonal === tonalres_1.TonalLetterTags.f;
        const isX = !isF && tonal === tonalres_1.TonalLetterTags.x;
        if (!isF && !isX) {
            // includes tonal y, which has no standalone form here
            return [];
        }

        if (allomorph instanceof tonalres_1.FreeAllomorph) {
            const ret = [];
            const tnls = tonalres_1.standaloneRulesAy.get(allomorph.toString()) || [];
            for (const tnl of tnls) {
                const s = toSyllable(sounds);
                if (isF) {
                    // 1 to 2. 1 to 3
                    // replace f with y or w
                    s.popLetter();
                    s.pushLetter(new unit_2.AlphabeticLetter(tnl.characters));
                    ret.push(s);
                }
                else if (tnl instanceof tonalres_1.ZeroTonal) {
                    // 5 to 1
                    // pop x
                    s.popLetter();
                    ret.push(s);
                }
                else if (tnl instanceof tonalres_1.FreeTonalZ) {
                    // 5 to 7
                    // replace x with z
                    s.popLetter();
                    s.pushLetter(new unit_2.AlphabeticLetter(tnl.characters));
                    ret.push(s);
                }
                else if (tnl instanceof tonalres_1.FreeTonalX) {
                    // 5 to 5
                    ret.push(s);
                }
            }
            return ret;
        }

        if (allomorph instanceof tonalres_1.CheckedAllomorph) {
            // tonal f: pop f. tonal x: 5 to 8, pop x.
            const s = toSyllable(sounds);
            s.popLetter();
            this.getStandaloneForms(s, sounds);
            return [s];
        }

        return [];
    }
}
exports.PrecedingAyStandalone = PrecedingAyStandalone;

/** Returns the standalone forms of the syllable preceding ex */
class PrecedingExStandalone extends metaplasm_1.TonalStandaloneMetaplasm {
    /**
     * Pops the tone letter of `syllable` and returns one candidate per lemma
     * final consonant registered for `<final consonant> + <tone letter>`.
     *
     * @param {TonalSyllable} syllable - syllable ending in final consonant + tone letter.
     * @returns {Array} the candidate syllables, or [] when no entry is found
     *   (in which case `syllable` is left untouched).
     */
    handleAssimilatedFinal(syllable) {
        const finalConsonant = syllable.lastSecondLetter.literal;
        const registered = collections_1.finalConsonantsForBgjlsFw.get(finalConsonant + syllable.lastLetter.literal);
        if (!registered) {
            return [];
        }

        // when the final is b or g, and the following syllable is ex, the last
        // candidate (tt or t) is dropped. Work on a copy: popping the array held
        // by the shared map would shorten that entry for every later lookup.
        const dropLast = finalConsonant === tonalres_1.TonalLetterTags.g ||
            finalConsonant === tonalres_1.TonalLetterTags.b;
        const fnlsOfLemma = dropLast ? registered.slice(0, -1) : registered;

        syllable.popLetter(); // pop tonal
        return fnlsOfLemma.map((it) => {
            const clone = Object.create(syllable);
            // replace the final consonant which is the last letter after
            // the tonal is popped
            clone.replaceLetter(syllable.letters.length - 1, tonalres_1.lowerLettersTonal.get(it.toString()));
            return clone;
        });
    }

    /**
     * @param {Array} sounds - sounds of the syllable preceding ex.
     * @param {*} allomorph - only tested for truthiness; falsy yields [].
     * @returns {Array} the standalone forms.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }
        return this.handleAssimilatedFinal(toSyllable(sounds));
    }
}
exports.PrecedingExStandalone = PrecedingExStandalone;

/** Returns the last syllable of a double or triple construction as a standalone form. */
class LastSyllableForms extends metaplasm_1.TonalStandaloneMetaplasm {
    lettersLastSyllable;

    /**
     * @param {Array} lettersLastSyllable - letters of the last syllable of the construction.
     */
    constructor(lettersLastSyllable) {
        super();
        this.lettersLastSyllable = lettersLastSyllable;
    }

    /**
     * @param {Array} sounds - sounds of the syllable being processed.
     * @param {*} allomorph - only tested for truthiness; falsy yields [].
     * @returns {Array} a copy of the last syllable, or [] when the final letter
     *   of `sounds` equals the final letter of the last syllable.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }
        const lastLetterOfLastSyllable = this.lettersLastSyllable[this.lettersLastSyllable.length - 1];
        const lastSound = sounds[sounds.length - 1];
        // skip the last syllable. it is the base form of the preceding 2 syllables.
        if (lastLetterOfLastSyllable.toString() === lastSound.toString()) {
            return [];
        }
        return [toSyllable(this.lettersLastSyllable)];
    }
}
exports.LastSyllableForms = LastSyllableForms;

/** Returns the standalone forms of a transfix inflected syllable. */
class TransfixStandalone extends metaplasm_1.TonalStandaloneMetaplasm {
    /**
     * @param {Array} sounds - sounds of the transfix-inflected syllable.
     * @param {*} allomorph - only tested for truthiness; falsy yields [].
     * @returns {Array} the standalone forms.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }

        const vwlA = sounds.filter((it) => it.toString() === tonalres_1.TonalLetterTags.a);
        const chkdFnls = sounds.filter((it) => it.name === tonalres_1.TonalSpellingTags.checkedTone);
        const s = toSyllable(sounds);

        if (vwlA.length === 1) {
            // aw -> ay
            s.popLetter(); // pop letter w
            s.pushLetter(tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.y));
            return [s];
        }

        if (chkdFnls.length === 1) {
            // checked tones
            s.popLetter(); // pop letter w
            const sylChkd = Object.create(s);
            // get hh or tt
            const got = collections_1.finalConsonantsForTransfix.get(s.letters[s.letters.length - 1].literal);
            if (got) {
                sylChkd.popLetter(); // pop final t
                sylChkd.pushLetter(tonalres_1.lowerLettersTonal.get(got)); // push hh or tt
            }
            return [s, sylChkd];
        }

        // in case of free tones other than aw, return the other four free tones.
        // The clones are prototype-linked to `s`, so the statement order below
        // is kept exactly as it was.
        const syl1 = Object.create(s); // 1st tone
        const syl2 = Object.create(s); // 2nd tone
        const syl5 = Object.create(s); // 5th tone
        const syl7 = Object.create(s); // 7th tone
        syl1.popLetter(); // pop w
        syl2.popLetter(); // pop w
        syl2.pushLetter(tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.y));
        syl5.popLetter(); // pop w
        syl5.pushLetter(tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.x));
        syl7.popLetter(); // pop w
        syl7.pushLetter(tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.z));
        return [syl1, syl2, syl5, syl7];
    }
}
exports.TransfixStandalone = TransfixStandalone;

/** Change ~ietf or ietw to ~ek or ~ekk. */
class StandaloneFormsIetfIetwToEkEkk extends metaplasm_1.TonalStandaloneMetaplasm {
    /**
     * @param {Array} sounds - sounds of the syllable.
     * @param {*} allomorph - only tested for truthiness; falsy yields [].
     * @returns {Array} a single syllable `<initial> + e + k|kk`, or [] when the
     *   syllable has no initial consonant, no checked tone, or a checked tone
     *   other than f or w.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }

        const ics = sounds.filter((i) => i.name === tonalres_1.TonalSpellingTags.initialConsonant);
        const ts = sounds.filter((i) => i.name === tonalres_1.TonalSpellingTags.checkedTone);
        if (ics.length === 0 || ts.length === 0) {
            return [];
        }

        const tone = ts[0].toString();
        let finalTag;
        if (tone === tonalres_1.TonalLetterTags.f) {
            // in case of ~ietf
            finalTag = tonalres_1.TonalLetterTags.k;
        }
        else if (tone === tonalres_1.TonalLetterTags.w) {
            // in case of ~ietw
            finalTag = tonalres_1.TonalLetterTags.kk;
        }
        else {
            return [];
        }

        const s = new unit_1.TonalSyllable([
            new unit_2.AlphabeticLetter(ics[0].characters),
            tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.e),
            tonalres_1.lowerLettersTonal.get(finalTag),
        ]);
        return [s];
    }
}
exports.StandaloneFormsIetfIetwToEkEkk = StandaloneFormsIetfIetwToEkEkk;

/** Lemmatizes a word and returns its base forms. */
class TonalLemmatization extends metaplasm_2.TonalLemmatizationMetaplasm {
    /**
     * @param {Array} morphemes - morphemes of the word; each exposes `syllable` and `getForms()`.
     * @param {*} inflectionalEnding - the inflectional ending of the word.
     * @returns {Array} the lemmata of the word.
     */
    apply(morphemes, inflectionalEnding) {
        return this.populateLemmata(morphemes, inflectionalEnding);
    }

    /**
     * Builds candidate words by swapping the last syllable for the forms
     * reported by the last morpheme.
     *
     * @param {Array} morphemes - morphemes of the word.
     * @param {*} inflectionalEnding - free endings yield one word per form;
     *   checked endings yield at most one word (the first form); anything else yields [].
     * @returns {Array} the candidate words.
     */
    getLemmas(morphemes, inflectionalEnding) {
        if (!inflectionalEnding) {
            return [];
        }

        const isFree = inflectionalEnding instanceof unit_1.FreeInflectionalEnding;
        if (!isFree && !(inflectionalEnding instanceof unit_1.CheckedInflectionalEnding)) {
            return [];
        }

        const forms = morphemes[morphemes.length - 1].getForms();
        // only the first form is used for checked endings
        const picked = isFree ? forms : forms.slice(0, 1);
        const ret = [];
        for (const form of picked) {
            const wrd = new unit_1.TonalWord(morphemes.map((it) => it.syllable));
            wrd.popSyllable();
            wrd.pushSyllable(form);
            ret.push(wrd);
        }
        return ret;
    }

    /**
     * @param {Array} morphemes - morphemes of the word.
     * @param {*} inflectionalEnding - the inflectional ending of the word.
     * @returns {Array} a new array holding the lemmas from `getLemmas`.
     */
    populateLemmata(morphemes, inflectionalEnding) {
        // turn morphemes into lemmas
        return [...this.getLemmas(morphemes, inflectionalEnding)];
    }
}
exports.TonalLemmatization = TonalLemmatization;
//# sourceMappingURL=metaplasm.js.map