taipa
Version:
Taiwanese morphological parsing library
456 lines • 23.3 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.TonalLemmatization = exports.StandaloneFormsIetfIetwToEkEkk = exports.TransfixStandalone = exports.LastSyllableForms = exports.PrecedingExStandalone = exports.PrecedingAyStandalone = exports.PhrasalVerbParticleStandalone = exports.TonalStandaloneForms = void 0;
const metaplasm_1 = require("../metaplasm");
const unit_1 = require("./unit");
const tonalres_1 = require("../tonal/tonalres");
const unit_2 = require("../unit");
const metaplasm_2 = require("../metaplasm");
const collections_1 = require("../tonal/collections");
const syllablelists_1 = require("../tonal/syllablelists");
/**
 * Returns the standalone forms of a syllable.
 *
 * `soundsFollowing` is supplied at construction time; `apply` only inspects
 * its first element, when deciding how a nasal + neutral final is restored.
 */
class TonalStandaloneForms extends metaplasm_1.TonalStandaloneMetaplasm {
    soundsFollowing;
    /**
     * @param soundsFollowing sounds of the following syllable; an empty array
     *     means there is no following syllable.
     */
    constructor(soundsFollowing) {
        super();
        this.soundsFollowing = soundsFollowing;
    }
    /**
     * Resolves an assimilated final consonant.
     *
     * The key `<literal of the last letter><toneLetter>` is looked up in
     * `finalConsonantsForBgjlsFw` and then in `finalConsonantsForBglX`.
     *
     * @param syllable syllable whose last letter is the final consonant.
     * @param toneLetter literal of the tone letter ('' when there is none).
     * @returns one clone of `syllable` per candidate final, with the last
     *     letter replaced; an empty array when neither table has the key.
     */
    handleAssimilatedFinal(syllable, toneLetter) {
        const key = syllable.lastLetter.literal + toneLetter;
        const fnlsOfLemma = collections_1.finalConsonantsForBgjlsFw.get(key) ||
            collections_1.finalConsonantsForBglX.get(key);
        if (!fnlsOfLemma) {
            return [];
        }
        return fnlsOfLemma.map((it) => {
            // the clone inherits from `syllable` instead of copying it
            const clone = Object.create(syllable);
            clone.replaceLetter(syllable.letters.length - 1, tonalres_1.lowerLettersTonal.get(it.toString()));
            return clone;
        });
    }
    /**
     * Builds the standalone forms for the given sounds.
     *
     * @param sounds sounds of the syllable; each one is copied into a new
     *     AlphabeticLetter.
     * @param allomorph allomorph of the syllable.
     * @returns an array of TonalSyllable; empty when `allomorph` is missing
     *     or is neither a FreeAllomorph nor a CheckedAllomorph.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }
        if (allomorph instanceof tonalres_1.FreeAllomorph) {
            if (allomorph instanceof tonalres_1.ZeroAllomorph) {
                // 1 to 2
                // push y to make tone 2
                const s = new unit_1.TonalSyllable(sounds.map((x) => new unit_2.AlphabeticLetter(x.characters)));
                const tnltrs = tonalres_1.freeAllomorphStandaloneRules.get('zero');
                if (tnltrs) {
                    s.pushLetter(new unit_2.AlphabeticLetter(tnltrs[0].characters));
                }
                return [s];
            }
            // the 7th tone has two baseforms
            const ret = [];
            const tnltrs = tonalres_1.freeAllomorphStandaloneRules.get(allomorph.toString()) || [];
            for (const tnltr of tnltrs) {
                const s = new unit_1.TonalSyllable(sounds.map((x) => new unit_2.AlphabeticLetter(x.characters)));
                // the current tone letter is removed in every case
                s.popLetter();
                if (!(tnltr instanceof tonalres_1.ZeroAllomorph)) {
                    // 2 to 3. 3 to 7. 7 to 5. 3 to 5.
                    // replace z with f or x
                    s.pushLetter(new unit_2.AlphabeticLetter(tnltr.characters));
                }
                // otherwise 7 to 1: z is popped and nothing is pushed
                ret.push(s);
            }
            return ret;
        }
        if (allomorph instanceof tonalres_1.CheckedAllomorph) {
            const s = new unit_1.TonalSyllable(sounds.map((it) => new unit_2.AlphabeticLetter(it.characters)));
            if (allomorph.tonal.toString() === '') {
                // no tone letter: resolve through the assimilated-final tables,
                // which yields an empty array when they have no entry
                return this.handleAssimilatedFinal(s, '');
            }
            // pop the tone letter
            // 1 to 4. 3 to 8. 2 to 4. 5 to 8.
            const tnl = s.letters[s.letters.length - 1].literal;
            const nslFnls = sounds.filter((it) => it.name === tonalres_1.TonalSpellingTags.nasalFinalConsonant);
            s.popLetter(); // pop out the tone letter
            if (nslFnls.length === 0 &&
                (tnl === tonalres_1.TonalLetterTags.w || tnl === tonalres_1.TonalLetterTags.x) &&
                collections_1.fourthToEighthFinalConsonants.has(s.lastLetter.literal)) {
                // in case of no internal sandhi
                const fnl = s.lastLetter.literal;
                s.popLetter(); // pop the 4th final consonant
                const got = collections_1.fourthToEighthFinalConsonants.get(fnl);
                if (got) {
                    if ((0, syllablelists_1.isInSyllableTable)(s.literal + tonalres_1.lowerLettersTonal.get(got).literal)) {
                        // push the 8th final consonant if it is present in syllable table
                        s.pushLetter(tonalres_1.lowerLettersTonal.get(got));
                    }
                    else {
                        // restore the popped-out final consonant, so the syllable
                        // is returned with only its tone letter removed.
                        // NOTE: a special case for the combining form 'tikw' of
                        // lexical root 'tekk' was once written here and is
                        // disabled; 'tietw' and 'tietf' are handled in another
                        // function.
                        s.pushLetter(tonalres_1.lowerLettersTonal.get(fnl));
                    }
                }
            }
            else if (collections_1.finalConsonantsForBgjlsbbggjjllss.has(s.lastLetter.literal)) {
                // in case of internal or external sandhi
                const ret = this.handleAssimilatedFinal(s, tnl);
                if (ret && ret.length > 0) {
                    return ret;
                }
            }
            else if (sounds.filter((it) => it.name === tonalres_1.TonalSpellingTags.vowel).length > 0 &&
                collections_1.nasalFinalConsonants.includes(s.lastSecondLetter.literal) &&
                tonalres_1.neutralFinalConsonantsTonal.includes(s.lastLetter.literal)) {
                // in case of internal sandhi of p or t
                // if there is no medials, e.g. hmhh, hngh, just bypass this block
                // mhh, mh, nhh, nh, nghh, ngh
                if (this.soundsFollowing[0] &&
                    this.soundsFollowing[0].name === tonalres_1.TonalSpellingTags.initialConsonant &&
                    s.lastSecondLetter.literal === this.soundsFollowing[0].toString()) {
                    // the nasal equals the following initial: restore -tt or -t
                    s.popLetter(); // pop the neutral
                    s.popLetter(); // pop the nasal
                    const clone = Object.create(s);
                    if (tnl === tonalres_1.TonalLetterTags.w) {
                        clone.pushLetter(tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.tt));
                    }
                    else {
                        clone.pushLetter(tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.t));
                    }
                    return [clone];
                }
                else if (this.soundsFollowing[0]) {
                    // there has to be a following syllable for this syllable to change form
                    // restore -pp or -p
                    s.popLetter(); // pop the neutral
                    s.popLetter(); // pop the nasal
                    const clone = Object.create(s);
                    if (tnl === tonalres_1.TonalLetterTags.w) {
                        clone.pushLetter(tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.pp));
                    }
                    else {
                        clone.pushLetter(tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.p));
                    }
                    return [clone];
                }
            }
            // a syllable is just returned with the tone letter popped out
            // e.g. tnghw's w is popped and tngh is returned
            return [s];
        }
        return [];
    }
}
exports.TonalStandaloneForms = TonalStandaloneForms;
/** Returns the standalone forms of a phrasal verb particle syllable. */
class PhrasalVerbParticleStandalone extends metaplasm_1.TonalStandaloneMetaplasm {
    /**
     * Builds the standalone form of a phrasal verb particle.
     *
     * @param sounds sounds of the syllable; each one is copied into a new
     *     AlphabeticLetter.
     * @param allomorph allomorph of the syllable.
     * @returns a one-element array holding the resulting TonalSyllable, or an
     *     empty array when `allomorph` is missing or is neither a
     *     FreeAllomorph nor a CheckedAllomorph.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }
        if (allomorph instanceof tonalres_1.FreeAllomorph) {
            // 7 to 4
            const s = new unit_1.TonalSyllable(sounds.map((it) => new unit_2.AlphabeticLetter(it.characters)));
            const tnl = sounds.filter((it) => it.name === tonalres_1.TonalSpellingTags.freeTone);
            // `filter` always returns an array, so test its length: a syllable
            // without a free tone letter is returned unchanged instead of
            // throwing on `tnl[0]`
            if (tnl.length > 0 && tnl[0].toString() === tonalres_1.TonalLetterTags.z) {
                s.popLetter(); // pop the tonal
                s.pushLetter(tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.h)); // push neutral final
            }
            return [s];
        }
        if (allomorph instanceof tonalres_1.CheckedAllomorph) {
            // 1 to 4. 3 to 4.
            const s = new unit_1.TonalSyllable(sounds.map((it) => new unit_2.AlphabeticLetter(it.characters)));
            const tnl = sounds.filter((it) => it.name === tonalres_1.TonalSpellingTags.checkedTone);
            // same guard as above: no checked tone letter means nothing to pop
            if (tnl.length > 0 &&
                (tnl[0].toString() === tonalres_1.TonalLetterTags.f ||
                    tnl[0].toString() === tonalres_1.TonalLetterTags.w)) {
                s.popLetter(); // pop the tonal
            }
            return [s];
        }
        return [];
    }
}
exports.PhrasalVerbParticleStandalone = PhrasalVerbParticleStandalone;
/** Produces the standalone forms of a syllable that precedes ay. */
class PrecedingAyStandalone extends metaplasm_1.TonalStandaloneMetaplasm {
    /**
     * Replaces the last letter of `syllable` in place when a replacement
     * final consonant is found for it.
     *
     * @param syllable syllable whose tone letter has already been popped.
     * @param letters the original sounds, tone letter included; the last two
     *     entries are read.
     */
    getStandaloneForms(syllable, letters) {
        const secondToLast = letters[letters.length - 2].toString();
        let replacement;
        if (collections_1.voicedVoicelessFinalConsonants.has(secondToLast)) {
            // sandhi final: the key is the final consonant plus the tone letter
            const key = syllable.lastLetter.literal + letters[letters.length - 1].toString();
            replacement = collections_1.voicedVoicelessFinalConsonants.get(key);
        }
        else if (collections_1.fourthToEighthFinalConsonants.has(secondToLast) &&
            letters[letters.length - 1].toString() === tonalres_1.TonalLetterTags.x) {
            replacement = collections_1.fourthToEighthFinalConsonants.get(syllable.lastLetter.literal);
        }
        if (replacement) {
            syllable.replaceLetter(syllable.letters.length - 1, tonalres_1.lowerLettersTonal.get(replacement));
        }
    }
    /**
     * @param sounds sounds of the syllable; copied into fresh letters for
     *     every syllable that is built.
     * @param allomorph allomorph of the syllable; only tonals f and x
     *     produce forms.
     * @returns the standalone forms, or an empty array when nothing applies.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }
        const tone = allomorph.tonal.toString();
        const isToneF = tone === tonalres_1.TonalLetterTags.f;
        if (!isToneF && tone !== tonalres_1.TonalLetterTags.x) {
            // tonal y and every other tonal yield no forms
            return [];
        }
        const freshSyllable = () => new unit_1.TonalSyllable(sounds.map((it) => new unit_2.AlphabeticLetter(it.characters)));
        if (allomorph instanceof tonalres_1.FreeAllomorph) {
            const tonals = tonalres_1.standaloneRulesAy.get(allomorph.toString()) || [];
            if (isToneF) {
                // 1 to 2. 1 to 3
                // replace f with y or w
                return tonals.map((tonal) => {
                    const form = freshSyllable();
                    form.popLetter();
                    form.pushLetter(new unit_2.AlphabeticLetter(tonal.characters));
                    return form;
                });
            }
            // 5 to 1. 5 to 7. 5 to 5.
            const forms = [];
            for (const tonal of tonals) {
                const form = freshSyllable();
                if (tonal instanceof tonalres_1.ZeroTonal) {
                    // 5 to 1: pop x
                    form.popLetter();
                    forms.push(form);
                }
                else if (tonal instanceof tonalres_1.FreeTonalZ) {
                    // 5 to 7: replace x with z
                    form.popLetter();
                    form.pushLetter(new unit_2.AlphabeticLetter(tonal.characters));
                    forms.push(form);
                }
                else if (tonal instanceof tonalres_1.FreeTonalX) {
                    // 5 to 5: kept as is
                    forms.push(form);
                }
            }
            return forms;
        }
        if (allomorph instanceof tonalres_1.CheckedAllomorph) {
            // tonal f: pop f. tonal x: 5 to 8, pop x.
            const form = freshSyllable();
            form.popLetter();
            this.getStandaloneForms(form, sounds);
            return [form];
        }
        return [];
    }
}
exports.PrecedingAyStandalone = PrecedingAyStandalone;
/** Returns the standalone forms of the syllable preceding ex. */
class PrecedingExStandalone extends metaplasm_1.TonalStandaloneMetaplasm {
    /**
     * Resolves the assimilated final consonant of a syllable whose last two
     * letters are the final consonant and the tone letter.
     *
     * The tone letter is popped from `syllable` when a lookup entry exists.
     *
     * @param syllable syllable ending in final consonant + tone letter.
     * @returns one clone of `syllable` per candidate final, with the final
     *     consonant replaced; an empty array when
     *     `finalConsonantsForBgjlsFw` has no entry for the two letters.
     */
    handleAssimilatedFinal(syllable) {
        const finalConsonant = syllable.lastSecondLetter.literal;
        const fnlsOfLemma = collections_1.finalConsonantsForBgjlsFw.get(finalConsonant + syllable.lastLetter.literal);
        if (!fnlsOfLemma) {
            return [];
        }
        // when the final is b or g, and when the following syllable is ex,
        // the trailing tt or t candidate is dropped. The list belongs to the
        // shared collection, so take a shortened copy rather than calling
        // pop(), which would permanently shrink the entry for later lookups.
        const dropsLast = finalConsonant === tonalres_1.TonalLetterTags.g ||
            finalConsonant === tonalres_1.TonalLetterTags.b;
        const candidates = dropsLast ? fnlsOfLemma.slice(0, -1) : fnlsOfLemma;
        syllable.popLetter(); // pop tonal
        return candidates.map((it) => {
            const clone = Object.create(syllable);
            // replace the final consonant which is the last letter after
            // the tonal is popped
            clone.replaceLetter(syllable.letters.length - 1, tonalres_1.lowerLettersTonal.get(it.toString()));
            return clone;
        });
    }
    /**
     * @param sounds sounds of the syllable; each one is copied into a new
     *     AlphabeticLetter.
     * @param allomorph allomorph of the syllable; only its presence is
     *     checked.
     * @returns the standalone forms, or an empty array when `allomorph` is
     *     missing.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }
        const syl = new unit_1.TonalSyllable(sounds.map((it) => new unit_2.AlphabeticLetter(it.characters)));
        return this.handleAssimilatedFinal(syl);
    }
}
exports.PrecedingExStandalone = PrecedingExStandalone;
/** Returns the last syllable of a double or triple construction as a standalone form. */
class LastSyllableForms extends metaplasm_1.TonalStandaloneMetaplasm {
    lettersLastSyllable;
    /**
     * @param lettersLastSyllable letters of the last syllable of the
     *     construction.
     */
    constructor(lettersLastSyllable) {
        super();
        this.lettersLastSyllable = lettersLastSyllable;
    }
    /**
     * @param sounds sounds of the syllable being processed; only the last
     *     one is read.
     * @param allomorph allomorph of the syllable; only its presence is
     *     checked.
     * @returns a one-element array with a syllable rebuilt from
     *     `lettersLastSyllable`, or an empty array when `allomorph` is
     *     missing or the final letters match.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }
        const lastLetters = this.lettersLastSyllable;
        const ownEnding = lastLetters[lastLetters.length - 1].toString();
        const givenEnding = sounds[sounds.length - 1].toString();
        if (ownEnding === givenEnding) {
            // skip the last syllable. it is the base form of the preceding 2 syllables.
            return [];
        }
        const copiedLetters = lastLetters.map((it) => new unit_2.AlphabeticLetter(it.characters));
        return [new unit_1.TonalSyllable(copiedLetters)];
    }
}
exports.LastSyllableForms = LastSyllableForms;
/** Produces the standalone forms of a transfix-inflected syllable. */
class TransfixStandalone extends metaplasm_1.TonalStandaloneMetaplasm {
    /**
     * @param sounds sounds of the syllable; each one is copied into a new
     *     AlphabeticLetter.
     * @param allomorph allomorph of the syllable; only its presence is
     *     checked.
     * @returns one form when exactly one sound is the letter a, two forms
     *     when exactly one sound is a checked tone, otherwise four forms;
     *     an empty array when `allomorph` is missing.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }
        const countOfA = sounds.filter((it) => it.toString() === tonalres_1.TonalLetterTags.a).length;
        const countOfCheckedTones = sounds.filter((it) => it.name === tonalres_1.TonalSpellingTags.checkedTone).length;
        const base = new unit_1.TonalSyllable(sounds.map((it) => new unit_2.AlphabeticLetter(it.characters)));
        const letterOf = (tag) => tonalres_1.lowerLettersTonal.get(tag);
        if (countOfA === 1) {
            // aw -> ay
            base.popLetter(); // pop letter w
            base.pushLetter(letterOf(tonalres_1.TonalLetterTags.y));
            return [base];
        }
        if (countOfCheckedTones === 1) {
            // checked tones
            base.popLetter(); // pop letter w
            const variant = Object.create(base);
            // get hh or tt
            const replacement = collections_1.finalConsonantsForTransfix.get(base.letters[base.letters.length - 1].literal);
            if (replacement) {
                variant.popLetter(); // pop final t
                variant.pushLetter(letterOf(replacement)); // push hh or tt
            }
            return [base, variant];
        }
        // in case of free tones other than aw, return the other four free tones.
        // all four clones are created before any of them is modified, and they
        // are then modified in the order 1st, 2nd, 5th, 7th.
        const [first, second, fifth, seventh] = Array.from({ length: 4 }, () => Object.create(base));
        first.popLetter(); // 1st tone: pop w
        const retoned = [
            [second, tonalres_1.TonalLetterTags.y], // 2nd tone
            [fifth, tonalres_1.TonalLetterTags.x], // 5th tone
            [seventh, tonalres_1.TonalLetterTags.z], // 7th tone
        ];
        for (const [form, tag] of retoned) {
            form.popLetter(); // pop w
            form.pushLetter(letterOf(tag));
        }
        return [first, second, fifth, seventh];
    }
}
exports.TransfixStandalone = TransfixStandalone;
/** Changes ~ietf to ~ek and ~ietw to ~ekk. */
class StandaloneFormsIetfIetwToEkEkk extends metaplasm_1.TonalStandaloneMetaplasm {
    /**
     * @param sounds sounds of the syllable; the first initial consonant and
     *     the first checked tone are read.
     * @param allomorph allomorph of the syllable; only its presence is
     *     checked.
     * @returns a one-element array holding initial + e + k (tone f) or
     *     initial + e + kk (tone w); an empty array in every other case.
     */
    apply(sounds, allomorph) {
        if (!allomorph) {
            return [];
        }
        const initials = sounds.filter((i) => i.name === tonalres_1.TonalSpellingTags.initialConsonant);
        const tones = sounds.filter((i) => i.name === tonalres_1.TonalSpellingTags.checkedTone);
        if (initials.length === 0 || tones.length === 0) {
            return [];
        }
        const tone = tones[0].toString();
        let finalTag;
        if (tone === tonalres_1.TonalLetterTags.f) {
            // in case of ~ietf
            finalTag = tonalres_1.TonalLetterTags.k;
        }
        else if (tone === tonalres_1.TonalLetterTags.w) {
            // in case of ~ietw
            finalTag = tonalres_1.TonalLetterTags.kk;
        }
        else {
            return [];
        }
        const rebuilt = new unit_1.TonalSyllable([
            new unit_2.AlphabeticLetter(initials[0].characters),
            tonalres_1.lowerLettersTonal.get(tonalres_1.TonalLetterTags.e),
            tonalres_1.lowerLettersTonal.get(finalTag),
        ]);
        return [rebuilt];
    }
}
exports.StandaloneFormsIetfIetwToEkEkk = StandaloneFormsIetfIetwToEkEkk;
/** Lemmatizes a word, yielding its base forms. */
class TonalLemmatization extends metaplasm_2.TonalLemmatizationMetaplasm {
    /**
     * @param morphemes morphemes of the word.
     * @param inflectionalEnding inflectional ending of the word.
     * @returns the lemmata produced by `populateLemmata`.
     */
    apply(morphemes, inflectionalEnding) {
        return this.populateLemmata(morphemes, inflectionalEnding);
    }
    /**
     * Builds candidate lemmas by swapping the last syllable of the word for
     * the forms of the last morpheme.
     *
     * @returns one word per form for a free ending, at most one word (built
     *     from the first form) for a checked ending, otherwise an empty array.
     */
    getLemmas(morphemes, inflectionalEnding) {
        if (!inflectionalEnding) {
            return [];
        }
        // rebuilds the word from the morphemes' syllables and swaps its last
        // syllable for `form`
        const wordEndingWith = (form) => {
            const word = new unit_1.TonalWord(morphemes.map((it) => it.syllable));
            word.popSyllable();
            word.pushSyllable(form);
            return word;
        };
        if (inflectionalEnding instanceof unit_1.FreeInflectionalEnding) {
            const forms = morphemes[morphemes.length - 1].getForms();
            const words = [];
            for (const form of forms) {
                words.push(wordEndingWith(form));
            }
            return words;
        }
        if (inflectionalEnding instanceof unit_1.CheckedInflectionalEnding) {
            if (morphemes[morphemes.length - 1].getForms().length === 0) {
                return [];
            }
            const word = new unit_1.TonalWord(morphemes.map((it) => it.syllable));
            word.popSyllable();
            word.pushSyllable(morphemes[morphemes.length - 1].getForms()[0]);
            return [word];
        }
        return [];
    }
    /**
     * Turns morphemes into lemmas.
     *
     * @returns a new array holding every lemma returned by `getLemmas`.
     */
    populateLemmata(morphemes, inflectionalEnding) {
        const lemmata = [];
        for (const lemma of this.getLemmas(morphemes, inflectionalEnding)) {
            lemmata.push(lemma);
        }
        return lemmata;
    }
}
exports.TonalLemmatization = TonalLemmatization;
//# sourceMappingURL=metaplasm.js.map