UNPKG

taipa

Version:

Taiwanese morphological parsing library

249 lines 10.1 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.KanaStandaloneMorphemeMaker = exports.KanaStandaloneMorpheme = exports.KanaSyllable = void 0;
const unit_1 = require("../unit");
const maker_1 = require("../maker");
const kanares_1 = require("./kanares");
const soundgen_1 = require("./soundgen");

/** Syllable type used by the kana morpheme maker; adds nothing to `Syllable`. */
class KanaSyllable extends unit_1.Syllable {
}
exports.KanaSyllable = KanaSyllable;

/** A syllable. */
class KanaStandaloneMorpheme extends unit_1.Morpheme {
    syllable;
    sounds;
    /**
     * @param syllable the syllable built from the matched letters
     * @param sounds the sound sequence matched for that syllable
     * @param kcm accepted for signature compatibility; not used by this constructor
     */
    constructor(syllable, sounds, kcm) {
        super();
        this.syllable = syllable;
        this.sounds = sounds;
    }
}
exports.KanaStandaloneMorpheme = KanaStandaloneMorpheme;

/**
 * Tells whether a generated sound sequence lines up with the letters that
 * start at `beginOfSyllable`.
 *
 * A sequence matches when it is non-empty, fits into the remaining letters,
 * every defined entry stringifies to the literal at the same offset, and its
 * last entry is defined. Undefined entries before the last one are skipped.
 */
function soundsMatchLetters(soundSeq, letters, beginOfSyllable) {
    const length = soundSeq.length;
    if (length === 0 || length > letters.length - beginOfSyllable) {
        return false;
    }
    for (let n = 0; n < length; n++) {
        const sound = soundSeq[n];
        if (sound == undefined) {
            continue;
        }
        if (letters[beginOfSyllable + n].literal !== sound.toString()) {
            return false;
        }
    }
    return soundSeq[length - 1] != undefined;
}

/**
 * Copies the letters covered by `soundSeq` and the sounds themselves into
 * `mp`, position by position, and returns `mp`.
 */
function fillPattern(mp, letters, beginOfSyllable, soundSeq) {
    for (let q = 0; q < soundSeq.length; q++) {
        mp.letters[q] = letters[beginOfSyllable + q];
        mp.pattern[q] = soundSeq[q];
    }
    return mp;
}

/**
 * Finds the syllable that starts at `beginOfSyllable`.
 *
 * The letters are scanned left to right to find the longest acceptable
 * literal. `KanaSoundGenerator` then proposes sound sequences for it, and the
 * sequences that line up with the input are used to fill a `MatchedPattern`.
 *
 * @param letters letters of the word; each exposes its text as `.literal`
 * @param beginOfSyllable index of the first letter of the syllable to match
 * @returns a `MatchedPattern`; its `letters` and `pattern` are left untouched
 *          when nothing matched
 */
function syllabifyKana(letters, beginOfSyllable) {
    let literal = '';
    let matched = '';
    let lookAhead = '';
    const ltrs = [];
    let matchedLtrs = [];
    // Snapshot the letters collected so far as the current best match.
    const snapshot = () => {
        matchedLtrs = [...ltrs];
    };
    const isVowelLike = (text) => kanares_1.vowelsKana.includes(text) || kanares_1.semivowelsKana.includes(text);

    for (let i = beginOfSyllable; i < letters.length; i++) {
        literal = literal + letters[i].literal;
        ltrs.push(letters[i].literal);
        const next = letters[i + 1]?.literal;

        if (kanares_1.hiraganaKatakana.has(literal) || kanares_1.gairaigo.has(literal)) {
            matched = literal;
            snapshot();
            // look-ahead
            if (next !== undefined) {
                lookAhead = next;
            }
        }
        else if (literal.length === 3 &&
            letters[beginOfSyllable].literal === kanares_1.KanaLetterTags.ng &&
            kanares_1.vowelsKana.includes(letters[beginOfSyllable + 1]?.literal)) {
            // ng-
            // The test is made on the first two letters of THIS syllable, not
            // of the whole word; otherwise a word beginning with ng + vowel
            // would make every later three-character literal match here.
            matched = literal;
            snapshot();
        }
        else if (ltrs.length === 3 &&
            (ltrs[0] === ltrs[1] ||
                (ltrs[0] === kanares_1.KanaLetterTags.t && ltrs[1] === kanares_1.KanaLetterTags.ts)) &&
            kanares_1.vowelsKana.includes(ltrs[2]) &&
            !kanares_1.vowelsKana.includes(ltrs[0])) {
            // Initial sokuon. e.g. ggu, kku, ppa, etc.
            // When a final t followed by an initial ts.
            // In addition to that, the first two letters are not vowels.
            // Given that the condition is the first two letters should be the same,
            // we check out if the first letter is not a vowel.
            matched = literal;
            snapshot();
            lookAhead = '';
        }
        else if (kanares_1.finalConsonantsKana.includes(lookAhead) && i + 1 === letters.length) {
            // final sokuon, not medial sokuon
            matched = literal;
            snapshot();
            lookAhead = '';
        }
        else if (kanares_1.geminatedConsonantsKana.includes(lookAhead) &&
            i + 1 < letters.length // there is at least one letter after look-ahead
        ) {
            // medial sokuon, not final sokuon
            if (kanares_1.initialConsonantsKana.includes(next) &&
                (lookAhead === next ||
                    (lookAhead === kanares_1.KanaLetterTags.t && next === kanares_1.KanaLetterTags.ch))) {
                // the letter after look-ahead is an initial consonant
                matched = literal;
                snapshot();
            }
            else if (isVowelLike(next)) {
                // the letter after look-ahead is a vowel: the consonant belongs
                // to the next syllable, so drop it from this one
                matched = literal.slice(0, literal.length - 1);
                ltrs.pop();
                snapshot();
            }
            lookAhead = '';
        }
        else if (kanares_1.hatsuonsKana.includes(lookAhead)) {
            // `next` is undefined when the hatsuon is the last letter of the
            // input; it then stays in this syllable instead of raising a
            // TypeError on a missing letter.
            if (next !== undefined && isVowelLike(next)) {
                // the letter after look-ahead is a vowel
                matched = literal.slice(0, literal.length - 1);
                ltrs.pop();
                snapshot();
            }
            else {
                matched = literal;
                snapshot();
            }
            lookAhead = '';
        }
    }

    let list = [];
    if (matched.length > 0) {
        const ksg = new soundgen_1.KanaSoundGenerator();
        list = ksg.generate(matchedLtrs, lookAhead);
    }

    const mp = new unit_1.MatchedPattern();
    // Every generated sequence that lines up with the input. Each candidate
    // keeps its own sounds so that the one finally chosen is filled from its
    // own sequence rather than from whichever sequence happened to match last.
    const candidates = [];
    for (const soundSeq of list) {
        if (soundsMatchLetters(soundSeq, letters, beginOfSyllable)) {
            candidates.push(soundSeq);
        }
    }

    if (candidates.length === 0) {
        return mp;
    }
    if (candidates.length === 1) {
        // only one matched
        return fillPattern(mp, letters, beginOfSyllable, candidates[0]);
    }

    // Several candidates: pick the first longest one, and pair it with
    // candidate 0 (or candidate 1 when the longest is candidate 0).
    let longerEntry = 0;
    for (let j = 0; j < candidates.length; j++) {
        if (candidates[j].length > candidates[longerEntry].length) {
            longerEntry = j;
        }
    }
    const shorterEntry = longerEntry > 0 ? 0 : 1;
    const longer = candidates[longerEntry];
    const shorter = candidates[shorterEntry];
    const remaining = letters.length - beginOfSyllable;

    let chosen;
    if (remaining === longer.length) {
        // The longer candidate consumes the rest of the input: keep it only
        // when it ends in a hatsuon.
        const lastLiteral = letters[beginOfSyllable + longer.length - 1].literal;
        chosen = kanares_1.hatsuonsKana.includes(lastLiteral) ? longer : shorter;
    }
    else {
        const following = letters[beginOfSyllable + longer.length].literal;
        if (remaining === longer.length + 1) {
            // look ahead for 1 letter: an initial consonant after the longer
            // candidate keeps it (consonant-ending); otherwise take the shorter
            chosen = kanares_1.initialConsonantsKana.includes(following) ? longer : shorter;
        }
        else {
            // look ahead for 2 letters: a vowel or semivowel right after the
            // longer candidate means its last letter starts the next syllable
            chosen = isVowelLike(following) ? shorter : longer;
        }
    }
    return fillPattern(mp, letters, beginOfSyllable, chosen);
}

/** Builds `KanaStandaloneMorpheme`s out of graphemes using `syllabifyKana`. */
class KanaStandaloneMorphemeMaker extends maker_1.MorphemeMaker {
    metaplasm;
    /**
     * @param kcm object handed to every morpheme this maker creates
     */
    constructor(kcm) {
        super();
        this.metaplasm = kcm;
    }
    /** Returns the empty array that `postprocess` fills with morphemes. */
    createArray() {
        return [];
    }
    /** Wraps one matched pattern (`letters` + `pattern`) into a morpheme. */
    createMorpheme(msp) {
        return new KanaStandaloneMorpheme(new KanaSyllable(msp.letters), msp.pattern, this.metaplasm);
    }
    /** Converts every matched pattern into a morpheme, preserving order. */
    postprocess(patterns) {
        const morphemes = this.createArray();
        for (const pattern of patterns) {
            morphemes.push(this.createMorpheme(pattern));
        }
        return morphemes;
    }
    /**
     * Extracts the letter of each grapheme, splits the letters into syllable
     * patterns with the inherited `make` and `syllabifyKana`, and returns the
     * resulting morphemes.
     */
    makeMorphemes(graphemes) {
        const ltrs = graphemes.map((it) => it.letter);
        const ptrns = this.make(ltrs, syllabifyKana);
        return this.postprocess(ptrns);
    }
}
exports.KanaStandaloneMorphemeMaker = KanaStandaloneMorphemeMaker;
//# sourceMappingURL=morpheme.js.map