UNPKG

taipa

Version:

Taiwanese morphological parsing library

149 lines 6.13 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.getKanaBlocks = exports.checkNumberOfLettersKana = void 0; const kanares_1 = require("./kanares"); function checkNumberOfLettersKana() { if (kanares_1.kanaPositionalSounds.size !== kanares_1.lowerLettersKana.size) { console.log("sizes unmatched"); } } exports.checkNumberOfLettersKana = checkNumberOfLettersKana; function getKanasFollowingSmallTsu(key) { const kanaSequences = ["", "", ""]; let ks = kanares_1.kogakimoji.get("tsu"); if (ks) { kanaSequences[0] += ks[0]; kanaSequences[1] += ks[1]; kanaSequences[2] += ks[1]; } ks = kanares_1.hiraganaKatakana.get(key); if (ks) { kanaSequences[0] += ks[0]; kanaSequences[1] += ks[1]; kanaSequences[2] += ks[1]; } return kanaSequences; } function checkChouon(previousLetter, nextLetter) { if (previousLetter === nextLetter) return true; if (previousLetter === kanares_1.KanaLetterTags.e && nextLetter === kanares_1.KanaLetterTags.i) return true; if (previousLetter === kanares_1.KanaLetterTags.o && nextLetter === kanares_1.KanaLetterTags.u) return true; return false; } function lookUp(str) { let kanas = kanares_1.hiraganaKatakana.get(str); if (kanas == undefined) { kanas = kanares_1.gairaigo.get(str); } if (kanas == undefined) { kanas = kanares_1.special.get(str); } return kanas; } function lookUpOtherKanas(str) { if (kanares_1.otherKanas.has(str)) { return kanares_1.otherKanas.get(str); } } function getKanaBlocks(morphemes) { // string one is hiragana, string two is katakana, string three is chouon const kanaSequences = []; kanaSequences[0] = ""; kanaSequences[1] = ""; const hiraganaChouonSeqs = []; // chouon. should have only 1 element hiraganaChouonSeqs[0] = ""; const katakanaChouonSeqs = []; // chouon. should have only 1 element katakanaChouonSeqs[0] = ""; let previous = ""; for (const m of morphemes) { const ks = lookUp(m.syllable.literal); if (ks != undefined && ks[0] != undefined) { // in case the kana is absent, we check against ks[0] kanaSequences[0] += ks[0]; kanaSequences[1] += ks[1]; if (previous.length > 0 && checkChouon(previous[previous.length - 1], m.syllable.literal[m.syllable.literal.length - 1]) && kanares_1.initialConsonantsKana.includes(m.syllable.literal) == false && m.syllable.literal.length == 1) { // a vowel without a preceding initial consonant and is of length 1 // a vowel follows a previous vowel hiraganaChouonSeqs[0] += "ー"; katakanaChouonSeqs[0] += "ー"; } else { hiraganaChouonSeqs[0] += ks[0]; katakanaChouonSeqs[0] += ks[1]; } if (morphemes.length == 1) { const got = lookUpOtherKanas(m.syllable.literal); if (got) { if (got[0]) kanaSequences.push(got[0]); if (got[1]) kanaSequences.push(got[1]); } } } else if (kanares_1.finalConsonantsKana.includes(m.syllable.literal[m.syllable.literal.length - 1]) == true) { // a syllable with a final consonant const got = lookUp(m.syllable.literal.substring(0, m.syllable.literal.length - 1)); if (got != undefined && got[0] != undefined) { kanaSequences[0] += got[0]; kanaSequences[1] += got[1]; hiraganaChouonSeqs[0] += got[0]; katakanaChouonSeqs[0] += got[1]; } if (kanares_1.hatsuonsKana.includes(m.syllable.literal[m.syllable.literal.length - 1])) { const got = kanares_1.hatsuon.get("n"); if (got && got[0]) { kanaSequences[0] += got[0]; kanaSequences[1] += got[1]; hiraganaChouonSeqs[0] += got[0]; katakanaChouonSeqs[0] += got[1]; } } else { const got = kanares_1.kogakimoji.get("tsu"); if (got && got[0]) { kanaSequences[0] += got[0]; kanaSequences[1] += got[1]; hiraganaChouonSeqs[0] += got[0]; katakanaChouonSeqs[0] += got[1]; } } } else { if (m.sounds[0].toString() === m.sounds[1].toString() && kanares_1.geminatedConsonantsKana.includes(m.sounds[0].toString()) == true) { // e.g. ddo, ggu const got = getKanasFollowingSmallTsu(m.sounds[1].toString() + m.sounds[2].toString()); kanaSequences[0] += got[0]; kanaSequences[1] += got[1]; hiraganaChouonSeqs[0] += got[0]; katakanaChouonSeqs[0] += got[1]; } else if (m.sounds[0].toString() === kanares_1.KanaLetterTags.t && m.sounds[1].toString() === kanares_1.KanaLetterTags.ts && kanares_1.geminatedConsonantsKana.includes(m.sounds[0].toString()) == true) { // e.g. ttsu const got = getKanasFollowingSmallTsu(m.sounds[1].toString() + m.sounds[2].toString()); kanaSequences[0] += got[0]; kanaSequences[1] += got[1]; hiraganaChouonSeqs[0] += got[0]; katakanaChouonSeqs[0] += got[1]; } } previous = m.syllable.literal; } // copy chouon kanas if (kanaSequences[0] !== hiraganaChouonSeqs[0]) kanaSequences.push(hiraganaChouonSeqs[0]); if (kanaSequences[1] !== katakanaChouonSeqs[0]) kanaSequences.push(katakanaChouonSeqs[0]); return kanaSequences; } exports.getKanaBlocks = getKanaBlocks; //# sourceMappingURL=init.js.map