/*
 * taipa
 * Version:
 * Taiwanese morphological parsing library
 * 149 lines • 6.13 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.getKanaBlocks = exports.checkNumberOfLettersKana = void 0;
const kanares_1 = require("./kanares");
/**
 * Sanity check on the kana resources: logs "sizes unmatched" when the
 * number of entries in `kanaPositionalSounds` differs from the number of
 * entries in `lowerLettersKana`. Nothing is logged when the sizes agree.
 *
 * @returns {void}
 */
function checkNumberOfLettersKana() {
    const positionalSoundCount = kanares_1.kanaPositionalSounds.size;
    const lowerLetterCount = kanares_1.lowerLettersKana.size;
    if (positionalSoundCount === lowerLetterCount) {
        return;
    }
    console.log("sizes unmatched");
}
exports.checkNumberOfLettersKana = checkNumberOfLettersKana;
/**
 * Builds the kana strings for a syllable that is preceded by a small tsu.
 *
 * The small-tsu entry (`kogakimoji.get("tsu")`) is concatenated first,
 * followed by the entry stored under `key` in `hiraganaKatakana`. A lookup
 * that yields nothing is simply skipped.
 *
 * @param {string} key - key of the syllable that follows the small tsu.
 * @returns {string[]} three strings: slot 0 is built from element 0 of each
 *   entry, slots 1 and 2 are both built from element 1 of each entry.
 */
function getKanasFollowingSmallTsu(key) {
    const sequences = ["", "", ""];
    const entries = [
        kanares_1.kogakimoji.get("tsu"),
        kanares_1.hiraganaKatakana.get(key),
    ];
    for (const entry of entries) {
        if (!entry) {
            continue;
        }
        const first = entry[0];
        const second = entry[1];
        sequences[0] += first;
        sequences[1] += second;
        // The third slot intentionally mirrors the second one.
        sequences[2] += second;
    }
    return sequences;
}
/**
 * Tells whether `nextLetter` prolongs `previousLetter` (chouon).
 *
 * This is the case when both letters are identical, when `e` is followed
 * by `i`, or when `o` is followed by `u` (letters as named in
 * `KanaLetterTags`).
 *
 * @param {string} previousLetter - last letter of the preceding syllable.
 * @param {string} nextLetter - last letter of the current syllable.
 * @returns {boolean} true when the pair matches one of the cases above.
 */
function checkChouon(previousLetter, nextLetter) {
    return (previousLetter === nextLetter ||
        (previousLetter === kanares_1.KanaLetterTags.e &&
            nextLetter === kanares_1.KanaLetterTags.i) ||
        (previousLetter === kanares_1.KanaLetterTags.o &&
            nextLetter === kanares_1.KanaLetterTags.u));
}
/**
 * Resolves a romanized syllable to its kana entry.
 *
 * The tables are consulted in a fixed order: `hiraganaKatakana`, then
 * `gairaigo`, then `special`. The first table that yields a value other
 * than null/undefined wins.
 *
 * @param {string} str - the syllable to look up.
 * @returns {*} the entry found, or the result of the last lookup
 *   (normally undefined) when no table knows `str`.
 */
function lookUp(str) {
    const tablesInPriorityOrder = [
        kanares_1.hiraganaKatakana,
        kanares_1.gairaigo,
        kanares_1.special,
    ];
    let found;
    for (const table of tablesInPriorityOrder) {
        found = table.get(str);
        if (found != undefined) {
            break;
        }
    }
    return found;
}
/**
 * Fetches the entry stored for `str` in the `otherKanas` table.
 *
 * @param {string} str - the syllable to look up.
 * @returns {*} the stored entry, or undefined when the table has no such key.
 */
function lookUpOtherKanas(str) {
    const table = kanares_1.otherKanas;
    return table.has(str) ? table.get(str) : undefined;
}
/**
 * Builds the kana renderings for a sequence of morphemes.
 *
 * Index 0 of the result is the hiragana string and index 1 the katakana
 * string. Further entries are appended after them, in this order, only
 * when they apply:
 *   - for a single-morpheme input, the truthy elements 0 and 1 of the entry
 *     returned by lookUpOtherKanas;
 *   - the hiragana chouon variant, when it differs from the hiragana string;
 *   - the katakana chouon variant, when it differs from the katakana string.
 *
 * The chouon variants are the same strings with "ー" written in place of a
 * lone vowel that prolongs the preceding syllable.
 *
 * A syllable that cannot be resolved contributes nothing. This includes
 * morphemes whose `sounds` list is missing or shorter than three entries:
 * they are skipped instead of raising a TypeError.
 *
 * @param {Array} morphemes - objects exposing `syllable.literal` (string)
 *   and `sounds` (array of values with a `toString()` method).
 * @returns {string[]} the kana strings described above.
 */
function getKanaBlocks(morphemes) {
    const kanaSequences = ["", ""];
    // Chouon variants of kanaSequences[0] and kanaSequences[1].
    let hiraganaChouon = "";
    let katakanaChouon = "";
    // Appends one [hiragana, katakana] pair to the plain and chouon strings.
    const appendPair = (pair) => {
        kanaSequences[0] += pair[0];
        kanaSequences[1] += pair[1];
        hiraganaChouon += pair[0];
        katakanaChouon += pair[1];
    };
    let previous = "";
    for (const m of morphemes) {
        const literal = m.syllable.literal;
        const lastLetter = literal[literal.length - 1];
        const ks = lookUp(literal);
        if (ks != null && ks[0] != null) {
            // The entry may exist while its kana is absent, hence the ks[0] check.
            kanaSequences[0] += ks[0];
            kanaSequences[1] += ks[1];
            // A lone vowel (length 1, not an initial consonant) that prolongs
            // the previous syllable is written as a long-vowel mark.
            const isLongVowel = previous.length > 0 &&
                checkChouon(previous[previous.length - 1], lastLetter) &&
                !kanares_1.initialConsonantsKana.includes(literal) &&
                literal.length === 1;
            if (isLongVowel) {
                hiraganaChouon += "ー";
                katakanaChouon += "ー";
            }
            else {
                hiraganaChouon += ks[0];
                katakanaChouon += ks[1];
            }
            if (morphemes.length === 1) {
                const others = lookUpOtherKanas(literal);
                if (others) {
                    if (others[0]) {
                        kanaSequences.push(others[0]);
                    }
                    if (others[1]) {
                        kanaSequences.push(others[1]);
                    }
                }
            }
        }
        else if (kanares_1.finalConsonantsKana.includes(lastLetter)) {
            // A syllable with a final consonant: render the part before the
            // final, then the final itself as hatsuon "n" or as a small tsu.
            const stem = lookUp(literal.substring(0, literal.length - 1));
            if (stem != null && stem[0] != null) {
                appendPair(stem);
            }
            const closing = kanares_1.hatsuonsKana.includes(lastLetter)
                ? kanares_1.hatsuon.get("n")
                : kanares_1.kogakimoji.get("tsu");
            if (closing && closing[0]) {
                appendPair(closing);
            }
        }
        else if (m.sounds != null && m.sounds.length >= 3) {
            // Geminated onset, e.g. ddo, ggu or ttsu. Three sounds are needed
            // (the doubled consonant plus what follows); shorter lists are skipped.
            const first = m.sounds[0].toString();
            const second = m.sounds[1].toString();
            const isDoubled = first === second;
            const isTBeforeTs = first === kanares_1.KanaLetterTags.t &&
                second === kanares_1.KanaLetterTags.ts;
            if ((isDoubled || isTBeforeTs) &&
                kanares_1.geminatedConsonantsKana.includes(first)) {
                appendPair(getKanasFollowingSmallTsu(second + m.sounds[2].toString()));
            }
        }
        previous = literal;
    }
    // Expose the chouon variants only when they differ from the plain strings.
    if (kanaSequences[0] !== hiraganaChouon) {
        kanaSequences.push(hiraganaChouon);
    }
    if (kanaSequences[1] !== katakanaChouon) {
        kanaSequences.push(katakanaChouon);
    }
    return kanaSequences;
}
exports.getKanaBlocks = getKanaBlocks;
//# sourceMappingURL=init.js.map