taipa
Version:
Taiwanese morphological parsing library
249 lines • 10.1 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.KanaStandaloneMorphemeMaker = exports.KanaStandaloneMorpheme = exports.KanaSyllable = void 0;
const unit_1 = require("../unit");
const maker_1 = require("../maker");
const kanares_1 = require("./kanares");
const soundgen_1 = require("./soundgen");
/**
 * Syllable type used by the kana morpheme classes in this module.
 * Declares no members of its own; everything comes from unit_1.Syllable.
 */
class KanaSyllable extends unit_1.Syllable {
}
exports.KanaSyllable = KanaSyllable;
/**
 * A standalone kana morpheme: one syllable together with the sounds that
 * were matched for it.
 */
class KanaStandaloneMorpheme extends unit_1.Morpheme {
    /** The syllable this morpheme wraps. */
    syllable;
    /** The sounds passed in for that syllable, stored as given. */
    sounds;
    /**
     * @param syllable - The syllable to wrap.
     * @param sounds - The sounds belonging to the syllable; stored by reference.
     * @param kcm - Accepted so the constructor signature stays compatible with
     *   existing callers; this constructor does not read it.
     */
    constructor(syllable, sounds, kcm) {
        super();
        this.syllable = syllable;
        // The former `new Array()` placeholder was overwritten immediately,
        // so the argument is assigned directly.
        this.sounds = sounds;
    }
}
exports.KanaStandaloneMorpheme = KanaStandaloneMorpheme;
/**
 * Tries to match one kana syllable starting at `letters[beginOfSyllable]`.
 *
 * Phase 1 scans forward from `beginOfSyllable`, accumulating letter literals
 * and remembering the most recent accumulation that qualifies as a syllable
 * (table hit, ng- onset, initial/medial/final sokuon, or hatsuon).
 * Phase 2 asks KanaSoundGenerator for candidate sound sequences for that
 * accumulation and keeps the candidates whose literals line up with the input.
 * Phase 3 picks how many letters to consume and copies them, together with
 * their sounds, into a fresh MatchedPattern.
 *
 * @param letters - Array of objects exposing a string `literal` property.
 * @param beginOfSyllable - Index in `letters` where the syllable starts.
 * @returns A unit_1.MatchedPattern whose `letters` / `pattern` hold the
 *   matched letters and sounds; both are left as constructed when nothing
 *   matched.
 */
function syllabifyKana(letters, beginOfSyllable) {
    const isVowelOrSemivowel = (s) => kanares_1.vowelsKana.includes(s) ||
        kanares_1.semivowelsKana.includes(s);

    // ---- Phase 1: find the letters that make up the syllable -------------
    let literal = ''; // concatenation of every literal read so far
    let matched = ''; // last accepted accumulation; only its length is used later
    let lookAhead = ''; // literal following the last table hit, '' once consumed
    const ltrs = []; // literals read so far, one entry per letter
    let matchedLtrs = []; // snapshot of `ltrs` at the last accepted match

    // Accept everything accumulated so far.
    const keepCurrent = () => {
        matched = literal;
        matchedLtrs = [...ltrs];
    };
    // Accept the accumulation without the letter just read (it starts the
    // next syllable). `literal` itself is intentionally left untouched, as in
    // the original implementation.
    const keepWithoutLast = () => {
        matched = literal.slice(0, literal.length - 1);
        ltrs.pop();
        matchedLtrs = [...ltrs];
    };

    for (let i = beginOfSyllable; i < letters.length; i++) {
        const current = letters[i].literal;
        literal += current;
        ltrs.push(current);
        const hasNext = i + 1 < letters.length;
        const next = hasNext ? letters[i + 1].literal : undefined;

        if (kanares_1.hiraganaKatakana.has(literal) || kanares_1.gairaigo.has(literal)) {
            keepCurrent();
            // look-ahead
            if (hasNext) {
                lookAhead = next;
            }
        }
        else if (literal.length === 3 &&
            letters[0].literal === kanares_1.KanaLetterTags.ng &&
            kanares_1.vowelsKana.includes(letters[1].literal)) {
            // ng-
            // NOTE(review): this inspects letters[0] / letters[1] (absolute
            // positions), whereas the next branch inspects `ltrs` (relative to
            // beginOfSyllable). Kept as-is; confirm whether ng- is meant to be
            // recognised only at the very start of the input.
            keepCurrent();
        }
        else if (ltrs.length === 3 &&
            (ltrs[0] === ltrs[1] ||
                (ltrs[0] === kanares_1.KanaLetterTags.t && ltrs[1] === kanares_1.KanaLetterTags.ts)) &&
            kanares_1.vowelsKana.includes(ltrs[2]) &&
            !kanares_1.vowelsKana.includes(ltrs[0])) {
            // Initial sokuon, e.g. ggu, kku, ppa, or a final t followed by an
            // initial ts. The first letter must not be a vowel, which (given
            // the first two letters are equal) rules out doubled vowels.
            keepCurrent();
            lookAhead = '';
        }
        else if (kanares_1.finalConsonantsKana.includes(lookAhead) &&
            i + 1 === letters.length) {
            // final sokuon, not medial sokuon
            keepCurrent();
            lookAhead = '';
        }
        else if (kanares_1.geminatedConsonantsKana.includes(lookAhead) && hasNext) {
            // medial sokuon, not final sokuon
            if (kanares_1.initialConsonantsKana.includes(next) &&
                (lookAhead === next ||
                    (lookAhead === kanares_1.KanaLetterTags.t &&
                        next === kanares_1.KanaLetterTags.ch))) {
                // the letter after look-ahead is a matching initial consonant
                keepCurrent();
            }
            else if (isVowelOrSemivowel(next)) {
                // the letter after look-ahead is a vowel: the consonant just
                // read opens the next syllable
                keepWithoutLast();
            }
            lookAhead = '';
        }
        else if (kanares_1.hatsuonsKana.includes(lookAhead)) {
            // Guard on `hasNext`: previously letters[i + 1] was dereferenced
            // unconditionally and threw a TypeError when the hatsuon was the
            // last letter of the input. With nothing following, the hatsuon
            // stays in this syllable.
            if (hasNext && isVowelOrSemivowel(next)) {
                keepWithoutLast();
            }
            else {
                keepCurrent();
            }
            lookAhead = '';
        }
    }

    // ---- Phase 2: collect candidate sound sequences -----------------------
    let list = [];
    if (matched.length > 0) {
        const ksg = new soundgen_1.KanaSoundGenerator();
        list = ksg.generate(matchedLtrs, lookAhead);
    }
    const remaining = letters.length - beginOfSyllable;
    const arraysOfLetters = [];
    const mp = new unit_1.MatchedPattern();
    // NOTE(review): `sounds` ends up as the LAST candidate that matched, and
    // is used below regardless of which candidate length is chosen. Kept
    // as-is; confirm whether per-candidate sounds were intended.
    let sounds = [];
    for (const candidate of list) {
        const min = Math.min(remaining, candidate.length);
        if (candidate.length !== min) {
            continue; // candidate is longer than the remaining input
        }
        for (let n = 0; n < min; n++) {
            if (candidate[n] == null) {
                continue; // missing entries are skipped, not treated as a mismatch
            }
            if (letters[beginOfSyllable + n].literal !== candidate[n].toString()) {
                break;
            }
            if (n + 1 === min) {
                // every compared position agreed: record the matched letters
                arraysOfLetters.push(letters.slice(beginOfSyllable, beginOfSyllable + min));
                sounds = candidate;
            }
        }
    }

    // ---- Phase 3: choose a length and fill the result ----------------------
    // Copies the first `count` letters of the syllable and the corresponding
    // entries of `sounds` into `mp`, then returns it.
    const fill = (count) => {
        for (let q = 0; q < count; q++) {
            mp.letters[q] = letters[beginOfSyllable + q];
            mp.pattern[q] = sounds[q];
        }
        return mp;
    };

    if (arraysOfLetters.length === 1) {
        // only one matched
        return fill(arraysOfLetters[0].length);
    }
    if (arraysOfLetters.length > 1) {
        // Index of the first strictly-longest entry; the "shorter" entry is
        // entry 0 when the longest is elsewhere, otherwise entry 1.
        let index = 0;
        for (let j = 0; j < arraysOfLetters.length; j++) {
            if (arraysOfLetters[j].length > arraysOfLetters[index].length) {
                index = j;
            }
        }
        const longer = arraysOfLetters[index];
        const shorter = arraysOfLetters[index > 0 ? 0 : 1];

        if (remaining === longer.length) {
            // The longer entry consumes the rest of the input: keep it only
            // when it ends in a hatsuon, otherwise fall back to the shorter.
            const endsInHatsuon = kanares_1.hatsuonsKana.includes(longer[longer.length - 1].literal);
            return fill(endsInHatsuon ? longer.length : shorter.length);
        }
        if (remaining === longer.length + 1) {
            // look ahead for 1 letter: an initial consonant after the longer
            // entry means the longer entry is consonant-ending
            const following = letters[beginOfSyllable + longer.length].literal;
            const consonantFollows = kanares_1.initialConsonantsKana.includes(following);
            return fill(consonantFollows ? longer.length : shorter.length);
        }
        if (remaining > longer.length + 1) {
            // look ahead for 2 letters: a vowel or semivowel right after the
            // longer entry means its last letter belongs to the next syllable
            const following = letters[beginOfSyllable + longer.length].literal;
            return fill(isVowelOrSemivowel(following) ? shorter.length : longer.length);
        }
    }
    return mp;
}
/**
 * Turns graphemes into KanaStandaloneMorpheme objects, using syllabifyKana
 * as the syllabification callback handed to `this.make`.
 */
class KanaStandaloneMorphemeMaker extends maker_1.MorphemeMaker {
    /** The value passed to the constructor; forwarded to every morpheme created. */
    metaplasm;
    /**
     * @param kcm - Stored as `metaplasm` and passed as the third argument of
     *   each KanaStandaloneMorpheme this maker creates.
     */
    constructor(kcm) {
        super();
        this.metaplasm = kcm;
    }
    /** @returns A new empty array that will collect the morphemes. */
    createArray() {
        return [];
    }
    /**
     * Wraps one matched pattern in a morpheme.
     * @param msp - Object exposing `letters` and `pattern` (the shape that
     *   syllabifyKana returns).
     * @returns A KanaStandaloneMorpheme built from a KanaSyllable over
     *   `msp.letters`, with `msp.pattern` as its sounds.
     */
    createMorpheme(msp) {
        return new KanaStandaloneMorpheme(new KanaSyllable(msp.letters), msp.pattern, this.metaplasm);
    }
    /**
     * Converts every matched pattern into a morpheme, preserving order.
     * @param patterns - Iterable of matched patterns.
     * @returns The array from createArray(), filled with the morphemes.
     */
    postprocess(patterns) {
        const morphemes = this.createArray();
        // for...of visits the elements themselves; the previous for...in walked
        // string keys and would also pick up any enumerable non-index property.
        for (const pattern of patterns) {
            morphemes.push(this.createMorpheme(pattern));
        }
        return morphemes;
    }
    /**
     * Full pipeline: graphemes -> letters -> matched patterns -> morphemes.
     * @param graphemes - Array of objects exposing a `letter` property.
     * @returns The morphemes produced by postprocess().
     */
    makeMorphemes(graphemes) {
        const ltrs = graphemes.map((it) => it.letter);
        // `make` is not defined in this class; presumably inherited from
        // maker_1.MorphemeMaker (verify there).
        const ptrns = this.make(ltrs, syllabifyKana);
        return this.postprocess(ptrns);
    }
}
exports.KanaStandaloneMorphemeMaker = KanaStandaloneMorphemeMaker;
//# sourceMappingURL=morpheme.js.map