taipa
Version:
Taiwanese morphological parsing library
400 lines • 12.4 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Phrase = exports.Phraseme = exports.Word = exports.Lexeme = exports.makeMatchedPatterns = exports.Syllable = exports.Morpheme = exports.MatchedPattern = exports.SoundGeneration = exports.sgPipe = exports.soundSequence = exports.Sound = exports.GraphemeMaker = exports.Letters = exports.MatchedSequence = exports.AlphabeticLetter = exports.Letter = exports.AlphabeticGrapheme = exports.Grapheme = exports.characters = exports.Character = void 0;
/** Wraps a single string value so it can be handled as an object. */
class Character {
    /** The wrapped string, stored exactly as passed to the constructor. */
    character;
    /**
     * @param {string} text - The value to wrap.
     */
    constructor(text) {
        this.character = text;
    }
}
exports.Character = Character;
/** Registry of the lowercase letters a-z, each wrapped in a Character. */
class Characters {
    /** The 26 registered character strings, in alphabetical order. */
    carr = [...'abcdefghijklmnopqrstuvwxyz'];
    /** Lookup table from a character string to its Character object. */
    o = new Map();
    /** Registers every entry of `carr` in the lookup table. */
    constructor() {
        this.carr.forEach((ch) => this.assign(ch));
    }
    /**
     * Registers (or re-registers) a character string.
     * @param {string} e - The character string used as the key.
     */
    assign(e) {
        const wrapped = new Character(e);
        this.o.set(e, wrapped);
    }
    /**
     * Looks up a registered character.
     * @param {string} key - The character string to look up.
     * @returns {Character} The registered Character, or a fresh Character
     *   wrapping the empty string when the key is unknown.
     */
    get(key) {
        return this.o.get(key) || new Character('');
    }
    /** Number of registered characters. */
    get size() {
        return this.o.size;
    }
}
exports.characters = new Characters();
/** Base class for graphemes; it declares no members of its own. */
class Grapheme {}
exports.Grapheme = Grapheme;
/** A grapheme that carries exactly one letter. */
class AlphabeticGrapheme extends Grapheme {
    /** The letter this grapheme was built from, stored as passed in. */
    letter;
    /**
     * @param {AlphabeticLetter} letter - The letter to wrap.
     */
    constructor(letter) {
        super();
        this.letter = letter;
    }
}
exports.AlphabeticGrapheme = AlphabeticGrapheme;
/**
 * Letter is a subword unit.
 * It only holds the letter's text; subclasses decide how that text is built.
 */
class Letter {
    /** Text of the letter; starts out empty. */
    literal = '';
}
exports.Letter = Letter;
/** A letter made of a sequence of Character objects. */
class AlphabeticLetter extends Letter {
    /** The characters of this letter, in order. */
    characters;
    /**
     * @param {Character[]} [characters] - Initial characters. The array is
     *   stored by reference, not copied. When omitted, the letter starts
     *   with an empty array and an empty literal.
     */
    constructor(characters) {
        super();
        if (characters) {
            this.characters = characters;
            this.concat();
        }
        else {
            this.characters = [];
        }
    }
    /**
     * Appends one character and refreshes `literal`.
     * @param {Character} c - The character to append.
     */
    pushCharacter(c) {
        this.characters.push(c);
        this.concat();
    }
    /** Rebuilds `literal` from `characters`; falsy entries contribute nothing. */
    concat() {
        const pieces = this.characters.map((c) => (c ? c.character : ''));
        this.literal = pieces.join('');
    }
}
exports.AlphabeticLetter = AlphabeticLetter;
/** The run of characters matched for one letter. */
class MatchedSequence {
    /** Matched characters, in input order. */
    characters = [];
    /** Number of matched characters. */
    get matchedLength() {
        return this.characters.length;
    }
    /**
     * Joins the `character` value of every matched character.
     * @returns {string} The concatenated text; empty when nothing matched.
     */
    toString() {
        // reduce() visits only the array's own index entries. The previous
        // for...in loop also walked inherited or non-index enumerable keys.
        return this.characters.reduce((text, c) => text + c.character, '');
    }
}
exports.MatchedSequence = MatchedSequence;
/** Table of letters, keyed by the letter's spelling. */
class Letters {
    /** The spellings this table was built from, as passed to the constructor. */
    arr;
    /** Lookup table from a spelling to its AlphabeticLetter. */
    o = new Map();
    /**
     * @param {string[]} larr - Spellings to register, in order.
     */
    constructor(larr) {
        this.arr = larr;
        for (const spelling of this.arr) {
            this.assign(spelling);
        }
    }
    /**
     * Registers one spelling, building its letter from the shared character
     * registry (`exports.characters`).
     * @param {string} e - The spelling; also used as the map key.
     */
    assign(e) {
        const wrapped = [];
        let position = 0;
        while (position < e.length) {
            const ch = exports.characters.get(e[position]);
            if (ch) {
                wrapped.push(ch);
            }
            position += 1;
        }
        this.o.set(e, new AlphabeticLetter(wrapped));
    }
    /**
     * Hook for special handling of input that starts with 'n'.
     * This implementation ignores its arguments and reports no match.
     * Presumably subclasses override it; confirm against callers.
     * @returns {MatchedSequence} An empty matched sequence.
     */
    handleN(characters, beginOfLetter, listLength) {
        return new MatchedSequence();
    }
    /**
     * @param {string} key - The spelling to look up.
     * @returns {AlphabeticLetter} The registered letter, or a fresh letter
     *   with no characters when the spelling is unknown.
     */
    get(key) {
        return this.o.get(key) || new AlphabeticLetter([]);
    }
    /** Number of registered letters. */
    get size() {
        return this.o.size;
    }
    /** Iterator over the registered letters, in insertion order. */
    get values() {
        return this.o.values();
    }
}
exports.Letters = Letters;
/**
 * Turn a string into graphemes.
 *
 * The input is split into Character objects and then consumed left to right,
 * each time taking the longest letter from `listOfLetters` that matches at
 * the current position. Conversion stops at the first position where no
 * single letter can be taken; the graphemes built so far are returned.
 */
class GraphemeMaker {
    /** Letter table; must provide `values` (iterable of letters) and `handleN()`. */
    lowerLetters;
    /** Every letter of `lowerLetters`, captured when the maker is constructed. */
    listOfLetters = [];
    /**
     * @param {Letters} lowerLetters - The letters that input may be split into.
     */
    constructor(lowerLetters) {
        this.lowerLetters = lowerLetters;
        this.listOfLetters = Array.from(lowerLetters.values);
    }
    /**
     * Splits a string into graphemes.
     * @param {string} str - Text to convert; a falsy value yields no graphemes.
     * @returns {AlphabeticGrapheme[]} One grapheme per matched letter.
     */
    makeGraphemes(str) {
        const characters = [];
        if (str) {
            for (let i = 0; i < str.length; i++) {
                const ch = str.charAt(i);
                // NUL characters are dropped from the input.
                if (ch !== '\0') {
                    characters.push(new Character(ch));
                }
            }
        }
        return this.make(characters);
    }
    /**
     * Finds the longest candidate that matches at `beginOfLetter`.
     * @param {Character[]} characters - The whole input.
     * @param {number} beginOfLetter - Index at which the letter must start.
     * @param {AlphabeticLetter[]} candidates - Letters to try.
     * @returns {MatchedSequence} The matched characters; `matchedLength` is 0
     *   when no candidate matches.
     */
    getMatchedSequence(characters, beginOfLetter, candidates) {
        let ms = new MatchedSequence();
        if (characters[beginOfLetter].character === 'n') {
            // Input starting with 'n' is first offered to the letter table's
            // hook; a non-empty result from the hook wins outright.
            ms = this.lowerLetters.handleN(characters, beginOfLetter, this.listOfLetters.length);
            if (ms.matchedLength > 0) {
                return ms;
            }
        }
        const remaining = characters.length - beginOfLetter;
        let matchedLen = 0;
        for (const candidate of candidates) {
            const { literal } = candidate;
            // A candidate can only win if it fits in the remaining input and
            // is longer than the best match so far.
            if (literal.length > remaining || literal.length <= matchedLen) {
                continue;
            }
            let matches = true;
            for (let k = 0; k < literal.length; k++) {
                if (characters[beginOfLetter + k].character !== literal[k]) {
                    matches = false;
                    break;
                }
            }
            if (matches) {
                matchedLen = literal.length;
                // Copy the matched characters.
                for (let q = 0; q < matchedLen; q++) {
                    ms.characters[q] = characters[beginOfLetter + q];
                }
            }
        }
        return ms;
    }
    /**
     * Packs characters into graphemes, one letter at a time.
     * @param {Character[]} characters - The input characters.
     * @returns {AlphabeticGrapheme[]} Graphemes for the leading part of the
     *   input that could be split into letters.
     */
    make(characters) {
        const graphemes = [];
        let beginOfLetter = 0;
        while (beginOfLetter < characters.length) {
            const first = characters[beginOfLetter].character;
            // Letters without characters can never match; skipping them also
            // avoids dereferencing a missing first character.
            const candidates = this.listOfLetters.filter((l) => l.characters.length > 0 && l.characters[0].character === first);
            const ms = this.getMatchedSequence(characters, beginOfLetter, candidates);
            if (!(ms.matchedLength > 0)) {
                // Nothing matches here, so no later position can be reached.
                break;
            }
            const matchedLiteral = new AlphabeticLetter(ms.characters).literal;
            const matched = candidates.filter((l) => l.literal === matchedLiteral);
            if (matched.length !== 1) {
                // Zero or several letters share the matched literal.
                break;
            }
            const [letter] = matched;
            const span = letter.characters.length;
            if (span > characters.length - beginOfLetter) {
                // The letter claims more characters than are left.
                break;
            }
            // Pack the letter into a grapheme and move past it.
            graphemes.push(new AlphabeticGrapheme(letter));
            beginOfLetter += span;
        }
        return graphemes;
    }
}
exports.GraphemeMaker = GraphemeMaker;
/** The sound tag of a given letter. */
class Sound {
    /** One member of the TonalSpelling tags. */
    name = '';
    /** Character objects of this sound; can be used to make a word object. */
    characters = [];
    /**
     * Combines the characters into a single string (as opposed to the array
     * of Character objects held in `characters`).
     * @returns {string} The concatenated text; empty when `characters` is
     *   null or undefined (there are no characters for the 1st tone).
     */
    toString() {
        if (this.characters == null) {
            return '';
        }
        // reduce() visits only the array's own index entries. The previous
        // for...in loop also walked inherited or non-index enumerable keys.
        return this.characters.reduce((text, c) => text + c.character, '');
    }
    /**
     * Wraps every UTF-16 code unit of a string in a Character.
     * @param {string} str - The text to split.
     * @returns {Character[]} One Character per code unit, in order.
     */
    makeCharacters(str) {
        const arr = [];
        for (let i = 0; i < str.length; i++) {
            arr.push(new Character(str[i]));
        }
        return arr;
    }
}
exports.Sound = Sound;
/**
 * Wraps a collection of sounds with a text-based membership test.
 * @param {Sound[]} sounds - The sounds to wrap; stored by reference.
 * @returns {{sounds: Sound[], includes: function(string): boolean}} An object
 *   exposing the sounds and an `includes(str)` check.
 */
const soundSequence = function (sounds) {
    return {
        sounds,
        /**
         * @param {string} str - The text to look for.
         * @returns {boolean} True when `str` is non-empty and equals the
         *   `toString()` of at least one truthy sound.
         */
        includes(str) {
            if (!str) {
                return false;
            }
            // Object.values() only sees the collection's own entries. The
            // previous for...in loop also treated inherited keys as sounds.
            return Object.values(this.sounds ?? {}).some((sound) => Boolean(sound) && str === sound.toString());
        },
    };
};
exports.soundSequence = soundSequence;
// spelling generation
/**
 * Composes functions left to right.
 * @param {...Function} fns - Steps to apply, in order.
 * @returns {Function} A function that feeds its argument through every step
 *   and returns the final result (the argument itself when there are no steps).
 */
const sgPipe = (...fns) => (x) => {
    let value = x;
    for (const step of fns) {
        value = step(value);
    }
    return value;
};
exports.sgPipe = sgPipe;
/** Sound generation for syllable compositions. */
class SoundGeneration {
    /** The letters to be matched. */
    letters = [];
    /** Accumulator for the sounds matched so far. */
    matchedSounds = [];
    /** Flag for the syllable matching process; starts out true. */
    matching = true;
}
exports.SoundGeneration = SoundGeneration;
/** The letters matched for one syllable, together with the pattern used. */
class MatchedPattern {
    /** The matched letters, in order. */
    letters = [];
    /** The pattern the letters were matched against. */
    pattern = [];
    /** Number of matched letters (the length of `pattern` is not reported). */
    get matchedLength() {
        return this.letters.length;
    }
    /** The last matched letter, or a fresh empty letter when there is none. */
    get lastLetter() {
        const count = this.letters.length;
        return count > 0 ? this.letters[count - 1] : new AlphabeticLetter([]);
    }
    /**
     * The second-to-last matched letter, or a fresh empty letter when fewer
     * than two letters are matched.
     */
    get lastSecondLetter() {
        const count = this.letters.length;
        return count > 1 ? this.letters[count - 2] : new AlphabeticLetter([]);
    }
}
exports.MatchedPattern = MatchedPattern;
/** Morpheme type; it declares no members here. */
class Morpheme {}
exports.Morpheme = Morpheme;
/**
 * Syllable is a subword unit.
 * It keeps an ordered list of letters and a `literal` string that is rebuilt
 * from those letters after every change made through its methods.
 */
class Syllable {
    /** Concatenated text of all letters. */
    literal = '';
    /** The letters of this syllable, in order. */
    letters;
    /**
     * @param {Array} [letters] - Initial letters, each exposing `literal`.
     *   The array is stored by reference, not copied. When omitted, the
     *   syllable starts with no letters.
     */
    constructor(letters) {
        if (letters) {
            this.letters = letters;
            this.concat();
        }
        else {
            this.letters = [];
        }
    }
    /** Appends letter `l` at the end. */
    pushLetter(l) {
        this.letters.push(l);
        this.concat();
    }
    /** Replaces the letter at index `i` with `l`. */
    replaceLetter(i, l) {
        this.letters.splice(i, 1, l);
        this.concat();
    }
    /** Inserts letter `l` before index `i`. */
    insertLetter(i, l) {
        this.letters.splice(i, 0, l);
        this.concat();
    }
    /** Rebuilds `literal` from `letters`; falsy entries contribute nothing. */
    concat() {
        const pieces = this.letters.map((letter) => (letter ? letter.literal : ''));
        this.literal = pieces.join('');
    }
}
exports.Syllable = Syllable;
/**
 * Splits a list of letters into consecutive matched patterns.
 *
 * `syllabify` is called at the start of each syllable. A result with at least
 * one letter is collected, and the next syllable starts `matchedLength`
 * letters later. Processing stops at the end of the input, or as soon as
 * `syllabify` makes no progress (a `matchedLength` that is not positive, or a
 * null/undefined result); the patterns collected so far are returned.
 *
 * @param {Array} letters - The letters to split.
 * @param {function(Array, number): MatchedPattern} syllabify - Matches one
 *   syllable starting at the given index.
 * @returns {MatchedPattern[]} The matched patterns, in input order.
 */
function makeMatchedPatterns(letters, syllabify) {
    const patterns = [];
    let beginOfSyllable = 0;
    while (beginOfSyllable < letters.length) {
        const msp = syllabify(letters, beginOfSyllable);
        if (msp == null) {
            // No result at all: stop instead of dereferencing it.
            break;
        }
        if (msp.letters.length > 0) {
            patterns.push(msp);
        }
        const consumed = msp.matchedLength;
        if (!(consumed > 0)) {
            // Nothing was consumed, so the next syllable start can never be
            // reached. The syllable might need to be added to the matcher.
            break;
        }
        beginOfSyllable += consumed;
    }
    return patterns;
}
exports.makeMatchedPatterns = makeMatchedPatterns;
/** Lexeme type; it declares no members here. */
class Lexeme {}
exports.Lexeme = Lexeme;
/** A word, represented here only by its text. */
class Word {
    /** Text of the word; starts out empty. */
    literal = '';
}
exports.Word = Word;
/** Phraseme type; it declares no members here. */
class Phraseme {}
exports.Phraseme = Phraseme;
/** A phrase, represented here only by its text. */
class Phrase {
    /** Text of the phrase; starts out empty. */
    literal = '';
}
exports.Phrase = Phrase;
//# sourceMappingURL=unit.js.map