UNPKG

taipa

Version:

Taiwanese morphological parsing library

400 lines 12.4 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Phrase = exports.Phraseme = exports.Word = exports.Lexeme = exports.makeMatchedPatterns = exports.Syllable = exports.Morpheme = exports.MatchedPattern = exports.SoundGeneration = exports.sgPipe = exports.soundSequence = exports.Sound = exports.GraphemeMaker = exports.Letters = exports.MatchedSequence = exports.AlphabeticLetter = exports.Letter = exports.AlphabeticGrapheme = exports.Grapheme = exports.characters = exports.Character = void 0;

/** Wrapper around a single character string. */
class Character {
    /** The wrapped string. */
    character;
    /**
     * @param s - the string to wrap.
     */
    constructor(s) {
        this.character = s;
    }
}
exports.Character = Character;

/** Registry of the lowercase characters `a`-`z`, keyed by the character itself. */
class Characters {
    carr = [
        'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
        'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
    ];
    o = new Map();
    constructor() {
        for (const e of this.carr) {
            this.assign(e);
        }
    }
    /**
     * Registers a `Character` for the given key.
     * @param e - the character string to register.
     */
    assign(e) {
        this.o.set(e, new Character(e));
    }
    /**
     * Looks up a registered character.
     * @param key - the character string to look up.
     * @returns the registered `Character`, or a new `Character('')` when the key is unknown.
     */
    get(key) {
        return this.o.get(key) ?? new Character('');
    }
    /** Number of registered characters. */
    get size() {
        return this.o.size;
    }
}
exports.characters = new Characters();

/** Base class for graphemes. */
class Grapheme {
}
exports.Grapheme = Grapheme;

/** A grapheme that carries one letter. */
class AlphabeticGrapheme extends Grapheme {
    letter;
    /**
     * @param letter - the letter carried by this grapheme.
     */
    constructor(letter) {
        super();
        this.letter = letter;
    }
}
exports.AlphabeticGrapheme = AlphabeticGrapheme;

/** Letter is a subword unit. */
class Letter {
    literal = '';
}
exports.Letter = Letter;

/** A letter made of `Character` objects; `literal` is kept in sync with them. */
class AlphabeticLetter extends Letter {
    characters;
    /**
     * @param characters - optional array of `Character` objects. The array is
     *     stored by reference, not copied.
     */
    constructor(characters) {
        super();
        this.characters = [];
        if (characters) {
            this.characters = characters;
            this.concat();
        }
    }
    /**
     * Appends a character and refreshes `literal`.
     * @param c - the `Character` to append.
     */
    pushCharacter(c) {
        this.characters.push(c);
        this.concat();
    }
    /** Rebuilds `literal` from `characters`; falsy entries contribute an empty string. */
    concat() {
        this.literal = this.characters.map((x) => (x ? x.character : '')).join('');
    }
}
exports.AlphabeticLetter = AlphabeticLetter;

/** The characters matched for one letter during grapheme making. */
class MatchedSequence {
    characters = [];
    /** Number of matched characters. */
    get matchedLength() {
        return this.characters.length;
    }
    /** @returns the matched characters concatenated into one string. */
    toString() {
        let str = '';
        for (const c of this.characters) {
            str += c.character;
        }
        return str;
    }
}
exports.MatchedSequence = MatchedSequence;

/** A set of letters, keyed by their literal strings. */
class Letters {
    arr;
    o = new Map();
    /**
     * @param larr - array of letter strings to register.
     */
    constructor(larr) {
        this.arr = larr;
        for (const e of this.arr) {
            this.assign(e);
        }
    }
    /**
     * Registers an `AlphabeticLetter` built from the characters of `e`.
     * Each character is looked up in the shared `characters` registry, so an
     * unregistered character is stored as `Character('')`.
     * @param e - the letter string to register.
     */
    assign(e) {
        const carr = [];
        for (let i = 0; i < e.length; i++) {
            const c = exports.characters.get(e[i]);
            if (c) {
                carr.push(c);
            }
        }
        this.o.set(e, new AlphabeticLetter(carr));
    }
    /**
     * Hook called by `GraphemeMaker` when the character at `beginOfLetter` is `n`.
     * This implementation matches nothing.
     * NOTE(review): presumably overridden by subclasses defined elsewhere - confirm.
     * @param characters - the characters being scanned.
     * @param beginOfLetter - index of the `n` character.
     * @param listLength - number of letters known to the caller.
     * @returns an empty `MatchedSequence`.
     */
    handleN(characters, beginOfLetter, listLength) {
        return new MatchedSequence();
    }
    /**
     * Looks up a registered letter.
     * @param key - the letter string to look up.
     * @returns the registered letter, or an empty `AlphabeticLetter` when unknown.
     */
    get(key) {
        return this.o.get(key) ?? new AlphabeticLetter([]);
    }
    /** Number of registered letters. */
    get size() {
        return this.o.size;
    }
    /** Iterator over the registered letters. */
    get values() {
        return this.o.values();
    }
}
exports.Letters = Letters;

/** Turn a string into graphemes. */
class GraphemeMaker {
    lowerLetters;
    listOfLetters = [];
    /**
     * @param lowerLetters - the `Letters` set used for matching.
     */
    constructor(lowerLetters) {
        this.lowerLetters = lowerLetters;
        this.listOfLetters = Array.from(lowerLetters.values);
    }
    /**
     * Splits a string into graphemes.
     * @param str - the input string; a falsy value yields no graphemes.
     * @returns an array of `AlphabeticGrapheme`.
     */
    makeGraphemes(str) {
        const characters = [];
        if (str) {
            for (let i = 0; i < str.length; i++) {
                const ch = str.charAt(i);
                // NUL characters are dropped from the input.
                if (ch !== '\0') {
                    characters.push(new Character(ch));
                }
            }
        }
        return this.make(characters);
    }
    /**
     * Finds the longest candidate whose literal matches `characters` starting
     * at `beginOfLetter`.
     * @param characters - the characters being scanned.
     * @param beginOfLetter - index at which the letter must start.
     * @param candidates - letters to try.
     * @returns the matched sequence; empty when nothing matched.
     */
    getMatchedSequence(characters, beginOfLetter, candidates) {
        let ms = new MatchedSequence();
        let matchedLen = 0;
        if (characters[beginOfLetter].character === 'n') {
            // Let the letter set handle `n` first; fall through when it declines.
            ms = this.lowerLetters.handleN(characters, beginOfLetter, this.listOfLetters.length);
            if (ms.matchedLength > 0) {
                return ms;
            }
        }
        const remaining = characters.length - beginOfLetter;
        for (const candidate of candidates) {
            const { literal } = candidate;
            // Skip candidates that do not fit, or that cannot beat the current best.
            if (literal.length > remaining || literal.length <= matchedLen) {
                continue;
            }
            let isMatch = true;
            for (let k = 0; k < literal.length; k++) {
                if (characters[beginOfLetter + k].character !== literal[k]) {
                    isMatch = false;
                    break;
                }
            }
            if (isMatch) {
                matchedLen = literal.length;
                // Copy the matched characters over any shorter previous match.
                for (let q = 0; q < matchedLen; q++) {
                    ms.characters[q] = characters[beginOfLetter + q];
                }
            }
        }
        return ms;
    }
    /**
     * Groups characters into graphemes by repeatedly taking the longest
     * matching letter. Scanning makes no further progress after the first
     * position where no letter matches; the graphemes made so far are returned.
     * @param characters - array of `Character` objects.
     * @returns an array of `AlphabeticGrapheme`.
     */
    make(characters) {
        const graphemes = [];
        // Letters matched at `beginOfLetter` and not yet turned into a grapheme.
        const letters = [];
        let beginOfLetter = 0;
        for (let i = 0; i < characters.length; i++) {
            if (i === beginOfLetter) {
                // `?.` guards against letters with no characters, which
                // previously raised a TypeError here.
                const candidates = this.listOfLetters.filter((l) => l.characters[0]?.character === characters[i].character);
                const ms = this.getMatchedSequence(characters, beginOfLetter, candidates);
                if (ms.matchedLength > 0) {
                    const matchedLiteral = new AlphabeticLetter(ms.characters).literal;
                    for (const candidate of candidates) {
                        if (candidate.literal === matchedLiteral) {
                            letters.push(candidate);
                        }
                    }
                }
            }
            // Emit the grapheme once the index reaches the last character of the matched letter.
            if (letters.length === 1 && i + 1 - beginOfLetter === letters[0].characters.length) {
                const l = letters.shift();
                beginOfLetter += l.characters.length;
                graphemes.push(new AlphabeticGrapheme(l));
            }
        }
        return graphemes;
    }
}
exports.GraphemeMaker = GraphemeMaker;

/** The sound tag of a given letter. */
class Sound {
    /** Tag name (original note: one member of TonalSpelling Tags). */
    name = '';
    /** An array of character objects; can be used to make a word object. */
    characters = [];
    /**
     * Concatenates the characters into a string.
     * @returns the joined string; empty when `characters` is null or undefined
     *     (original note: there are no characters for the 1st tone).
     */
    toString() {
        let l = '';
        for (const c of this.characters ?? []) {
            l += c.character;
        }
        return l;
    }
    /**
     * Converts a string into an array of `Character` objects, one per UTF-16 code unit.
     * @param str - the string to convert.
     * @returns the new array of characters.
     */
    makeCharacters(str) {
        const arr = [];
        for (let i = 0; i < str.length; i++) {
            arr.push(new Character(str[i]));
        }
        return arr;
    }
}
exports.Sound = Sound;

/**
 * Wraps an array of sounds in an object with a membership test.
 * @param sounds - array of `Sound` objects.
 * @returns an object exposing `sounds` and `includes(str)`.
 */
const soundSequence = function (sounds) {
    return {
        sounds,
        /**
         * @param str - the string to look for.
         * @returns true when `str` is non-empty and equals the string form of one of the sounds.
         */
        includes(str) {
            if (!str) {
                return false;
            }
            for (const sound of this.sounds ?? []) {
                if (sound && str === sound.toString()) {
                    return true;
                }
            }
            return false;
        },
    };
};
exports.soundSequence = soundSequence;

/**
 * Spelling generation: composes functions left to right.
 * @param fns - functions to apply in order.
 * @returns a function that feeds its argument through every function in `fns`.
 */
const sgPipe = (...fns) => (x) => fns.reduce((v, f) => f(v), x);
exports.sgPipe = sgPipe;

/** Sound generation for syllable compositions. */
class SoundGeneration {
    /** The letters to be matched. */
    letters = [];
    /** Matched sounds accumulator. */
    matchedSounds = [];
    /** Flag for the syllable matching process. */
    matching = true;
}
exports.SoundGeneration = SoundGeneration;

/** The letters matched for one syllable, together with the pattern they matched. */
class MatchedPattern {
    letters = [];
    pattern = [];
    /** Number of matched letters. */
    get matchedLength() {
        return this.letters.length;
    }
    /** The last matched letter, or an empty `AlphabeticLetter` when there is none. */
    get lastLetter() {
        if (this.letters.length > 0) {
            return this.letters[this.letters.length - 1];
        }
        return new AlphabeticLetter([]);
    }
    /** The second-to-last matched letter, or an empty `AlphabeticLetter` when there are fewer than two. */
    get lastSecondLetter() {
        if (this.letters.length > 1) {
            return this.letters[this.letters.length - 2];
        }
        return new AlphabeticLetter([]);
    }
}
exports.MatchedPattern = MatchedPattern;

/** Base class for morphemes. */
class Morpheme {
}
exports.Morpheme = Morpheme;

/** Syllable is a subword unit. */
class Syllable {
    literal = '';
    letters;
    /**
     * @param letters - optional array of letters. The array is stored by
     *     reference, not copied.
     */
    constructor(letters) {
        this.letters = [];
        if (letters) {
            this.letters = letters;
            this.concat();
        }
    }
    /**
     * Appends a letter and refreshes `literal`.
     * @param l - the letter to append.
     */
    pushLetter(l) {
        this.letters.push(l);
        this.concat();
    }
    /**
     * Replaces the letter at index `i` and refreshes `literal`.
     * @param i - index of the letter to replace.
     * @param l - the replacement letter.
     */
    replaceLetter(i, l) {
        this.letters.splice(i, 1, l);
        this.concat();
    }
    /**
     * Inserts a letter before index `i` and refreshes `literal`.
     * @param i - index at which to insert.
     * @param l - the letter to insert.
     */
    insertLetter(i, l) {
        this.letters.splice(i, 0, l);
        this.concat();
    }
    /** Rebuilds `literal` from `letters`; falsy entries contribute an empty string. */
    concat() {
        this.literal = this.letters.map((x) => (x ? x.literal : '')).join('');
    }
}
exports.Syllable = Syllable;

/**
 * Splits letters into syllable patterns by calling `syllabify` at the start of
 * each syllable. `syllabify` is not called again after it reports a
 * zero-length match; the patterns collected so far are returned.
 * @param letters - the letters to split.
 * @param syllabify - function `(letters, beginOfSyllable)` returning a `MatchedPattern`.
 * @returns the matched patterns, in order.
 */
function makeMatchedPatterns(letters, syllabify) {
    const patterns = [];
    let beginOfSyllable = 0;
    for (let i = 0; i < letters.length; i++) {
        if (i !== beginOfSyllable) {
            continue;
        }
        const msp = syllabify(letters, beginOfSyllable);
        if (msp.letters.length > 0) {
            patterns.push(msp);
        }
        beginOfSyllable += msp.matchedLength;
        if (patterns.length >= 1 && msp.matchedLength > 0) {
            // Jump so that the loop increment lands on the start of the next syllable.
            i = beginOfSyllable - 1;
        }
    }
    return patterns;
}
exports.makeMatchedPatterns = makeMatchedPatterns;

/** Base class for lexemes. */
class Lexeme {
}
exports.Lexeme = Lexeme;

/** A word with its literal string. */
class Word {
    literal = '';
}
exports.Word = Word;

/** Base class for phrasemes. */
class Phraseme {
}
exports.Phraseme = Phraseme;

/** A phrase with its literal string. */
class Phrase {
    literal = '';
}
exports.Phrase = Phrase;
//# sourceMappingURL=unit.js.map