UNPKG

taipa

Version:

Taiwanese morphological parsing library

151 lines 6.24 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.DependencyParser = void 0;
const configuration_1 = require("./configuration");
const guide_1 = require("./guide");
const document_1 = require("../document");
const symbols_1 = require("./symbols");
const relation_1 = require("./relation");
/**
 * Transition-driven dependency parser.
 *
 * `parse` feeds nodes into a `Configuration` (stack, queue, relations), asks a
 * `Guide` for the next transition and, before applying each transition, looks
 * up a dependency label in tables keyed by the concatenated tags of two nodes.
 */
class DependencyParser {
    /** Current parser configuration; replaced by the result of every applied transition. */
    c = this.getInitialConfiguration();
    /** Top of the stack, or a blank node when the stack is empty. */
    s1 = new document_1.Node('');
    /** Second node from the top of the stack, or a blank node. */
    s2 = new document_1.Node('');
    /** First node of the queue, or a blank node when the queue is empty. */
    b1 = new document_1.Node('');
    /** Labels for a RightArc, keyed by `s1.tag + b1.tag`. */
    s1B1RightRelations = new Map()
        .set(symbols_1.Tagset.ppv + symbols_1.Tagset.psub, symbols_1.DepRelations.compoundPrt)
        .set(symbols_1.Tagset.ppv + symbols_1.Tagset.npr, symbols_1.DepRelations.compoundPrt);
    /** Labels for a LeftArc, keyed by `s1.tag + b1.tag` (currently empty). */
    s1B1LeftRelations = new Map();
    /** Labels for a RightArc, keyed by `s2.tag + s1.tag`. */
    s2S1RightRelations = new Map()
        .set(symbols_1.Tagset.vb + symbols_1.Tagset.ppv, symbols_1.DepRelations.compoundPrt)
        .set(symbols_1.Tagset.vb + symbols_1.Tagset.psub, symbols_1.DepRelations.prt)
        .set(symbols_1.Tagset.vb + symbols_1.Tagset.vb, symbols_1.DepRelations.compound)
        .set(symbols_1.Tagset.vb + symbols_1.Tagset.npr, symbols_1.DepRelations.obj);
    /** Labels for a LeftArc, keyed by `s2.tag + s1.tag`. */
    s2S1LeftRelations = new Map()
        .set(symbols_1.Tagset.aux + symbols_1.Tagset.vb, symbols_1.DepRelations.aux)
        .set(symbols_1.Tagset.padv + symbols_1.Tagset.vb, symbols_1.DepRelations.advmod)
        .set(symbols_1.Tagset.appr + symbols_1.Tagset.npr, symbols_1.DepRelations.case);
    /**
     * Candidate LeftArc labels for a pronoun followed by a verb, keyed by
     * `s2.tag + s1.tag`. Index 0 is used for the token 'gua', index 1 for
     * the token 'che' (see `setS2S1Relation`).
     */
    s2S1LeftArgsToPronoun = new Map().set(symbols_1.Tagset.npr + symbols_1.Tagset.vb, [
        symbols_1.DepRelations.nsubj,
        symbols_1.DepRelations.dislocated,
    ]);
    /**
     * Creates the configuration a parse starts from.
     * @returns a new, empty `Configuration`
     */
    getInitialConfiguration() {
        return new configuration_1.Configuration();
    }
    /**
     * Applies a transition to a configuration.
     * @param t transition exposing `do(configuration)`
     * @param c configuration to transform
     * @returns whatever `t.do(c)` returns (used as the next configuration)
     */
    apply(t, c) {
        return t.do(c);
    }
    /** @returns true when the current configuration's queue has no nodes */
    isQueueEmpty() {
        return this.c.queue.length === 0;
    }
    /** @returns true when the current configuration's stack holds exactly two nodes */
    isTwoNodesInStack() {
        return this.c.stack.length === 2;
    }
    /**
     * Records s1 as depending on s2: sets `s1.dep` and `s1.head`, then builds
     * the relation.
     * @param label dependency label
     * @returns `Relation(label, s2, s1)`
     */
    rightRelation(label) {
        this.s1.dep = label;
        this.s1.head = this.s2.token;
        return new relation_1.Relation(label, this.s2, this.s1);
    }
    /**
     * Records s2 as depending on s1: sets `s2.dep` and `s2.head`, then builds
     * the relation.
     * @param label dependency label
     * @returns `Relation(label, s1, s2)`
     */
    leftRelation(label) {
        this.s2.dep = label;
        this.s2.head = this.s1.token;
        return new relation_1.Relation(label, this.s1, this.s2);
    }
    /**
     * Refreshes `s1`, `s2` and `b1` from the current configuration, falling
     * back to a blank `Node('')` for any position that does not exist.
     */
    setS1S2B1() {
        const { stack, queue } = this.c;
        this.s1 = stack.length > 0 ? stack[stack.length - 1] : new document_1.Node('');
        this.s2 = stack.length > 1 ? stack[stack.length - 2] : new document_1.Node('');
        this.b1 = queue.length > 0 ? queue[0] : new document_1.Node('');
    }
    /**
     * Pushes a relation chosen from the s1/b1 tables for an arc transition.
     * The lookup key uses s1 and b1, while the relation itself is built
     * between s2 and s1 by `rightRelation` / `leftRelation`.
     * @param t transition about to be applied; only `RightArc` and `LeftArc` have an effect
     */
    setS1B1Relation(t) {
        const key = this.s1.tag + this.b1.tag;
        if (t instanceof configuration_1.RightArc) {
            const rel = this.s1B1RightRelations.get(key);
            if (rel) {
                this.c.relations.push(this.rightRelation(rel));
            }
        }
        else if (t instanceof configuration_1.LeftArc) {
            const rel = this.s1B1LeftRelations.get(key);
            if (rel) {
                this.c.relations.push(this.leftRelation(rel));
            }
        }
    }
    /**
     * Pushes a relation chosen from the s2/s1 tables for an arc transition.
     *
     * RightArc: table label if the tag pair is known, otherwise `root` when
     * exactly two nodes remain on the stack.
     * LeftArc: table label if the tag pair is known, otherwise the
     * pronoun-specific rules in `s2S1LeftArgsToPronoun`.
     * @param t transition about to be applied; only `RightArc` and `LeftArc` have an effect
     */
    setS2S1Relation(t) {
        const key = this.s2.tag + this.s1.tag;
        const relations = this.c.relations;
        if (t instanceof configuration_1.RightArc) {
            if (this.s2S1RightRelations.has(key)) {
                const rel = this.s2S1RightRelations.get(key);
                if (rel) {
                    relations.push(this.rightRelation(rel));
                }
            }
            else if (this.isTwoNodesInStack()) {
                relations.push(this.rightRelation(symbols_1.DepRelations.root));
            }
            return;
        }
        if (!(t instanceof configuration_1.LeftArc)) {
            return;
        }
        if (this.s2S1LeftRelations.has(key)) {
            const rel = this.s2S1LeftRelations.get(key);
            if (rel) {
                relations.push(this.leftRelation(rel));
            }
            return;
        }
        // TODO: to be improved
        const labelsPronoun = this.s2S1LeftArgsToPronoun.get(key);
        if (!labelsPronoun) {
            return;
        }
        if (this.s2.token === 'gua') {
            relations.push(this.leftRelation(labelsPronoun[0]));
        }
        else if (this.s2.token === 'che') {
            // 'che' only gets the second label when the earliest recorded
            // relation whose dependent is 'gua' has that dependent tagged npr.
            const guaRelation = relations.find(it => it.dependent.token === 'gua');
            if (guaRelation !== undefined && guaRelation.dependent.tag === symbols_1.Tagset.npr) {
                relations.push(this.leftRelation(labelsPronoun[1]));
            }
        }
    }
    /**
     * Parses a sequence of nodes into dependency relations.
     *
     * Every call starts from a fresh configuration, so one parser instance
     * can be reused without relations, stack or queue content leaking from a
     * previous call.
     * @param nodes iterable of nodes, queued in iteration order
     * @returns the relations collected in the final configuration
     */
    parse(nodes) {
        this.c = this.getInitialConfiguration();
        for (const node of nodes) {
            this.c.queue.push(node);
        }
        const guide = new guide_1.Guide();
        this.c.stack.push(new document_1.Node('ROOT'));
        if (this.c.stack.length === 1 && this.c.queue.length > 0) {
            // initial configuration
            // shift the first lexeme from queue to stack
            guide.transitions.push(new configuration_1.Shift());
        }
        while (!this.c.isTerminalConfiguration()) {
            const t = guide.getNextTransition(this.c);
            if (t == null) {
                // covers both null and undefined: the guide has nothing more to offer
                break;
            }
            this.setS1S2B1();
            if (this.s1.tag !== '' && this.b1.tag !== '') {
                // both the stack top and the queue front are tagged: use the s1/b1 tables
                this.setS1B1Relation(t);
            }
            else if (this.isQueueEmpty()) {
                // nothing left in the queue: label between the two topmost stack nodes
                this.setS2S1Relation(t);
            }
            this.c = this.apply(t, this.c);
        }
        return this.c.relations;
    }
}
exports.DependencyParser = DependencyParser;
//# sourceMappingURL=parser.js.map