// taipa
// Version:
// Taiwanese morphological parsing library
// 151 lines • 6.24 kB
// JavaScript
// CommonJS module prologue for the dependency parser.
"use strict";
// Flag the exports object with `__esModule` so interop-aware importers can
// recognise it.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the named export; the real value is assigned after the class
// definition further down in this file.
exports.DependencyParser = void 0;
// Project-local modules used by the parser class below.
const configuration_1 = require("./configuration"); // Configuration, Shift, RightArc, LeftArc
const guide_1 = require("./guide"); // Guide: supplies the next transition
const document_1 = require("../document"); // Node
const symbols_1 = require("./symbols"); // Tagset, DepRelations
const relation_1 = require("./relation"); // Relation
/**
 * Builds dependency relations for a list of tagged nodes.
 *
 * The parser keeps a configuration (`c`) holding a `stack`, a `queue` and the
 * `relations` collected so far. A `Guide` supplies one transition at a time
 * (`Shift`, `RightArc`, `LeftArc`, ...); before each transition is applied,
 * the parser inspects the two topmost stack nodes (`s1`, `s2`) and the first
 * queue node (`b1`) and, when a tag-pair table matches, records a relation.
 */
class DependencyParser {
    /** Current configuration; replaced by whatever each transition returns. */
    c = this.getInitialConfiguration();
    /** Topmost stack node, or an empty-token placeholder node. */
    s1 = new document_1.Node('');
    /** Second stack node from the top, or an empty-token placeholder node. */
    s2 = new document_1.Node('');
    /** First queue node, or an empty-token placeholder node. */
    b1 = new document_1.Node('');
    /**
     * Labels used on a RightArc while both s1 and b1 carry a tag.
     * Keyed by `s1.tag + b1.tag`.
     */
    s1B1RightRelations = new Map()
        .set(symbols_1.Tagset.ppv + symbols_1.Tagset.psub, symbols_1.DepRelations.compoundPrt)
        .set(symbols_1.Tagset.ppv + symbols_1.Tagset.npr, symbols_1.DepRelations.compoundPrt);
    /** LeftArc counterpart of `s1B1RightRelations`; currently empty. */
    s1B1LeftRelations = new Map();
    /** Labels used on a RightArc once the queue is empty. Keyed by `s2.tag + s1.tag`. */
    s2S1RightRelations = new Map()
        .set(symbols_1.Tagset.vb + symbols_1.Tagset.ppv, symbols_1.DepRelations.compoundPrt)
        .set(symbols_1.Tagset.vb + symbols_1.Tagset.psub, symbols_1.DepRelations.prt)
        .set(symbols_1.Tagset.vb + symbols_1.Tagset.vb, symbols_1.DepRelations.compound)
        .set(symbols_1.Tagset.vb + symbols_1.Tagset.npr, symbols_1.DepRelations.obj);
    /** Labels used on a LeftArc once the queue is empty. Keyed by `s2.tag + s1.tag`. */
    s2S1LeftRelations = new Map()
        .set(symbols_1.Tagset.aux + symbols_1.Tagset.vb, symbols_1.DepRelations.aux)
        .set(symbols_1.Tagset.padv + symbols_1.Tagset.vb, symbols_1.DepRelations.advmod)
        .set(symbols_1.Tagset.appr + symbols_1.Tagset.npr, symbols_1.DepRelations.case);
    /**
     * Candidate labels for a LeftArc whose tag pair is not in
     * `s2S1LeftRelations`. Index 0 is used when s2 is the token 'gua',
     * index 1 when s2 is the token 'che' (see `setS2S1Relation`).
     */
    s2S1LeftArgsToPronoun = new Map().set(symbols_1.Tagset.npr + symbols_1.Tagset.vb, [
        symbols_1.DepRelations.nsubj,
        symbols_1.DepRelations.dislocated,
    ]);

    /**
     * @returns a new, empty configuration.
     */
    getInitialConfiguration() {
        return new configuration_1.Configuration();
    }

    /**
     * Applies a transition to a configuration.
     *
     * @param t transition object exposing `do(configuration)`.
     * @param c configuration to transform.
     * @returns whatever `t.do(c)` returns (used as the next configuration).
     */
    apply(t, c) {
        return t.do(c);
    }

    /**
     * @returns {boolean} true when the configuration's queue has no nodes left.
     */
    isQueueEmpty() {
        return this.c.queue.length === 0;
    }

    /**
     * @returns {boolean} true when the stack holds exactly two nodes.
     */
    isTwoNodesInStack() {
        return this.c.stack.length === 2;
    }

    /**
     * Records on s1 that it is attached to s2 and builds the matching relation.
     *
     * @param label dependency label to store in `s1.dep`.
     * @returns a `Relation` constructed from (label, s2, s1).
     */
    rightRelation(label) {
        this.s1.dep = label;
        this.s1.head = this.s2.token;
        return new relation_1.Relation(label, this.s2, this.s1);
    }

    /**
     * Records on s2 that it is attached to s1 and builds the matching relation.
     *
     * @param label dependency label to store in `s2.dep`.
     * @returns a `Relation` constructed from (label, s1, s2).
     */
    leftRelation(label) {
        this.s2.dep = label;
        this.s2.head = this.s1.token;
        return new relation_1.Relation(label, this.s1, this.s2);
    }

    /**
     * Refreshes `s1`, `s2` and `b1` from the current configuration. A slot
     * with no corresponding node receives a fresh node built from ''.
     */
    setS1S2B1() {
        const { stack, queue } = this.c;
        const depth = stack.length;
        this.s1 = depth > 0 ? stack[depth - 1] : new document_1.Node('');
        this.s2 = depth > 1 ? stack[depth - 2] : new document_1.Node('');
        this.b1 = queue.length > 0 ? queue[0] : new document_1.Node('');
    }

    /**
     * Records a relation for an arc transition using the s1/b1 tag tables.
     *
     * The label is selected by the (s1, b1) tag pair, but the relation itself
     * is built by `rightRelation`/`leftRelation`, i.e. between s2 and s1.
     * Transitions other than RightArc/LeftArc record nothing.
     *
     * @param t the transition about to be applied.
     */
    setS1B1Relation(t) {
        const key = this.s1.tag + this.b1.tag;
        if (t instanceof configuration_1.RightArc) {
            const label = this.s1B1RightRelations.get(key);
            if (label) {
                this.c.relations.push(this.rightRelation(label));
            }
        }
        else if (t instanceof configuration_1.LeftArc) {
            const label = this.s1B1LeftRelations.get(key);
            if (label) {
                this.c.relations.push(this.leftRelation(label));
            }
        }
    }

    /**
     * Records a relation for an arc transition using the s2/s1 tag tables.
     *
     * RightArc: uses the table label when the tag pair is listed; otherwise,
     * if exactly two nodes remain on the stack, records a `root` relation.
     * LeftArc: uses the table label when the tag pair is listed; otherwise
     * falls back to the token-specific handling in `s2S1LeftArgsToPronoun`.
     *
     * @param t the transition about to be applied.
     */
    setS2S1Relation(t) {
        const key = this.s2.tag + this.s1.tag;
        const { relations } = this.c;
        if (t instanceof configuration_1.RightArc) {
            // `has` decides the branch: a listed pair never falls through to
            // the root case, even if its stored label is falsy.
            if (this.s2S1RightRelations.has(key)) {
                const label = this.s2S1RightRelations.get(key);
                if (label) {
                    relations.push(this.rightRelation(label));
                }
            }
            else if (this.isTwoNodesInStack()) {
                relations.push(this.rightRelation(symbols_1.DepRelations.root));
            }
            return;
        }
        if (!(t instanceof configuration_1.LeftArc)) {
            return;
        }
        if (this.s2S1LeftRelations.has(key)) {
            const label = this.s2S1LeftRelations.get(key);
            if (label) {
                relations.push(this.leftRelation(label));
            }
            return;
        }
        // TODO: to be improved
        const labelsPronoun = this.s2S1LeftArgsToPronoun.get(key);
        if (!labelsPronoun) {
            return;
        }
        if (this.s2.token === 'gua') {
            relations.push(this.leftRelation(labelsPronoun[0]));
        }
        else if (this.s2.token === 'che') {
            // Only the first already-recorded relation whose dependent is
            // 'gua' is consulted, and its dependent must be tagged npr.
            const guaRelation = relations.find(it => it.dependent.token === 'gua');
            if (guaRelation !== undefined &&
                guaRelation.dependent.tag === symbols_1.Tagset.npr) {
                relations.push(this.leftRelation(labelsPronoun[1]));
            }
        }
    }

    /**
     * Parses a sequence of nodes and returns the relations found.
     *
     * Every call starts from a fresh configuration, so the same parser
     * instance can be reused without relations, stack or queue content
     * leaking over from an earlier call.
     *
     * @param nodes iterable of nodes exposing `token` and `tag`.
     * @returns the `relations` list of the final configuration.
     */
    parse(nodes) {
        // Without this reset a second call would start with the previous
        // call's relations and push a second ROOT onto the old stack.
        this.c = this.getInitialConfiguration();
        for (const node of nodes) {
            this.c.queue.push(node);
        }
        const guide = new guide_1.Guide();
        this.c.stack.push(new document_1.Node('ROOT'));
        if (this.c.stack.length === 1 && this.c.queue.length > 0) {
            // initial configuration
            // shift the first lexeme from queue to stack
            guide.transitions.push(new configuration_1.Shift());
        }
        while (!this.c.isTerminalConfiguration()) {
            const t = guide.getNextTransition(this.c);
            // `== null` matches both null and undefined: the guide has no move.
            if (t == null) {
                break;
            }
            this.setS1S2B1();
            if (this.s1.tag !== '' && this.b1.tag !== '') {
                this.setS1B1Relation(t);
            }
            else if (this.isQueueEmpty()) {
                this.setS2S1Relation(t);
            }
            this.c = this.apply(t, this.c);
        }
        return this.c.relations;
    }
}
// Expose the parser class as this module's named CommonJS export.
exports.DependencyParser = DependencyParser;
//# sourceMappingURL=parser.js.map