earley-sgf
Version:
Earley algorithm used to parse SGF files
385 lines (384 loc) • 12.9 kB
JavaScript
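/*
* A grammar symbol: a non-empty token that is either terminal or non-terminal.
* The empty sequence epsilon has no Symbol of its own (tokens are asserted to
* be non-empty); a rule derives epsilon by having an empty right-hand side.
*/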
export class Symbol {
    /*
     * token      - name of the symbol (string); asserted to be non-empty.
     * isTerminal - true for a terminal, false (the default) for a non-terminal.
     */
    constructor(token, isTerminal = false) {
        /* Create both fields first, in a fixed order, before storing any value. */
        for (const field of ["isTerminal", "token"]) {
            Object.defineProperty(this, field, {
                value: undefined,
                writable: true,
                enumerable: true,
                configurable: true
            });
        }
        console.assert(token.length > 0, "token cannot be an empty string");
        this.token = token;
        this.isTerminal = isTerminal;
    }
    /*
     * Two symbols are equal when both the token and the terminal flag match.
     */
    equals(other) {
        if (this.token !== other.token) {
            return false;
        }
        return this.isTerminal === other.isTerminal;
    }
    /*
     * Terminals are printed inside single quote marks, non-terminals bare.
     */
    toString() {
        if (this.isTerminal) {
            return "'" + this.token + "'";
        }
        return this.token;
    }
}
export class Rule {
    /*
     * lhs - the non-terminal Symbol being defined.
     * rhs - list of Symbols the lhs expands to; an empty list means epsilon.
     */
    constructor(lhs, rhs) {
        /* Create both fields first, in a fixed order, before storing any value. */
        for (const field of ["lhs", "rhs"]) {
            Object.defineProperty(this, field, {
                value: undefined,
                writable: true,
                enumerable: true,
                configurable: true
            });
        }
        console.assert(lhs.isTerminal === false, "lhs cannot be terminal");
        this.lhs = lhs;
        this.rhs = rhs;
    }
    /*
     * Structural comparison: same left-hand side and the same symbols,
     * position by position, on the right-hand side.
     */
    equals(other) {
        const sameShape = other.lhs.equals(this.lhs)
            && other.rhs.length === this.rhs.length;
        if (!sameShape) {
            return false;
        }
        return other.rhs.every((symbol, at) => symbol.equals(this.rhs[at]));
    }
    /*
     * True for a rule of the form "A -> epsilon" (nothing on the right).
     */
    isNullable() {
        return this.rhs.length === 0;
    }
    /*
     * Renders the rule as "lhs -> sym sym ...".
     */
    toString() {
        const head = this.lhs.toString() + " ->";
        const body = this.rhs.map(symbol => symbol.toString());
        return [head, ...body].join(" ");
    }
}
class DotRule {
    /*
     * rule - the Rule being recognised.
     * dot  - how many right-hand-side symbols have been matched so far
     *        (asserted to lie between 0 and the length of the right-hand side).
     */
    constructor(rule, dot) {
        /* Create both fields first, in a fixed order, before storing any value. */
        for (const field of ["rule", "dot"]) {
            Object.defineProperty(this, field, {
                value: undefined,
                writable: true,
                enumerable: true,
                configurable: true
            });
        }
        console.assert(0 <= dot && dot <= rule.rhs.length);
        this.rule = rule;
        this.dot = dot;
    }
    /*
     * Rules are compared by identity: the grammar never gains rules after
     * it is built, so looking inside the rule is unnecessary.
     */
    equals(other) {
        if (this.dot !== other.dot) {
            return false;
        }
        return this.rule === other.rule;
    }
    /*
     * True once the dot has moved past the last right-hand-side symbol.
     */
    isComplete() {
        return this.rule.rhs.length === this.dot;
    }
    /*
     * The symbol immediately after the dot.
     */
    nextSymbol() {
        const { rhs } = this.rule;
        console.assert(this.dot < rhs.length, "Complete has no next symbol.");
        return rhs[this.dot];
    }
    /*
     * A new DotRule for the same rule with the dot moved one symbol right.
     */
    advance() {
        const { rhs } = this.rule;
        console.assert(this.dot < rhs.length, "Complete cannot be advanced.");
        return new DotRule(this.rule, this.dot + 1);
    }
    /*
     * Renders "lhs -> matched . pending", with "." marking the dot.
     */
    toString() {
        const { lhs, rhs } = this.rule;
        const pieces = [lhs.toString(), "->"];
        let at = 0;
        while (at < this.dot) {
            pieces.push(rhs[at].toString());
            at += 1;
        }
        pieces.push(".");
        while (at < rhs.length) {
            pieces.push(rhs[at].toString());
            at += 1;
        }
        return pieces.join(" ");
    }
}
/*
 * A DotRule together with the part of the sentence it covers.
 *
 * sentence = | word | word | ... | word |
 *                   ^                  ^
 *                 origin              edge
 */
class State {
    /*
     * dotRule - the partially (or fully) matched rule.
     * origin  - sentence position where matching of the rule began.
     * edge    - sentence position reached so far.
     * A fresh state starts with an empty 'previous' list.
     */
    constructor(dotRule, origin, edge) {
        /* Create all fields first, in a fixed order, before storing any value. */
        for (const field of ["dotRule", "origin", "edge", "previous"]) {
            Object.defineProperty(this, field, {
                value: undefined,
                writable: true,
                enumerable: true,
                configurable: true
            });
        }
        this.dotRule = dotRule;
        this.origin = origin;
        this.edge = edge;
        this.previous = [];
    }
    /*
     * States match on origin and dot rule; the edge is not compared.
     */
    equals(other) {
        return this.origin === other.origin && this.dotRule.equals(other.dotRule);
    }
    /*
     * Renders the dot rule followed by "(origin,edge)".
     */
    toString() {
        return `${this.dotRule.toString()} (${this.origin},${this.edge})`;
    }
}
export class Grammar {
    /*
     * rules - list of Rule; asserted to be non-empty, and must not contain
     *         two rules that compare equal.
     * start - the non-terminal Symbol from which parsing starts.
     *
     * Throws an Error when two rules are equal.
     */
    constructor(rules, start) {
        /* Create both fields first, in a fixed order, before storing any value. */
        for (const field of ["rules", "start"]) {
            Object.defineProperty(this, field, {
                value: undefined,
                writable: true,
                enumerable: true,
                configurable: true
            });
        }
        console.assert(start instanceof Symbol, "Expected start");
        console.assert(rules.length > 0, "Need at least one rule");
        console.assert(start.isTerminal === false, "start must be Non terminal");
        /*
         * Reject duplicates up front: once every rule is known to be unique,
         * later code can compare rules by identity instead of Rule.equals().
         * Each rule is checked against all rules before it, last rule first.
         */
        for (let later = rules.length - 1; later > 0; --later) {
            const candidate = rules[later];
            for (let earlier = later - 1; earlier >= 0; --earlier) {
                if (candidate.equals(rules[earlier])) {
                    throw new Error(`Duplicate rules in the grammar. "${candidate.toString()}"`);
                }
            }
        }
        this.rules = rules;
        this.start = start;
    }
}
/*
 * Earley parser for a Grammar.
 *
 * parse() fills 'memory', one list of States per sentence position, using
 * the scanner, predictor and completer steps, and returns a tree for the
 * first complete parse found (or null).
 */
export class Earley {
    /*
     * grammar - the Grammar to parse with.
     */
    constructor(grammar) {
        Object.defineProperty(this, "grammar", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "memory", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "sentence", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        console.assert(grammar instanceof Grammar, "Earley needs a Grammar");
        this.grammar = grammar;
        this.memory = [];
        this.sentence = [];
    }
    /*
     * Add a new State to memory[edge] if an equal one is not already there.
     * Returns the state that is stored in memory (new or pre-existing).
     */
    addState(dotRule, origin, edge) {
        const state = new State(dotRule, origin, edge);
        const states = this.memory[edge];
        const found = states.find(x => x.equals(state));
        if (found === undefined) {
            states.push(state);
            return state;
        }
        return found;
    }
    /*
     * Internal helper: record 'entry' (an array [preceding state] or
     * [preceding state, completing state]) as a way of reaching 'target',
     * unless exactly the same entry has been recorded before. The same pair
     * can be found by both the completer and the predictor.
     */
    linkPrevious(target, entry) {
        const known = target.previous.some(old => old.length === entry.length
            && old.every((item, i) => item === entry[i]));
        if (!known) {
            target.previous.push(entry);
        }
    }
    /*
     * Parse a sentence.
     *
     * sentence - list of token objects. The scanner compares each element's
     *            'token' property with the token of a terminal Symbol, and
     *            matched elements are placed unchanged in the tree as leaves.
     *
     * Returns the tree built by buildTree() for the first complete parse
     * of the whole sentence, or null when the sentence is not accepted.
     */
    parse(sentence) {
        this.sentence = sentence;
        /*
         * The states in memory[k] have dot rules ready to consume the
         * sentence[k] token, or are finished just before.
         */
        this.memory = [];
        for (let i = 0; i <= sentence.length; ++i) {
            this.memory.push([]);
        }
        /* Create a new rule (with a new Symbol) that predicts every start rule */
        const topRule = new Rule(new Symbol('$%a$&', false), [this.grammar.start]);
        const topDotRule = new DotRule(topRule, 0);
        this.addState(topDotRule, 0, 0);
        for (let k = 0; k <= this.sentence.length; ++k) {
            /*
             * The states ready to consume token at k or finished states.
             * The list grows while it is being walked; it is processed
             * first-in-first-out, so the length is re-read on every turn.
             */
            for (let n = 0; n < this.memory[k].length; ++n) {
                const state = this.memory[k][n];
                if (state.dotRule.isComplete()) {
                    /*
                     * Use the finished dot rule to advance other dot rules.
                     * The advanced rules are added to memory[k].
                     * Epsilon rules were already applied by the predictor.
                     */
                    if (!state.dotRule.rule.isNullable()) {
                        this.completer(state);
                    }
                    continue;
                }
                const symbol = state.dotRule.nextSymbol();
                if (!symbol.isTerminal) {
                    /* Create new dot rules to advance the current dot rule. */
                    this.predictor(state);
                }
                else if (k < this.sentence.length) {
                    /*
                     * Advance the dot rule with sentence[k],
                     * adding the new state to memory[k+1].
                     */
                    this.scanner(state);
                }
            }
        }
        const parsings = this.memory[this.sentence.length]
            .filter(state => state.dotRule.isComplete()
            && state.origin === 0
            && state.dotRule.rule.lhs.equals(this.grammar.start));
        /*
         * Only interested in the first parse.
         */
        return (parsings.length === 0) ? null : this.buildTree(parsings[0]);
    }
    /*
     * Advances the dot rule if it consumes the next token, i.e. when the
     * terminal after the dot has the same token as sentence[state.edge].
     */
    scanner(state) {
        const k = state.edge;
        const symbol = state.dotRule.nextSymbol();
        console.assert(symbol.isTerminal);
        if (symbol.token !== this.sentence[k].token) {
            return;
        }
        const dotRule = state.dotRule.advance();
        const found = this.addState(dotRule, state.origin, k + 1);
        this.linkPrevious(found, [state]);
    }
    /*
     * Add new dot rules for non-terminal B, if B is the next symbol in
     * the current dot rule.
     *
     * The state is also advanced over B right away whenever B is already
     * known to match the empty sequence at this position:
     *  - directly, through an epsilon rule "B ->", or
     *  - indirectly, through a finished zero-length state for B that is
     *    already in memory[k] (e.g. "B -> C ." with "C ->"). Such a state is
     *    handed to the completer only once, so a state that reaches ". B"
     *    afterwards would otherwise never be advanced.
     */
    predictor(state) {
        const k = state.edge;
        const symbol = state.dotRule.nextSymbol();
        console.assert(!symbol.isTerminal);
        this.grammar.rules.forEach(rule => {
            if (rule.lhs.equals(symbol)) {
                const dotRule = new DotRule(rule, 0);
                const prediction = this.addState(dotRule, k, k);
                /*
                 * Advance with an epsilon rule immediately,
                 * since the main loop never runs the completer for it.
                 */
                if (prediction.dotRule.rule.isNullable()) {
                    const advanced = this.addState(state.dotRule.advance(), state.origin, k);
                    this.linkPrevious(advanced, [state, prediction]);
                }
            }
        });
        /*
         * Snapshot the zero-length completions of 'symbol' made from
         * non-empty rules before adding anything, then advance over each.
         */
        const emptyCompletions = this.memory[k].filter(done => done.origin === k
            && done.dotRule.isComplete()
            && !done.dotRule.rule.isNullable()
            && done.dotRule.rule.lhs.equals(symbol));
        emptyCompletions.forEach(done => {
            const advanced = this.addState(state.dotRule.advance(), state.origin, k);
            this.linkPrevious(advanced, [state, done]);
        });
    }
    /*
     * Uses the given finished dot rule to advance the dot rules in
     * memory[origin] that are waiting for its left-hand side.
     */
    completer(complete) {
        const k = complete.edge;
        const origin = complete.origin;
        const symbol = complete.dotRule.rule.lhs;
        this.memory[origin].forEach(state => {
            if (!state.dotRule.isComplete() && symbol.equals(state.dotRule.nextSymbol())) {
                const newDotRule = state.dotRule.advance();
                const found = this.addState(newDotRule, state.origin, k);
                this.linkPrevious(found, [state, complete]);
            }
        });
    }
    /*
     * Convert a complete state to a tree.
     * A non-terminal becomes { token, children }, where children is an array.
     * A terminal is represented by the matching element of the sentence.
     */
    buildTree(complete) {
        console.assert(complete.dotRule.isComplete(), "Not complete");
        const children = [];
        let state = complete;
        /* Walk backwards from the finished rule, one symbol per turn. */
        while (0 < state.dotRule.dot) {
            /*
             * An entry in the 'previous' array is an array. The 1st element
             * is the preceding state, and if a 2nd element exists, it is the
             * complete state used to advance the rule. Only the first
             * recorded entry is followed.
             */
            const previous = state.previous[0];
            state = previous[0];
            let subTree;
            if (state.dotRule.nextSymbol().isTerminal) {
                subTree = this.sentence[state.edge];
            }
            else {
                subTree = this.buildTree(previous[1]);
            }
            children[state.dotRule.dot] = subTree;
        }
        return { token: complete.dotRule.rule.lhs.token, children };
    }
}