UNPKG

earley-sgf

Version:

Earley algorithm used to parse SGF files

385 lines (384 loc) 12.9 kB
/*
 * Earley parser: grammar model (Symbol, Rule, Grammar) and recognizer/tree
 * builder (Earley), plus the internal chart items (DotRule, State).
 */

/* Token of the synthetic top-level rule that predicts the grammar's start symbol. */
const TOP_RULE_TOKEN = '$%a$&';

/**
 * Record on `target` that it was reached through `entry`, unless that exact
 * back-pointer is already present.
 *
 * @param {State} target - state receiving the back-pointer
 * @param {State[]} entry - `[predecessor]` or `[predecessor, completedState]`
 */
function linkPrevious(target, entry) {
    const alreadyLinked = target.previous.some(known => known.length === entry.length &&
        known.every((item, index) => item === entry[index]));
    if (!alreadyLinked) {
        target.previous.push(entry);
    }
}

/**
 * A grammar symbol, either terminal or non-terminal.
 *
 * The empty sequence (epsilon) has no symbol of its own: it is expressed by a
 * Rule whose right-hand side is an empty list (see Rule.isNullable).
 *
 * NOTE: inside this module the class shadows the built-in global `Symbol`.
 */
export class Symbol {
    /**
     * @param {string} token - non-empty name of the symbol
     * @param {boolean} [isTerminal=false] - true for terminals
     */
    constructor(token, isTerminal = false) {
        console.assert(token.length > 0, "token cannot be an empty string");
        this.isTerminal = isTerminal; /* boolean */
        this.token = token; /* string */
    }

    /**
     * @param {Symbol} other
     * @returns {boolean} true when token and terminal flag both match
     */
    equals(other) {
        return this.token === other.token && this.isTerminal === other.isTerminal;
    }

    /**
     * Distinguish terminals by enclosing them in single quote marks.
     * @returns {string}
     */
    toString() {
        return this.isTerminal ? "'" + this.token + "'" : this.token;
    }
}

/**
 * A production "lhs -> rhs[0] rhs[1] ...".
 */
export class Rule {
    /**
     * @param {Symbol} lhs - non-terminal being defined
     * @param {Symbol[]} rhs - sequence it expands to; empty for "lhs -> epsilon"
     */
    constructor(lhs, rhs) {
        console.assert(lhs.isTerminal === false, "lhs cannot be terminal");
        this.lhs = lhs; /* Symbol */
        this.rhs = rhs; /* List of Symbol */
    }

    /**
     * Structural comparison of both sides of the rule.
     * @param {Rule} other
     * @returns {boolean}
     */
    equals(other) {
        return other.lhs.equals(this.lhs) &&
            other.rhs.length === this.rhs.length &&
            other.rhs.every((symbol, index) => symbol.equals(this.rhs[index]));
    }

    /**
     * A rule of the form "A -> epsilon", where epsilon is the empty sequence.
     * @returns {boolean}
     */
    isNullable() {
        return this.rhs.length === 0;
    }

    /** @returns {string} e.g. "A -> B 'c'" */
    toString() {
        return [this.lhs.toString() + " ->", ...this.rhs.map(symbol => symbol.toString())].join(" ");
    }
}

/**
 * A rule together with a position (the dot) marking how much of its
 * right-hand side has been recognized.
 */
class DotRule {
    /**
     * @param {Rule} rule
     * @param {number} dot - index into rule.rhs, 0 <= dot <= rule.rhs.length
     */
    constructor(rule, dot) {
        console.assert(0 <= dot && dot <= rule.rhs.length);
        this.rule = rule;
        this.dot = dot;
    }

    /**
     * No rules are added after the grammar is created, so it is safe to
     * compare rules by identity instead of comparing their internals.
     * @param {DotRule} other
     * @returns {boolean}
     */
    equals(other) {
        return this.dot === other.dot && this.rule === other.rule;
    }

    /** @returns {boolean} true when the dot is past the last rhs symbol */
    isComplete() {
        return this.dot === this.rule.rhs.length;
    }

    /** @returns {Symbol} the symbol immediately after the dot */
    nextSymbol() {
        console.assert(this.dot < this.rule.rhs.length, "Complete has no next symbol.");
        return this.rule.rhs[this.dot];
    }

    /** @returns {DotRule} a new dot rule with the dot moved one symbol forward */
    advance() {
        console.assert(this.dot < this.rule.rhs.length, "Complete cannot be advanced.");
        return new DotRule(this.rule, this.dot + 1);
    }

    /** @returns {string} e.g. "A -> B . 'c'" */
    toString() {
        const names = this.rule.rhs.map(symbol => symbol.toString());
        return [
            this.rule.lhs.toString() + " ->",
            ...names.slice(0, this.dot),
            ".",
            ...names.slice(this.dot)
        ].join(" ");
    }
}

/*
 * A DotRule with the part of the sentence it covers.
 *
 * sentence = | word | word | ... | word |
 *            ^                   ^
 *            origin              edge
 *
 * `previous` holds back-pointers used by Earley.buildTree: each entry is
 * [predecessor] (advanced by scanning a token) or [predecessor, complete]
 * (advanced over a finished non-terminal).
 */
class State {
    /**
     * @param {DotRule} dotRule
     * @param {number} origin - sentence position where the rule started
     * @param {number} edge - sentence position reached so far
     */
    constructor(dotRule, origin, edge) {
        this.dotRule = dotRule;
        this.origin = origin;
        this.edge = edge;
        this.previous = [];
    }

    /**
     * The edge is not compared: states are only ever compared within the
     * same memory[edge] list.
     * @param {State} other
     * @returns {boolean}
     */
    equals(other) {
        return this.origin === other.origin && this.dotRule.equals(other.dotRule);
    }

    /** @returns {string} e.g. "A -> B . 'c' (0,1)" */
    toString() {
        return this.dotRule.toString() + " (" + this.origin + "," + this.edge + ")";
    }
}

/**
 * A set of pairwise distinct rules plus the start symbol.
 */
export class Grammar {
    /**
     * @param {Rule[]} rules - at least one rule, no two of them equal
     * @param {Symbol} start - non-terminal the parse must derive
     * @throws {Error} when two rules are structurally equal
     */
    constructor(rules, start) {
        console.assert(start instanceof Symbol, "Expected start");
        console.assert(rules.length > 0, "Need at least one rule");
        console.assert(start.isTerminal === false, "start must be Non terminal");
        /*
         * Verify that no two rules are equal.
         * There is no need to use Rule.equals() later if this is true.
         */
        for (let i = rules.length - 1; i > 0; --i) {
            const rule = rules[i];
            for (let j = i - 1; j >= 0; --j) {
                if (rule.equals(rules[j])) {
                    throw new Error('Duplicate rules in the grammar. "' + rule.toString() + '"');
                }
            }
        }
        this.rules = rules; /* List of Rule */
        this.start = start; /* Symbol */
    }
}

/**
 * Earley recognizer that also builds a parse tree for the first parse found.
 */
export class Earley {
    /**
     * @param {Grammar} grammar
     */
    constructor(grammar) {
        console.assert(grammar instanceof Grammar, "Earley needs a Grammar");
        this.grammar = grammar;
        this.memory = [];
        this.sentence = [];
    }

    /**
     * Add a new State to memory[edge] if an equal one is not already there.
     *
     * @param {DotRule} dotRule
     * @param {number} origin
     * @param {number} edge
     * @returns {State} the stored state (new or pre-existing)
     */
    addState(dotRule, origin, edge) {
        const candidate = new State(dotRule, origin, edge);
        const states = this.memory[edge];
        const existing = states.find(known => known.equals(candidate));
        if (existing !== undefined) {
            return existing;
        }
        states.push(candidate);
        return candidate;
    }

    /**
     * Parse a sentence.
     *
     * @param {Array<{token: string}>} sentence - tokens; each entry's `token`
     *     is matched against terminal symbols, and the entry itself becomes
     *     the terminal leaf of the resulting tree
     * @returns {?{token: string, children: Array}} tree of the first parse
     *     found, or null when the sentence is not derivable from the start symbol
     */
    parse(sentence) {
        this.sentence = sentence;
        /*
         * The states in memory[k] have dot rules ready to consume the
         * sentence[k] token, or are finished just before.
         */
        this.memory = Array.from({ length: sentence.length + 1 }, () => []);
        /* Create a new rule (with a new Symbol) that predicts every start rule */
        const topRule = new Rule(new Symbol(TOP_RULE_TOKEN, false), [this.grammar.start]);
        this.addState(new DotRule(topRule, 0), 0, 0);
        for (let k = 0; k <= sentence.length; ++k) {
            const states = this.memory[k];
            /*
             * First-in-first-processed; the list grows while it is walked,
             * so the length must be re-read on every iteration.
             */
            for (let n = 0; n < states.length; ++n) {
                const state = states[n];
                if (state.dotRule.isComplete()) {
                    /*
                     * Use the finished dot rule to advance other dot rules.
                     * Nullable rules have already been applied by the predictor.
                     */
                    if (!state.dotRule.rule.isNullable()) {
                        this.completer(state);
                    }
                } else if (!state.dotRule.nextSymbol().isTerminal) {
                    /* Create new dot rules to advance the current dot rule. */
                    this.predictor(state);
                } else if (k < sentence.length) {
                    /* Advance with sentence[k]; the new state goes to memory[k+1]. */
                    this.scanner(state);
                }
            }
        }
        /* Only interested in the first parse. */
        const accepted = this.memory[sentence.length].find(state => state.dotRule.isComplete() &&
            state.origin === 0 &&
            state.dotRule.rule.lhs.equals(this.grammar.start));
        return accepted === undefined ? null : this.buildTree(accepted);
    }

    /**
     * Advances the dot rule if it consumes the next token.
     * @param {State} state - state whose next symbol is a terminal
     */
    scanner(state) {
        const k = state.edge;
        const symbol = state.dotRule.nextSymbol();
        console.assert(symbol.isTerminal);
        if (symbol.token !== this.sentence[k].token) {
            return;
        }
        const advanced = this.addState(state.dotRule.advance(), state.origin, k + 1);
        linkPrevious(advanced, [state]);
    }

    /**
     * Add new dot rules for non-terminal B, if B is the next symbol in
     * the current dot rule.
     * @param {State} state - state whose next symbol is a non-terminal
     */
    predictor(state) {
        const k = state.edge;
        const symbol = state.dotRule.nextSymbol();
        console.assert(!symbol.isTerminal);
        for (const rule of this.grammar.rules) {
            if (!rule.lhs.equals(symbol)) {
                continue;
            }
            const prediction = this.addState(new DotRule(rule, 0), k, k);
            /*
             * Advance with a nullable rule immediately,
             * since a nullable rule isn't added to the next list.
             */
            if (rule.isNullable()) {
                const advanced = this.addState(state.dotRule.advance(), state.origin, k);
                linkPrevious(advanced, [state, prediction]);
            }
        }
        /*
         * The symbol may already have been recognized over the empty span
         * (k,k) through a chain of rules, e.g. A -> B with B -> epsilon. If
         * the completer ran for that state before this one existed, it never
         * saw this state, so advance over it here. The snapshot is taken
         * before adding states because addState appends to the same list.
         */
        const finished = this.memory[k].filter(candidate => candidate.origin === k &&
            candidate.dotRule.isComplete() &&
            !candidate.dotRule.rule.isNullable() &&
            candidate.dotRule.rule.lhs.equals(symbol));
        for (const complete of finished) {
            const advanced = this.addState(state.dotRule.advance(), state.origin, k);
            linkPrevious(advanced, [state, complete]);
        }
    }

    /**
     * Uses the given finished dot rule to advance other dot rules.
     * @param {State} complete - a complete state
     */
    completer(complete) {
        const k = complete.edge;
        const symbol = complete.dotRule.rule.lhs;
        /*
         * Snapshot first: when origin === k, addState appends to the list
         * being examined, and states added here must not be visited.
         */
        const waiting = this.memory[complete.origin].filter(state => !state.dotRule.isComplete() &&
            symbol.equals(state.dotRule.nextSymbol()));
        for (const state of waiting) {
            const advanced = this.addState(state.dotRule.advance(), state.origin, k);
            linkPrevious(advanced, [state, complete]);
        }
    }

    /**
     * Convert a complete state to a tree.
     * Nonterminals are of the form { token, children }, where children is an array.
     * Terminals are the sentence entries themselves.
     *
     * @param {State} complete - a complete state
     * @returns {{token: string, children: Array}}
     */
    buildTree(complete) {
        console.assert(complete.dotRule.isComplete(), "Not complete");
        const children = [];
        let state = complete;
        while (0 < state.dotRule.dot) {
            /*
             * An entry in the 'previous' array is an array. The 1st element
             * is the preceding state, and if a 2nd element exists, it is the
             * complete state used to advance the rule. Only the first
             * back-pointer is followed.
             */
            const [predecessor, finished] = state.previous[0];
            state = predecessor;
            children[state.dotRule.dot] = state.dotRule.nextSymbol().isTerminal
                ? this.sentence[state.edge]
                : this.buildTree(finished);
        }
        return { token: complete.dotRule.rule.lhs.token, children };
    }
}