earley-sgf
Version:
Earley algorithm used to parse SGF files
152 lines (151 loc) • 5.59 kB
JavaScript
import { Earley, Grammar, Rule, Symbol } from './earley.js';
import { sgfTokenize } from './sgfTokenize.js';
/*
 * Plain-data description of the SGF grammar: a start symbol plus a list
 * of productions, each written as { lhs, rhs } where rhs is an array of
 * symbol names (an empty array is an empty production).
 */
const preGrammarSGF = (() => {
    /* rule('a', 'b', 'c') is shorthand for the production a -> b c */
    const rule = (lhs, ...rhs) => ({ lhs, rhs });
    /*
     * Property categories and the identifier tokens that belong to each.
     * The generated rules follow the order of this table.
     */
    const categories = [
        ['move', ['B', 'W']],
        ['setup', ['AB', 'AW', 'AE']],
        ['territory', ['TB', 'TW', 'TE']],
        ['player', ['PB', 'PW']],
        ['komi', ['KM']],
        ['date', ['DT']],
        ['size', ['SZ']],
        ['result', ['RE']]
    ];
    /* Categories whose property carries exactly one bracketed text */
    const singleText = ['player', 'komi', 'date', 'size', 'result'];
    /* Every identifier listed above, flattened in table order */
    const identifiers = categories.flatMap(([, ids]) => ids);
    return {
        start: "collection",
        rules: [
            /* At least one game */
            rule('collection', 'tree', 'trees'),
            /* A tree is a sequence of nodes, and the forest extends the sequence */
            rule('tree', '(', 'nodes', 'trees', ')'),
            /* A forest is a list of trees, possibly empty */
            rule('trees', 'tree', 'trees'),
            rule('trees'),
            /* A sequence of at least one node */
            rule('nodes', 'node'),
            rule('nodes', 'node', 'nodes'),
            /* A node is a sequence of properties, possibly empty */
            rule('node', ';', 'props'),
            rule('props'),
            rule('props', 'prop', 'props'),
            /* A move is a color plus either one position or a pass (empty brackets) */
            rule('prop', 'move', '[', 'text', ']'),
            rule('prop', 'move', '[', ']'),
            /* A setup is a color plus one or more positions */
            rule('prop', 'setup', 'values'),
            /* A territory is a color plus one or more positions */
            rule('prop', 'territory', 'values'),
            /* Player name, komi, date, size and result each take one text */
            ...singleText.map(category => rule('prop', category, '[', 'text', ']')),
            /* Each category is spelled by one of its identifier tokens */
            ...categories.flatMap(([category, ids]) => ids.map(id => rule(category, id))),
            /* Properties that are not supported are ignored. */
            rule('prop', 'ignored', 'values'),
            rule('ignored', 'prefix'),
            /*
             * Properties with empty brackets are malformed and ignored.
             * NOTE(review): B[] and W[] also match the 'move [ ]' rule
             * above, so those inputs have two derivations.
             */
            rule('prop', 'malformed', '[', ']'),
            ...[...identifiers, 'prefix'].map(id => rule('malformed', id)),
            /* A non-empty sequence of bracketed texts */
            rule('values', '[', 'text', ']'),
            rule('values', '[', 'text', ']', 'values')
        ]
    };
})();
/*
 * Build a Grammar from a plain { start, rules } description.
 *
 * A symbol name is treated as a non-terminal when at least one rule has
 * it as its left-hand side.  Every right-hand-side Symbol is constructed
 * with a second argument that is true exactly when the name is NOT such
 * a non-terminal, i.e. when it is a terminal lexical token.
 */
function createGrammar(preGrammar) {
    /* Pass 1: collect every left-hand side. */
    const heads = new Set();
    for (const { lhs } of preGrammar.rules) {
        heads.add(lhs);
    }
    /*
     * Pass 2: only now that all non-terminals are known can each
     * production be turned into a Rule.
     */
    const productions = preGrammar.rules.map(({ lhs, rhs }) => {
        const head = new Symbol(lhs);
        const body = rhs.map(name => {
            const isTerminal = !heads.has(name);
            return new Symbol(name, isTerminal);
        });
        return new Rule(head, body);
    });
    const startSymbol = new Symbol(preGrammar.start);
    return new Grammar(productions, startSymbol);
}
/*
 * Grammar built from preGrammarSGF.  Created at module scope and handed
 * to the Earley parser in sgfTree.
 */
const sgfGrammar = createGrammar(preGrammarSGF);
/*
 * Parse the contents of an SGF file.
 *
 * The text is tokenized with sgfTokenize, the whole token sequence is
 * collected into an array, and that array is parsed by an Earley parser
 * over sgfGrammar.  The parser's result (the parse tree) is returned
 * as-is.
 */
function sgfTree(sgfFileContents) {
    const tokens = Array.from(sgfTokenize(sgfFileContents));
    const parser = new Earley(sgfGrammar);
    return parser.parse(tokens);
}
/*
 * Read the text value that follows a property identifier.
 *
 * Consumes the token right after the identifier (the original code
 * assumes it is the opening bracket) and then the value token.
 * Returns undefined when the token stream ends early or when the
 * brackets are empty, i.e. the value token is the closing ']'.
 */
function sgfHeadValue(tokens) {
    if (tokens.next().done)
        return undefined;
    const value = tokens.next();
    if (value.done || value.value.text === ']')
        return undefined;
    return value.value.text;
}
/*
 * Returns the date and players' names found in the first node.
 * Useful for quickly identifying a SGF.
 *
 * The result is an object with the optional keys whiteName (PW),
 * blackName (PB) and date (DT); a key is only set when the property is
 * present with a non-empty value.  Truncated input yields whatever was
 * collected so far instead of throwing.
 */
function sgfHead(sgfFileContents) {
    const tokens = sgfTokenize(sgfFileContents);
    /* Property identifier -> key in the returned object */
    const fields = new Map([
        ['PW', 'whiteName'],
        ['PB', 'blackName'],
        ['DT', 'date']
    ]);
    /*
     * Search until the end of the first node.
     * Expected sequence is "( ; ... ;" or "( ; ... )", so stop at the
     * third structural token.
     */
    const result = {};
    let count = 0;
    while (count < 3) {
        const i = tokens.next();
        if (i.done)
            break;
        const text = i.value.text;
        if (text === ';' || text === '(' || text === ')') {
            ++count;
            continue;
        }
        const key = fields.get(text);
        if (key === undefined)
            continue;
        const value = sgfHeadValue(tokens);
        if (value !== undefined)
            result[key] = value;
    }
    return result;
}
/*
 * Print a parse tree to the console, one node per line.
 *
 * A node that has a 'text' property is printed as its text, followed by
 * its token name when the two differ.  Any other node is printed as its
 * token name, and its children are printed below it with the indent
 * extended by one space.
 */
function display(tree, indent = "") {
    const isLeaf = 'text' in tree;
    if (!isLeaf) {
        console.log(indent + tree.token);
        const childIndent = indent + " ";
        tree.children.forEach(child => display(child, childIndent));
        return;
    }
    /* Only show the token name when it adds information. */
    let suffix = "";
    if (tree.token !== tree.text) {
        suffix = " " + tree.token;
    }
    console.log(indent + tree.text + suffix);
}
export { sgfTree, sgfHead, display };