UNPKG

earley-sgf

Version:

Earley algorithm used to parse SGF files

152 lines (151 loc) 5.59 kB
import { Earley, Grammar, Rule, Symbol } from './earley.js';
import { sgfTokenize } from './sgfTokenize.js';

/*
 * Declarative description of the SGF grammar.
 *
 * Each rule is a production `lhs -> rhs`, where `rhs` is a list of symbol
 * names. A name that never appears as an `lhs` is treated as a terminal
 * (a lexical token) by createGrammar below. The rule order is kept exactly
 * as written.
 *
 * NOTE(review): `B[]` and `W[]` are derivable both through the pass rule
 * (`prop -> move [ ]`) and through the malformed rule
 * (`prop -> malformed [ ]`), so the grammar is ambiguous for them —
 * confirm which derivation the Earley implementation returns.
 */
const preGrammarSGF = {
  start: "collection",
  rules: [
    /* A collection holds at least one game tree */
    { lhs: 'collection', rhs: ['tree', 'trees'] },

    /* A tree is a sequence of nodes followed by a forest of sub-trees */
    { lhs: 'tree', rhs: ['(', 'nodes', 'trees', ')'] },

    /* A forest is a list of trees */
    { lhs: 'trees', rhs: ['tree', 'trees'] },

    /* A forest can be empty */
    { lhs: 'trees', rhs: [] },

    /* A sequence of at least one node */
    { lhs: 'nodes', rhs: ['node'] },
    { lhs: 'nodes', rhs: ['node', 'nodes'] },

    /* A node is a ';' followed by a possibly empty sequence of properties */
    { lhs: 'node', rhs: [';', 'props'] },
    { lhs: 'props', rhs: [] },
    { lhs: 'props', rhs: ['prop', 'props'] },

    /* A move is a color plus either one position or a pass (empty brackets) */
    { lhs: 'prop', rhs: ['move', '[', 'text', ']'] },
    { lhs: 'prop', rhs: ['move', '[', ']'] },

    /* A setup is a setup identifier plus one or more positions */
    { lhs: 'prop', rhs: ['setup', 'values'] },

    /* A territory is a territory identifier plus one or more positions */
    { lhs: 'prop', rhs: ['territory', 'values'] },

    /* Single-valued properties: player name, komi, date, size, result */
    { lhs: 'prop', rhs: ['player', '[', 'text', ']'] },
    { lhs: 'prop', rhs: ['komi', '[', 'text', ']'] },
    { lhs: 'prop', rhs: ['date', '[', 'text', ']'] },
    { lhs: 'prop', rhs: ['size', '[', 'text', ']'] },
    { lhs: 'prop', rhs: ['result', '[', 'text', ']'] },

    /* Property identifiers accepted by each category above */
    { lhs: 'move', rhs: ['B'] },
    { lhs: 'move', rhs: ['W'] },
    { lhs: 'setup', rhs: ['AB'] },
    { lhs: 'setup', rhs: ['AW'] },
    { lhs: 'setup', rhs: ['AE'] },
    { lhs: 'territory', rhs: ['TB'] },
    { lhs: 'territory', rhs: ['TW'] },
    { lhs: 'territory', rhs: ['TE'] },
    { lhs: 'player', rhs: ['PB'] },
    { lhs: 'player', rhs: ['PW'] },
    { lhs: 'komi', rhs: ['KM'] },
    { lhs: 'date', rhs: ['DT'] },
    { lhs: 'size', rhs: ['SZ'] },
    { lhs: 'result', rhs: ['RE'] },

    /*
     * Properties that are not supported are ignored.
     * ('prefix' is a terminal here; presumably the token the tokenizer
     * emits for any other property identifier — confirm in sgfTokenize.)
     */
    { lhs: 'prop', rhs: ['ignored', 'values'] },
    { lhs: 'ignored', rhs: ['prefix'] },

    /* Properties with empty brackets are malformed and ignored */
    { lhs: 'prop', rhs: ['malformed', '[', ']'] },
    { lhs: 'malformed', rhs: ['B'] },
    { lhs: 'malformed', rhs: ['W'] },
    { lhs: 'malformed', rhs: ['AB'] },
    { lhs: 'malformed', rhs: ['AW'] },
    { lhs: 'malformed', rhs: ['AE'] },
    { lhs: 'malformed', rhs: ['TB'] },
    { lhs: 'malformed', rhs: ['TW'] },
    { lhs: 'malformed', rhs: ['TE'] },
    { lhs: 'malformed', rhs: ['PB'] },
    { lhs: 'malformed', rhs: ['PW'] },
    { lhs: 'malformed', rhs: ['KM'] },
    { lhs: 'malformed', rhs: ['DT'] },
    { lhs: 'malformed', rhs: ['SZ'] },
    { lhs: 'malformed', rhs: ['RE'] },
    { lhs: 'malformed', rhs: ['prefix'] },

    /* A non-empty sequence of bracketed values */
    { lhs: 'values', rhs: ['[', 'text', ']'] },
    { lhs: 'values', rhs: ['[', 'text', ']', 'values'] }
  ]
};

/*
 * Create a Grammar instance from preGrammar by declaring which
 * symbols are terminal lexical tokens.
 *
 * preGrammar is an object of the form { start, rules }, where every rule
 * is { lhs, rhs } with `rhs` an array of symbol names. A symbol is
 * considered terminal when no rule has it as its `lhs`.
 */
function createGrammar(preGrammar) {
  /* Every name that heads at least one production is a non-terminal. */
  const nonTerminals = new Set(preGrammar.rules.map(r => r.lhs));

  /*
   * With the non-terminals known, build one Rule per production. The
   * second Symbol argument flags right-hand-side symbols as terminal.
   */
  const rules = preGrammar.rules.map(({ lhs, rhs }) => {
    const head = new Symbol(lhs);
    const body = rhs.map(name => new Symbol(name, !nonTerminals.has(name)));
    return new Rule(head, body);
  });

  return new Grammar(rules, new Symbol(preGrammar.start));
}

/* The SGF grammar, built once when the module is loaded. */
const sgfGrammar = createGrammar(preGrammarSGF);

/*
 * Returns the parse tree of the SGF file.
 *
 * sgfFileContents is handed to sgfTokenize; all of its tokens are
 * collected and parsed with a fresh Earley parser over sgfGrammar. The
 * return value is whatever Earley#parse returns.
 */
function sgfTree(sgfFileContents) {
  const sentence = [...sgfTokenize(sgfFileContents)];
  const parser = new Earley(sgfGrammar);
  return parser.parse(sentence);
}

/*
 * Skips one token (expected to be the opening '[') and returns the token
 * that follows it, or undefined when the token stream has ended.
 */
function readBracketedToken(tokens) {
  tokens.next();
  const { done, value } = tokens.next();
  return done ? undefined : value;
}

/*
 * Returns the date and the players' names.
 * Useful for quickly identifying a SGF.
 *
 * Only the tokens up to the end of the first node are examined; no
 * parsing is performed. The result is an object that may contain
 * `whiteName` (from PW), `blackName` (from PB) and `date` (from DT);
 * a property that is not encountered is simply absent. If the token
 * stream ends in the middle of a property, that property is left unset
 * rather than raising a TypeError.
 */
function sgfHead(sgfFileContents) {
  const tokens = sgfTokenize(sgfFileContents);
  const result = {};

  /* Stores the text of the next bracketed token under result[field]. */
  const capture = (field) => {
    const valueToken = readBracketedToken(tokens);
    if (valueToken !== undefined) {
      result[field] = valueToken.text;
    }
  };

  /*
   * Search until the end of the first node.
   * The expected sequence is "( ; ... ;" or "( ; ... )", so scanning
   * stops once three of these delimiters have been seen.
   */
  let count = 0;
  while (count < 3) {
    const i = tokens.next();
    if (i.done) break;
    switch (i.value.text) {
      case ';':
      case '(':
      case ')':
        ++count;
        break;
      case 'PW':
        capture('whiteName');
        break;
      case 'PB':
        capture('blackName');
        break;
      case 'DT':
        capture('date');
        break;
    }
  }
  return result;
}

/*
 * Given a parse tree node, print the tree rooted at it to the console,
 * one node per line.
 *
 * A node that has a `text` property is printed as a leaf: its text,
 * followed by its `token` when the two differ. Any other node is printed
 * as its `token`, and its `children` are printed recursively with the
 * indentation extended by one space.
 */
function display(tree, indent = "") {
  if ('text' in tree) {
    const suffix = (tree.token === tree.text) ? "" : " " + tree.token;
    console.log(indent + tree.text + suffix);
    return;
  }
  console.log(indent + tree.token);
  const childIndent = indent + " ";
  tree.children.forEach(child => display(child, childIndent));
}

export { sgfTree, sgfHead, display };