UNPKG

@lahmatiy/jison

Version:

A parser generator with Bison's API

1,423 lines (1,180 loc) 45.2 kB
// Jison, an LR(0), SLR(1), LALR(1), LR(1) Parser Generator
// Zachary Carter <zach@carter.name>
// MIT X Licensed

const Lexer = require('./lexer');
const ebnfParser = require('ebnf-parser');
const parse = require('./parse');
const { packTable, unpackTable } = require('./pack-table');
const { fnBody } = require('./utils');
const version = require('../package.json').version;

// Action codes stored in the generated parse table.
const SHIFT = 1;
const REDUCE = 2;
const ACCEPT = 3;

// Matches a symbol alias suffix such as `[left]` in `expr[left]`.
// Only used with String.prototype.replace, so the `g` flag carries no state.
const ALIAS_RE = /\[[a-zA-Z_][a-zA-Z0-9_-]*\]/g;

const Jison = exports.Jison = exports;

Jison.version = version;
Jison.print = console.log;
Jison.Generator = createGenerator;
Jison.Parser = function(grammar, options) {
    const gen = createGenerator(grammar, options);

    return gen.createParser();
};

/**
 * Create the generator matching `type` from the merged grammar/caller options.
 * Unknown or missing types fall back to LALR(1).
 *
 * @param {object|string} grammar - grammar definition
 * @param {object} [options] - options overriding `grammar.options`
 * @returns {Generator}
 */
function createGenerator(grammar, options) {
    const opt = { ...grammar.options, ...options };

    switch (opt.type) {
        case 'lr0':
            return new LR0Generator(grammar, opt);

        case 'slr':
            return new SLRGenerator(grammar, opt);

        case 'lr':
        case 'lr1':
            return new LR1Generator(grammar, opt);

        case 'll':
            return new LLGenerator(grammar, opt);

        case 'lalr':
        default:
            return new LALRGenerator(grammar, opt);
    }
}

/**
 * Append to `a` every item of `b` that `a` does not contain yet.
 * Items added during the call are tracked as well, so duplicates inside `b`
 * are appended only once.
 *
 * @param {Array} a - target array (mutated)
 * @param {Iterable} b - candidate items
 * @returns {boolean} true when at least one item was appended
 */
function addMissed(a, b) {
    const index = new Set(a);
    let added = false;

    for (const item of b) {
        if (!index.has(item)) {
            index.add(item);
            a.push(item);
            added = true;
        }
    }

    return added;
}

/** A nonterminal with its productions and computed FIRST/FOLLOW/nullable data. */
class Nonterminal {
    constructor(symbol) {
        this.symbol = symbol;
        this.productions = [];
        this.first = [];
        this.follows = [];
        this.nullable = false;
    }

    toString() {
        return [
            this.symbol,
            this.nullable ? 'nullable' : 'not nullable',
            'Firsts: ' + this.first.join(', '),
            'Follows: ' + this.follows.join(', '),
            'Productions:\n ' + this.productions.join('\n ')
        ].join('\n');
    }
}

/** A single grammar rule `symbol -> handle`. */
class Production {
    constructor(symbol, handle, id) {
        this.symbol = symbol;
        this.handle = handle;
        this.id = id;
        this.nullable = false;
        this.first = [];
        this.precedence = 0;
    }

    toString() {
        return this.symbol + ' -> ' + this.handle.join(' ');
    }
}

/**
 * Build a Production from one right-hand side of a rule.
 *
 * `handle` is either a string of space separated symbols, or an array whose
 * first entry is the symbols (string or array) and whose second entry may be
 * the semantic action source. Actions are rewritten (named references, `$$`,
 * `$n`, `@n`) and grouped by identical source in `actionGroups`.
 *
 * @param {number} id - production id, used for the `case` label
 * @param {string|Array} handle - right-hand side definition
 * @param {string} symbol - left-hand side nonterminal
 * @param {function(string)} addSymbol - registers a symbol of the grammar
 * @param {object} actionGroups - map of action source -> list of case labels (mutated)
 * @returns {Production}
 */
function buildProduction(id, handle, symbol, addSymbol, actionGroups) {
    let rhs;

    if (Array.isArray(handle)) {
        rhs = typeof handle[0] === 'string'
            ? handle[0].trim().split(' ')
            : handle[0].slice(0);

        // Register symbols without their alias suffix; otherwise `id[alias]`
        // would be recorded as a separate (bogus) terminal.
        rhs.forEach(s => addSymbol(s.replace(ALIAS_RE, '')));

        if (typeof handle[1] === 'string' || handle.length === 3) {
            // semantic action specified
            const label = 'case ' + id + ':';
            let action = handle[1];

            // replace named semantic values ($nonterminal)
            if (/[$@][a-zA-Z]/.test(action)) {
                // Null-prototype maps: a reference like `$toString` must not
                // resolve to an Object.prototype member.
                const count = Object.create(null);
                const names = Object.create(null);

                for (let i = 0; i < rhs.length; i++) {
                    // check for aliased names, e.g., id[alias]
                    let rhsi = rhs[i].match(/\[[a-zA-Z][a-zA-Z0-9_-]*\]/);

                    if (rhsi) {
                        rhsi = rhsi[0].slice(1, -1);
                        rhs[i] = rhs[i].slice(0, rhs[i].indexOf('['));
                    } else {
                        rhsi = rhs[i];
                    }

                    if (names[rhsi]) {
                        // repeated name: only the numbered form is added
                        names[rhsi + (++count[rhsi])] = i + 1;
                    } else {
                        names[rhsi] = i + 1;
                        names[rhsi + '1'] = i + 1;
                        count[rhsi] = 1;
                    }
                }

                action = action.replace(
                    /([@$])([a-zA-Z][a-zA-Z0-9_]*)/g,
                    (str, prefix, pl) => names[pl] ? prefix + names[pl] : str
                );
            }

            action = action
                // replace references to $$ with this.$, and @$ with this._$
                .replace(/([^'"])\$\$|^\$\$/g, '$1this.$')
                .replace(/@[0$]/g, 'this._$')
                // replace semantic value references ($n) with stack value (stack[n])
                .replace(/\$(-?\d+)/g, (_, n) =>
                    '$$[$0' + (parseInt(n, 10) - rhs.length || '') + ']'
                )
                // same as above for location references (@n)
                .replace(/@(-?\d+)/g, (_, n) =>
                    '_$[$0' + (n - rhs.length || '') + ']'
                );

            if (action in actionGroups) {
                actionGroups[action].push(label);
            } else {
                actionGroups[action] = [label];
            }
        }

        // strip aliases
        rhs = rhs.map(s => s.replace(ALIAS_RE, ''));
    } else {
        // no action -> don't care about aliases; strip them.
        rhs = handle
            .replace(ALIAS_RE, '')
            .trim()
            .split(' ');
        rhs.forEach(s => addSymbol(s));
    }

    return new Production(symbol, rhs, id);
}

/**
 * Base generator: normalizes the grammar into productions, symbol tables and
 * the `performAction` source. Concrete subclasses build the parse table.
 */
class Generator {
    /**
     * @param {object|string} grammar - grammar object, or grammar source parsed with ebnf-parser
     * @param {object} [opt] - options overriding `grammar.options`
     */
    constructor(grammar, opt) {
        if (typeof grammar === 'string') {
            grammar = ebnfParser.parse(grammar);
        }

        const options = { ...grammar.options, ...opt };

        this.DEBUG = options.debug || false;
        this.terms = Object.create(null);
        this.symbols = [];
        this.operators = Object.create(null);
        this.productions = [];
        this.conflicts = 0;
        this.resolutions = [];
        this.options = options;
        this.parseParams = grammar.parseParams;
        this.yy = {}; // accessed as yy free variable in the parser/lexer actions

        // source included in semantic action execution scope
        if (grammar.actionInclude) {
            this.actionInclude = typeof grammar.actionInclude === 'function'
                ? fnBody(grammar.actionInclude)
                : grammar.actionInclude;
        }

        this.moduleInclude = grammar.moduleInclude || '';

        this.processGrammar(grammar);

        this.lexer = grammar.lex
            ? new Lexer(grammar.lex, this.terminals_)
            : null;
    }

    /** Convert the grammar definition into productions and augment it. */
    processGrammar(grammar) {
        if (this.DEBUG) {
            this.trace('Processing grammar.');
        }

        let bnf = grammar.bnf;
        let tokens = grammar.tokens;

        this.nonterminals = Object.create(null);

        if (!bnf && grammar.ebnf) {
            bnf = ebnfParser.transform(grammar.ebnf);
        }

        if (tokens) {
            tokens = typeof tokens === 'string'
                ? tokens.trim().split(' ')
                : tokens.slice(0);
        }

        // calculate precedence of operators
        this.operators = processOperators(grammar.operators);

        // build productions from cfg
        this.buildProductions(bnf, this.productions, this.nonterminals, this.symbols, this.operators);

        if (tokens && this.terminals.length !== tokens.length) {
            this.trace('Warning: declared tokens differ from tokens found in rules.');
            this.trace(this.terminals);
            this.trace(tokens);
        }

        // augment the grammar
        this.augmentGrammar(grammar);
    }

    /**
     * Add the `$accept -> start $end` production and the `$accept`/`$end` symbols.
     *
     * @throws {Error} when the grammar has no rules or the start symbol is unknown
     */
    augmentGrammar(grammar) {
        if (this.productions.length === 0) {
            throw new Error('Grammar error: must have at least one rule.');
        }

        // use specified start symbol, or default to first user defined production
        this.startSymbol = grammar.start || grammar.startSymbol || this.productions[0].symbol;

        if (!this.nonterminals[this.startSymbol]) {
            throw new Error('Grammar error: startSymbol must be a non-terminal found in your grammar.');
        }

        this.EOF = '$end';

        // augment the grammar
        const acceptProduction = new Production('$accept', [this.startSymbol, '$end'], 0);

        this.productions.unshift(acceptProduction);

        // prepend parser tokens
        this.symbols.unshift('$accept', this.EOF);
        this.symbols_.$accept = 0;
        this.symbols_[this.EOF] = 1;
        this.terminals.unshift(this.EOF);

        this.nonterminals.$accept = new Nonterminal('$accept');
        this.nonterminals.$accept.productions.push(acceptProduction);

        // add follow $ to start symbol
        this.nonterminals[this.startSymbol].follows.push(this.EOF);

        if (this.DEBUG) {
            for (const [id, sym] of Object.entries(this.symbols)) {
                this.trace(`${sym}(${id})`);
            }
        }
    }

    /**
     * Fill `productions`, `nonterminals` and `symbols` from the `bnf` rules and
     * set `terminals`, `terminals_`, `symbols_`, `productions_`,
     * `hasErrorRecovery` and the `performAction` source on the generator.
     */
    buildProductions(bnf, productions, nonterminals, symbols, operators) {
        const actionGroups = Object.create(null);
        const productions_ = [0];
        const terminals = [];
        const terminals_ = Object.create(null);
        const symbols_ = Object.create(null);
        let symbolId = 1;
        let hasErrorRecovery = false; // has error recovery
        let actions = [
            '/* this == yyval */',
            this.actionInclude || '',
            'const $0 = $$.length - 1;',
            'switch (yystate) {'
        ];

        function addSymbol(s) {
            if (s === 'error') {
                hasErrorRecovery = true;
            }

            if (s && !symbols_[s]) {
                symbols_[s] = ++symbolId;
                symbols.push(s);
            }
        }

        // add error symbol; will be third symbol, or "2" ($accept, $end, error)
        addSymbol('error');
        // registering the symbol itself must not count as using error recovery
        hasErrorRecovery = false;

        for (const symbol of Object.keys(bnf)) {
            addSymbol(symbol);
            nonterminals[symbol] = new Nonterminal(symbol);

            const prods = typeof bnf[symbol] === 'string'
                ? bnf[symbol].split(/\s*\|\s*/g)
                : bnf[symbol].slice(0);

            for (const handle of prods) {
                const production = buildProduction(productions.length + 1, handle, symbol, addSymbol, actionGroups);

                // set precedence
                if (Array.isArray(handle)) {
                    // the options slot follows the action when one is present
                    const slotIdx = 1 + (typeof handle[1] === 'string' || handle.length === 3);

                    if (handle[slotIdx] && operators[handle[slotIdx].prec]) {
                        production.precedence = operators[handle[slotIdx].prec].precedence;
                    }
                }

                if (production.precedence === 0) {
                    for (const prodHandle of production.handle) {
                        if (prodHandle in nonterminals === false && prodHandle in operators) {
                            // FIXME (note truncated in the original source): should it break on first ...?
                            production.precedence = operators[prodHandle].precedence;
                        }
                    }
                }

                // add to dicts
                nonterminals[symbol].productions.push(production);
                productions.push(production);
                productions_.push([
                    symbols_[production.symbol],
                    production.handle[0] === '' ? 0 : production.handle.length
                ]);
            }
        }

        for (const action in actionGroups) {
            actions.push(actionGroups[action].join(' '), action, 'break;');
        }

        for (const [sym, id] of Object.entries(symbols_)) {
            if (sym in nonterminals === false) {
                terminals.push(sym);
                terminals_[id] = sym;
            }
        }

        this.hasErrorRecovery = hasErrorRecovery;
        this.terminals = terminals;
        this.terminals_ = terminals_;
        this.symbols_ = symbols_;
        this.productions_ = productions_;

        actions.push('}');
        actions = actions.join('\n')
            .replace(/YYABORT/g, 'return false')
            .replace(/YYACCEPT/g, 'return true');

        // the parameter array is joined with "," by the string concatenation
        this.performAction = 'function(' + [
            'yytext',
            'yyleng',
            'yylineno',
            'yy',
            'yystate /* action[1] */',
            '$$ /* vstack */',
            '_$ /* lstack */',
            ...this.parseParams || []
        ] + ') {\n' + actions + '\n}';
    }

    createParser() {
        throw new Error('Calling abstract method.');
    }

    // prints through Jison.print only when DEBUG is set
    trace(...args) {
        if (this.DEBUG) {
            Jison.print(...args);
        }
    }

    warn(...args) {
        Jison.print(...args);
    }

    error(msg) {
        throw new Error(msg);
    }
}

/**
 * Set precedence and associativity of operators.
 *
 * @param {Array<Array<string>>} [ops] - rows of `[assoc, token, token, ...]`; later rows get higher precedence
 * @returns {object} map of token -> `{ precedence, assoc }`
 */
function processOperators(ops) {
    const operators = Object.create(null);

    if (ops) {
        for (let i = 0, prec; prec = ops[i]; i++) {
            for (let k = 1; k < prec.length; k++) {
                operators[prec[k]] = {
                    precedence: i + 1,
                    assoc: prec[0]
                };
            }
        }
    }

    return operators;
}

/*
 * Lookahead parsers
 * */
class LookaheadGenerator extends Generator {
    computeLookaheads() {
        // run once: later calls become no-ops
        this.computeLookaheads = () => {};

        this.nullableSets();
        this.firstSets();
        this.followSets();
    }

    // calculate follow sets based on first and nullable
    followSets() {
        const { productions, nonterminals } = this;
        let cont = true;

        // loop until no further changes have been made
        while (cont) {
            cont = false;

            for (const production of productions) {
                // go_ is only defined for the Simple LALR algorithm, where it
                // is used to determine follows in context
                const ctx = this.go_;

                for (let i = 0, t; t = production.handle[i]; ++i) {
                    if (t in nonterminals === false) {
                        continue;
                    }

                    const bool = !ctx ||
                        this.nterms_[t] === this.go_(production.symbol, production.handle.slice(0, i));

                    // first() returns a fresh array for a handle, so it is safe to extend.
                    // When `t` is the last symbol the remainder is empty:
                    // first() is [] and nullable() is true.
                    const set = this.first(production.handle, i + 1);

                    if (this.nullable(production.handle, i + 1) && bool) {
                        set.push(...nonterminals[production.symbol].follows);
                    }

                    if (addMissed(nonterminals[t].follows, set)) {
                        cont = true;
                    }
                }
            }
        }
    }

    // return the FIRST set of a symbol or series of symbols
    first(symbol, offset = 0) {
        // epsilon
        if (symbol === '') {
            return [];
        }

        // RHS
        if (Array.isArray(symbol)) {
            const firsts = new Set();

            for (let i = offset, t; t = symbol[i]; i++) {
                if (t in this.nonterminals) {
                    this.nonterminals[t].first.forEach(firsts.add, firsts);
                } else {
                    firsts.add(t);
                }

                if (!this.nullable(t)) {
                    break;
                }
            }

            return [...firsts];
        }

        // nonterminal
        if (symbol in this.nonterminals) {
            return this.nonterminals[symbol].first;
        }

        // terminal
        return [symbol];
    }

    // fixed-point calculation of FIRST sets
    firstSets() {
        const { productions, nonterminals } = this;
        let cont = true;

        // loop until no further changes have been made
        while (cont) {
            cont = false;

            for (const production of productions) {
                const firsts = this.first(production.handle);

                if (firsts.length !== production.first.length) {
                    production.first = firsts;
                    cont = true;
                }
            }

            for (const symbol of Object.keys(nonterminals)) {
                const firsts = new Set();

                for (const production of nonterminals[symbol].productions) {
                    production.first.forEach(firsts.add, firsts);
                }

                if (firsts.size !== nonterminals[symbol].first.length) {
                    nonterminals[symbol].first = [...firsts];
                    cont = true;
                }
            }
        }
    }

    // check if a token or series of tokens is nullable
    nullable(symbol, offset = 0) {
        // epsilon
        if (symbol === '') {
            return true;
        }

        // RHS
        if (Array.isArray(symbol)) {
            for (let i = offset; i < symbol.length; i++) {
                if (!this.nullable(symbol[i])) {
                    return false;
                }
            }

            return true;
        }

        // nonterminal
        if (symbol in this.nonterminals) {
            return this.nonterminals[symbol].nullable;
        }

        // terminal
        return false;
    }

    // fixed-point calculation of NULLABLE
    nullableSets() {
        const { productions, nonterminals } = this;
        let cont = true;

        // loop until no further changes have been made
        while (cont) {
            cont = false;

            // check if each production is nullable
            for (const production of productions) {
                if (!production.nullable) {
                    // production is nullable if all tokens are nullable
                    if (this.nullable(production.handle)) {
                        production.nullable = true;
                        cont = true;
                    }
                }
            }

            // check if each symbol is nullable
            for (const symbol in nonterminals) {
                if (!this.nullable(symbol)) {
                    for (const production of nonterminals[symbol].productions) {
                        if (production.nullable) {
                            nonterminals[symbol].nullable = true;
                            cont = true;
                        }
                    }
                }
            }
        }
    }
}

/*
 * Mixin for common LR parser behavior
 * */
const NONASSOC = 0;

class LRGenerator extends LookaheadGenerator {
    buildTable() {
        this.states = this.canonicalCollection();
        this.table = this.parseTable(this.states);
        this.defaultActions = findDefaults(this.table);
    }

    closureOperation(itemSet) {
        const { nonterminals } = this;
        const closureSet = new this.ItemSet(...itemSet);

        // items pushed during the loop are visited as well
        for (const item of closureSet) {
            const symbol = item.markedSymbol;

            // if token is a non-terminal, recursively add closures
            if (symbol in nonterminals) {
                for (const production of nonterminals[symbol].productions) {
                    closureSet.push(new this.Item(production, 0));
                }
            } else if (!symbol) {
                // reduction
                closureSet.reductions.push(item);
                closureSet.inadequate = closureSet.reductions.length > 1 || closureSet.shifts;
            } else {
                // shift
                closureSet.shifts = true;
                closureSet.inadequate = closureSet.reductions.length > 0;
            }
        }

        return closureSet;
    }

    gotoOperation(itemSet, symbol) {
        const gotoSet = new this.ItemSet();

        for (const item of itemSet) {
            if (item.markedSymbol === symbol) {
                gotoSet.push(new this.Item(item.production, item.dotPosition + 1, item.follows));
            }
        }

        return gotoSet.length === 0
            ? gotoSet
            : this.closureOperation(gotoSet);
    }

    /* Create unique set of item sets
     * */
    canonicalCollection() {
        const firstItem = new this.Item(this.productions[0], 0, [this.EOF]);
        const firstState = this.closureOperation(new this.ItemSet(firstItem));
        const states = [firstState];

        states.has = Object.create(null);
        // key states by the same id that canonicalCollectionInsert() looks up
        states.has[firstState.valueOf()] = 0;

        // states pushed during the loop are visited as well
        for (const itemSet of states) {
            for (const item of itemSet) {
                if (item.markedSymbol && item.markedSymbol !== this.EOF) {
                    this.canonicalCollectionInsert(itemSet, item.markedSymbol, states);
                }
            }
        }

        return states;
    }

    // Pushes a unique state into the queue. Some parsing algorithms may perform additional operations
    canonicalCollectionInsert(itemSet, symbol, states) {
        const goto = this.gotoOperation(itemSet, symbol);

        // add goto to queue if not empty or duplicate
        if (goto.length > 0) {
            const gotoId = goto.valueOf();

            if (gotoId in states.has) {
                itemSet.edges[symbol] = states.has[gotoId]; // store goto transition for table
            } else {
                itemSet.edges[symbol] = states.length; // store goto transition for table
                states.has[gotoId] = states.length;
                states.push(goto);
            }
        }
    }

    /**
     * Build the action/goto table: one row per item set, keyed by symbol id.
     * Conflicts are resolved through resolveConflict() and recorded in
     * `this.resolutions` / `this.conflicts`.
     */
    parseTable(itemSets) {
        const conflictedStates = []; // state numbers that had a default-resolved conflict

        // for each item set
        const states = itemSets.map((itemSet, k) => {
            const state = Object.create(null);

            // set shift and goto actions
            for (const stackSymbol in itemSet.edges) {
                for (const item of itemSet) {
                    // find shift and goto actions
                    if (item.markedSymbol === stackSymbol) {
                        const gotoState = itemSet.edges[stackSymbol];

                        if (this.nonterminals[stackSymbol]) {
                            // store state to go to after a reduce
                            // this.trace(k, stackSymbol, 'g'+gotoState);
                            state[this.symbols_[stackSymbol]] = gotoState;
                        } else {
                            // this.trace(k, stackSymbol, 's'+gotoState);
                            state[this.symbols_[stackSymbol]] = [SHIFT, gotoState];
                        }
                    }
                }
            }

            // set accept action
            for (const item of itemSet) {
                if (item.markedSymbol === this.EOF) {
                    // accept
                    state[this.symbols_[this.EOF]] = [ACCEPT];
                }
            }

            // set reductions and resolve potential conflicts
            for (const reduction of itemSet.reductions) {
                // if parser uses lookahead, only enumerate those terminals
                const terminals = this.lookAheads
                    ? this.lookAheads(itemSet, reduction)
                    : this.terminals;

                for (const stackSymbol of terminals) {
                    let action = state[this.symbols_[stackSymbol]];

                    // Reading a terminal and current position is at the end of a production, try to reduce
                    if (action && action.length) {
                        const op = this.operators[stackSymbol];
                        const solution = resolveConflict(
                            reduction.production,
                            op,
                            [REDUCE, reduction.production.id],
                            Array.isArray(action[0]) ? action[0] : action
                        );

                        this.resolutions.push([k, stackSymbol, solution]);

                        if (solution.bydefault) {
                            this.conflicts++;

                            if (this.DEBUG) {
                                conflictedStates.push(k);
                                this.warn(
                                    'Conflict in grammar: multiple actions possible when lookahead token is ',
                                    stackSymbol,
                                    ' in state ',
                                    k,
                                    '\n- ',
                                    printAction(solution.reduce, this),
                                    '\n- ',
                                    printAction(solution.shift, this)
                                );
                            }

                            if (this.options.noDefaultResolve) {
                                if (!Array.isArray(action[0])) {
                                    action = [action];
                                }

                                action.push(solution.reduce);
                            }
                        } else {
                            action = solution.action;
                        }
                    } else {
                        action = [REDUCE, reduction.production.id];
                    }

                    if (action && action.length) {
                        state[this.symbols_[stackSymbol]] = action;
                    } else if (action === NONASSOC) {
                        state[this.symbols_[stackSymbol]] = undefined;
                    }
                }
            }

            return state;
        });

        if (this.DEBUG && conflictedStates.length > 0) {
            this.warn('\nStates with conflicts:');

            // iterate the recorded state numbers (not array indices), each once
            for (const state of new Set(conflictedStates)) {
                this.warn('State ' + state);
                this.warn(' ', itemSets[state].join('\n '));
            }
        }

        return states;
    }

    parseError(str, hash) {
        if (hash.recoverable) {
            this.trace(str);
        } else {
            const error = new Error(str);

            error.hash = hash;
            throw error;
        }
    }

    /**
     * Generate the parser source wrapped for a module format.
     *
     * @param {string} [format='iife'] - 'cjs', 'esm' or 'iife'
     * @param {object} [options] - passed to generateModuleBody()
     * @returns {string}
     * @throws {Error} on an unknown format
     */
    generateModule(format = 'iife', options) {
        const banner = '/* parser generated by jison ' + version + ' */\n';
        const body = this.generateModuleBody(options) + '\n';
        let out = '';

        switch (format) {
            case 'cjs':
                out += banner;
                out += body;
                out += 'module.exports = parser.parser = parser;';
                break;

            case 'esm':
                out += banner;
                out += body;
                out += 'export default parser;\n';
                break;

            case 'iife':
                out += '(function(){\n';
                out += banner;
                out += body;
                out += 'return parser;\n';
                out += '})();\n';
                break;

            default:
                throw new Error(`Unknown module format "${format}"`);
        }

        return out;
    }

    /** Generate the source defining `lexer`, `Parser` and the `parser` instance. */
    generateModuleBody(options) {
        let parseSource = String(parse);

        if (!this.hasErrorRecovery) {
            parseSource = removeErrorRecovery(parseSource);
        }

        if (this.options['token-stack']) {
            parseSource = addTokenStack(parseSource);
        }

        // Generate code with fresh variable names
        const tableCode = this.generateTableCode(this.table, options && options.packTable);

        // Generate the initialization code
        let out = tableCode.commonCode;

        if (this.lexer && this.lexer.generateModule) {
            out += '\nconst lexer = ' + this.lexer.generateModule('iife') + ';';
        } else {
            out += '\nconst lexer = null;';
        }

        // Generate the module creation code.
        // Method sources are re-emitted as shorthand methods: everything before
        // the first "(" is replaced by the property name.
        out += '\nfunction Parser() {\n this.yy = {};\n}\n' +
            'Parser.prototype = {' + [
                'Parser',
                // 'yy: {}',
                'lexer',
                'symbols_: ' + JSON.stringify(this.symbols_),
                'terminals_: ' + JSON.stringify(this.terminals_).replace(/"(\d+)":/g, '$1:'),
                'productions_: ' + JSON.stringify(this.productions_),
                'table: ' + tableCode.moduleCode,
                'defaultActions: ' + JSON.stringify(this.defaultActions).replace(/"(\d+)":/g, '$1:'),
                'performAction' + String(this.performAction).replace(/^[^(]*\(/, '('),
                'trace' + String(this.trace).replace(/^[^(]*\(/, '('),
                'parseError' + String(this.parseError || (this.hasErrorRecovery ? traceParseError : this.parseError)).replace(/^[^(]*\(/, '('),
                'parse' + parseSource.replace(/^[^(]*\(/, '(')
            ].join(',\n') + '};';

        out += '\nconst parser = new Parser();';
        out += '\n' + this.moduleInclude;

        return out;
    }

    // Generate code that represents the specified parser table
    generateTableCode(table, mode) {
        if (mode === 'advanced') {
            return this.__table || (this.__table = {
                commonCode: '',
                moduleCode: `(${unpackTable})(${JSON.stringify(packTable(table))})`
            });
        }

        // Function that extends an object with the given value for all given keys
        // e.g., o([1, 3, 4], [6, 7], { x: 1, y: 2 }) = { 1: [6, 7]; 3: [6, 7], 4: [6, 7], x: 1, y: 2 }
        // (the function is stringified into the generated code)
        const createObjectCode = 'o=' + function(keys, v, o = {}) {
            keys.forEach(key => o[key] = v);
            return o;
        };
        const variables = [createObjectCode];

        let moduleCode = JSON.stringify(table);

        // Don't surround numerical property name numbers in quotes
        moduleCode = moduleCode.replace(/"(\d+)"(?=:)/g, '$1');

        // Replace objects with several identical values by function calls
        // e.g., { 1: [6, 7]; 3: [6, 7], 4: [6, 7], 5: 8 } = o([1, 3, 4], [6, 7], { 5: 8 })
        moduleCode = moduleCode.replace(/\{\d+:[^\}]+,\d+:[^\}]+\}/g, function(object) {
            // Find the value that occurs with the highest number of keys
            const keyValueMatcher = /(\d+):([^:]+)(?=,\d+:|\})/g;
            const keys = Object.create(null);
            let keyValueMatch;
            let frequentValue;
            let maxKeyCount = 0;

            while (keyValueMatch = keyValueMatcher.exec(object)) {
                // For each value, store the keys where that value occurs
                const [, key, value] = keyValueMatch;
                let keyCount;

                if (value in keys) {
                    keyCount = keys[value].push(key);
                } else {
                    keys[value] = [key];
                    keyCount = 1;
                }

                // Remember this value if it is the most frequent one
                if (keyCount > maxKeyCount) {
                    maxKeyCount = keyCount;
                    frequentValue = value;
                }
            }

            // Keep the literal unless the most frequent value occurs multiple times
            if (maxKeyCount <= 1) {
                return object;
            }

            // Collect all non-frequent values into a remainder object
            const keyValues = [];

            for (const value in keys) {
                if (value !== frequentValue) {
                    for (const key of keys[value]) {
                        keyValues.push(key + ':' + value);
                    }
                }
            }

            // Create the function call `o(keys, value, remainder)`
            return `o([${keys[frequentValue]}],${frequentValue}${keyValues.length ? ',{' + keyValues + '}' : ''})`;
        });

        // Count occurrences of number lists
        const lists = Object.create(null);
        const listMatcher = /\[[0-9,]+\]/g;
        let varNameSeed = 0;
        let match;
        const createVariable = () => '$V' + varNameSeed++;

        while (match = listMatcher.exec(moduleCode)) {
            lists[match[0]] = (lists[match[0]] || 0) + 1;
        }

        // Replace frequently occurring number lists with variables
        moduleCode = moduleCode.replace(listMatcher, list => {
            let listId = lists[list];

            // If listId is a number, it represents the list's occurrence frequency
            if (typeof listId === 'number') {
                if (listId === 1) {
                    // If the list does not occur frequently, represent it by the list
                    lists[list] = listId = list;
                } else {
                    // If the list occurs frequently, represent it by a newly assigned variable
                    lists[list] = listId = createVariable();
                    variables.push(listId + '=' + list);
                }
            }

            return listId;
        });

        // Return the variable initialization code and the table code
        return {
            commonCode: 'let ' + variables.join(',') + ';',
            moduleCode
        };
    }

    /**
     * Evaluate the generated module body and return a parser instance.
     * NOTE: this deliberately evaluates generated source with Function();
     * grammar actions and includes are executed as code.
     */
    createParser() {
        // The newline keeps the return statement intact when moduleInclude ends
        // with a line comment or lacks a trailing semicolon.
        const parser = Function(this.generateModuleBody() + '\nreturn new Parser;')();

        // backwards compatibility
        parser.lexer = this.lexer;
        parser.generateModule = (...args) => {
            this.lexer = parser.lexer;
            return this.generateModule(...args);
        };

        return parser;
    }
}

/** An LR item: a production with a dot position and (optionally) lookaheads. */
LRGenerator.prototype.Item = class {
    constructor(production, dotPosition = 0, follows = []) {
        this.production = production;
        this.dotPosition = dotPosition;
        this.follows = follows;
        this.id = production.id + 'a' + this.dotPosition;
        this.markedSymbol = production.handle[this.dotPosition];
    }

    eq(e) {
        return e.id === this.id;
    }

    handleToString() {
        const handle = this.production.handle.slice(0);

        handle[this.dotPosition] = '.' + (handle[this.dotPosition] || '');

        return handle.join(' ');
    }

    toString() {
        return this.production.symbol + ' -> ' + this.handleToString() +
            (this.follows.length === 0 ? '' : ' #lookaheads= ' + this.follows.join(' '));
    }
};

/** An array of items without duplicates (by item id), plus per-state bookkeeping. */
LRGenerator.prototype.ItemSet = class extends Array {
    // derived arrays (map, filter, ...) are plain arrays
    static get [Symbol.species]() {
        return Array;
    }

    constructor(...args) {
        super(...args);
        this.ids_ = new Set(this.map(item => item.id));
        this.reductions = [];
        this.goes = Object.create(null);
        this.edges = Object.create(null);
        this.shifts = false;
        this.inadequate = false;
    }

    push(...items) {
        for (const item of items) {
            if (!this.contains(item)) {
                this.ids_.add(item.id);
                super.push(item);
            }
        }

        return this.length;
    }

    contains(item) {
        return this.ids_.has(item.id);
    }

    // identity of the set: sorted item ids
    valueOf() {
        return [...this.ids_].sort().join('|');
    }
};

/**
 * Find states with only one action, a reduction.
 * States without any entry, or whose single entry has no action (a
 * non-associative slot is stored as undefined), are skipped.
 *
 * @param {Array<object>} states - parse table rows
 * @returns {object} map of state index -> the reduce action
 */
function findDefaults(states) {
    const defaults = Object.create(null);

    states.forEach((state, k) => {
        const acts = Object.keys(state);

        if (acts.length !== 1) {
            return;
        }

        const action = state[acts[0]];

        if (Array.isArray(action) && action[0] === REDUCE) {
            // only one action in state and it's a reduction
            defaults[k] = action;
        }
    });

    return defaults;
}

/**
 * Resolve a shift-reduce or reduce-reduce conflict.
 *
 * @param {Production} production - production of the reduce action
 * @param {object} [operator] - precedence info of the lookahead token
 * @param {Array} reduce - the reduce action
 * @param {Array} shift - the action already in the table (a shift, or a reduce for R/R)
 * @returns {object} solution; `bydefault` is set when no precedence rule decided it
 */
function resolveConflict(production, operator, reduce, shift) {
    const solution = { production, operator, reduce, shift };

    if (shift[0] === REDUCE) {
        solution.msg = 'Resolve R/R conflict (use first production declared in grammar.)';
        solution.action = shift[1] < reduce[1] ? shift : reduce;

        if (shift[1] !== reduce[1]) {
            solution.bydefault = true;
        }

        return solution;
    }

    if (production.precedence === 0 || !operator) {
        solution.msg = 'Resolve S/R conflict (shift by default.)';
        solution.bydefault = true;
        solution.action = shift;
    } else if (production.precedence < operator.precedence) {
        solution.msg = 'Resolve S/R conflict (shift for higher precedent operator.)';
        solution.action = shift;
    } else if (production.precedence === operator.precedence) {
        if (operator.assoc === 'right') {
            solution.msg = 'Resolve S/R conflict (shift for right associative operator.)';
            solution.action = shift;
        } else if (operator.assoc === 'left') {
            solution.msg = 'Resolve S/R conflict (reduce for left associative operator.)';
            solution.action = reduce;
        } else if (operator.assoc === 'nonassoc') {
            solution.msg = 'Resolve S/R conflict (no action for non-associative operator.)';
            solution.action = NONASSOC;
        }
    } else {
        solution.msg = 'Resolve conflict (reduce for higher precedent production.)';
        solution.action = reduce;
    }

    return solution;
}

function addTokenStack(fn) {
    // define variables to suppress warnings in function below
    let tstack;
    let lexer;
    let EOF;

    // replace the marked section with a lex function that supports token stacks
    // (the arrow function is stringified, not called here)
    return fn.replace(/\/\*\* @replace token stack \*\/(.|\s)+?\/\*\* @replace \*\//, String(() => {
        let token = tstack.pop() || lexer.lex() || EOF;

        // if token isn't its numeric value, convert
        if (typeof token !== 'number') {
            if (Array.isArray(token)) {
                tstack = token;
                token = tstack.pop();
            }

            token = this.symbols_[token] || token;
        }

        return token;
    }));
}

// returns parse function without error recovery code
function removeErrorRecovery(fn) {
    return fn.replace(/\/\*\* @cut recovery \*\/.+/g, '');
}

function printAction(a, gen) {
    switch (a[0]) {
        case SHIFT:
            return `shift token (then go to state ${a[1]})`;

        case REDUCE:
            return `reduce by rule: ${gen.productions[a[1]]}`;

        default:
            return 'accept';
    }
}

function traceParseError(err) {
    this.trace(err);
}

/*
 * LR(0) Parser
 * */
class LR0Generator extends LRGenerator {
    get type() {
        return 'LR(0)';
    }

    constructor(...args) {
        super(...args);
        this.buildTable();
    }
}

exports.LR0Generator = LR0Generator;

/*
 * Simple LALR(1)
 * */
class LALRGenerator extends LRGenerator {
    get type() {
        return 'LALR(1)';
    }

    constructor(grammar, options) {
        super(grammar, options);

        const { onDemandLookahead = false } = options || {};

        this.states = this.canonicalCollection();
        this.terms_ = Object.create(null);
        this.inadequateStates = [];

        // helper grammar G' whose symbols are `state:symbol` pairs
        this.newg = Object.assign(Object.create(LookaheadGenerator.prototype), {
            DEBUG: false,
            trace: this.trace,
            nterms_: Object.create(null),
            nonterminals: Object.create(null),
            productions: [],
            go_: (r, B) => {
                let q = parseInt(r.split(':')[0], 10); // grab state #

                for (const b of B) {
                    const ref = b.slice(b.indexOf(':') + 1);

                    q = this.states[q].edges[ref] || q;
                }

                return q;
            }
        });

        // if true, only lookaheads in inadequate states are computed (faster, larger table)
        // if false, lookaheads for all reductions will be computed (slower, smaller table)
        this.onDemandLookahead = onDemandLookahead || false;

        this.buildNewGrammar();
        this.newg.computeLookaheads();
        this.unionLookaheads();

        this.table = this.parseTable(this.states);
        this.defaultActions = findDefaults(this.table);
    }

    lookAheads(state, item) {
        return this.onDemandLookahead && !state.inadequate
            ? this.terminals
            : item.follows;
    }

    // follow the symbols of `w` from state `p`, naming each step `state:symbol`
    goPath(p, w) {
        const path = [];
        let q = parseInt(p, 10);

        for (const ref of w) {
            const t = ref ? q + ':' + ref : '';

            if (t) {
                this.newg.nterms_[t] = q;
            }

            path.push(t);
            this.terms_[t] = ref;
            q = this.states[q].edges[ref] || q;
        }

        return { path, endState: q };
    }

    // every disjoint reduction of a nonterminal becomes a production in G'
    buildNewGrammar() {
        const newg = this.newg;

        this.states.forEach((state, i) => {
            for (const item of state) {
                if (item.dotPosition === 0) {
                    // new symbols are a combination of state and transition symbol
                    const symbol = i + ':' + item.production.symbol;

                    this.terms_[symbol] = item.production.symbol;
                    newg.nterms_[symbol] = i;

                    if (symbol in newg.nonterminals === false) {
                        newg.nonterminals[symbol] = new Nonterminal(symbol);
                    }

                    const pathInfo = this.goPath(i, item.production.handle);
                    const production = new Production(symbol, pathInfo.path, newg.productions.length);

                    newg.productions.push(production);
                    newg.nonterminals[symbol].productions.push(production);

                    // store the transition that gets 'backed up to' after reduction on path
                    const handle = item.production.id;
                    const goes = this.states[pathInfo.endState].goes;

                    if (handle in goes) {
                        goes[handle].push(symbol);
                    } else {
                        goes[handle] = [symbol];
                    }
                }
            }

            if (state.inadequate) {
                this.inadequateStates.push(state);
            }
        });
    }

    // copy the follows computed on G' back onto the reduction items
    unionLookaheads() {
        const states = this.onDemandLookahead
            ? this.inadequateStates
            : this.states;

        for (const state of states) {
            for (const reduction of state.reductions) {
                const follows = new Set(reduction.follows);

                for (const goesSymbol of state.goes[reduction.production.id]) {
                    for (const followSymbol of this.newg.nonterminals[goesSymbol].follows) {
                        const terminal = this.terms_[followSymbol];

                        if (!follows.has(terminal)) {
                            follows.add(terminal);
                            reduction.follows.push(terminal);
                        }
                    }
                }
            }
        }
    }
}

exports.LALRGenerator = LALRGenerator;

/*
 * Lookahead parser definitions
 *
 * Define base type
 * */
class LRLookaheadGenerator extends LRGenerator {
    constructor(...args) {
        super(...args);
        this.computeLookaheads();
        this.buildTable();
    }
}

/*
 * SLR Parser
 * */
class SLRGenerator extends LRLookaheadGenerator {
    get type() {
        return 'SLR(1)';
    }

    lookAheads(_, item) {
        return this.nonterminals[item.production.symbol].follows;
    }
}

exports.SLRGenerator = SLRGenerator;

/*
 * LR(1) Parser
 * */
class LR1Generator extends LRLookaheadGenerator {
    get type() {
        return 'Canonical LR(1)';
    }

    lookAheads(_, item) {
        return item.follows;
    }

    closureOperation(itemSet) {
        const { nonterminals } = this;
        const closureSet = new this.ItemSet(...itemSet);

        // items pushed during the loop are visited as well
        for (const item of closureSet) {
            const symbol = item.markedSymbol;

            // if token is a nonterminal, recursively add closures
            if (symbol in nonterminals) {
                const follows = this.first(item.production.handle, item.dotPosition + 1);

                if (follows.length === 0 ||
                    item.production.nullable ||
                    this.nullable(item.production.handle, item.dotPosition + 1)) {
                    follows.push(...item.follows);
                }

                for (const production of nonterminals[symbol].productions) {
                    closureSet.push(new this.Item(production, 0, follows));
                }
            } else if (!symbol) {
                // reduction
                closureSet.reductions.push(item);
            }
        }

        return closureSet;
    }
}

// LR(1) items are additionally identified by their lookaheads
LR1Generator.prototype.Item = class extends LRGenerator.prototype.Item {
    constructor(...args) {
        super(...args);
        // NOTE: sort() orders the (possibly shared) follows array in place
        this.id = `${this.production.id}a${this.dotPosition}a${this.follows.sort().join(',')}`;
    }
};

exports.LR1Generator = LR1Generator;

/*
 * LL Parser
 * */
class LLGenerator extends LookaheadGenerator {
    get type() {
        return 'LL(1)';
    }

    constructor(...args) {
        super(...args);
        this.computeLookaheads();
        this.table = this.parseTable(this.productions);
    }

    /**
     * Build the LL table: nonterminal -> token -> list of production indexes.
     * More than one index in a cell is counted in `this.conflicts`.
     */
    parseTable(productions) {
        const table = Object.create(null);

        productions.forEach((production, i) => {
            const row = table[production.symbol] || Object.create(null);
            // A set keeps production.first untouched and prevents a token that
            // is in both FIRST and FOLLOW from being counted as a conflict of
            // the production with itself.
            const tokens = new Set(production.first);

            if (this.nullable(production.handle)) {
                for (const token of this.nonterminals[production.symbol].follows) {
                    tokens.add(token);
                }
            }

            for (const token of tokens) {
                if (token in row) {
                    row[token].push(i);
                    this.conflicts++;
                } else {
                    row[token] = [i];
                }
            }

            table[production.symbol] = row;
        });

        return table;
    }
}

exports.LLGenerator = LLGenerator;