UNPKG

@multila/multila-parser-generator

Version:

An LR(1) Parser Generator written in TypeScript

github.com/multila/multila-parser-generator

multila/multila-parser-generator

499 lines • 18.3 kB

JavaScript

"use strict";
/*
  MULTILA Compiler and Computer Architecture Infrastructure
  Copyright (c) 2022 by Andreas Schwenk, contact@multila.org
  Licensed by GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.LR1 = exports.LR1Error = void 0;
const multila_lexer_1 = require("@multila/multila-lexer");
const token_1 = require("@multila/multila-lexer/lib/token");
const lr1rule_1 = require("./lr1rule");
const lr1state_1 = require("./lr1state");
const lr1table_1 = require("./lr1table");
// TODO: resolve shift-reduce conflicts (currently reported via LR1Error)
// TODO: resolve reduce-reduce conflicts (currently reported via LR1Error)
/**
 * Own-property test for the plain-object dictionaries used in this module.
 * The "in" operator also matches inherited keys such as "constructor" or
 * "toString", which are legal identifiers in a grammar.
 * @param dict dictionary object
 * @param key key to look up
 * @returns true, if key is an own property of dict
 */
function hasOwnKey(dict, key) {
    return Object.prototype.hasOwnProperty.call(dict, key);
}
/**
 * Error that may be thrown while table-construction or parsing.
 */
class LR1Error extends Error {
    /**
     * @param msg human readable description of the problem
     */
    constructor(msg) {
        super(msg);
        this.name = 'LR1Error';
    }
}
exports.LR1Error = LR1Error;
/**
 * LR1 parser generator and parser.
 */
class LR1 {
    constructor() {
        /**
         * list of production rules
         */
        this.rules = [];
        /**
         * first set for each non-terminal of rules
         */
        this.first = {};
        /**
         * states created while table creation
         */
        this.states = [];
        /**
         * LR(1) table
         */
        this.table = null;
        /**
         * Callback functions that are automatically called after rule reduction (if
         * provided). Parameter "terminals" contains tokens for all terminal items
         * of a rule. The i-th element in list refers to the i-th terminal in the
         * right-hand side of a rule. Non-terminals are skipped while indexing.
         */
        this.callBacks = {};
    }
    /**
     * Gets the set of production rules.
     * @returns rules as list. The first rule represents the root rule.
     */
    getRules() {
        return this.rules;
    }
    /**
     * Gets the first set, i.e. the FIRST-set of each non-terminal.
     * @returns first set
     */
    getFirst() {
        return this.first;
    }
    /**
     * Gets the generated parse table.
     * @returns the parse table or null if it has not yet been generated
     */
    getTable() {
        return this.table;
    }
    /**
     * Add a callback method that is automatically called after reduction of a
     * production rule while parsing.
     * @param id identifier of the callback function
     * @param f implementation of the callback function
     */
    addCallback(id, f) {
        this.callBacks[id] = f;
    }
    /**
     * Parses production rules in the form "u = v1 v2 ... -> callback;",
     * specified in a small DSL. Grammar is as follows:
     *   rules = { rule };
     *   rule = ID "=" rhs { "|" rhs } ";";
     *   rhs = { item } [ "->" ID ];
     *   item = "INT" | "REAL" | "HEX" | "ID" | "STR" | ID | STR;
     * Previously parsed rules are discarded.
     * @param src rules defined in the DSL specified above
     */
    parseRules(src) {
        this.rules = [];
        const lexer = new multila_lexer_1.Lexer();
        lexer.setTerminals(['->']);
        lexer.pushSource('LR1RULES', src);
        while (lexer.isNotEND()) {
            this.parseRule(lexer);
        }
    }
    /**
     * Parses one DSL rule: rule = ID "=" rhs { "|" rhs } ";";
     * Every alternative becomes a separate production with the same
     * left-hand side.
     * @param lexer lexer positioned at the start of a rule
     */
    parseRule(lexer) {
        const lhs = lexer.ID();
        lexer.TER('=');
        this.parseRhs(lhs, lexer);
        while (lexer.isTER('|')) {
            lexer.next();
            this.parseRhs(lhs, lexer);
        }
        lexer.TER(';');
    }
    /**
     * Parses one right-hand side: rhs = { item } [ "->" ID ];
     * A new production rule is created and filled with the parsed items.
     * @param lhs non-terminal id of the rule
     * @param lexer lexer positioned at the start of the right-hand side
     */
    parseRhs(lhs, lexer) {
        const r = this.addRule(lhs);
        while (lexer.isNotTER('|') &&
            lexer.isNotTER(';') &&
            lexer.isNotTER('->')) {
            this.parseItem(lexer, r);
        }
        if (lexer.isTER('->')) {
            lexer.next();
            r.callBackId = lexer.ID();
        }
    }
    /**
     * Parses one item of a right-hand side and appends it to rule r:
     *   item = "INT" | "REAL" | "HEX" | "ID" | "STR" | ID | STR;
     * The bare words INT, REAL, HEX, ID and STR denote token types; any other
     * identifier is a non-terminal; a string literal is a terminal that is
     * stored with a leading ':'.
     * @param lexer lexer positioned at the item
     * @param r rule that receives the item
     */
    parseItem(lexer, r) {
        if (lexer.isTER('INT') ||
            lexer.isTER('REAL') ||
            lexer.isTER('HEX') ||
            lexer.isTER('ID') ||
            lexer.isTER('STR')) {
            r.addTerminalItem(lexer.getToken().token);
            lexer.next();
        }
        else if (lexer.isID()) {
            const nonTerminal = lexer.ID();
            r.addNonTerminalItem(nonTerminal);
        }
        else if (lexer.isSTR()) {
            const terminal = lexer.STR();
            r.addTerminalItem(':' + terminal);
        }
        else {
            lexer.errorExpected([
                '"INT"',
                '"REAL"',
                '"HEX"',
                '"ID"',
                '"STR"',
                'ID',
                'STR',
            ]);
        }
    }
    /**
     * Create a new production rule.
     * @param lhs non-terminal id of the rule
     * @returns a new rule object
     */
    addRule(lhs) {
        const r = new lr1rule_1.LR1_Rule(lhs);
        this.rules.push(r);
        return r;
    }
    /**
     * Parse an input program according to the specified grammar.
     * Note: Method calcTable must be called first!
     * @param lexer lexer instance with already pushed source code
     * @param verbose print verbose output
     * @throws LR1Error if the table has not been generated, is empty or is
     * inconsistent; syntax errors are reported through lexer.error first
     */
    parse(lexer, verbose = false) {
        // check if we are ready to parse
        if (this.table == null) {
            throw new LR1Error('table not generated');
        }
        if (this.table.rows.length === 0) {
            throw new LR1Error('table has no rules');
        }
        // Parse stack that contains state numbers, terminals and non-terminals.
        // It starts with the initial state.
        const stack = [0];
        // Run until the accept-state. Stop on errors.
        for (;;) {
            if (verbose)
                this.printParseStack(stack);
            const state = stack[stack.length - 1];
            const tk = lexer.getToken();
            const row = this.table.rows[state];
            // find action entry in the table: a concrete token value (stored with
            // prefix ':') takes precedence over the token type
            let entry = null;
            if (tk.type === token_1.LexerTokenType.TER ||
                tk.type === token_1.LexerTokenType.DEL ||
                tk.type === token_1.LexerTokenType.ID) {
                entry = row.actionEntries[':' + tk.token];
                if (entry === undefined)
                    entry = null;
            }
            if (entry == null) {
                entry = row.actionEntries[tk.type];
            }
            if (entry == null) {
                // TODO: list expected token-types / token-value
                const msg = 'unexpected token "' + tk.token + '"';
                lexer.error(msg);
                // lexer.error presumably throws; never continue with a missing entry
                throw new LR1Error(msg);
            }
            if (entry.shift) {
                // SHIFT the next input token
                stack.push(tk);
                lexer.next();
                stack.push(entry.value); // push state
                if (verbose)
                    console.log('shifted ' + tk.token);
                continue;
            }
            // REDUCE the currently processed rule to a non-terminal.
            // get the rule that reduces the rightmost elements of the stack
            const rule = this.rules[entry.value];
            // if we are reducing the root-rule, parsing is finished
            const rootRule = rule === this.rules[0];
            // pop N items from stack, with N the number of rule items;
            // we need factor 2 due to the state numbers
            const items = stack.splice(stack.length - 2 * rule.rhs.length);
            // call the callback method, if present
            if (rule.callBackId.length > 0) {
                if (!hasOwnKey(this.callBacks, rule.callBackId)) {
                    const msg = 'UNIMPLEMENTED callback function ' + rule.callBackId;
                    lexer.error(msg);
                    // lexer.error presumably throws; never call a missing callback
                    throw new LR1Error(msg);
                }
                // get terminal tokens for the caller; items alternates between
                // symbols (even indices) and state numbers (odd indices)
                const terminals = [];
                for (let i = 0; i < items.length >> 1; i++) {
                    if (rule.rhs[i].type === lr1rule_1.LR1_RuleItemType.Terminal) {
                        terminals.push(items[i * 2]);
                    }
                }
                this.callBacks[rule.callBackId](terminals);
            }
            // stop if we are reducing the root rule
            if (rootRule) {
                if (!lexer.isEND()) {
                    // report an error in case that the end token is not reached
                    lexer.error('expected END');
                    throw new LR1Error('expected END');
                }
                if (verbose) {
                    console.log('parsed successfully');
                }
                return;
            }
            // read the state number on top of stack
            const s = stack[stack.length - 1];
            // let x be the non-terminal on the left-hand side of the rule.
            // get the state of x from the goto table
            const gotoEntries = this.table.rows[s].gotoEntries;
            if (!hasOwnKey(gotoEntries, rule.lhs)) {
                throw new LR1Error('no goto entry for non-terminal ' + rule.lhs + ' in state ' + s);
            }
            // push x and its goto state onto the stack
            stack.push(rule.lhs);
            stack.push(gotoEntries[rule.lhs].value);
            if (verbose) {
                console.log('reduced rule ' + rule.index + ' [' + rule.toString() + ']');
            }
        }
    }
    /**
     * prints the current parse stack
     * @param stack current parse stack
     */
    printParseStack(stack) {
        let s = 'stack: ';
        for (const item of stack) {
            if (typeof item === 'number' || typeof item === 'string') {
                // state numbers and non-terminal ids are printed as they are
                s += item;
            }
            else {
                // tokens are printed by their quoted text
                s += '"' + item.token + '"';
            }
            s += ' ';
        }
        console.log(s);
    }
    /**
     * Calculate parsing table. States and table of a previous run are
     * discarded, so the method may be called again after the rules changed.
     * @returns parsing table
     * @throws LR1Error if there are no rules, if a rule uses an undefined
     * non-terminal, or if the grammar leads to conflicting table entries
     */
    calcTable() {
        // drop results of an earlier run; otherwise stale states would be mixed
        // into the new table
        this.states = [];
        this.table = null;
        if (this.rules.length === 0) {
            throw new LR1Error('cannot calculate table without any production rule');
        }
        // check if all rules are valid
        // (a) get the entire set of non-terminals
        const nt = new Set();
        for (const rule of this.rules) {
            nt.add(rule.lhs);
        }
        // (b) check each rule
        for (const rule of this.rules) {
            // are all non-terminals on the right-hand side of a rule defined?
            for (const item of rule.rhs) {
                if (item.type === lr1rule_1.LR1_RuleItemType.NonTerminal &&
                    !nt.has(item.value)) {
                    throw new LR1Error('rule [' +
                        rule.toString() +
                        '] uses undefined non-terminal ' +
                        item.value);
                }
            }
        }
        // calculate the first set for each non-terminal
        this.calcFirst();
        // set an index number to each rule
        for (let i = 0; i < this.rules.length; i++) {
            this.rules[i].index = i;
        }
        // create an initial state with the root rule
        const initialState = new lr1state_1.LR1_State(this);
        const initialItem = new lr1state_1.LR1_StateItem();
        initialItem.lookAheadSet.add('END');
        initialItem.pos = 0;
        initialItem.rule = this.rules[0];
        initialState.addItem(initialItem);
        // Q := yet unprocessed states; the initial state is yet unprocessed
        let Q = [initialState];
        // run until there are no remaining unprocessed states
        while (Q.length > 0) {
            // process one of the remaining states
            const q = Q.pop();
            // calculate CLOSURE for currently processed state
            const R = q.calcItemSet();
            // add state
            if (this.addState(q)) {
                // if the state was not present before, add its successor states
                // as yet unprocessed states
                Q = Q.concat(R);
            }
        }
        // finally create the parse table; its data is retrieved from the states
        const table = new lr1table_1.LR1_Table();
        // for each state: add a row to the table
        for (const state of this.states) {
            const row = new lr1table_1.LR1_TableRow();
            table.rows.push(row);
            // state transitions define SHIFT-actions and GOTO-entries
            for (const outEdge of state.outEdges) {
                // create a new entry and store the index of the destination state
                const entry = new lr1table_1.LR1_TableEntry();
                entry.value = outEdge.dest.getIndex();
                const symbol = outEdge.label.value;
                if (outEdge.label.type === lr1rule_1.LR1_RuleItemType.Terminal) {
                    entry.shift = true;
                    if (hasOwnKey(row.actionEntries, symbol)) {
                        throw new LR1Error('state ' +
                            state.getIndex() +
                            ' has more than one transition for terminal ' +
                            symbol);
                    }
                    row.actionEntries[symbol] = entry;
                }
                else {
                    if (hasOwnKey(row.gotoEntries, symbol)) {
                        throw new LR1Error('state ' +
                            state.getIndex() +
                            ' has more than one transition for non-terminal ' +
                            symbol);
                    }
                    row.gotoEntries[symbol] = entry;
                }
            }
            // reduction takes place, when the position of a rule is after its last
            // item
            const reduceEntries = state.calcReduceEntries();
            // create a REDUCE-action for each lookahead-terminal
            for (const terminal in reduceEntries) {
                const entry = new lr1table_1.LR1_TableEntry();
                entry.shift = false; // reduce
                entry.value = reduceEntries[terminal].value;
                if (hasOwnKey(row.actionEntries, terminal)) {
                    // the slot is already taken by a shift or by another reduction
                    const kind = row.actionEntries[terminal].shift
                        ? 'shift-reduce'
                        : 'reduce-reduce';
                    throw new LR1Error(kind +
                        ' conflict in state ' +
                        state.getIndex() +
                        ' on terminal ' +
                        terminal);
                }
                row.actionEntries[terminal] = entry;
            }
        }
        // publish the table only after it has been built completely
        this.table = table;
        return this.table;
    }
    /**
     * Calculates the first-set for each non-terminal.
     * The first set is the set of terminals that are parsed next:
     *   rule 'x = y ...;'   ->  first(x) = first(x) uu first(y);
     *   rule 'x = "z" ...;' ->  first(x) = first(x) uu { "z" };
     * with non-terminals x and y, as well as terminal "z";
     * "uu" denotes the union operator.
     * Only the first item of each right-hand side is considered; rules with an
     * empty right-hand side contribute nothing.
     */
    calcFirst() {
        // start from scratch, so that entries of a previous grammar do not leak
        this.first = {};
        for (const rule of this.rules) {
            if (!hasOwnKey(this.first, rule.lhs)) {
                this.first[rule.lhs] = new Set();
            }
        }
        let changed = false;
        do {
            // run until no more changes occur
            changed = false;
            for (const rule of this.rules) {
                if (rule.rhs.length === 0) {
                    continue;
                }
                const rhs0 = rule.rhs[0];
                const target = this.first[rule.lhs];
                const sizeBefore = target.size;
                if (rhs0.type === lr1rule_1.LR1_RuleItemType.NonTerminal) {
                    // rule 'x = y ...;'  ->  first(x) = first(x) uu first(y);
                    for (const terminal of this.first[rhs0.value]) {
                        target.add(terminal);
                    }
                }
                else {
                    // rule 'x = "y" ...;'  ->  first(x) = first(x) uu { "y" };
                    target.add(rhs0.value);
                }
                if (target.size > sizeBefore) {
                    changed = true;
                }
            }
        } while (changed);
    }
    /**
     * Adds a new state while the parsing table is created.
     * @param sNew the state to be added
     * @returns true, if the state has been added; false if an equal state is
     * already present
     */
    addState(sNew) {
        // check if there already is a state equal to sNew
        for (const s of this.states) {
            if (!s.equal(sNew)) {
                continue;
            }
            // if we found a state equal to sNew, we still have to ensure that
            // the set of all in-edges to the sNew are also present in the existing
            // state
            for (const u of sNew.inEdges) {
                // set the destination vertex to the existing state
                u.dest = s;
                // is the edge already present?
                let found = false;
                for (const v of s.inEdges) {
                    if (u.src === v.src &&
                        u.dest === v.dest &&
                        u.label.type === v.label.type &&
                        u.label.value === v.label.value) {
                        found = true;
                        break;
                    }
                }
                // only add it, if it was NOT found
                if (!found) {
                    s.inEdges.push(u);
                }
            }
            // false := the number of states did NOT increase
            return false;
        }
        // if there is no state equal to sNew, create it
        sNew.setIndex(this.states.length);
        this.states.push(sNew);
        // true := the number of states increased
        return true;
    }
    /**
     * Stringifies the current object.
     * @returns stringified representation
     */
    toString() {
        let s = 'LR1-rules: {\n';
        let i = 0;
        for (const rule of this.rules) {
            s += '' + i + ': ' + rule.toString() + '\n';
            i++;
        }
        s += '}\n';
        s += 'LR1-first: ';
        for (const id in this.first) {
            s += 'FIRST(' + id + ') = { ';
            for (const item of this.first[id]) {
                s += '"' + item + '", ';
            }
            // remove the separator after the last element
            if (s.endsWith(', '))
                s = s.substring(0, s.length - 2);
            s += ' }, ';
        }
        s += '\n';
        s += 'LR1-STATES:\n';
        for (const state of this.states) {
            s += state.toString();
        }
        s += '\n';
        s += 'LR1-TABLE:\n';
        s += this.table == null ? '%' : this.table.toString();
        return s;
    }
}
exports.LR1 = LR1;
//# sourceMappingURL=lr1.js.map