// @multila/multila-parser-generator
// Version:
// An LR(1) Parser Generator written in TypeScript
// 499 lines • 18.3 kB
// JavaScript
"use strict";
/*
MULTILA Compiler and Computer Architecture Infrastructure
Copyright (c) 2022 by Andreas Schwenk, contact@multila.org
Licensed by GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.LR1 = exports.LR1Error = void 0;
const multila_lexer_1 = require("@multila/multila-lexer");
const token_1 = require("@multila/multila-lexer/lib/token");
const lr1rule_1 = require("./lr1rule");
const lr1state_1 = require("./lr1state");
const lr1table_1 = require("./lr1table");
// TODO: shift-reduce conflicts
// TODO: reduce-reduce conflicts
/**
* Error that may be thrown during table construction or parsing.
*/
class LR1Error extends Error {
  /**
   * Creates the error and tags it with the name 'LR1Error', so that it can
   * be told apart from other errors via its "name" property.
   * @param msg human-readable description of the problem
   */
  constructor(msg) {
    super(msg);
    // the name is assigned after the super call; Error itself would
    // report 'Error' here
    const errorName = 'LR1Error';
    this.name = errorName;
  }
}
exports.LR1Error = LR1Error;
/**
* LR1 parser generator and parser.
*/
class LR1 {
  constructor() {
    /**
     * list of production rules; the first rule represents the root rule
     */
    this.rules = [];
    /**
     * first set for each non-terminal of rules (non-terminal id -> Set of
     * terminals); rebuilt from scratch by calcFirst
     */
    this.first = {};
    /**
     * states created during table creation; rebuilt by calcTable
     */
    this.states = [];
    /**
     * LR(1) table; null until calcTable succeeded
     */
    this.table = null;
    /**
     * Callback functions that are automatically called after rule reduction (if
     * provided). Parameter "terminals" contains tokens for all terminal items
     * of a rule. The i-th element in list refers to the i-th terminal in the
     * right-hand side of a rule. Non-terminals are skipped while indexing.
     */
    this.callBacks = {};
  }

  /**
   * Gets the set of production rules.
   * @returns rules as list. The first rule represents the root rule.
   */
  getRules() {
    return this.rules;
  }

  /**
   * Gets the first set, i.e. the FIRST-set of each non-terminal.
   * @returns first set
   */
  getFirst() {
    return this.first;
  }

  /**
   * Gets the generated parse table.
   * @returns the parse table or null if it has not yet been generated
   */
  getTable() {
    return this.table;
  }

  /**
   * Add a callback method that is automatically called after reduction of a
   * production rule while parsing.
   * @param id identifier of the callback function
   * @param f implementation of the callback function
   */
  addCallback(id, f) {
    this.callBacks[id] = f;
  }

  /**
   * Parses production rules in the form "u = v1 v2 ... -> callback;",
   * specified in a small DSL. Grammar is as follows:
   *   rules = { rule };
   *   rule = ID "=" rhs { "|" rhs } ";";
   *   rhs = { item } [ "->" ID ];
   *   item = "INT" | "REAL" | "HEX" | "ID" | "STR" | ID | STR;
   * Any previously defined rules are discarded.
   * @param src rules defined in the DSL specified above
   */
  parseRules(src) {
    this.rules = [];
    const lexer = new multila_lexer_1.Lexer();
    // "->" must be registered explicitly to be lexed as one terminal
    lexer.setTerminals(['->']);
    lexer.pushSource('LR1RULES', src);
    while (lexer.isNotEND()) {
      this.parseRule(lexer);
    }
  }

  /**
   * Parses one rule: rule = ID "=" rhs { "|" rhs } ";";
   * Every alternative is stored as a separate production rule.
   * @param lexer lexer positioned at the start of a rule
   */
  parseRule(lexer) {
    const lhs = lexer.ID();
    lexer.TER('=');
    this.parseRhs(lhs, lexer);
    while (lexer.isTER('|')) {
      lexer.next();
      this.parseRhs(lhs, lexer);
    }
    lexer.TER(';');
  }

  /**
   * Parses one alternative: rhs = { item } [ "->" ID ];
   * @param lhs non-terminal id the alternative belongs to
   * @param lexer lexer positioned at the start of the alternative
   */
  parseRhs(lhs, lexer) {
    const rule = this.addRule(lhs);
    while (
      lexer.isNotTER('|') &&
      lexer.isNotTER(';') &&
      lexer.isNotTER('->')
    ) {
      this.parseItem(lexer, rule);
    }
    if (lexer.isTER('->')) {
      lexer.next();
      rule.callBackId = lexer.ID();
    }
  }

  /**
   * Parses one item:
   *   item = "INT" | "REAL" | "HEX" | "ID" | "STR" | ID | STR;
   * The upper-case keywords denote token classes and are stored as is;
   * literal terminals given as strings are stored with a ':' prefix.
   * @param lexer lexer positioned at the item
   * @param r rule the item is appended to
   */
  parseItem(lexer, r) {
    const tokenClasses = ['INT', 'REAL', 'HEX', 'ID', 'STR'];
    if (tokenClasses.some((name) => lexer.isTER(name))) {
      r.addTerminalItem(lexer.getToken().token);
      lexer.next();
    } else if (lexer.isID()) {
      r.addNonTerminalItem(lexer.ID());
    } else if (lexer.isSTR()) {
      r.addTerminalItem(':' + lexer.STR());
    } else {
      lexer.errorExpected([
        '"INT"',
        '"REAL"',
        '"HEX"',
        '"ID"',
        '"STR"',
        'ID',
        'STR',
      ]);
    }
  }

  /**
   * Create a new production rule.
   * @param lhs non-terminal id of the rule
   * @returns a new rule object
   */
  addRule(lhs) {
    const rule = new lr1rule_1.LR1_Rule(lhs);
    this.rules.push(rule);
    return rule;
  }

  /**
   * Parse an input program according to the specified grammar.
   * Note: Method calcTable must be called first!
   * Syntax errors and missing callbacks are reported through lexer.error;
   * if that method returns instead of throwing, an LR1Error is thrown as a
   * fallback so that parsing never continues in an undefined state.
   * @param lexer lexer instance with already pushed source code
   * @param verbose print verbose output
   * @returns this method may throw an LR1Error
   */
  parse(lexer, verbose = false) {
    // check if we are ready to parse
    if (this.table == null) {
      throw new LR1Error('table not generated');
    }
    if (this.table.rows.length === 0) {
      throw new LR1Error('table has no rules');
    }
    const hasOwn = Object.prototype.hasOwnProperty;
    // Parse stack that contains state numbers, terminals and non-terminals.
    // It starts with the initial state.
    const stack = [0];
    // Run until the accept-state. Stop on errors.
    for (;;) {
      if (verbose) {
        this.printParseStack(stack);
      }
      const state = stack[stack.length - 1];
      const tk = lexer.getToken();
      const actions = this.table.rows[state].actionEntries;
      // find action entry in the table: a literal terminal (':' + text)
      // takes precedence over the token class
      let entry = null;
      if (
        tk.type === token_1.LexerTokenType.TER ||
        tk.type === token_1.LexerTokenType.DEL ||
        tk.type === token_1.LexerTokenType.ID
      ) {
        entry = actions[':' + tk.token];
      }
      if (entry == null) {
        entry = actions[tk.type];
      }
      if (entry == null) {
        const msg = 'unexpected token "' + tk.token + '"';
        lexer.error(msg);
        // TODO: list expected token-types / token-value
        // fallback in case lexer.error returned
        throw new LR1Error(msg);
      }
      // SHIFT the next input token
      if (entry.shift) {
        stack.push(tk);
        lexer.next();
        stack.push(entry.value); // push state
        if (verbose) {
          console.log('shifted ' + tk.token);
        }
        continue;
      }
      // REDUCE the currently processed rule to a non-terminal.
      // If we are reducing the root-rule (index 0), parsing is finished.
      // The loose comparison is kept on purpose: the type of table values
      // is defined outside of this class.
      const rootRule = entry.value == 0;
      // get the rule that reduces the rightmost elements of the stack
      const rule = this.rules[entry.value];
      // pop N items from stack, with N the number of rule items;
      // we need factor 2 due to the state numbers
      const items = stack.splice(stack.length - 2 * rule.rhs.length);
      // call the callback method, if present
      if (rule.callBackId.length > 0) {
        // own-property check: ids like "toString" must not resolve to
        // members inherited from Object.prototype
        if (!hasOwn.call(this.callBacks, rule.callBackId)) {
          const msg = 'UNIMPLEMENTED callback function ' + rule.callBackId;
          lexer.error(msg);
          // fallback in case lexer.error returned
          throw new LR1Error(msg);
        }
        // get terminal tokens for the caller; "items" alternates between
        // symbols (even positions) and state numbers (odd positions)
        const terminals = [];
        const n = items.length >> 1;
        for (let i = 0; i < n; i++) {
          if (rule.rhs[i].type === lr1rule_1.LR1_RuleItemType.Terminal) {
            terminals.push(items[i * 2]);
          }
        }
        this.callBacks[rule.callBackId](terminals);
      }
      // stop if we are reducing the root rule
      if (rootRule) {
        if (lexer.isEND() == false) {
          // report an error in case that the end token is not reached
          lexer.error('expected END');
        }
        if (verbose) {
          console.log('parsed successfully');
        }
        return;
      }
      // read the state number on top of stack
      const top = stack[stack.length - 1];
      const gotoEntries = this.table.rows[top].gotoEntries;
      if (!hasOwn.call(gotoEntries, rule.lhs)) {
        throw new LR1Error(
          'no GOTO entry for non-terminal ' + rule.lhs + ' in state ' + top
        );
      }
      // let x be the non-terminal on the left-hand side of the rule:
      // push x and the state of x from the goto table onto the stack
      stack.push(rule.lhs);
      stack.push(gotoEntries[rule.lhs].value);
      if (verbose) {
        console.log(
          'reduced rule ' + rule.index + ' [' + rule.toString() + ']'
        );
      }
    }
  }

  /**
   * prints the current parse stack
   * @param stack current parse stack
   */
  printParseStack(stack) {
    const cells = stack.map((item) =>
      typeof item === 'number' || typeof item === 'string'
        ? String(item)
        : '"' + item.token + '"'
    );
    // every cell is followed by one blank (including the last one)
    console.log('stack: ' + cells.map((cell) => cell + ' ').join(''));
  }

  /**
   * Calculate parsing table. States and table of a previous run are
   * discarded first; if the calculation fails, getTable() returns null.
   * @returns parsing table
   * @throws LR1Error if there are no rules, if a rule uses an undefined
   * non-terminal, or if the grammar leads to conflicting table entries
   */
  calcTable() {
    // invalidate the results of a previous run, so that stale states can
    // neither be merged with new ones nor be used together with new rules
    this.states = [];
    this.table = null;
    if (this.rules.length === 0) {
      throw new LR1Error('cannot calculate table without any production rule');
    }
    const hasOwn = Object.prototype.hasOwnProperty;
    // check if all rules are valid
    // (a) get the entire set of non-terminals
    const nonTerminals = new Set(this.rules.map((rule) => rule.lhs));
    // (b) are all non-terminals on the right-hand side of a rule defined?
    for (const rule of this.rules) {
      for (const item of rule.rhs) {
        if (
          item.type === lr1rule_1.LR1_RuleItemType.NonTerminal &&
          !nonTerminals.has(item.value)
        ) {
          throw new LR1Error(
            'rule [' +
              rule.toString() +
              '] uses undefined non-terminal ' +
              item.value
          );
        }
      }
    }
    // calculate the first set for each non-terminal
    this.calcFirst();
    // set an index number to each rule
    this.rules.forEach((rule, i) => {
      rule.index = i;
    });
    // create an initial state with the root rule
    const initialState = new lr1state_1.LR1_State(this);
    const rootItem = new lr1state_1.LR1_StateItem();
    rootItem.lookAheadSet.add('END');
    rootItem.pos = 0;
    rootItem.rule = this.rules[0];
    initialState.addItem(rootItem);
    // yet unprocessed states; the initial state is yet unprocessed
    let unprocessed = [initialState];
    // run until there are no remaining unprocessed states
    while (unprocessed.length > 0) {
      const q = unprocessed.pop();
      // calculate CLOSURE for currently processed state
      const successors = q.calcItemSet();
      // if the state was not present before, add its successor states
      // as yet unprocessed states
      if (this.addState(q)) {
        unprocessed = unprocessed.concat(successors);
      }
    }
    // finally create the parse table; its data is retrieved from the states.
    // The table is published only after it has been built completely.
    const table = new lr1table_1.LR1_Table();
    for (const state of this.states) {
      const row = new lr1table_1.LR1_TableRow();
      table.rows.push(row);
      // state transitions define SHIFT-actions and GOTO-entries
      for (const outEdge of state.outEdges) {
        // the entry stores the index of the destination state
        const entry = new lr1table_1.LR1_TableEntry();
        entry.value = outEdge.dest.getIndex();
        const label = outEdge.label.value;
        if (outEdge.label.type === lr1rule_1.LR1_RuleItemType.Terminal) {
          entry.shift = true;
          if (hasOwn.call(row.actionEntries, label)) {
            throw new LR1Error(
              'conflicting SHIFT actions for terminal "' +
                label +
                '" in state ' +
                state.getIndex()
            );
          }
          row.actionEntries[label] = entry;
        } else {
          if (hasOwn.call(row.gotoEntries, label)) {
            throw new LR1Error(
              'conflicting GOTO entries for non-terminal ' +
                label +
                ' in state ' +
                state.getIndex()
            );
          }
          row.gotoEntries[label] = entry;
        }
      }
      // reduction takes place, when the position of a rule is after its last
      // item
      const reduceEntries = state.calcReduceEntries();
      // create a REDUCE-action for each lookahead-terminal
      for (const terminal in reduceEntries) {
        const entry = new lr1table_1.LR1_TableEntry();
        entry.shift = false; // reduce
        entry.value = reduceEntries[terminal].value;
        if (hasOwn.call(row.actionEntries, terminal)) {
          const kind = row.actionEntries[terminal].shift
            ? 'shift-reduce'
            : 'reduce-reduce';
          throw new LR1Error(
            kind +
              ' conflict for terminal "' +
              terminal +
              '" in state ' +
              state.getIndex()
          );
        }
        row.actionEntries[terminal] = entry;
      }
    }
    this.table = table;
    return table;
  }

  /**
   * Calculates the first-set for each non-terminal, starting from scratch.
   * The first set is the set of terminals that are parsed next:
   *   rule 'x = y ...;'   ->  first(x) = first(x) uu first(y);
   *   rule 'x = "z" ...;' ->  first(x) = first(x) uu { "z" };
   * with non-terminals x and y, as well as terminal "z";
   * "uu" denotes the union operator.
   * Note: only the first item of a right-hand side is considered; rules
   * with an empty right-hand side do not contribute.
   */
  calcFirst() {
    const hasOwn = Object.prototype.hasOwnProperty;
    // drop entries of a previous run (the rules may have changed)
    this.first = {};
    for (const rule of this.rules) {
      // own-property check: a non-terminal may be named like a member of
      // Object.prototype (e.g. "constructor")
      if (!hasOwn.call(this.first, rule.lhs)) {
        this.first[rule.lhs] = new Set();
      }
    }
    let changed;
    do {
      // run until no more changes occur
      changed = false;
      for (const rule of this.rules) {
        if (rule.rhs.length === 0) {
          continue;
        }
        const rhs0 = rule.rhs[0];
        const target = this.first[rule.lhs];
        const sizeBefore = target.size;
        if (rhs0.type === lr1rule_1.LR1_RuleItemType.NonTerminal) {
          // rule 'x = y ...;' -> first(x) = first(x) uu first(y);
          for (const terminal of this.first[rhs0.value]) {
            target.add(terminal);
          }
        } else {
          // rule 'x = "y" ...;' -> first(x) = first(x) uu { "y" };
          target.add(rhs0.value);
        }
        if (target.size > sizeBefore) {
          changed = true;
        }
      }
    } while (changed);
  }

  /**
   * Adds a new state while the parsing table is created.
   * @param sNew the state to be added
   * @returns true, if the state has been added; false if an equal state is
   * already present
   */
  addState(sNew) {
    // check if there already is a state equal to sNew
    const existing = this.states.find((s) => s.equal(sNew));
    if (existing === undefined) {
      // there is no state equal to sNew: register it
      sNew.setIndex(this.states.length);
      this.states.push(sNew);
      // true := the number of states increased
      return true;
    }
    // we found a state equal to sNew; we still have to ensure that all
    // in-edges of sNew are also present in the existing state
    for (const u of sNew.inEdges) {
      // redirect the edge to the existing state
      u.dest = existing;
      const found = existing.inEdges.some(
        (v) =>
          u.src === v.src &&
          u.dest === v.dest &&
          u.label.type === v.label.type &&
          u.label.value === v.label.value
      );
      // only add it, if it was NOT found
      if (!found) {
        existing.inEdges.push(u);
      }
    }
    // false := the number of states did NOT increase
    return false;
  }

  /**
   * Stringifies the current object: rules, first sets, states and table
   * ('%' is printed in place of a table that has not been generated).
   * @returns stringified representation
   */
  toString() {
    let s = 'LR1-rules: {\n';
    this.rules.forEach((rule, i) => {
      s += '' + i + ': ' + rule.toString() + '\n';
    });
    s += '}\n';
    s += 'LR1-first: ';
    for (const id of Object.keys(this.first)) {
      const items = [...this.first[id]]
        .map((item) => '"' + item + '"')
        .join(', ');
      s += 'FIRST(' + id + ') = { ' + items + ' }, ';
    }
    s += '\n';
    s += 'LR1-STATES:\n';
    for (const state of this.states) {
      s += state.toString();
    }
    s += '\n';
    s += 'LR1-TABLE:\n';
    s += this.table == null ? '%' : this.table.toString();
    return s;
  }
}
exports.LR1 = LR1;
//# sourceMappingURL=lr1.js.map