UNPKG

jison-gho

Version:

A parser generator with a Bison/YACC-similar API (derived off zaach/jison repo)

1,319 lines (1,179 loc) 1.18 MB
(function (global, factory) { typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory(require('fs'), require('path'), require('@gerhobbelt/recast'), require('assert'), require('@gerhobbelt/xregexp'), require('@gerhobbelt/json5'), require('@gerhobbelt/ast-util')) : typeof define === 'function' && define.amd ? define(['fs', 'path', '@gerhobbelt/recast', 'assert', '@gerhobbelt/xregexp', '@gerhobbelt/json5', '@gerhobbelt/ast-util'], factory) : (global.jison = factory(global.fs,global.path,global.recast,global.assert$1,global.XRegExp,global.json5,global.astUtils)); }(this, (function (fs,path,recast,assert$1,XRegExp,json5,astUtils) { 'use strict'; fs = fs && fs.hasOwnProperty('default') ? fs['default'] : fs; path = path && path.hasOwnProperty('default') ? path['default'] : path; recast = recast && recast.hasOwnProperty('default') ? recast['default'] : recast; assert$1 = assert$1 && assert$1.hasOwnProperty('default') ? assert$1['default'] : assert$1; XRegExp = XRegExp && XRegExp.hasOwnProperty('default') ? XRegExp['default'] : XRegExp; json5 = json5 && json5.hasOwnProperty('default') ? json5['default'] : json5; astUtils = astUtils && astUtils.hasOwnProperty('default') ? astUtils['default'] : astUtils; // Return TRUE if `src` starts with `searchString`. function startsWith(src, searchString) { return src.substr(0, searchString.length) === searchString; } // tagged template string helper which removes the indentation common to all // non-empty lines: that indentation was added as part of the source code // formatting of this lexer spec file and must be removed to produce what // we were aiming for. // // Each template string starts with an optional empty line, which should be // removed entirely, followed by a first line of error reporting content text, // which should not be indented at all, i.e. 
// the indentation of the first
// non-empty line should be treated as the 'common' indentation and thus
// should also be removed from all subsequent lines in the same template string.
//
// See also: https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Template_literals
/**
 * Tagged-template helper: strips the leading whitespace shared by all
 * content-carrying lines that follow a NEWLINE in the template.
 *
 * The text before the first NEWLINE of each template partial is never
 * inspected nor unindented, trailing whitespace at the very end of the
 * template is removed, and a leading NEWLINE is kept as-is.
 *
 * @param {string[]} strings - the literal partials of the template
 * @param {...*} values - the interpolated values; they are stringified when the result is joined
 * @returns {string} the template text with the common indentation removed
 */
function rmCommonWS$1(strings, ...values) {
    // As `strings[]` is an array of strings, each potentially consisting
    // of multiple lines, followed by one(1) value, we have to split each
    // individual string into lines to keep that bit of information intact.
    //
    // We assume clean code style, hence no random mix of tabs and spaces, so every
    // line MUST have the same indent style as all others, so `length` of indent
    // should suffice, but the way we coded this is stricter checking as we look
    // for the *exact* indenting=leading whitespace in each line.
    //
    // `null` means "no content line seen yet"; the empty string means "at least
    // one content line has no indentation at all", in which case there is no
    // common indentation to remove. These two states must not be confused.
    var indent_str = null;
    var src = strings.map(function splitIntoLines(s) {
        var a = s.split('\n');

        indent_str = a.reduce(function analyzeLine(indent_str, line, index) {
            // only check indentation of parts which follow a NEWLINE:
            if (index !== 0) {
                var m = /^(\s*)\S/.exec(line);
                // only non-empty ~ content-carrying lines matter re common indent calculus:
                if (m) {
                    if (indent_str == null || m[1].length < indent_str.length) {
                        indent_str = m[1];
                    }
                }
            }
            return indent_str;
        }, indent_str);

        return a;
    });

    // Also note: due to the way we format the template strings in our sourcecode,
    // the last line in the entire template must be empty when it has ANY trailing
    // whitespace:
    var last = src[src.length - 1];
    last[last.length - 1] = last[last.length - 1].replace(/\s+$/, '');

    // Done determining the common indentation.
    //
    // Process template string partials now, but only when there's
    // some actual UNindenting to do:
    if (indent_str) {
        for (var i = 0, len = src.length; i < len; i++) {
            var lines = src[i];
            // only correct indentation at start of line, i.e. only check for
            // the indent after every NEWLINE ==> start at j=1 rather than j=0
            for (var j = 1, linecnt = lines.length; j < linecnt; j++) {
                if (lines[j].startsWith(indent_str)) {
                    lines[j] = lines[j].slice(indent_str.length);
                }
            }
        }
    }

    // now merge everything to construct the template result:
    var rv = [];
    for (var k = 0, cnt = values.length; k < cnt; k++) {
        rv.push(src[k].join('\n'));
        rv.push(values[k]);
    }
    // the last value is always followed by a last template string partial:
    rv.push(src[k].join('\n'));

    return rv.join('');
}

// Convert dashed option keys to Camel Case, e.g. `camelCase('camels-have-one-hump')` => `'camelsHaveOneHump'`
/**
 * @public
 * @param {string} s - the (possibly dashed) key
 * @returns {string} `s` with its first character lowercased and every `-x` replaced by `X`
 */
function camelCase(s) {
    // Convert first character to lowercase
    return s.replace(/^\w/, function (match) {
        return match.toLowerCase();
    })
    .replace(/-\w/g, function (match) {
        var c = match.charAt(1);
        var rv = c.toUpperCase();
        // do not mutate 'a-2' to 'a2':
        if (c === rv && c.match(/\d/)) {
            return match;
        }
        return rv;
    });
}

// Convert dashed option keys and other inputs to Camel Cased legal JavaScript identifiers
/**
 * @public
 * @param {*} s - any value; it is converted to a string first
 * @returns {string} the camel-cased, sanitized identifier
 */
function mkIdentifier$2(s) {
    s = camelCase('' + s);
    // cleanup: replace any non-suitable character series to a single underscore:
    return s
    .replace(/^[^\w_]/, '_')
    // do not accept numerics at the leading position, despite those matching regex `\w`:
    .replace(/^\d/, '_')
    .replace(/[^\w\d_]+/g, '_')
    // and only accept multiple (double, not triple) underscores at start or end of identifier name:
    // ('#' serves as a temporary placeholder, as it cannot occur in the string at this point)
    .replace(/^__+/, '#')
    .replace(/__+$/, '#')
    .replace(/_+/g, '_')
    .replace(/#/g, '__');
}

// properly quote and escape the given input string
/**
 * Wraps `s` in quotes: single quotes when `s` contains double quotes only,
 * double quotes otherwise. When `s` contains both quote kinds, its double
 * quotes are backslash-escaped first.
 *
 * Note that no other characters (backslashes, newlines) are escaped.
 *
 * @param {string} s
 * @returns {string} the quoted string
 */
function dquote$1(s) {
    var sq = (s.indexOf('\'') >= 0);
    var dq = (s.indexOf('"') >= 0);
    if (sq && dq) {
        s = s.replace(/"/g, '\\"');
        dq = false;
    }
    if (dq) {
        s = '\'' + s + '\'';
    }
    else {
        s = '"' + s + '"';
    }
    return s;
}

//
// Helper library for safe code execution/compilation, including dumping offending code to file for further error analysis
// (the idea was originally coded
// in https://github.com/GerHobbelt/jison/commit/85e367d03b977780516d2b643afbe6f65ee758f2 )
//
// MIT Licensed
//
//
// This code is intended to help test and diagnose arbitrary chunks of code, answering questions like this:
//
// the given code fails, but where exactly and why? Its precise failure conditions are 'hidden' due to
// the stuff running inside an `eval()` or `Function(...)` call, so we want the code dumped to file so that
// we can test the code in a different environment so that we can see what precisely is causing the failure.
//

/**
 * Prints a warning on stderr when the given source contains an identifier
 * starting with `cov_`, which is taken as a sign of Istanbul coverage
 * instrumentation having been applied to it.
 *
 * @param {*} src - the source code; it is converted to a string first
 */
function chkBugger$1(src) {
    src = String(src);
    if (src.match(/\bcov_\w+/)) {
        console.error('### ISTANBUL COVERAGE CODE DETECTED ###\n', src);
    }
}

// Helper function: pad number with leading zeroes
/**
 * @param {number|string} n - the value to pad
 * @param {number} [p=2] - the minimum width of the result
 * @returns {string} `n` left-padded with zeroes up to `p` characters; longer values are returned in full
 */
function pad(n, p) {
    p = p || 2;
    var rv = String(n);
    if (rv.length >= p) {
        return rv;
    }
    return '0'.repeat(p - rv.length) + rv;
}

// attempt to dump in one of several locations: first winner is *it*!
/**
 * Writes `sourcecode` to a timestamped `.js` dump file and, when an exception
 * object is passed, augments it with the source code, its title and the path
 * of the dump file.
 *
 * The candidate directories are tried in this order: the directory of
 * `options.outfile`, `options.inputPath` and the current working directory.
 * Failure to write the dump is reported on stderr; it is never thrown.
 *
 * @param {string} sourcecode - the code to dump
 * @param {string} errname - human-readable title of the code, used in messages and as last-resort file name
 * @param {string} [err_id='XXX'] - identifier which becomes part of the dump file name
 * @param {Object} [options] - see the candidate directories above; `inputFilename`, `moduleName`
 *                             and `defaultModuleName` are also consulted for the dump file name
 * @param {Error} [ex] - exception to augment with `offending_source_code`,
 *                       `offending_source_title` and `offending_source_dumpfile`
 */
function dumpSourceToFile(sourcecode, errname, err_id, options, ex) {
    // path of the dump file that was actually written; stays undefined when every attempt failed
    var dumpfile;
    // path of the most recent attempt, for error reporting only
    var attempted;
    options = options || {};

    try {
        var dumpPaths = [(options.outfile ? path.dirname(options.outfile) : null), options.inputPath, process.cwd()];
        // NOTE(review): the `outfile` fallback below feeds the *directory* of the outfile into
        // `path.basename()`, which yields the directory's name rather than the file's name -- confirm intent.
        var dumpName = path.basename(options.inputFilename || options.moduleName || (options.outfile ? path.dirname(options.outfile) : null) || options.defaultModuleName || errname)
        .replace(/\.[a-z]{1,5}$/i, '')          // remove extension .y, .yacc, .jison, ...whatever
        .replace(/[^a-z0-9_]/ig, '_');          // make sure it's legal in the destination filesystem: the least common denominator.
        if (dumpName === '' || dumpName === '_') {
            dumpName = '__bugger__';
        }
        err_id = err_id || 'XXX';

        var ts = new Date();
        var tm = ts.getUTCFullYear() +
            '_' + pad(ts.getUTCMonth() + 1) +
            '_' + pad(ts.getUTCDate()) +
            'T' + pad(ts.getUTCHours()) +
            '' + pad(ts.getUTCMinutes()) +
            '' + pad(ts.getUTCSeconds()) +
            '.' + pad(ts.getUTCMilliseconds(), 3) +
            'Z';

        dumpName += '.fatal_' + err_id + '_dump_' + tm + '.js';

        for (var i = 0, l = dumpPaths.length; i < l; i++) {
            if (!dumpPaths[i]) {
                continue;
            }

            try {
                attempted = path.normalize(dumpPaths[i] + '/' + dumpName);
                fs.writeFileSync(attempted, sourcecode, 'utf8');
                dumpfile = attempted;
                console.error("****** offending generated " + errname + " source code dumped into file: ", dumpfile);
                break;          // abort loop once a dump action was successful!
            } catch (ex3) {
                //console.error("generated " + errname + " source code fatal DUMPING error ATTEMPT: ", i, " = ", ex3.message, " -- while attempting to dump into file: ", attempted, "\n", ex3.stack);
                // only the failure of the very last candidate is reported:
                if (i === l - 1) {
                    throw ex3;
                }
            }
        }
    } catch (ex2) {
        console.error("generated " + errname + " source code fatal DUMPING error: ", ex2.message, " -- while attempting to dump into file: ", attempted, "\n", ex2.stack);
    }

    // augment the exception info, when available:
    if (ex) {
        ex.offending_source_code = sourcecode;
        ex.offending_source_title = errname;
        ex.offending_source_dumpfile = dumpfile;
    }
}




//
// `code_execution_rig` is a function which gets executed, while it is fed the `sourcecode` as a parameter.
// When the `code_execution_rig` crashes, its failure is caught and (using the `options`) the sourcecode
// is dumped to file for later diagnosis.
//
// Two options drive the internal behaviour:
//
// - options.dumpSourceCodeOnFailure        -- default: FALSE
// - options.throwErrorOnCompileFailure     -- default: FALSE
//
// Dumpfile naming and path are determined through these options:
//
// - options.outfile
// - options.inputPath
// - options.inputFilename
// - options.moduleName
// - options.defaultModuleName
//
/**
 * Runs `code_execution_rig(sourcecode, options, errname, debug)` with the
 * caller's `this` and returns its result.
 *
 * When the rig throws (or is not a function), the failure is caught: the
 * source is dumped to file when `options.dumpSourceCodeOnFailure` is set, and
 * the exception is re-thrown only when `options.throwErrorOnCompileFailure`
 * is set. Otherwise `undefined` is returned.
 *
 * @param {string} sourcecode - the code to feed to the rig
 * @param {Function} code_execution_rig - the function which compiles/executes the code
 * @param {Object} [options] - see the option lists above
 * @param {string} [title='exec_test'] - title used in diagnostics and in the dump file name
 * @returns {*} whatever the rig returned, or `undefined` on (non-rethrown) failure
 */
function exec_and_diagnose_this_stuff(sourcecode, code_execution_rig, options, title) {
    options = options || {};
    var errname = "" + (title || "exec_test");
    var err_id = errname.replace(/[^a-z0-9_]/ig, "_");
    if (err_id.length === 0) {
        err_id = "exec_crash";
    }
    // compile-time switch: set to 1 or 2 to get (very) verbose diagnostics on the console
    const debug = 0;

    if (debug) console.warn('generated ' + errname + ' code under EXEC TEST.');
    if (debug > 1) console.warn(`
        ######################## source code ##########################
        ${sourcecode}
        ######################## source code ##########################
    `);

    var p;
    try {
        // p = eval(sourcecode);
        if (typeof code_execution_rig !== 'function') {
            throw new Error("safe-code-exec-and-diag: code_execution_rig MUST be a JavaScript function");
        }
        chkBugger$1(sourcecode);
        p = code_execution_rig.call(this, sourcecode, options, errname, debug);
    } catch (ex) {
        if (debug > 1) console.log("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@");

        if (debug) console.log("generated " + errname + " source code fatal error: ", ex.message);

        if (debug > 1) console.log("exec-and-diagnose options:", options);

        if (debug > 1) console.log("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@");

        if (options.dumpSourceCodeOnFailure) {
            dumpSourceToFile(sourcecode, errname, err_id, options, ex);
        }

        if (options.throwErrorOnCompileFailure) {
            throw ex;
        }
    }
    return p;
}






// Public face of the safe-code-execution helper library.
var code_exec$1 = {
    exec: exec_and_diagnose_this_stuff,
    dump: dumpSourceToFile
};

//
// Parse a given chunk of code to an AST.
// // MIT Licensed // // // This code is intended to help test and diagnose arbitrary chunks of code, answering questions like this: // // would the given code compile and possibly execute correctly, when included in a lexer, parser or other engine? // //import astUtils from '@gerhobbelt/ast-util'; assert$1(recast); var types = recast.types; assert$1(types); var namedTypes = types.namedTypes; assert$1(namedTypes); var b = types.builders; assert$1(b); // //assert(astUtils); function parseCodeChunkToAST(src, options) { // src = src // .replace(/@/g, '\uFFDA') // .replace(/#/g, '\uFFDB') // ; var ast = recast.parse(src); return ast; } function prettyPrintAST(ast, options) { var new_src; var s = recast.prettyPrint(ast, { tabWidth: 2, quote: 'single', arrowParensAlways: true, // Do not reuse whitespace (or anything else, for that matter) // when printing generically. reuseWhitespace: false }); new_src = s.code; new_src = new_src .replace(/\r\n|\n|\r/g, '\n') // platform dependent EOL fixup // // backpatch possible jison variables extant in the prettified code: // .replace(/\uFFDA/g, '@') // .replace(/\uFFDB/g, '#') ; return new_src; } // validate the given JavaScript snippet: does it compile? // // Return either the parsed AST (object) or an error message (string). function checkActionBlock(src, yylloc) { // make sure reasonable line numbers, etc. 
are reported in any // potential parse errors by pushing the source code down: if (yylloc && yylloc.first_line > 0) { var cnt = yylloc.first_line; var lines = new Array(cnt); src = lines.join('\n') + src; } if (!src.trim()) { return false; } try { var rv = parseCodeChunkToAST(src); return false; } catch (ex) { return ex.message || "code snippet cannot be parsed"; } } var parse2AST = { parseCodeChunkToAST, prettyPrintAST, checkActionBlock, }; function chkBugger$2(src) { src = String(src); if (src.match(/\bcov_\w+/)) { console.error('### ISTANBUL COVERAGE CODE DETECTED ###\n', src); } } /// HELPER FUNCTION: print the function in source code form, properly indented. /** @public */ function printFunctionSourceCode(f) { var src = String(f); chkBugger$2(src); return src; } const funcRe = /^function[\s\r\n]*[^\(]*\(([^\)]*)\)[\s\r\n]*\{([^]*?)\}$/; const arrowFuncRe = /^(?:(?:\(([^\)]*)\))|(?:([^\(\)]+)))[\s\r\n]*=>[\s\r\n]*(?:(?:\{([^]*?)\})|(?:(([^\s\r\n\{)])[^]*?)))$/; /// HELPER FUNCTION: print the function **content** in source code form, properly indented, /// ergo: produce the code for inlining the function. /// /// Also supports ES6's Arrow Functions: /// /// ``` /// function a(x) { return x; } ==> 'return x;' /// function (x) { return x; } ==> 'return x;' /// (x) => { return x; } ==> 'return x;' /// (x) => x; ==> 'return x;' /// (x) => do(1), do(2), x; ==> 'return (do(1), do(2), x);' /// /** @public */ function printFunctionSourceCodeContainer(f) { var action = printFunctionSourceCode(f).trim(); var args; // Also cope with Arrow Functions (and inline those as well?). // See also https://github.com/zaach/jison-lex/issues/23 var m = funcRe.exec(action); if (m) { args = m[1].trim(); action = m[2].trim(); } else { m = arrowFuncRe.exec(action); if (m) { if (m[2]) { // non-bracketed arguments: args = m[2].trim(); } else { // bracketed arguments: may be empty args list! args = m[1].trim(); } if (m[5]) { // non-bracketed version: implicit `return` statement! 
// // Q: Must we make sure we have extra braces around the return value // to prevent JavaScript from inserting implit EOS (End Of Statement) // markers when parsing this, when there are newlines in the code? // A: No, we don't have to as arrow functions rvalues suffer from this // same problem, hence the arrow function's programmer must already // have formatted the code correctly. action = m[4].trim(); action = 'return ' + action + ';'; } else { action = m[3].trim(); } } else { var e = new Error('Cannot extract code from function'); e.subject = action; throw e; } } return { args: args, code: action, }; } var stringifier = { printFunctionSourceCode, printFunctionSourceCodeContainer, }; // // // function detectIstanbulGlobal() { const gcv = "__coverage__"; const globalvar = new Function('return this')(); var coverage = globalvar[gcv]; return coverage || false; } var helpers = { rmCommonWS: rmCommonWS$1, camelCase, mkIdentifier: mkIdentifier$2, dquote: dquote$1, exec: code_exec$1.exec, dump: code_exec$1.dump, parseCodeChunkToAST: parse2AST.parseCodeChunkToAST, prettyPrintAST: parse2AST.prettyPrintAST, checkActionBlock: parse2AST.checkActionBlock, printFunctionSourceCode: stringifier.printFunctionSourceCode, printFunctionSourceCodeContainer: stringifier.printFunctionSourceCodeContainer, detectIstanbulGlobal, }; /* * Introduces a typal object to make classical/prototypal patterns easier * Plus some AOP sugar * * By Zachary Carter <zach@carter.name> * MIT Licensed */ var mkIdentifier$1 = helpers.mkIdentifier; var create = Object.create || function (o) { function F(){} F.prototype = o; return new F(); }; var position = /^(before|after)/; // basic method layering // always returns original method's return value function layerMethod(pos, key, prop, fun) { if (pos === 'after') { return function () { var ret = prop.apply(this, arguments); var args = [].slice.call(arguments); args.splice(0, 0, ret); fun.apply(this, args); return ret; }; } else if (pos === 'before') { return 
function () { fun.apply(this, arguments); var ret = prop.apply(this, arguments); return ret; }; } return fun; } // mixes each argument's own properties into calling object, // overwriting them or layering them. i.e. an object method 'meth' is // layered by mixin methods 'beforemeth' or 'aftermeth' function typal_mix() { var i, o, k; for (i = 0; i < arguments.length; i++) { o = arguments[i]; if (!o) continue; if (Object.prototype.hasOwnProperty.call(o, 'constructor')) { this.constructor = o.constructor; } if (Object.prototype.hasOwnProperty.call(o, 'toString')) { this.toString = o.toString; } for (k in o) { if (Object.prototype.hasOwnProperty.call(o, k)) { var match = k.match(position); var key = k.replace(position, ''); if (match && typeof this[key] === 'function') { this[key] = layerMethod(match[0], key, this[key], o[k]); } else { this[k] = o[k]; } } } } return this; } // Same as typal_mix but also camelCases every object member and 'standardizes' the key set of every input // argument through a caLLback function. // // This is useful for processing options with dashes in their key, e.g. `token-stack` --> tokenStack. function typal_camel_mix(cb) { var i, o, k; // Convert first character to lowercase function lcase0(s) { return s.replace(/^\w/, function (match) { return match.toLowerCase(); }); } for (i = 1; i < arguments.length; i++) { o = arguments[i]; if (!o) continue; if (Object.prototype.hasOwnProperty.call(o, 'constructor')) { this.constructor = o.constructor; } if (Object.prototype.hasOwnProperty.call(o, 'toString')) { this.toString = o.toString; } if (cb) { o = cb(o); } for (k in o) { if (Object.prototype.hasOwnProperty.call(o, k)) { var nk = mkIdentifier$1(k); var match = k.match(position); var key = k.replace(position, ''); // This anticipates before/after members to be camelcased already, e.g. 
// 'afterParse()' for layering 'parse()': var alt_key = lcase0(key); if (match && typeof this[key] === 'function') { this[key] = layerMethod(match[0], key, this[key], o[k]); } else if (match && typeof this[alt_key] === 'function') { this[alt_key] = layerMethod(match[0], alt_key, this[alt_key], o[k]); } else { this[nk] = o[k]; } } } } return this; } var typal = { // extend object with own properties of each argument mix: typal_mix, camelMix: typal_camel_mix, // sugar for object begetting and mixing // - Object.create(typal).mix(etc, etc); // + typal.beget(etc, etc); beget: function typal_beget() { return arguments.length ? typal_mix.apply(create(this), arguments) : create(this); }, // Creates a new Class function based on an object with a constructor method construct: function typal_construct() { var o = typal_mix.apply(create(this), arguments); var constructor = o.constructor; var Klass = o.constructor = function () { return constructor.apply(this, arguments); }; Klass.prototype = o; Klass.mix = typal_mix; // allow for easy singleton property extension return Klass; }, // no op constructor: function typal_constructor() { return this; } }; // Set class to wrap arrays var setMixin = { constructor: function Set_constructor(set, raw) { this._items = []; if (set && set.constructor === Array) { this._items = raw ? 
set: set.slice(0); } else if (arguments.length) { this._items = [].slice.call(arguments, 0); } }, concat: function concat(setB) { this._items.push.apply(this._items, setB._items || setB); return this; }, eq: function eq(set) { return this._items.length === set._items.length && this.subset(set) && this.superset(set); }, indexOf: function indexOf(item) { if (item && item.eq) { for (var k = 0; k < this._items.length; k++) { if (item.eq(this._items[k])) { return k; } } return -1; } return this._items.indexOf(item); }, intersection: function intersection(set) { return this.filter(function intersection_filter(elm) { return set.contains(elm); }); }, complement: function complement(set) { var that = this; return set.filter(function sub_complement(elm) { return !that.contains(elm); }); }, subset: function subset(set) { var cont = true; for (var i = 0; i < this._items.length && cont; i++) { cont = cont && set.contains(this._items[i]); } return cont; }, superset: function superset(set) { return set.subset(this); }, joinSet: function joinSet(set) { return this.concat(this.complement(set)); }, contains: function contains(item) { return this.indexOf(item) !== -1; }, item: function item(v) { return this._items[v]; }, i: function i(v) { return this._items[v]; }, assign: function assign(index, value) { this._items[index] = value; return this; }, first: function first() { return this._items[0]; }, last: function last() { return this._items[this._items.length - 1]; }, size: function size() { return this._items.length; }, isEmpty: function isEmpty() { return this._items.length === 0; }, copy: function copy() { return new Set(this._items); }, toString: function toString() { return this._items.toString(); } }; 'push shift unshift forEach some every join sort'.split(' ').forEach(function (e, i) { setMixin[e] = function () { return Array.prototype[e].apply(this._items, arguments); }; //setMixin[e].name = e; }); 'filter slice map'.split(' ').forEach(function (e, i) { setMixin[e] = function 
() { return new Set(Array.prototype[e].apply(this._items, arguments), true); }; //setMixin[e].name = e; }); var Set = typal.construct(setMixin); /* parser generated by jison 0.6.1-215 */ /* * Returns a Parser object of the following structure: * * Parser: { * yy: {} The so-called "shared state" or rather the *source* of it; * the real "shared state" `yy` passed around to * the rule actions, etc. is a derivative/copy of this one, * not a direct reference! * } * * Parser.prototype: { * yy: {}, * EOF: 1, * TERROR: 2, * * trace: function(errorMessage, ...), * * JisonParserError: function(msg, hash), * * quoteName: function(name), * Helper function which can be overridden by user code later on: put suitable * quotes around literal IDs in a description string. * * originalQuoteName: function(name), * The basic quoteName handler provided by JISON. * `cleanupAfterParse()` will clean up and reset `quoteName()` to reference this function * at the end of the `parse()`. * * describeSymbol: function(symbol), * Return a more-or-less human-readable description of the given symbol, when * available, or the symbol itself, serving as its own 'description' for lack * of something better to serve up. * * Return NULL when the symbol is unknown to the parser. * * symbols_: {associative list: name ==> number}, * terminals_: {associative list: number ==> name}, * nonterminals: {associative list: rule-name ==> {associative list: number ==> rule-alt}}, * terminal_descriptions_: (if there are any) {associative list: number ==> description}, * productions_: [...], * * performAction: function parser__performAction(yytext, yyleng, yylineno, yyloc, yystate, yysp, yyvstack, yylstack, yystack, yysstack), * * The function parameters and `this` have the following value/meaning: * - `this` : reference to the `yyval` internal object, which has members (`$` and `_$`) * to store/reference the rule value `$$` and location info `@$`. * * One important thing to note about `this` a.k.a. 
`yyval`: every *reduce* action gets * to see the same object via the `this` reference, i.e. if you wish to carry custom * data from one reduce action through to the next within a single parse run, then you * may get nasty and use `yyval` a.k.a. `this` for storing your own semi-permanent data. * * `this.yy` is a direct reference to the `yy` shared state object. * * `%parse-param`-specified additional `parse()` arguments have been added to this `yy` * object at `parse()` start and are therefore available to the action code via the * same named `yy.xxxx` attributes (where `xxxx` represents an identifier name from * the `%parse-param` list). * * - `yytext` : reference to the lexer value which belongs to the last lexer token used * to match this rule. This is *not* the look-ahead token, but the last token * that's actually part of this rule. * * Formulated another way, `yytext` is the value of the token immediately preceding * the current look-ahead token. * Caveats apply for rules which don't require look-ahead, such as epsilon rules. * * - `yyleng` : ditto as `yytext`, only now for the lexer.yyleng value. * * - `yylineno`: ditto as `yytext`, only now for the lexer.yylineno value. * * - `yyloc` : ditto as `yytext`, only now for the lexer.yylloc lexer token location info. * * WARNING: since jison 0.4.18-186 this entry may be NULL/UNDEFINED instead * of an empty object when no suitable location info can be provided. * * - `yystate` : the current parser state number, used internally for dispatching and * executing the action code chunk matching the rule currently being reduced. * * - `yysp` : the current state stack position (a.k.a. 'stack pointer') * * This one comes in handy when you are going to do advanced things to the parser * stacks, all of which are accessible from your action code (see the next entries below). * * Also note that you can access this and other stack index values using the new double-hash * syntax, i.e. 
`##$ === ##0 === yysp`, while `##1` is the stack index for all things * related to the first rule term, just like you have `$1`, `@1` and `#1`. * This is made available to write very advanced grammar action rules, e.g. when you want * to investigate the parse state stack in your action code, which would, for example, * be relevant when you wish to implement error diagnostics and reporting schemes similar * to the work described here: * * + Pottier, F., 2016. Reachability and error diagnosis in LR(1) automata. * In Journées Francophones des Langages Applicatifs. * * + Jeffery, C.L., 2003. Generating LR syntax error messages from examples. * ACM Transactions on Programming Languages and Systems (TOPLAS), 25(5), pp.631–640. * * - `yyrulelength`: the current rule's term count, i.e. the number of entries occupied on the stack. * * This one comes in handy when you are going to do advanced things to the parser * stacks, all of which are accessible from your action code (see the next entries below). * * - `yyvstack`: reference to the parser value stack. Also accessed via the `$1` etc. * constructs. * * - `yylstack`: reference to the parser token location stack. Also accessed via * the `@1` etc. constructs. * * WARNING: since jison 0.4.18-186 this array MAY contain slots which are * UNDEFINED rather than an empty (location) object, when the lexer/parser * action code did not provide a suitable location info object when such a * slot was filled! * * - `yystack` : reference to the parser token id stack. Also accessed via the * `#1` etc. constructs. * * Note: this is a bit of a **white lie** as we can statically decode any `#n` reference to * its numeric token id value, hence that code wouldn't need the `yystack` but *you* might * want to access this array for your own purposes, such as error analysis as mentioned above! 
* * Note that this stack stores the current stack of *tokens*, that is the sequence of * already parsed=reduced *nonterminals* (tokens representing rules) and *terminals* * (lexer tokens *shifted* onto the stack until the rule they belong to is found and * *reduced*). * * - `yysstack`: reference to the parser state stack. This one carries the internal parser * *states* such as the one in `yystate`, which are used to represent * the parser state machine in the *parse table*. *Very* *internal* stuff, * what can I say? If you access this one, you're clearly doing wicked things * * - `...` : the extra arguments you specified in the `%parse-param` statement in your * grammar definition file. * * table: [...], * State transition table * ---------------------- * * index levels are: * - `state` --> hash table * - `symbol` --> action (number or array) * * If the `action` is an array, these are the elements' meaning: * - index [0]: 1 = shift, 2 = reduce, 3 = accept * - index [1]: GOTO `state` * * If the `action` is a number, it is the GOTO `state` * * defaultActions: {...}, * * parseError: function(str, hash, ExceptionClass), * yyError: function(str, ...), * yyRecovering: function(), * yyErrOk: function(), * yyClearIn: function(), * * constructParseErrorInfo: function(error_message, exception_object, expected_token_set, is_recoverable), * Helper function **which will be set up during the first invocation of the `parse()` method**. * Produces a new errorInfo 'hash object' which can be passed into `parseError()`. * See its use in this parser kernel in many places; example usage: * * var infoObj = parser.constructParseErrorInfo('fail!', null, * parser.collect_expected_token_set(state), true); * var retVal = parser.parseError(infoObj.errStr, infoObj, parser.JisonParserError); * * originalParseError: function(str, hash, ExceptionClass), * The basic `parseError` handler provided by JISON. 
* `cleanupAfterParse()` will clean up and reset `parseError()` to reference this function * at the end of the `parse()`. * * options: { ... parser %options ... }, * * parse: function(input[, args...]), * Parse the given `input` and return the parsed value (or `true` when none was provided by * the root action, in which case the parser is acting as a *matcher*). * You MAY use the additional `args...` parameters as per `%parse-param` spec of this grammar: * these extra `args...` are added verbatim to the `yy` object reference as member variables. * * WARNING: * Parser's additional `args...` parameters (via `%parse-param`) MAY conflict with * any attributes already added to `yy` by the jison run-time; * when such a collision is detected an exception is thrown to prevent the generated run-time * from silently accepting this confusing and potentially hazardous situation! * * The lexer MAY add its own set of additional parameters (via the `%parse-param` line in * the lexer section of the grammar spec): these will be inserted in the `yy` shared state * object and any collision with those will be reported by the lexer via a thrown exception. * * cleanupAfterParse: function(resultValue, invoke_post_methods, do_not_nuke_errorinfos), * Helper function **which will be set up during the first invocation of the `parse()` method**. * This helper API is invoked at the end of the `parse()` call, unless an exception was thrown * and `%options no-try-catch` has been defined for this grammar: in that case this helper MAY * be invoked by calling user code to ensure the `post_parse` callbacks are invoked and * the internal parser gets properly garbage collected under these particular circumstances. * * yyMergeLocationInfo: function(first_index, last_index, first_yylloc, last_yylloc, dont_look_back), * Helper function **which will be set up during the first invocation of the `parse()` method**. * This helper API can be invoked to calculate a spanning `yylloc` location info object. 
* * Note: %epsilon rules MAY specify no `first_index` and `first_yylloc`, in which case * this function will attempt to obtain a suitable location marker by inspecting the location stack * backwards. * * For more info see the documentation comment further below, immediately above this function's * implementation. * * lexer: { * yy: {...}, A reference to the so-called "shared state" `yy` once * received via a call to the `.setInput(input, yy)` lexer API. * EOF: 1, * ERROR: 2, * JisonLexerError: function(msg, hash), * parseError: function(str, hash, ExceptionClass), * setInput: function(input, [yy]), * input: function(), * unput: function(str), * more: function(), * reject: function(), * less: function(n), * pastInput: function(n), * upcomingInput: function(n), * showPosition: function(), * test_match: function(regex_match_array, rule_index, ...), * next: function(...), * lex: function(...), * begin: function(condition), * pushState: function(condition), * popState: function(), * topState: function(), * _currentRules: function(), * stateStackSize: function(), * cleanupAfterLex: function() * * options: { ... lexer %options ... 
}, * * performAction: function(yy, yy_, $avoiding_name_collisions, YY_START, ...), * rules: [...], * conditions: {associative list: name ==> set}, * } * } * * * token location info (@$, _$, etc.): { * first_line: n, * last_line: n, * first_column: n, * last_column: n, * range: [start_number, end_number] * (where the numbers are indexes into the input string, zero-based) * } * * --- * * The `parseError` function receives a 'hash' object with these members for lexer and * parser errors: * * { * text: (matched text) * token: (the produced terminal token, if any) * token_id: (the produced terminal token numeric ID, if any) * line: (yylineno) * loc: (yylloc) * } * * parser (grammar) errors will also provide these additional members: * * { * expected: (array describing the set of expected tokens; * may be UNDEFINED when we cannot easily produce such a set) * state: (integer (or array when the table includes grammar collisions); * represents the current internal state of the parser kernel. * can, for example, be used to pass to the `collect_expected_token_set()` * API to obtain the expected token set) * action: (integer; represents the current internal action which will be executed) * new_state: (integer; represents the next/planned internal state, once the current * action has executed) * recoverable: (boolean: TRUE when the parser MAY have an error recovery rule * available for this particular error) * state_stack: (array: the current parser LALR/LR internal state stack; this can be used, * for instance, for advanced error analysis and reporting) * value_stack: (array: the current parser LALR/LR internal `$$` value stack; this can be used, * for instance, for advanced error analysis and reporting) * location_stack: (array: the current parser LALR/LR internal location stack; this can be used, * for instance, for advanced error analysis and reporting) * yy: (object: the current parser internal "shared state" `yy` * as is also available in the rule actions; this can be 
used, * for instance, for advanced error analysis and reporting) * lexer: (reference to the current lexer instance used by the parser) * parser: (reference to the current parser instance) * } * * while `this` will reference the current parser instance. * * When `parseError` is invoked by the lexer, `this` will still reference the related *parser* * instance, while these additional `hash` fields will also be provided: * * { * lexer: (reference to the current lexer instance which reported the error) * } * * When `parseError` is invoked by the parser due to a **JavaScript exception** being fired * from either the parser or lexer, `this` will still reference the related *parser* * instance, while these additional `hash` fields will also be provided: * * { * exception: (reference to the exception thrown) * } * * Please do note that in the latter situation, the `expected` field will be omitted as * this type of failure is assumed not to be due to *parse errors* but rather due to user * action code in either parser or lexer failing unexpectedly. * * --- * * You can specify parser options by setting / modifying the `.yy` object of your Parser instance. * These options are available: * * ### options which are global for all parser instances * * Parser.pre_parse: function(yy) * optional: you can specify a pre_parse() function in the chunk following * the grammar, i.e. after the last `%%`. * Parser.post_parse: function(yy, retval, parseInfo) { return retval; } * optional: you can specify a post_parse() function in the chunk following * the grammar, i.e. after the last `%%`. When it does not return any value, * the parser will return the original `retval`. * * ### options which can be set up per parser instance * * yy: { * pre_parse: function(yy) * optional: is invoked before the parse cycle starts (and before the first * invocation of `lex()`) but immediately after the invocation of * `parser.pre_parse()`.
* post_parse: function(yy, retval, parseInfo) { return retval; } * optional: is invoked when the parse terminates due to success ('accept') * or failure (even when exceptions are thrown). * `retval` contains the return value to be produced by `Parser.parse()`; * this function can override the return value by returning another. * When it does not return any value, the parser will return the original * `retval`. * This function is invoked immediately before `parser.post_parse()`. * * parseError: function(str, hash, ExceptionClass) * optional: overrides the default `parseError` function. * quoteName: function(name), * optional: overrides the default `quoteName` function. * } * * parser.lexer.options: { * pre_lex: function() * optional: is invoked before the lexer is invoked to produce another token. * `this` refers to the Lexer object. * post_lex: function(token) { return token; } * optional: is invoked when the lexer has produced a token `token`; * this function can override the returned token value by returning another. * When it does not return any (truthy) value, the lexer will return * the original `token`. * `this` refers to the Lexer object. * * ranges: boolean * optional: `true` ==> token location info will include a .range[] member. * flex: boolean * optional: `true` ==> flex-like lexing behaviour where the rules are tested * exhaustively to find the longest match. * backtrack_lexer: boolean * optional: `true` ==> lexer regexes are tested in order and for each matching regex * the action code is invoked; the lexer terminates the scan when a token is returned by the action code. * xregexp: boolean * optional: `true` ==> lexer rule regexes are "extended regex format" requiring the * `XRegExp` library. When this `%option` has not been specified at compile time, all lexer * rule regexes have been written as standard JavaScript RegExp expressions.
* } */ // See also: // http://stackoverflow.com/questions/1382107/whats-a-good-way-to-extend-error-in-javascript/#35881508 // but we keep the prototype.constructor and prototype.name assignment lines too for compatibility // with userland code which might access the derived class in a 'classic' way. function JisonParserError(msg, hash) { Object.defineProperty(this, 'name', { enumerable: false, writable: false, value: 'JisonParserError' }); if (msg == null) msg = '???'; Object.defineProperty(this, 'message', { enumerable: false, writable: true, value: msg }); this.hash = hash; var stacktrace; if (hash && hash.exception instanceof Error) { var ex2 = hash.exception; this.message = ex2.message || msg; stacktrace = ex2.stack; } if (!stacktrace) { if (Error.hasOwnProperty('captureStackTrace')) { // V8/Chrome engine Error.captureStackTrace(this, this.constructor); } else { stacktrace = (new Error(msg)).stack; } } if (stacktrace) { Object.defineProperty(this, 'stack', { enumerable: false, writable: false, value: stacktrace }); } } if (typeof Object.setPrototypeOf === 'function') { Object.setPrototypeOf(JisonParserError.prototype, Error.prototype); } else { JisonParserError.prototype = Object.create(Error.prototype); } JisonParserError.prototype.constructor = JisonParserError; JisonParserError.prototype.name = 'JisonParserError'; // helper: reconstruct the productions[] table function bp(s) { var rv = []; var p = s.pop; var r = s.rule; for (var i = 0, l = p.length; i < l; i++) { rv.push([ p[i], r[i] ]); } return rv; } // helper: reconstruct the defaultActions[] table function bda(s) { var rv = {}; var d = s.idx; var g = s.goto; for (var i = 0, l = d.length; i < l; i++) { var j = d[i]; rv[j] = g[i]; } return rv; } // helper: reconstruct the 'goto' table function bt(s) { var rv = []; var d = s.len; var y = s.symbol; var t = s.type; var a = s.state; var m = s.mode; var g = s.goto; for (var i = 0, l = d.length; i < l; i++) { var n = d[i]; var q = {}; for (var j = 0; j < n; j++) 
{ var z = y.shift(); switch (t.shift()) { case 2: q[z] = [ m.shift(), g.shift() ]; break; case 0: q[z] = a.shift(); break; default: // type === 1: accept q[z] = [ 3 ]; } } rv.push(q); } return rv; } // helper: runlength encoding with increment step: code, length: step (default step = 0) // `this` references an array function s(c, l, a) { a = a || 0; for (var i = 0; i < l; i++) { this.push(c); c += a; } } // helper: duplicate sequence from *relative* offset and length. // `this` references an array function c(i, l) { i = this.length - i; for (l += i; i < l; i++) { this.push(this[i]); } } // helper: unpack an array using helpers and data, all passed in an array argument 'a'. function u(a) { var rv = []; for (var i = 0, l = a.length; i < l; i++) { var e = a[i]; // Is this entry a helper function? if (typeof e === 'function') { i++; e.apply(rv, a[i]); } else { rv.push(e); } } return rv; } var parser$1 = {