UNPKG

chevrotain

Version:

Chevrotain is a high performance fault Tolerant Javascript parsing DSL for building recursive decent parsers

1,048 lines (1,047 loc) 150 kB
(function (root, factory) { /* istanbul ignore next */ if (typeof define === 'function' && define.amd) { // AMD. Register as an anonymous module unless amdModuleId is set define('chevrotain', ["lodash"], function (a0) { return (root['API'] = factory(a0)); }); } else if (typeof exports === 'object') { // Node. Does not work with strict CommonJS, but // only CommonJS-like environments that support module.exports, // like Node. module.exports = factory(require("lodash")); } else { root['chevrotain'] = factory(_); } }(this, function (_) { /*! chevrotain - v0.4.5 - 2015-07-11 */ var chevrotain; (function (chevrotain) { var lang; (function (lang) { var nameRegex = /^\s*function\s*(\S*)\s*\(/; /* istanbul ignore next */ var hasNativeName = typeof (function f() { }).name !== "undefined"; function classNameFromInstance(instance) { return functionName(instance.constructor); } lang.classNameFromInstance = classNameFromInstance; /* istanbul ignore next too many hacks for IE here*/ function functionName(func) { if (hasNativeName) { return func.name; } else if (func.rdtFuncNameCache666) { // super 'special' property name on INSTANCE to avoid hurting those who use browsers that // do not support name property even more (IE...) return func.rdtFuncNameCache666; } else { var name = func.toString().match(nameRegex)[1]; func.rdtFuncNameCache666 = name; return name; } } lang.functionName = functionName; /** * simple Hashtable between a string and some generic value * this should be removed once typescript supports ES6 style Hashtable */ var HashTable = (function () { function HashTable() { this._state = {}; } HashTable.prototype.keys = function () { return _.keys(this._state); }; HashTable.prototype.values = function () { return _.values(this._state); }; HashTable.prototype.put = function (key, value) { this._state[key] = value; }; HashTable.prototype.putAll = function (other) { this._state = _.assign(this._state, other._state); }; HashTable.prototype.get = function (key) { // To avoid edge case with a key called "hasOwnProperty" we need to perform the commented out check below // -> if (Object.prototype.hasOwnProperty.call(this._state, key)) { ... } <- // however this costs nearly 25% of the parser's runtime. // if someone decides to name their Parser class "hasOwnProperty" they deserve what they will get :) return this._state[key]; }; HashTable.prototype.containsKey = function (key) { return _.has(this._state, key); }; return HashTable; })(); lang.HashTable = HashTable; })/* istanbul ignore next */ (lang = chevrotain.lang || /* istanbul ignore next */ (chevrotain.lang = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var __extends = this.__extends || function (d, b) { for (var p in b) /* istanbul ignore next */ if (b.hasOwnProperty(p)) d[p] = b[p]; function __() { this.constructor = d; } __.prototype = b.prototype; d.prototype = new __(); }; // using only root module name ('chevrotain') and not a longer name ('chevrotain.tokens') // because the external and internal API must have the same names for d.ts definition files to be valid // TODO: examine module in module to reduce spam on chevrotain namespace var chevrotain; (function (chevrotain) { var lang = chevrotain.lang; function tokenName(clazz) { // used to support js inheritance patterns that do not use named functions // in that situation setting a property tokenName on a token constructor will // enable producing readable error messages. if (_.isString(clazz.tokenName)) { return clazz.tokenName; } else { return lang.functionName(clazz); } } chevrotain.tokenName = tokenName; /** * utility to help the poor souls who are still stuck writing pure javascript 5.1 * extend and create Token subclasses in a less verbose manner * * @param {string} tokenName the name of the new TokenClass * @param {*} patternOrParent Pa * @param {Function} parentConstructor the Token class to be extended * @returns {Function} a constructor for the new extended Token subclass */ function extendToken(tokenName, patternOrParent, parentConstructor) { if (patternOrParent === void 0) { patternOrParent = undefined; } if (parentConstructor === void 0) { parentConstructor = Token; } var pattern; if (_.isRegExp(patternOrParent) || patternOrParent === chevrotain.Lexer.SKIPPED || patternOrParent === chevrotain.Lexer.NA) { pattern = patternOrParent; } else if (_.isFunction(patternOrParent)) { parentConstructor = patternOrParent; pattern = undefined; } var derivedCostructor = function () { parentConstructor.apply(this, arguments); }; // static properties mixing _.forOwn(parentConstructor, function (v, k) { derivedCostructor[k] = v; }); // the tokenName property will be used by the Parser for Error Messages if the Token's constructor is anonymous derivedCostructor.tokenName = tokenName; derivedCostructor.prototype = Object.create(parentConstructor.prototype); derivedCostructor.prototype.constructor = derivedCostructor; if (!_.isUndefined(pattern)) { derivedCostructor.PATTERN = pattern; } return derivedCostructor; } chevrotain.extendToken = extendToken; var Token = (function () { /** * @param {string} image the textual representation of the Token as it appeared in the text * @param {number} offset offset of the first character of the Token * @param {number} startLine line of the first character of the Token * @param {number} startColumn column of the first character of the Token * @param {number} endLine line of the last character of the Token * @param {number} endColumn column of the last character of the Token * * Things to note: * * "do" {startColumn : 1, endColumn: 2} --> the range is inclusive to exclusive 1...2 (2 chars long). * * "\n" {startLine : 1, endLine: 1} --> a lineTerminator as the last character does not effect the Token's line numbering. * * "'hello\tworld\uBBBB'" {image: "'hello\tworld\uBBBB'"} --> a Token's image is the "literal" text * (unicode escaping is untouched). */ function Token(image, offset, startLine, startColumn, endLine, endColumn) { if (endLine === void 0) { endLine = startLine; } if (endColumn === void 0) { endColumn = startColumn + image.length - 1; } this.image = image; this.offset = offset; this.startLine = startLine; this.startColumn = startColumn; this.endLine = endLine; this.endColumn = endColumn; // this marks if a Token does not really exist and has been inserted "artificially" during parsing in rule error recovery this.isInsertedInRecovery = false; } return Token; })(); chevrotain.Token = Token; /** * a special kind of Token which does not really exist in the input * (hence the 'Virtual' prefix). These type of Tokens can be used as special markers: * for example, EOF (end-of-file). */ var VirtualToken = (function (_super) { __extends(VirtualToken, _super); function VirtualToken() { _super.call(this, "", -1, -1, -1, -1, -1); } return VirtualToken; })(Token); chevrotain.VirtualToken = VirtualToken; var EOF = (function (_super) { __extends(EOF, _super); function EOF() { _super.apply(this, arguments); } return EOF; })(VirtualToken); chevrotain.EOF = EOF; })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); // using only root module name ('chevrotain') and not a longer name ('chevrotain.lexer') // because the external and internal API must have the same names for d.ts definition files to be valid // TODO: examine module in module to reduce spam on chevrotain namespace var chevrotain; (function (chevrotain) { var Lexer = (function () { /** * @param {Function[]} tokenClasses constructor functions for the Tokens types this scanner will support * These constructors must be in one of three forms: * * 1. With a PATTERN property that has a RegExp value for tokens to match: * example: -->class Integer extends Token { static PATTERN = /[1-9]\d }<-- * * 2. With a PATTERN property that has a RegExp value AND an IGNORE property with boolean value true. * These tokens will be matched but not as part of the main token vector. * this is usually used for ignoring whitespace/comments * example: --> class Whitespace extends Token { static PATTERN = /(\t| )/; static IGNORE = true}<-- * * 3. With a PATTERN property that has the value of the var Lexer.NA defined above. * This is a convenience form used to avoid matching Token classes that only act as categories. * example: -->class Keyword extends Token { static PATTERN = NA }<-- * * * The following RegExp patterns are not supported: * a. '$' for match at end of input * b. /b global flag * c. /m multi-line flag * * The Lexer will identify the first pattern the matches, Therefor the order of Token Constructors passed * To the SimpleLexer's constructor is meaningful. If two patterns may match the same string, the longer one * should be before the shorter one. * * Note that there are situations in which we may wish to place the longer pattern after the shorter one. * For example: keywords vs Identifiers. * 'do'(/do/) and 'done'(/w+) * * * If the Identifier pattern appears before the 'do' pattern both 'do' and 'done' * will be lexed as an Identifier. * * * If the 'do' pattern appears before the Identifier pattern 'do' will be lexed correctly as a keyword. * however 'done' will be lexed as TWO tokens keyword 'do' and identifier 'ne'. * * To resolve this problem, add a static property on the keyword's Tokens constructor named: LONGER_ALT * example: * * export class Identifier extends Keyword { static PATTERN = /[_a-zA-Z][_a-zA-Z0-9]/ } * export class Keyword extends Token { * static PATTERN = lex.NA * static LONGER_ALT = Identifier * } * export class Do extends Keyword { static PATTERN = /do/ } * export class While extends Keyword { static PATTERN = /while/ } * export class Return extends Keyword { static PATTERN = /return/ } * * The lexer will then also attempt to match a (longer) Identifier each time a keyword is matched * * */ function Lexer(tokenClasses) { this.tokenClasses = tokenClasses; chevrotain.validatePatterns(tokenClasses); var analyzeResult = chevrotain.analyzeTokenClasses(tokenClasses); this.allPatterns = analyzeResult.allPatterns; this.patternIdxToClass = analyzeResult.patternIdxToClass; this.patternIdxToGroup = analyzeResult.patternIdxToGroup; this.patternIdxToLongerAltIdx = analyzeResult.patternIdxToLongerAltIdx; this.patternIdxToCanLineTerminator = analyzeResult.patternIdxToCanLineTerminator; this.emptyGroups = analyzeResult.emptyGroups; } /** * Will lex(Tokenize) a string. * Note that this can be called repeatedly on different strings as this method * does not modify the state of the Lexer. * * @param {string} text the string to lex * @returns {{tokens: {Token}[], errors: string[]}} */ Lexer.prototype.tokenize = function (text) { var match, i, j, matchAlt, longerAltIdx, matchedImage, imageLength, group, tokClass, newToken, canMatchedContainLineTerminator, fixForEndingInLT, c, droppedChar, lastLTIdx, errorMessage, lastCharIsLT; var orgInput = text; var offset = 0; var matchedTokens = []; var errors = []; var line = 1; var column = 1; var groups = _.clone(this.emptyGroups); while (text.length > 0) { match = null; for (i = 0; i < this.allPatterns.length; i++) { match = this.allPatterns[i].exec(text); if (match !== null) { // even though this pattern matched we must try a another longer alternative. // this can be used to prioritize keywords over identifers longerAltIdx = this.patternIdxToLongerAltIdx[i]; if (longerAltIdx) { matchAlt = this.allPatterns[longerAltIdx].exec(text); if (matchAlt && matchAlt[0].length > match[0].length) { match = matchAlt; i = longerAltIdx; } } break; } } if (match !== null) { matchedImage = match[0]; imageLength = matchedImage.length; group = this.patternIdxToGroup[i]; if (group !== undefined) { tokClass = this.patternIdxToClass[i]; newToken = new tokClass(matchedImage, offset, line, column); if (group === "default") { matchedTokens.push(newToken); } else { groups[group].push(newToken); } } text = text.slice(imageLength); offset = offset + imageLength; column = column + imageLength; // TODO: with newlines the column may change be assigned twice canMatchedContainLineTerminator = this.patternIdxToCanLineTerminator[i]; if (canMatchedContainLineTerminator) { var lineTerminatorsInMatch = chevrotain.countLineTerminators(matchedImage); // TODO: identify edge case of one token ending in '\r' and another one starting with '\n' if (lineTerminatorsInMatch !== 0) { line = line + lineTerminatorsInMatch; lastLTIdx = imageLength - 1; while (lastLTIdx >= 0) { c = matchedImage.charCodeAt(lastLTIdx); // scan in reverse to find last lineTerminator in image if (c === 13 || c === 10) { break; } lastLTIdx--; } column = imageLength - lastLTIdx; if (group !== undefined) { lastCharIsLT = lastLTIdx === imageLength - 1; fixForEndingInLT = lastCharIsLT ? -1 : 0; if (!(lineTerminatorsInMatch === 1 && lastCharIsLT)) { // if a token ends in a LT that last LT only affects the line numbering of following Tokens newToken.endLine = line + fixForEndingInLT; // the last LT in a token does not affect the endColumn either as the [columnStart ... columnEnd) // inclusive to exclusive range. newToken.endColumn = column - 1 + -fixForEndingInLT; } } } } } else { var errorStartOffset = offset; var errorLine = line; var errorColumn = column; var foundResyncPoint = false; while (!foundResyncPoint && text.length > 0) { // drop chars until we succeed in matching something droppedChar = text.charCodeAt(0); if (droppedChar === 10 || (droppedChar === 13 && (text.length === 1 || (text.length > 1 && text.charCodeAt(1) !== 10)))) { line++; column = 1; } else { // either when skipping the next char, or when consuming the following pattern // (which will have to start in a '\n' if we manage to consume it) column++; } text = text.substr(1); offset++; for (j = 0; j < this.allPatterns.length; j++) { foundResyncPoint = this.allPatterns[j].test(text); if (foundResyncPoint) { break; } } } // at this point we either re-synced or reached the end of the input text errorMessage = ("unexpected character: ->" + orgInput.charAt(errorStartOffset) + "<- at offset: " + errorStartOffset + ",") + (" skipped " + (offset - errorStartOffset) + " characters."); errors.push({ line: errorLine, column: errorColumn, message: errorMessage }); } } return { tokens: matchedTokens, groups: groups, errors: errors }; }; Lexer.SKIPPED = { description: "This marks a skipped Token pattern, this means each token identified by it will" + "be consumed and then throw into oblivion, this can be used to for example: skip whitespace." }; Lexer.NA = /NOT_APPLICABLE/; return Lexer; })(); chevrotain.Lexer = Lexer; })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); // using only root module name ('chevrotain') and not a longer name ('chevrotain.lexer') // because the external and internal API must have the same names for d.ts definition files to be valid // TODO: examine module in module to reduce spam on chevrotain namespace var chevrotain; (function (chevrotain) { var PATTERN = "PATTERN"; function analyzeTokenClasses(tokenClasses) { var onlyRelevantClasses = _.reject(tokenClasses, function (currClass) { return currClass[PATTERN] === chevrotain.Lexer.NA; }); var allTransformedPatterns = _.map(onlyRelevantClasses, function (currClass) { return addStartOfInput(currClass[PATTERN]); }); var allPatternsToClass = _.zipObject(allTransformedPatterns, onlyRelevantClasses); var patternIdxToClass = _.map(allTransformedPatterns, function (pattern) { return allPatternsToClass[pattern.toString()]; }); var patternIdxToGroup = _.map(onlyRelevantClasses, function (clazz) { var groupName = clazz.GROUP; if (groupName === chevrotain.Lexer.SKIPPED) { return undefined; } else if (_.isString(groupName)) { return groupName; }/* istanbul ignore else */ else if (_.isUndefined(groupName)) { return "default"; } else { /* istanbul ignore next */ throw Error("non exhaustive match"); } }); var patternIdxToLongerAltIdx = _.map(onlyRelevantClasses, function (clazz, idx) { var longerAltClass = clazz.LONGER_ALT; if (longerAltClass) { var longerAltIdx = _.indexOf(onlyRelevantClasses, longerAltClass); return longerAltIdx; } }); var patternIdxToCanLineTerminator = _.map(allTransformedPatterns, function (pattern) { // TODO: unicode escapes of line terminators too? return /\\n|\\r|\\s/g.test(pattern.source); }); var emptyGroups = _.reduce(onlyRelevantClasses, function (acc, clazz) { var groupName = clazz.GROUP; if (_.isString(groupName)) { acc[groupName] = []; } return acc; }, {}); return { allPatterns: allTransformedPatterns, patternIdxToClass: patternIdxToClass, patternIdxToGroup: patternIdxToGroup, patternIdxToLongerAltIdx: patternIdxToLongerAltIdx, patternIdxToCanLineTerminator: patternIdxToCanLineTerminator, emptyGroups: emptyGroups }; } chevrotain.analyzeTokenClasses = analyzeTokenClasses; function validatePatterns(tokenClasses) { var missingErrors = findMissingPatterns(tokenClasses); if (!_.isEmpty(missingErrors)) { throw new Error(missingErrors.join("\n ---------------- \n")); } var invalidPatterns = findInvalidPatterns(tokenClasses); if (!_.isEmpty(invalidPatterns)) { throw new Error(invalidPatterns.join("\n ---------------- \n")); } var InvalidEndOfInputAnchor = findEndOfInputAnchor(tokenClasses); if (!_.isEmpty(InvalidEndOfInputAnchor)) { throw new Error(InvalidEndOfInputAnchor.join("\n ---------------- \n")); } var invalidFlags = findUnsupportedFlags(tokenClasses); if (!_.isEmpty(invalidFlags)) { throw new Error(invalidFlags.join("\n ---------------- \n")); } var duplicates = findDuplicatePatterns(tokenClasses); if (!_.isEmpty(duplicates)) { throw new Error(duplicates.join("\n ---------------- \n")); } var invalidGroupType = findInvalidGroupType(tokenClasses); if (!_.isEmpty(invalidGroupType)) { throw new Error(invalidGroupType.join("\n ---------------- \n")); } } chevrotain.validatePatterns = validatePatterns; function findMissingPatterns(tokenClasses) { var noPatternClasses = _.filter(tokenClasses, function (currClass) { return !_.has(currClass, PATTERN); }); var errors = _.map(noPatternClasses, function (currClass) { return "Token class: ->" + chevrotain.tokenName(currClass) + "<- missing static 'PATTERN' property"; }); return errors; } chevrotain.findMissingPatterns = findMissingPatterns; function findInvalidPatterns(tokenClasses) { var invalidRegex = _.filter(tokenClasses, function (currClass) { var pattern = currClass[PATTERN]; return !_.isRegExp(pattern); }); var errors = _.map(invalidRegex, function (currClass) { return "Token class: ->" + chevrotain.tokenName(currClass) + "<- static 'PATTERN' can only be a RegEx"; }); return errors; } chevrotain.findInvalidPatterns = findInvalidPatterns; var end_of_input = /[^\\][\$]/; function findEndOfInputAnchor(tokenClasses) { var invalidRegex = _.filter(tokenClasses, function (currClass) { var pattern = currClass[PATTERN]; return end_of_input.test(pattern.source); }); var errors = _.map(invalidRegex, function (currClass) { return "Token class: ->" + chevrotain.tokenName(currClass) + "<- static 'PATTERN' cannot contain end of input anchor '$'"; }); return errors; } chevrotain.findEndOfInputAnchor = findEndOfInputAnchor; function findUnsupportedFlags(tokenClasses) { var invalidFlags = _.filter(tokenClasses, function (currClass) { var pattern = currClass[PATTERN]; return pattern instanceof RegExp && (pattern.multiline || pattern.global); }); var errors = _.map(invalidFlags, function (currClass) { return "Token class: ->" + chevrotain.tokenName(currClass) + "<- static 'PATTERN' may NOT contain global('g') or multiline('m')"; }); return errors; } chevrotain.findUnsupportedFlags = findUnsupportedFlags; // This can only test for identical duplicate RegExps, not semantically equivalent ones. function findDuplicatePatterns(tokenClasses) { var found = []; var identicalPatterns = _.map(tokenClasses, function (outerClass) { return _.reduce(tokenClasses, function (result, innerClass) { if ((outerClass.PATTERN.source === innerClass.PATTERN.source) && !_.contains(found, innerClass) && innerClass.PATTERN !== chevrotain.Lexer.NA) { // this avoids duplicates in the result, each class may only appear in one "set" // in essence we are creating Equivalence classes on equality relation. found.push(innerClass); return _.union(result, [innerClass]); } }, []); }); identicalPatterns = _.compact(identicalPatterns); var duplicatePatterns = _.filter(identicalPatterns, function (currIdenticalSet) { return _.size(currIdenticalSet) > 1; }); var errors = _.map(duplicatePatterns, function (setOfIdentical) { var classNames = _.map(setOfIdentical, function (currClass) { return chevrotain.tokenName(currClass); }); var dupPatternSrc = _.first(setOfIdentical).PATTERN; return ("The same RegExp pattern ->" + dupPatternSrc + "<-") + ("has been used in all the following classes: " + classNames.join(", ") + " <-"); }); return errors; } chevrotain.findDuplicatePatterns = findDuplicatePatterns; function findInvalidGroupType(tokenClasses) { var invalidTypes = _.filter(tokenClasses, function (clazz) { if (!_.has(clazz, "GROUP")) { return false; } var group = clazz.GROUP; return group !== chevrotain.Lexer.SKIPPED && group !== chevrotain.Lexer.NA && !_.isString(group); }); var errors = _.map(invalidTypes, function (currClass) { return "Token class: ->" + chevrotain.tokenName(currClass) + "<- static 'GROUP' can only be Lexer.SKIPPED/Lexer.NA/A String"; }); return errors; } chevrotain.findInvalidGroupType = findInvalidGroupType; function addStartOfInput(pattern) { var flags = pattern.ignoreCase ? "i" : ""; // always wrapping in a none capturing group preceded by '^' to make sure matching can only work on start of input. // duplicate/redundant start of input markers have no meaning (/^^^^A/ === /^A/) return new RegExp("^(?:" + pattern.source + ")", flags); } chevrotain.addStartOfInput = addStartOfInput; function countLineTerminators(text) { var lineTerminators = 0; var currOffset = 0; while (currOffset < text.length) { var c = text.charCodeAt(currOffset); if (c === 10) { lineTerminators++; } else if (c === 13) { if (currOffset !== text.length - 1 && text.charCodeAt(currOffset + 1) === 10) { } else { lineTerminators++; } } currOffset++; } return lineTerminators; } chevrotain.countLineTerminators = countLineTerminators; })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); // todo: consider if this module really belongs in chevrotain? var chevrotain; (function (chevrotain) { var tree; (function (tree) { var ParseTree = (function () { function ParseTree(payload, children) { if (children === void 0) { children = []; } this.payload = payload; this.children = children; } ParseTree.prototype.getImage = function () { return this.payload.image; }; ParseTree.prototype.getLine = function () { return this.payload.startLine; }; ParseTree.prototype.getColumn = function () { return this.payload.startColumn; }; return ParseTree; })(); tree.ParseTree = ParseTree; /** * convenience factory for ParseTrees * * @param {Function|Token} tokenOrTokenClass The Token instance to be used as the root node, or a constructor Function * that will create the root node. * @param {ParseTree[]} children The sub nodes of the ParseTree to the built * @returns {ParseTree} */ function PT(tokenOrTokenClass, children) { if (children === void 0) { children = []; } var childrenCompact = _.compact(children); if (tokenOrTokenClass instanceof chevrotain.Token) { return new ParseTree(tokenOrTokenClass, childrenCompact); } else if (_.isFunction(tokenOrTokenClass)) { return new ParseTree(new tokenOrTokenClass(), childrenCompact); } else if (_.isUndefined(tokenOrTokenClass) || _.isNull(tokenOrTokenClass)) { return null; } else { throw "Invalid parameter " + tokenOrTokenClass + " to PT factory."; } } tree.PT = PT; })/* istanbul ignore next */ (tree = chevrotain.tree || /* istanbul ignore next */ (chevrotain.tree = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var range; (function (range) { var Range = (function () { function Range(start, end) { this.start = start; this.end = end; if (!isValidRange(start, end)) { throw new Error("INVALID RANGE"); } } Range.prototype.contains = function (num) { return this.start <= num && this.end >= num; }; Range.prototype.containsRange = function (other) { return this.start <= other.start && this.end >= other.end; }; Range.prototype.isContainedInRange = function (other) { return other.containsRange(this); }; Range.prototype.strictlyContainsRange = function (other) { return this.start < other.start && this.end > other.end; }; Range.prototype.isStrictlyContainedInRange = function (other) { return other.strictlyContainsRange(this); }; return Range; })(); range.Range = Range; function isValidRange(start, end) { return !(start < 0 || end < start); } range.isValidRange = isValidRange; })/* istanbul ignore next */ (range = chevrotain.range || /* istanbul ignore next */ (chevrotain.range = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var constants; (function (constants) { constants.IN = "_~IN~_"; })/* istanbul ignore next */ (constants = chevrotain.constants || /* istanbul ignore next */ (chevrotain.constants = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var gast; (function (gast) { var AbstractProduction = (function () { function AbstractProduction(definition) { this.definition = definition; this.implicitOccurrenceIndex = false; } AbstractProduction.prototype.accept = function (visitor) { visitor.visit(this); _.forEach(this.definition, function (prod) { prod.accept(visitor); }); }; return AbstractProduction; })(); gast.AbstractProduction = AbstractProduction; var NonTerminal = (function (_super) { __extends(NonTerminal, _super); function NonTerminal(nonTerminalName, referencedRule, occurrenceInParent) { if (referencedRule === void 0) { referencedRule = undefined; } if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, []); this.nonTerminalName = nonTerminalName; this.referencedRule = referencedRule; this.occurrenceInParent = occurrenceInParent; } Object.defineProperty(NonTerminal.prototype, "definition", { get: function () { if (this.referencedRule !== undefined) { return this.referencedRule.definition; } return []; }, set: function (definition) { // immutable }, enumerable: true, configurable: true }); NonTerminal.prototype.accept = function (visitor) { visitor.visit(this); // don't visit children of a reference, we will get cyclic infinite loops if we do so }; return NonTerminal; })(AbstractProduction); gast.NonTerminal = NonTerminal; var Rule = (function (_super) { __extends(Rule, _super); function Rule(name, definition) { _super.call(this, definition); this.name = name; } return Rule; })(AbstractProduction); gast.Rule = Rule; var Flat = (function (_super) { __extends(Flat, _super); function Flat(definition) { _super.call(this, definition); } return Flat; })(AbstractProduction); gast.Flat = Flat; var Option = (function (_super) { __extends(Option, _super); function Option(definition, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, definition); this.occurrenceInParent = occurrenceInParent; } return Option; })(AbstractProduction); gast.Option = Option; var RepetitionMandatory = (function (_super) { __extends(RepetitionMandatory, _super); function RepetitionMandatory(definition, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, definition); this.occurrenceInParent = occurrenceInParent; } return RepetitionMandatory; })(AbstractProduction); gast.RepetitionMandatory = RepetitionMandatory; var Repetition = (function (_super) { __extends(Repetition, _super); function Repetition(definition, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, definition); this.occurrenceInParent = occurrenceInParent; } return Repetition; })(AbstractProduction); gast.Repetition = Repetition; var Alternation = (function (_super) { __extends(Alternation, _super); function Alternation(definition, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, definition); this.occurrenceInParent = occurrenceInParent; } return Alternation; })(AbstractProduction); gast.Alternation = Alternation; var Terminal = (function () { function Terminal(terminalType, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } this.terminalType = terminalType; this.occurrenceInParent = occurrenceInParent; this.implicitOccurrenceIndex = false; } Terminal.prototype.accept = function (visitor) { visitor.visit(this); }; return Terminal; })(); gast.Terminal = Terminal; var GAstVisitor = (function () { function GAstVisitor() { } GAstVisitor.prototype.visit = function (node) { if (node instanceof NonTerminal) { this.visitNonTerminal(node); } else if (node instanceof Flat) { this.visitFlat(node); } else if (node instanceof Option) { this.visitOption(node); } else if (node instanceof RepetitionMandatory) { this.visitRepetitionMandatory(node); } else if (node instanceof Repetition) { this.visitRepetition(node); } else if (node instanceof Alternation) { this.visitAlternation(node); } else if (node instanceof Terminal) { this.visitTerminal(node); } }; /* istanbul ignore next */ // this is an "Abstract" method that does nothing, testing it is pointless. GAstVisitor.prototype.visitNonTerminal = function (node) { }; GAstVisitor.prototype.visitFlat = function (node) { }; GAstVisitor.prototype.visitOption = function (node) { }; GAstVisitor.prototype.visitRepetitionMandatory = function (node) { }; GAstVisitor.prototype.visitRepetition = function (node) { }; GAstVisitor.prototype.visitAlternation = function (node) { }; GAstVisitor.prototype.visitTerminal = function (node) { }; return GAstVisitor; })(); gast.GAstVisitor = GAstVisitor; })/* istanbul ignore next */ (gast = chevrotain.gast || /* istanbul ignore next */ (chevrotain.gast = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var gast; (function (gast) { var lang = chevrotain.lang; function isSequenceProd(prod) { return prod instanceof gast.Flat || prod instanceof gast.Option || prod instanceof gast.Repetition || prod instanceof gast.RepetitionMandatory || prod instanceof gast.Terminal || prod instanceof gast.Rule; } gast.isSequenceProd = isSequenceProd; function isOptionalProd(prod) { var isDirectlyOptional = prod instanceof gast.Option || prod instanceof gast.Repetition; if (isDirectlyOptional) { return true; } // note that this can cause infinite loop if one optional empty TOP production has a cyclic dependency with another // empty optional top rule // may be indirectly optional ((A?B?C?) | (D?E?F?)) if (prod instanceof gast.Alternation) { // for OR its enough for just one of the alternatives to be optional return _.some(prod.definition, function (subProd) { return isOptionalProd(subProd); }); } else if (prod instanceof gast.AbstractProduction) { return _.every(prod.definition, function (subProd) { return isOptionalProd(subProd); }); } else { return false; } } gast.isOptionalProd = isOptionalProd; function isBranchingProd(prod) { return prod instanceof gast.Alternation; } gast.isBranchingProd = isBranchingProd; var productionToDslName = {}; productionToDslName[lang.functionName(gast.NonTerminal)] = "SUBRULE"; productionToDslName[lang.functionName(gast.Option)] = "OPTION"; productionToDslName[lang.functionName(gast.RepetitionMandatory)] = "AT_LEAST_ONE"; productionToDslName[lang.functionName(gast.Repetition)] = "MANY"; productionToDslName[lang.functionName(gast.Alternation)] = "OR"; productionToDslName[lang.functionName(gast.Terminal)] = "CONSUME"; function getProductionDslName(prod) { var clazz = prod.constructor; var prodName = lang.functionName(clazz); return productionToDslName[prodName]; } gast.getProductionDslName = getProductionDslName; })/* istanbul ignore next */ (gast = chevrotain.gast || /* istanbul ignore next */ (chevrotain.gast = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var first; (function (_first) { var gast = chevrotain.gast; function first(prod) { if (prod instanceof gast.NonTerminal) { // this could in theory cause infinite loops if // (1) prod A refs prod B. // (2) prod B refs prod A // (3) AB can match the empty set // in other words a cycle where everything is optional so the first will keep // looking ahead for the next optional part and will never exit // currently there is no safeguard for this unique edge case because // (1) not sure a grammar in which this can happen is useful for anything (productive) return first(prod.referencedRule); } else if (prod instanceof gast.Terminal) { return firstForTerminal(prod); } else if (gast.isSequenceProd(prod)) { return firstForSequence(prod); }/* istanbul ignore else */ else if (gast.isBranchingProd(prod)) { return firstForBranching(prod); } else { /* istanbul ignore next */ throw Error("non exhaustive match"); } } _first.first = first; function firstForSequence(prod) { var firstSet = []; var seq = prod.definition; var nextSubProdIdx = 0; var hasInnerProdsRemaining = seq.length > nextSubProdIdx; var currSubProd; // so we enter the loop at least once (if the definition is not empty var isLastInnerProdOptional = true; while (hasInnerProdsRemaining && isLastInnerProdOptional) { currSubProd = seq[nextSubProdIdx]; isLastInnerProdOptional = gast.isOptionalProd(currSubProd); firstSet = firstSet.concat(first(currSubProd)); nextSubProdIdx = nextSubProdIdx + 1; hasInnerProdsRemaining = seq.length > nextSubProdIdx; } return _.uniq(firstSet); } _first.firstForSequence = firstForSequence; function firstForBranching(prod) { var allAlternativesFirsts = _.map(prod.definition, function (innerProd) { return first(innerProd); }); return _.uniq(_.flatten(allAlternativesFirsts)); } _first.firstForBranching = firstForBranching; function firstForTerminal(terminal) { return [terminal.terminalType]; } _first.firstForTerminal = firstForTerminal; })/* istanbul ignore next */ (first = chevrotain.first || /* istanbul ignore next */ (chevrotain.first = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var rest; (function (rest) { var g = chevrotain.gast; var RestWalker = (function () { function RestWalker() { } RestWalker.prototype.walk = function (prod, prevRest) { var _this = this; if (prevRest === void 0) { prevRest = []; } _.forEach(prod.definition, function (subProd, index) { var currRest = _.drop(prod.definition, index + 1); if (subProd instanceof g.NonTerminal) { _this.walkProdRef(subProd, currRest, prevRest); } else if (subProd instanceof g.Terminal) { _this.walkTerminal(subProd, currRest, prevRest); } else if (subProd instanceof g.Flat) { _this.walkFlat(subProd, currRest, prevRest); } else if (subProd instanceof g.Option) { _this.walkOption(subProd, currRest, prevRest); } else if (subProd instanceof g.RepetitionMandatory) { _this.walkAtLeastOne(subProd, currRest, prevRest); } else if (subProd instanceof g.Repetition) { _this.walkMany(subProd, currRest, prevRest); }/* istanbul ignore else */ else if (subProd instanceof g.Alternation) { _this.walkOr(subProd, currRest, prevRest); } else { /* istanbul ignore next */ throw Error("non exhaustive match"); } }); }; RestWalker.prototype.walkTerminal = function (terminal, currRest, prevRest) { }; RestWalker.prototype.walkProdRef = function (refProd, currRest, prevRest) { }; RestWalker.prototype.walkFlat = function (flatProd, currRest, prevRest) { // ABCDEF => after the D the rest is EF var fullOrRest = currRest.concat(prevRest); this.walk(flatProd, fullOrRest); }; RestWalker.prototype.walkOption = function (optionProd, currRest, prevRest) { // ABC(DE)?F => after the (DE)? the rest is F var fullOrRest = currRest.concat(prevRest); this.walk(optionProd, fullOrRest); }; RestWalker.prototype.walkAtLeastOne = function (atLeastOneProd, currRest, prevRest) { // ABC(DE)+F => after the (DE)+ the rest is (DE)?F var fullAtLeastOneRest = [new g.Option(atLeastOneProd.definition)].concat(currRest, prevRest); this.walk(atLeastOneProd, fullAtLeastOneRest); }; RestWalker.prototype.walkMany = function (manyProd, currRest, prevRest) { // ABC(DE)*F => after the (DE)* the rest is (DE)?F var fullManyRest = [new g.Option(manyProd.definition)].concat(currRest, prevRest); this.walk(manyProd, fullManyRest); }; RestWalker.prototype.walkOr = function (orProd, currRest, prevRest) { var _this = this; // ABC(D|E|F)G => when finding the (D|E|F) the rest is G var fullOrRest = currRest.concat(prevRest); // walk all different alternatives _.forEach(orProd.definition, function (alt) { // wrapping each alternative in a single definition wrapper // to avoid errors in computing the rest of that alternative in the invocation to computeInProdFollows // (otherwise for OR([alt1,alt2]) alt2 will be considered in 'rest' of alt1 var prodWrapper = new chevrotain.gast.Flat([alt]); _this.walk(prodWrapper, fullOrRest); }); }; return RestWalker; })(); rest.RestWalker = RestWalker; })/* istanbul ignore next */ (rest = chevrotain.rest || /* istanbul ignore next */ (chevrotain.rest = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var follow; (function (follow) { var g = chevrotain.gast; var r = chevrotain.rest; var f = chevrotain.first; var IN = chevrotain.constants.IN; var lang = chevrotain.la