UNPKG

chevrotain

Version:

Chevrotain is a high-performance, fault-tolerant JavaScript parsing DSL for building recursive descent parsers

github.com/SAP/chevrotain

1,023 lines (1,021 loc) • 195 kB

JavaScript

(function (root, factory) { /* istanbul ignore next */ if (typeof define === 'function' && define.amd) { // AMD. Register as an anonymous module unless amdModuleId is set define('chevrotain', ["lodash"], function (a0) { return (root['API'] = factory(a0)); }); } else if (typeof exports === 'object') { // Node. Does not work with strict CommonJS, but // only CommonJS-like environments that support module.exports, // like Node. module.exports = factory(require("lodash")); } else { root['chevrotain'] = factory(_); } }(this, function (_) { /*! chevrotain - v0.5.13 - 2016-01-02 */ /* Utils using lodash style API. (not necessarily 100% compliant) for functional and other utils. These utils should replace usage of lodash in the production code base. not because they are any better... but for the purpose of being a dependency free library. The hotspots in the code are already written in imperative style for performance reasons. so writing several dozen utils which may be slower than the original lodash, does not matter as much considering they will not be invoked in hotspots... 
*/ var utils; (function (utils) { function isEmpty(arr) { return arr.length === 0; } utils.isEmpty = isEmpty; function keys(obj) { return Object.keys(obj); } utils.keys = keys; function values(obj) { var vals = []; var keys = Object.keys(obj); for (var i = 0; i < keys.length; i++) { vals.push(obj[keys[i]]); } return vals; } utils.values = values; function map(arr, callback) { var result = []; for (var idx = 0; idx < arr.length; idx++) { result.push(callback.call(null, arr[idx], idx)); } return result; } utils.map = map; })(utils || (utils = {})); var chevrotain; (function (chevrotain) { var lang; (function (lang) { var nameRegex = /^\s*function\s*(\S*)\s*\(/; /* istanbul ignore next */ var hasNativeName = typeof (function f() { }).name !== "undefined"; function classNameFromInstance(instance) { return functionName(instance.constructor); } lang.classNameFromInstance = classNameFromInstance; /* istanbul ignore next too many hacks for IE here*/ function functionName(func) { if (hasNativeName) { return func.name; } else if (func.rdtFuncNameCache666) { // super 'special' property name on INSTANCE to avoid hurting those who use browsers that // do not support name property even more (IE...) 
return func.rdtFuncNameCache666; } else { var name_1 = func.toString().match(nameRegex)[1]; func.rdtFuncNameCache666 = name_1; return name_1; } } lang.functionName = functionName; /** * simple Hashtable between a string and some generic value * this should be removed once typescript supports ES6 style Hashtable */ var HashTable = (function () { function HashTable() { this._state = {}; } HashTable.prototype.keys = function () { return utils.keys(this._state); }; HashTable.prototype.values = function () { return utils.values(this._state); }; HashTable.prototype.put = function (key, value) { this._state[key] = value; }; HashTable.prototype.putAll = function (other) { this._state = _.assign(this._state, other._state); }; HashTable.prototype.get = function (key) { // To avoid edge case with a key called "hasOwnProperty" we need to perform the commented out check below // -> if (Object.prototype.hasOwnProperty.call(this._state, key)) { ... } <- // however this costs nearly 25% of the parser's runtime. // if someone decides to name their Parser class "hasOwnProperty" they deserve what they will get :) return this._state[key]; }; HashTable.prototype.containsKey = function (key) { return _.has(this._state, key); }; return HashTable; })(); lang.HashTable = HashTable; })/* istanbul ignore next */ (lang = chevrotain.lang || /* istanbul ignore next */ (chevrotain.lang = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var __extends = (this && this.__extends) || function (d, b) { for (var p in b) /* istanbul ignore next */ if (b.hasOwnProperty(p)) d[p] = b[p]; function __() { this.constructor = d; } /* istanbul ignore next */ d.prototype = b === null ? 
Object.create(b) : (__.prototype = b.prototype, new __()); }; // using only root namespace name ('chevrotain') and not a longer name ('chevrotain.tokens') // because the external and internal API must have the same names for d.ts definition files to be valid var chevrotain; (function (chevrotain) { var lang = chevrotain.lang; function tokenName(clazz) { // used to support js inheritance patterns that do not use named functions // in that situation setting a property tokenName on a token constructor will // enable producing readable error messages. if (_.isString(clazz.tokenName)) { return clazz.tokenName; } else { return lang.functionName(clazz); } } chevrotain.tokenName = tokenName; /** * utility to help the poor souls who are still stuck writing pure javascript 5.1 * extend and create Token subclasses in a less verbose manner * * @param {string} tokenName - the name of the new TokenClass * @param {RegExp|Function} patternOrParent - RegExp Pattern or Parent Token Constructor * @param {Function} parentConstructor - the Token class to be extended * @returns {Function} - a constructor for the new extended Token subclass */ function extendToken(tokenName, patternOrParent, parentConstructor) { if (patternOrParent === void 0) { patternOrParent = undefined; } if (parentConstructor === void 0) { parentConstructor = Token; } var pattern; if (_.isRegExp(patternOrParent) || patternOrParent === chevrotain.Lexer.SKIPPED || patternOrParent === chevrotain.Lexer.NA) { pattern = patternOrParent; } else if (_.isFunction(patternOrParent)) { parentConstructor = patternOrParent; pattern = undefined; } var derivedCostructor = function () { parentConstructor.apply(this, arguments); }; // static properties mixing _.forOwn(parentConstructor, function (v, k) { derivedCostructor[k] = v; }); // the tokenName property will be used by the Parser for Error Messages if the Token's constructor is anonymous derivedCostructor.tokenName = tokenName; derivedCostructor.prototype = 
Object.create(parentConstructor.prototype); derivedCostructor.prototype.constructor = derivedCostructor; if (!_.isUndefined(pattern)) { derivedCostructor.PATTERN = pattern; } return derivedCostructor; } chevrotain.extendToken = extendToken; var Token = (function () { /** * @param {string} image the textual representation of the Token as it appeared in the text * @param {number} offset offset of the first character of the Token * @param {number} startLine line of the first character of the Token * @param {number} startColumn column of the first character of the Token * @param {number} endLine line of the last character of the Token * @param {number} endColumn column of the last character of the Token * * Things to note: * * "do" {startColumn : 1, endColumn: 2} --> the range is inclusive to exclusive 1...2 (2 chars long). * * "\n" {startLine : 1, endLine: 1} --> a lineTerminator as the last character does not effect the Token's line numbering. * * "'hello\tworld\uBBBB'" {image: "'hello\tworld\uBBBB'"} --> a Token's image is the "literal" text * (unicode escaping is untouched). */ function Token(image, offset, startLine, startColumn, endLine, endColumn) { if (endLine === void 0) { endLine = startLine; } if (endColumn === void 0) { endColumn = startColumn + image.length - 1; } this.image = image; this.offset = offset; this.startLine = startLine; this.startColumn = startColumn; this.endLine = endLine; this.endColumn = endColumn; // this marks if a Token does not really exist and has been inserted "artificially" during parsing in rule error recovery this.isInsertedInRecovery = false; } return Token; })(); chevrotain.Token = Token; /** * a special kind of Token which does not really exist in the input * (hence the 'Virtual' prefix). These type of Tokens can be used as special markers: * for example, EOF (end-of-file). 
*/ var VirtualToken = (function (_super) { __extends(VirtualToken, _super); function VirtualToken() { _super.call(this, "", -1, -1, -1, -1, -1); } return VirtualToken; })(Token); chevrotain.VirtualToken = VirtualToken; var EOF = (function (_super) { __extends(EOF, _super); function EOF() { _super.apply(this, arguments); } return EOF; })(VirtualToken); chevrotain.EOF = EOF; })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); // using only root namespace name ('chevrotain') and not a longer name ('chevrotain.lexer') // because the external and internal API must have the same names for d.ts definition files to be valid var chevrotain; (function (chevrotain) { (function (LexerDefinitionErrorType) { LexerDefinitionErrorType[LexerDefinitionErrorType["MISSING_PATTERN"] = 0] = "MISSING_PATTERN"; LexerDefinitionErrorType[LexerDefinitionErrorType["INVALID_PATTERN"] = 1] = "INVALID_PATTERN"; LexerDefinitionErrorType[LexerDefinitionErrorType["EOI_ANCHOR_FOUND"] = 2] = "EOI_ANCHOR_FOUND"; LexerDefinitionErrorType[LexerDefinitionErrorType["UNSUPPORTED_FLAGS_FOUND"] = 3] = "UNSUPPORTED_FLAGS_FOUND"; LexerDefinitionErrorType[LexerDefinitionErrorType["DUPLICATE_PATTERNS_FOUND"] = 4] = "DUPLICATE_PATTERNS_FOUND"; LexerDefinitionErrorType[LexerDefinitionErrorType["INVALID_GROUP_TYPE_FOUND"] = 5] = "INVALID_GROUP_TYPE_FOUND"; })(chevrotain.LexerDefinitionErrorType || (chevrotain.LexerDefinitionErrorType = {})); var LexerDefinitionErrorType = chevrotain.LexerDefinitionErrorType; var Lexer = (function () { /** * @param {Function[]} tokenClasses constructor functions for the Tokens types this scanner will support * These constructors must be in one of three forms: * * 1. With a PATTERN property that has a RegExp value for tokens to match: * example: -->class Integer extends Token { static PATTERN = /[1-9]\d }<-- * * 2. With a PATTERN property that has a RegExp value AND an IGNORE property with boolean value true. 
* These tokens will be matched but not as part of the main token vector. * this is usually used for ignoring whitespace/comments * example: --> class Whitespace extends Token { static PATTERN = /(\t| )/; static IGNORE = true}<-- * * 3. With a PATTERN property that has the value of the var Lexer.NA defined above. * This is a convenience form used to avoid matching Token classes that only act as categories. * example: -->class Keyword extends Token { static PATTERN = NA }<-- * * * The following RegExp patterns are not supported: * a. '$' for match at end of input * b. /b global flag * c. /m multi-line flag * * The Lexer will identify the first pattern the matches, Therefor the order of Token Constructors passed * To the SimpleLexer's constructor is meaningful. If two patterns may match the same string, the longer one * should be before the shorter one. * * Note that there are situations in which we may wish to place the longer pattern after the shorter one. * For example: keywords vs Identifiers. * 'do'(/do/) and 'done'(/w+) * * * If the Identifier pattern appears before the 'do' pattern both 'do' and 'done' * will be lexed as an Identifier. * * * If the 'do' pattern appears before the Identifier pattern 'do' will be lexed correctly as a keyword. * however 'done' will be lexed as TWO tokens keyword 'do' and identifier 'ne'. 
* * To resolve this problem, add a static property on the keyword's Tokens constructor named: LONGER_ALT * example: * * export class Identifier extends Keyword { static PATTERN = /[_a-zA-Z][_a-zA-Z0-9]/ } * export class Keyword extends Token { * static PATTERN = lex.NA * static LONGER_ALT = Identifier * } * export class Do extends Keyword { static PATTERN = /do/ } * export class While extends Keyword { static PATTERN = /while/ } * export class Return extends Keyword { static PATTERN = /return/ } * * The lexer will then also attempt to match a (longer) Identifier each time a keyword is matched * * * @param {boolean} [deferDefinitionErrorsHandling=false] * an optional flag indicating that lexer definition errors * should not automatically cause an error to be raised. * This can be useful when wishing to indicate lexer errors in another manner * than simply throwing an error (for example in an online playground). */ function Lexer(tokenClasses, deferDefinitionErrorsHandling) { if (deferDefinitionErrorsHandling === void 0) { deferDefinitionErrorsHandling = false; } this.tokenClasses = tokenClasses; this.lexerDefinitionErrors = []; this.lexerDefinitionErrors = chevrotain.validatePatterns(tokenClasses); if (!utils.isEmpty(this.lexerDefinitionErrors) && !deferDefinitionErrorsHandling) { var allErrMessages = utils.map(this.lexerDefinitionErrors, function (error) { return error.message; }); var allErrMessagesString = allErrMessages.join("-----------------------\n"); throw new Error("Errors detected in definition of Lexer:\n" + allErrMessagesString); } // If definition errors were encountered, the analysis phase may fail unexpectedly/ // Considering a lexer with definition errors may never be used, there is no point // to performing the analysis anyhow... 
if (utils.isEmpty(this.lexerDefinitionErrors)) { var analyzeResult = chevrotain.analyzeTokenClasses(tokenClasses); this.allPatterns = analyzeResult.allPatterns; this.patternIdxToClass = analyzeResult.patternIdxToClass; this.patternIdxToGroup = analyzeResult.patternIdxToGroup; this.patternIdxToLongerAltIdx = analyzeResult.patternIdxToLongerAltIdx; this.patternIdxToCanLineTerminator = analyzeResult.patternIdxToCanLineTerminator; this.emptyGroups = analyzeResult.emptyGroups; } } /** * Will lex(Tokenize) a string. * Note that this can be called repeatedly on different strings as this method * does not modify the state of the Lexer. * * @param {string} text the string to lex * @returns {{tokens: {Token}[], errors: string[]}} */ Lexer.prototype.tokenize = function (text) { var match, i, j, matchAlt, longerAltIdx, matchedImage, imageLength, group, tokClass, newToken, errLength, canMatchedContainLineTerminator, fixForEndingInLT, c, droppedChar, lastLTIdx, errorMessage, lastCharIsLT; var orgInput = text; var offset = 0; var matchedTokens = []; var errors = []; var line = 1; var column = 1; var groups = _.clone(this.emptyGroups); if (!utils.isEmpty(this.lexerDefinitionErrors)) { var allErrMessages = utils.map(this.lexerDefinitionErrors, function (error) { return error.message; }); var allErrMessagesString = allErrMessages.join("-----------------------\n"); throw new Error("Unable to Tokenize because Errors detected in definition of Lexer:\n" + allErrMessagesString); } while (text.length > 0) { match = null; for (i = 0; i < this.allPatterns.length; i++) { match = this.allPatterns[i].exec(text); if (match !== null) { // even though this pattern matched we must try a another longer alternative. 
// this can be used to prioritize keywords over identifers longerAltIdx = this.patternIdxToLongerAltIdx[i]; if (longerAltIdx) { matchAlt = this.allPatterns[longerAltIdx].exec(text); if (matchAlt && matchAlt[0].length > match[0].length) { match = matchAlt; i = longerAltIdx; } } break; } } if (match !== null) { matchedImage = match[0]; imageLength = matchedImage.length; group = this.patternIdxToGroup[i]; if (group !== undefined) { tokClass = this.patternIdxToClass[i]; newToken = new tokClass(matchedImage, offset, line, column); if (group === "default") { matchedTokens.push(newToken); } else { groups[group].push(newToken); } } text = text.slice(imageLength); offset = offset + imageLength; column = column + imageLength; // TODO: with newlines the column may be assigned twice canMatchedContainLineTerminator = this.patternIdxToCanLineTerminator[i]; if (canMatchedContainLineTerminator) { var lineTerminatorsInMatch = chevrotain.countLineTerminators(matchedImage); // TODO: identify edge case of one token ending in '\r' and another one starting with '\n' if (lineTerminatorsInMatch !== 0) { line = line + lineTerminatorsInMatch; lastLTIdx = imageLength - 1; while (lastLTIdx >= 0) { c = matchedImage.charCodeAt(lastLTIdx); // scan in reverse to find last lineTerminator in image if (c === 13 || c === 10) { break; } lastLTIdx--; } column = imageLength - lastLTIdx; if (group !== undefined) { lastCharIsLT = lastLTIdx === imageLength - 1; fixForEndingInLT = lastCharIsLT ? -1 : 0; if (!(lineTerminatorsInMatch === 1 && lastCharIsLT)) { // if a token ends in a LT that last LT only affects the line numbering of following Tokens newToken.endLine = line + fixForEndingInLT; // the last LT in a token does not affect the endColumn either as the [columnStart ... columnEnd) // inclusive to exclusive range. 
newToken.endColumn = column - 1 + -fixForEndingInLT; } } } } } else { var errorStartOffset = offset; var errorLine = line; var errorColumn = column; var foundResyncPoint = false; while (!foundResyncPoint && text.length > 0) { // drop chars until we succeed in matching something droppedChar = text.charCodeAt(0); if (droppedChar === 10 || (droppedChar === 13 && (text.length === 1 || (text.length > 1 && text.charCodeAt(1) !== 10)))) { line++; column = 1; } else { // either when skipping the next char, or when consuming the following pattern // (which will have to start in a '\n' if we manage to consume it) column++; } text = text.substr(1); offset++; for (j = 0; j < this.allPatterns.length; j++) { foundResyncPoint = this.allPatterns[j].test(text); if (foundResyncPoint) { break; } } } errLength = offset - errorStartOffset; // at this point we either re-synced or reached the end of the input text errorMessage = ("unexpected character: ->" + orgInput.charAt(errorStartOffset) + "<- at offset: " + errorStartOffset + ",") + (" skipped " + (offset - errorStartOffset) + " characters."); errors.push({ line: errorLine, column: errorColumn, length: errLength, message: errorMessage }); } } return { tokens: matchedTokens, groups: groups, errors: errors }; }; Lexer.SKIPPED = { description: "This marks a skipped Token pattern, this means each token identified by it will" + "be consumed and then throw into oblivion, this can be used to for example: skip whitespace." 
}; Lexer.NA = /NOT_APPLICABLE/; return Lexer; })(); chevrotain.Lexer = Lexer; })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); // using only root namespace name ('chevrotain') and not a longer name ('chevrotain.lexer') // because the external and internal API must have the same names for d.ts definition files to be valid var chevrotain; (function (chevrotain) { var PATTERN = "PATTERN"; function analyzeTokenClasses(tokenClasses) { var onlyRelevantClasses = _.reject(tokenClasses, function (currClass) { return currClass[PATTERN] === chevrotain.Lexer.NA; }); var allTransformedPatterns = utils.map(onlyRelevantClasses, function (currClass) { return addStartOfInput(currClass[PATTERN]); }); var allPatternsToClass = _.zipObject(allTransformedPatterns, onlyRelevantClasses); var patternIdxToClass = utils.map(allTransformedPatterns, function (pattern) { return allPatternsToClass[pattern.toString()]; }); var patternIdxToGroup = utils.map(onlyRelevantClasses, function (clazz) { var groupName = clazz.GROUP; if (groupName === chevrotain.Lexer.SKIPPED) { return undefined; } else if (_.isString(groupName)) { return groupName; }/* istanbul ignore else */ else if (_.isUndefined(groupName)) { return "default"; } else { /* istanbul ignore next */ throw Error("non exhaustive match"); } }); var patternIdxToLongerAltIdx = utils.map(onlyRelevantClasses, function (clazz) { var longerAltClass = clazz.LONGER_ALT; if (longerAltClass) { var longerAltIdx = _.indexOf(onlyRelevantClasses, longerAltClass); return longerAltIdx; } }); var patternIdxToCanLineTerminator = utils.map(allTransformedPatterns, function (pattern) { // TODO: unicode escapes of line terminators too? 
return /\\n|\\r|\\s/g.test(pattern.source); }); var emptyGroups = _.reduce(onlyRelevantClasses, function (acc, clazz) { var groupName = clazz.GROUP; if (_.isString(groupName)) { acc[groupName] = []; } return acc; }, {}); return { allPatterns: allTransformedPatterns, patternIdxToClass: patternIdxToClass, patternIdxToGroup: patternIdxToGroup, patternIdxToLongerAltIdx: patternIdxToLongerAltIdx, patternIdxToCanLineTerminator: patternIdxToCanLineTerminator, emptyGroups: emptyGroups }; } chevrotain.analyzeTokenClasses = analyzeTokenClasses; function validatePatterns(tokenClasses) { var errors = []; var missingResult = findMissingPatterns(tokenClasses); var validTokenClasses = missingResult.validTokenClasses; errors = errors.concat(missingResult.errors); var invalidResult = findInvalidPatterns(validTokenClasses); validTokenClasses = invalidResult.validTokenClasses; errors = errors.concat(invalidResult.errors); errors = errors.concat(findEndOfInputAnchor(validTokenClasses)); errors = errors.concat(findUnsupportedFlags(validTokenClasses)); errors = errors.concat(findDuplicatePatterns(validTokenClasses)); errors = errors.concat(findInvalidGroupType(validTokenClasses)); return errors; } chevrotain.validatePatterns = validatePatterns; function findMissingPatterns(tokenClasses) { var tokenClassesWithMissingPattern = _.filter(tokenClasses, function (currClass) { return !_.has(currClass, PATTERN); }); var errors = utils.map(tokenClassesWithMissingPattern, function (currClass) { return { message: "Token class: ->" + chevrotain.tokenName(currClass) + "<- missing static 'PATTERN' property", type: chevrotain.LexerDefinitionErrorType.MISSING_PATTERN, tokenClasses: [currClass] }; }); var validTokenClasses = _.difference(tokenClasses, tokenClassesWithMissingPattern); return { errors: errors, validTokenClasses: validTokenClasses }; } chevrotain.findMissingPatterns = findMissingPatterns; function findInvalidPatterns(tokenClasses) { var tokenClassesWithInvalidPattern = 
_.filter(tokenClasses, function (currClass) { var pattern = currClass[PATTERN]; return !_.isRegExp(pattern); }); var errors = utils.map(tokenClassesWithInvalidPattern, function (currClass) { return { message: "Token class: ->" + chevrotain.tokenName(currClass) + "<- static 'PATTERN' can only be a RegExp", type: chevrotain.LexerDefinitionErrorType.INVALID_PATTERN, tokenClasses: [currClass] }; }); var validTokenClasses = _.difference(tokenClasses, tokenClassesWithInvalidPattern); return { errors: errors, validTokenClasses: validTokenClasses }; } chevrotain.findInvalidPatterns = findInvalidPatterns; var end_of_input = /[^\\][\$]/; function findEndOfInputAnchor(tokenClasses) { var invalidRegex = _.filter(tokenClasses, function (currClass) { var pattern = currClass[PATTERN]; return end_of_input.test(pattern.source); }); var errors = utils.map(invalidRegex, function (currClass) { return { message: "Token class: ->" + chevrotain.tokenName(currClass) + "<- static 'PATTERN' cannot contain end of input anchor '$'", type: chevrotain.LexerDefinitionErrorType.EOI_ANCHOR_FOUND, tokenClasses: [currClass] }; }); return errors; } chevrotain.findEndOfInputAnchor = findEndOfInputAnchor; function findUnsupportedFlags(tokenClasses) { var invalidFlags = _.filter(tokenClasses, function (currClass) { var pattern = currClass[PATTERN]; return pattern instanceof RegExp && (pattern.multiline || pattern.global); }); var errors = utils.map(invalidFlags, function (currClass) { return { message: "Token class: ->" + chevrotain.tokenName(currClass) + "<- static 'PATTERN' may NOT contain global('g') or multiline('m')", type: chevrotain.LexerDefinitionErrorType.UNSUPPORTED_FLAGS_FOUND, tokenClasses: [currClass] }; }); return errors; } chevrotain.findUnsupportedFlags = findUnsupportedFlags; // This can only test for identical duplicate RegExps, not semantically equivalent ones. 
function findDuplicatePatterns(tokenClasses) { var found = []; var identicalPatterns = utils.map(tokenClasses, function (outerClass) { return _.reduce(tokenClasses, function (result, innerClass) { if ((outerClass.PATTERN.source === innerClass.PATTERN.source) && !_.contains(found, innerClass) && innerClass.PATTERN !== chevrotain.Lexer.NA) { // this avoids duplicates in the result, each class may only appear in one "set" // in essence we are creating Equivalence classes on equality relation. found.push(innerClass); return _.union(result, [innerClass]); } return result; }, []); }); identicalPatterns = _.compact(identicalPatterns); var duplicatePatterns = _.filter(identicalPatterns, function (currIdenticalSet) { return _.size(currIdenticalSet) > 1; }); var errors = utils.map(duplicatePatterns, function (setOfIdentical) { var classNames = utils.map(setOfIdentical, function (currClass) { return chevrotain.tokenName(currClass); }); var dupPatternSrc = _.first(setOfIdentical).PATTERN; return { message: ("The same RegExp pattern ->" + dupPatternSrc + "<-") + ("has been used in all the following classes: " + classNames.join(", ") + " <-"), type: chevrotain.LexerDefinitionErrorType.DUPLICATE_PATTERNS_FOUND, tokenClasses: setOfIdentical }; }); return errors; } chevrotain.findDuplicatePatterns = findDuplicatePatterns; function findInvalidGroupType(tokenClasses) { var invalidTypes = _.filter(tokenClasses, function (clazz) { if (!_.has(clazz, "GROUP")) { return false; } var group = clazz.GROUP; return group !== chevrotain.Lexer.SKIPPED && group !== chevrotain.Lexer.NA && !_.isString(group); }); var errors = utils.map(invalidTypes, function (currClass) { return { message: "Token class: ->" + chevrotain.tokenName(currClass) + "<- static 'GROUP' can only be Lexer.SKIPPED/Lexer.NA/A String", type: chevrotain.LexerDefinitionErrorType.INVALID_GROUP_TYPE_FOUND, tokenClasses: [currClass] }; }); return errors; } chevrotain.findInvalidGroupType = findInvalidGroupType; function 
addStartOfInput(pattern) { var flags = pattern.ignoreCase ? "i" : ""; // always wrapping in a none capturing group preceded by '^' to make sure matching can only work on start of input. // duplicate/redundant start of input markers have no meaning (/^^^^A/ === /^A/) return new RegExp("^(?:" + pattern.source + ")", flags); } chevrotain.addStartOfInput = addStartOfInput; function countLineTerminators(text) { var lineTerminators = 0; var currOffset = 0; while (currOffset < text.length) { var c = text.charCodeAt(currOffset); if (c === 10) { lineTerminators++; } else if (c === 13) { if (currOffset !== text.length - 1 && text.charCodeAt(currOffset + 1) === 10) { } else { lineTerminators++; } } currOffset++; } return lineTerminators; } chevrotain.countLineTerminators = countLineTerminators; })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var range; (function (range) { var Range = (function () { function Range(start, end) { this.start = start; this.end = end; if (!isValidRange(start, end)) { throw new Error("INVALID RANGE"); } } Range.prototype.contains = function (num) { return this.start <= num && this.end >= num; }; Range.prototype.containsRange = function (other) { return this.start <= other.start && this.end >= other.end; }; Range.prototype.isContainedInRange = function (other) { return other.containsRange(this); }; Range.prototype.strictlyContainsRange = function (other) { return this.start < other.start && this.end > other.end; }; Range.prototype.isStrictlyContainedInRange = function (other) { return other.strictlyContainsRange(this); }; return Range; })(); range.Range = Range; function isValidRange(start, end) { return !(start < 0 || end < start); } range.isValidRange = isValidRange; })/* istanbul ignore next */ (range = chevrotain.range || /* istanbul ignore next */ (chevrotain.range = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var 
constants; (function (constants) { constants.IN = "_~IN~_"; })/* istanbul ignore next */ (constants = chevrotain.constants || /* istanbul ignore next */ (chevrotain.constants = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var gast; (function (gast) { var AbstractProduction = (function () { function AbstractProduction(definition) { this.definition = definition; this.implicitOccurrenceIndex = false; } AbstractProduction.prototype.accept = function (visitor) { visitor.visit(this); _.forEach(this.definition, function (prod) { prod.accept(visitor); }); }; return AbstractProduction; })(); gast.AbstractProduction = AbstractProduction; var NonTerminal = (function (_super) { __extends(NonTerminal, _super); function NonTerminal(nonTerminalName, referencedRule, occurrenceInParent) { if (referencedRule === void 0) { referencedRule = undefined; } if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, []); this.nonTerminalName = nonTerminalName; this.referencedRule = referencedRule; this.occurrenceInParent = occurrenceInParent; } Object.defineProperty(NonTerminal.prototype, "definition", { get: function () { if (this.referencedRule !== undefined) { return this.referencedRule.definition; } return []; }, set: function (definition) { // immutable }, enumerable: true, configurable: true }); NonTerminal.prototype.accept = function (visitor) { visitor.visit(this); // don't visit children of a reference, we will get cyclic infinite loops if we do so }; return NonTerminal; })(AbstractProduction); gast.NonTerminal = NonTerminal; var Rule = (function (_super) { __extends(Rule, _super); function Rule(name, definition, orgText) { if (orgText === void 0) { orgText = ""; } _super.call(this, definition); this.name = name; this.orgText = orgText; } return Rule; })(AbstractProduction); gast.Rule = Rule; var Flat = (function (_super) { __extends(Flat, _super); function Flat(definition) { _super.call(this, 
definition); } return Flat; })(AbstractProduction); gast.Flat = Flat; var Option = (function (_super) { __extends(Option, _super); function Option(definition, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, definition); this.occurrenceInParent = occurrenceInParent; } return Option; })(AbstractProduction); gast.Option = Option; var RepetitionMandatory = (function (_super) { __extends(RepetitionMandatory, _super); function RepetitionMandatory(definition, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, definition); this.occurrenceInParent = occurrenceInParent; } return RepetitionMandatory; })(AbstractProduction); gast.RepetitionMandatory = RepetitionMandatory; var RepetitionMandatoryWithSeparator = (function (_super) { __extends(RepetitionMandatoryWithSeparator, _super); function RepetitionMandatoryWithSeparator(definition, separator, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, definition); this.separator = separator; this.occurrenceInParent = occurrenceInParent; } return RepetitionMandatoryWithSeparator; })(AbstractProduction); gast.RepetitionMandatoryWithSeparator = RepetitionMandatoryWithSeparator; var Repetition = (function (_super) { __extends(Repetition, _super); function Repetition(definition, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, definition); this.occurrenceInParent = occurrenceInParent; } return Repetition; })(AbstractProduction); gast.Repetition = Repetition; var RepetitionWithSeparator = (function (_super) { __extends(RepetitionWithSeparator, _super); function RepetitionWithSeparator(definition, separator, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, definition); this.separator = separator; this.occurrenceInParent = occurrenceInParent; } return RepetitionWithSeparator; 
})(AbstractProduction); gast.RepetitionWithSeparator = RepetitionWithSeparator; var Alternation = (function (_super) { __extends(Alternation, _super); function Alternation(definition, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } _super.call(this, definition); this.occurrenceInParent = occurrenceInParent; } return Alternation; })(AbstractProduction); gast.Alternation = Alternation; var Terminal = (function () { function Terminal(terminalType, occurrenceInParent) { if (occurrenceInParent === void 0) { occurrenceInParent = 1; } this.terminalType = terminalType; this.occurrenceInParent = occurrenceInParent; this.implicitOccurrenceIndex = false; } Terminal.prototype.accept = function (visitor) { visitor.visit(this); }; return Terminal; })(); gast.Terminal = Terminal; var GAstVisitor = (function () { function GAstVisitor() { } GAstVisitor.prototype.visit = function (node) { if (node instanceof NonTerminal) { this.visitNonTerminal(node); } else if (node instanceof Flat) { this.visitFlat(node); } else if (node instanceof Option) { this.visitOption(node); } else if (node instanceof RepetitionMandatory) { this.visitRepetitionMandatory(node); } else if (node instanceof RepetitionMandatoryWithSeparator) { this.visitRepetitionMandatoryWithSeparator(node); } else if (node instanceof RepetitionWithSeparator) { this.visitRepetitionWithSeparator(node); } else if (node instanceof Repetition) { this.visitRepetition(node); } else if (node instanceof Alternation) { this.visitAlternation(node); } else if (node instanceof Terminal) { this.visitTerminal(node); } }; /* istanbul ignore next */ // this is an "Abstract" method that does nothing, testing it is pointless. 
GAstVisitor.prototype.visitNonTerminal = function (node) { }; GAstVisitor.prototype.visitFlat = function (node) { }; GAstVisitor.prototype.visitOption = function (node) { }; GAstVisitor.prototype.visitRepetition = function (node) { }; GAstVisitor.prototype.visitRepetitionMandatory = function (node) { }; GAstVisitor.prototype.visitRepetitionMandatoryWithSeparator = function (node) { }; GAstVisitor.prototype.visitRepetitionWithSeparator = function (node) { }; GAstVisitor.prototype.visitAlternation = function (node) { }; GAstVisitor.prototype.visitTerminal = function (node) { }; return GAstVisitor; })(); gast.GAstVisitor = GAstVisitor; })/* istanbul ignore next */ (gast = chevrotain.gast || /* istanbul ignore next */ (chevrotain.gast = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var gast; (function (gast) { var lang = chevrotain.lang; function isSequenceProd(prod) { return prod instanceof gast.Flat || prod instanceof gast.Option || prod instanceof gast.Repetition || prod instanceof gast.RepetitionMandatory || prod instanceof gast.RepetitionMandatoryWithSeparator || prod instanceof gast.RepetitionWithSeparator || prod instanceof gast.Terminal || prod instanceof gast.Rule; } gast.isSequenceProd = isSequenceProd; function isOptionalProd(prod, alreadyVisited) { if (alreadyVisited === void 0) { alreadyVisited = []; } var isDirectlyOptional = prod instanceof gast.Option || prod instanceof gast.Repetition || prod instanceof gast.RepetitionWithSeparator; if (isDirectlyOptional) { return true; } // note that this can cause infinite loop if one optional empty TOP production has a cyclic dependency with another // empty optional top rule // may be indirectly optional ((A?B?C?) 
| (D?E?F?)) if (prod instanceof gast.Alternation) { // for OR its enough for just one of the alternatives to be optional return _.some(prod.definition, function (subProd) { return isOptionalProd(subProd, alreadyVisited); }); } else if (prod instanceof gast.NonTerminal && _.contains(alreadyVisited, prod)) { // avoiding stack overflow due to infinite recursion return false; } else if (prod instanceof gast.AbstractProduction) { if (prod instanceof gast.NonTerminal) { alreadyVisited.push(prod); } return _.every(prod.definition, function (subProd) { return isOptionalProd(subProd, alreadyVisited); }); } else { return false; } } gast.isOptionalProd = isOptionalProd; function isBranchingProd(prod) { return prod instanceof gast.Alternation; } gast.isBranchingProd = isBranchingProd; var productionToDslName = {}; productionToDslName[lang.functionName(gast.NonTerminal)] = "SUBRULE"; productionToDslName[lang.functionName(gast.Option)] = "OPTION"; productionToDslName[lang.functionName(gast.RepetitionMandatory)] = "AT_LEAST_ONE"; productionToDslName[lang.functionName(gast.RepetitionMandatoryWithSeparator)] = "AT_LEAST_ONE_SEP"; productionToDslName[lang.functionName(gast.RepetitionWithSeparator)] = "MANY_SEP"; productionToDslName[lang.functionName(gast.Repetition)] = "MANY"; productionToDslName[lang.functionName(gast.Alternation)] = "OR"; productionToDslName[lang.functionName(gast.Terminal)] = "CONSUME"; function getProductionDslName(prod) { var clazz = prod.constructor; var prodName = lang.functionName(clazz); return productionToDslName[prodName]; } gast.getProductionDslName = getProductionDslName; })/* istanbul ignore next */ (gast = chevrotain.gast || /* istanbul ignore next */ (chevrotain.gast = {})); })/* istanbul ignore next */ (chevrotain || (chevrotain = {})); var chevrotain; (function (chevrotain) { var first; (function (first_1) { var gast = chevrotain.gast; function first(prod) { if (prod instanceof gast.NonTerminal) { // this could in theory cause infinite loops if // 
(1) prod A refs prod B. // (2) prod B refs prod A // (3) AB can match the empty set // in other words a cycle where everything is optional so the first will keep // looking ahead for the next optional part and will never exit // currently there is no safeguard for this unique edge case because // (1) not sure a grammar in which this can happen is useful for anything (productive) return first(prod.referencedRule); } else if (prod instanceof gast.Terminal) { return firstForTerminal(prod); } else if (gast.isSequenceProd(prod)) { return firstForSequence(prod); }/* istanbul ignore else */ else if (gast.isBranchingProd(prod)) { return firstForBranching(prod); } else { /* istanbul ignore next */ throw Error("non exhaustive match"); } } first_1.first = first; function firstForSequence(prod) { var firstSet = []; var seq = prod.definition; var nextSubProdIdx = 0; var hasInnerProdsRemaining = seq.length > nextSubProdIdx; var currSubProd; // so we enter the loop at least once (if the definition is not empty var