UNPKG

chevrotain

Version:

Chevrotain is a high performance fault tolerant javascript parsing DSL for building recursive decent parsers

1,788 lines (1,771 loc) 244 kB
// lib/src/version.js var VERSION = "12.0.0"; // ../utils/lib/src/print.js function PRINT_ERROR(msg) { if (console && console.error) { console.error(`Error: ${msg}`); } } function PRINT_WARNING(msg) { if (console && console.warn) { console.warn(`Warning: ${msg}`); } } // ../utils/lib/src/timer.js function timer(func) { const start = (/* @__PURE__ */ new Date()).getTime(); const val = func(); const end = (/* @__PURE__ */ new Date()).getTime(); const total = end - start; return { time: total, value: val }; } // ../utils/lib/src/to-fast-properties.js function toFastProperties(toBecomeFast) { function FakeConstructor() { } FakeConstructor.prototype = toBecomeFast; const fakeInstance = new FakeConstructor(); function fakeAccess() { return typeof fakeInstance.bar; } fakeAccess(); fakeAccess(); if (1) return toBecomeFast; (0, eval)(toBecomeFast); } // ../gast/lib/src/model.js function tokenLabel(tokType) { if (hasTokenLabel(tokType)) { return tokType.LABEL; } else { return tokType.name; } } function hasTokenLabel(obj) { return typeof obj.LABEL === "string" && obj.LABEL !== ""; } var AbstractProduction = class { get definition() { return this._definition; } set definition(value) { this._definition = value; } constructor(_definition) { this._definition = _definition; } accept(visitor) { visitor.visit(this); this.definition.forEach((prod) => { prod.accept(visitor); }); } }; var NonTerminal = class extends AbstractProduction { constructor(options) { super([]); this.idx = 1; Object.assign(this, pickOnlyDefined(options)); } set definition(definition) { } get definition() { if (this.referencedRule !== void 0) { return this.referencedRule.definition; } return []; } accept(visitor) { visitor.visit(this); } }; var Rule = class extends AbstractProduction { constructor(options) { super(options.definition); this.orgText = ""; Object.assign(this, pickOnlyDefined(options)); } }; var Alternative = class extends AbstractProduction { constructor(options) { super(options.definition); this.ignoreAmbiguities = false; Object.assign(this, pickOnlyDefined(options)); } }; var Option = class extends AbstractProduction { constructor(options) { super(options.definition); this.idx = 1; Object.assign(this, pickOnlyDefined(options)); } }; var RepetitionMandatory = class extends AbstractProduction { constructor(options) { super(options.definition); this.idx = 1; Object.assign(this, pickOnlyDefined(options)); } }; var RepetitionMandatoryWithSeparator = class extends AbstractProduction { constructor(options) { super(options.definition); this.idx = 1; Object.assign(this, pickOnlyDefined(options)); } }; var Repetition = class extends AbstractProduction { constructor(options) { super(options.definition); this.idx = 1; Object.assign(this, pickOnlyDefined(options)); } }; var RepetitionWithSeparator = class extends AbstractProduction { constructor(options) { super(options.definition); this.idx = 1; Object.assign(this, pickOnlyDefined(options)); } }; var Alternation = class extends AbstractProduction { get definition() { return this._definition; } set definition(value) { this._definition = value; } constructor(options) { super(options.definition); this.idx = 1; this.ignoreAmbiguities = false; this.hasPredicates = false; Object.assign(this, pickOnlyDefined(options)); } }; var Terminal = class { constructor(options) { this.idx = 1; Object.assign(this, pickOnlyDefined(options)); } accept(visitor) { visitor.visit(this); } }; function serializeGrammar(topRules) { return topRules.map(serializeProduction); } function serializeProduction(node) { function convertDefinition(definition) { return definition.map(serializeProduction); } if (node instanceof NonTerminal) { const serializedNonTerminal = { type: "NonTerminal", name: node.nonTerminalName, idx: node.idx }; if (typeof node.label === "string") { serializedNonTerminal.label = node.label; } return serializedNonTerminal; } else if (node instanceof Alternative) { return { type: "Alternative", definition: convertDefinition(node.definition) }; } else if (node instanceof Option) { return { type: "Option", idx: node.idx, definition: convertDefinition(node.definition) }; } else if (node instanceof RepetitionMandatory) { return { type: "RepetitionMandatory", idx: node.idx, definition: convertDefinition(node.definition) }; } else if (node instanceof RepetitionMandatoryWithSeparator) { return { type: "RepetitionMandatoryWithSeparator", idx: node.idx, separator: serializeProduction(new Terminal({ terminalType: node.separator })), definition: convertDefinition(node.definition) }; } else if (node instanceof RepetitionWithSeparator) { return { type: "RepetitionWithSeparator", idx: node.idx, separator: serializeProduction(new Terminal({ terminalType: node.separator })), definition: convertDefinition(node.definition) }; } else if (node instanceof Repetition) { return { type: "Repetition", idx: node.idx, definition: convertDefinition(node.definition) }; } else if (node instanceof Alternation) { return { type: "Alternation", idx: node.idx, definition: convertDefinition(node.definition) }; } else if (node instanceof Terminal) { const serializedTerminal = { type: "Terminal", name: node.terminalType.name, label: tokenLabel(node.terminalType), idx: node.idx }; if (typeof node.label === "string") { serializedTerminal.terminalLabel = node.label; } const pattern = node.terminalType.PATTERN; if (node.terminalType.PATTERN) { serializedTerminal.pattern = pattern instanceof RegExp ? pattern.source : pattern; } return serializedTerminal; } else if (node instanceof Rule) { return { type: "Rule", name: node.name, orgText: node.orgText, definition: convertDefinition(node.definition) }; } else { throw Error("non exhaustive match"); } } function pickOnlyDefined(obj) { return Object.fromEntries(Object.entries(obj).filter(([, v]) => v !== void 0)); } // ../gast/lib/src/visitor.js var GAstVisitor = class { visit(node) { const nodeAny = node; switch (nodeAny.constructor) { case NonTerminal: return this.visitNonTerminal(nodeAny); case Alternative: return this.visitAlternative(nodeAny); case Option: return this.visitOption(nodeAny); case RepetitionMandatory: return this.visitRepetitionMandatory(nodeAny); case RepetitionMandatoryWithSeparator: return this.visitRepetitionMandatoryWithSeparator(nodeAny); case RepetitionWithSeparator: return this.visitRepetitionWithSeparator(nodeAny); case Repetition: return this.visitRepetition(nodeAny); case Alternation: return this.visitAlternation(nodeAny); case Terminal: return this.visitTerminal(nodeAny); case Rule: return this.visitRule(nodeAny); /* c8 ignore next 2 */ default: throw Error("non exhaustive match"); } } /* c8 ignore next */ visitNonTerminal(node) { } /* c8 ignore next */ visitAlternative(node) { } /* c8 ignore next */ visitOption(node) { } /* c8 ignore next */ visitRepetition(node) { } /* c8 ignore next */ visitRepetitionMandatory(node) { } /* c8 ignore next 3 */ visitRepetitionMandatoryWithSeparator(node) { } /* c8 ignore next */ visitRepetitionWithSeparator(node) { } /* c8 ignore next */ visitAlternation(node) { } /* c8 ignore next */ visitTerminal(node) { } /* c8 ignore next */ visitRule(node) { } }; // ../gast/lib/src/helpers.js function isSequenceProd(prod) { return prod instanceof Alternative || prod instanceof Option || prod instanceof Repetition || prod instanceof RepetitionMandatory || prod instanceof RepetitionMandatoryWithSeparator || prod instanceof RepetitionWithSeparator || prod instanceof Terminal || prod instanceof Rule; } function isOptionalProd(prod, alreadyVisited = []) { const isDirectlyOptional = prod instanceof Option || prod instanceof Repetition || prod instanceof RepetitionWithSeparator; if (isDirectlyOptional) { return true; } if (prod instanceof Alternation) { return prod.definition.some((subProd) => { return isOptionalProd(subProd, alreadyVisited); }); } else if (prod instanceof NonTerminal && alreadyVisited.includes(prod)) { return false; } else if (prod instanceof AbstractProduction) { if (prod instanceof NonTerminal) { alreadyVisited.push(prod); } return prod.definition.every((subProd) => { return isOptionalProd(subProd, alreadyVisited); }); } else { return false; } } function isBranchingProd(prod) { return prod instanceof Alternation; } function getProductionDslName(prod) { if (prod instanceof NonTerminal) { return "SUBRULE"; } else if (prod instanceof Option) { return "OPTION"; } else if (prod instanceof Alternation) { return "OR"; } else if (prod instanceof RepetitionMandatory) { return "AT_LEAST_ONE"; } else if (prod instanceof RepetitionMandatoryWithSeparator) { return "AT_LEAST_ONE_SEP"; } else if (prod instanceof RepetitionWithSeparator) { return "MANY_SEP"; } else if (prod instanceof Repetition) { return "MANY"; } else if (prod instanceof Terminal) { return "CONSUME"; } else { throw Error("non exhaustive match"); } } // lib/src/parse/grammar/rest.js var RestWalker = class { walk(prod, prevRest = []) { prod.definition.forEach((subProd, index) => { const currRest = prod.definition.slice(index + 1); if (subProd instanceof NonTerminal) { this.walkProdRef(subProd, currRest, prevRest); } else if (subProd instanceof Terminal) { this.walkTerminal(subProd, currRest, prevRest); } else if (subProd instanceof Alternative) { this.walkFlat(subProd, currRest, prevRest); } else if (subProd instanceof Option) { this.walkOption(subProd, currRest, prevRest); } else if (subProd instanceof RepetitionMandatory) { this.walkAtLeastOne(subProd, currRest, prevRest); } else if (subProd instanceof RepetitionMandatoryWithSeparator) { this.walkAtLeastOneSep(subProd, currRest, prevRest); } else if (subProd instanceof RepetitionWithSeparator) { this.walkManySep(subProd, currRest, prevRest); } else if (subProd instanceof Repetition) { this.walkMany(subProd, currRest, prevRest); } else if (subProd instanceof Alternation) { this.walkOr(subProd, currRest, prevRest); } else { throw Error("non exhaustive match"); } }); } walkTerminal(terminal, currRest, prevRest) { } walkProdRef(refProd, currRest, prevRest) { } walkFlat(flatProd, currRest, prevRest) { const fullOrRest = currRest.concat(prevRest); this.walk(flatProd, fullOrRest); } walkOption(optionProd, currRest, prevRest) { const fullOrRest = currRest.concat(prevRest); this.walk(optionProd, fullOrRest); } walkAtLeastOne(atLeastOneProd, currRest, prevRest) { const fullAtLeastOneRest = [ new Option({ definition: atLeastOneProd.definition }) ].concat(currRest, prevRest); this.walk(atLeastOneProd, fullAtLeastOneRest); } walkAtLeastOneSep(atLeastOneSepProd, currRest, prevRest) { const fullAtLeastOneSepRest = restForRepetitionWithSeparator(atLeastOneSepProd, currRest, prevRest); this.walk(atLeastOneSepProd, fullAtLeastOneSepRest); } walkMany(manyProd, currRest, prevRest) { const fullManyRest = [ new Option({ definition: manyProd.definition }) ].concat(currRest, prevRest); this.walk(manyProd, fullManyRest); } walkManySep(manySepProd, currRest, prevRest) { const fullManySepRest = restForRepetitionWithSeparator(manySepProd, currRest, prevRest); this.walk(manySepProd, fullManySepRest); } walkOr(orProd, currRest, prevRest) { const fullOrRest = currRest.concat(prevRest); orProd.definition.forEach((alt) => { const prodWrapper = new Alternative({ definition: [alt] }); this.walk(prodWrapper, fullOrRest); }); } }; function restForRepetitionWithSeparator(repSepProd, currRest, prevRest) { const repSepRest = [ new Option({ definition: [ new Terminal({ terminalType: repSepProd.separator }) ].concat(repSepProd.definition) }) ]; const fullRepSepRest = repSepRest.concat(currRest, prevRest); return fullRepSepRest; } // lib/src/parse/grammar/first.js function first(prod) { if (prod instanceof NonTerminal) { return first(prod.referencedRule); } else if (prod instanceof Terminal) { return firstForTerminal(prod); } else if (isSequenceProd(prod)) { return firstForSequence(prod); } else if (isBranchingProd(prod)) { return firstForBranching(prod); } else { throw Error("non exhaustive match"); } } function firstForSequence(prod) { let firstSet = []; const seq = prod.definition; let nextSubProdIdx = 0; let hasInnerProdsRemaining = seq.length > nextSubProdIdx; let currSubProd; let isLastInnerProdOptional = true; while (hasInnerProdsRemaining && isLastInnerProdOptional) { currSubProd = seq[nextSubProdIdx]; isLastInnerProdOptional = isOptionalProd(currSubProd); firstSet = firstSet.concat(first(currSubProd)); nextSubProdIdx = nextSubProdIdx + 1; hasInnerProdsRemaining = seq.length > nextSubProdIdx; } return [...new Set(firstSet)]; } function firstForBranching(prod) { const allAlternativesFirsts = prod.definition.map((innerProd) => { return first(innerProd); }); return [...new Set(allAlternativesFirsts.flat())]; } function firstForTerminal(terminal) { return [terminal.terminalType]; } // lib/src/parse/constants.js var IN = "_~IN~_"; // lib/src/parse/grammar/follow.js var ResyncFollowsWalker = class extends RestWalker { constructor(topProd) { super(); this.topProd = topProd; this.follows = {}; } startWalking() { this.walk(this.topProd); return this.follows; } walkTerminal(terminal, currRest, prevRest) { } walkProdRef(refProd, currRest, prevRest) { const followName = buildBetweenProdsFollowPrefix(refProd.referencedRule, refProd.idx) + this.topProd.name; const fullRest = currRest.concat(prevRest); const restProd = new Alternative({ definition: fullRest }); const t_in_topProd_follows = first(restProd); this.follows[followName] = t_in_topProd_follows; } }; function computeAllProdsFollows(topProductions) { const reSyncFollows = {}; topProductions.forEach((topProd) => { const currRefsFollow = new ResyncFollowsWalker(topProd).startWalking(); Object.assign(reSyncFollows, currRefsFollow); }); return reSyncFollows; } function buildBetweenProdsFollowPrefix(inner, occurenceInParent) { return inner.name + occurenceInParent + IN; } // ../regexp-to-ast/lib/src/utils.js function cc(char) { return char.charCodeAt(0); } function insertToSet(item, set) { if (Array.isArray(item)) { item.forEach(function(subItem) { set.push(subItem); }); } else { set.push(item); } } function addFlag(flagObj, flagKey) { if (flagObj[flagKey] === true) { throw "duplicate flag " + flagKey; } const x = flagObj[flagKey]; flagObj[flagKey] = true; } function ASSERT_EXISTS(obj) { if (obj === void 0) { throw Error("Internal Error - Should never get here!"); } return true; } function ASSERT_NEVER_REACH_HERE() { throw Error("Internal Error - Should never get here!"); } function isCharacter(obj) { return obj["type"] === "Character"; } // ../regexp-to-ast/lib/src/character-classes.js var digitsCharCodes = []; for (let i = cc("0"); i <= cc("9"); i++) { digitsCharCodes.push(i); } var wordCharCodes = [cc("_")].concat(digitsCharCodes); for (let i = cc("a"); i <= cc("z"); i++) { wordCharCodes.push(i); } for (let i = cc("A"); i <= cc("Z"); i++) { wordCharCodes.push(i); } var whitespaceCodes = [ cc(" "), cc("\f"), cc("\n"), cc("\r"), cc(" "), cc("\v"), cc(" "), cc("\xA0"), cc("\u1680"), cc("\u2000"), cc("\u2001"), cc("\u2002"), cc("\u2003"), cc("\u2004"), cc("\u2005"), cc("\u2006"), cc("\u2007"), cc("\u2008"), cc("\u2009"), cc("\u200A"), cc("\u2028"), cc("\u2029"), cc("\u202F"), cc("\u205F"), cc("\u3000"), cc("\uFEFF") ]; // ../regexp-to-ast/lib/src/regexp-parser.js var hexDigitPattern = /[0-9a-fA-F]/; var decimalPattern = /[0-9]/; var decimalPatternNoZero = /[1-9]/; var RegExpParser = class { constructor() { this.idx = 0; this.input = ""; this.groupIdx = 0; } saveState() { return { idx: this.idx, input: this.input, groupIdx: this.groupIdx }; } restoreState(newState) { this.idx = newState.idx; this.input = newState.input; this.groupIdx = newState.groupIdx; } pattern(input) { this.idx = 0; this.input = input; this.groupIdx = 0; this.consumeChar("/"); const value = this.disjunction(); this.consumeChar("/"); const flags = { type: "Flags", loc: { begin: this.idx, end: input.length }, global: false, ignoreCase: false, multiLine: false, unicode: false, sticky: false }; while (this.isRegExpFlag()) { switch (this.popChar()) { case "g": addFlag(flags, "global"); break; case "i": addFlag(flags, "ignoreCase"); break; case "m": addFlag(flags, "multiLine"); break; case "u": addFlag(flags, "unicode"); break; case "y": addFlag(flags, "sticky"); break; } } if (this.idx !== this.input.length) { throw Error("Redundant input: " + this.input.substring(this.idx)); } return { type: "Pattern", flags, value, loc: this.loc(0) }; } disjunction() { const alts = []; const begin = this.idx; alts.push(this.alternative()); while (this.peekChar() === "|") { this.consumeChar("|"); alts.push(this.alternative()); } return { type: "Disjunction", value: alts, loc: this.loc(begin) }; } alternative() { const terms = []; const begin = this.idx; while (this.isTerm()) { terms.push(this.term()); } return { type: "Alternative", value: terms, loc: this.loc(begin) }; } term() { if (this.isAssertion()) { return this.assertion(); } else { return this.atom(); } } assertion() { const begin = this.idx; switch (this.popChar()) { case "^": return { type: "StartAnchor", loc: this.loc(begin) }; case "$": return { type: "EndAnchor", loc: this.loc(begin) }; // '\b' or '\B' case "\\": switch (this.popChar()) { case "b": return { type: "WordBoundary", loc: this.loc(begin) }; case "B": return { type: "NonWordBoundary", loc: this.loc(begin) }; } throw Error("Invalid Assertion Escape"); // '(?=' or '(?!' case "(": this.consumeChar("?"); let type; switch (this.popChar()) { case "=": type = "Lookahead"; break; case "!": type = "NegativeLookahead"; break; case "<": { switch (this.popChar()) { case "=": type = "Lookbehind"; break; case "!": type = "NegativeLookbehind"; } break; } } ASSERT_EXISTS(type); const disjunction = this.disjunction(); this.consumeChar(")"); return { type, value: disjunction, loc: this.loc(begin) }; } return ASSERT_NEVER_REACH_HERE(); } quantifier(isBacktracking = false) { let range = void 0; const begin = this.idx; switch (this.popChar()) { case "*": range = { atLeast: 0, atMost: Infinity }; break; case "+": range = { atLeast: 1, atMost: Infinity }; break; case "?": range = { atLeast: 0, atMost: 1 }; break; case "{": const atLeast = this.integerIncludingZero(); switch (this.popChar()) { case "}": range = { atLeast, atMost: atLeast }; break; case ",": let atMost; if (this.isDigit()) { atMost = this.integerIncludingZero(); range = { atLeast, atMost }; } else { range = { atLeast, atMost: Infinity }; } this.consumeChar("}"); break; } if (isBacktracking === true && range === void 0) { return void 0; } ASSERT_EXISTS(range); break; } if (isBacktracking === true && range === void 0) { return void 0; } if (ASSERT_EXISTS(range)) { if (this.peekChar(0) === "?") { this.consumeChar("?"); range.greedy = false; } else { range.greedy = true; } range.type = "Quantifier"; range.loc = this.loc(begin); return range; } } atom() { let atom; const begin = this.idx; switch (this.peekChar()) { case ".": atom = this.dotAll(); break; case "\\": atom = this.atomEscape(); break; case "[": atom = this.characterClass(); break; case "(": atom = this.group(); break; } if (atom === void 0 && this.isPatternCharacter()) { atom = this.patternCharacter(); } if (ASSERT_EXISTS(atom)) { atom.loc = this.loc(begin); if (this.isQuantifier()) { atom.quantifier = this.quantifier(); } return atom; } return ASSERT_NEVER_REACH_HERE(); } dotAll() { this.consumeChar("."); return { type: "Set", complement: true, value: [cc("\n"), cc("\r"), cc("\u2028"), cc("\u2029")] }; } atomEscape() { this.consumeChar("\\"); switch (this.peekChar()) { case "1": case "2": case "3": case "4": case "5": case "6": case "7": case "8": case "9": return this.decimalEscapeAtom(); case "d": case "D": case "s": case "S": case "w": case "W": return this.characterClassEscape(); case "f": case "n": case "r": case "t": case "v": return this.controlEscapeAtom(); case "c": return this.controlLetterEscapeAtom(); case "0": return this.nulCharacterAtom(); case "x": return this.hexEscapeSequenceAtom(); case "u": return this.regExpUnicodeEscapeSequenceAtom(); default: return this.identityEscapeAtom(); } } decimalEscapeAtom() { const value = this.positiveInteger(); return { type: "GroupBackReference", value }; } characterClassEscape() { let set; let complement = false; switch (this.popChar()) { case "d": set = digitsCharCodes; break; case "D": set = digitsCharCodes; complement = true; break; case "s": set = whitespaceCodes; break; case "S": set = whitespaceCodes; complement = true; break; case "w": set = wordCharCodes; break; case "W": set = wordCharCodes; complement = true; break; } if (ASSERT_EXISTS(set)) { return { type: "Set", value: set, complement }; } return ASSERT_NEVER_REACH_HERE(); } controlEscapeAtom() { let escapeCode; switch (this.popChar()) { case "f": escapeCode = cc("\f"); break; case "n": escapeCode = cc("\n"); break; case "r": escapeCode = cc("\r"); break; case "t": escapeCode = cc(" "); break; case "v": escapeCode = cc("\v"); break; } if (ASSERT_EXISTS(escapeCode)) { return { type: "Character", value: escapeCode }; } return ASSERT_NEVER_REACH_HERE(); } controlLetterEscapeAtom() { this.consumeChar("c"); const letter = this.popChar(); if (/[a-zA-Z]/.test(letter) === false) { throw Error("Invalid "); } const letterCode = letter.toUpperCase().charCodeAt(0) - 64; return { type: "Character", value: letterCode }; } nulCharacterAtom() { this.consumeChar("0"); return { type: "Character", value: cc("\0") }; } hexEscapeSequenceAtom() { this.consumeChar("x"); return this.parseHexDigits(2); } regExpUnicodeEscapeSequenceAtom() { this.consumeChar("u"); return this.parseHexDigits(4); } identityEscapeAtom() { const escapedChar = this.popChar(); return { type: "Character", value: cc(escapedChar) }; } classPatternCharacterAtom() { switch (this.peekChar()) { // istanbul ignore next case "\n": // istanbul ignore next case "\r": // istanbul ignore next case "\u2028": // istanbul ignore next case "\u2029": // istanbul ignore next case "\\": // istanbul ignore next case "]": throw Error("TBD"); default: const nextChar = this.popChar(); return { type: "Character", value: cc(nextChar) }; } } characterClass() { const set = []; let complement = false; this.consumeChar("["); if (this.peekChar(0) === "^") { this.consumeChar("^"); complement = true; } while (this.isClassAtom()) { const from = this.classAtom(); const isFromSingleChar = from.type === "Character"; if (isCharacter(from) && this.isRangeDash()) { this.consumeChar("-"); const to = this.classAtom(); const isToSingleChar = to.type === "Character"; if (isCharacter(to)) { if (to.value < from.value) { throw Error("Range out of order in character class"); } set.push({ from: from.value, to: to.value }); } else { insertToSet(from.value, set); set.push(cc("-")); insertToSet(to.value, set); } } else { insertToSet(from.value, set); } } this.consumeChar("]"); return { type: "Set", complement, value: set }; } classAtom() { switch (this.peekChar()) { // istanbul ignore next case "]": // istanbul ignore next case "\n": // istanbul ignore next case "\r": // istanbul ignore next case "\u2028": // istanbul ignore next case "\u2029": throw Error("TBD"); case "\\": return this.classEscape(); default: return this.classPatternCharacterAtom(); } } classEscape() { this.consumeChar("\\"); switch (this.peekChar()) { // Matches a backspace. // (Not to be confused with \b word boundary outside characterClass) case "b": this.consumeChar("b"); return { type: "Character", value: cc("\b") }; case "d": case "D": case "s": case "S": case "w": case "W": return this.characterClassEscape(); case "f": case "n": case "r": case "t": case "v": return this.controlEscapeAtom(); case "c": return this.controlLetterEscapeAtom(); case "0": return this.nulCharacterAtom(); case "x": return this.hexEscapeSequenceAtom(); case "u": return this.regExpUnicodeEscapeSequenceAtom(); default: return this.identityEscapeAtom(); } } group() { let capturing = true; this.consumeChar("("); switch (this.peekChar(0)) { case "?": this.consumeChar("?"); this.consumeChar(":"); capturing = false; break; default: this.groupIdx++; break; } const value = this.disjunction(); this.consumeChar(")"); const groupAst = { type: "Group", capturing, value }; if (capturing) { groupAst["idx"] = this.groupIdx; } return groupAst; } positiveInteger() { let number = this.popChar(); if (decimalPatternNoZero.test(number) === false) { throw Error("Expecting a positive integer"); } while (decimalPattern.test(this.peekChar(0))) { number += this.popChar(); } return parseInt(number, 10); } integerIncludingZero() { let number = this.popChar(); if (decimalPattern.test(number) === false) { throw Error("Expecting an integer"); } while (decimalPattern.test(this.peekChar(0))) { number += this.popChar(); } return parseInt(number, 10); } patternCharacter() { const nextChar = this.popChar(); switch (nextChar) { // istanbul ignore next case "\n": // istanbul ignore next case "\r": // istanbul ignore next case "\u2028": // istanbul ignore next case "\u2029": // istanbul ignore next case "^": // istanbul ignore next case "$": // istanbul ignore next case "\\": // istanbul ignore next case ".": // istanbul ignore next case "*": // istanbul ignore next case "+": // istanbul ignore next case "?": // istanbul ignore next case "(": // istanbul ignore next case ")": // istanbul ignore next case "[": // istanbul ignore next case "|": throw Error("TBD"); default: return { type: "Character", value: cc(nextChar) }; } } isRegExpFlag() { switch (this.peekChar(0)) { case "g": case "i": case "m": case "u": case "y": return true; default: return false; } } isRangeDash() { return this.peekChar() === "-" && this.isClassAtom(1); } isDigit() { return decimalPattern.test(this.peekChar(0)); } isClassAtom(howMuch = 0) { switch (this.peekChar(howMuch)) { case "]": case "\n": case "\r": case "\u2028": case "\u2029": return false; default: return true; } } isTerm() { return this.isAtom() || this.isAssertion(); } isAtom() { if (this.isPatternCharacter()) { return true; } switch (this.peekChar(0)) { case ".": case "\\": // atomEscape case "[": // characterClass // TODO: isAtom must be called before isAssertion - disambiguate case "(": return true; default: return false; } } isAssertion() { switch (this.peekChar(0)) { case "^": case "$": return true; // '\b' or '\B' case "\\": switch (this.peekChar(1)) { case "b": case "B": return true; default: return false; } // '(?=' or '(?!' or `(?<=` or `(?<!` case "(": return this.peekChar(1) === "?" && (this.peekChar(2) === "=" || this.peekChar(2) === "!" || this.peekChar(2) === "<" && (this.peekChar(3) === "=" || this.peekChar(3) === "!")); default: return false; } } isQuantifier() { const prevState = this.saveState(); try { return this.quantifier(true) !== void 0; } catch (e) { return false; } finally { this.restoreState(prevState); } } isPatternCharacter() { switch (this.peekChar()) { case "^": case "$": case "\\": case ".": case "*": case "+": case "?": case "(": case ")": case "[": case "|": case "/": case "\n": case "\r": case "\u2028": case "\u2029": return false; default: return true; } } parseHexDigits(howMany) { let hexString = ""; for (let i = 0; i < howMany; i++) { const hexChar = this.popChar(); if (hexDigitPattern.test(hexChar) === false) { throw Error("Expecting a HexDecimal digits"); } hexString += hexChar; } const charCode = parseInt(hexString, 16); return { type: "Character", value: charCode }; } peekChar(howMuch = 0) { return this.input[this.idx + howMuch]; } popChar() { const nextChar = this.peekChar(0); this.consumeChar(void 0); return nextChar; } consumeChar(char) { if (char !== void 0 && this.input[this.idx] !== char) { throw Error("Expected: '" + char + "' but found: '" + this.input[this.idx] + "' at offset: " + this.idx); } if (this.idx >= this.input.length) { throw Error("Unexpected end of input"); } this.idx++; } loc(begin) { return { begin, end: this.idx }; } }; // ../regexp-to-ast/lib/src/base-regexp-visitor.js var BaseRegExpVisitor = class { visitChildren(node) { for (const key in node) { const child = node[key]; if (node.hasOwnProperty(key)) { if (child.type !== void 0) { this.visit(child); } else if (Array.isArray(child)) { child.forEach((subChild) => { this.visit(subChild); }, this); } } } } visit(node) { switch (node.type) { case "Pattern": this.visitPattern(node); break; case "Flags": this.visitFlags(node); break; case "Disjunction": this.visitDisjunction(node); break; case "Alternative": this.visitAlternative(node); break; case "StartAnchor": this.visitStartAnchor(node); break; case "EndAnchor": this.visitEndAnchor(node); break; case "WordBoundary": this.visitWordBoundary(node); break; case "NonWordBoundary": this.visitNonWordBoundary(node); break; case "Lookahead": this.visitLookahead(node); break; case "NegativeLookahead": this.visitNegativeLookahead(node); break; case "Lookbehind": this.visitLookbehind(node); break; case "NegativeLookbehind": this.visitNegativeLookbehind(node); break; case "Character": this.visitCharacter(node); break; case "Set": this.visitSet(node); break; case "Group": this.visitGroup(node); break; case "GroupBackReference": this.visitGroupBackReference(node); break; case "Quantifier": this.visitQuantifier(node); break; } this.visitChildren(node); } visitPattern(node) { } visitFlags(node) { } visitDisjunction(node) { } visitAlternative(node) { } // Assertion visitStartAnchor(node) { } visitEndAnchor(node) { } visitWordBoundary(node) { } visitNonWordBoundary(node) { } visitLookahead(node) { } visitNegativeLookahead(node) { } visitLookbehind(node) { } visitNegativeLookbehind(node) { } // atoms visitCharacter(node) { } visitSet(node) { } visitGroup(node) { } visitGroupBackReference(node) { } visitQuantifier(node) { } }; // lib/src/scan/reg_exp_parser.js var regExpAstCache = {}; var regExpParser = new RegExpParser(); function getRegExpAst(regExp) { const regExpStr = regExp.toString(); if (regExpAstCache.hasOwnProperty(regExpStr)) { return regExpAstCache[regExpStr]; } else { const regExpAst = regExpParser.pattern(regExpStr); regExpAstCache[regExpStr] = regExpAst; return regExpAst; } } function clearRegExpParserCache() { regExpAstCache = {}; } // lib/src/scan/reg_exp.js var complementErrorMessage = "Complement Sets are not supported for first char optimization"; var failedOptimizationPrefixMsg = 'Unable to use "first char" lexer optimizations:\n'; function getOptimizedStartCodesIndices(regExp, ensureOptimizations = false) { try { const ast = getRegExpAst(regExp); const firstChars = firstCharOptimizedIndices(ast.value, {}, ast.flags.ignoreCase); return firstChars; } catch (e) { if (e.message === complementErrorMessage) { if (ensureOptimizations) { PRINT_WARNING(`${failedOptimizationPrefixMsg} Unable to optimize: < ${regExp.toString()} > Complement Sets cannot be automatically optimized. This will disable the lexer's first char optimizations. See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#COMPLEMENT for details.`); } } else { let msgSuffix = ""; if (ensureOptimizations) { msgSuffix = "\n This will disable the lexer's first char optimizations.\n See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#REGEXP_PARSING for details."; } PRINT_ERROR(`${failedOptimizationPrefixMsg} Failed parsing: < ${regExp.toString()} > Using the @chevrotain/regexp-to-ast library Please open an issue at: https://github.com/chevrotain/chevrotain/issues` + msgSuffix); } } return []; } function firstCharOptimizedIndices(ast, result, ignoreCase) { switch (ast.type) { case "Disjunction": for (let i = 0; i < ast.value.length; i++) { firstCharOptimizedIndices(ast.value[i], result, ignoreCase); } break; case "Alternative": const terms = ast.value; for (let i = 0; i < terms.length; i++) { const term = terms[i]; switch (term.type) { case "EndAnchor": // A group back reference cannot affect potential starting char. // because if a back reference is the first production than automatically // the group being referenced has had to come BEFORE so its codes have already been added case "GroupBackReference": // assertions do not affect potential starting codes case "Lookahead": case "NegativeLookahead": case "Lookbehind": case "NegativeLookbehind": case "StartAnchor": case "WordBoundary": case "NonWordBoundary": continue; } const atom = term; switch (atom.type) { case "Character": addOptimizedIdxToResult(atom.value, result, ignoreCase); break; case "Set": if (atom.complement === true) { throw Error(complementErrorMessage); } atom.value.forEach((code) => { if (typeof code === "number") { addOptimizedIdxToResult(code, result, ignoreCase); } else { const range = code; if (ignoreCase === true) { for (let rangeCode = range.from; rangeCode <= range.to; rangeCode++) { addOptimizedIdxToResult(rangeCode, result, ignoreCase); } } else { for (let rangeCode = range.from; rangeCode <= range.to && rangeCode < minOptimizationVal; rangeCode++) { addOptimizedIdxToResult(rangeCode, result, ignoreCase); } if (range.to >= minOptimizationVal) { const minUnOptVal = range.from >= minOptimizationVal ? range.from : minOptimizationVal; const maxUnOptVal = range.to; const minOptIdx = charCodeToOptimizedIndex(minUnOptVal); const maxOptIdx = charCodeToOptimizedIndex(maxUnOptVal); for (let currOptIdx = minOptIdx; currOptIdx <= maxOptIdx; currOptIdx++) { result[currOptIdx] = currOptIdx; } } } } }); break; case "Group": firstCharOptimizedIndices(atom.value, result, ignoreCase); break; /* istanbul ignore next */ default: throw Error("Non Exhaustive Match"); } const isOptionalQuantifier = atom.quantifier !== void 0 && atom.quantifier.atLeast === 0; if ( // A group may be optional due to empty contents /(?:)/ // or if everything inside it is optional /((a)?)/ atom.type === "Group" && isWholeOptional(atom) === false || // If this term is not a group it may only be optional if it has an optional quantifier atom.type !== "Group" && isOptionalQuantifier === false ) { break; } } break; /* istanbul ignore next */ default: throw Error("non exhaustive match!"); } return Object.values(result); } function addOptimizedIdxToResult(code, result, ignoreCase) { const optimizedCharIdx = charCodeToOptimizedIndex(code); result[optimizedCharIdx] = optimizedCharIdx; if (ignoreCase === true) { handleIgnoreCase(code, result); } } function handleIgnoreCase(code, result) { const char = String.fromCharCode(code); const upperChar = char.toUpperCase(); if (upperChar !== char) { const optimizedCharIdx = charCodeToOptimizedIndex(upperChar.charCodeAt(0)); result[optimizedCharIdx] = optimizedCharIdx; } else { const lowerChar = char.toLowerCase(); if (lowerChar !== char) { const optimizedCharIdx = charCodeToOptimizedIndex(lowerChar.charCodeAt(0)); result[optimizedCharIdx] = optimizedCharIdx; } } } function findCode(setNode, targetCharCodes) { return setNode.value.find((codeOrRange) => { if (typeof codeOrRange === "number") { return targetCharCodes.includes(codeOrRange); } else { const range = codeOrRange; return targetCharCodes.find((targetCode) => range.from <= targetCode && targetCode <= range.to) !== void 0; } }); } function isWholeOptional(ast) { const quantifier = ast.quantifier; if (quantifier && quantifier.atLeast === 0) { return true; } if (!ast.value) { return false; } return Array.isArray(ast.value) ? ast.value.every(isWholeOptional) : isWholeOptional(ast.value); } var CharCodeFinder = class extends BaseRegExpVisitor { constructor(targetCharCodes) { super(); this.targetCharCodes = targetCharCodes; this.found = false; } visitChildren(node) { if (this.found === true) { return; } switch (node.type) { case "Lookahead": this.visitLookahead(node); return; case "NegativeLookahead": this.visitNegativeLookahead(node); return; case "Lookbehind": this.visitLookbehind(node); return; case "NegativeLookbehind": this.visitNegativeLookbehind(node); return; } super.visitChildren(node); } visitCharacter(node) { if (this.targetCharCodes.includes(node.value)) { this.found = true; } } visitSet(node) { if (node.complement) { if (findCode(node, this.targetCharCodes) === void 0) { this.found = true; } } else { if (findCode(node, this.targetCharCodes) !== void 0) { this.found = true; } } } }; function canMatchCharCode(charCodes, pattern) { if (pattern instanceof RegExp) { const ast = getRegExpAst(pattern); const charCodeFinder = new CharCodeFinder(charCodes); charCodeFinder.visit(ast); return charCodeFinder.found; } else { for (const char of pattern) { const charCode = char.charCodeAt(0); if (charCodes.includes(charCode)) { return true; } } return false; } } // lib/src/scan/lexer.js var PATTERN = "PATTERN"; var DEFAULT_MODE = "defaultMode"; var MODES = "modes"; function analyzeTokenTypes(tokenTypes, options) { options = Object.assign({ safeMode: false, positionTracking: "full", lineTerminatorCharacters: ["\r", "\n"], tracer: (msg, action) => action() }, options); const tracer = options.tracer; tracer("initCharCodeToOptimizedIndexMap", () => { initCharCodeToOptimizedIndexMap(); }); let onlyRelevantTypes; tracer("Reject Lexer.NA", () => { onlyRelevantTypes = tokenTypes.filter((currType) => { return currType[PATTERN] !== Lexer.NA; }); }); let hasCustom = false; let allTransformedPatterns; tracer("Transform Patterns", () => { hasCustom = false; allTransformedPatterns = onlyRelevantTypes.map((currType) => { const currPattern = currType[PATTERN]; if (currPattern instanceof RegExp) { const regExpSource = currPattern.source; if (regExpSource.length === 1 && // only these regExp meta characters which can appear in a length one regExp regExpSource !== "^" && regExpSource !== "$" && regExpSource !== "." && !currPattern.ignoreCase) { return regExpSource; } else if (regExpSource.length === 2 && regExpSource[0] === "\\" && // not a meta character ![ "d", "D", "s", "S", "t", "r", "n", "t", "0", "c", "b", "B", "f", "v", "w", "W" ].includes(regExpSource[1])) { return regExpSource[1]; } else { return addStickyFlag(currPattern); } } else if (typeof currPattern === "function") { hasCustom = true; return { exec: currPattern }; } else if (typeof currPattern === "object") { hasCustom = true; return currPattern; } else if (typeof currPattern === "string") { if (currPattern.length === 1) { return currPattern; } else { const escapedRegExpString = currPattern.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&"); const wrappedRegExp = new RegExp(escapedRegExpString); return addStickyFlag(wrappedRegExp); } } else { throw Error("non exhaustive match"); } }); }); let patternIdxToType; let patternIdxToGroup; let patternIdxToLongerAltIdxArr; let patternIdxToPushMode; let patternIdxToPopMode; tracer("misc mapping", () => { patternIdxToType = onlyRelevantTypes.map((currType) => currType.tokenTypeIdx); patternIdxToGroup = onlyRelevantTypes.map((clazz) => { const groupName = clazz.GROUP; if (groupName === Lexer.SKIPPED) { return void 0; } else if (typeof groupName === "string") { return groupName; } else if (groupName === void 0) { return false; } else { throw Error("non exhaustive match"); } }); patternIdxToLongerAltIdxArr = onlyRelevantTypes.map((clazz) => { const longerAltType = clazz.LONGER_ALT; if (longerAltType) { const longerAltIdxArr = Array.isArray(longerAltType) ? longerAltType.map((type) => onlyRelevantTypes.indexOf(type)) : [onlyRelevantTypes.indexOf(longerAltType)]; return longerAltIdxArr; } }); patternIdxToPushMode = onlyRelevantTypes.map((clazz) => clazz.PUSH_MODE); patternIdxToPopMode = onlyRelevantTypes.map((clazz) => Object.hasOwn(clazz, "POP_MODE")); }); let patternIdxToCanLineTerminator; tracer("Line Terminator Handling", () => { const lineTerminatorCharCodes = getCharCodes(options.lineTerminatorCharacters); patternIdxToCanLineTerminator = onlyRelevantTypes.map((tokType) => false); if (options.positionTracking !== "onlyOffset") { patternIdxToCanLineTerminator = onlyRelevantTypes.map((tokType) => { if (Object.hasOwn(tokType, "LINE_BREAKS")) { return !!tokType.LINE_BREAKS; } else { return checkLineBreaksIssues(tokType, lineTerminatorCharCodes) === false && canMatchCharCode(lineTerminatorCharCodes, tokType.PATTERN); } }); } }); let patternIdxToIsCustom; let patternIdxToShort; let emptyGroups; let patternIdxToConfig; tracer("Misc Mapping #2", () => { patternIdxToIsCustom = onlyRelevantTypes.map(isCustomPattern); patternIdxToShort = allTransformedPatterns.map(isShortPattern); emptyGroups = onlyRelevantTypes.reduce((acc, clazz) => { const groupName = clazz.GROUP; if (typeof groupName === "string" && !(groupName === Lexer.SKIPPED)) { acc[groupName] = []; } return acc; }, {}); patternIdxToConfig = allTransformedPatterns.map((x, idx) => { return { pattern: allTransformedPatterns[idx], longerAlt: patternIdxToLongerAltIdxArr[idx], canLineTerminator: patternIdxToCanLineTerminator[idx], isCustom: patternIdxToIsCustom[idx], short: patternIdxToShort[idx], group: patternIdxToGroup[idx], push: patternIdxToPushMode[idx], pop: patternIdxToPopMode[idx], tokenTypeIdx: patternIdxToType[idx], tokenType: onlyRelevantTypes[idx] }; }); }); let canBeOptimized = true; let charCodeToPatternIdxToConfig = []; if (!options.safeMode) { tracer("First Char Optimization", () => { charCodeToPatternIdxToConfig = onlyRelevantTypes.reduce((result, currTokType, idx) => { if (typeof currTokType.PATTERN === "string") { const charCode = currTokType.PATTERN.charCodeAt(0); const optimizedIdx = charCodeToOptimizedIndex(charCode); add