chevrotain
Version:
Chevrotain is a high-performance, fault-tolerant JavaScript parsing DSL for building recursive descent parsers
1,788 lines (1,771 loc) • 244 kB
JavaScript
// lib/src/version.js
// Version string declared by this bundle.
var VERSION = "12.0.0";
// ../utils/lib/src/print.js
/**
 * Writes `msg`, prefixed with "Error: ", through `console.error`.
 * Does nothing when `console` or `console.error` is falsy.
 *
 * @param msg - text appended after the "Error: " prefix.
 */
function PRINT_ERROR(msg) {
  const canReport = console && console.error;
  if (!canReport) {
    return;
  }
  console.error(`Error: ${msg}`);
}
/**
 * Writes `msg`, prefixed with "Warning: ", through `console.warn`.
 * Does nothing when `console` or `console.warn` is falsy.
 *
 * @param msg - text appended after the "Warning: " prefix.
 */
function PRINT_WARNING(msg) {
  const canReport = console && console.warn;
  if (!canReport) {
    return;
  }
  console.warn(`Warning: ${msg}`);
}
// ../utils/lib/src/timer.js
/**
 * Invokes `func` once and measures the wall-clock time of the call.
 *
 * @param func - zero-argument function to run.
 * @returns `{ time, value }`: elapsed milliseconds (difference of two
 *          `Date#getTime()` readings) and whatever `func` returned.
 */
function timer(func) {
  const now = () => (/* @__PURE__ */ new Date()).getTime();
  const startedAt = now();
  const value = func();
  const time = now() - startedAt;
  return { time, value };
}
// ../utils/lib/src/to-fast-properties.js
/**
 * Uses `toBecomeFast` as the prototype of a throwaway constructor, creates one
 * instance from it, reads a property through that instance twice and then
 * returns the very same `toBecomeFast` object.
 * NOTE(review): judging by the name this is presumably an engine-specific trick
 * to put the object into a "fast properties" mode — that effect cannot be
 * verified from this file.
 *
 * @param toBecomeFast - object installed as `FakeConstructor.prototype`.
 * @returns the same `toBecomeFast` reference that was passed in.
 */
function toFastProperties(toBecomeFast) {
function FakeConstructor() {
}
// The argument becomes the prototype of every FakeConstructor instance.
FakeConstructor.prototype = toBecomeFast;
const fakeInstance = new FakeConstructor();
function fakeAccess() {
return typeof fakeInstance.bar;
}
// The lookup is performed twice; presumably on purpose — keep both calls.
fakeAccess();
fakeAccess();
// Always-true guard: every call returns here.
if (1)
return toBecomeFast;
// Never executed, because the return above always fires.
// NOTE(review): presumably kept so tooling does not treat the code above as removable — confirm before deleting.
(0, eval)(toBecomeFast);
}
// ../gast/lib/src/model.js
/**
 * Picks the display text for a token type.
 *
 * @param tokType - token type object exposing `name` and optionally `LABEL`.
 * @returns `tokType.LABEL` when `hasTokenLabel(tokType)` holds, otherwise `tokType.name`.
 */
function tokenLabel(tokType) {
  return hasTokenLabel(tokType) ? tokType.LABEL : tokType.name;
}
/**
 * Tells whether `obj.LABEL` is a non-empty string.
 *
 * @param obj - object whose `LABEL` property is inspected.
 * @returns `true` only for a string `LABEL` other than "".
 */
function hasTokenLabel(obj) {
  const label = obj.LABEL;
  if (typeof label !== "string") {
    return false;
  }
  return label !== "";
}
/**
 * Base class for grammar productions that hold a list of child productions
 * in `_definition`, exposed through the `definition` accessor pair.
 */
var AbstractProduction = class {
  /**
   * @param _definition - initial child list, stored as-is.
   */
  constructor(_definition) {
    this._definition = _definition;
  }

  /**
   * Visits this production first, then asks every child in `definition`
   * to accept the same visitor.
   */
  accept(visitor) {
    visitor.visit(this);
    this.definition.forEach((child) => {
      child.accept(visitor);
    });
  }

  set definition(value) {
    this._definition = value;
  }

  get definition() {
    return this._definition;
  }
};
/**
 * Production that refers to another rule. Its `definition` is not stored
 * locally: it is read from `referencedRule` on demand.
 */
var NonTerminal = class extends AbstractProduction {
  /**
   * @param options - properties copied onto the instance (undefined values
   *                  are dropped by `pickOnlyDefined`); `idx` defaults to 1.
   */
  constructor(options) {
    super([]);
    // Default first, so a defined `options.idx` overrides it.
    Object.assign(this, { idx: 1 }, pickOnlyDefined(options));
  }

  /** Only the reference itself is visited; children are not traversed. */
  accept(visitor) {
    visitor.visit(this);
  }

  /** The referenced rule's definition, or an empty array when unresolved. */
  get definition() {
    const target = this.referencedRule;
    return target === void 0 ? [] : target.definition;
  }

  /** Assignments to `definition` are ignored. */
  set definition(definition) {
  }
};
/**
 * Production built from `options.definition`. `orgText` defaults to ""
 * and every defined option is copied onto the instance.
 */
var Rule = class extends AbstractProduction {
  /**
   * @param options - must carry `definition`; undefined values are dropped
   *                  by `pickOnlyDefined` before being copied.
   */
  constructor(options) {
    super(options.definition);
    // Default first, so a defined `options.orgText` overrides it.
    Object.assign(this, { orgText: "" }, pickOnlyDefined(options));
  }
};
/**
 * Production built from `options.definition`. `ignoreAmbiguities` defaults
 * to false and every defined option is copied onto the instance.
 */
var Alternative = class extends AbstractProduction {
  /**
   * @param options - must carry `definition`; undefined values are dropped
   *                  by `pickOnlyDefined` before being copied.
   */
  constructor(options) {
    super(options.definition);
    // Default first, so a defined option of the same name overrides it.
    Object.assign(this, { ignoreAmbiguities: false }, pickOnlyDefined(options));
  }
};
/**
 * Production built from `options.definition`; `idx` defaults to 1 and
 * every defined option is copied onto the instance.
 */
var Option = class extends AbstractProduction {
  /**
   * @param options - must carry `definition`; undefined values are dropped
   *                  by `pickOnlyDefined` before being copied.
   */
  constructor(options) {
    super(options.definition);
    // Default first, so a defined `options.idx` overrides it.
    Object.assign(this, { idx: 1 }, pickOnlyDefined(options));
  }
};
/**
 * Production built from `options.definition`; `idx` defaults to 1 and
 * every defined option is copied onto the instance.
 */
var RepetitionMandatory = class extends AbstractProduction {
  /**
   * @param options - must carry `definition`; undefined values are dropped
   *                  by `pickOnlyDefined` before being copied.
   */
  constructor(options) {
    super(options.definition);
    // Default first, so a defined `options.idx` overrides it.
    Object.assign(this, { idx: 1 }, pickOnlyDefined(options));
  }
};
/**
 * Production built from `options.definition`; `idx` defaults to 1 and
 * every defined option (e.g. `separator`) is copied onto the instance.
 */
var RepetitionMandatoryWithSeparator = class extends AbstractProduction {
  /**
   * @param options - must carry `definition`; undefined values are dropped
   *                  by `pickOnlyDefined` before being copied.
   */
  constructor(options) {
    super(options.definition);
    // Default first, so a defined `options.idx` overrides it.
    Object.assign(this, { idx: 1 }, pickOnlyDefined(options));
  }
};
/**
 * Production built from `options.definition`; `idx` defaults to 1 and
 * every defined option is copied onto the instance.
 */
var Repetition = class extends AbstractProduction {
  /**
   * @param options - must carry `definition`; undefined values are dropped
   *                  by `pickOnlyDefined` before being copied.
   */
  constructor(options) {
    super(options.definition);
    // Default first, so a defined `options.idx` overrides it.
    Object.assign(this, { idx: 1 }, pickOnlyDefined(options));
  }
};
/**
 * Production built from `options.definition`; `idx` defaults to 1 and
 * every defined option (e.g. `separator`) is copied onto the instance.
 */
var RepetitionWithSeparator = class extends AbstractProduction {
  /**
   * @param options - must carry `definition`; undefined values are dropped
   *                  by `pickOnlyDefined` before being copied.
   */
  constructor(options) {
    super(options.definition);
    // Default first, so a defined `options.idx` overrides it.
    Object.assign(this, { idx: 1 }, pickOnlyDefined(options));
  }
};
/**
 * Production built from `options.definition`. Defaults: `idx` 1,
 * `ignoreAmbiguities` false, `hasPredicates` false; every defined option
 * is then copied onto the instance.
 */
var Alternation = class extends AbstractProduction {
  /**
   * @param options - must carry `definition`; undefined values are dropped
   *                  by `pickOnlyDefined` before being copied.
   */
  constructor(options) {
    super(options.definition);
    const defaults = { idx: 1, ignoreAmbiguities: false, hasPredicates: false };
    // Defaults first, so defined options of the same name override them.
    Object.assign(this, defaults, pickOnlyDefined(options));
  }

  set definition(value) {
    this._definition = value;
  }

  get definition() {
    return this._definition;
  }
};
/**
 * Leaf production. `idx` defaults to 1 and every defined option
 * (e.g. `terminalType`, `label`) is copied onto the instance.
 */
var Terminal = class {
  /**
   * @param options - undefined values are dropped by `pickOnlyDefined`
   *                  before being copied.
   */
  constructor(options) {
    // Default first, so a defined `options.idx` overrides it.
    Object.assign(this, { idx: 1 }, pickOnlyDefined(options));
  }

  /** Hands this terminal to `visitor.visit`. */
  accept(visitor) {
    visitor.visit(this);
  }
};
/**
 * Serializes every top-level rule with `serializeProduction`.
 *
 * @param topRules - array of productions.
 * @returns array with one serialized object per entry, in the same order.
 */
function serializeGrammar(topRules) {
  return topRules.map((rule) => serializeProduction(rule));
}
/**
 * Converts one grammar production (and, recursively, its children) into a
 * plain object tagged with a `type` string.
 *
 * @param node - instance of one of the production classes.
 * @returns plain-object form of `node`.
 * @throws Error("non exhaustive match") when `node` is none of the known classes.
 */
function serializeProduction(node) {
  const serializeChildren = (definition) =>
    definition.map((child) => serializeProduction(child));

  // Shape shared by productions that carry only an index and children.
  const indexed = (type) => ({
    type,
    idx: node.idx,
    definition: serializeChildren(node.definition)
  });

  // Same shape plus the separator, serialized as if it were a Terminal.
  const separated = (type) => ({
    type,
    idx: node.idx,
    separator: serializeProduction(new Terminal({ terminalType: node.separator })),
    definition: serializeChildren(node.definition)
  });

  if (node instanceof NonTerminal) {
    const serialized = {
      type: "NonTerminal",
      name: node.nonTerminalName,
      idx: node.idx
    };
    if (typeof node.label === "string") {
      serialized.label = node.label;
    }
    return serialized;
  }
  if (node instanceof Alternative) {
    return {
      type: "Alternative",
      definition: serializeChildren(node.definition)
    };
  }
  if (node instanceof Option) {
    return indexed("Option");
  }
  if (node instanceof RepetitionMandatory) {
    return indexed("RepetitionMandatory");
  }
  if (node instanceof RepetitionMandatoryWithSeparator) {
    return separated("RepetitionMandatoryWithSeparator");
  }
  if (node instanceof RepetitionWithSeparator) {
    return separated("RepetitionWithSeparator");
  }
  if (node instanceof Repetition) {
    return indexed("Repetition");
  }
  if (node instanceof Alternation) {
    return indexed("Alternation");
  }
  if (node instanceof Terminal) {
    const tokType = node.terminalType;
    const serialized = {
      type: "Terminal",
      name: tokType.name,
      label: tokenLabel(tokType),
      idx: node.idx
    };
    if (typeof node.label === "string") {
      serialized.terminalLabel = node.label;
    }
    const pattern = tokType.PATTERN;
    // A RegExp is stored by its source text; any other truthy pattern as-is.
    if (pattern) {
      serialized.pattern = pattern instanceof RegExp ? pattern.source : pattern;
    }
    return serialized;
  }
  if (node instanceof Rule) {
    return {
      type: "Rule",
      name: node.name,
      orgText: node.orgText,
      definition: serializeChildren(node.definition)
    };
  }
  throw Error("non exhaustive match");
}
/**
 * Shallow-copies `obj`, leaving out every own enumerable property whose
 * value is `undefined`.
 *
 * @param obj - source object.
 * @returns new object with only the defined entries, in original key order.
 */
function pickOnlyDefined(obj) {
  const definedEntries = Object.entries(obj).filter((entry) => entry[1] !== void 0);
  return Object.fromEntries(definedEntries);
}
// ../gast/lib/src/visitor.js
/**
 * Visitor over grammar productions. `visit` dispatches on the exact
 * constructor of the node to the matching `visitXXX` hook; all hooks are
 * empty here and meant to be overridden.
 */
var GAstVisitor = class {
  /**
   * @param node - production whose `constructor` selects the hook.
   * @returns whatever the selected hook returns.
   * @throws Error("non exhaustive match") for an unknown constructor.
   */
  visit(node) {
    const ctor = node.constructor;
    if (ctor === NonTerminal) {
      return this.visitNonTerminal(node);
    }
    if (ctor === Alternative) {
      return this.visitAlternative(node);
    }
    if (ctor === Option) {
      return this.visitOption(node);
    }
    if (ctor === RepetitionMandatory) {
      return this.visitRepetitionMandatory(node);
    }
    if (ctor === RepetitionMandatoryWithSeparator) {
      return this.visitRepetitionMandatoryWithSeparator(node);
    }
    if (ctor === RepetitionWithSeparator) {
      return this.visitRepetitionWithSeparator(node);
    }
    if (ctor === Repetition) {
      return this.visitRepetition(node);
    }
    if (ctor === Alternation) {
      return this.visitAlternation(node);
    }
    if (ctor === Terminal) {
      return this.visitTerminal(node);
    }
    if (ctor === Rule) {
      return this.visitRule(node);
    }
    /* c8 ignore next */
    throw Error("non exhaustive match");
  }

  /* c8 ignore next */
  visitNonTerminal(node) {}

  /* c8 ignore next */
  visitAlternative(node) {}

  /* c8 ignore next */
  visitOption(node) {}

  /* c8 ignore next */
  visitRepetition(node) {}

  /* c8 ignore next */
  visitRepetitionMandatory(node) {}

  /* c8 ignore next */
  visitRepetitionMandatoryWithSeparator(node) {}

  /* c8 ignore next */
  visitRepetitionWithSeparator(node) {}

  /* c8 ignore next */
  visitAlternation(node) {}

  /* c8 ignore next */
  visitTerminal(node) {}

  /* c8 ignore next */
  visitRule(node) {}
};
// ../gast/lib/src/helpers.js
/**
 * Tells whether `prod` is an instance of one of the production classes
 * listed below.
 *
 * @param prod - value to classify.
 * @returns `true` when `prod` is an instance of any listed class.
 */
function isSequenceProd(prod) {
  const sequenceClasses = [
    Alternative,
    Option,
    Repetition,
    RepetitionMandatory,
    RepetitionMandatoryWithSeparator,
    RepetitionWithSeparator,
    Terminal,
    Rule
  ];
  return sequenceClasses.some((prodClass) => prod instanceof prodClass);
}
/**
 * Decides whether a production is optional.
 *
 * - Option / Repetition / RepetitionWithSeparator: always optional.
 * - Alternation: optional when at least one child is optional.
 * - Any other AbstractProduction: optional when every child is optional.
 * - A NonTerminal already present in `alreadyVisited`: reported as not
 *   optional, which ends recursion through rule references.
 * - Anything else: not optional.
 *
 * @param prod - production to inspect.
 * @param alreadyVisited - NonTerminals seen so far; extended in place.
 * @returns whether `prod` is optional per the rules above.
 */
function isOptionalProd(prod, alreadyVisited = []) {
  if (prod instanceof Option || prod instanceof Repetition || prod instanceof RepetitionWithSeparator) {
    return true;
  }
  const childIsOptional = (subProd) => isOptionalProd(subProd, alreadyVisited);
  if (prod instanceof Alternation) {
    return prod.definition.some(childIsOptional);
  }
  const isRuleReference = prod instanceof NonTerminal;
  if (isRuleReference && alreadyVisited.includes(prod)) {
    return false;
  }
  if (!(prod instanceof AbstractProduction)) {
    return false;
  }
  if (isRuleReference) {
    alreadyVisited.push(prod);
  }
  return prod.definition.every(childIsOptional);
}
/**
 * Tells whether `prod` is an Alternation.
 *
 * @param prod - value to classify.
 * @returns `true` only for instances of Alternation.
 */
function isBranchingProd(prod) {
  const isAlternation = prod instanceof Alternation;
  return isAlternation;
}
/**
 * Maps a production instance to the name of the matching DSL method.
 *
 * @param prod - production instance.
 * @returns one of "SUBRULE", "OPTION", "OR", "AT_LEAST_ONE",
 *          "AT_LEAST_ONE_SEP", "MANY_SEP", "MANY", "CONSUME".
 * @throws Error("non exhaustive match") for any other value.
 */
function getProductionDslName(prod) {
  // Checked top to bottom; the first matching class wins.
  const dslNameByClass = [
    [NonTerminal, "SUBRULE"],
    [Option, "OPTION"],
    [Alternation, "OR"],
    [RepetitionMandatory, "AT_LEAST_ONE"],
    [RepetitionMandatoryWithSeparator, "AT_LEAST_ONE_SEP"],
    [RepetitionWithSeparator, "MANY_SEP"],
    [Repetition, "MANY"],
    [Terminal, "CONSUME"]
  ];
  const match = dslNameByClass.find(([prodClass]) => prod instanceof prodClass);
  if (match === void 0) {
    throw Error("non exhaustive match");
  }
  return match[1];
}
// lib/src/parse/grammar/rest.js
/**
 * Walks a production tree while tracking, for every child, what still
 * follows it: `currRest` (later siblings in the same definition) and
 * `prevRest` (what follows the enclosing production). Subclasses override
 * `walkTerminal` / `walkProdRef`, which are empty here.
 */
var RestWalker = class {
  /**
   * Dispatches every child of `prod.definition` to the `walkXXX` method
   * matching its class.
   *
   * @param prod - production whose children are walked.
   * @param prevRest - productions that follow `prod` itself.
   * @throws Error("non exhaustive match") for a child of unknown class.
   */
  walk(prod, prevRest = []) {
    prod.definition.forEach((subProd, index) => {
      const currRest = prod.definition.slice(index + 1);
      if (subProd instanceof NonTerminal) {
        this.walkProdRef(subProd, currRest, prevRest);
        return;
      }
      if (subProd instanceof Terminal) {
        this.walkTerminal(subProd, currRest, prevRest);
        return;
      }
      if (subProd instanceof Alternative) {
        this.walkFlat(subProd, currRest, prevRest);
        return;
      }
      if (subProd instanceof Option) {
        this.walkOption(subProd, currRest, prevRest);
        return;
      }
      if (subProd instanceof RepetitionMandatory) {
        this.walkAtLeastOne(subProd, currRest, prevRest);
        return;
      }
      if (subProd instanceof RepetitionMandatoryWithSeparator) {
        this.walkAtLeastOneSep(subProd, currRest, prevRest);
        return;
      }
      if (subProd instanceof RepetitionWithSeparator) {
        this.walkManySep(subProd, currRest, prevRest);
        return;
      }
      if (subProd instanceof Repetition) {
        this.walkMany(subProd, currRest, prevRest);
        return;
      }
      if (subProd instanceof Alternation) {
        this.walkOr(subProd, currRest, prevRest);
        return;
      }
      throw Error("non exhaustive match");
    });
  }

  /** Hook for terminals; intentionally empty. */
  walkTerminal(terminal, currRest, prevRest) {}

  /** Hook for rule references; intentionally empty. */
  walkProdRef(refProd, currRest, prevRest) {}

  /** Walks into a flat sequence; what follows it is currRest then prevRest. */
  walkFlat(flatProd, currRest, prevRest) {
    this.walk(flatProd, currRest.concat(prevRest));
  }

  /** Walks into an option; what follows it is currRest then prevRest. */
  walkOption(optionProd, currRest, prevRest) {
    this.walk(optionProd, currRest.concat(prevRest));
  }

  /** The body may repeat, so an Option over the same body leads the rest. */
  walkAtLeastOne(atLeastOneProd, currRest, prevRest) {
    const repeatedBody = new Option({ definition: atLeastOneProd.definition });
    this.walk(atLeastOneProd, [repeatedBody].concat(currRest, prevRest));
  }

  /** Rest is computed by `restForRepetitionWithSeparator`. */
  walkAtLeastOneSep(atLeastOneSepProd, currRest, prevRest) {
    this.walk(
      atLeastOneSepProd,
      restForRepetitionWithSeparator(atLeastOneSepProd, currRest, prevRest)
    );
  }

  /** The body may repeat, so an Option over the same body leads the rest. */
  walkMany(manyProd, currRest, prevRest) {
    const repeatedBody = new Option({ definition: manyProd.definition });
    this.walk(manyProd, [repeatedBody].concat(currRest, prevRest));
  }

  /** Rest is computed by `restForRepetitionWithSeparator`. */
  walkManySep(manySepProd, currRest, prevRest) {
    this.walk(
      manySepProd,
      restForRepetitionWithSeparator(manySepProd, currRest, prevRest)
    );
  }

  /** Each alternative is wrapped in its own Alternative and walked with the same rest. */
  walkOr(orProd, currRest, prevRest) {
    const restAfterOr = currRest.concat(prevRest);
    orProd.definition.forEach((alt) => {
      this.walk(new Alternative({ definition: [alt] }), restAfterOr);
    });
  }
};
/**
 * Builds what may follow one iteration of a separated repetition: an Option
 * holding the separator terminal plus the repetition body, followed by
 * `currRest` and then `prevRest`.
 *
 * @param repSepProd - production exposing `separator` and `definition`.
 * @param currRest - productions after it in the same definition.
 * @param prevRest - productions after the enclosing production.
 * @returns new array: [Option, ...currRest, ...prevRest].
 */
function restForRepetitionWithSeparator(repSepProd, currRest, prevRest) {
  const separatorTerminal = new Terminal({ terminalType: repSepProd.separator });
  const anotherIteration = new Option({
    definition: [separatorTerminal].concat(repSepProd.definition)
  });
  return [anotherIteration].concat(currRest, prevRest);
}
// lib/src/parse/grammar/first.js
/**
 * Computes the token types that may start `prod`.
 *
 * @param prod - production; a NonTerminal is resolved through `referencedRule`.
 * @returns array produced by the matching firstForXXX helper.
 * @throws Error("non exhaustive match") when no branch applies.
 */
function first(prod) {
  if (prod instanceof NonTerminal) {
    return first(prod.referencedRule);
  }
  if (prod instanceof Terminal) {
    return firstForTerminal(prod);
  }
  if (isSequenceProd(prod)) {
    return firstForSequence(prod);
  }
  if (isBranchingProd(prod)) {
    return firstForBranching(prod);
  }
  throw Error("non exhaustive match");
}
/**
 * First set of a sequence: the firsts of each leading child, up to and
 * including the first child that is not optional.
 *
 * @param prod - production whose `definition` is the sequence.
 * @returns de-duplicated array of firsts, in discovery order.
 */
function firstForSequence(prod) {
  let collected = [];
  for (const subProd of prod.definition) {
    const canBeSkipped = isOptionalProd(subProd);
    collected = collected.concat(first(subProd));
    // A mandatory child ends the scan: nothing after it can come first.
    if (!canBeSkipped) {
      break;
    }
  }
  return [...new Set(collected)];
}
/**
 * First set of a branching production: the union of the firsts of all
 * its alternatives.
 *
 * @param prod - production whose `definition` lists the alternatives.
 * @returns de-duplicated array of firsts, in discovery order.
 */
function firstForBranching(prod) {
  const everyFirst = prod.definition.flatMap((alternative) => first(alternative));
  return [...new Set(everyFirst)];
}
/**
 * First set of a terminal: just its own token type.
 *
 * @param terminal - object exposing `terminalType`.
 * @returns single-element array holding `terminal.terminalType`.
 */
function firstForTerminal(terminal) {
  const { terminalType } = terminal;
  return [terminalType];
}
// lib/src/parse/constants.js
// Marker appended by buildBetweenProdsFollowPrefix when it builds follow-set key prefixes.
var IN = "_~IN~_";
// lib/src/parse/grammar/follow.js
/**
 * Walks one top-level production and records, for every rule reference
 * found in it, the first set of whatever follows that reference.
 */
var ResyncFollowsWalker = class extends RestWalker {
  /**
   * @param topProd - production to walk; its `name` becomes part of each key.
   */
  constructor(topProd) {
    super();
    this.topProd = topProd;
    this.follows = {};
  }

  /**
   * Walks `topProd`.
   * @returns the `follows` dictionary filled during the walk.
   */
  startWalking() {
    this.walk(this.topProd);
    return this.follows;
  }

  /** Terminals contribute nothing. */
  walkTerminal(terminal, currRest, prevRest) {}

  /**
   * Stores first(rest) under a key made of the referenced rule's name, the
   * reference's `idx`, the IN marker and the walked production's name.
   */
  walkProdRef(refProd, currRest, prevRest) {
    const keyPrefix = buildBetweenProdsFollowPrefix(refProd.referencedRule, refProd.idx);
    const followName = keyPrefix + this.topProd.name;
    const whatFollows = new Alternative({ definition: currRest.concat(prevRest) });
    this.follows[followName] = first(whatFollows);
  }
};
/**
 * Runs a ResyncFollowsWalker over every top-level production and merges
 * the dictionaries they return.
 *
 * @param topProductions - array of productions to walk.
 * @returns one object holding all entries; on a key clash later productions win.
 */
function computeAllProdsFollows(topProductions) {
  return topProductions.reduce((allFollows, topProd) => {
    const walker = new ResyncFollowsWalker(topProd);
    return Object.assign(allFollows, walker.startWalking());
  }, {});
}
/**
 * Builds a follow-set key prefix: the inner rule's name, then the
 * occurrence index, then the IN marker.
 *
 * @param inner - object exposing `name`.
 * @param occurenceInParent - index appended right after the name.
 * @returns the concatenated prefix.
 */
function buildBetweenProdsFollowPrefix(inner, occurenceInParent) {
  const nameWithOccurrence = inner.name + occurenceInParent;
  return nameWithOccurrence + IN;
}
// ../regexp-to-ast/lib/src/utils.js
/**
 * Returns the UTF-16 code unit of the first character of `char`.
 *
 * @param char - string to read from.
 * @returns numeric code of `char[0]` (NaN for an empty string).
 */
function cc(char) {
  const firstCodeUnit = char.charCodeAt(0);
  return firstCodeUnit;
}
/**
 * Appends `item` to the array `set`; when `item` is itself an array,
 * each of its elements is appended instead.
 *
 * @param item - single value or array of values.
 * @param set - target array, mutated in place.
 */
function insertToSet(item, set) {
  if (!Array.isArray(item)) {
    set.push(item);
    return;
  }
  for (const subItem of item) {
    set.push(subItem);
  }
}
/**
 * Marks a regexp flag as present on the flags object.
 *
 * @param flagObj - flags record; `flagObj[flagKey]` is set to `true`.
 * @param flagKey - name of the flag property (e.g. "global").
 * @throws Error("duplicate flag <flagKey>") when the flag is already `true`.
 */
function addFlag(flagObj, flagKey) {
  if (flagObj[flagKey] === true) {
    // Throw a real Error (not a bare string) so callers get `.message` and a stack trace.
    throw new Error("duplicate flag " + flagKey);
  }
  flagObj[flagKey] = true;
}
/**
 * Guards against `undefined`.
 *
 * @param obj - value to check; only `undefined` is rejected (null passes).
 * @returns `true` when `obj` is not `undefined`.
 * @throws Error("Internal Error - Should never get here!") otherwise.
 */
function ASSERT_EXISTS(obj) {
  const isMissing = obj === void 0;
  if (isMissing) {
    throw Error("Internal Error - Should never get here!");
  }
  return true;
}
/**
 * Always throws; marks code paths that are expected to be unreachable.
 *
 * @throws Error("Internal Error - Should never get here!") on every call.
 */
function ASSERT_NEVER_REACH_HERE() {
  const unreachable = Error("Internal Error - Should never get here!");
  throw unreachable;
}
/**
 * Tells whether an AST node is tagged as a single character.
 *
 * @param obj - node exposing a `type` property.
 * @returns `true` when `obj.type` is exactly "Character".
 */
function isCharacter(obj) {
  const { type } = obj;
  return type === "Character";
}
// ../regexp-to-ast/lib/src/character-classes.js
// Char codes of "0".."9".
var digitsCharCodes = [];
for (let i = cc("0"); i <= cc("9"); i++) {
  digitsCharCodes.push(i);
}
// Char codes of "_", the digits, "a".."z" and "A".."Z".
var wordCharCodes = [cc("_")].concat(digitsCharCodes);
for (let i = cc("a"); i <= cc("z"); i++) {
  wordCharCodes.push(i);
}
for (let i = cc("A"); i <= cc("Z"); i++) {
  wordCharCodes.push(i);
}
// Char codes of whitespace characters.
// TAB is written as an explicit "\t" escape: a raw whitespace character inside
// the literal cannot be told apart from a plain space and would silently
// change meaning if whitespace in this file were ever normalized.
var whitespaceCodes = [
  cc(" "),
  cc("\f"),
  cc("\n"),
  cc("\r"),
  cc("\t"),
  cc("\v"),
  cc("\t"),
  cc("\xA0"),
  cc("\u1680"),
  cc("\u2000"),
  cc("\u2001"),
  cc("\u2002"),
  cc("\u2003"),
  cc("\u2004"),
  cc("\u2005"),
  cc("\u2006"),
  cc("\u2007"),
  cc("\u2008"),
  cc("\u2009"),
  cc("\u200A"),
  cc("\u2028"),
  cc("\u2029"),
  cc("\u202F"),
  cc("\u205F"),
  cc("\u3000"),
  cc("\uFEFF")
];
// ../regexp-to-ast/lib/src/regexp-parser.js
// Single-character test patterns used by RegExpParser:
// hexDigitPattern in parseHexDigits, decimalPattern for integer digits,
// decimalPatternNoZero for the leading digit in positiveInteger.
var hexDigitPattern = /[0-9a-fA-F]/;
var decimalPattern = /[0-9]/;
var decimalPatternNoZero = /[1-9]/;
/**
 * Hand-written recursive descent parser that turns the text of a regexp
 * literal (e.g. "/a+b/gi") into an AST of plain objects tagged by `type`.
 *
 * Parser state is the input text, the current offset `idx` and the number
 * of capturing groups opened so far (`groupIdx`).
 */
var RegExpParser = class {
  constructor() {
    this.idx = 0;
    this.input = "";
    this.groupIdx = 0;
  }

  /** @returns a snapshot of the parser state, for backtracking. */
  saveState() {
    return {
      idx: this.idx,
      input: this.input,
      groupIdx: this.groupIdx
    };
  }

  /** Restores a snapshot previously produced by `saveState`. */
  restoreState(newState) {
    this.idx = newState.idx;
    this.input = newState.input;
    this.groupIdx = newState.groupIdx;
  }

  /**
   * Entry point: parses a full regexp literal, delimiters and flags included.
   *
   * @param input - literal text such as "/ab|c/gi".
   * @returns `{ type: "Pattern", flags, value, loc }`.
   * @throws Error on malformed input or on text left after the flags.
   */
  pattern(input) {
    this.idx = 0;
    this.input = input;
    this.groupIdx = 0;
    this.consumeChar("/");
    const value = this.disjunction();
    this.consumeChar("/");
    const flags = {
      type: "Flags",
      loc: { begin: this.idx, end: input.length },
      global: false,
      ignoreCase: false,
      multiLine: false,
      unicode: false,
      sticky: false
    };
    while (this.isRegExpFlag()) {
      switch (this.popChar()) {
        case "g":
          addFlag(flags, "global");
          break;
        case "i":
          addFlag(flags, "ignoreCase");
          break;
        case "m":
          addFlag(flags, "multiLine");
          break;
        case "u":
          addFlag(flags, "unicode");
          break;
        case "y":
          addFlag(flags, "sticky");
          break;
      }
    }
    if (this.idx !== this.input.length) {
      throw Error("Redundant input: " + this.input.substring(this.idx));
    }
    return {
      type: "Pattern",
      flags,
      value,
      loc: this.loc(0)
    };
  }

  /** Parses one or more alternatives separated by "|". */
  disjunction() {
    const alts = [];
    const begin = this.idx;
    alts.push(this.alternative());
    while (this.peekChar() === "|") {
      this.consumeChar("|");
      alts.push(this.alternative());
    }
    return { type: "Disjunction", value: alts, loc: this.loc(begin) };
  }

  /** Parses a (possibly empty) run of terms. */
  alternative() {
    const terms = [];
    const begin = this.idx;
    while (this.isTerm()) {
      terms.push(this.term());
    }
    return { type: "Alternative", value: terms, loc: this.loc(begin) };
  }

  /** Parses an assertion when one starts here, otherwise an atom. */
  term() {
    if (this.isAssertion()) {
      return this.assertion();
    } else {
      return this.atom();
    }
  }

  /**
   * Parses "^", "$", "\b", "\B" and the look-around forms
   * "(?=", "(?!", "(?<=", "(?<!".
   */
  assertion() {
    const begin = this.idx;
    switch (this.popChar()) {
      case "^":
        return {
          type: "StartAnchor",
          loc: this.loc(begin)
        };
      case "$":
        return { type: "EndAnchor", loc: this.loc(begin) };
      // '\b' or '\B'
      case "\\":
        switch (this.popChar()) {
          case "b":
            return {
              type: "WordBoundary",
              loc: this.loc(begin)
            };
          case "B":
            return {
              type: "NonWordBoundary",
              loc: this.loc(begin)
            };
        }
        throw Error("Invalid Assertion Escape");
      // '(?=' or '(?!' or '(?<=' or '(?<!'
      case "(": {
        this.consumeChar("?");
        let type;
        switch (this.popChar()) {
          case "=":
            type = "Lookahead";
            break;
          case "!":
            type = "NegativeLookahead";
            break;
          case "<": {
            switch (this.popChar()) {
              case "=":
                type = "Lookbehind";
                break;
              case "!":
                type = "NegativeLookbehind";
            }
            break;
          }
        }
        ASSERT_EXISTS(type);
        const disjunction = this.disjunction();
        this.consumeChar(")");
        return {
          type,
          value: disjunction,
          loc: this.loc(begin)
        };
      }
    }
    return ASSERT_NEVER_REACH_HERE();
  }

  /**
   * Parses "*", "+", "?", "{n}", "{n,}" or "{n,m}", plus an optional
   * trailing "?" that makes the quantifier non-greedy.
   *
   * @param isBacktracking - when true, an unrecognized quantifier yields
   *        `undefined` instead of throwing (used by `isQuantifier`).
   */
  quantifier(isBacktracking = false) {
    let range = void 0;
    const begin = this.idx;
    switch (this.popChar()) {
      case "*":
        range = {
          atLeast: 0,
          atMost: Infinity
        };
        break;
      case "+":
        range = {
          atLeast: 1,
          atMost: Infinity
        };
        break;
      case "?":
        range = {
          atLeast: 0,
          atMost: 1
        };
        break;
      case "{": {
        const atLeast = this.integerIncludingZero();
        switch (this.popChar()) {
          case "}":
            range = {
              atLeast,
              atMost: atLeast
            };
            break;
          case ",": {
            let atMost;
            if (this.isDigit()) {
              atMost = this.integerIncludingZero();
              range = {
                atLeast,
                atMost
              };
            } else {
              range = {
                atLeast,
                atMost: Infinity
              };
            }
            this.consumeChar("}");
            break;
          }
        }
        if (isBacktracking === true && range === void 0) {
          return void 0;
        }
        ASSERT_EXISTS(range);
        break;
      }
    }
    if (isBacktracking === true && range === void 0) {
      return void 0;
    }
    if (ASSERT_EXISTS(range)) {
      if (this.peekChar(0) === "?") {
        this.consumeChar("?");
        range.greedy = false;
      } else {
        range.greedy = true;
      }
      range.type = "Quantifier";
      range.loc = this.loc(begin);
      return range;
    }
  }

  /** Parses ".", an escape, a character class, a group or a plain character, plus an optional quantifier. */
  atom() {
    let atom;
    const begin = this.idx;
    switch (this.peekChar()) {
      case ".":
        atom = this.dotAll();
        break;
      case "\\":
        atom = this.atomEscape();
        break;
      case "[":
        atom = this.characterClass();
        break;
      case "(":
        atom = this.group();
        break;
    }
    if (atom === void 0 && this.isPatternCharacter()) {
      atom = this.patternCharacter();
    }
    if (ASSERT_EXISTS(atom)) {
      atom.loc = this.loc(begin);
      if (this.isQuantifier()) {
        atom.quantifier = this.quantifier();
      }
      return atom;
    }
    return ASSERT_NEVER_REACH_HERE();
  }

  /** "." is modelled as the complement of the four line terminators. */
  dotAll() {
    this.consumeChar(".");
    return {
      type: "Set",
      complement: true,
      value: [cc("\n"), cc("\r"), cc("\u2028"), cc("\u2029")]
    };
  }

  /** Parses a backslash escape that appears outside a character class. */
  atomEscape() {
    this.consumeChar("\\");
    switch (this.peekChar()) {
      case "1":
      case "2":
      case "3":
      case "4":
      case "5":
      case "6":
      case "7":
      case "8":
      case "9":
        return this.decimalEscapeAtom();
      case "d":
      case "D":
      case "s":
      case "S":
      case "w":
      case "W":
        return this.characterClassEscape();
      case "f":
      case "n":
      case "r":
      case "t":
      case "v":
        return this.controlEscapeAtom();
      case "c":
        return this.controlLetterEscapeAtom();
      case "0":
        return this.nulCharacterAtom();
      case "x":
        return this.hexEscapeSequenceAtom();
      case "u":
        return this.regExpUnicodeEscapeSequenceAtom();
      default:
        return this.identityEscapeAtom();
    }
  }

  /** "\1".."\9..." back reference. */
  decimalEscapeAtom() {
    const value = this.positiveInteger();
    return { type: "GroupBackReference", value };
  }

  /** "\d", "\D", "\s", "\S", "\w", "\W"; upper-case forms are complements. */
  characterClassEscape() {
    let set;
    let complement = false;
    switch (this.popChar()) {
      case "d":
        set = digitsCharCodes;
        break;
      case "D":
        set = digitsCharCodes;
        complement = true;
        break;
      case "s":
        set = whitespaceCodes;
        break;
      case "S":
        set = whitespaceCodes;
        complement = true;
        break;
      case "w":
        set = wordCharCodes;
        break;
      case "W":
        set = wordCharCodes;
        complement = true;
        break;
    }
    if (ASSERT_EXISTS(set)) {
      return { type: "Set", value: set, complement };
    }
    return ASSERT_NEVER_REACH_HERE();
  }

  /** "\f", "\n", "\r", "\t", "\v". */
  controlEscapeAtom() {
    let escapeCode;
    switch (this.popChar()) {
      case "f":
        escapeCode = cc("\f");
        break;
      case "n":
        escapeCode = cc("\n");
        break;
      case "r":
        escapeCode = cc("\r");
        break;
      case "t":
        // Explicit escape: a raw whitespace character in the literal cannot
        // be told apart from a plain space.
        escapeCode = cc("\t");
        break;
      case "v":
        escapeCode = cc("\v");
        break;
    }
    if (ASSERT_EXISTS(escapeCode)) {
      return { type: "Character", value: escapeCode };
    }
    return ASSERT_NEVER_REACH_HERE();
  }

  /** "\cX": the letter's upper-case code minus 64. */
  controlLetterEscapeAtom() {
    this.consumeChar("c");
    const letter = this.popChar();
    if (/[a-zA-Z]/.test(letter) === false) {
      throw Error("Invalid control letter escape: expected a letter [a-zA-Z] after \\c");
    }
    const letterCode = letter.toUpperCase().charCodeAt(0) - 64;
    return { type: "Character", value: letterCode };
  }

  /** "\0". */
  nulCharacterAtom() {
    this.consumeChar("0");
    return { type: "Character", value: cc("\0") };
  }

  /** "\xHH". */
  hexEscapeSequenceAtom() {
    this.consumeChar("x");
    return this.parseHexDigits(2);
  }

  /** "\uHHHH" (exactly four hex digits). */
  regExpUnicodeEscapeSequenceAtom() {
    this.consumeChar("u");
    return this.parseHexDigits(4);
  }

  /** Any other escaped character stands for itself. */
  identityEscapeAtom() {
    const escapedChar = this.popChar();
    return { type: "Character", value: cc(escapedChar) };
  }

  /** A plain (non-escaped) character inside a character class. */
  classPatternCharacterAtom() {
    switch (this.peekChar()) {
      // istanbul ignore next
      case "\n":
      // istanbul ignore next
      case "\r":
      // istanbul ignore next
      case "\u2028":
      // istanbul ignore next
      case "\u2029":
      // istanbul ignore next
      case "\\":
      // istanbul ignore next
      case "]":
        throw Error("TBD");
      default: {
        const nextChar = this.popChar();
        return { type: "Character", value: cc(nextChar) };
      }
    }
  }

  /**
   * Parses "[...]" / "[^...]". The resulting `value` array holds char codes
   * and `{ from, to }` ranges.
   */
  characterClass() {
    const set = [];
    let complement = false;
    this.consumeChar("[");
    if (this.peekChar(0) === "^") {
      this.consumeChar("^");
      complement = true;
    }
    while (this.isClassAtom()) {
      const from = this.classAtom();
      if (isCharacter(from) && this.isRangeDash()) {
        this.consumeChar("-");
        const to = this.classAtom();
        if (isCharacter(to)) {
          if (to.value < from.value) {
            throw Error("Range out of order in character class");
          }
          set.push({ from: from.value, to: to.value });
        } else {
          // "a-\d": the dash is a literal because the right side is a set.
          insertToSet(from.value, set);
          set.push(cc("-"));
          insertToSet(to.value, set);
        }
      } else {
        insertToSet(from.value, set);
      }
    }
    this.consumeChar("]");
    return { type: "Set", complement, value: set };
  }

  /** One member of a character class: an escape or a plain character. */
  classAtom() {
    switch (this.peekChar()) {
      // istanbul ignore next
      case "]":
      // istanbul ignore next
      case "\n":
      // istanbul ignore next
      case "\r":
      // istanbul ignore next
      case "\u2028":
      // istanbul ignore next
      case "\u2029":
        throw Error("TBD");
      case "\\":
        return this.classEscape();
      default:
        return this.classPatternCharacterAtom();
    }
  }

  /** Parses a backslash escape that appears inside a character class. */
  classEscape() {
    this.consumeChar("\\");
    switch (this.peekChar()) {
      // Matches a backspace.
      // (Not to be confused with \b word boundary outside characterClass)
      case "b":
        this.consumeChar("b");
        return { type: "Character", value: cc("\b") };
      case "d":
      case "D":
      case "s":
      case "S":
      case "w":
      case "W":
        return this.characterClassEscape();
      case "f":
      case "n":
      case "r":
      case "t":
      case "v":
        return this.controlEscapeAtom();
      case "c":
        return this.controlLetterEscapeAtom();
      case "0":
        return this.nulCharacterAtom();
      case "x":
        return this.hexEscapeSequenceAtom();
      case "u":
        return this.regExpUnicodeEscapeSequenceAtom();
      default:
        return this.identityEscapeAtom();
    }
  }

  /**
   * Parses "(...)" (capturing) or "(?:...)" (non-capturing).
   * A capturing group's `idx` is its 1-based position counted by opening
   * parenthesis.
   */
  group() {
    let capturing = true;
    // Index of THIS group. It must be fixed when the opening paren is seen:
    // parsing the body may open nested groups and advance `this.groupIdx`.
    let capturingIdx;
    this.consumeChar("(");
    switch (this.peekChar(0)) {
      case "?":
        this.consumeChar("?");
        this.consumeChar(":");
        capturing = false;
        break;
      default:
        this.groupIdx++;
        capturingIdx = this.groupIdx;
        break;
    }
    const value = this.disjunction();
    this.consumeChar(")");
    const groupAst = {
      type: "Group",
      capturing,
      value
    };
    if (capturing) {
      groupAst["idx"] = capturingIdx;
    }
    return groupAst;
  }

  /** Reads a decimal integer whose first digit is 1-9. */
  positiveInteger() {
    let number = this.popChar();
    if (decimalPatternNoZero.test(number) === false) {
      throw Error("Expecting a positive integer");
    }
    while (decimalPattern.test(this.peekChar(0))) {
      number += this.popChar();
    }
    return parseInt(number, 10);
  }

  /** Reads a decimal integer whose first digit is 0-9. */
  integerIncludingZero() {
    let number = this.popChar();
    if (decimalPattern.test(number) === false) {
      throw Error("Expecting an integer");
    }
    while (decimalPattern.test(this.peekChar(0))) {
      number += this.popChar();
    }
    return parseInt(number, 10);
  }

  /** A plain character outside a character class; syntax characters are rejected. */
  patternCharacter() {
    const nextChar = this.popChar();
    switch (nextChar) {
      // istanbul ignore next
      case "\n":
      // istanbul ignore next
      case "\r":
      // istanbul ignore next
      case "\u2028":
      // istanbul ignore next
      case "\u2029":
      // istanbul ignore next
      case "^":
      // istanbul ignore next
      case "$":
      // istanbul ignore next
      case "\\":
      // istanbul ignore next
      case ".":
      // istanbul ignore next
      case "*":
      // istanbul ignore next
      case "+":
      // istanbul ignore next
      case "?":
      // istanbul ignore next
      case "(":
      // istanbul ignore next
      case ")":
      // istanbul ignore next
      case "[":
      // istanbul ignore next
      case "|":
        throw Error("TBD");
      default:
        return { type: "Character", value: cc(nextChar) };
    }
  }

  isRegExpFlag() {
    switch (this.peekChar(0)) {
      case "g":
      case "i":
      case "m":
      case "u":
      case "y":
        return true;
      default:
        return false;
    }
  }

  /** A "-" is a range dash only when another class atom follows it. */
  isRangeDash() {
    return this.peekChar() === "-" && this.isClassAtom(1);
  }

  isDigit() {
    return decimalPattern.test(this.peekChar(0));
  }

  isClassAtom(howMuch = 0) {
    switch (this.peekChar(howMuch)) {
      case "]":
      case "\n":
      case "\r":
      case "\u2028":
      case "\u2029":
        return false;
      default:
        return true;
    }
  }

  isTerm() {
    return this.isAtom() || this.isAssertion();
  }

  isAtom() {
    if (this.isPatternCharacter()) {
      return true;
    }
    switch (this.peekChar(0)) {
      case ".":
      case "\\":
      // atomEscape
      case "[":
      // characterClass
      // TODO: isAtom must be called before isAssertion - disambiguate
      case "(":
        return true;
      default:
        return false;
    }
  }

  isAssertion() {
    switch (this.peekChar(0)) {
      case "^":
      case "$":
        return true;
      // '\b' or '\B'
      case "\\":
        switch (this.peekChar(1)) {
          case "b":
          case "B":
            return true;
          default:
            return false;
        }
      // '(?=' or '(?!' or `(?<=` or `(?<!`
      case "(":
        return this.peekChar(1) === "?" && (this.peekChar(2) === "=" || this.peekChar(2) === "!" || this.peekChar(2) === "<" && (this.peekChar(3) === "=" || this.peekChar(3) === "!"));
      default:
        return false;
    }
  }

  /** Speculatively parses a quantifier, then always rewinds the parser state. */
  isQuantifier() {
    const prevState = this.saveState();
    try {
      return this.quantifier(true) !== void 0;
    } catch (e) {
      return false;
    } finally {
      this.restoreState(prevState);
    }
  }

  isPatternCharacter() {
    switch (this.peekChar()) {
      case "^":
      case "$":
      case "\\":
      case ".":
      case "*":
      case "+":
      case "?":
      case "(":
      case ")":
      case "[":
      case "|":
      case "/":
      case "\n":
      case "\r":
      case "\u2028":
      case "\u2029":
        return false;
      default:
        return true;
    }
  }

  /** Reads exactly `howMany` hex digits and returns them as one Character node. */
  parseHexDigits(howMany) {
    let hexString = "";
    for (let i = 0; i < howMany; i++) {
      const hexChar = this.popChar();
      if (hexDigitPattern.test(hexChar) === false) {
        throw Error("Expecting a HexDecimal digits");
      }
      hexString += hexChar;
    }
    const charCode = parseInt(hexString, 16);
    return { type: "Character", value: charCode };
  }

  /** @returns the character `howMuch` positions ahead, or `undefined` past the end. */
  peekChar(howMuch = 0) {
    return this.input[this.idx + howMuch];
  }

  /** Consumes and returns the current character; throws at end of input. */
  popChar() {
    const nextChar = this.peekChar(0);
    this.consumeChar(void 0);
    return nextChar;
  }

  /**
   * Advances by one character.
   *
   * @param char - when defined, the current character must equal it.
   * @throws Error on a mismatch or when already at the end of input.
   */
  consumeChar(char) {
    if (char !== void 0 && this.input[this.idx] !== char) {
      throw Error("Expected: '" + char + "' but found: '" + this.input[this.idx] + "' at offset: " + this.idx);
    }
    if (this.idx >= this.input.length) {
      throw Error("Unexpected end of input");
    }
    this.idx++;
  }

  /** @returns `{ begin, end }` where `end` is the current offset. */
  loc(begin) {
    return { begin, end: this.idx };
  }
};
// ../regexp-to-ast/lib/src/base-regexp-visitor.js
/**
 * Generic visitor over the regexp AST. `visit` calls the `visitXXX` hook
 * matching `node.type` (all hooks are empty here and meant to be
 * overridden) and then descends into the node's children.
 */
var BaseRegExpVisitor = class {
  /**
   * Visits every own property of `node` that is either an AST node (it has
   * a `type`) or an array, whose elements are visited one by one.
   * `null` / `undefined` property values and array elements are skipped
   * instead of raising a TypeError.
   */
  visitChildren(node) {
    for (const key in node) {
      // Safe own-property check, also valid for objects without Object.prototype.
      if (!Object.prototype.hasOwnProperty.call(node, key)) {
        continue;
      }
      const child = node[key];
      if (child === null || child === void 0) {
        continue;
      }
      if (child.type !== void 0) {
        this.visit(child);
      } else if (Array.isArray(child)) {
        child.forEach((subChild) => {
          if (subChild !== null && subChild !== void 0) {
            this.visit(subChild);
          }
        });
      }
    }
  }

  /**
   * Dispatches on `node.type` to the matching hook, then visits children.
   * Nodes with an unknown or missing `type` only have their children visited.
   */
  visit(node) {
    switch (node.type) {
      case "Pattern":
        this.visitPattern(node);
        break;
      case "Flags":
        this.visitFlags(node);
        break;
      case "Disjunction":
        this.visitDisjunction(node);
        break;
      case "Alternative":
        this.visitAlternative(node);
        break;
      case "StartAnchor":
        this.visitStartAnchor(node);
        break;
      case "EndAnchor":
        this.visitEndAnchor(node);
        break;
      case "WordBoundary":
        this.visitWordBoundary(node);
        break;
      case "NonWordBoundary":
        this.visitNonWordBoundary(node);
        break;
      case "Lookahead":
        this.visitLookahead(node);
        break;
      case "NegativeLookahead":
        this.visitNegativeLookahead(node);
        break;
      case "Lookbehind":
        this.visitLookbehind(node);
        break;
      case "NegativeLookbehind":
        this.visitNegativeLookbehind(node);
        break;
      case "Character":
        this.visitCharacter(node);
        break;
      case "Set":
        this.visitSet(node);
        break;
      case "Group":
        this.visitGroup(node);
        break;
      case "GroupBackReference":
        this.visitGroupBackReference(node);
        break;
      case "Quantifier":
        this.visitQuantifier(node);
        break;
    }
    this.visitChildren(node);
  }

  visitPattern(node) {}

  visitFlags(node) {}

  visitDisjunction(node) {}

  visitAlternative(node) {}

  // Assertion
  visitStartAnchor(node) {}

  visitEndAnchor(node) {}

  visitWordBoundary(node) {}

  visitNonWordBoundary(node) {}

  visitLookahead(node) {}

  visitNegativeLookahead(node) {}

  visitLookbehind(node) {}

  visitNegativeLookbehind(node) {}

  // atoms
  visitCharacter(node) {}

  visitSet(node) {}

  visitGroup(node) {}

  visitGroupBackReference(node) {}

  visitQuantifier(node) {}
};
// lib/src/scan/reg_exp_parser.js
// Parsed regexp ASTs keyed by the regexp's toString() text (filled by getRegExpAst,
// replaced with a fresh object by clearRegExpParserCache).
var regExpAstCache = {};
// Single parser instance used by getRegExpAst for every parse.
var regExpParser = new RegExpParser();
/**
 * Returns the AST of a regexp, parsing it at most once per distinct
 * `toString()` text.
 *
 * @param regExp - value whose `toString()` yields the literal text to parse.
 * @returns the cached AST, or the freshly parsed one (which is then cached).
 */
function getRegExpAst(regExp) {
  const cacheKey = regExp.toString();
  if (!regExpAstCache.hasOwnProperty(cacheKey)) {
    regExpAstCache[cacheKey] = regExpParser.pattern(cacheKey);
  }
  return regExpAstCache[cacheKey];
}
/**
 * Drops every cached regexp AST by swapping in a fresh, empty cache object.
 */
function clearRegExpParserCache() {
  const emptyCache = {};
  regExpAstCache = emptyCache;
}
// lib/src/scan/reg_exp.js
// Message of the Error thrown by firstCharOptimizedIndices for complement sets;
// getOptimizedStartCodesIndices compares caught errors' `message` against it.
var complementErrorMessage = "Complement Sets are not supported for first char optimization";
// Common first line of the diagnostics printed by getOptimizedStartCodesIndices.
var failedOptimizationPrefixMsg = 'Unable to use "first char" lexer optimizations:\n';
/**
 * Computes the optimized start-code indices for a regexp by parsing it
 * (getRegExpAst) and handing the AST to firstCharOptimizedIndices.
 * Any exception is caught: a diagnostic may be printed and an empty
 * array is returned instead.
 *
 * @param regExp - regexp to analyze; its toString() text is parsed and quoted in diagnostics.
 * @param ensureOptimizations - when true, the complement-set warning is printed and the
 *        parse-failure error gets an extra suffix; defaults to false.
 * @returns the value produced by firstCharOptimizedIndices, or [] when anything threw.
 */
function getOptimizedStartCodesIndices(regExp, ensureOptimizations = false) {
try {
const ast = getRegExpAst(regExp);
const firstChars = firstCharOptimizedIndices(ast.value, {}, ast.flags.ignoreCase);
return firstChars;
} catch (e) {
// Complement sets are recognized by their exact error message.
if (e.message === complementErrorMessage) {
// For complement sets, a warning is printed only when optimizations were explicitly requested.
if (ensureOptimizations) {
PRINT_WARNING(`${failedOptimizationPrefixMsg} Unable to optimize: < ${regExp.toString()} >
Complement Sets cannot be automatically optimized.
This will disable the lexer's first char optimizations.
See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#COMPLEMENT for details.`);
}
} else {
// Any other failure is always reported as an error; the suffix is added only on request.
let msgSuffix = "";
if (ensureOptimizations) {
msgSuffix = "\n This will disable the lexer's first char optimizations.\n See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#REGEXP_PARSING for details.";
}
PRINT_ERROR(`${failedOptimizationPrefixMsg}
Failed parsing: < ${regExp.toString()} >
Using the @chevrotain/regexp-to-ast library
Please open an issue at: https://github.com/chevrotain/chevrotain/issues` + msgSuffix);
}
}
// Reached only after an exception was caught above.
return [];
}
/**
 * Collects into `result` the optimized indices of every char code with which
 * a match of `ast` may begin.
 *
 * A "Disjunction" contributes the start codes of each of its alternatives.
 * An "Alternative" is scanned term by term: each term contributes its codes,
 * and scanning stops at the first term that cannot be skipped.
 *
 * @param {object} ast - a "Disjunction" or "Alternative" node.
 * @param {object} result - accumulator mapping optimized index -> optimized
 *   index; mutated in place.
 * @param {boolean} ignoreCase - forwarded to addOptimizedIdxToResult; when
 *   `true`, ranges are expanded code by code over their full span.
 * @returns {number[]} `Object.values(result)`.
 * @throws {Error} with `complementErrorMessage` when a complement set is met,
 *   or a "non exhaustive match" error for an unknown node/term type.
 */
function firstCharOptimizedIndices(ast, result, ignoreCase) {
  if (ast.type === "Disjunction") {
    for (const alternative of ast.value) {
      firstCharOptimizedIndices(alternative, result, ignoreCase);
    }
    return Object.values(result);
  }
  if (ast.type !== "Alternative") {
    /* istanbul ignore next */
    throw Error("non exhaustive match!");
  }

  // Terms that never contribute a starting code:
  //  - assertions do not affect potential starting codes;
  //  - a group back reference cannot affect them either, because the group it
  //    refers to had to come BEFORE it, so its codes have already been added.
  const nonContributingTermTypes = [
    "EndAnchor",
    "GroupBackReference",
    "Lookahead",
    "NegativeLookahead",
    "Lookbehind",
    "NegativeLookbehind",
    "StartAnchor",
    "WordBoundary",
    "NonWordBoundary"
  ];

  for (const term of ast.value) {
    if (nonContributingTermTypes.includes(term.type)) {
      continue;
    }

    if (term.type === "Character") {
      addOptimizedIdxToResult(term.value, result, ignoreCase);
    } else if (term.type === "Set") {
      if (term.complement === true) {
        throw Error(complementErrorMessage);
      }
      for (const member of term.value) {
        if (typeof member === "number") {
          addOptimizedIdxToResult(member, result, ignoreCase);
          continue;
        }
        // Otherwise the member is a { from, to } range.
        if (ignoreCase === true) {
          // Case-insensitive: every code of the range goes through addOptimizedIdxToResult.
          for (let code = member.from; code <= member.to; code++) {
            addOptimizedIdxToResult(code, result, ignoreCase);
          }
          continue;
        }
        // Codes below minOptimizationVal are added one by one ...
        for (let code = member.from; code <= member.to && code < minOptimizationVal; code++) {
          addOptimizedIdxToResult(code, result, ignoreCase);
        }
        // ... while the remainder of the range is covered by walking the span
        // between the optimized indices of its two end points.
        if (member.to >= minOptimizationVal) {
          const firstUnoptimizedCode = member.from >= minOptimizationVal ? member.from : minOptimizationVal;
          const firstIdx = charCodeToOptimizedIndex(firstUnoptimizedCode);
          const lastIdx = charCodeToOptimizedIndex(member.to);
          for (let idx = firstIdx; idx <= lastIdx; idx++) {
            result[idx] = idx;
          }
        }
      }
    } else if (term.type === "Group") {
      firstCharOptimizedIndices(term.value, result, ignoreCase);
    } else {
      /* istanbul ignore next */
      throw Error("Non Exhaustive Match");
    }

    const hasOptionalQuantifier = term.quantifier !== void 0 && term.quantifier.atLeast === 0;
    // A group may be optional due to empty contents /(?:)/ or because everything
    // inside it is optional /((a)?)/; any other term may only be optional through
    // an optional quantifier.
    const canBeSkipped = term.type === "Group" ? isWholeOptional(term) !== false : hasOptionalQuantifier !== false;
    if (!canBeSkipped) {
      // A mandatory term ends the set of possible first characters of this alternative.
      break;
    }
  }
  return Object.values(result);
}
/**
 * Records the optimized index of `code` in `result` (stored as index -> index).
 * When `ignoreCase` is exactly `true`, handleIgnoreCase is also invoked for
 * the same code.
 *
 * @param {number} code - char code to record.
 * @param {object} result - accumulator, mutated in place.
 * @param {boolean} ignoreCase - whether to also run the ignore-case handling.
 */
function addOptimizedIdxToResult(code, result, ignoreCase) {
  const idx = charCodeToOptimizedIndex(code);
  result[idx] = idx;
  if (ignoreCase !== true) {
    return;
  }
  handleIgnoreCase(code, result);
}
/**
 * Adds to `result` the optimized index of the opposite-case form of `code`.
 *
 * The upper-case form is used when it differs from the character, otherwise
 * the lower-case form when that differs; a character with no distinct case
 * form leaves `result` untouched. Only the first UTF-16 unit of the
 * converted string is considered.
 *
 * @param {number} code - char code whose case counterpart should be recorded.
 * @param {object} result - accumulator (index -> index), mutated in place.
 */
function handleIgnoreCase(code, result) {
  const original = String.fromCharCode(code);
  let counterpart = original.toUpperCase();
  if (counterpart === original) {
    counterpart = original.toLowerCase();
    if (counterpart === original) {
      // The character has no other case form.
      return;
    }
  }
  const idx = charCodeToOptimizedIndex(counterpart.charCodeAt(0));
  result[idx] = idx;
}
/**
 * Finds the first member of a set node that covers at least one of the
 * target char codes.
 *
 * @param {{ value: Array<number | { from: number, to: number }> }} setNode -
 *   set node whose members are single codes or inclusive ranges.
 * @param {number[]} targetCharCodes - codes to look for.
 * @returns {number | { from: number, to: number } | undefined} the first
 *   matching member (a code or a range), or `undefined` when none matches.
 */
function findCode(setNode, targetCharCodes) {
  const coversSomeTarget = (member) => {
    if (typeof member !== "number") {
      // Inclusive range: is any target code inside [from, to]?
      return targetCharCodes.some((target) => member.from <= target && target <= member.to);
    }
    return targetCharCodes.includes(member);
  };
  return setNode.value.find(coversSomeTarget);
}
/**
 * Tells whether a regexp AST node can match the empty string, i.e. whether
 * the characters FOLLOWING it may be the first characters of a match.
 *
 * Rules, in order:
 *  1. a node with a quantifier whose `atLeast` is 0 is optional;
 *  2. zero-width terms (anchors, word boundaries, lookarounds) consume no
 *     characters and are optional. Back references are treated the same way,
 *     consistent with firstCharOptimizedIndices, which skips exactly these
 *     term types; a back reference to a group that captured nothing matches
 *     the empty string;
 *  3. a "Disjunction" is optional when ANY of its alternatives is optional,
 *     since one empty-matching alternative is enough: /(a|b?)c/ may start
 *     with "c";
 *  4. any other node with an array value (e.g. an "Alternative") is optional
 *     only when EVERY element is optional, which also covers empty contents
 *     such as /(?:)/;
 *  5. a node wrapping a single child (e.g. a "Group") is optional when that
 *     child is.
 *
 * Erring towards "optional" is the safe direction for the first-char
 * optimization: it can only add candidate start characters, never drop one.
 *
 * @param {object} ast - regexp AST node (or a leaf value reached by recursion).
 * @returns {boolean} `true` when the node may match without consuming input.
 */
function isWholeOptional(ast) {
  const quantifier = ast.quantifier;
  if (quantifier && quantifier.atLeast === 0) {
    return true;
  }
  switch (ast.type) {
    case "StartAnchor":
    case "EndAnchor":
    case "WordBoundary":
    case "NonWordBoundary":
    case "Lookahead":
    case "NegativeLookahead":
    case "Lookbehind":
    case "NegativeLookbehind":
    case "GroupBackReference":
      // Must be decided before looking at `value`: lookarounds carry their own
      // sub-pattern, which is irrelevant to how much input they consume.
      return true;
    case "Disjunction":
      if (Array.isArray(ast.value)) {
        // An empty list keeps the previous (vacuously optional) answer.
        return ast.value.length === 0 || ast.value.some((alternative) => isWholeOptional(alternative));
      }
      break;
    default:
      break;
  }
  if (!ast.value) {
    return false;
  }
  return Array.isArray(ast.value) ? ast.value.every((child) => isWholeOptional(child)) : isWholeOptional(ast.value);
}
/**
 * Regexp AST visitor that determines whether a pattern contains an atom able
 * to match at least one of a given list of char codes. The answer is left in
 * the `found` property after `visit(ast)`.
 */
var CharCodeFinder = class extends BaseRegExpVisitor {
  /**
   * @param {number[]} targetCharCodes - char codes to look for.
   */
  constructor(targetCharCodes) {
    super();
    this.targetCharCodes = targetCharCodes;
    this.found = false;
  }
  /**
   * Stops descending as soon as a match was found, and routes lookaround
   * nodes straight to their dedicated visit method instead of the generic
   * child traversal.
   * NOTE(review): presumably so that atoms inside lookarounds, which consume
   * no characters, are not counted — depends on BaseRegExpVisitor, which is
   * not visible here; confirm.
   */
  visitChildren(node) {
    if (this.found === true) {
      return;
    }
    switch (node.type) {
      case "Lookahead":
        this.visitLookahead(node);
        return;
      case "NegativeLookahead":
        this.visitNegativeLookahead(node);
        return;
      case "Lookbehind":
        this.visitLookbehind(node);
        return;
      case "NegativeLookbehind":
        this.visitNegativeLookbehind(node);
        return;
    }
    super.visitChildren(node);
  }
  /** A literal character matches when its code is one of the targets. */
  visitCharacter(node) {
    if (this.targetCharCodes.includes(node.value)) {
      this.found = true;
    }
  }
  /**
   * A regular set matches when it lists (directly or through a range) at
   * least one target code.
   *
   * A complement set matches every code it does NOT list, so it can match a
   * target as soon as at least ONE target code is absent from the set. For
   * example /[^\n]/ still matches "\r". Requiring all targets to be absent
   * would wrongly report that such a set cannot match any of them.
   */
  visitSet(node) {
    if (node.complement) {
      const someTargetIsNotListed = this.targetCharCodes.some((targetCode) => findCode(node, [targetCode]) === void 0);
      if (someTargetIsNotListed) {
        this.found = true;
      }
    } else if (findCode(node, this.targetCharCodes) !== void 0) {
      this.found = true;
    }
  }
};
/**
 * Tells whether `pattern` can match any of the given char codes.
 *
 * - RegExp patterns are parsed via getRegExpAst and inspected with a
 *   CharCodeFinder.
 * - Any other pattern is iterated character by character (as a string is)
 *   and matches when the first UTF-16 unit of some character is a target.
 *
 * @param {number[]} charCodes - char codes to look for.
 * @param {RegExp|string} pattern - pattern to inspect.
 * @returns {boolean} `true` when one of the codes may be matched.
 */
function canMatchCharCode(charCodes, pattern) {
  if (!(pattern instanceof RegExp)) {
    return [...pattern].some((char) => charCodes.includes(char.charCodeAt(0)));
  }
  const parsedPattern = getRegExpAst(pattern);
  const finder = new CharCodeFinder(charCodes);
  finder.visit(parsedPattern);
  return finder.found;
}
// lib/src/scan/lexer.js
// Property name under which a token type carries its pattern; analyzeTokenTypes reads it
// as `currType[PATTERN]`.
var PATTERN = "PATTERN";
// NOTE(review): these look like the property names of a multi-mode lexer definition
// ("defaultMode" / "modes"); their usage is outside this section — confirm.
var DEFAULT_MODE = "defaultMode";
var MODES = "modes";
function analyzeTokenTypes(tokenTypes, options) {
options = Object.assign({ safeMode: false, positionTracking: "full", lineTerminatorCharacters: ["\r", "\n"], tracer: (msg, action) => action() }, options);
const tracer = options.tracer;
tracer("initCharCodeToOptimizedIndexMap", () => {
initCharCodeToOptimizedIndexMap();
});
let onlyRelevantTypes;
tracer("Reject Lexer.NA", () => {
onlyRelevantTypes = tokenTypes.filter((currType) => {
return currType[PATTERN] !== Lexer.NA;
});
});
let hasCustom = false;
let allTransformedPatterns;
tracer("Transform Patterns", () => {
hasCustom = false;
allTransformedPatterns = onlyRelevantTypes.map((currType) => {
const currPattern = currType[PATTERN];
if (currPattern instanceof RegExp) {
const regExpSource = currPattern.source;
if (regExpSource.length === 1 && // only these regExp meta characters which can appear in a length one regExp
regExpSource !== "^" && regExpSource !== "$" && regExpSource !== "." && !currPattern.ignoreCase) {
return regExpSource;
} else if (regExpSource.length === 2 && regExpSource[0] === "\\" && // not a meta character
![
"d",
"D",
"s",
"S",
"t",
"r",
"n",
"t",
"0",
"c",
"b",
"B",
"f",
"v",
"w",
"W"
].includes(regExpSource[1])) {
return regExpSource[1];
} else {
return addStickyFlag(currPattern);
}
} else if (typeof currPattern === "function") {
hasCustom = true;
return { exec: currPattern };
} else if (typeof currPattern === "object") {
hasCustom = true;
return currPattern;
} else if (typeof currPattern === "string") {
if (currPattern.length === 1) {
return currPattern;
} else {
const escapedRegExpString = currPattern.replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
const wrappedRegExp = new RegExp(escapedRegExpString);
return addStickyFlag(wrappedRegExp);
}
} else {
throw Error("non exhaustive match");
}
});
});
let patternIdxToType;
let patternIdxToGroup;
let patternIdxToLongerAltIdxArr;
let patternIdxToPushMode;
let patternIdxToPopMode;
tracer("misc mapping", () => {
patternIdxToType = onlyRelevantTypes.map((currType) => currType.tokenTypeIdx);
patternIdxToGroup = onlyRelevantTypes.map((clazz) => {
const groupName = clazz.GROUP;
if (groupName === Lexer.SKIPPED) {
return void 0;
} else if (typeof groupName === "string") {
return groupName;
} else if (groupName === void 0) {
return false;
} else {
throw Error("non exhaustive match");
}
});
patternIdxToLongerAltIdxArr = onlyRelevantTypes.map((clazz) => {
const longerAltType = clazz.LONGER_ALT;
if (longerAltType) {
const longerAltIdxArr = Array.isArray(longerAltType) ? longerAltType.map((type) => onlyRelevantTypes.indexOf(type)) : [onlyRelevantTypes.indexOf(longerAltType)];
return longerAltIdxArr;
}
});
patternIdxToPushMode = onlyRelevantTypes.map((clazz) => clazz.PUSH_MODE);
patternIdxToPopMode = onlyRelevantTypes.map((clazz) => Object.hasOwn(clazz, "POP_MODE"));
});
let patternIdxToCanLineTerminator;
tracer("Line Terminator Handling", () => {
const lineTerminatorCharCodes = getCharCodes(options.lineTerminatorCharacters);
patternIdxToCanLineTerminator = onlyRelevantTypes.map((tokType) => false);
if (options.positionTracking !== "onlyOffset") {
patternIdxToCanLineTerminator = onlyRelevantTypes.map((tokType) => {
if (Object.hasOwn(tokType, "LINE_BREAKS")) {
return !!tokType.LINE_BREAKS;
} else {
return checkLineBreaksIssues(tokType, lineTerminatorCharCodes) === false && canMatchCharCode(lineTerminatorCharCodes, tokType.PATTERN);
}
});
}
});
let patternIdxToIsCustom;
let patternIdxToShort;
let emptyGroups;
let patternIdxToConfig;
tracer("Misc Mapping #2", () => {
patternIdxToIsCustom = onlyRelevantTypes.map(isCustomPattern);
patternIdxToShort = allTransformedPatterns.map(isShortPattern);
emptyGroups = onlyRelevantTypes.reduce((acc, clazz) => {
const groupName = clazz.GROUP;
if (typeof groupName === "string" && !(groupName === Lexer.SKIPPED)) {
acc[groupName] = [];
}
return acc;
}, {});
patternIdxToConfig = allTransformedPatterns.map((x, idx) => {
return {
pattern: allTransformedPatterns[idx],
longerAlt: patternIdxToLongerAltIdxArr[idx],
canLineTerminator: patternIdxToCanLineTerminator[idx],
isCustom: patternIdxToIsCustom[idx],
short: patternIdxToShort[idx],
group: patternIdxToGroup[idx],
push: patternIdxToPushMode[idx],
pop: patternIdxToPopMode[idx],
tokenTypeIdx: patternIdxToType[idx],
tokenType: onlyRelevantTypes[idx]
};
});
});
let canBeOptimized = true;
let charCodeToPatternIdxToConfig = [];
if (!options.safeMode) {
tracer("First Char Optimization", () => {
charCodeToPatternIdxToConfig = onlyRelevantTypes.reduce((result, currTokType, idx) => {
if (typeof currTokType.PATTERN === "string") {
const charCode = currTokType.PATTERN.charCodeAt(0);
const optimizedIdx = charCodeToOptimizedIndex(charCode);
add