chevrotain
Version:
Chevrotain is a high performance fault tolerant javascript parsing DSL for building recursive decent parsers
491 lines • 20.3 kB
JavaScript
import { first } from "./first.js";
import { RestWalker } from "./rest.js";
import { Alternation, Alternative, NonTerminal, Option, Repetition, RepetitionMandatory, RepetitionMandatoryWithSeparator, RepetitionWithSeparator, Rule, Terminal, } from "@chevrotain/gast";
export class AbstractNextPossibleTokensWalker extends RestWalker {
constructor(topProd, path) {
super();
this.topProd = topProd;
this.path = path;
this.possibleTokTypes = [];
this.nextProductionName = "";
this.nextProductionOccurrence = 0;
this.found = false;
this.isAtEndOfPath = false;
}
startWalking() {
this.found = false;
if (this.path.ruleStack[0] !== this.topProd.name) {
throw Error("The path does not start with the walker's top Rule!");
}
// immutable for the win
this.ruleStack = [...this.path.ruleStack].reverse(); // intelij bug requires assertion
this.occurrenceStack = [...this.path.occurrenceStack].reverse(); // intelij bug requires assertion
// already verified that the first production is valid, we now seek the 2nd production
this.ruleStack.pop();
this.occurrenceStack.pop();
this.updateExpectedNext();
this.walk(this.topProd);
return this.possibleTokTypes;
}
walk(prod, prevRest = []) {
// stop scanning once we found the path
if (!this.found) {
super.walk(prod, prevRest);
}
}
walkProdRef(refProd, currRest, prevRest) {
// found the next production, need to keep walking in it
if (refProd.referencedRule.name === this.nextProductionName &&
refProd.idx === this.nextProductionOccurrence) {
const fullRest = currRest.concat(prevRest);
this.updateExpectedNext();
this.walk(refProd.referencedRule, fullRest);
}
}
updateExpectedNext() {
// need to consume the Terminal
if (this.ruleStack.length === 0) {
// must reset nextProductionXXX to avoid walking down another Top Level production while what we are
// really seeking is the last Terminal...
this.nextProductionName = "";
this.nextProductionOccurrence = 0;
this.isAtEndOfPath = true;
}
else {
this.nextProductionName = this.ruleStack.pop();
this.nextProductionOccurrence = this.occurrenceStack.pop();
}
}
}
export class NextAfterTokenWalker extends AbstractNextPossibleTokensWalker {
constructor(topProd, path) {
super(topProd, path);
this.path = path;
this.nextTerminalName = "";
this.nextTerminalOccurrence = 0;
this.nextTerminalName = this.path.lastTok.name;
this.nextTerminalOccurrence = this.path.lastTokOccurrence;
}
walkTerminal(terminal, currRest, prevRest) {
if (this.isAtEndOfPath &&
terminal.terminalType.name === this.nextTerminalName &&
terminal.idx === this.nextTerminalOccurrence &&
!this.found) {
const fullRest = currRest.concat(prevRest);
const restProd = new Alternative({ definition: fullRest });
this.possibleTokTypes = first(restProd);
this.found = true;
}
}
}
/**
* This walker only "walks" a single "TOP" level in the Grammar Ast, this means
* it never "follows" production refs
*/
export class AbstractNextTerminalAfterProductionWalker extends RestWalker {
constructor(topRule, occurrence) {
super();
this.topRule = topRule;
this.occurrence = occurrence;
this.result = {
token: undefined,
occurrence: undefined,
isEndOfRule: undefined,
};
}
startWalking() {
this.walk(this.topRule);
return this.result;
}
}
export class NextTerminalAfterManyWalker extends AbstractNextTerminalAfterProductionWalker {
walkMany(manyProd, currRest, prevRest) {
if (manyProd.idx === this.occurrence) {
const firstAfterMany = currRest.concat(prevRest)[0];
this.result.isEndOfRule = firstAfterMany === undefined;
if (firstAfterMany instanceof Terminal) {
this.result.token = firstAfterMany.terminalType;
this.result.occurrence = firstAfterMany.idx;
}
}
else {
super.walkMany(manyProd, currRest, prevRest);
}
}
}
export class NextTerminalAfterManySepWalker extends AbstractNextTerminalAfterProductionWalker {
walkManySep(manySepProd, currRest, prevRest) {
if (manySepProd.idx === this.occurrence) {
const firstAfterManySep = currRest.concat(prevRest)[0];
this.result.isEndOfRule = firstAfterManySep === undefined;
if (firstAfterManySep instanceof Terminal) {
this.result.token = firstAfterManySep.terminalType;
this.result.occurrence = firstAfterManySep.idx;
}
}
else {
super.walkManySep(manySepProd, currRest, prevRest);
}
}
}
export class NextTerminalAfterAtLeastOneWalker extends AbstractNextTerminalAfterProductionWalker {
walkAtLeastOne(atLeastOneProd, currRest, prevRest) {
if (atLeastOneProd.idx === this.occurrence) {
const firstAfterAtLeastOne = currRest.concat(prevRest)[0];
this.result.isEndOfRule = firstAfterAtLeastOne === undefined;
if (firstAfterAtLeastOne instanceof Terminal) {
this.result.token = firstAfterAtLeastOne.terminalType;
this.result.occurrence = firstAfterAtLeastOne.idx;
}
}
else {
super.walkAtLeastOne(atLeastOneProd, currRest, prevRest);
}
}
}
// TODO: reduce code duplication in the AfterWalkers
export class NextTerminalAfterAtLeastOneSepWalker extends AbstractNextTerminalAfterProductionWalker {
walkAtLeastOneSep(atleastOneSepProd, currRest, prevRest) {
if (atleastOneSepProd.idx === this.occurrence) {
const firstAfterfirstAfterAtLeastOneSep = currRest.concat(prevRest)[0];
this.result.isEndOfRule = firstAfterfirstAfterAtLeastOneSep === undefined;
if (firstAfterfirstAfterAtLeastOneSep instanceof Terminal) {
this.result.token = firstAfterfirstAfterAtLeastOneSep.terminalType;
this.result.occurrence = firstAfterfirstAfterAtLeastOneSep.idx;
}
}
else {
super.walkAtLeastOneSep(atleastOneSepProd, currRest, prevRest);
}
}
}
export function possiblePathsFrom(targetDef, maxLength, currPath = []) {
// avoid side effects
currPath = [...currPath];
let result = [];
let i = 0;
// TODO: avoid inner funcs
function remainingPathWith(nextDef) {
return nextDef.concat(targetDef.slice(i + 1));
}
// TODO: avoid inner funcs
function getAlternativesForProd(definition) {
const alternatives = possiblePathsFrom(remainingPathWith(definition), maxLength, currPath);
return result.concat(alternatives);
}
/**
* Mandatory productions will halt the loop as the paths computed from their recursive calls will already contain the
* following (rest) of the targetDef.
*
* For optional productions (Option/Repetition/...) the loop will continue to represent the paths that do not include the
* the optional production.
*/
while (currPath.length < maxLength && i < targetDef.length) {
const prod = targetDef[i];
/* istanbul ignore else */
if (prod instanceof Alternative) {
return getAlternativesForProd(prod.definition);
}
else if (prod instanceof NonTerminal) {
return getAlternativesForProd(prod.definition);
}
else if (prod instanceof Option) {
result = getAlternativesForProd(prod.definition);
}
else if (prod instanceof RepetitionMandatory) {
const newDef = prod.definition.concat([
new Repetition({
definition: prod.definition,
}),
]);
return getAlternativesForProd(newDef);
}
else if (prod instanceof RepetitionMandatoryWithSeparator) {
const newDef = [
new Alternative({ definition: prod.definition }),
new Repetition({
definition: [new Terminal({ terminalType: prod.separator })].concat(prod.definition),
}),
];
return getAlternativesForProd(newDef);
}
else if (prod instanceof RepetitionWithSeparator) {
const newDef = prod.definition.concat([
new Repetition({
definition: [new Terminal({ terminalType: prod.separator })].concat(prod.definition),
}),
]);
result = getAlternativesForProd(newDef);
}
else if (prod instanceof Repetition) {
const newDef = prod.definition.concat([
new Repetition({
definition: prod.definition,
}),
]);
result = getAlternativesForProd(newDef);
}
else if (prod instanceof Alternation) {
prod.definition.forEach((currAlt) => {
// TODO: this is a limited check for empty alternatives
// It would prevent a common case of infinite loops during parser initialization.
// However **in-directly** empty alternatives may still cause issues.
if (currAlt.definition.length !== 0) {
result = getAlternativesForProd(currAlt.definition);
}
});
return result;
}
else if (prod instanceof Terminal) {
currPath.push(prod.terminalType);
}
else {
throw Error("non exhaustive match");
}
i++;
}
result.push({
partialPath: currPath,
suffixDef: targetDef.slice(i),
});
return result;
}
export function nextPossibleTokensAfter(initialDef, tokenVector, tokMatcher, maxLookAhead) {
const EXIT_NON_TERMINAL = "EXIT_NONE_TERMINAL";
// to avoid creating a new Array each time.
const EXIT_NON_TERMINAL_ARR = [EXIT_NON_TERMINAL];
const EXIT_ALTERNATIVE = "EXIT_ALTERNATIVE";
let foundCompletePath = false;
const tokenVectorLength = tokenVector.length;
const minimalAlternativesIndex = tokenVectorLength - maxLookAhead - 1;
const result = [];
const possiblePaths = [];
possiblePaths.push({
idx: -1,
def: initialDef,
ruleStack: [],
occurrenceStack: [],
});
while (possiblePaths.length !== 0) {
const currPath = possiblePaths.pop();
// skip alternatives if no more results can be found (assuming deterministic grammar with fixed lookahead)
if (currPath === EXIT_ALTERNATIVE) {
if (foundCompletePath &&
possiblePaths.at(-1).idx <= minimalAlternativesIndex) {
// remove irrelevant alternative
possiblePaths.pop();
}
continue;
}
const currDef = currPath.def;
const currIdx = currPath.idx;
const currRuleStack = currPath.ruleStack;
const currOccurrenceStack = currPath.occurrenceStack;
// For Example: an empty path could exist in a valid grammar in the case of an EMPTY_ALT
if (currDef.length === 0) {
continue;
}
const prod = currDef[0];
/* istanbul ignore else */
if (prod === EXIT_NON_TERMINAL) {
const nextPath = {
idx: currIdx,
def: currDef.slice(1),
ruleStack: currRuleStack.slice(0, -1),
occurrenceStack: currOccurrenceStack.slice(0, -1),
};
possiblePaths.push(nextPath);
}
else if (prod instanceof Terminal) {
/* istanbul ignore else */
if (currIdx < tokenVectorLength - 1) {
const nextIdx = currIdx + 1;
const actualToken = tokenVector[nextIdx];
if (tokMatcher(actualToken, prod.terminalType)) {
const nextPath = {
idx: nextIdx,
def: currDef.slice(1),
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
};
possiblePaths.push(nextPath);
}
// end of the line
}
else if (currIdx === tokenVectorLength - 1) {
// IGNORE ABOVE ELSE
result.push({
nextTokenType: prod.terminalType,
nextTokenOccurrence: prod.idx,
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
});
foundCompletePath = true;
}
else {
throw Error("non exhaustive match");
}
}
else if (prod instanceof NonTerminal) {
const newRuleStack = [...currRuleStack];
newRuleStack.push(prod.nonTerminalName);
const newOccurrenceStack = [...currOccurrenceStack];
newOccurrenceStack.push(prod.idx);
const nextPath = {
idx: currIdx,
def: prod.definition.concat(EXIT_NON_TERMINAL_ARR, currDef.slice(1)),
ruleStack: newRuleStack,
occurrenceStack: newOccurrenceStack,
};
possiblePaths.push(nextPath);
}
else if (prod instanceof Option) {
// the order of alternatives is meaningful, FILO (Last path will be traversed first).
const nextPathWithout = {
idx: currIdx,
def: currDef.slice(1),
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
};
possiblePaths.push(nextPathWithout);
// required marker to avoid backtracking paths whose higher priority alternatives already matched
possiblePaths.push(EXIT_ALTERNATIVE);
const nextPathWith = {
idx: currIdx,
def: prod.definition.concat(currDef.slice(1)),
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
};
possiblePaths.push(nextPathWith);
}
else if (prod instanceof RepetitionMandatory) {
// TODO:(THE NEW operators here take a while...) (convert once?)
const secondIteration = new Repetition({
definition: prod.definition,
idx: prod.idx,
});
const nextDef = prod.definition.concat([secondIteration], currDef.slice(1));
const nextPath = {
idx: currIdx,
def: nextDef,
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
};
possiblePaths.push(nextPath);
}
else if (prod instanceof RepetitionMandatoryWithSeparator) {
// TODO:(THE NEW operators here take a while...) (convert once?)
const separatorGast = new Terminal({
terminalType: prod.separator,
});
const secondIteration = new Repetition({
definition: [separatorGast].concat(prod.definition),
idx: prod.idx,
});
const nextDef = prod.definition.concat([secondIteration], currDef.slice(1));
const nextPath = {
idx: currIdx,
def: nextDef,
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
};
possiblePaths.push(nextPath);
}
else if (prod instanceof RepetitionWithSeparator) {
// the order of alternatives is meaningful, FILO (Last path will be traversed first).
const nextPathWithout = {
idx: currIdx,
def: currDef.slice(1),
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
};
possiblePaths.push(nextPathWithout);
// required marker to avoid backtracking paths whose higher priority alternatives already matched
possiblePaths.push(EXIT_ALTERNATIVE);
const separatorGast = new Terminal({
terminalType: prod.separator,
});
const nthRepetition = new Repetition({
definition: [separatorGast].concat(prod.definition),
idx: prod.idx,
});
const nextDef = prod.definition.concat([nthRepetition], currDef.slice(1));
const nextPathWith = {
idx: currIdx,
def: nextDef,
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
};
possiblePaths.push(nextPathWith);
}
else if (prod instanceof Repetition) {
// the order of alternatives is meaningful, FILO (Last path will be traversed first).
const nextPathWithout = {
idx: currIdx,
def: currDef.slice(1),
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
};
possiblePaths.push(nextPathWithout);
// required marker to avoid backtracking paths whose higher priority alternatives already matched
possiblePaths.push(EXIT_ALTERNATIVE);
// TODO: an empty repetition will cause infinite loops here, will the parser detect this in selfAnalysis?
const nthRepetition = new Repetition({
definition: prod.definition,
idx: prod.idx,
});
const nextDef = prod.definition.concat([nthRepetition], currDef.slice(1));
const nextPathWith = {
idx: currIdx,
def: nextDef,
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
};
possiblePaths.push(nextPathWith);
}
else if (prod instanceof Alternation) {
// the order of alternatives is meaningful, FILO (Last path will be traversed first).
for (let i = prod.definition.length - 1; i >= 0; i--) {
const currAlt = prod.definition[i];
const currAltPath = {
idx: currIdx,
def: currAlt.definition.concat(currDef.slice(1)),
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
};
possiblePaths.push(currAltPath);
possiblePaths.push(EXIT_ALTERNATIVE);
}
}
else if (prod instanceof Alternative) {
possiblePaths.push({
idx: currIdx,
def: prod.definition.concat(currDef.slice(1)),
ruleStack: currRuleStack,
occurrenceStack: currOccurrenceStack,
});
}
else if (prod instanceof Rule) {
// last because we should only encounter at most a single one of these per invocation.
possiblePaths.push(expandTopLevelRule(prod, currIdx, currRuleStack, currOccurrenceStack));
}
else {
throw Error("non exhaustive match");
}
}
return result;
}
function expandTopLevelRule(topRule, currIdx, currRuleStack, currOccurrenceStack) {
const newRuleStack = [...currRuleStack];
newRuleStack.push(topRule.name);
const newCurrOccurrenceStack = [...currOccurrenceStack];
// top rule is always assumed to have been called with occurrence index 1
newCurrOccurrenceStack.push(1);
return {
idx: currIdx,
def: topRule.definition,
ruleStack: newRuleStack,
occurrenceStack: newCurrOccurrenceStack,
};
}
//# sourceMappingURL=interpreter.js.map