chevrotain
Version:
Chevrotain is a high performance fault tolerant javascript parsing DSL for building recursive decent parsers
713 lines (638 loc) • 20.2 kB
text/typescript
import {
clone,
compact,
difference,
drop,
dropRight,
filter,
first,
flatMap,
flatten,
forEach,
groupBy,
includes,
isEmpty,
map,
pickBy,
reduce,
reject,
values,
} from "lodash-es";
import {
IParserAmbiguousAlternativesDefinitionError,
IParserDuplicatesDefinitionError,
IParserEmptyAlternativeDefinitionError,
ParserDefinitionErrorType,
} from "../parser/parser.js";
import {
Alternation,
Alternative as AlternativeGAST,
GAstVisitor,
getProductionDslName,
isOptionalProd,
NonTerminal,
Option,
Repetition,
RepetitionMandatory,
RepetitionMandatoryWithSeparator,
RepetitionWithSeparator,
Terminal,
} from "@chevrotain/gast";
import {
Alternative,
containsPath,
getLookaheadPathsForOptionalProd,
getLookaheadPathsForOr,
getProdType,
isStrictPrefixOfPath,
} from "./lookahead.js";
import { nextPossibleTokensAfter } from "./interpreter.js";
import {
ILookaheadStrategy,
IProduction,
IProductionWithOccurrence,
Rule,
TokenType,
} from "@chevrotain/types";
import {
IGrammarValidatorErrorMessageProvider,
IParserDefinitionError,
} from "./types.js";
import { tokenStructuredMatcher } from "../../scan/tokens.js";
export function validateLookahead(options: {
lookaheadStrategy: ILookaheadStrategy;
rules: Rule[];
tokenTypes: TokenType[];
grammarName: string;
}): IParserDefinitionError[] {
const lookaheadValidationErrorMessages = options.lookaheadStrategy.validate({
rules: options.rules,
tokenTypes: options.tokenTypes,
grammarName: options.grammarName,
});
return map(lookaheadValidationErrorMessages, (errorMessage) => ({
type: ParserDefinitionErrorType.CUSTOM_LOOKAHEAD_VALIDATION,
...errorMessage,
}));
}
export function validateGrammar(
topLevels: Rule[],
tokenTypes: TokenType[],
errMsgProvider: IGrammarValidatorErrorMessageProvider,
grammarName: string,
): IParserDefinitionError[] {
const duplicateErrors: IParserDefinitionError[] = flatMap(
topLevels,
(currTopLevel) =>
validateDuplicateProductions(currTopLevel, errMsgProvider),
);
const termsNamespaceConflictErrors = checkTerminalAndNoneTerminalsNameSpace(
topLevels,
tokenTypes,
errMsgProvider,
);
const tooManyAltsErrors = flatMap(topLevels, (curRule) =>
validateTooManyAlts(curRule, errMsgProvider),
);
const duplicateRulesError = flatMap(topLevels, (curRule) =>
validateRuleDoesNotAlreadyExist(
curRule,
topLevels,
grammarName,
errMsgProvider,
),
);
return duplicateErrors.concat(
termsNamespaceConflictErrors,
tooManyAltsErrors,
duplicateRulesError,
);
}
function validateDuplicateProductions(
topLevelRule: Rule,
errMsgProvider: IGrammarValidatorErrorMessageProvider,
): IParserDuplicatesDefinitionError[] {
const collectorVisitor = new OccurrenceValidationCollector();
topLevelRule.accept(collectorVisitor);
const allRuleProductions = collectorVisitor.allProductions;
const productionGroups = groupBy(
allRuleProductions,
identifyProductionForDuplicates,
);
const duplicates: any = pickBy(productionGroups, (currGroup) => {
return currGroup.length > 1;
});
const errors = map(values(duplicates), (currDuplicates: any) => {
const firstProd: any = first(currDuplicates);
const msg = errMsgProvider.buildDuplicateFoundError(
topLevelRule,
currDuplicates,
);
const dslName = getProductionDslName(firstProd);
const defError: IParserDuplicatesDefinitionError = {
message: msg,
type: ParserDefinitionErrorType.DUPLICATE_PRODUCTIONS,
ruleName: topLevelRule.name,
dslName: dslName,
occurrence: firstProd.idx,
};
const param = getExtraProductionArgument(firstProd);
if (param) {
defError.parameter = param;
}
return defError;
});
return errors;
}
export function identifyProductionForDuplicates(
prod: IProductionWithOccurrence,
): string {
return `${getProductionDslName(prod)}_#_${
prod.idx
}_#_${getExtraProductionArgument(prod)}`;
}
function getExtraProductionArgument(prod: IProductionWithOccurrence): string {
if (prod instanceof Terminal) {
return prod.terminalType.name;
} else if (prod instanceof NonTerminal) {
return prod.nonTerminalName;
} else {
return "";
}
}
export class OccurrenceValidationCollector extends GAstVisitor {
public allProductions: IProductionWithOccurrence[] = [];
public visitNonTerminal(subrule: NonTerminal): void {
this.allProductions.push(subrule);
}
public visitOption(option: Option): void {
this.allProductions.push(option);
}
public visitRepetitionWithSeparator(manySep: RepetitionWithSeparator): void {
this.allProductions.push(manySep);
}
public visitRepetitionMandatory(atLeastOne: RepetitionMandatory): void {
this.allProductions.push(atLeastOne);
}
public visitRepetitionMandatoryWithSeparator(
atLeastOneSep: RepetitionMandatoryWithSeparator,
): void {
this.allProductions.push(atLeastOneSep);
}
public visitRepetition(many: Repetition): void {
this.allProductions.push(many);
}
public visitAlternation(or: Alternation): void {
this.allProductions.push(or);
}
public visitTerminal(terminal: Terminal): void {
this.allProductions.push(terminal);
}
}
export function validateRuleDoesNotAlreadyExist(
rule: Rule,
allRules: Rule[],
className: string,
errMsgProvider: IGrammarValidatorErrorMessageProvider,
): IParserDefinitionError[] {
const errors = [];
const occurrences = reduce(
allRules,
(result, curRule) => {
if (curRule.name === rule.name) {
return result + 1;
}
return result;
},
0,
);
if (occurrences > 1) {
const errMsg = errMsgProvider.buildDuplicateRuleNameError({
topLevelRule: rule,
grammarName: className,
});
errors.push({
message: errMsg,
type: ParserDefinitionErrorType.DUPLICATE_RULE_NAME,
ruleName: rule.name,
});
}
return errors;
}
// TODO: is there anyway to get only the rule names of rules inherited from the super grammars?
// This is not part of the IGrammarErrorProvider because the validation cannot be performed on
// The grammar structure, only at runtime.
export function validateRuleIsOverridden(
ruleName: string,
definedRulesNames: string[],
className: string,
): IParserDefinitionError[] {
const errors = [];
let errMsg;
if (!includes(definedRulesNames, ruleName)) {
errMsg =
`Invalid rule override, rule: ->${ruleName}<- cannot be overridden in the grammar: ->${className}<-` +
`as it is not defined in any of the super grammars `;
errors.push({
message: errMsg,
type: ParserDefinitionErrorType.INVALID_RULE_OVERRIDE,
ruleName: ruleName,
});
}
return errors;
}
export function validateNoLeftRecursion(
topRule: Rule,
currRule: Rule,
errMsgProvider: IGrammarValidatorErrorMessageProvider,
path: Rule[] = [],
): IParserDefinitionError[] {
const errors: IParserDefinitionError[] = [];
const nextNonTerminals = getFirstNoneTerminal(currRule.definition);
if (isEmpty(nextNonTerminals)) {
return [];
} else {
const ruleName = topRule.name;
const foundLeftRecursion = includes(nextNonTerminals, topRule);
if (foundLeftRecursion) {
errors.push({
message: errMsgProvider.buildLeftRecursionError({
topLevelRule: topRule,
leftRecursionPath: path,
}),
type: ParserDefinitionErrorType.LEFT_RECURSION,
ruleName: ruleName,
});
}
// we are only looking for cyclic paths leading back to the specific topRule
// other cyclic paths are ignored, we still need this difference to avoid infinite loops...
const validNextSteps = difference(nextNonTerminals, path.concat([topRule]));
const errorsFromNextSteps = flatMap(validNextSteps, (currRefRule) => {
const newPath = clone(path);
newPath.push(currRefRule);
return validateNoLeftRecursion(
topRule,
currRefRule,
errMsgProvider,
newPath,
);
});
return errors.concat(errorsFromNextSteps);
}
}
export function getFirstNoneTerminal(definition: IProduction[]): Rule[] {
let result: Rule[] = [];
if (isEmpty(definition)) {
return result;
}
const firstProd = first(definition);
/* istanbul ignore else */
if (firstProd instanceof NonTerminal) {
result.push(firstProd.referencedRule);
} else if (
firstProd instanceof AlternativeGAST ||
firstProd instanceof Option ||
firstProd instanceof RepetitionMandatory ||
firstProd instanceof RepetitionMandatoryWithSeparator ||
firstProd instanceof RepetitionWithSeparator ||
firstProd instanceof Repetition
) {
result = result.concat(
getFirstNoneTerminal(<IProduction[]>firstProd.definition),
);
} else if (firstProd instanceof Alternation) {
// each sub definition in alternation is a FLAT
result = flatten(
map(firstProd.definition, (currSubDef) =>
getFirstNoneTerminal((<AlternativeGAST>currSubDef).definition),
),
);
} else if (firstProd instanceof Terminal) {
// nothing to see, move along
} else {
throw Error("non exhaustive match");
}
const isFirstOptional = isOptionalProd(firstProd);
const hasMore = definition.length > 1;
if (isFirstOptional && hasMore) {
const rest = drop(definition);
return result.concat(getFirstNoneTerminal(rest));
} else {
return result;
}
}
class OrCollector extends GAstVisitor {
public alternations: Alternation[] = [];
public visitAlternation(node: Alternation): void {
this.alternations.push(node);
}
}
export function validateEmptyOrAlternative(
topLevelRule: Rule,
errMsgProvider: IGrammarValidatorErrorMessageProvider,
): IParserEmptyAlternativeDefinitionError[] {
const orCollector = new OrCollector();
topLevelRule.accept(orCollector);
const ors = orCollector.alternations;
const errors = flatMap<Alternation, IParserEmptyAlternativeDefinitionError>(
ors,
(currOr) => {
const exceptLast = dropRight(currOr.definition);
return flatMap(exceptLast, (currAlternative, currAltIdx) => {
const possibleFirstInAlt = nextPossibleTokensAfter(
[currAlternative],
[],
tokenStructuredMatcher,
1,
);
if (isEmpty(possibleFirstInAlt)) {
return [
{
message: errMsgProvider.buildEmptyAlternationError({
topLevelRule: topLevelRule,
alternation: currOr,
emptyChoiceIdx: currAltIdx,
}),
type: ParserDefinitionErrorType.NONE_LAST_EMPTY_ALT,
ruleName: topLevelRule.name,
occurrence: currOr.idx,
alternative: currAltIdx + 1,
},
];
} else {
return [];
}
});
},
);
return errors;
}
export function validateAmbiguousAlternationAlternatives(
topLevelRule: Rule,
globalMaxLookahead: number,
errMsgProvider: IGrammarValidatorErrorMessageProvider,
): IParserAmbiguousAlternativesDefinitionError[] {
const orCollector = new OrCollector();
topLevelRule.accept(orCollector);
let ors = orCollector.alternations;
// New Handling of ignoring ambiguities
// - https://github.com/chevrotain/chevrotain/issues/869
ors = reject(ors, (currOr) => currOr.ignoreAmbiguities === true);
const errors = flatMap(ors, (currOr: Alternation) => {
const currOccurrence = currOr.idx;
const actualMaxLookahead = currOr.maxLookahead || globalMaxLookahead;
const alternatives = getLookaheadPathsForOr(
currOccurrence,
topLevelRule,
actualMaxLookahead,
currOr,
);
const altsAmbiguityErrors = checkAlternativesAmbiguities(
alternatives,
currOr,
topLevelRule,
errMsgProvider,
);
const altsPrefixAmbiguityErrors = checkPrefixAlternativesAmbiguities(
alternatives,
currOr,
topLevelRule,
errMsgProvider,
);
return altsAmbiguityErrors.concat(altsPrefixAmbiguityErrors);
});
return errors;
}
export class RepetitionCollector extends GAstVisitor {
public allProductions: (IProductionWithOccurrence & {
maxLookahead?: number;
})[] = [];
public visitRepetitionWithSeparator(manySep: RepetitionWithSeparator): void {
this.allProductions.push(manySep);
}
public visitRepetitionMandatory(atLeastOne: RepetitionMandatory): void {
this.allProductions.push(atLeastOne);
}
public visitRepetitionMandatoryWithSeparator(
atLeastOneSep: RepetitionMandatoryWithSeparator,
): void {
this.allProductions.push(atLeastOneSep);
}
public visitRepetition(many: Repetition): void {
this.allProductions.push(many);
}
}
export function validateTooManyAlts(
topLevelRule: Rule,
errMsgProvider: IGrammarValidatorErrorMessageProvider,
): IParserDefinitionError[] {
const orCollector = new OrCollector();
topLevelRule.accept(orCollector);
const ors = orCollector.alternations;
const errors = flatMap(ors, (currOr) => {
if (currOr.definition.length > 255) {
return [
{
message: errMsgProvider.buildTooManyAlternativesError({
topLevelRule: topLevelRule,
alternation: currOr,
}),
type: ParserDefinitionErrorType.TOO_MANY_ALTS,
ruleName: topLevelRule.name,
occurrence: currOr.idx,
},
];
} else {
return [];
}
});
return errors;
}
export function validateSomeNonEmptyLookaheadPath(
topLevelRules: Rule[],
maxLookahead: number,
errMsgProvider: IGrammarValidatorErrorMessageProvider,
): IParserDefinitionError[] {
const errors: IParserDefinitionError[] = [];
forEach(topLevelRules, (currTopRule) => {
const collectorVisitor = new RepetitionCollector();
currTopRule.accept(collectorVisitor);
const allRuleProductions = collectorVisitor.allProductions;
forEach(allRuleProductions, (currProd) => {
const prodType = getProdType(currProd);
const actualMaxLookahead = currProd.maxLookahead || maxLookahead;
const currOccurrence = currProd.idx;
const paths = getLookaheadPathsForOptionalProd(
currOccurrence,
currTopRule,
prodType,
actualMaxLookahead,
);
const pathsInsideProduction = paths[0];
if (isEmpty(flatten(pathsInsideProduction))) {
const errMsg = errMsgProvider.buildEmptyRepetitionError({
topLevelRule: currTopRule,
repetition: currProd,
});
errors.push({
message: errMsg,
type: ParserDefinitionErrorType.NO_NON_EMPTY_LOOKAHEAD,
ruleName: currTopRule.name,
});
}
});
});
return errors;
}
export interface IAmbiguityDescriptor {
alts: number[];
path: TokenType[];
}
function checkAlternativesAmbiguities(
alternatives: Alternative[],
alternation: Alternation,
rule: Rule,
errMsgProvider: IGrammarValidatorErrorMessageProvider,
): IParserAmbiguousAlternativesDefinitionError[] {
const foundAmbiguousPaths: Alternative = [];
const identicalAmbiguities = reduce(
alternatives,
(result, currAlt, currAltIdx) => {
// ignore (skip) ambiguities with this alternative
if (alternation.definition[currAltIdx].ignoreAmbiguities === true) {
return result;
}
forEach(currAlt, (currPath) => {
const altsCurrPathAppearsIn = [currAltIdx];
forEach(alternatives, (currOtherAlt, currOtherAltIdx) => {
if (
currAltIdx !== currOtherAltIdx &&
containsPath(currOtherAlt, currPath) &&
// ignore (skip) ambiguities with this "other" alternative
alternation.definition[currOtherAltIdx].ignoreAmbiguities !== true
) {
altsCurrPathAppearsIn.push(currOtherAltIdx);
}
});
if (
altsCurrPathAppearsIn.length > 1 &&
!containsPath(foundAmbiguousPaths, currPath)
) {
foundAmbiguousPaths.push(currPath);
result.push({
alts: altsCurrPathAppearsIn,
path: currPath,
});
}
});
return result;
},
[] as { alts: number[]; path: TokenType[] }[],
);
const currErrors = map(identicalAmbiguities, (currAmbDescriptor) => {
const ambgIndices = map(
currAmbDescriptor.alts,
(currAltIdx) => currAltIdx + 1,
);
const currMessage = errMsgProvider.buildAlternationAmbiguityError({
topLevelRule: rule,
alternation: alternation,
ambiguityIndices: ambgIndices,
prefixPath: currAmbDescriptor.path,
});
return {
message: currMessage,
type: ParserDefinitionErrorType.AMBIGUOUS_ALTS,
ruleName: rule.name,
occurrence: alternation.idx,
alternatives: currAmbDescriptor.alts,
};
});
return currErrors;
}
export function checkPrefixAlternativesAmbiguities(
alternatives: Alternative[],
alternation: Alternation,
rule: Rule,
errMsgProvider: IGrammarValidatorErrorMessageProvider,
): IParserAmbiguousAlternativesDefinitionError[] {
// flatten
const pathsAndIndices = reduce(
alternatives,
(result, currAlt, idx) => {
const currPathsAndIdx = map(currAlt, (currPath) => {
return { idx: idx, path: currPath };
});
return result.concat(currPathsAndIdx);
},
[] as { idx: number; path: TokenType[] }[],
);
const errors = compact(
flatMap(pathsAndIndices, (currPathAndIdx) => {
const alternativeGast = alternation.definition[currPathAndIdx.idx];
// ignore (skip) ambiguities with this alternative
if (alternativeGast.ignoreAmbiguities === true) {
return [];
}
const targetIdx = currPathAndIdx.idx;
const targetPath = currPathAndIdx.path;
const prefixAmbiguitiesPathsAndIndices = filter(
pathsAndIndices,
(searchPathAndIdx) => {
// prefix ambiguity can only be created from lower idx (higher priority) path
return (
// ignore (skip) ambiguities with this "other" alternative
alternation.definition[searchPathAndIdx.idx].ignoreAmbiguities !==
true &&
searchPathAndIdx.idx < targetIdx &&
// checking for strict prefix because identical lookaheads
// will be be detected using a different validation.
isStrictPrefixOfPath(searchPathAndIdx.path, targetPath)
);
},
);
const currPathPrefixErrors = map(
prefixAmbiguitiesPathsAndIndices,
(currAmbPathAndIdx): IParserAmbiguousAlternativesDefinitionError => {
const ambgIndices = [currAmbPathAndIdx.idx + 1, targetIdx + 1];
const occurrence = alternation.idx === 0 ? "" : alternation.idx;
const message = errMsgProvider.buildAlternationPrefixAmbiguityError({
topLevelRule: rule,
alternation: alternation,
ambiguityIndices: ambgIndices,
prefixPath: currAmbPathAndIdx.path,
});
return {
message: message,
type: ParserDefinitionErrorType.AMBIGUOUS_PREFIX_ALTS,
ruleName: rule.name,
occurrence: occurrence,
alternatives: ambgIndices,
};
},
);
return currPathPrefixErrors;
}),
);
return errors;
}
function checkTerminalAndNoneTerminalsNameSpace(
topLevels: Rule[],
tokenTypes: TokenType[],
errMsgProvider: IGrammarValidatorErrorMessageProvider,
): IParserDefinitionError[] {
const errors: IParserDefinitionError[] = [];
const tokenNames = map(tokenTypes, (currToken) => currToken.name);
forEach(topLevels, (currRule) => {
const currRuleName = currRule.name;
if (includes(tokenNames, currRuleName)) {
const errMsg = errMsgProvider.buildNamespaceConflictError(currRule);
errors.push({
message: errMsg,
type: ParserDefinitionErrorType.CONFLICT_TOKENS_RULES_NAMESPACE,
ruleName: currRuleName,
});
}
});
return errors;
}