chevrotain
Version:
Chevrotain is a high performance fault tolerant javascript parsing DSL for building recursive decent parsers
312 lines (280 loc) • 10.3 kB
text/typescript
import { toFastProperties } from "@chevrotain/utils";
import { computeAllProdsFollows } from "../grammar/follow.js";
import { createTokenInstance, EOF } from "../../scan/tokens_public.js";
import {
defaultGrammarValidatorErrorProvider,
defaultParserErrorProvider,
} from "../errors_public.js";
import {
resolveGrammar,
validateGrammar,
} from "../grammar/gast/gast_resolver_public.js";
import {
CstNode,
IParserConfig,
IRecognitionException,
IRuleConfig,
IToken,
TokenType,
TokenVocabulary,
} from "@chevrotain/types";
import { Recoverable } from "./traits/recoverable.js";
import { LooksAhead } from "./traits/looksahead.js";
import { TreeBuilder } from "./traits/tree_builder.js";
import { LexerAdapter } from "./traits/lexer_adapter.js";
import { RecognizerApi } from "./traits/recognizer_api.js";
import { RecognizerEngine } from "./traits/recognizer_engine.js";
import { ErrorHandler } from "./traits/error_handler.js";
import { MixedInParser } from "./traits/parser_traits.js";
import { GastRecorder } from "./traits/gast_recorder.js";
import { PerformanceTracer } from "./traits/perf_tracer.js";
import { applyMixins } from "./utils/apply_mixins.js";
import { IParserDefinitionError } from "../grammar/types.js";
import { Rule } from "@chevrotain/gast";
import { IParserConfigInternal, ParserMethodInternal } from "./types.js";
import { validateLookahead } from "../grammar/checks.js";
export const END_OF_FILE = createTokenInstance(
EOF,
"",
NaN,
NaN,
NaN,
NaN,
NaN,
NaN,
);
Object.freeze(END_OF_FILE);
export type TokenMatcher = (token: IToken, tokType: TokenType) => boolean;
export const DEFAULT_PARSER_CONFIG: Required<
Omit<IParserConfigInternal, "lookaheadStrategy">
> = Object.freeze({
recoveryEnabled: false,
maxLookahead: 3,
dynamicTokensEnabled: false,
outputCst: true,
errorMessageProvider: defaultParserErrorProvider,
nodeLocationTracking: "none",
traceInitPerf: false,
skipValidations: false,
});
export const DEFAULT_RULE_CONFIG: Required<IRuleConfig<any>> = Object.freeze({
recoveryValueFunc: () => undefined,
resyncEnabled: true,
});
export enum ParserDefinitionErrorType {
INVALID_RULE_NAME = 0,
DUPLICATE_RULE_NAME = 1,
INVALID_RULE_OVERRIDE = 2,
DUPLICATE_PRODUCTIONS = 3,
UNRESOLVED_SUBRULE_REF = 4,
LEFT_RECURSION = 5,
NONE_LAST_EMPTY_ALT = 6,
AMBIGUOUS_ALTS = 7,
CONFLICT_TOKENS_RULES_NAMESPACE = 8,
INVALID_TOKEN_NAME = 9,
NO_NON_EMPTY_LOOKAHEAD = 10,
AMBIGUOUS_PREFIX_ALTS = 11,
TOO_MANY_ALTS = 12,
CUSTOM_LOOKAHEAD_VALIDATION = 13,
}
export interface IParserDuplicatesDefinitionError extends IParserDefinitionError {
dslName: string;
occurrence: number;
parameter?: string;
}
export interface IParserEmptyAlternativeDefinitionError extends IParserDefinitionError {
occurrence: number;
alternative: number;
}
export interface IParserAmbiguousAlternativesDefinitionError extends IParserDefinitionError {
occurrence: number | string;
alternatives: number[];
}
export interface IParserUnresolvedRefDefinitionError extends IParserDefinitionError {
unresolvedRefName: string;
}
export interface IParserState {
errors: IRecognitionException[];
lexerState: any;
RULE_STACK: number[];
CST_STACK: CstNode[];
}
export type Predicate = () => boolean;
export function EMPTY_ALT(): () => undefined;
export function EMPTY_ALT<T>(value: T): () => T;
export function EMPTY_ALT(value: any = undefined) {
return function () {
return value;
};
}
export class Parser {
// Set this flag to true if you don't want the Parser to throw error when problems in it's definition are detected.
// (normally during the parser's constructor).
// This is a design time flag, it will not affect the runtime error handling of the parser, just design time errors,
// for example: duplicate rule names, referencing an unresolved subrule, etc...
// This flag should not be enabled during normal usage, it is used in special situations, for example when
// needing to display the parser definition errors in some GUI(online playground).
static DEFER_DEFINITION_ERRORS_HANDLING: boolean = false;
/**
* @deprecated use the **instance** method with the same name instead
*/
static performSelfAnalysis(parserInstance: Parser): void {
throw Error(
"The **static** `performSelfAnalysis` method has been deprecated." +
"\t\nUse the **instance** method with the same name instead.",
);
}
public performSelfAnalysis(this: MixedInParser): void {
this.TRACE_INIT("performSelfAnalysis", () => {
let defErrorsMsgs;
this.selfAnalysisDone = true;
const className = this.className;
this.TRACE_INIT("toFastProps", () => {
// Without this voodoo magic the parser would be x3-x4 slower
// It seems it is better to invoke `toFastProperties` **before**
// Any manipulations of the `this` object done during the recording phase.
toFastProperties(this);
});
this.TRACE_INIT("Grammar Recording", () => {
try {
this.enableRecording();
// Building the GAST
this.definedRulesNames.forEach((currRuleName: string) => {
const wrappedRule = (this as any)[
currRuleName
] as ParserMethodInternal<unknown[], unknown>;
const originalGrammarAction = wrappedRule["originalGrammarAction"];
let recordedRuleGast!: Rule;
this.TRACE_INIT(`${currRuleName} Rule`, () => {
recordedRuleGast = this.topLevelRuleRecord(
currRuleName,
originalGrammarAction,
);
});
this.gastProductionsCache[currRuleName] = recordedRuleGast;
});
} finally {
this.disableRecording();
}
});
let resolverErrors: IParserDefinitionError[] = [];
this.TRACE_INIT("Grammar Resolving", () => {
resolverErrors = resolveGrammar({
rules: Object.values(this.gastProductionsCache),
});
this.definitionErrors = this.definitionErrors.concat(resolverErrors);
});
this.TRACE_INIT("Grammar Validations", () => {
// only perform additional grammar validations IFF no resolving errors have occurred.
// as unresolved grammar may lead to unhandled runtime exceptions in the follow up validations.
if (resolverErrors.length === 0 && this.skipValidations === false) {
const validationErrors = validateGrammar({
rules: Object.values(this.gastProductionsCache),
tokenTypes: Object.values(this.tokensMap),
errMsgProvider: defaultGrammarValidatorErrorProvider,
grammarName: className,
});
const lookaheadValidationErrors = validateLookahead({
lookaheadStrategy: this.lookaheadStrategy,
rules: Object.values(this.gastProductionsCache),
tokenTypes: Object.values(this.tokensMap),
grammarName: className,
});
this.definitionErrors = this.definitionErrors.concat(
validationErrors,
lookaheadValidationErrors,
);
}
});
// this analysis may fail if the grammar is not perfectly valid
if (this.definitionErrors.length === 0) {
// The results of these computations are not needed unless error recovery is enabled.
if (this.recoveryEnabled) {
this.TRACE_INIT("computeAllProdsFollows", () => {
const allFollows = computeAllProdsFollows(
Object.values(this.gastProductionsCache),
);
this.resyncFollows = allFollows;
});
}
this.TRACE_INIT("ComputeLookaheadFunctions", () => {
this.lookaheadStrategy.initialize?.({
rules: Object.values(this.gastProductionsCache),
});
this.preComputeLookaheadFunctions(
Object.values(this.gastProductionsCache),
);
});
}
if (
!Parser.DEFER_DEFINITION_ERRORS_HANDLING &&
this.definitionErrors.length !== 0
) {
defErrorsMsgs = this.definitionErrors.map(
(defError) => defError.message,
);
throw new Error(
`Parser Definition Errors detected:\n ${defErrorsMsgs.join(
"\n-------------------------------\n",
)}`,
);
}
});
}
definitionErrors: IParserDefinitionError[] = [];
selfAnalysisDone = false;
protected skipValidations: boolean;
constructor(tokenVocabulary: TokenVocabulary, config: IParserConfig) {
const that: MixedInParser = this as any;
that.initErrorHandler(config);
that.initLexerAdapter();
that.initLooksAhead(config);
that.initRecognizerEngine(tokenVocabulary, config);
that.initRecoverable(config);
that.initTreeBuilder(config);
that.initGastRecorder(config);
that.initPerformanceTracer(config);
if (Object.hasOwn(config, "ignoredIssues")) {
throw new Error(
"The <ignoredIssues> IParserConfig property has been deprecated.\n\t" +
"Please use the <IGNORE_AMBIGUITIES> flag on the relevant DSL method instead.\n\t" +
"See: https://chevrotain.io/docs/guide/resolving_grammar_errors.html#IGNORING_AMBIGUITIES\n\t" +
"For further details.",
);
}
this.skipValidations = Object.hasOwn(config, "skipValidations")
? (config.skipValidations as boolean) // casting assumes the end user passing the correct type
: DEFAULT_PARSER_CONFIG.skipValidations;
}
}
applyMixins(Parser, [
Recoverable,
LooksAhead,
TreeBuilder,
LexerAdapter,
RecognizerEngine,
RecognizerApi,
ErrorHandler,
GastRecorder,
PerformanceTracer,
]);
export class CstParser extends Parser {
constructor(
tokenVocabulary: TokenVocabulary,
config: IParserConfigInternal = DEFAULT_PARSER_CONFIG,
) {
const configClone = { ...config };
configClone.outputCst = true;
super(tokenVocabulary, configClone);
}
}
export class EmbeddedActionsParser extends Parser {
constructor(
tokenVocabulary: TokenVocabulary,
config: IParserConfigInternal = DEFAULT_PARSER_CONFIG,
) {
const configClone = { ...config };
configClone.outputCst = false;
super(tokenVocabulary, configClone);
}
}