UNPKG

chevrotain

Version:

Chevrotain is a high performance fault tolerant javascript parsing DSL for building recursive decent parsers

294 lines 13.4 kB
import { Alternation, Alternative, NonTerminal, Option, Repetition, RepetitionMandatory, RepetitionMandatoryWithSeparator, RepetitionWithSeparator, Rule, Terminal, } from "@chevrotain/gast"; import { Lexer } from "../../../scan/lexer_public.js"; import { augmentTokenTypes, hasShortKeyProperty, } from "../../../scan/tokens.js"; import { createToken, createTokenInstance, } from "../../../scan/tokens_public.js"; import { END_OF_FILE } from "../parser.js"; import { BITS_FOR_OCCURRENCE_IDX } from "../../grammar/keys.js"; const RECORDING_NULL_OBJECT = { description: "This Object indicates the Parser is during Recording Phase", }; Object.freeze(RECORDING_NULL_OBJECT); const HANDLE_SEPARATOR = true; const MAX_METHOD_IDX = Math.pow(2, BITS_FOR_OCCURRENCE_IDX) - 1; const RFT = createToken({ name: "RECORDING_PHASE_TOKEN", pattern: Lexer.NA }); augmentTokenTypes([RFT]); const RECORDING_PHASE_TOKEN = createTokenInstance(RFT, "This IToken indicates the Parser is in Recording Phase\n\t" + "" + "See: https://chevrotain.io/docs/guide/internals.html#grammar-recording for details", // Using "-1" instead of NaN (as in EOF) because an actual number is less likely to // cause errors if the output of LA or CONSUME would be (incorrectly) used during the recording phase. -1, -1, -1, -1, -1, -1); Object.freeze(RECORDING_PHASE_TOKEN); const RECORDING_PHASE_CSTNODE = { name: "This CSTNode indicates the Parser is in Recording Phase\n\t" + "See: https://chevrotain.io/docs/guide/internals.html#grammar-recording for details", children: {}, }; /** * This trait handles the creation of the GAST structure for Chevrotain Grammars */ export class GastRecorder { initGastRecorder(config) { this.recordingProdStack = []; this.RECORDING_PHASE = false; } enableRecording() { this.RECORDING_PHASE = true; this.TRACE_INIT("Enable Recording", () => { /** * Warning Dark Voodoo Magic upcoming! * We are "replacing" the public parsing DSL methods API * With **new** alternative implementations on the Parser **instance** * * So far this is the only way I've found to avoid performance regressions during parsing time. * - Approx 30% performance regression was measured on Chrome 75 Canary when attempting to replace the "internal" * implementations directly instead. */ for (let i = 0; i < 10; i++) { const idx = i > 0 ? i : ""; this[`CONSUME${idx}`] = function (arg1, arg2) { return this.consumeInternalRecord(arg1, i, arg2); }; this[`SUBRULE${idx}`] = function (arg1, arg2) { return this.subruleInternalRecord(arg1, i, arg2); }; this[`OPTION${idx}`] = function (arg1) { return this.optionInternalRecord(arg1, i); }; this[`OR${idx}`] = function (arg1) { return this.orInternalRecord(arg1, i); }; this[`MANY${idx}`] = function (arg1) { this.manyInternalRecord(i, arg1); }; this[`MANY_SEP${idx}`] = function (arg1) { this.manySepFirstInternalRecord(i, arg1); }; this[`AT_LEAST_ONE${idx}`] = function (arg1) { this.atLeastOneInternalRecord(i, arg1); }; this[`AT_LEAST_ONE_SEP${idx}`] = function (arg1) { this.atLeastOneSepFirstInternalRecord(i, arg1); }; } // DSL methods with the idx(suffix) as an argument this[`consume`] = function (idx, arg1, arg2) { return this.consumeInternalRecord(arg1, idx, arg2); }; this[`subrule`] = function (idx, arg1, arg2) { return this.subruleInternalRecord(arg1, idx, arg2); }; this[`option`] = function (idx, arg1) { return this.optionInternalRecord(arg1, idx); }; this[`or`] = function (idx, arg1) { return this.orInternalRecord(arg1, idx); }; this[`many`] = function (idx, arg1) { this.manyInternalRecord(idx, arg1); }; this[`atLeastOne`] = function (idx, arg1) { this.atLeastOneInternalRecord(idx, arg1); }; this.ACTION = this.ACTION_RECORD; this.BACKTRACK = this.BACKTRACK_RECORD; this.LA = this.LA_RECORD; }); } disableRecording() { this.RECORDING_PHASE = false; // By deleting these **instance** properties, any future invocation // will be deferred to the original methods on the **prototype** object // This seems to get rid of any incorrect optimizations that V8 may // do during the recording phase. this.TRACE_INIT("Deleting Recording methods", () => { const that = this; for (let i = 0; i < 10; i++) { const idx = i > 0 ? i : ""; delete that[`CONSUME${idx}`]; delete that[`SUBRULE${idx}`]; delete that[`OPTION${idx}`]; delete that[`OR${idx}`]; delete that[`MANY${idx}`]; delete that[`MANY_SEP${idx}`]; delete that[`AT_LEAST_ONE${idx}`]; delete that[`AT_LEAST_ONE_SEP${idx}`]; } delete that[`consume`]; delete that[`subrule`]; delete that[`option`]; delete that[`or`]; delete that[`many`]; delete that[`atLeastOne`]; delete that.ACTION; delete that.BACKTRACK; delete that.LA; }); } // Parser methods are called inside an ACTION? // Maybe try/catch/finally on ACTIONS while disabling the recorders state changes? // @ts-expect-error -- noop place holder ACTION_RECORD(impl) { // NO-OP during recording } // Executing backtracking logic will break our recording logic assumptions BACKTRACK_RECORD(grammarRule, args) { return () => true; } // LA is part of the official API and may be used for custom lookahead logic // by end users who may forget to wrap it in ACTION or inside a GATE LA_RECORD(howMuch) { // We cannot use the RECORD_PHASE_TOKEN here because someone may depend // On LA return EOF at the end of the input so an infinite loop may occur. return END_OF_FILE; } topLevelRuleRecord(name, def) { try { const newTopLevelRule = new Rule({ definition: [], name: name }); newTopLevelRule.name = name; this.recordingProdStack.push(newTopLevelRule); def.call(this); this.recordingProdStack.pop(); return newTopLevelRule; } catch (originalError) { if (originalError.KNOWN_RECORDER_ERROR !== true) { try { originalError.message = originalError.message + '\n\t This error was thrown during the "grammar recording phase" For more info see:\n\t' + "https://chevrotain.io/docs/guide/internals.html#grammar-recording"; } catch (mutabilityError) { // We may not be able to modify the original error object throw originalError; } } throw originalError; } } // Implementation of parsing DSL optionInternalRecord(actionORMethodDef, occurrence) { return recordProd.call(this, Option, actionORMethodDef, occurrence); } atLeastOneInternalRecord(occurrence, actionORMethodDef) { recordProd.call(this, RepetitionMandatory, actionORMethodDef, occurrence); } atLeastOneSepFirstInternalRecord(occurrence, options) { recordProd.call(this, RepetitionMandatoryWithSeparator, options, occurrence, HANDLE_SEPARATOR); } manyInternalRecord(occurrence, actionORMethodDef) { recordProd.call(this, Repetition, actionORMethodDef, occurrence); } manySepFirstInternalRecord(occurrence, options) { recordProd.call(this, RepetitionWithSeparator, options, occurrence, HANDLE_SEPARATOR); } orInternalRecord(altsOrOpts, occurrence) { return recordOrProd.call(this, altsOrOpts, occurrence); } subruleInternalRecord(ruleToCall, occurrence, options) { assertMethodIdxIsValid(occurrence); if (!ruleToCall || !Object.hasOwn(ruleToCall, "ruleName")) { const error = new Error(`<SUBRULE${getIdxSuffix(occurrence)}> argument is invalid` + ` expecting a Parser method reference but got: <${JSON.stringify(ruleToCall)}>` + `\n inside top level rule: <${this.recordingProdStack[0].name}>`); error.KNOWN_RECORDER_ERROR = true; throw error; } const prevProd = this.recordingProdStack.at(-1); const ruleName = ruleToCall.ruleName; const newNoneTerminal = new NonTerminal({ idx: occurrence, nonTerminalName: ruleName, label: options === null || options === void 0 ? void 0 : options.LABEL, // The resolving of the `referencedRule` property will be done once all the Rule's GASTs have been created referencedRule: undefined, }); prevProd.definition.push(newNoneTerminal); return this.outputCst ? RECORDING_PHASE_CSTNODE : RECORDING_NULL_OBJECT; } consumeInternalRecord(tokType, occurrence, options) { assertMethodIdxIsValid(occurrence); if (!hasShortKeyProperty(tokType)) { const error = new Error(`<CONSUME${getIdxSuffix(occurrence)}> argument is invalid` + ` expecting a TokenType reference but got: <${JSON.stringify(tokType)}>` + `\n inside top level rule: <${this.recordingProdStack[0].name}>`); error.KNOWN_RECORDER_ERROR = true; throw error; } const prevProd = this.recordingProdStack.at(-1); const newNoneTerminal = new Terminal({ idx: occurrence, terminalType: tokType, label: options === null || options === void 0 ? void 0 : options.LABEL, }); prevProd.definition.push(newNoneTerminal); return RECORDING_PHASE_TOKEN; } } function recordProd(prodConstructor, mainProdArg, occurrence, handleSep = false) { assertMethodIdxIsValid(occurrence); const prevProd = this.recordingProdStack.at(-1); const grammarAction = typeof mainProdArg === "function" ? mainProdArg : mainProdArg.DEF; const newProd = new prodConstructor({ definition: [], idx: occurrence }); if (handleSep) { newProd.separator = mainProdArg.SEP; } if (Object.hasOwn(mainProdArg, "MAX_LOOKAHEAD")) { newProd.maxLookahead = mainProdArg.MAX_LOOKAHEAD; } this.recordingProdStack.push(newProd); grammarAction.call(this); prevProd.definition.push(newProd); this.recordingProdStack.pop(); return RECORDING_NULL_OBJECT; } function recordOrProd(mainProdArg, occurrence) { assertMethodIdxIsValid(occurrence); const prevProd = this.recordingProdStack.at(-1); // Only an array of alternatives const hasOptions = Array.isArray(mainProdArg) === false; const alts = hasOptions === false ? mainProdArg : mainProdArg.DEF; const newOrProd = new Alternation({ definition: [], idx: occurrence, ignoreAmbiguities: hasOptions && mainProdArg.IGNORE_AMBIGUITIES === true, }); if (Object.hasOwn(mainProdArg, "MAX_LOOKAHEAD")) { newOrProd.maxLookahead = mainProdArg.MAX_LOOKAHEAD; } const hasPredicates = alts.some((currAlt) => typeof currAlt.GATE === "function"); newOrProd.hasPredicates = hasPredicates; prevProd.definition.push(newOrProd); alts.forEach((currAlt) => { const currAltFlat = new Alternative({ definition: [] }); newOrProd.definition.push(currAltFlat); if (Object.hasOwn(currAlt, "IGNORE_AMBIGUITIES")) { currAltFlat.ignoreAmbiguities = currAlt.IGNORE_AMBIGUITIES; // assumes end user provides the correct config value/type } // **implicit** ignoreAmbiguities due to usage of gate else if (Object.hasOwn(currAlt, "GATE")) { currAltFlat.ignoreAmbiguities = true; } this.recordingProdStack.push(currAltFlat); currAlt.ALT.call(this); this.recordingProdStack.pop(); }); return RECORDING_NULL_OBJECT; } function getIdxSuffix(idx) { return idx === 0 ? "" : `${idx}`; } function assertMethodIdxIsValid(idx) { if (idx < 0 || idx > MAX_METHOD_IDX) { const error = new Error( // The stack trace will contain all the needed details `Invalid DSL Method idx value: <${idx}>\n\t` + `Idx value must be a none negative value smaller than ${MAX_METHOD_IDX + 1}`); error.KNOWN_RECORDER_ERROR = true; throw error; } } //# sourceMappingURL=gast_recorder.js.map