chevrotain
Version:
Chevrotain is a high performance fault tolerant JavaScript parsing DSL for building recursive descent parsers
444 lines (404 loc) • 13.6 kB
text/typescript
import {
AtLeastOneSepMethodOpts,
ConsumeMethodOpts,
CstNode,
DSLMethodOpts,
DSLMethodOptsWithErr,
GrammarAction,
IOrAlt,
IParserConfig,
IProduction,
IToken,
ManySepMethodOpts,
OrMethodOpts,
SubruleMethodOpts,
TokenType
} from "../../../../api"
import {
forEach,
has,
isArray,
isFunction,
peek,
some
} from "../../../utils/utils"
import { MixedInParser } from "./parser_traits"
import {
Alternation,
Alternative,
NonTerminal,
Option,
Repetition,
RepetitionMandatory,
RepetitionMandatoryWithSeparator,
RepetitionWithSeparator,
Rule,
Terminal
} from "../../grammar/gast/gast_public"
import { Lexer } from "../../../scan/lexer_public"
import { augmentTokenTypes, hasShortKeyProperty } from "../../../scan/tokens"
import { createToken, createTokenInstance } from "../../../scan/tokens_public"
import { END_OF_FILE } from "../parser"
import { BITS_FOR_OCCURRENCE_IDX } from "../../grammar/keys"
/** Any production, optionally exposing a nested `definition` list of child productions. */
type ProdWithDef = IProduction & { definition?: IProduction[] }

// Frozen placeholder handed back by the recording variants of the DSL methods
// when there is no real result to return.
const RECORDING_NULL_OBJECT = Object.freeze({
  description: "This Object indicates the Parser is during Recording Phase"
})

// Passed as the `handleSep` argument of recordProd by the "*_SEP" recorders.
const HANDLE_SEPARATOR = true

// Largest occurrence index accepted by assertMethodIdxIsValid.
const MAX_METHOD_IDX = Math.pow(2, BITS_FOR_OCCURRENCE_IDX) - 1

// Dedicated token type (never matched by a Lexer) for the recording phase placeholder token.
const RFT = createToken({ name: "RECORDING_PHASE_TOKEN", pattern: Lexer.NA })
augmentTokenTypes([RFT])

// Frozen placeholder token returned by CONSUME while recording.
const RECORDING_PHASE_TOKEN = Object.freeze(
  createTokenInstance(
    RFT,
    "This IToken indicates the Parser is in Recording Phase\n\t" +
      "See: https://chevrotain.io/docs/guide/internals.html#grammar-recording for details",
    // Using "-1" instead of NaN (as in EOF) because an actual number is less likely to
    // cause errors if the output of LA or CONSUME would be (incorrectly) used during the recording phase.
    -1,
    -1,
    -1,
    -1,
    -1,
    -1
  )
)

// Placeholder CST node returned by SUBRULE while recording when the parser outputs a CST.
const RECORDING_PHASE_CSTNODE: CstNode = {
  name:
    "This CSTNode indicates the Parser is in Recording Phase\n\t" +
    "See: https://chevrotain.io/docs/guide/internals.html#grammar-recording for details",
  children: {}
}
/**
 * This trait handles the creation of the GAST structure for Chevrotain Grammars
 *
 * While recording is enabled the parsing DSL methods of the parser **instance** are
 * replaced with "recording" variants. Instead of parsing, these variants append GAST
 * productions to the `definition` of the production at the top of `recordingProdStack`.
 */
export class GastRecorder {
  // Productions currently being recorded, innermost last.
  // While a rule is being recorded the first entry is that rule's top level Rule.
  recordingProdStack: ProdWithDef[]
  // True between enableRecording() and disableRecording().
  RECORDING_PHASE: boolean

  /**
   * Resets the recorder state: an empty production stack and recording turned off.
   *
   * @param config - not read by this method.
   */
  initGastRecorder(this: MixedInParser, config: IParserConfig): void {
    this.recordingProdStack = []
    this.RECORDING_PHASE = false
  }

  /**
   * Turns the recording phase on and installs the recording implementations
   * of the DSL methods as own properties of the parser instance.
   */
  enableRecording(this: MixedInParser): void {
    this.RECORDING_PHASE = true

    this.TRACE_INIT("Enable Recording", () => {
      /**
       * Warning Dark Voodoo Magic upcoming!
       * We are "replacing" the public parsing DSL methods API
       * With **new** alternative implementations on the Parser **instance**
       *
       * So far this is the only way I've found to avoid performance regressions during parsing time.
       * - Approx 30% performance regression was measured on Chrome 75 Canary when attempting to replace the "internal"
       *   implementations directly instead.
       */
      for (let i = 0; i < 10; i++) {
        // Occurrence zero has no suffix (CONSUME), the others use their index (CONSUME1..CONSUME9).
        const idx = i > 0 ? i : ""
        this[`CONSUME${idx}`] = function (arg1, arg2) {
          return this.consumeInternalRecord(arg1, i, arg2)
        }
        this[`SUBRULE${idx}`] = function (arg1, arg2) {
          return this.subruleInternalRecord(arg1, i, arg2)
        }
        this[`OPTION${idx}`] = function (arg1) {
          return this.optionInternalRecord(arg1, i)
        }
        this[`OR${idx}`] = function (arg1) {
          return this.orInternalRecord(arg1, i)
        }
        this[`MANY${idx}`] = function (arg1) {
          this.manyInternalRecord(i, arg1)
        }
        this[`MANY_SEP${idx}`] = function (arg1) {
          this.manySepFirstInternalRecord(i, arg1)
        }
        this[`AT_LEAST_ONE${idx}`] = function (arg1) {
          this.atLeastOneInternalRecord(i, arg1)
        }
        this[`AT_LEAST_ONE_SEP${idx}`] = function (arg1) {
          this.atLeastOneSepFirstInternalRecord(i, arg1)
        }
      }

      // DSL methods with the idx(suffix) as an argument
      this[`consume`] = function (idx, arg1, arg2) {
        return this.consumeInternalRecord(arg1, idx, arg2)
      }
      this[`subrule`] = <any>function (idx, arg1, arg2) {
        return this.subruleInternalRecord(arg1, idx, arg2)
      }
      this[`option`] = function (idx, arg1) {
        return this.optionInternalRecord(arg1, idx)
      }
      this[`or`] = function (idx, arg1) {
        return this.orInternalRecord(arg1, idx)
      }
      this[`many`] = function (idx, arg1) {
        this.manyInternalRecord(idx, arg1)
      }
      this[`atLeastOne`] = function (idx, arg1) {
        this.atLeastOneInternalRecord(idx, arg1)
      }

      this.ACTION = this.ACTION_RECORD
      this.BACKTRACK = this.BACKTRACK_RECORD
      this.LA = this.LA_RECORD
    })
  }

  /**
   * Turns the recording phase off and removes every instance property
   * that enableRecording() installed.
   */
  disableRecording(this: MixedInParser): void {
    this.RECORDING_PHASE = false
    // By deleting these **instance** properties, any future invocation
    // will be deferred to the original methods on the **prototype** object
    // This seems to get rid of any incorrect optimizations that V8 may
    // do during the recording phase.
    this.TRACE_INIT("Deleting Recording methods", () => {
      for (let i = 0; i < 10; i++) {
        const idx = i > 0 ? i : ""
        delete this[`CONSUME${idx}`]
        delete this[`SUBRULE${idx}`]
        delete this[`OPTION${idx}`]
        delete this[`OR${idx}`]
        delete this[`MANY${idx}`]
        delete this[`MANY_SEP${idx}`]
        delete this[`AT_LEAST_ONE${idx}`]
        delete this[`AT_LEAST_ONE_SEP${idx}`]
      }

      delete this[`consume`]
      delete this[`subrule`]
      delete this[`option`]
      delete this[`or`]
      delete this[`many`]
      delete this[`atLeastOne`]

      delete this.ACTION
      delete this.BACKTRACK
      delete this.LA
    })
  }

  // TODO: is there any way to use this method to check no
  //   Parser methods are called inside an ACTION?
  //   Maybe try/catch/finally on ACTIONS while disabling the recorders state changes?
  /**
   * Recording replacement for ACTION: the implementation is NOT invoked.
   *
   * @param impl - ignored while recording.
   * @returns always `undefined`.
   */
  ACTION_RECORD<T>(this: MixedInParser, impl: () => T): T {
    // NO-OP during recording
    return
  }

  // Executing backtracking logic will break our recording logic assumptions
  /**
   * Recording replacement for BACKTRACK: neither argument is used.
   *
   * @returns a predicate which always answers `true`.
   */
  BACKTRACK_RECORD<T>(
    grammarRule: (...args: any[]) => T,
    args?: any[]
  ): () => boolean {
    return () => true
  }

  // LA is part of the official API and may be used for custom lookahead logic
  // by end users who may forget to wrap it in ACTION or inside a GATE
  /**
   * Recording replacement for LA.
   *
   * @param howMuch - ignored while recording.
   * @returns always END_OF_FILE.
   */
  LA_RECORD(howMuch: number): IToken {
    // We cannot use the RECORDING_PHASE_TOKEN here because someone may depend
    // On LA return EOF at the end of the input so an infinite loop may occur.
    return END_OF_FILE
  }

  /**
   * Records a single top level rule by running its implementation while a
   * new Rule sits at the top of the recording stack.
   *
   * @param name - the name of the rule.
   * @param def - the rule's implementation, invoked with `this` bound to the parser.
   * @returns the Rule populated by the recording DSL calls made inside `def`.
   * @throws whatever `def` throws. When that value is not flagged with
   *   `KNOWN_RECORDER_ERROR` a hint about the recording phase is appended
   *   to its message (if the message can be modified).
   */
  topLevelRuleRecord(name: string, def: Function): Rule {
    // Remembered so a failure part way through cannot leave stale productions on the stack.
    const stackDepthOnEntry = this.recordingProdStack.length
    try {
      const newTopLevelRule = new Rule({ definition: [], name: name })
      // NOTE(review): presumably redundant with the `name` option above - kept to preserve behavior.
      newTopLevelRule.name = name
      this.recordingProdStack.push(newTopLevelRule)
      def.call(this)
      this.recordingProdStack.pop()
      return newTopLevelRule
    } catch (originalError) {
      // Discard the rule and any nested productions that were never popped.
      this.recordingProdStack.length = stackDepthOnEntry

      // The null check avoids masking a thrown `null` / `undefined` with a TypeError.
      if (
        originalError != null &&
        originalError.KNOWN_RECORDER_ERROR !== true
      ) {
        try {
          originalError.message =
            originalError.message +
            '\n\t This error was thrown during the "grammar recording phase" For more info see:\n\t' +
            "https://chevrotain.io/docs/guide/internals.html#grammar-recording"
        } catch (mutabilityError) {
          // We may not be able to modify the original error object,
          // in which case it is re-thrown unmodified below.
        }
      }
      throw originalError
    }
  }

  // Implementation of parsing DSL
  /** Records an Option production for OPTION. */
  optionInternalRecord<OUT>(
    this: MixedInParser,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>,
    occurrence: number
  ): OUT {
    return recordProd.call(this, Option, actionORMethodDef, occurrence)
  }

  /** Records a RepetitionMandatory production for AT_LEAST_ONE. */
  atLeastOneInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOptsWithErr<OUT>
  ): void {
    recordProd.call(this, RepetitionMandatory, actionORMethodDef, occurrence)
  }

  /** Records a RepetitionMandatoryWithSeparator production (including its separator) for AT_LEAST_ONE_SEP. */
  atLeastOneSepFirstInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    options: AtLeastOneSepMethodOpts<OUT>
  ): void {
    recordProd.call(
      this,
      RepetitionMandatoryWithSeparator,
      options,
      occurrence,
      HANDLE_SEPARATOR
    )
  }

  /** Records a Repetition production for MANY. */
  manyInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
  ): void {
    recordProd.call(this, Repetition, actionORMethodDef, occurrence)
  }

  /** Records a RepetitionWithSeparator production (including its separator) for MANY_SEP. */
  manySepFirstInternalRecord<OUT>(
    this: MixedInParser,
    occurrence: number,
    options: ManySepMethodOpts<OUT>
  ): void {
    recordProd.call(
      this,
      RepetitionWithSeparator,
      options,
      occurrence,
      HANDLE_SEPARATOR
    )
  }

  /** Records an Alternation production (and one Alternative per alt) for OR. */
  orInternalRecord<T>(
    this: MixedInParser,
    altsOrOpts: IOrAlt<any>[] | OrMethodOpts<unknown>,
    occurrence: number
  ): T {
    return recordOrProd.call(this, altsOrOpts, occurrence)
  }

  /**
   * Records a NonTerminal production for SUBRULE.
   *
   * @param ruleToCall - the invoked rule, it must own a `ruleName` property.
   * @param occurrence - the occurrence index of this SUBRULE call.
   * @param options - not used while recording.
   * @returns a placeholder: RECORDING_PHASE_CSTNODE when `outputCst` is truthy,
   *   RECORDING_NULL_OBJECT otherwise.
   * @throws an Error flagged with `KNOWN_RECORDER_ERROR` when the occurrence index
   *   or `ruleToCall` is invalid.
   */
  subruleInternalRecord<T>(
    this: MixedInParser,
    ruleToCall: (idx: number) => T,
    occurrence: number,
    options?: SubruleMethodOpts
  ): T | CstNode {
    assertMethodIdxIsValid(occurrence)
    if (!ruleToCall || has(ruleToCall, "ruleName") === false) {
      const error: any = new Error(
        `<SUBRULE${getIdxSuffix(occurrence)}> argument is invalid` +
          ` expecting a Parser method reference but got: <${JSON.stringify(
            ruleToCall
          )}>` +
          `\n inside top level rule: <${
            (<Rule>this.recordingProdStack[0]).name
          }>`
      )
      error.KNOWN_RECORDER_ERROR = true
      throw error
    }

    const prevProd: any = peek(this.recordingProdStack)
    const ruleName = ruleToCall["ruleName"]
    const newNoneTerminal = new NonTerminal({
      idx: occurrence,
      nonTerminalName: ruleName,
      // The resolving of the `referencedRule` property will be done once all the Rule's GASTs have been created
      referencedRule: undefined
    })
    prevProd.definition.push(newNoneTerminal)

    return this.outputCst ? RECORDING_PHASE_CSTNODE : <any>RECORDING_NULL_OBJECT
  }

  /**
   * Records a Terminal production for CONSUME.
   *
   * @param tokType - the TokenType to consume.
   * @param occurrence - the occurrence index of this CONSUME call.
   * @param options - not used while recording.
   * @returns always the RECORDING_PHASE_TOKEN placeholder.
   * @throws an Error flagged with `KNOWN_RECORDER_ERROR` when the occurrence index is
   *   invalid or `tokType` fails the `hasShortKeyProperty` check.
   */
  consumeInternalRecord(
    this: MixedInParser,
    tokType: TokenType,
    occurrence: number,
    options: ConsumeMethodOpts
  ): IToken {
    assertMethodIdxIsValid(occurrence)
    if (!hasShortKeyProperty(tokType)) {
      const error: any = new Error(
        `<CONSUME${getIdxSuffix(occurrence)}> argument is invalid` +
          ` expecting a TokenType reference but got: <${JSON.stringify(
            tokType
          )}>` +
          `\n inside top level rule: <${
            (<Rule>this.recordingProdStack[0]).name
          }>`
      )
      error.KNOWN_RECORDER_ERROR = true
      throw error
    }

    const prevProd: any = peek(this.recordingProdStack)
    const newTerminal = new Terminal({
      idx: occurrence,
      terminalType: tokType
    })
    prevProd.definition.push(newTerminal)

    return RECORDING_PHASE_TOKEN
  }
}
/**
 * Records a production which wraps a nested grammar implementation.
 * Must be invoked with `this` bound to the recording parser (e.g. via `.call`).
 *
 * @param prodConstructor - constructor of the production to create, it receives
 *   `{ definition: [], idx: occurrence }`.
 * @param mainProdArg - either the nested implementation itself or an options object
 *   providing it as `DEF` (optionally with `SEP` and `MAX_LOOKAHEAD`).
 * @param occurrence - the occurrence index of the DSL method call.
 * @param handleSep - when true `mainProdArg.SEP` is copied to the new production's `separator`.
 * @returns always RECORDING_NULL_OBJECT.
 */
function recordProd(
  prodConstructor: any,
  mainProdArg: any,
  occurrence: number,
  handleSep: boolean = false
): any {
  assertMethodIdxIsValid(occurrence)
  const parentProd: any = peek(this.recordingProdStack)

  let nestedImpl
  if (isFunction(mainProdArg)) {
    nestedImpl = mainProdArg
  } else {
    nestedImpl = mainProdArg.DEF
  }

  const recordedProd = new prodConstructor({ definition: [], idx: occurrence })
  if (handleSep) {
    recordedProd.separator = mainProdArg.SEP
  }
  if (has(mainProdArg, "MAX_LOOKAHEAD")) {
    recordedProd.maxLookahead = mainProdArg.MAX_LOOKAHEAD
  }

  // Anything recorded while the nested implementation runs lands inside the new production.
  this.recordingProdStack.push(recordedProd)
  nestedImpl.call(this)
  parentProd.definition.push(recordedProd)
  this.recordingProdStack.pop()

  return RECORDING_NULL_OBJECT
}
/**
 * Records an Alternation production together with one Alternative per alt.
 * Must be invoked with `this` bound to the recording parser (e.g. via `.call`).
 *
 * @param mainProdArg - either an array of alternatives or an options object holding
 *   that array as `DEF` (optionally with `IGNORE_AMBIGUITIES` and `MAX_LOOKAHEAD`).
 * @param occurrence - the occurrence index of the OR call.
 * @returns always RECORDING_NULL_OBJECT.
 */
function recordOrProd(mainProdArg: any, occurrence: number): any {
  assertMethodIdxIsValid(occurrence)
  const parentProd: any = peek(this.recordingProdStack)

  let alternatives
  let ignoreAllAmbiguities
  if (isArray(mainProdArg) === false) {
    // Options object: the alternatives are nested under DEF
    alternatives = mainProdArg.DEF
    ignoreAllAmbiguities = mainProdArg.IGNORE_AMBIGUITIES === true
  } else {
    // Only an array of alternatives
    alternatives = mainProdArg
    ignoreAllAmbiguities = false
  }

  const alternation = new Alternation({
    definition: [],
    idx: occurrence,
    ignoreAmbiguities: ignoreAllAmbiguities
  })
  if (has(mainProdArg, "MAX_LOOKAHEAD")) {
    alternation.maxLookahead = mainProdArg.MAX_LOOKAHEAD
  }

  alternation.hasPredicates = some(alternatives, (candidate: any) =>
    isFunction(candidate.GATE)
  )

  parentProd.definition.push(alternation)

  forEach(alternatives, (candidate) => {
    const flatAlt = new Alternative({ definition: [] })
    alternation.definition.push(flatAlt)

    if (has(candidate, "IGNORE_AMBIGUITIES")) {
      // An explicit per alternative setting always wins
      flatAlt.ignoreAmbiguities = candidate.IGNORE_AMBIGUITIES
    } else if (has(candidate, "GATE")) {
      // **implicit** ignoreAmbiguities due to usage of gate
      flatAlt.ignoreAmbiguities = true
    }

    // Whatever the alternative's implementation records lands inside its own Alternative.
    this.recordingProdStack.push(flatAlt)
    candidate.ALT.call(this)
    this.recordingProdStack.pop()
  })

  return RECORDING_NULL_OBJECT
}
/**
 * Builds the suffix of a DSL method name from its occurrence index.
 *
 * @param idx - the occurrence index.
 * @returns an empty string for index zero, otherwise the index as text.
 */
function getIdxSuffix(idx: number): string {
  if (idx === 0) {
    return ""
  }
  return String(idx)
}
/**
 * Verifies that a DSL method occurrence index is an integer in the range
 * zero to MAX_METHOD_IDX (inclusive).
 *
 * A plain range comparison is not enough: `NaN`, `undefined`, fractional numbers
 * and numeric strings all slip through `idx < 0 || idx > MAX_METHOD_IDX`,
 * so the value is also required to be an integral number.
 *
 * @param idx - the occurrence index to validate.
 * @throws an Error flagged with `KNOWN_RECORDER_ERROR` when the index is invalid.
 */
function assertMethodIdxIsValid(idx): void {
  // Math.floor is used (rather than Number.isInteger) to avoid depending on newer lib typings.
  // NaN fails the equality, +/-Infinity are rejected by the range checks.
  const isIntegralNumber = typeof idx === "number" && Math.floor(idx) === idx

  if (!isIntegralNumber || idx < 0 || idx > MAX_METHOD_IDX) {
    const error: any = new Error(
      // The stack trace will contain all the needed details
      `Invalid DSL Method idx value: <${idx}>\n\t` +
        `Idx value must be a none negative value smaller than ${
          MAX_METHOD_IDX + 1
        }`
    )
    error.KNOWN_RECORDER_ERROR = true
    throw error
  }
}