chevrotain

import {
  analyzeTokenTypes,
  charCodeToOptimizedIndex,
  cloneEmptyGroups,
  DEFAULT_MODE,
  IAnalyzeResult,
  IPatternConfig,
  LineTerminatorOptimizedTester,
  performRuntimeChecks,
  performWarningRuntimeChecks,
  validatePatterns,
} from "./lexer.js";
import { PRINT_WARNING, timer, toFastProperties } from "@chevrotain/utils";
import { augmentTokenTypes } from "./tokens.js";
import {
  CustomPatternMatcherFunc,
  CustomPatternMatcherReturn,
  ILexerConfig,
  ILexerDefinitionError,
  ILexingError,
  IMultiModeLexerDefinition,
  IToken,
  TokenType,
} from "@chevrotain/types";
import { defaultLexerErrorProvider } from "./lexer_errors_public.js";
import { clearRegExpParserCache } from "./reg_exp_parser.js";

/**
 * The value returned by {@link Lexer.tokenize}.
 */
export interface ILexingResult {
  /** Tokens whose group is `false` (i.e. not skipped and not routed to a named group). */
  tokens: IToken[];
  /** Tokens routed to a named group, keyed by the group name. */
  groups: { [groupName: string]: IToken[] };
  /** Errors recorded while scanning (unexpected characters, illegal mode pops). */
  errors: ILexingError[];
}

/**
 * Numeric identifiers for the kinds of lexer-definition problems.
 *
 * This is a numeric enum relying on implicit numbering, therefore the order
 * of the members is significant and must not be changed.
 */
export enum LexerDefinitionErrorType {
  MISSING_PATTERN,
  INVALID_PATTERN,
  EOI_ANCHOR_FOUND,
  UNSUPPORTED_FLAGS_FOUND,
  DUPLICATE_PATTERNS_FOUND,
  INVALID_GROUP_TYPE_FOUND,
  PUSH_MODE_DOES_NOT_EXIST,
  MULTI_MODE_LEXER_WITHOUT_DEFAULT_MODE,
  MULTI_MODE_LEXER_WITHOUT_MODES_PROPERTY,
  MULTI_MODE_LEXER_DEFAULT_MODE_VALUE_DOES_NOT_EXIST,
  LEXER_DEFINITION_CANNOT_CONTAIN_UNDEFINED,
  SOI_ANCHOR_FOUND,
  EMPTY_MATCH_PATTERN,
  NO_LINE_BREAKS_FLAGS,
  UNREACHABLE_PATTERN,
  IDENTIFY_TERMINATOR,
  CUSTOM_LINE_BREAK,
  MULTI_MODE_LEXER_LONGER_ALT_NOT_IN_CURRENT_MODE,
}

/**
 * Shape of a custom (non RegExp) token pattern: an object exposing an `exec`
 * matcher function.
 */
export interface IRegExpExec {
  exec: CustomPatternMatcherFunc;
}

/**
 * Defaults merged underneath the user supplied configuration in the Lexer constructor.
 * The object is frozen below; the constructor also compares some of its values
 * by identity to detect options the user did not override.
 */
const DEFAULT_LEXER_CONFIG: Required<ILexerConfig> = {
  deferDefinitionErrorsHandling: false,
  positionTracking: "full",
  lineTerminatorsPattern: /\n|\r\n?/g,
  lineTerminatorCharacters: ["\n", "\r"],
  ensureOptimizations: false,
  safeMode: false,
  errorMessageProvider: defaultLexerErrorProvider,
  traceInitPerf: false,
  skipValidations: false,
  recoveryEnabled: true,
};

Object.freeze(DEFAULT_LEXER_CONFIG);

/**
 * A (optionally multi-mode) lexer.
 *
 * The constructor validates and analyzes the token definitions of every mode
 * and selects specialized implementations for several internal methods.
 * {@link Lexer.tokenize} then scans an input text into tokens, groups and errors.
 */
export class Lexer {
  /** Marker value (a description string) for token patterns that should be skipped. */
  public static SKIPPED =
    "This marks a skipped Token pattern, this means each token identified by it will " +
    "be consumed and then thrown into oblivion, this can be used to for example to completely ignore whitespace.";

  /** Marker pattern for "not applicable". */
  public static NA = /NOT_APPLICABLE/;

  /** Definition errors collected by the constructor's validation phases. */
  public lexerDefinitionErrors: ILexerDefinitionError[] = [];
  /** Definition warnings collected by the constructor's validation phases. */
  public lexerDefinitionWarning: ILexerDefinitionError[] = [];

  /** Per mode: the analyzed pattern configurations, in matching order. */
  protected patternIdxToConfig: Record<string, IPatternConfig[]> = {};
  /** Per mode: candidate pattern configurations indexed by an optimized char code index. */
  protected charCodeToPatternIdxToConfig: {
    [modeName: string]: { [charCode: number]: IPatternConfig[] };
  } = {};

  /** Names of all the modes, in definition order. */
  protected modes: string[] = [];
  protected defaultMode!: string;
  /** Template of the (empty) token groups; cloned for each tokenize call. */
  protected emptyGroups: { [groupName: string]: IToken } = {};

  private config: Required<ILexerConfig>;
  private trackStartLines: boolean = true;
  private trackEndLines: boolean = true;
  private hasCustom: boolean = false;
  private canModeBeOptimized: Record<string, boolean> = {};

  private traceInitPerf!: boolean | number;
  private traceInitMaxIdent!: number;
  private traceInitIndent: number;

  /**
   * @param lexerDefinition - Either a flat array of TokenTypes (single mode) or
   *   a multi-mode definition.
   * @param config - Partial configuration, merged on top of the defaults.
   * @throws Error when `config` is a boolean (legacy signature), when the
   *   configuration is inconsistent, when an invalid `positionTracking` value
   *   is given, when definition errors were found and
   *   `deferDefinitionErrorsHandling` is not enabled, or when
   *   `ensureOptimizations` is enabled and some mode cannot be optimized.
   */
  constructor(
    protected lexerDefinition: TokenType[] | IMultiModeLexerDefinition,
    config: ILexerConfig = DEFAULT_LEXER_CONFIG,
  ) {
    if (typeof config === "boolean") {
      throw Error(
        "The second argument to the Lexer constructor is now an ILexerConfig Object.\n" +
          "a boolean 2nd argument is no longer supported",
      );
    }

    this.config = Object.assign({}, DEFAULT_LEXER_CONFIG, config) as any;

    // `traceInitPerf` may be `true` (trace every nesting level) or a number
    // (the maximum nesting level to trace).
    const traceInitVal = this.config.traceInitPerf;
    if (traceInitVal === true) {
      this.traceInitMaxIdent = Infinity;
      this.traceInitPerf = true;
    } else if (typeof traceInitVal === "number") {
      this.traceInitMaxIdent = traceInitVal;
      this.traceInitPerf = true;
    }
    this.traceInitIndent = -1;

    this.TRACE_INIT("Lexer Constructor", () => {
      let actualDefinition!: IMultiModeLexerDefinition;
      let hasOnlySingleMode = true;
      this.TRACE_INIT("Lexer Config handling", () => {
        if (
          this.config.lineTerminatorsPattern ===
          DEFAULT_LEXER_CONFIG.lineTerminatorsPattern
        ) {
          // optimized built-in implementation for the defaults definition of lineTerminators
          this.config.lineTerminatorsPattern = LineTerminatorOptimizedTester;
        } else {
          // A custom line terminators pattern must be accompanied by custom
          // line terminator characters (detected here by identity with the default).
          if (
            this.config.lineTerminatorCharacters ===
            DEFAULT_LEXER_CONFIG.lineTerminatorCharacters
          ) {
            throw Error(
              "Error: Missing <lineTerminatorCharacters> property on the Lexer config.\n" +
                "\tFor details See: https://chevrotain.io/docs/guide/resolving_lexer_errors.html#MISSING_LINE_TERM_CHARS",
            );
          }
        }

        if (config.safeMode && config.ensureOptimizations) {
          throw Error(
            '"safeMode" and "ensureOptimizations" flags are mutually exclusive.',
          );
        }

        this.trackStartLines = /full|onlyStart/i.test(
          this.config.positionTracking,
        );
        this.trackEndLines = /full/i.test(this.config.positionTracking);

        // Convert SingleModeLexerDefinition into a IMultiModeLexerDefinition.
        if (Array.isArray(lexerDefinition)) {
          actualDefinition = {
            modes: { defaultMode: [...lexerDefinition] },
            defaultMode: DEFAULT_MODE,
          };
        } else {
          // no conversion needed, input should already be a IMultiModeLexerDefinition
          hasOnlySingleMode = false;
          // NOTE: this is a shallow copy, the `modes` object itself is still
          // shared with the definition passed in by the caller.
          actualDefinition = {
            ...(lexerDefinition as IMultiModeLexerDefinition),
          };
        }
      });

      if (this.config.skipValidations === false) {
        this.TRACE_INIT("performRuntimeChecks", () => {
          this.lexerDefinitionErrors = this.lexerDefinitionErrors.concat(
            performRuntimeChecks(
              actualDefinition,
              this.trackStartLines,
              this.config.lineTerminatorCharacters,
            ),
          );
        });

        this.TRACE_INIT("performWarningRuntimeChecks", () => {
          this.lexerDefinitionWarning = this.lexerDefinitionWarning.concat(
            performWarningRuntimeChecks(
              actualDefinition,
              this.trackStartLines,
              this.config.lineTerminatorCharacters,
            ),
          );
        });
      }

      // for extra robustness to avoid throwing a non-informative error message
      actualDefinition.modes = actualDefinition.modes
        ? actualDefinition.modes
        : {};

      // an error of undefined TokenTypes will be detected in "performRuntimeChecks" above.
      // this transformation is to increase robustness in the case of partially invalid lexer definition.
      Object.entries(actualDefinition.modes).forEach(
        ([currModeName, currModeValue]) => {
          actualDefinition.modes[currModeName] = currModeValue.filter(
            (currTokType: TokenType) => currTokType !== undefined,
          );
        },
      );

      const allModeNames = Object.keys(actualDefinition.modes);

      Object.entries(actualDefinition.modes).forEach(
        ([currModName, currModDef]: [string, TokenType[]]) => {
          this.TRACE_INIT(`Mode: <${currModName}> processing`, () => {
            this.modes.push(currModName);

            if (this.config.skipValidations === false) {
              this.TRACE_INIT(`validatePatterns`, () => {
                this.lexerDefinitionErrors = this.lexerDefinitionErrors.concat(
                  validatePatterns(currModDef, allModeNames),
                );
              });
            }

            // If definition errors were encountered, the analysis phase may fail unexpectedly.
            // Considering a lexer with definition errors may never be used, there is no point
            // to performing the analysis anyhow...
            if (this.lexerDefinitionErrors.length === 0) {
              augmentTokenTypes(currModDef);

              let currAnalyzeResult!: IAnalyzeResult;
              this.TRACE_INIT(`analyzeTokenTypes`, () => {
                currAnalyzeResult = analyzeTokenTypes(currModDef, {
                  lineTerminatorCharacters:
                    this.config.lineTerminatorCharacters,
                  positionTracking: config.positionTracking,
                  ensureOptimizations: config.ensureOptimizations,
                  safeMode: config.safeMode,
                  tracer: this.TRACE_INIT,
                });
              });

              this.patternIdxToConfig[currModName] =
                currAnalyzeResult.patternIdxToConfig;

              this.charCodeToPatternIdxToConfig[currModName] =
                currAnalyzeResult.charCodeToPatternIdxToConfig;

              this.emptyGroups = Object.assign(
                {},
                this.emptyGroups,
                currAnalyzeResult.emptyGroups,
              ) as any;

              this.hasCustom = currAnalyzeResult.hasCustom || this.hasCustom;

              this.canModeBeOptimized[currModName] =
                currAnalyzeResult.canBeOptimized;
            }
          });
        },
      );

      this.defaultMode = actualDefinition.defaultMode;

      if (
        this.lexerDefinitionErrors.length > 0 &&
        !this.config.deferDefinitionErrorsHandling
      ) {
        const allErrMessages = this.lexerDefinitionErrors.map((error) => {
          return error.message;
        });
        const allErrMessagesString = allErrMessages.join(
          "-----------------------\n",
        );
        throw new Error(
          "Errors detected in definition of Lexer:\n" + allErrMessagesString,
        );
      }

      // Reaching this point means there are either no definition errors or their
      // handling was deferred: print the collected warnings.
      this.lexerDefinitionWarning.forEach((warningDescriptor) => {
        PRINT_WARNING(warningDescriptor.message);
      });

      this.TRACE_INIT("Choosing sub-methods implementations", () => {
        // Choose the relevant internal implementations for this specific lexer.
        // These implementations should be in-lined by the JavaScript engine
        // to provide optimal performance in each scenario.
        if (hasOnlySingleMode) {
          this.handleModes = () => {};
        }

        if (this.trackStartLines === false) {
          this.computeNewColumn = (x: any) => x;
        }

        if (this.trackEndLines === false) {
          this.updateTokenEndLineColumnLocation = () => {};
        }

        if (/full/i.test(this.config.positionTracking)) {
          this.createTokenInstance = this.createFullToken;
        } else if (/onlyStart/i.test(this.config.positionTracking)) {
          this.createTokenInstance = this.createStartOnlyToken;
        } else if (/onlyOffset/i.test(this.config.positionTracking)) {
          this.createTokenInstance = this.createOffsetOnlyToken;
        } else {
          throw Error(
            `Invalid <positionTracking> config option: "${this.config.positionTracking}"`,
          );
        }

        if (this.hasCustom) {
          this.addToken = this.addTokenUsingPush;
          this.handlePayload = this.handlePayloadWithCustom;
        } else {
          this.addToken = this.addTokenUsingMemberAccess;
          this.handlePayload = this.handlePayloadNoCustom;
        }
      });

      this.TRACE_INIT("Failed Optimization Warnings", () => {
        const unOptimizedModes = Object.entries(this.canModeBeOptimized).reduce(
          (cannotBeOptimized, [modeName, canBeOptimized]) => {
            if (canBeOptimized === false) {
              cannotBeOptimized.push(modeName);
            }
            return cannotBeOptimized;
          },
          [] as string[],
        );

        if (config.ensureOptimizations && unOptimizedModes.length > 0) {
          throw Error(
            `Lexer Modes: < ${unOptimizedModes.join(
              ", ",
            )} > cannot be optimized.\n` +
              '\t Disable the "ensureOptimizations" lexer config flag to silently ignore this and run the lexer in an un-optimized mode.\n' +
              "\t Or inspect the console log for details on how to resolve these issues.",
          );
        }
      });

      this.TRACE_INIT("clearRegExpParserCache", () => {
        clearRegExpParserCache();
      });

      this.TRACE_INIT("toFastProperties", () => {
        toFastProperties(this);
      });
    });
  }

  /**
   * Scans `text` into tokens.
   *
   * @param text - The input to tokenize.
   * @param initialMode - The lexer mode to start in, defaults to the definition's default mode.
   * @returns The matched tokens, the token groups and the lexing errors.
   * @throws Error when the lexer has (deferred) definition errors, or when
   *   `initialMode` is not one of the analyzed lexer modes.
   */
  public tokenize(
    text: string,
    initialMode: string = this.defaultMode,
  ): ILexingResult {
    if (this.lexerDefinitionErrors.length > 0) {
      const allErrMessages = this.lexerDefinitionErrors.map((error) => {
        return error.message;
      });
      const allErrMessagesString = allErrMessages.join(
        "-----------------------\n",
      );
      throw new Error(
        "Unable to Tokenize because Errors detected in definition of Lexer:\n" +
          allErrMessagesString,
      );
    }

    // Without this guard an unknown mode fails deep inside the scanning code
    // with an opaque "cannot read properties of undefined" TypeError.
    if (this.patternIdxToConfig[initialMode] === undefined) {
      throw new Error(
        `Unable to Tokenize because the initial lexer mode <${initialMode}> does not exist.\n` +
          `Available modes: <${this.modes.join(", ")}>`,
      );
    }

    return this.tokenizeInternal(text, initialMode);
  }

  // There is quite a bit of duplication between this and "tokenizeInternalLazy"
  // This is intentional due to performance considerations.
  // this method also uses quite a bit of `!` non-null assertions because it is too optimized
  // for `tsc` to always understand it is "safe"
  /**
   * The scanning loop. At every offset the candidate patterns of the current
   * mode are tried in order; the first match wins (subject to its "longer
   * alternatives"). When nothing matches, characters are dropped until some
   * pattern matches again and a single error is recorded for the dropped range.
   */
  private tokenizeInternal(text: string, initialMode: string): ILexingResult {
    let i,
      j,
      k,
      matchAltImage,
      longerAlt,
      matchedImage: string | null,
      payload,
      altPayload,
      imageLength,
      group,
      tokType,
      newToken: IToken,
      errLength,
      msg,
      match;
    const orgText = text;
    const orgLength = orgText.length;
    let offset = 0;
    let matchedTokensIndex = 0;
    // initializing the tokensArray to the "guessed" size.
    // guessing too little will still reduce the number of array re-sizes on pushes.
    // guessing too large (Tested by guessing x4 too large) may cost a bit more of memory
    // but would still have a faster runtime by avoiding (All but one) array resizing.
    const guessedNumberOfTokens = this.hasCustom
      ? 0 // a pre-sized array would break custom token pattern APIs, as the matchedTokens array would contain undefined elements.
      : Math.floor(text.length / 10);
    const matchedTokens = new Array(guessedNumberOfTokens);
    const errors: ILexingError[] = [];
    let line = this.trackStartLines ? 1 : undefined;
    let column = this.trackStartLines ? 1 : undefined;
    const groups: any = cloneEmptyGroups(this.emptyGroups);
    const trackLines = this.trackStartLines;
    const lineTerminatorPattern = this.config.lineTerminatorsPattern;

    // State of the currently active mode, refreshed by push_mode / pop_mode.
    let currModePatternsLength = 0;
    let patternIdxToConfig: IPatternConfig[] = [];
    let currCharCodeToPatternIdxToConfig: {
      [charCode: number]: IPatternConfig[];
    } = [];

    const modeStack: string[] = [];

    const emptyArray: IPatternConfig[] = [];
    Object.freeze(emptyArray);

    let isOptimizedMode = false;

    const pop_mode = (popToken: IToken) => {
      // TODO: perhaps avoid this error in the edge case there is no more input?
      if (
        modeStack.length === 1 &&
        // if we have both a POP_MODE and a PUSH_MODE this is in-fact a "transition"
        // So no error should occur.
        popToken.tokenType.PUSH_MODE === undefined
      ) {
        // if we try to pop the last mode the lexer will no longer have ANY mode.
        // thus the pop is ignored, an error will be created and the lexer will continue parsing in the previous mode.
        const msg =
          this.config.errorMessageProvider.buildUnableToPopLexerModeMessage(
            popToken,
          );

        errors.push({
          offset: popToken.startOffset,
          line: popToken.startLine,
          column: popToken.startColumn,
          length: popToken.image.length,
          message: msg,
        });
      } else {
        modeStack.pop();
        const newMode = modeStack.at(-1);
        if (newMode === undefined) {
          // A "transition" token popped the only mode on the stack.
          // There is no previous mode to restore; `handleModes` pushes the
          // token's PUSH_MODE right after this call, which installs the new
          // mode's state. Looking up an `undefined` mode here would throw.
          return;
        }
        patternIdxToConfig = this.patternIdxToConfig[newMode];
        currCharCodeToPatternIdxToConfig =
          this.charCodeToPatternIdxToConfig[newMode];
        currModePatternsLength = patternIdxToConfig.length;
        const modeCanBeOptimized =
          this.canModeBeOptimized[newMode] && this.config.safeMode === false;

        if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
          isOptimizedMode = true;
        } else {
          isOptimizedMode = false;
        }
      }
    };

    function push_mode(this: Lexer, newMode: string) {
      modeStack.push(newMode);
      currCharCodeToPatternIdxToConfig =
        this.charCodeToPatternIdxToConfig[newMode];

      patternIdxToConfig = this.patternIdxToConfig[newMode];
      currModePatternsLength = patternIdxToConfig.length;

      const modeCanBeOptimized =
        this.canModeBeOptimized[newMode] && this.config.safeMode === false;

      if (currCharCodeToPatternIdxToConfig && modeCanBeOptimized) {
        isOptimizedMode = true;
      } else {
        isOptimizedMode = false;
      }
    }

    // this pattern seems to avoid a V8 de-optimization, although that de-optimization does not
    // seem to matter performance wise.
    push_mode.call(this, initialMode);

    let currConfig!: IPatternConfig;

    const recoveryEnabled = this.config.recoveryEnabled;

    while (offset < orgLength) {
      matchedImage = null;
      imageLength = -1;

      const nextCharCode = orgText.charCodeAt(offset);
      let chosenPatternIdxToConfig: IPatternConfig[];
      if (isOptimizedMode) {
        // Only try the patterns registered for the next character.
        const optimizedCharIdx = charCodeToOptimizedIndex(nextCharCode);
        const possiblePatterns =
          currCharCodeToPatternIdxToConfig[optimizedCharIdx];
        chosenPatternIdxToConfig =
          possiblePatterns !== undefined ? possiblePatterns : emptyArray;
      } else {
        chosenPatternIdxToConfig = patternIdxToConfig;
      }

      const chosenPatternsLength = chosenPatternIdxToConfig.length;

      for (i = 0; i < chosenPatternsLength; i++) {
        currConfig = chosenPatternIdxToConfig[i];
        const currPattern = currConfig.pattern;
        payload = null;

        // manually in-lined because > 600 chars won't be in-lined in V8
        const singleCharCode = currConfig.short;
        if (singleCharCode !== false) {
          if (nextCharCode === singleCharCode) {
            // single character string
            imageLength = 1;
            matchedImage = currPattern as string;
          }
        } else if (currConfig.isCustom === true) {
          match = (currPattern as IRegExpExec).exec(
            orgText,
            offset,
            matchedTokens,
            groups,
          );
          if (match !== null) {
            matchedImage = match[0];
            imageLength = matchedImage.length;
            if ((match as CustomPatternMatcherReturn).payload !== undefined) {
              payload = (match as CustomPatternMatcherReturn).payload;
            }
          } else {
            matchedImage = null;
          }
        } else {
          (currPattern as RegExp).lastIndex = offset;
          imageLength = this.matchLength(currPattern as RegExp, text, offset);
        }

        // longer alts handling
        if (imageLength !== -1) {
          // even though this pattern matched we must try another longer alternative.
          // this can be used to prioritize keywords over identifiers
          longerAlt = currConfig.longerAlt;
          if (longerAlt !== undefined) {
            matchedImage = text.substring(offset, offset + imageLength);
            const longerAltLength = longerAlt.length;
            for (k = 0; k < longerAltLength; k++) {
              const longerAltConfig = patternIdxToConfig[longerAlt[k]];
              const longerAltPattern = longerAltConfig.pattern;
              altPayload = null;

              // single Char can never be a longer alt so no need to test it.
              // manually in-lined because > 600 chars won't be in-lined in V8
              if (longerAltConfig.isCustom === true) {
                match = (longerAltPattern as IRegExpExec).exec(
                  orgText,
                  offset,
                  matchedTokens,
                  groups,
                );
                if (match !== null) {
                  matchAltImage = match[0];
                  if (
                    (match as CustomPatternMatcherReturn).payload !== undefined
                  ) {
                    altPayload = (match as CustomPatternMatcherReturn).payload;
                  }
                } else {
                  matchAltImage = null;
                }
              } else {
                (longerAltPattern as RegExp).lastIndex = offset;
                matchAltImage = this.match(
                  longerAltPattern as RegExp,
                  text,
                  offset,
                );
              }

              if (matchAltImage && matchAltImage.length > matchedImage.length) {
                matchedImage = matchAltImage;
                imageLength = matchAltImage.length;
                payload = altPayload;
                currConfig = longerAltConfig;
                // Exit the loop early after matching one of the longer alternatives
                // The first matched alternative takes precedence
                break;
              }
            }
          }
          break;
        }
      }

      // successful match
      if (imageLength !== -1) {
        group = currConfig.group;
        // an `undefined` group means the token is skipped: no token instance is created.
        if (group !== undefined) {
          // for custom Tokens we will already have the `matchedImage`
          matchedImage =
            matchedImage !== null
              ? matchedImage
              : text.substring(offset, offset + imageLength);
          tokType = currConfig.tokenTypeIdx;
          newToken = this.createTokenInstance(
            matchedImage,
            offset,
            tokType,
            currConfig.tokenType,
            line,
            column,
            imageLength,
          );

          this.handlePayload(newToken, payload);

          if (group === false) {
            matchedTokensIndex = this.addToken(
              matchedTokens,
              matchedTokensIndex,
              newToken,
            );
          } else {
            groups[group].push(newToken);
          }
        }

        // line terminator handling
        if (trackLines === true && currConfig.canLineTerminator === true) {
          let numOfLTsInMatch = 0;
          let foundTerminator;
          let lastLTEndOffset: number;
          lineTerminatorPattern.lastIndex = 0;
          do {
            // only for skipped tokens the matchedImage may be null at this point
            matchedImage =
              matchedImage !== null
                ? matchedImage
                : text.substring(offset, offset + imageLength);
            foundTerminator = lineTerminatorPattern.test(matchedImage);
            if (foundTerminator === true) {
              lastLTEndOffset = lineTerminatorPattern.lastIndex - 1;
              numOfLTsInMatch++;
            }
          } while (foundTerminator === true);

          if (numOfLTsInMatch !== 0) {
            line = line! + numOfLTsInMatch;
            column = imageLength - lastLTEndOffset!;
            this.updateTokenEndLineColumnLocation(
              newToken!,
              group!,
              lastLTEndOffset!,
              numOfLTsInMatch,
              line,
              column,
              imageLength,
            );
          } else {
            column = this.computeNewColumn(column!, imageLength);
          }
        } else {
          column = this.computeNewColumn(column!, imageLength);
        }
        offset = offset + imageLength;

        // will be NOOP if no modes present
        this.handleModes(currConfig, pop_mode, push_mode, newToken!);
      } else {
        // error recovery, drop characters until we identify a valid token's start point
        const errorStartOffset = offset;
        const errorLine = line;
        const errorColumn = column;
        // when recovery is disabled the re-sync search below is skipped entirely.
        let foundResyncPoint = recoveryEnabled === false;

        while (foundResyncPoint === false && offset < orgLength) {
          offset++;
          for (j = 0; j < currModePatternsLength; j++) {
            const currConfig = patternIdxToConfig[j];
            const currPattern = currConfig.pattern;

            // manually in-lined because > 600 chars won't be in-lined in V8
            const singleCharCode = currConfig.short;
            if (singleCharCode !== false) {
              if (orgText.charCodeAt(offset) === singleCharCode) {
                // single character string
                foundResyncPoint = true;
              }
            } else if (currConfig.isCustom === true) {
              foundResyncPoint =
                (currPattern as IRegExpExec).exec(
                  orgText,
                  offset,
                  matchedTokens,
                  groups,
                ) !== null;
            } else {
              (currPattern as RegExp).lastIndex = offset;
              foundResyncPoint = (currPattern as RegExp).exec(text) !== null;
            }

            if (foundResyncPoint === true) {
              break;
            }
          }
        }

        errLength = offset - errorStartOffset;
        column = this.computeNewColumn(column!, errLength);
        // at this point we either re-synced or reached the end of the input text
        msg = this.config.errorMessageProvider.buildUnexpectedCharactersMessage(
          orgText,
          errorStartOffset,
          errLength,
          errorLine,
          errorColumn,
          modeStack.at(-1),
        );
        errors.push({
          offset: errorStartOffset,
          line: errorLine,
          column: errorColumn,
          length: errLength,
          message: msg,
        });

        if (recoveryEnabled === false) {
          break;
        }
      }
    }

    // Custom patterns receive the matchedTokens array, so with custom patterns the
    // array is grown using `push` and must not be truncated here.
    // TODO: custom tokens should not push directly??
    if (!this.hasCustom) {
      // if we guessed a too large size for the tokens array this will shrink it to the right size.
      matchedTokens.length = matchedTokensIndex;
    }

    return {
      tokens: matchedTokens,
      groups: groups,
      errors: errors,
    };
  }

  /**
   * Applies the mode changes (pop and/or push) requested by the matched pattern.
   * Replaced by a NOOP in the constructor for single mode lexers.
   */
  private handleModes(
    config: IPatternConfig,
    pop_mode: (tok: IToken) => void,
    push_mode: (this: Lexer, pushMode: string) => void,
    newToken: IToken,
  ) {
    if (config.pop === true) {
      // need to save the PUSH_MODE property as if the mode is popped
      // patternIdxToPopMode is updated to reflect the new mode after popping the stack
      const pushMode = config.push;
      pop_mode(newToken);
      if (pushMode !== undefined) {
        push_mode.call(this, pushMode);
      }
    } else if (config.push !== undefined) {
      push_mode.call(this, config.push);
    }
  }

  // TODO: decrease this under 600 characters? inspect stripping comments option in TSC compiler
  /**
   * Fixes the `endLine` / `endColumn` of a token whose image contains line terminators.
   * Replaced by a NOOP in the constructor when end lines are not tracked.
   *
   * @param lastLTIdx - Index, inside the image, of the last character of the last line terminator.
   * @param line - The line number following the token.
   * @param column - The column number following the token.
   */
  private updateTokenEndLineColumnLocation(
    newToken: IToken,
    group: string | false,
    lastLTIdx: number,
    numOfLTsInMatch: number,
    line: number,
    column: number,
    imageLength: number,
  ): void {
    let lastCharIsLT, fixForEndingInLT;
    if (group !== undefined) {
      // a non-skipped multi line Token, need to update endLine/endColumn
      lastCharIsLT = lastLTIdx === imageLength - 1;
      fixForEndingInLT = lastCharIsLT ? -1 : 0;
      if (!(numOfLTsInMatch === 1 && lastCharIsLT === true)) {
        // if a token ends in a LT that last LT only affects the line numbering of following Tokens
        newToken.endLine = line + fixForEndingInLT;
        // the last LT in a token does not affect the endColumn either as the [columnStart ... columnEnd)
        // inclusive to exclusive range.
        newToken.endColumn = column - 1 + -fixForEndingInLT;
      }
      // else single LT in the last character of a token, no need to modify the endLine/EndColumn
    }
  }

  /**
   * Advances the column by the length of the consumed text.
   * Replaced by an identity function in the constructor when start lines are not tracked.
   */
  private computeNewColumn(oldColumn: number, imageLength: number) {
    return oldColumn + imageLength;
  }

  // Place holder, will be replaced by the correct variant according to the locationTracking option at runtime.
  /* istanbul ignore next - place holder */
  private createTokenInstance!: (...args: any[]) => IToken;

  /** Token factory used for `positionTracking: "onlyOffset"`. */
  private createOffsetOnlyToken(
    image: string,
    startOffset: number,
    tokenTypeIdx: number,
    tokenType: TokenType,
  ) {
    return {
      image,
      startOffset,
      tokenTypeIdx,
      tokenType,
    };
  }

  /** Token factory used for `positionTracking: "onlyStart"`. */
  private createStartOnlyToken(
    image: string,
    startOffset: number,
    tokenTypeIdx: number,
    tokenType: TokenType,
    startLine: number,
    startColumn: number,
  ) {
    return {
      image,
      startOffset,
      startLine,
      startColumn,
      tokenTypeIdx,
      tokenType,
    };
  }

  /**
   * Token factory used for `positionTracking: "full"`.
   * The end line/column are computed as if the token spans a single line.
   */
  private createFullToken(
    image: string,
    startOffset: number,
    tokenTypeIdx: number,
    tokenType: TokenType,
    startLine: number,
    startColumn: number,
    imageLength: number,
  ): IToken {
    return {
      image,
      startOffset,
      endOffset: startOffset + imageLength - 1,
      startLine,
      endLine: startLine,
      startColumn,
      endColumn: startColumn + imageLength - 1,
      tokenTypeIdx,
      tokenType,
    };
  }

  // Place holder, will be replaced by the correct variant according to the hasCustom flag at runtime.
  /* istanbul ignore next - place holder */
  private addToken!: (
    tokenVector: IToken[],
    index: number,
    tokenToAdd: IToken,
  ) => number;

  /** Appends the token using `push`, the index is returned unchanged. */
  private addTokenUsingPush(
    tokenVector: IToken[],
    index: number,
    tokenToAdd: IToken,
  ): number {
    tokenVector.push(tokenToAdd);
    return index;
  }

  /** Writes the token at `index` and returns the next free index. */
  private addTokenUsingMemberAccess(
    tokenVector: IToken[],
    index: number,
    tokenToAdd: IToken,
  ): number {
    tokenVector[index] = tokenToAdd;
    index++;
    return index;
  }

  // Place holder, will be replaced by the correct variant according to the hasCustom flag option at runtime.
  // The `!` is needed because the assignment happens inside a callback invoked by the constructor.
  private handlePayload!: (token: IToken, payload: any) => void;

  /** NOOP payload handler, used when no custom patterns exist. */
  private handlePayloadNoCustom(token: IToken, payload: any): void {}

  /** Attaches a non-null payload (produced by a custom pattern) to the token. */
  private handlePayloadWithCustom(token: IToken, payload: any): void {
    if (payload !== null) {
      token.payload = payload;
    }
  }

  /**
   * Tests `pattern` against `text` and returns the matched text or `null`.
   * The caller must set `pattern.lastIndex` to `offset` beforehand; the end of
   * the match is read back from `pattern.lastIndex`.
   */
  private match(pattern: RegExp, text: string, offset: number): string | null {
    const found = pattern.test(text);
    if (found === true) {
      return text.substring(offset, pattern.lastIndex);
    }
    return null;
  }

  /**
   * Same as {@link Lexer.match} but only returns the length of the match,
   * or `-1` when there is no match.
   */
  private matchLength(
    pattern: RegExp,
    text: string,
    offset: number,
  ): number | -1 {
    const found = pattern.test(text);
    if (found === true) {
      return pattern.lastIndex - offset;
    }
    return -1;
  }

  // Duplicated from the parser's perf trace trait to allow future extraction
  // of the lexer to a separate package.
  /**
   * Runs `phaseImpl` and returns its result. When init tracing is enabled the
   * phase's start and its duration are logged, indented by nesting level, for
   * nesting levels below the configured maximum.
   */
  TRACE_INIT = <T>(phaseDesc: string, phaseImpl: () => T): T => {
    // No need to optimize this using NOOP pattern because
    // It is not called in a hot spot...
    if (this.traceInitPerf === true) {
      this.traceInitIndent++;
      const indent = new Array(this.traceInitIndent + 1).join("\t");
      if (this.traceInitIndent < this.traceInitMaxIdent) {
        console.log(`${indent}--> <${phaseDesc}>`);
      }
      const { time, value } = timer(phaseImpl);
      /* istanbul ignore next - Difficult to reproduce specific performance behavior (>10ms) in tests */
      const traceMethod = time > 10 ? console.warn : console.log;
      if (this.traceInitIndent < this.traceInitMaxIdent) {
        traceMethod(`${indent}<-- <${phaseDesc}> time: ${time}ms`);
      }
      this.traceInitIndent--;
      return value;
    } else {
      return phaseImpl();
    }
  };
}