UNPKG

antlr-ng

Version:

Next generation ANTLR Tool

765 lines (764 loc) 25.2 kB
// Helper prelude emitted by the bundler: (re)defines the `name` property of a class or function.
const __defProp = Object.defineProperty;
const __name = (target, value) => __defProp(target, "name", { value, configurable: true });

import {
    ActionTransition, AtomTransition, BasicState, CodePointTransitions, CommonToken, HashMap, IntervalSet,
    IntStream, Lexer, LexerChannelAction, LexerCustomAction, LexerModeAction, LexerMoreAction,
    LexerPopModeAction, LexerPushModeAction, LexerSkipAction, LexerTypeAction, NotSetTransition, SetTransition,
    Token, TokensStartState
} from "antlr4ng";
import { Constants } from "../Constants.js";
import { CodeGenerator } from "../codegen/CodeGenerator.js";
import { ANTLRv4Parser } from "../generated/ANTLRv4Parser.js";
import { CharSupport } from "../misc/CharSupport.js";
import { EscapeSequenceParsing, ResultType } from "../misc/EscapeSequenceParsing.js";
import { Character } from "../support/Character.js";
import { IssueCode } from "../tool/Issues.js";
import { ActionAST } from "../tool/ast/ActionAST.js";
import { RangeAST } from "../tool/ast/RangeAST.js";
import { ATNOptimizer } from "./ATNOptimizer.js";
import { ICharSetParseState, Mode } from "./ICharsetParserState.js";
import { ParserATNFactory } from "./ParserATNFactory.js";
import { RangeBorderCharactersData } from "./RangeBorderCharactersData.js";

/**
 * For a lexer command (key), the commands it must not be combined with in the same rule, listed in the order in
 * which they are looked up. The first one already recorded for the rule is the one named in the error report.
 * A Map is used (instead of an object literal) so that command names coming from the grammar text can never hit
 * an inherited property.
 */
const INCOMPATIBLE_COMMANDS = new Map([
    ["skip", ["more", "type", "channel"]],
    ["more", ["skip", "type", "channel"]],
    ["type", ["more", "skip"]],
    ["channel", ["more", "skip"]],
]);

/**
 * ATN factory for lexer grammars. Each element method creates the ATN states/transitions for one grammar element
 * and returns them as a `{ left, right }` handle (entry and exit state).
 */
class LexerATNFactory extends ParserATNFactory {
    static {
        __name(this, "LexerATNFactory");
    }

    /** The template group of the code generator, used to render lexer commands that have no built-in action. */
    codegenTemplates;

    /** Maps from an action index to a {@link LexerAction} object. */
    indexToActionMap = /* @__PURE__ */ new Map();

    /** Maps from a {@link LexerAction} object to the action index. */
    actionToIndexMap = new HashMap();

    /** The names of the lexer commands seen so far in the rule currently being processed (see `rule()`). */
    ruleCommands = [];

    /**
     * @param g The grammar to build the ATN for.
     * @param codeGenerator Optional code generator. If `null` or `undefined`, a new `CodeGenerator` is created
     *                      for `g`.
     */
    constructor(g, codeGenerator) {
        super(g);

        const generator = codeGenerator ?? new CodeGenerator(g);
        this.codegenTemplates = generator.templates;
    }

    /**
     * Creates the complete lexer ATN: one `TokensStartState` per mode, the rule-to-token-type table, the rule
     * sub-ATNs, the lexer action table and the epsilon edges from each mode start state to its non-fragment
     * rules. Finally the ATN is optimized and checked for epsilon closures.
     *
     * @returns The ATN held by this factory.
     */
    createATN() {
        // Mode start states first, one decision state per mode.
        for (const [modeName] of this.g.modes) {
            const startState = this.newState(TokensStartState);
            this.atn.modeNameToStartState.set(modeName, startState);
            this.atn.modeToStartState.push(startState);
            this.atn.defineDecisionState(startState);
        }

        // Rule index -> token type.
        this.atn.ruleToTokenType = new Array(this.g.rules.size);
        for (const r of this.g.rules.values()) {
            this.atn.ruleToTokenType[r.index] = this.g.getTokenType(r.name);
        }

        // Rule sub-ATNs. This also fills the action maps via `getLexerActionIndex()`.
        this.doCreateATN(Array.from(this.g.rules.values()));

        this.atn.lexerActions = new Array(this.indexToActionMap.size);
        for (const [index, value] of this.indexToActionMap.entries()) {
            this.atn.lexerActions[index] = value;
        }

        // Link each mode start state to the start state of every non-fragment rule in that mode.
        for (const [modeName] of this.g.modes) {
            const rules = this.g.modes.get(modeName);
            const startState = this.atn.modeNameToStartState.get(modeName) ?? null;
            for (const r of rules) {
                if (!r.isFragment()) {
                    const s = this.atn.ruleToStartState[r.index];
                    this.epsilon(startState, s);
                }
            }
        }

        ATNOptimizer.optimize(this.g, this.atn);
        this.checkEpsilonClosure();

        return this.atn;
    }

    /**
     * Clears the list of commands recorded for the previous rule, then delegates to the base implementation.
     */
    rule(ruleAST, name, blk) {
        this.ruleCommands.length = 0;

        return super.rule(ruleAST, name, blk);
    }

    /**
     * Creates an action transition. Supported call forms:
     * - `action(text: string)`: wraps the text in a new `ActionAST` and registers it with the current rule.
     *   Blank text yields a plain epsilon handle instead.
     * - `action(node)`: uses the given action node.
     * - `action(node, lexerAction)`: uses the given node and the given lexer action.
     *
     * For the one-argument forms a `LexerCustomAction` is created from the current rule index and the index
     * stored for the node in the grammar's lexer actions.
     *
     * @returns The `{ left, right }` handle of the new transition.
     */
    action(...args) {
        let node;
        let lexerAction;

        if (args.length === 1) {
            if (typeof args[0] === "string") {
                const [action] = args;
                if (action.trim().length === 0) {
                    const emptyLeft = this.newState(BasicState);
                    const emptyRight = this.newState(BasicState);
                    this.epsilon(emptyLeft, emptyRight);

                    return { left: emptyLeft, right: emptyRight };
                }

                node = new ActionAST(CommonToken.fromType(ANTLRv4Parser.ACTION, action));
                this.currentRule.defineActionInAlt(this.currentOuterAlt, node);
            } else {
                [node] = args;
            }

            const ruleIndex = this.currentRule.index;
            const actionIndex = this.g.lexerActions.get(node);
            lexerAction = new LexerCustomAction(ruleIndex, actionIndex);
        } else {
            [node, lexerAction] = args;
        }

        const left = this.newState(BasicState);
        const right = this.newState(BasicState);
        const isCtxDependent = false;
        const lexerActionIndex = this.getLexerActionIndex(lexerAction);
        const transition = new ActionTransition(right, this.currentRule.index, lexerActionIndex, isCtxDependent);
        left.addTransition(transition);
        node.atnState = left;

        return { left, right };
    }

    /**
     * Chains the commands handle behind the alternative handle with an epsilon edge.
     *
     * @returns A handle spanning from the start of `alt` to the end of `commands`.
     */
    lexerAltCommands(alt, commands) {
        this.epsilon(alt.right, commands.left);

        return { left: alt.left, right: commands.right };
    }

    /** Handles a lexer command with an argument, e.g. `-> type(X)`. */
    lexerCallCommand(id, arg) {
        return this.lexerCallCommandOrCommand(id, arg);
    }

    /** Handles a lexer command without an argument, e.g. `-> skip`. */
    lexerCommand(id) {
        return this.lexerCallCommandOrCommand(id);
    }

    /**
     * Creates the handle for a character range `a..b`. The transition is only added if both literals are valid
     * and the range is not empty (see `checkRange()`); the two states are created in any case.
     */
    range(a, b) {
        const left = this.newState(BasicState);
        const right = this.newState(BasicState);
        const from = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
        const to = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
        if (this.checkRange(a, b, from, to)) {
            left.addTransition(this.createTransition(right, from, to, a));
        }

        a.atnState = left;
        b.atnState = left;

        return { left, right };
    }

    /**
     * Creates the handle for a set of alternatives (ranges, char sets and single-char string literals).
     * Token references are reported as unsupported. With `invert` a `NotSetTransition` is used, otherwise a
     * code point (range) transition for a single interval or a `SetTransition` for everything else.
     */
    set(associatedAST, alts, invert) {
        const left = this.newState(BasicState);
        const right = this.newState(BasicState);
        const set = new IntervalSet();

        for (const t of alts) {
            const type = t.getType();
            if (type === ANTLRv4Parser.RANGE) {
                const [fromNode, toNode] = [t.children[0], t.children[1]];
                const a = CharSupport.getCharValueFromGrammarCharLiteral(fromNode.getText());
                const b = CharSupport.getCharValueFromGrammarCharLiteral(toNode.getText());
                if (this.checkRange(fromNode, toNode, a, b)) {
                    this.checkRangeAndAddToSet(associatedAST, t, set, a, b, this.currentRule.caseInsensitive, null);
                }
            } else if (type === ANTLRv4Parser.LEXER_CHAR_SET) {
                set.addSet(this.getSetFromCharSetLiteral(t));
            } else if (type === ANTLRv4Parser.STRING_LITERAL) {
                const c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
                if (c !== -1) {
                    this.checkCharAndAddToSet(associatedAST, set, c);
                } else {
                    this.g.tool.errorManager.grammarError(
                        IssueCode.InvalidLiteralInLexerSet,
                        this.g.fileName,
                        t.token,
                        t.getText()
                    );
                }
            } else if (type === ANTLRv4Parser.TOKEN_REF) {
                this.g.tool.errorManager.grammarError(
                    IssueCode.UnsupportedReferenceInLexerSet,
                    this.g.fileName,
                    t.token,
                    t.getText()
                );
            }
        }

        if (invert) {
            left.addTransition(new NotSetTransition(right, set));
        } else {
            const intervals = Array.from(set);
            let transition;
            if (intervals.length === 1) {
                const [interval] = intervals;
                transition = CodePointTransitions.createWithCodePointRange(right, interval.start, interval.stop);
            } else {
                transition = new SetTransition(right, set);
            }

            left.addTransition(transition);
        }

        associatedAST.atnState = left;

        return { left, right };
    }

    /**
     * For a lexer, a string is a sequence of char to match. That is, "fog" is treated as 'f' 'o' 'g' not as a
     * single transition in the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states for n characters.
     * If "caseInsensitive" option is enabled, "fog" will be treated as o-('f'|'F') -> o-('o'|'O') -> o-('g'|'G').
     *
     * If the literal cannot be converted (`null` result), a handle with `left === right` is returned.
     * Note: for an empty converted string no state is appended and `right` in the returned handle is `null`.
     */
    stringLiteral(stringLiteralAST) {
        const chars = stringLiteralAST.getText();
        const left = this.newState(BasicState);

        const s = CharSupport.getStringFromGrammarStringLiteral(chars, this.g, stringLiteralAST.token);
        if (s === null) {
            return { left, right: left };
        }

        let prev = left;
        let right = null;

        // Iterating a string with for..of walks it by code point, not by UTF-16 code unit.
        for (const char of s) {
            right = this.newState(BasicState);
            const codePoint = char.codePointAt(0);
            prev.addTransition(this.createTransition(right, codePoint, codePoint, stringLiteralAST));
            prev = right;
        }

        stringLiteralAST.atnState = left;

        return { left, right };
    }

    /** `[Aa\t \u1234a-z\]\p{Letter}\-]` char sets */
    charSetLiteral(charSetAST) {
        const left = this.newState(BasicState);
        const right = this.newState(BasicState);
        const set = this.getSetFromCharSetLiteral(charSetAST);
        left.addTransition(new SetTransition(right, set));
        charSetAST.atnState = left;

        return { left, right };
    }

    /**
     * A reference to `EOF` becomes an atom transition on `IntStream.EOF`; every other token reference is
     * treated as a rule reference.
     */
    tokenRef(node) {
        if (node.getText() === "EOF") {
            const left = this.newState(BasicState);
            const right = this.newState(BasicState);
            left.addTransition(new AtomTransition(right, IntStream.EOF));

            return { left, right };
        }

        return this._ruleRef(node);
    }

    /**
     * Parses the text of a `[...]` char set literal into an interval set. Handles escape sequences (code points
     * and Unicode properties) and `a-b` ranges. On a parse error the error is reported and an empty set is
     * returned; an empty result set is reported as well.
     *
     * @returns The set of code points described by the literal.
     */
    getSetFromCharSetLiteral(charSetAST) {
        // Strip the enclosing brackets.
        const fullText = charSetAST.getText();
        const text = fullText.substring(1, fullText.length - 1);

        const set = new IntervalSet();
        let state = ICharSetParseState.none;

        for (let i = 0; i < text.length;) {
            if (state.mode === Mode.Error) {
                return new IntervalSet();
            }

            const c = text.codePointAt(i);
            let offset = Character.charCount(c);

            if (c === 92) { // Backslash: start of an escape sequence.
                const escapeParseResult = EscapeSequenceParsing.parseEscape(text, i);
                switch (escapeParseResult.type) {
                    case ResultType.Invalid: {
                        const invalid = text.substring(
                            escapeParseResult.startOffset,
                            escapeParseResult.startOffset + escapeParseResult.parseLength
                        );
                        this.g.tool.errorManager.grammarError(
                            IssueCode.InvalidEscapeSequence,
                            this.g.fileName,
                            charSetAST.token,
                            invalid
                        );
                        state = ICharSetParseState.error;
                        break;
                    }

                    case ResultType.CodePoint: {
                        state = this.applyPrevStateAndMoveToCodePoint(
                            charSetAST,
                            set,
                            state,
                            escapeParseResult.codePoint
                        );
                        break;
                    }

                    case ResultType.Property: {
                        state = this.applyPrevStateAndMoveToProperty(
                            charSetAST,
                            set,
                            state,
                            escapeParseResult.propertyIntervalSet
                        );
                        break;
                    }

                    default:
                }

                offset = escapeParseResult.parseLength;
            } else {
                // A hyphen (45) starts a range only if it is neither the first nor the last char, we are not in
                // a range already and there is a previous element it can apply to.
                const startsRange = c === 45 && !state.inRange && i !== 0 && i !== text.length - 1
                    && state.mode !== Mode.None;
                if (!startsRange) {
                    state = this.applyPrevStateAndMoveToCodePoint(charSetAST, set, state, c);
                } else if (state.mode === Mode.PrevProperty) {
                    this.g.tool.errorManager.grammarError(
                        IssueCode.UnicodePropertyNotAllowedInRange,
                        this.g.fileName,
                        charSetAST.token,
                        charSetAST.getText()
                    );
                    state = ICharSetParseState.error;
                } else {
                    state = {
                        mode: state.mode,
                        inRange: true,
                        prevCodePoint: state.prevCodePoint,
                        prevProperty: state.prevProperty
                    };
                }
            }

            i += offset;
        }

        if (state.mode === Mode.Error) {
            return new IntervalSet();
        }

        // Flush the element that is still pending in the parse state.
        this.applyPrevState(charSetAST, set, state);

        if (set.length === 0) {
            this.g.tool.errorManager.grammarError(
                IssueCode.EmptyStringAndSetsNotAllowed,
                this.g.fileName,
                charSetAST.token,
                "[]"
            );
        }

        return set;
    }

    /**
     * Returns the index of the given lexer action. An action not seen before is registered in both action maps
     * under the next free index (the current size of `actionToIndexMap`).
     */
    getLexerActionIndex(lexerAction) {
        let lexerActionIndex = this.actionToIndexMap.get(lexerAction);
        if (lexerActionIndex === undefined) {
            lexerActionIndex = this.actionToIndexMap.size;
            this.actionToIndexMap.set(lexerAction, lexerActionIndex);
            this.indexToActionMap.set(lexerActionIndex, lexerAction);
        }

        return lexerActionIndex;
    }

    /**
     * Validates the borders of a range. A border value of -1 marks an invalid literal. Both borders are checked
     * (and reported) before the range order is tested.
     *
     * @returns `true` if both values are valid and `leftValue <= rightValue`, otherwise `false` (after the
     *          problem was reported).
     */
    checkRange(leftNode, rightNode, leftValue, rightValue) {
        let bordersValid = true;

        if (leftValue === -1) {
            bordersValid = false;
            this.g.tool.errorManager.grammarError(
                IssueCode.InvalidLiteralInLexerSet,
                this.g.fileName,
                leftNode.token,
                leftNode.getText()
            );
        }

        if (rightValue === -1) {
            bordersValid = false;
            this.g.tool.errorManager.grammarError(
                IssueCode.InvalidLiteralInLexerSet,
                this.g.fileName,
                rightNode.token,
                rightNode.getText()
            );
        }

        if (!bordersValid) {
            return false;
        }

        if (rightValue < leftValue) {
            this.g.tool.errorManager.grammarError(
                IssueCode.EmptyStringAndSetsNotAllowed,
                this.g.fileName,
                leftNode.parent.token,
                `${leftNode.getText()}..${rightNode.getText()}`
            );

            return false;
        }

        return true;
    }

    /**
     * Common implementation for lexer commands with and without argument. Built-in commands are turned into
     * their lexer action. Everything else is looked up as template `Lexer<Command>Command`; an unknown command
     * or a mismatch between the given argument and the template's `arg` parameter is reported and results in an
     * epsilon handle.
     */
    lexerCallCommandOrCommand(id, arg) {
        const lexerAction = this.createLexerAction(id, arg);
        if (lexerAction) {
            return this.action(id, lexerAction);
        }

        const templateName = `Lexer${CharSupport.capitalize(id.getText())}Command`;
        const cmdST = this.codegenTemplates.getInstanceOf(templateName);
        if (cmdST === null) {
            this.g.tool.errorManager.grammarError(
                IssueCode.InvalidLexerCommand,
                this.g.fileName,
                id.token,
                id.getText()
            );

            return this.epsilon(id);
        }

        const callCommand = arg !== undefined;
        const containsArg = cmdST.impl?.formalArguments?.has("arg") ?? false;
        if (callCommand !== containsArg) {
            const errorType = callCommand
                ? IssueCode.UnwantedLexerCommandArgument
                : IssueCode.MisingLexerCommandArgument;
            this.g.tool.errorManager.grammarError(errorType, this.g.fileName, id.token, id.getText());

            return this.epsilon(id);
        }

        if (callCommand) {
            cmdST.add("arg", arg.getText());
            cmdST.add("grammar", arg.g);
        }

        return this.action(cmdST.render());
    }

    /**
     * Consumes a code point while parsing a char set. Inside a range the code point closes the range (a
     * reversed range is reported, the range is still passed on to `checkRangeAndAddToSet()`). Otherwise the
     * pending element is flushed and the code point becomes the new pending element.
     *
     * @returns The new parse state.
     */
    applyPrevStateAndMoveToCodePoint(charSetAST, set, state, codePoint) {
        if (state.inRange) {
            if (state.prevCodePoint > codePoint) {
                this.g.tool.errorManager.grammarError(
                    IssueCode.EmptyStringAndSetsNotAllowed,
                    this.g.fileName,
                    charSetAST.token,
                    CharSupport.getRangeEscapedString(state.prevCodePoint, codePoint)
                );
            }

            this.checkRangeAndAddToSet(charSetAST, set, state.prevCodePoint, codePoint);

            return ICharSetParseState.none;
        }

        this.applyPrevState(charSetAST, set, state);

        return {
            mode: Mode.PrevCodePoint,
            inRange: false,
            prevCodePoint: codePoint,
            prevProperty: new IntervalSet()
        };
    }

    /**
     * Consumes a Unicode property while parsing a char set. A property is not allowed as the end of a range
     * (reported, error state returned). Otherwise the pending element is flushed and the property becomes the
     * new pending element.
     *
     * @returns The new parse state.
     */
    applyPrevStateAndMoveToProperty(charSetAST, set, state, property) {
        if (state.inRange) {
            this.g.tool.errorManager.grammarError(
                IssueCode.UnicodePropertyNotAllowedInRange,
                this.g.fileName,
                charSetAST.token,
                charSetAST.getText()
            );

            return ICharSetParseState.error;
        }

        this.applyPrevState(charSetAST, set, state);

        return {
            mode: Mode.PrevProperty,
            inRange: false,
            prevCodePoint: -1,
            prevProperty: property
        };
    }

    /** Adds the element pending in the given parse state (a code point or a property set), if any, to `set`. */
    applyPrevState(charSetAST, set, state) {
        switch (state.mode) {
            case Mode.PrevCodePoint: {
                this.checkCharAndAddToSet(charSetAST, set, state.prevCodePoint);
                break;
            }

            case Mode.PrevProperty: {
                set.addSet(state.prevProperty);
                break;
            }

            // Mode.None, Mode.Error and anything else: nothing is pending.
            default:
        }
    }

    /** Adds the single code point `c` to `set`, using the case sensitivity of the current rule. */
    checkCharAndAddToSet(ast, set, c) {
        this.checkRangeAndAddToSet(ast, ast, set, c, c, this.currentRule.caseInsensitive, null);
    }

    /**
     * Adds the range `a..b` to a set and reports characters that are already contained in it. Call forms:
     * - `(mainAst, set, a, b)`: shortcut which uses `mainAst` as root and as element AST, the case sensitivity
     *   of the current rule and no previous status. Returns nothing.
     * - `(rootAst, ast, set, a, b, caseInsensitive, previousStatus)`: the full form. For case-insensitive
     *   handling the range is added through recursive case-sensitive calls (once for a single range, otherwise
     *   once for the lower and once for the upper case range). Returns a status object
     *   `{ collision, notImpliedCharacters }`.
     *
     * @throws Error for any other number of arguments.
     */
    checkRangeAndAddToSet(...args) {
        switch (args.length) {
            case 4: {
                const [mainAst, set, a, b] = args;
                this.checkRangeAndAddToSet(mainAst, mainAst, set, a, b, this.currentRule.caseInsensitive, null);
                break;
            }

            case 7: {
                const [rootAst, ast, set, a, b, caseInsensitive, previousStatus] = args;
                const charactersData = RangeBorderCharactersData.getAndCheckCharactersData(
                    a,
                    b,
                    this.g,
                    ast,
                    !(previousStatus?.notImpliedCharacters ?? false)
                );

                if (caseInsensitive) {
                    let status = {
                        collision: false,
                        notImpliedCharacters: charactersData.mixOfLowerAndUpperCharCase
                    };

                    if (charactersData.isSingleRange()) {
                        status = this.checkRangeAndAddToSet(rootAst, ast, set, a, b, false, status);
                    } else {
                        status = this.checkRangeAndAddToSet(
                            rootAst,
                            ast,
                            set,
                            charactersData.lowerFrom,
                            charactersData.lowerTo,
                            false,
                            status
                        );
                        status = this.checkRangeAndAddToSet(
                            rootAst,
                            ast,
                            set,
                            charactersData.upperFrom,
                            charactersData.upperTo,
                            false,
                            status
                        );
                    }

                    return status;
                }

                // Case-sensitive: report at most one collision per element (a collision that was already
                // recorded in the previous status suppresses the check).
                let charactersCollision = previousStatus?.collision ?? false;
                if (!charactersCollision) {
                    for (let i = a; i <= b; i++) {
                        if (!set.contains(i)) {
                            continue;
                        }

                        // Text of the entire set for the message: either the root text itself or its children
                        // joined by " | " (ranges printed as `from..to`).
                        const setText = rootAst.children.length === 0
                            ? rootAst.getText()
                            : Array.from(rootAst.children, (child) => {
                                return child instanceof RangeAST
                                    ? `${child.children[0].getText()}..${child.children[1].getText()}`
                                    : child.getText();
                            }).join(" | ");

                        const charsString = a === b
                            ? String.fromCodePoint(a)
                            : `${String.fromCodePoint(a)}-${String.fromCodePoint(b)}`;
                        this.g.tool.errorManager.grammarError(
                            IssueCode.CharactersCollisionInSet,
                            this.g.fileName,
                            ast.token,
                            charsString,
                            setText
                        );
                        charactersCollision = true;
                        break;
                    }
                }

                set.addRange(a, b);

                return {
                    collision: charactersCollision,
                    notImpliedCharacters: charactersData.mixOfLowerAndUpperCharCase
                };
            }

            default: {
                throw new Error("Invalid number of arguments");
            }
        }
    }

    /**
     * Creates the transition to `target` for the code point range `from..to`. For a case-insensitive rule whose
     * range is not a single range, a `SetTransition` over the lower and upper case ranges is created instead of
     * a plain code point (range) transition.
     */
    createTransition(target, from, to, tree) {
        // Always evaluated, also for case-sensitive rules, because it checks the range borders too.
        const charactersData = RangeBorderCharactersData.getAndCheckCharactersData(from, to, this.g, tree, true);

        if (!this.currentRule.caseInsensitive || charactersData.isSingleRange()) {
            return CodePointTransitions.createWithCodePointRange(target, from, to);
        }

        const intervalSet = new IntervalSet();
        intervalSet.addRange(charactersData.lowerFrom, charactersData.lowerTo);
        intervalSet.addRange(charactersData.upperFrom, charactersData.upperTo);

        return new SetTransition(target, intervalSet);
    }

    /**
     * Creates the lexer action for a built-in command: `skip`, `more` and `popMode` (without argument) and
     * `mode`, `pushMode`, `type` and `channel` (with argument). The command is always recorded and checked via
     * `checkCommands()` first.
     *
     * @returns The lexer action or `undefined` if the command is not built-in, is used with the wrong arity or
     *          its argument could not be resolved.
     */
    createLexerAction(id, arg) {
        const command = id.getText();
        this.checkCommands(command, id.token);

        if (!arg) {
            switch (command) {
                case "skip": {
                    return LexerSkipAction.instance;
                }

                case "more": {
                    return LexerMoreAction.instance;
                }

                case "popMode": {
                    return LexerPopModeAction.instance;
                }

                default: {
                    return undefined;
                }
            }
        }

        switch (command) {
            case "mode": {
                const mode = this.getModeConstantValue(arg.getText(), arg.token);

                return mode === undefined ? undefined : new LexerModeAction(mode);
            }

            case "pushMode": {
                const mode = this.getModeConstantValue(arg.getText(), arg.token);

                return mode === undefined ? undefined : new LexerPushModeAction(mode);
            }

            case "type": {
                const type = this.getTokenConstantValue(arg.getText(), arg.token);

                return type === undefined ? undefined : new LexerTypeAction(type);
            }

            case "channel": {
                const channel = this.getChannelConstantValue(arg.getText(), arg.token);

                return channel === undefined ? undefined : new LexerChannelAction(channel);
            }

            default: {
                return undefined;
            }
        }
    }

    /**
     * Records `command` for the current rule. Except for `pushMode` and `popMode`, a command used twice in a
     * rule is reported as duplicate, and a command that is incompatible with an already recorded one is
     * reported together with that earlier command.
     */
    checkCommands(command, commandToken) {
        if (command !== "pushMode" && command !== "popMode") {
            if (this.ruleCommands.includes(command)) {
                this.g.tool.errorManager.grammarError(
                    IssueCode.DuplicatedCommand,
                    this.g.fileName,
                    commandToken,
                    command
                );
            }

            const firstCommand = INCOMPATIBLE_COMMANDS.get(command)
                ?.find((other) => this.ruleCommands.includes(other));
            if (firstCommand) {
                this.g.tool.errorManager.grammarError(
                    IssueCode.IncompatibleCommands,
                    this.g.fileName,
                    commandToken,
                    firstCommand,
                    command
                );
            }
        }

        this.ruleCommands.push(command);
    }

    /**
     * Resolves the argument of a `mode`/`pushMode` command: `DEFAULT_MODE`, the position of a mode name in the
     * grammar's modes or a decimal number. Names of common constants and unresolvable values are reported.
     *
     * @param modeName The text of the argument.
     * @param token The token of the argument, used for error reports.
     *
     * @returns The mode number or `undefined` if it could not be determined.
     */
    getModeConstantValue(modeName, token) {
        if (!modeName || !token) {
            return undefined;
        }

        if (modeName === "DEFAULT_MODE") {
            return Lexer.DEFAULT_MODE;
        }

        if (Constants.COMMON_CONSTANTS.has(modeName)) {
            this.g.tool.errorManager.grammarError(
                IssueCode.ModeConflictsWithCommonConstants,
                this.g.fileName,
                token,
                token.text
            );

            return undefined;
        }

        const mode = [...this.g.modes.keys()].indexOf(modeName);
        if (mode >= 0) {
            return mode;
        }

        // Explicit radix: the value is a decimal number, never auto-detected hex.
        const result = Number.parseInt(modeName, 10);
        if (Number.isNaN(result)) {
            this.g.tool.errorManager.grammarError(
                IssueCode.ConstantValueIsNotARecognizedModeName,
                this.g.fileName,
                token,
                token.text
            );

            return undefined;
        }

        return result;
    }

    /**
     * Resolves the argument of a `type` command: `EOF`, a token name known to the grammar or a decimal number.
     * Names of common constants and unresolvable values are reported.
     *
     * @param tokenName The text of the argument.
     * @param token The token of the argument, used for error reports.
     *
     * @returns The token type or `undefined` if it could not be determined.
     */
    getTokenConstantValue(tokenName, token) {
        if (tokenName === undefined || token === undefined) {
            return undefined;
        }

        if (tokenName === "EOF") {
            return Lexer.EOF;
        }

        if (Constants.COMMON_CONSTANTS.has(tokenName)) {
            this.g.tool.errorManager.grammarError(
                IssueCode.TokenConflictsWithCommonConstants,
                this.g.fileName,
                token,
                token.text
            );

            return undefined;
        }

        const tokenType = this.g.getTokenType(tokenName);
        if (tokenType !== Token.INVALID_TYPE) {
            return tokenType;
        }

        // Explicit radix: the value is a decimal number, never auto-detected hex.
        const result = Number.parseInt(tokenName, 10);
        if (Number.isNaN(result)) {
            this.g.tool.errorManager.grammarError(
                IssueCode.ConstantValueIsNotARecognizedTokenName,
                this.g.fileName,
                token,
                token.text
            );

            return undefined;
        }

        return result;
    }

    /**
     * Resolves the argument of a `channel` command: `HIDDEN`, `DEFAULT_TOKEN_CHANNEL`, a channel name known to
     * the grammar (value at or above `Token.MIN_USER_CHANNEL_VALUE`) or a decimal number. Names of common
     * constants and unresolvable values are reported.
     *
     * @param channelName The text of the argument.
     * @param token The token of the argument, used for error reports.
     *
     * @returns The channel number or `undefined` if it could not be determined.
     */
    getChannelConstantValue(channelName, token) {
        if (channelName === undefined || token === undefined) {
            return undefined;
        }

        if (channelName === "HIDDEN") {
            return Lexer.HIDDEN;
        }

        if (channelName === "DEFAULT_TOKEN_CHANNEL") {
            return Lexer.DEFAULT_TOKEN_CHANNEL;
        }

        if (Constants.COMMON_CONSTANTS.has(channelName)) {
            this.g.tool.errorManager.grammarError(
                IssueCode.ChannelConflictsWithCommonConstants,
                this.g.fileName,
                token,
                token.text
            );

            return undefined;
        }

        const channelValue = this.g.getChannelValue(channelName);
        if (channelValue >= Token.MIN_USER_CHANNEL_VALUE) {
            return channelValue;
        }

        // Explicit radix: the value is a decimal number, never auto-detected hex.
        const result = Number.parseInt(channelName, 10);
        if (Number.isNaN(result)) {
            this.g.tool.errorManager.grammarError(
                IssueCode.ConstantValueIsNotARecognizedChannelName,
                this.g.fileName,
                token,
                token.text
            );

            return undefined;
        }

        return result;
    }
}

export { LexerATNFactory };