// antlr-ng - Next generation ANTLR Tool
var __defProp = Object.defineProperty;
var __name = (target, value) => __defProp(target, "name", { value, configurable: true });
import {
ActionTransition,
AtomTransition,
BasicState,
CodePointTransitions,
CommonToken,
HashMap,
IntervalSet,
IntStream,
Lexer,
LexerChannelAction,
LexerCustomAction,
LexerModeAction,
LexerMoreAction,
LexerPopModeAction,
LexerPushModeAction,
LexerSkipAction,
LexerTypeAction,
NotSetTransition,
SetTransition,
Token,
TokensStartState
} from "antlr4ng";
import { Constants } from "../Constants.js";
import { CodeGenerator } from "../codegen/CodeGenerator.js";
import { ANTLRv4Parser } from "../generated/ANTLRv4Parser.js";
import { CharSupport } from "../misc/CharSupport.js";
import { EscapeSequenceParsing, ResultType } from "../misc/EscapeSequenceParsing.js";
import { Character } from "../support/Character.js";
import { IssueCode } from "../tool/Issues.js";
import { ActionAST } from "../tool/ast/ActionAST.js";
import { RangeAST } from "../tool/ast/RangeAST.js";
import { ATNOptimizer } from "./ATNOptimizer.js";
import { ICharSetParseState, Mode } from "./ICharsetParserState.js";
import { ParserATNFactory } from "./ParserATNFactory.js";
import { RangeBorderCharactersData } from "./RangeBorderCharactersData.js";
class LexerATNFactory extends ParserATNFactory {
static {
__name(this, "LexerATNFactory");
}
codegenTemplates;
/** Maps from an action index to a {@link LexerAction} object. */
indexToActionMap = /* @__PURE__ */ new Map();
/** Maps from a {@link LexerAction} object to the action index. */
actionToIndexMap = new HashMap();
ruleCommands = [];
constructor(g, codeGenerator) {
super(g);
codeGenerator ??= new CodeGenerator(g);
this.codegenTemplates = codeGenerator.templates;
}
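/**
 * Builds the lexer ATN: creates a start state for every mode, maps rules to token types,
 * constructs the per-rule sub-machines, collects the lexer actions, links the non-fragment
 * rules of each mode to that mode's start state, and finally optimizes the result.
 */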
createATN() {
for (const [modeName] of this.g.modes) {
const startState = this.newState(TokensStartState);
this.atn.modeNameToStartState.set(modeName, startState);
this.atn.modeToStartState.push(startState);
this.atn.defineDecisionState(startState);
}
this.atn.ruleToTokenType = new Array(this.g.rules.size);
for (const r of this.g.rules.values()) {
this.atn.ruleToTokenType[r.index] = this.g.getTokenType(r.name);
}
this.doCreateATN(Array.from(this.g.rules.values()));
this.atn.lexerActions = new Array(this.indexToActionMap.size);
for (const [index, value] of this.indexToActionMap.entries()) {
this.atn.lexerActions[index] = value;
}
for (const [modeName] of this.g.modes) {
const rules = this.g.modes.get(modeName);
const startState = this.atn.modeNameToStartState.get(modeName) ?? null;
for (const r of rules) {
if (!r.isFragment()) {
const s = this.atn.ruleToStartState[r.index];
this.epsilon(startState, s);
}
}
}
ATNOptimizer.optimize(this.g, this.atn);
this.checkEpsilonClosure();
return this.atn;
}
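/** Clears the lexer commands collected for the previous rule before delegating to the base factory. */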
rule(ruleAST, name, blk) {
this.ruleCommands.splice(0, this.ruleCommands.length);
return super.rule(ruleAST, name, blk);
}
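/**
 * Builds an action transition. A whitespace-only action string yields a plain epsilon edge;
 * otherwise the action is wrapped in a {@link LexerCustomAction}, or the prebuilt
 * {@link LexerAction} is used when one is passed alongside the AST node.
 */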
action(...args) {
let node;
let lexerAction;
if (args.length === 1) {
if (typeof args[0] === "string") {
const [action] = args;
if (action.trim().length === 0) {
const left2 = this.newState(BasicState);
const right2 = this.newState(BasicState);
this.epsilon(left2, right2);
return { left: left2, right: right2 };
}
node = new ActionAST(CommonToken.fromType(ANTLRv4Parser.ACTION, action));
this.currentRule.defineActionInAlt(this.currentOuterAlt, node);
} else {
[node] = args;
}
const ruleIndex = this.currentRule.index;
const actionIndex = this.g.lexerActions.get(node);
lexerAction = new LexerCustomAction(ruleIndex, actionIndex);
} else {
[node, lexerAction] = args;
}
const left = this.newState(BasicState);
const right = this.newState(BasicState);
const isCtxDependent = false;
const lexerActionIndex = this.getLexerActionIndex(lexerAction);
const a = new ActionTransition(right, this.currentRule.index, lexerActionIndex, isCtxDependent);
left.addTransition(a);
node.atnState = left;
return { left, right };
}
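/** Connects an alternative to its trailing lexer commands with an epsilon edge. */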
lexerAltCommands(alt, commands) {
this.epsilon(alt.right, commands.left);
return { left: alt.left, right: commands.right };
}
lexerCallCommand(id, arg) {
return this.lexerCallCommandOrCommand(id, arg);
}
lexerCommand(id) {
return this.lexerCallCommandOrCommand(id);
}
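/** Builds the transition for a character range such as 'a'..'z', validating both bounds first. */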
range(a, b) {
const left = this.newState(BasicState);
const right = this.newState(BasicState);
const t1 = CharSupport.getCharValueFromGrammarCharLiteral(a.getText());
const t2 = CharSupport.getCharValueFromGrammarCharLiteral(b.getText());
if (this.checkRange(a, b, t1, t2)) {
left.addTransition(this.createTransition(right, t1, t2, a));
}
a.atnState = left;
b.atnState = left;
return { left, right };
}
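/**
 * Builds a (possibly inverted) set transition from the elements of a set: ranges, char set
 * literals, and single-character string literals. Token references inside a set are reported
 * as errors.
 */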
set(associatedAST, alts, invert) {
const left = this.newState(BasicState);
const right = this.newState(BasicState);
const set = new IntervalSet();
for (const t of alts) {
if (t.getType() === ANTLRv4Parser.RANGE) {
const a = CharSupport.getCharValueFromGrammarCharLiteral(t.children[0].getText());
const b = CharSupport.getCharValueFromGrammarCharLiteral(t.children[1].getText());
if (this.checkRange(t.children[0], t.children[1], a, b)) {
this.checkRangeAndAddToSet(associatedAST, t, set, a, b, this.currentRule.caseInsensitive, null);
}
} else if (t.getType() === ANTLRv4Parser.LEXER_CHAR_SET) {
set.addSet(this.getSetFromCharSetLiteral(t));
} else if (t.getType() === ANTLRv4Parser.STRING_LITERAL) {
const c = CharSupport.getCharValueFromGrammarCharLiteral(t.getText());
if (c !== -1) {
this.checkCharAndAddToSet(associatedAST, set, c);
} else {
this.g.tool.errorManager.grammarError(
IssueCode.InvalidLiteralInLexerSet,
this.g.fileName,
t.token,
t.getText()
);
}
} else if (t.getType() === ANTLRv4Parser.TOKEN_REF) {
this.g.tool.errorManager.grammarError(
IssueCode.UnsupportedReferenceInLexerSet,
this.g.fileName,
t.token,
t.getText()
);
}
}
if (invert) {
left.addTransition(new NotSetTransition(right, set));
} else {
let transition;
const intervals = Array.from(set);
if (intervals.length === 1) {
const interval = intervals[0];
transition = CodePointTransitions.createWithCodePointRange(right, interval.start, interval.stop);
} else {
transition = new SetTransition(right, set);
}
left.addTransition(transition);
}
associatedAST.atnState = left;
return { left, right };
}
/**
 * For a lexer, a string is a sequence of characters to match. That is, "fog" is treated as 'f' 'o' 'g',
 * not as a single transition in the DFA. The machine is o-'f'->o-'o'->o-'g'->o and has n+1 states for
 * n characters. If the "caseInsensitive" option is enabled, "fog" is treated as
 * o-('f'|'F')->o-('o'|'O')->o-('g'|'G')->o.
 */
stringLiteral(stringLiteralAST) {
const chars = stringLiteralAST.getText();
const left = this.newState(BasicState);
let right;
const s = CharSupport.getStringFromGrammarStringLiteral(chars, this.g, stringLiteralAST.token);
if (s === null) {
return { left, right: left };
}
let prev = left;
right = null;
for (const char of s) {
right = this.newState(BasicState);
const codePoint = char.codePointAt(0);
prev.addTransition(this.createTransition(right, codePoint, codePoint, stringLiteralAST));
prev = right;
}
stringLiteralAST.atnState = left;
return { left, right };
}
/** `[Aa\t \u1234a-z\]\p{Letter}\-]` char sets */
charSetLiteral(charSetAST) {
const left = this.newState(BasicState);
const right = this.newState(BasicState);
const set = this.getSetFromCharSetLiteral(charSetAST);
left.addTransition(new SetTransition(right, set));
charSetAST.atnState = left;
return { left, right };
}
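/** Handles EOF as an atom transition; any other token reference is treated like a rule reference. */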
tokenRef(node) {
if (node.getText() === "EOF") {
const left = this.newState(BasicState);
const right = this.newState(BasicState);
left.addTransition(new AtomTransition(right, IntStream.EOF));
return { left, right };
}
return this._ruleRef(node);
}
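/**
 * Parses the text of a `[...]` char set literal into an {@link IntervalSet}, reporting grammar
 * errors for invalid escape sequences, properties inside ranges, and empty sets.
 */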
getSetFromCharSetLiteral(charSetAST) {
let text = charSetAST.getText();
text = text.substring(1, text.length - 1);
const set = new IntervalSet();
let state = ICharSetParseState.none;
for (let i = 0; i < text.length; ) {
if (state.mode === Mode.Error) {
return new IntervalSet();
}
const c = text.codePointAt(i);
let offset = Character.charCount(c);
if (c === 92) { // 92 (0x5C): backslash introduces an escape sequence
const escapeParseResult = EscapeSequenceParsing.parseEscape(text, i);
switch (escapeParseResult.type) {
case ResultType.Invalid: {
const invalid = text.substring(
escapeParseResult.startOffset,
escapeParseResult.startOffset + escapeParseResult.parseLength
);
this.g.tool.errorManager.grammarError(
IssueCode.InvalidEscapeSequence,
this.g.fileName,
charSetAST.token,
invalid
);
state = ICharSetParseState.error;
break;
}
case ResultType.CodePoint: {
state = this.applyPrevStateAndMoveToCodePoint(
charSetAST,
set,
state,
escapeParseResult.codePoint
);
break;
}
case ResultType.Property: {
state = this.applyPrevStateAndMoveToProperty(
charSetAST,
set,
state,
escapeParseResult.propertyIntervalSet
);
break;
}
default:
}
offset = escapeParseResult.parseLength;
} else {
if (c === 45 && !state.inRange && i !== 0 && i !== text.length - 1 && state.mode !== Mode.None) { // 45: "-" opens a range only between two set elements
if (state.mode === Mode.PrevProperty) {
this.g.tool.errorManager.grammarError(
IssueCode.UnicodePropertyNotAllowedInRange,
this.g.fileName,
charSetAST.token,
charSetAST.getText()
);
state = ICharSetParseState.error;
} else {
state = {
mode: state.mode,
inRange: true,
prevCodePoint: state.prevCodePoint,
prevProperty: state.prevProperty
};
}
} else {
state = this.applyPrevStateAndMoveToCodePoint(charSetAST, set, state, c);
}
}
i += offset;
}
if (state.mode === Mode.Error) {
return new IntervalSet();
}
this.applyPrevState(charSetAST, set, state);
if (set.length === 0) {
this.g.tool.errorManager.grammarError(
IssueCode.EmptyStringAndSetsNotAllowed,
this.g.fileName,
charSetAST.token,
"[]"
);
}
return set;
}
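/** Returns the index of the given lexer action, registering it in both lookup maps on first use. */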
getLexerActionIndex(lexerAction) {
let lexerActionIndex = this.actionToIndexMap.get(lexerAction);
if (lexerActionIndex === void 0) {
lexerActionIndex = this.actionToIndexMap.size;
this.actionToIndexMap.set(lexerAction, lexerActionIndex);
this.indexToActionMap.set(lexerActionIndex, lexerAction);
}
return lexerActionIndex;
}
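/** Validates a character range, reporting invalid literals and inverted (empty) ranges. */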
checkRange(leftNode, rightNode, leftValue, rightValue) {
let result = true;
if (leftValue === -1) {
result = false;
this.g.tool.errorManager.grammarError(
IssueCode.InvalidLiteralInLexerSet,
this.g.fileName,
leftNode.token,
leftNode.getText()
);
}
if (rightValue === -1) {
result = false;
this.g.tool.errorManager.grammarError(
IssueCode.InvalidLiteralInLexerSet,
this.g.fileName,
rightNode.token,
rightNode.getText()
);
}
if (!result) {
return false;
}
if (rightValue < leftValue) {
this.g.tool.errorManager.grammarError(
IssueCode.EmptyStringAndSetsNotAllowed,
this.g.fileName,
leftNode.parent.token,
leftNode.getText() + ".." + rightNode.getText()
);
return false;
}
return true;
}
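/**
 * Translates a lexer command (with or without an argument) into either a native
 * {@link LexerAction} or, failing that, a rendered code generation template wrapped in a
 * custom action. Unknown commands and argument mismatches are reported as errors.
 */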
lexerCallCommandOrCommand(id, arg) {
const lexerAction = this.createLexerAction(id, arg);
if (lexerAction) {
return this.action(id, lexerAction);
}
const cmdST = this.codegenTemplates.getInstanceOf("Lexer" + CharSupport.capitalize(id.getText()) + "Command");
if (cmdST === null) {
this.g.tool.errorManager.grammarError(
IssueCode.InvalidLexerCommand,
this.g.fileName,
id.token,
id.getText()
);
return this.epsilon(id);
}
const callCommand = arg !== void 0;
const containsArg = cmdST.impl?.formalArguments?.has("arg") ?? false;
if (callCommand !== containsArg) {
const errorType = callCommand ? IssueCode.UnwantedLexerCommandArgument : IssueCode.MisingLexerCommandArgument;
this.g.tool.errorManager.grammarError(errorType, this.g.fileName, id.token, id.getText());
return this.epsilon(id);
}
if (callCommand) {
cmdST.add("arg", arg.getText());
cmdST.add("grammar", arg.g);
}
return this.action(cmdST.render());
}
applyPrevStateAndMoveToCodePoint(charSetAST, set, state, codePoint) {
if (state.inRange) {
if (state.prevCodePoint > codePoint) {
this.g.tool.errorManager.grammarError(
IssueCode.EmptyStringAndSetsNotAllowed,
this.g.fileName,
charSetAST.token,
CharSupport.getRangeEscapedString(state.prevCodePoint, codePoint)
);
}
this.checkRangeAndAddToSet(charSetAST, set, state.prevCodePoint, codePoint);
state = ICharSetParseState.none;
} else {
this.applyPrevState(charSetAST, set, state);
state = {
mode: Mode.PrevCodePoint,
inRange: false,
prevCodePoint: codePoint,
prevProperty: new IntervalSet()
};
}
return state;
}
applyPrevStateAndMoveToProperty(charSetAST, set, state, property) {
if (state.inRange) {
this.g.tool.errorManager.grammarError(
IssueCode.UnicodePropertyNotAllowedInRange,
this.g.fileName,
charSetAST.token,
charSetAST.getText()
);
return ICharSetParseState.error;
} else {
this.applyPrevState(charSetAST, set, state);
state = { mode: Mode.PrevProperty, inRange: false, prevCodePoint: -1, prevProperty: property };
}
return state;
}
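/** Commits the previously seen code point or Unicode property (if any) to the set. */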
applyPrevState(charSetAST, set, state) {
switch (state.mode) {
case Mode.None:
case Mode.Error: {
break;
}
case Mode.PrevCodePoint: {
this.checkCharAndAddToSet(charSetAST, set, state.prevCodePoint);
break;
}
case Mode.PrevProperty: {
set.addSet(state.prevProperty);
break;
}
default:
}
}
checkCharAndAddToSet(ast, set, c) {
this.checkRangeAndAddToSet(ast, ast, set, c, c, this.currentRule.caseInsensitive, null);
}
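/**
 * Adds the range [a, b] to the set, expanding it into its lower- and upper-case ranges when the
 * rule is case insensitive and reporting a collision if a character is already in the set.
 * Accepts either (ast, set, a, b) or the full seven-argument form.
 */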
checkRangeAndAddToSet(...args) {
switch (args.length) {
case 4: {
const [mainAst, set, a, b] = args;
this.checkRangeAndAddToSet(mainAst, mainAst, set, a, b, this.currentRule.caseInsensitive, null);
break;
}
case 7: {
const [rootAst, ast, set, a, b, caseInsensitive, previousStatus] = args;
let status;
const charactersData = RangeBorderCharactersData.getAndCheckCharactersData(
a,
b,
this.g,
ast,
!(previousStatus?.notImpliedCharacters ?? false)
);
if (caseInsensitive) {
status = { collision: false, notImpliedCharacters: charactersData.mixOfLowerAndUpperCharCase };
if (charactersData.isSingleRange()) {
status = this.checkRangeAndAddToSet(rootAst, ast, set, a, b, false, status);
} else {
status = this.checkRangeAndAddToSet(
rootAst,
ast,
set,
charactersData.lowerFrom,
charactersData.lowerTo,
false,
status
);
status = this.checkRangeAndAddToSet(
rootAst,
ast,
set,
charactersData.upperFrom,
charactersData.upperTo,
false,
status
);
}
} else {
let charactersCollision = previousStatus?.collision ?? false;
if (!charactersCollision) {
for (let i = a; i <= b; i++) {
if (set.contains(i)) {
let setText;
if (rootAst.children.length === 0) {
setText = rootAst.getText();
} else {
setText = "";
for (const child of rootAst.children) {
if (child instanceof RangeAST) {
setText += child.children[0].getText() + "..";
setText += child.children[1].getText();
} else {
setText += child.getText();
}
setText += " | ";
}
setText = setText.substring(0, setText.length - 3);
}
const charsString = a === b ? String.fromCodePoint(a) : String.fromCodePoint(a) + "-" + String.fromCodePoint(b);
this.g.tool.errorManager.grammarError(
IssueCode.CharactersCollisionInSet,
this.g.fileName,
ast.token,
charsString,
setText
);
charactersCollision = true;
break;
}
}
}
status = {
collision: charactersCollision,
notImpliedCharacters: charactersData.mixOfLowerAndUpperCharCase
};
set.addRange(a, b);
}
return status;
}
default: {
throw new Error("Invalid number of arguments");
}
}
}
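/** Creates a transition for [from, to], split into lower- and upper-case code point ranges when the rule is case insensitive. */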
createTransition(target, from, to, tree) {
const charactersData = RangeBorderCharactersData.getAndCheckCharactersData(from, to, this.g, tree, true);
if (this.currentRule.caseInsensitive) {
if (charactersData.isSingleRange()) {
return CodePointTransitions.createWithCodePointRange(target, from, to);
} else {
const intervalSet = new IntervalSet();
intervalSet.addRange(charactersData.lowerFrom, charactersData.lowerTo);
intervalSet.addRange(charactersData.upperFrom, charactersData.upperTo);
return new SetTransition(target, intervalSet);
}
} else {
return CodePointTransitions.createWithCodePointRange(target, from, to);
}
}
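/**
 * Maps a lexer command name (skip, more, popMode, mode, pushMode, type, channel) to its native
 * {@link LexerAction}, or returns undefined so the caller falls back to a code generation template.
 */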
createLexerAction(id, arg) {
const command = id.getText();
this.checkCommands(command, id.token);
switch (command) {
case "skip": {
if (!arg) {
return LexerSkipAction.instance;
}
break;
}
case "more": {
if (!arg) {
return LexerMoreAction.instance;
}
break;
}
case "popMode": {
if (!arg) {
return LexerPopModeAction.instance;
}
break;
}
default: {
if (arg) {
const name = arg.getText();
switch (command) {
case "mode": {
const mode = this.getModeConstantValue(name, arg.token);
if (mode === void 0) {
return void 0;
}
return new LexerModeAction(mode);
}
case "pushMode": {
const mode = this.getModeConstantValue(name, arg.token);
if (mode === void 0) {
return void 0;
}
return new LexerPushModeAction(mode);
}
case "type": {
const type = this.getTokenConstantValue(name, arg.token);
if (type === void 0) {
return void 0;
}
return new LexerTypeAction(type);
}
case "channel": {
const channel = this.getChannelConstantValue(name, arg.token);
if (channel === void 0) {
return void 0;
}
return new LexerChannelAction(channel);
}
default:
}
}
break;
}
}
return void 0;
}
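/** Records the command for the current rule and reports duplicated or mutually incompatible commands (pushMode and popMode may repeat). */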
checkCommands(command, commandToken) {
if (command !== "pushMode" && command !== "popMode") {
if (this.ruleCommands.includes(command)) {
this.g.tool.errorManager.grammarError(
IssueCode.DuplicatedCommand,
this.g.fileName,
commandToken,
command
);
}
let firstCommand;
if (command === "skip") {
if (this.ruleCommands.includes("more")) {
firstCommand = "more";
} else if (this.ruleCommands.includes("type")) {
firstCommand = "type";
} else if (this.ruleCommands.includes("channel")) {
firstCommand = "channel";
}
} else if (command === "more") {
if (this.ruleCommands.includes("skip")) {
firstCommand = "skip";
} else if (this.ruleCommands.includes("type")) {
firstCommand = "type";
} else if (this.ruleCommands.includes("channel")) {
firstCommand = "channel";
}
} else if (command === "type" || command === "channel") {
if (this.ruleCommands.includes("more")) {
firstCommand = "more";
} else if (this.ruleCommands.includes("skip")) {
firstCommand = "skip";
}
}
if (firstCommand) {
this.g.tool.errorManager.grammarError(
IssueCode.IncompatibleCommands,
this.g.fileName,
commandToken,
firstCommand,
command
);
}
}
this.ruleCommands.push(command);
}
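/** Resolves a mode reference to its numeric constant: DEFAULT_MODE, a declared mode name, or an integer literal; reports an error otherwise. */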
getModeConstantValue(modeName, token) {
if (!modeName || !token) {
return void 0;
}
if (modeName === "DEFAULT_MODE") {
return Lexer.DEFAULT_MODE;
}
if (Constants.COMMON_CONSTANTS.has(modeName)) {
this.g.tool.errorManager.grammarError(
IssueCode.ModeConflictsWithCommonConstants,
this.g.fileName,
token,
token.text
);
return void 0;
}
const modeNames = [...this.g.modes.keys()];
const mode = modeNames.indexOf(modeName);
if (mode >= 0) {
return mode;
}
const result = Number.parseInt(modeName);
if (isNaN(result)) {
this.g.tool.errorManager.grammarError(
IssueCode.ConstantValueIsNotARecognizedModeName,
this.g.fileName,
token,
token.text
);
return void 0;
}
return result;
}
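/** Resolves a token reference to its type: EOF, a declared token name, or an integer literal; reports an error otherwise. */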
getTokenConstantValue(tokenName, token) {
if (tokenName === void 0 || token === void 0) {
return void 0;
}
if (tokenName === "EOF") {
return Lexer.EOF;
}
if (Constants.COMMON_CONSTANTS.has(tokenName)) {
this.g.tool.errorManager.grammarError(
IssueCode.TokenConflictsWithCommonConstants,
this.g.fileName,
token,
token.text
);
return void 0;
}
const tokenType = this.g.getTokenType(tokenName);
if (tokenType !== Token.INVALID_TYPE) {
return tokenType;
}
const result = Number.parseInt(tokenName);
if (isNaN(result)) {
this.g.tool.errorManager.grammarError(
IssueCode.ConstantValueIsNotARecognizedTokenName,
this.g.fileName,
token,
token.text
);
return void 0;
}
return result;
}
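/** Resolves a channel reference to its value: a built-in channel, a declared channel name, or an integer literal; reports an error otherwise. */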
getChannelConstantValue(channelName, token) {
if (channelName === void 0 || token === void 0) {
return void 0;
}
if (channelName === "HIDDEN") {
return Lexer.HIDDEN;
}
if (channelName === "DEFAULT_TOKEN_CHANNEL") {
return Lexer.DEFAULT_TOKEN_CHANNEL;
}
if (Constants.COMMON_CONSTANTS.has(channelName)) {
this.g.tool.errorManager.grammarError(
IssueCode.ChannelConflictsWithCommonConstants,
this.g.fileName,
token,
token.text
);
return void 0;
}
const channelValue = this.g.getChannelValue(channelName);
if (channelValue >= Token.MIN_USER_CHANNEL_VALUE) {
return channelValue;
}
const result = Number.parseInt(channelName);
if (isNaN(result)) {
this.g.tool.errorManager.grammarError(
IssueCode.ConstantValueIsNotARecognizedChannelName,
this.g.fileName,
token,
token.text
);
return void 0;
}
return result;
}
}
export {
LexerATNFactory
};