@scinorandex/slex
Version:
No fuss lexer generator
184 lines (150 loc) • 7.68 kB
text/typescript
import { Slex } from "../src/index.ts";
/**
 * Every kind of token the example lexer can produce.
 *
 * This is a plain numeric enum: members are numbered from 0 in declaration
 * order, and `TokenType[n]` yields the member name for a numeric value.
 * Keep the declaration order stable, since it defines the numeric values.
 */
// prettier-ignore
enum TokenType {
  // End of input
  EOF,

  // Reserved words
  IMPORT,
  VARIABLE, CONSTANT,
  FUNCTION, OBJECT,
  RETURN,
  TRY, CATCH, THROW,
  IF, ELSE, ELIF,
  SWITCH, DEFAULT, CASE, SWITCH_BREAK, SWITCH_GOTO,
  FOR, OF, WHILE, LOOP_BREAK, LOOP_CONTINUE,

  IDENTIFIER,

  // Built-in type names
  NUMBER_TYPE, BOOLEAN_TYPE, STRING_TYPE, VOID_TYPE,

  // Brackets and punctuation
  L_PAREN, R_PAREN,
  L_CURLY_BRACE, R_CURLY_BRACE,
  L_BRACE, R_BRACE,
  EQUALS, COMMA, MINUS_R_ANGLE_BAR, DOT, COLON, SEMICOLON,

  // Logical and bitwise operators
  EXCLAMATION, DOUBLE_AMPERSAND, DOUBLE_PIPE,
  CARAT, AMPERSAND, PIPE,

  // Relational and equality operators
  DOUBLE_EQUALS, EXCLAMATION_EQUALS,
  L_ANGLE_BAR, R_ANGLE_BAR,
  L_ANGLE_BAR_EQUALS, R_ANGLE_BAR_EQUALS,

  // Shift operators
  DOUBLE_L_ANGLE_BAR, DOUBLE_R_ANGLE_BAR,

  // Arithmetic operators
  PLUS, MINUS, STAR, FORWARD_SLASH, PERCENT, DOUBLE_STAR,

  // Increment / decrement operators
  DOUBLE_PLUS, DOUBLE_MINUS,

  // Literals
  STRING_LITERAL, NUMBER_LITERAL, BOOLEAN_LITERAL, NULL_LITERAL,

  // Comments
  SINGLE_LINE_COMMENT, MULTI_LINE_COMMENT
}
// Lexer generator for the example language. The type arguments are the token
// enum and an object type carrying a `sourcePath` string.
const lexerGenerator = new Slex<TokenType, { sourcePath: string }>({
  // Token type used for end of input.
  EOF_TYPE: TokenType.EOF,

  // The IDENTIFIER pattern also matches every reserved word. Answering `true`
  // whenever the current candidate is IDENTIFIER is what gives reserved words
  // priority over plain identifiers.
  // NOTE(review): the exact contract of this callback is defined by Slex;
  // confirm against its documentation.
  isHigherPrecedence: ({ current }) => current === TokenType.IDENTIFIER,

  // Comment tokens are listed here so that they are ignored.
  ignoreTokens: [TokenType.SINGLE_LINE_COMMENT, TokenType.MULTI_LINE_COMMENT],
});
// Symbol rules, part 1: arithmetic and increment/decrement operators.
// Each row is [rule name, pattern, token type]; rows are registered top to
// bottom. Every operator character in a pattern is written with a leading `$`
// (presumably Slex's escape for a literal character; confirm in its docs).
for (const [ruleName, pattern, tokenType] of [
  ["plus", "$+", TokenType.PLUS],
  ["minus", "$-", TokenType.MINUS],
  ["star", "$*", TokenType.STAR],
  ["forward_slash", "$/", TokenType.FORWARD_SLASH],
  ["percent", "$%", TokenType.PERCENT],
  ["double_star", "$*$*", TokenType.DOUBLE_STAR],
  ["double_plus", "$+$+", TokenType.DOUBLE_PLUS],
  ["double_minus", "$-$-", TokenType.DOUBLE_MINUS],
] as const) {
  lexerGenerator.addRule(ruleName, pattern, tokenType);
}
// Shift operators. Naming follows the single-character rules in this file,
// where "l_angle_bar" is `<` and "r_angle_bar" is `>`: `<<` is therefore the
// double left angle bar and `>>` the double right angle bar.
lexerGenerator.addRule("double_l_angle_bar", "$<$<", TokenType.DOUBLE_L_ANGLE_BAR);
lexerGenerator.addRule("double_r_angle_bar", "$>$>", TokenType.DOUBLE_R_ANGLE_BAR);
// Symbol rules, part 2: bitwise, relational, equality and logical operators,
// followed by punctuation and brackets. Each row is
// [rule name, pattern, token type]; rows are registered top to bottom.
for (const [ruleName, pattern, tokenType] of [
  // bitwise
  ["pipe", "$|", TokenType.PIPE],
  ["ampersand", "$&", TokenType.AMPERSAND],
  ["carat", "$^", TokenType.CARAT],
  // relational and equality
  ["l_angle_bar", "$<", TokenType.L_ANGLE_BAR],
  ["l_angle_bar_equals", "$<$=", TokenType.L_ANGLE_BAR_EQUALS],
  ["r_angle_bar", "$>", TokenType.R_ANGLE_BAR],
  ["r_angle_bar_equals", "$>$=", TokenType.R_ANGLE_BAR_EQUALS],
  ["exclamation_equals", "$!$=", TokenType.EXCLAMATION_EQUALS],
  ["double_equals", "$=$=", TokenType.DOUBLE_EQUALS],
  // logical
  ["double_ampersand", "$&$&", TokenType.DOUBLE_AMPERSAND],
  ["double_pipe", "$|$|", TokenType.DOUBLE_PIPE],
  ["exclamation", "$!", TokenType.EXCLAMATION],
  // punctuation
  ["minus_r_angle_bar", "$-$>", TokenType.MINUS_R_ANGLE_BAR],
  ["equals", "$=", TokenType.EQUALS],
  ["comma", "$,", TokenType.COMMA],
  ["dot", "$.", TokenType.DOT],
  ["colon", "$:", TokenType.COLON],
  ["semicolon", "$;", TokenType.SEMICOLON],
  // brackets
  ["l_paren", "$(", TokenType.L_PAREN],
  ["r_paren", "$)", TokenType.R_PAREN],
  ["l_brace", "$[", TokenType.L_BRACE],
  ["r_brace", "$]", TokenType.R_BRACE],
  ["l_curly_brace", "${", TokenType.L_CURLY_BRACE],
  ["r_curly_brace", "$}", TokenType.R_CURLY_BRACE],
] as const) {
  lexerGenerator.addRule(ruleName, pattern, tokenType);
}
// Helper rules: registered without a token type and referenced by name, via
// `${rule_name}`, from the string and number literal rules.
// The `__letter`, `__decimal_digit`, `__symbols` and `__control_character`
// rules are not defined in this file (presumably built in to Slex; confirm).

// Any single character allowed inside a string literal.
lexerGenerator.addRule("character", "${__letter}|${__decimal_digit}|${__symbols}|${__control_character}");

// One or more digits, a dot, then one or more digits. The fractional part must
// reference `__decimal_digit`, the same rule used for the integer part.
lexerGenerator.addRule("float_number", "(${__decimal_digit})+$.(${__decimal_digit})+");

// One or more decimal digits.
lexerGenerator.addRule("decimal_number", "(${__decimal_digit})+");

// NOTE(review): the octal prefix is `0e`, not the more common `0o`. Left
// unchanged because it may be a deliberate choice of the example language.
lexerGenerator.addRule("octal_number", "0e(0|1|2|3|4|5|6|7)+");

// `0x` followed by one or more hex digits, in either letter case.
lexerGenerator.addRule("hexadecimal_number", "0x(${__decimal_digit}|a|b|c|d|e|f|A|B|C|D|E|F)+");

// `0b` followed by one or more binary digits.
lexerGenerator.addRule("binary_number", "0b(0|1)+");
// Literal tokens.

// String literal: either double-quoted (and then allowed to contain `'`) or
// single-quoted (and then allowed to contain `"`). The callback receives the
// matched text, drops its first and last character (the surrounding quotes)
// and replaces every backslash-n pair with a real newline character.
lexerGenerator.addRule(
  "string_literal",
  "$\"(${character} | $')*$\" | $'(${character} | $\")*$'",
  TokenType.STRING_LITERAL,
  (str) => {
    const withoutQuotes = str.substring(1, str.length - 1);
    return withoutQuotes.replaceAll("\\n", "\n");
  }
);

// Number literal: any one of the numeric helper rules.
lexerGenerator.addRule(
  "number_literal",
  "${float_number}|${decimal_number}|${octal_number}|${binary_number}|${hexadecimal_number}",
  TokenType.NUMBER_LITERAL
);

// The two boolean words, then the single null word.
lexerGenerator.addRule("boolean_literal", "faker|shaker", TokenType.BOOLEAN_LITERAL);
lexerGenerator.addRule("null_literal", "cooldown", TokenType.NULL_LITERAL);
// Reserved words. Every keyword rule uses the keyword itself as both the rule
// name and the pattern, so the table only lists [keyword, token type].
// Rows are registered top to bottom.
for (const [keyword, tokenType] of [
  // declarations
  ["item", TokenType.VARIABLE],
  ["rune", TokenType.CONSTANT],
  ["skill", TokenType.FUNCTION],
  ["steal", TokenType.IMPORT],
  ["build", TokenType.OBJECT],
  // conditionals
  ["canwin", TokenType.IF],
  ["remake", TokenType.ELIF],
  ["lose", TokenType.ELSE],
  // switch
  ["channel", TokenType.SWITCH],
  ["teleport", TokenType.CASE],
  ["recall", TokenType.DEFAULT],
  ["flash", TokenType.SWITCH_GOTO],
  ["cancel", TokenType.SWITCH_BREAK],
  // loops
  ["wave", TokenType.WHILE],
  ["cannon", TokenType.FOR],
  ["clear", TokenType.LOOP_BREAK],
  ["next", TokenType.LOOP_CONTINUE],
  ["of", TokenType.OF],
  // error handling and return
  ["support", TokenType.TRY],
  ["carry", TokenType.CATCH],
  ["feed", TokenType.THROW],
  ["recast", TokenType.RETURN],
] as const) {
  lexerGenerator.addRule(keyword, keyword, tokenType);
}
// Built-in type names: [rule name, keyword, token type], registered in order.
for (const [ruleName, keyword, tokenType] of [
  ["number_type", "stats", TokenType.NUMBER_TYPE],
  ["boolean_type", "goat", TokenType.BOOLEAN_TYPE],
  ["string_type", "message", TokenType.STRING_TYPE],
  ["void_type", "passive", TokenType.VOID_TYPE],
] as const) {
  lexerGenerator.addRule(ruleName, keyword, tokenType);
}

// Identifier: a letter or underscore, followed by any number of letters,
// decimal digits or underscores.
lexerGenerator.addRule(
  "identifier",
  "(${__letter}|$_)(${__letter}|${__decimal_digit}|$_)*",
  TokenType.IDENTIFIER
);
// Comment rules.
// NOTE(review): the postfix `!` in these patterns appears to mean "anything
// except the preceding group"; confirm against the Slex pattern syntax.

// `//` followed by any run of characters other than a newline.
const singleLineCommentPattern = "$/$/(($\n)!)*";
// `/*`, then a body in which a `*` is never directly followed by `/`, then `*/`.
const multiLineCommentPattern = "$/$*(($*)!|($*($/)!))*$*$/";

lexerGenerator.addRule("single_line_comment", singleLineCommentPattern, TokenType.SINGLE_LINE_COMMENT);
lexerGenerator.addRule("multi_line_comment", multiLineCommentPattern, TokenType.MULTI_LINE_COMMENT);
// Sample program in the example language, used as lexer input below.
// It contains both a single-line and a multi-line comment. The lines are
// joined with "\n" and the text ends with a trailing newline.
const stacktrace_example = [
  "item factorial: skill (stats) -> stats =",
  "skill (item n: stats): stats -> {",
  "canwin(n >= 2) recast n * factorial(n - 1);",
  "// only print the stack in the base case",
  "/**",
  "* This is an example multiline comment",
  "*/",
  "dump_call_stack();",
  "recast 1;",
  "};",
  "broadcast(factorial(5));",
  "", // produces the final newline
].join("\n");
// Create a lexer over the sample program. The callback returns the object
// holding the source path for this run.
const lexer = lexerGenerator.generate(stacktrace_example, () => {
  return { sourcePath: "stacktrace.example" };
});

// Drain the lexer, printing one line per token: the enum member name (looked
// up from the numeric token type), the lexeme, and its column and line.
while (lexer.hasNextToken()) {
  const { type, lexeme, column, line } = lexer.getNextToken();
  console.log(`Token: ${TokenType[type]}. Lexeme: ${lexeme}. Column: ${column}. Line: ${line}`);
}