@scinorandex/slex
Version:
No fuss lexer generator
359 lines • 13.5 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.RegexLexer = exports.RegexParser = exports.RegexIntrinsicNode = exports.RegexNode = exports.RegexEngineParsingResult = void 0;
const Character_1 = require("./utils/Character");
const Character = (0, Character_1.initializeCharacter)({});
/**
 * Value object describing the outcome of one matching attempt.
 *
 * Holds three fields exactly as passed to the constructor:
 *  - `success`: truthy when a token was found,
 *  - `lexeme`: the matched text,
 *  - `from`: an identifier of where the match came from
 *    (NOTE(review): its exact meaning is decided by the caller — confirm).
 */
class RegexEngineParsingResult {
    /**
     * @param success Whether a token was found.
     * @param lexeme The matched text.
     * @param from Origin of the match, rendered after "From: " by toString().
     */
    constructor(success, lexeme, from) {
        Object.assign(this, { success, lexeme, from });
    }
    /**
     * Human-readable summary: the origin and lexeme on success, otherwise a
     * fixed "No token found" message.
     */
    toString() {
        if (!this.success) {
            return "No token found";
        }
        return `From: ${this.from}. Lexeme: ${this.lexeme}`;
    }
}
exports.RegexEngineParsingResult = RegexEngineParsingResult;
/**
 * Base class for every node of the regex syntax tree.
 *
 * Carries two optional attachments, both `null` until set:
 *  - `emit`: the token type exposed through getTokenType()/setTokenType(),
 *  - `transformer`: the value exposed through getTransformer()/setTransformer()
 *    (NOTE(review): presumably a lexeme post-processing callback — confirm
 *    against the code that consumes it).
 */
class RegexNode {
    constructor() {
        Object.assign(this, { emit: null, transformer: null });
    }
    /** @returns The token type attached to this node, or `null` if none was set. */
    getTokenType() {
        const { emit } = this;
        return emit;
    }
    /** Attaches the token type later returned by getTokenType(). */
    setTokenType(emit) {
        Object.assign(this, { emit });
    }
    /** @returns The transformer attached to this node, or `null` if none was set. */
    getTransformer() {
        const { transformer } = this;
        return transformer;
    }
    /** Attaches the transformer later returned by getTransformer(). */
    setTransformer(transformer) {
        Object.assign(this, { transformer });
    }
}
exports.RegexNode = RegexNode;
/**
 * Sequence node: matches its child nodes one after another.
 */
class RegexConcatenationNode extends RegexNode {
    /**
     * @param nodes Child nodes, in the order they must match.
     */
    constructor(nodes) {
        super();
        this.nodes = nodes;
    }
    /** Renders the children back to back, without any separator. */
    toString() {
        let ret = "";
        for (const node of this.nodes) {
            ret += node.toString();
        }
        return ret;
    }
    /**
     * Computes every lexeme the whole sequence can match at the start of
     * `restString`.
     *
     * @param restString Remaining input; matching starts at its first character.
     * @param environment Variable lookup forwarded unchanged to the children.
     * @param negated Negation flag forwarded unchanged to the children.
     * @returns One string per successful combination of child matches; an
     *          empty array when any child in the chain cannot match.
     */
    getMatches(restString, environment, negated) {
        // Every entry is the text consumed so far by one viable path.
        let prefixes = [""];
        for (const node of this.nodes) {
            const extended = [];
            for (const prefix of prefixes) {
                // Drop the consumed text by LENGTH. Searching for the prefix
                // as a substring (String.prototype.replace) only works when
                // the reported lexeme is literally the start of the input;
                // otherwise nothing is consumed and the next child is handed
                // input that was already matched.
                const rest = restString.slice(prefix.length);
                for (const match of node.getMatches(rest, environment, negated)) {
                    extended.push(prefix + match);
                }
            }
            prefixes = extended;
        }
        return prefixes;
    }
}
/**
 * Alternation node: matches whatever any one of its alternatives matches.
 */
class RegexEitherNode extends RegexNode {
    /**
     * @param nodes The alternatives, in declaration order.
     */
    constructor(nodes) {
        super();
        this.nodes = nodes;
    }
    /** Renders the alternatives separated by "|". */
    toString() {
        const lastIndex = this.nodes.length - 1;
        return this.nodes.reduce((rendered, alternative, position) => {
            const separator = position === lastIndex ? "" : "|";
            return rendered + alternative.toString() + separator;
        }, "");
    }
    /**
     * Collects the matches of every alternative against the same input.
     *
     * @param restString Remaining input handed to each alternative.
     * @param environment Variable lookup forwarded unchanged.
     * @param negated Negation flag forwarded unchanged.
     * @returns All matches, grouped by alternative in declaration order.
     */
    getMatches(restString, environment, negated) {
        const collected = [];
        this.nodes.forEach((alternative) => {
            for (const match of alternative.getMatches(restString, environment, negated)) {
                collected.push(match);
            }
        });
        return collected;
    }
}
/**
 * Leaf node that tests the first character of the input against one
 * fixed character.
 */
class RegexLiteralNode extends RegexNode {
    /**
     * @param ch The character this node compares against.
     */
    constructor(ch) {
        super();
        this.ch = ch;
    }
    /** Renders the literal character itself. */
    toString() {
        return `${this.ch}`;
    }
    /**
     * @param restString Remaining input; only its first character is examined.
     * @param environment Unused by this node.
     * @param negated `false` accepts a first character equal to `ch`; `true`
     *        accepts a first character different from `ch`. Any other value
     *        accepts nothing.
     * @returns A one-element array holding the first character when it is
     *          accepted, otherwise an empty array (also for empty input).
     */
    getMatches(restString, environment, negated) {
        if (restString.length === 0) {
            return [];
        }
        const head = restString.charAt(0);
        const isSameCharacter = head === this.ch;
        const accepted = (negated === false && isSameCharacter) || (negated === true && !isSameCharacter);
        return accepted ? [head] : [];
    }
}
/**
 * Leaf node whose matching logic is supplied from outside as a function.
 */
class RegexIntrinsicNode extends RegexNode {
    /**
     * @param intrinsicName Name used when rendering the node.
     * @param calculator Function called as a method of this node with
     *        `(restString, environment)`; its return value becomes the
     *        node's match list.
     */
    constructor(intrinsicName, calculator) {
        super();
        Object.assign(this, { intrinsicName, calculator });
    }
    /** Renders the node as its name wrapped in angle brackets. */
    toString() {
        return `<${this.intrinsicName}>`;
    }
    /**
     * Delegates to the calculator. This method declares no negation
     * parameter, so a negation flag passed by a parent is not forwarded.
     *
     * @param restString Remaining input.
     * @param environment Variable lookup, passed through to the calculator.
     * @returns Whatever the calculator returns.
     */
    getMatches(restString, environment) {
        const result = this.calculator(restString, environment);
        return result;
    }
}
exports.RegexIntrinsicNode = RegexIntrinsicNode;
/**
 * Reference to a named sub-expression that is resolved at match time.
 */
class RegexVariableNode extends RegexNode {
    /**
     * @param variableName Key looked up in the environment when matching.
     */
    constructor(variableName) {
        super();
        this.variableName = variableName;
    }
    /** Renders the node as its variable name wrapped in angle brackets. */
    toString() {
        return `<${this.variableName}>`;
    }
    /**
     * Looks the variable up and delegates matching to the node stored under it.
     *
     * @param restString Remaining input.
     * @param environment Lookup object providing `has(name)` and `get(name)`.
     * @param negated Negation flag forwarded to the resolved node.
     * @returns The resolved node's matches, or an empty array when the
     *          environment has no entry for the variable.
     */
    getMatches(restString, environment, negated) {
        const { variableName } = this;
        return environment.has(variableName)
            ? environment.get(variableName).getMatches(restString, environment, negated)
            : [];
    }
}
/**
 * Postfix modifiers a parenthesised group can carry. Compiled enum: every
 * name maps to its ordinal and every ordinal maps back to its name.
 */
var RegexGroupingNodeModifiers;
(function (RegexGroupingNodeModifiers) {
    RegexGroupingNodeModifiers[RegexGroupingNodeModifiers["NONE"] = 0] = "NONE";
    RegexGroupingNodeModifiers[RegexGroupingNodeModifiers["NONE_OR_MORE"] = 1] = "NONE_OR_MORE";
    RegexGroupingNodeModifiers[RegexGroupingNodeModifiers["ONE_OR_MORE"] = 2] = "ONE_OR_MORE";
    RegexGroupingNodeModifiers[RegexGroupingNodeModifiers["NEGATION"] = 3] = "NEGATION";
})(RegexGroupingNodeModifiers || (RegexGroupingNodeModifiers = {}));
/**
 * Parenthesised group, optionally followed by one modifier:
 * `*` (NONE_OR_MORE), `+` (ONE_OR_MORE) or `!` (NEGATION).
 */
class RegexGroupingNode extends RegexNode {
    /**
     * @param internalNode The node wrapped by the parentheses.
     * @param modifier One of RegexGroupingNodeModifiers; defaults to NONE.
     */
    constructor(internalNode, modifier = RegexGroupingNodeModifiers.NONE) {
        super();
        this.internalNode = internalNode;
        this.modifier = modifier;
    }
    /**
     * Renders the group in source syntax, including its modifier suffix
     * (`+`, `*` or `!`).
     */
    toString() {
        let suffix = "";
        if (this.modifier === RegexGroupingNodeModifiers.ONE_OR_MORE) {
            suffix = "+";
        }
        else if (this.modifier === RegexGroupingNodeModifiers.NONE_OR_MORE) {
            suffix = "*";
        }
        else if (this.modifier === RegexGroupingNodeModifiers.NEGATION) {
            suffix = "!";
        }
        return "(" + this.internalNode.toString() + ")" + suffix;
    }
    /**
     * Matching for the non-negating modifiers.
     *
     * - No match of the inner node: `[""]` for NONE_OR_MORE, otherwise `[]`.
     * - NONE: the inner node's matches, unchanged.
     * - Repeating modifiers: the inner node is re-applied to what is left
     *   after each match for as long as at least one match can be extended;
     *   the matches of the last productive round are returned.
     *
     * NOTE(review): because only the last round survives, a match that
     * cannot be extended is discarded as soon as a sibling can be, even if
     * it was the longer one. Left as is; confirm this is intended.
     *
     * @param restString Remaining input; matching starts at its first character.
     * @param environment Variable lookup forwarded to the inner node.
     * @param negated Negation flag forwarded to the inner node.
     * @returns Array of matched strings.
     */
    _getMatches(restString, environment, negated) {
        const initialMatches = this.internalNode.getMatches(restString, environment, negated);
        if (initialMatches.length === 0) {
            // Build a fresh array instead of pushing onto the one the child
            // returned, so the child's result is never mutated.
            return this.modifier === RegexGroupingNodeModifiers.NONE_OR_MORE ? [""] : [];
        }
        if (this.modifier === RegexGroupingNodeModifiers.NONE) {
            return initialMatches;
        }
        let matches = initialMatches;
        while (true) {
            const nextMatches = [];
            for (const match of matches) {
                // Strip the consumed text by length; a substring search would
                // not consume anything when the lexeme is not literally the
                // start of the input.
                const rest = restString.slice(match.length);
                if (rest.length === 0) {
                    continue;
                }
                for (const extension of this.internalNode.getMatches(rest, environment, negated)) {
                    // An empty extension makes no progress. Re-queuing it
                    // (as happens when the inner node can match "", e.g. a
                    // nested `*` group) would keep this loop alive forever.
                    if (extension.length > 0) {
                        nextMatches.push(match + extension);
                    }
                }
            }
            if (nextMatches.length === 0) {
                break;
            }
            matches = nextMatches;
        }
        return matches;
    }
    /**
     * Entry point used by parent nodes.
     *
     * A NEGATION group matches its inner node once with the negation flag
     * forced to `true`; every other modifier goes through _getMatches with
     * the caller's flag.
     */
    getMatches(restString, environment, negated) {
        if (this.modifier === RegexGroupingNodeModifiers.NEGATION) {
            return this.internalNode.getMatches(restString, environment, true);
        }
        return this._getMatches(restString, environment, negated);
    }
}
/**
 * Recursive-descent parser that turns the token list produced by the regex
 * lexer into a tree of regex nodes.
 *
 * Grammar implemented by the methods below:
 *   expression    := concatenation ( PIPE concatenation )*
 *   concatenation := terminal+
 *   terminal      := LITERAL
 *                  | VARIABLE
 *                  | LPAREN expression RPAREN ( ASTERISK | PLUS | EXCLAMATION )?
 */
class RegexParser {
    /**
     * @param tokens Tokens to parse; each exposes `type`, `value` and `toString()`.
     */
    constructor(tokens) {
        this.currentTokenIndex = 0;
        this.tokens = tokens;
    }
    /**
     * Returns the token at the cursor, or `undefined` once every token has
     * been consumed. All lookahead goes through here so that reaching the
     * end of the input never dereferences a missing token.
     */
    _peek() {
        return this.currentTokenIndex < this.tokens.length
            ? this.tokens[this.currentTokenIndex]
            : undefined;
    }
    /** True when a token is left and it has the given type. */
    _peekIs(type) {
        const token = this._peek();
        return token !== undefined && token.type === type;
    }
    /**
     * Parses one expression (alternatives separated by PIPE) starting at the
     * cursor. Also used recursively for the inside of a group, which is why
     * it stops at, and does not consume, an RPAREN.
     *
     * @returns A RegexEitherNode when there are several alternatives,
     *          otherwise the single alternative itself.
     * @throws {Error} When the tokens do not form a valid expression.
     */
    parse() {
        const first = this.parseConcatenation();
        const possibles = [first];
        while (this._peekIs(RegexTokenType.PIPE)) {
            this.currentTokenIndex++;
            possibles.push(this.parseConcatenation());
        }
        return possibles.length > 1 ? new RegexEitherNode(possibles) : first;
    }
    /**
     * Parses one or more terminals up to the next PIPE, RPAREN or the end
     * of the tokens.
     *
     * @returns A RegexConcatenationNode when there are several terminals,
     *          otherwise the single terminal itself.
     */
    parseConcatenation() {
        const first = this.parseTerminal();
        const nodes = [first];
        while (this._peek() !== undefined &&
            !this._peekIs(RegexTokenType.PIPE) &&
            !this._peekIs(RegexTokenType.RPAREN)) {
            nodes.push(this.parseTerminal());
        }
        return nodes.length > 1 ? new RegexConcatenationNode(nodes) : first;
    }
    /**
     * Consumes the optional modifier that may follow a closing parenthesis.
     * A group that ends the expression simply has no modifier.
     */
    _parseGroupModifier() {
        let modifier = RegexGroupingNodeModifiers.NONE;
        if (this._peekIs(RegexTokenType.ASTERISK)) {
            modifier = RegexGroupingNodeModifiers.NONE_OR_MORE;
        }
        else if (this._peekIs(RegexTokenType.PLUS)) {
            modifier = RegexGroupingNodeModifiers.ONE_OR_MORE;
        }
        else if (this._peekIs(RegexTokenType.EXCLAMATION)) {
            modifier = RegexGroupingNodeModifiers.NEGATION;
        }
        if (modifier !== RegexGroupingNodeModifiers.NONE) {
            this.currentTokenIndex++;
        }
        return modifier;
    }
    /**
     * Parses a literal, a variable reference or a parenthesised group.
     *
     * @throws {Error} When the tokens are exhausted or the current token
     *         cannot start a terminal (for example a modifier that does not
     *         follow a group).
     */
    parseTerminal() {
        const currentToken = this._peek();
        if (currentToken === undefined) {
            throw new Error("Was not able to parse the regex. Unexpected end of expression.");
        }
        switch (currentToken.type) {
            case RegexTokenType.LPAREN: {
                this.currentTokenIndex++;
                const internalNode = this.parse();
                this.expect(RegexTokenType.RPAREN);
                return new RegexGroupingNode(internalNode, this._parseGroupModifier());
            }
            case RegexTokenType.LITERAL: {
                this.currentTokenIndex++;
                return new RegexLiteralNode(currentToken.value.charAt(0));
            }
            case RegexTokenType.VARIABLE: {
                this.currentTokenIndex++;
                return new RegexVariableNode(currentToken.value);
            }
            default: {
                throw new Error("Was not able to parse the regex. Token: " + currentToken.toString());
            }
        }
    }
    /**
     * Consumes the current token if it has the given type.
     *
     * @param type Required RegexTokenType value.
     * @throws {Error} When the tokens are exhausted or the current token has
     *         a different type.
     */
    expect(type) {
        const currentToken = this._peek();
        if (currentToken === undefined) {
            throw new Error("Expected: " + type.toString() + ". Received: end of expression");
        }
        if (currentToken.type !== type) {
            throw new Error("Expected: " + type.toString() + ". Received: " + currentToken.toString());
        }
        this.currentTokenIndex++;
    }
}
exports.RegexParser = RegexParser;
/**
 * Kinds of token the regex lexer produces. Two-way enum: every name maps to
 * its ordinal (its position in the list below) and every ordinal maps back
 * to its name.
 */
var RegexTokenType;
(function (RegexTokenType) {
    const namesInOrdinalOrder = [
        "LITERAL",
        "PIPE",
        "ASTERISK",
        "EXCLAMATION",
        "PLUS",
        "VARIABLE",
        "LPAREN",
        "RPAREN",
    ];
    namesInOrdinalOrder.forEach((name, ordinal) => {
        RegexTokenType[name] = ordinal;
        RegexTokenType[ordinal] = name;
    });
})(RegexTokenType || (RegexTokenType = {}));
/**
 * One lexical unit of a regex expression: a token type paired with the
 * source text it carries.
 */
class RegexToken {
    /**
     * @param type Kind of token (a RegexTokenType ordinal).
     * @param value Text carried by the token.
     */
    constructor(type, value) {
        Object.assign(this, { type, value });
    }
    /** Debug rendering showing the type (via its own toString) and the value. */
    toString() {
        const renderedType = this.type.toString();
        return `Type: ${renderedType}. Value: ${this.value}`;
    }
}
/**
 * Splits a regex expression string into RegexToken objects.
 *
 * Recognised input:
 *  - alphabetic characters and digits      -> LITERAL
 *  - `|` `+` `*` `!` `(` `)`               -> the matching operator token
 *  - `$x` where x is any character but `{` -> LITERAL x (escape)
 *  - `${` as the last two characters       -> LITERAL `{`
 *  - `${name}`                             -> VARIABLE name
 *  - whitespace                            -> skipped
 * Every other character is rejected with an Error.
 */
class RegexLexer {
    /**
     * @param expression The regex source text to tokenize.
     */
    constructor(expression) {
        this.tokens = [];
        this.index = 0;
        this.expression = expression;
    }
    /**
     * Maps an operator character to its token type; `undefined` when the
     * character is not an operator.
     */
    _operatorTokenType(character) {
        switch (character) {
            case "|":
                return RegexTokenType.PIPE;
            case "+":
                return RegexTokenType.PLUS;
            case "*":
                return RegexTokenType.ASTERISK;
            case "!":
                return RegexTokenType.EXCLAMATION;
            case "(":
                return RegexTokenType.LPAREN;
            case ")":
                return RegexTokenType.RPAREN;
            default:
                return undefined;
        }
    }
    /**
     * Handles the `$` at the cursor: either an escaped literal or the start
     * of a `${name}` variable reference.
     *
     * @throws {Error} When `$` is the last character, since there is then
     *         nothing to escape and no variable to read.
     */
    _lexDollar() {
        const nextIndex = this.index + 1;
        if (nextIndex >= this.expression.length) {
            throw new Error("Dangling '$' at index: " +
                this.index +
                " in expression: " +
                this.expression);
        }
        const following = this.expression.charAt(nextIndex);
        // "${" only opens a variable when something follows the brace; a
        // trailing "${" is an escaped "{".
        const opensVariable = following === "{" && this.expression.length > nextIndex + 1;
        if (!opensVariable) {
            this.tokens.push(new RegexToken(RegexTokenType.LITERAL, following));
            this.index += 2;
            return;
        }
        this.index += 2;
        let variableName = "";
        // An unterminated "${name" is tolerated: the name runs to the end of
        // the expression.
        while (this.index < this.expression.length && this.expression.charAt(this.index) !== "}") {
            variableName += this.expression.charAt(this.index);
            this.index++;
        }
        this.tokens.push(new RegexToken(RegexTokenType.VARIABLE, variableName));
        // Step over the closing brace.
        this.index++;
    }
    /**
     * Tokenizes the expression from the current cursor position to the end.
     *
     * @returns The accumulated token array (the same array instance on
     *          repeated calls).
     * @throws {Error} On a character that is neither supported nor escaped
     *         with `$`, or on a dangling `$` at the end of the expression.
     */
    lex() {
        while (this.index < this.expression.length) {
            const currentCharacter = this.expression.charAt(this.index);
            if (Character.isAlphabetic(currentCharacter) || Character.isDigit(currentCharacter)) {
                this.tokens.push(new RegexToken(RegexTokenType.LITERAL, currentCharacter));
                this.index++;
                continue;
            }
            const operatorType = this._operatorTokenType(currentCharacter);
            if (operatorType !== undefined) {
                this.tokens.push(new RegexToken(operatorType, currentCharacter));
                this.index++;
            }
            else if (currentCharacter === "$") {
                this._lexDollar();
            }
            else if (Character.isWhitespace(currentCharacter)) {
                this.index++;
            }
            else {
                // Fail instead of only logging: the cursor cannot advance
                // past a character it does not understand, so carrying on
                // would spin on the same character forever.
                throw new Error("Was not able to handle ch: " +
                    currentCharacter +
                    " at index: " +
                    this.index +
                    " in expression: " +
                    this.expression);
            }
        }
        return this.tokens;
    }
}
exports.RegexLexer = RegexLexer;
//# sourceMappingURL=internal.js.map