UNPKG

@scinorandex/slex

Version:

No fuss lexer generator

359 lines 13.5 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.RegexLexer = exports.RegexParser = exports.RegexIntrinsicNode = exports.RegexNode = exports.RegexEngineParsingResult = void 0; const Character_1 = require("./utils/Character"); const Character = (0, Character_1.initializeCharacter)({}); class RegexEngineParsingResult { constructor(success, lexeme, from) { this.success = success; this.lexeme = lexeme; this.from = from; } toString() { return this.success ? "From: " + this.from + ". Lexeme: " + this.lexeme : "No token found"; } } exports.RegexEngineParsingResult = RegexEngineParsingResult; class RegexNode { constructor() { this.emit = null; this.transformer = null; } getTokenType() { return this.emit; } setTokenType(emit) { this.emit = emit; } getTransformer() { return this.transformer; } setTransformer(transformer) { this.transformer = transformer; } } exports.RegexNode = RegexNode; class RegexConcatenationNode extends RegexNode { constructor(nodes) { super(); this.nodes = nodes; } toString() { let ret = ""; for (let i = 0; i < this.nodes.length; i++) { ret += this.nodes[i].toString(); } return ret; } getMatches(restString, environment, negated) { let caches = [""]; for (const node of this.nodes) { let nextCaches = []; for (const cache of caches) { const rest = restString.replace(cache, ""); const nextMatches = node.getMatches(rest, environment, negated); nextCaches.push(...nextMatches.map((m) => cache + m)); } caches = nextCaches; } return caches; } } class RegexEitherNode extends RegexNode { constructor(nodes) { super(); this.nodes = nodes; } toString() { let ret = ""; for (let i = 0; i < this.nodes.length; i++) { ret += this.nodes[i].toString(); if (i != this.nodes.length - 1) ret += "|"; } return ret; } getMatches(restString, environment, negated) { let matches = []; for (const node of this.nodes) matches.push(...node.getMatches(restString, environment, negated)); return matches; } } class RegexLiteralNode extends RegexNode { constructor(ch) { super(); this.ch = ch; } toString() { return "" + this.ch; } getMatches(restString, environment, negated) { if (restString.length === 0) return []; const matches = []; const starting = restString.charAt(0); if (negated === false && starting === this.ch) matches.push("" + starting); else if (negated === true && starting !== this.ch) matches.push("" + starting); return matches; } } class RegexIntrinsicNode extends RegexNode { constructor(intrinsicName, calculator) { super(); this.intrinsicName = intrinsicName; this.calculator = calculator; } toString() { return "<" + this.intrinsicName + ">"; } getMatches(restString, environment) { return this.calculator(restString, environment); } } exports.RegexIntrinsicNode = RegexIntrinsicNode; class RegexVariableNode extends RegexNode { constructor(variableName) { super(); this.variableName = variableName; } toString() { return "<" + this.variableName + ">"; } getMatches(restString, environment, negated) { if (!environment.has(this.variableName)) return []; const rootNode = environment.get(this.variableName); return rootNode.getMatches(restString, environment, negated); } } var RegexGroupingNodeModifiers; (function (RegexGroupingNodeModifiers) { RegexGroupingNodeModifiers[RegexGroupingNodeModifiers["NONE"] = 0] = "NONE"; RegexGroupingNodeModifiers[RegexGroupingNodeModifiers["NONE_OR_MORE"] = 1] = "NONE_OR_MORE"; RegexGroupingNodeModifiers[RegexGroupingNodeModifiers["ONE_OR_MORE"] = 2] = "ONE_OR_MORE"; RegexGroupingNodeModifiers[RegexGroupingNodeModifiers["NEGATION"] = 3] = "NEGATION"; })(RegexGroupingNodeModifiers || (RegexGroupingNodeModifiers = {})); class RegexGroupingNode extends RegexNode { constructor(internalNode, modifier = RegexGroupingNodeModifiers.NONE) { super(); this.internalNode = internalNode; this.modifier = modifier; } toString() { return ("(" + this.internalNode.toString() + ")" + (this.modifier === RegexGroupingNodeModifiers.ONE_OR_MORE ? "+" : this.modifier === RegexGroupingNodeModifiers.NONE_OR_MORE ? "*" : "")); } _getMatches(restString, environment, negated) { const initialMatches = this.internalNode.getMatches(restString, environment, negated); if (initialMatches.length === 0) { if (this.modifier === RegexGroupingNodeModifiers.NONE_OR_MORE) initialMatches.push(""); return initialMatches; } else if (this.modifier === RegexGroupingNodeModifiers.NONE) return initialMatches; let matches = initialMatches; while (true) { const nextMatches = []; for (const match of matches) { const rest = restString.replace(match, ""); if (rest.length === 0) continue; const nextMatch = this.internalNode.getMatches(rest, environment, negated); nextMatches.push(...nextMatch.map((m) => match + m)); } if (nextMatches.length === 0) break; matches = nextMatches; } return matches; } getMatches(restString, environment, negated) { if (this.modifier === RegexGroupingNodeModifiers.NEGATION) return this.internalNode.getMatches(restString, environment, true); return this._getMatches(restString, environment, negated); } } class RegexParser { constructor(tokens) { this.currentTokenIndex = 0; this.tokens = tokens; } parse() { const first = this.parseConcatenation(); const possibles = [first]; while (this.currentTokenIndex < this.tokens.length && this.tokens[this.currentTokenIndex].type === RegexTokenType.PIPE) { this.currentTokenIndex++; const nextNode = this.parseConcatenation(); possibles.push(nextNode); } if (possibles.length > 1) return new RegexEitherNode(possibles); else return first; } parseConcatenation() { const first = this.parseTerminal(); const nodes = [first]; while (this.currentTokenIndex < this.tokens.length && this.tokens[this.currentTokenIndex].type != RegexTokenType.PIPE && this.tokens[this.currentTokenIndex].type != RegexTokenType.RPAREN) { const nextNode = this.parseTerminal(); nodes.push(nextNode); } if (nodes.length > 1) return new RegexConcatenationNode(nodes); else return first; } parseTerminal() { const currentToken = this.tokens[this.currentTokenIndex]; switch (currentToken.type) { case RegexTokenType.LPAREN: { this.currentTokenIndex++; const internalNode = this.parse(); this.expect(RegexTokenType.RPAREN); let modifier = RegexGroupingNodeModifiers.NONE; if (this.tokens[this.currentTokenIndex].type === RegexTokenType.ASTERISK) { modifier = RegexGroupingNodeModifiers.NONE_OR_MORE; this.currentTokenIndex++; } else if (this.tokens[this.currentTokenIndex].type === RegexTokenType.PLUS) { modifier = RegexGroupingNodeModifiers.ONE_OR_MORE; this.currentTokenIndex++; } else if (this.tokens[this.currentTokenIndex].type === RegexTokenType.EXCLAMATION) { modifier = RegexGroupingNodeModifiers.NEGATION; this.currentTokenIndex++; } return new RegexGroupingNode(internalNode, modifier); } case RegexTokenType.LITERAL: { this.currentTokenIndex++; return new RegexLiteralNode(currentToken.value.charAt(0)); } case RegexTokenType.VARIABLE: { this.currentTokenIndex++; return new RegexVariableNode(currentToken.value); } default: { throw new Error("Was not able to parse the regex. Token: " + currentToken.toString()); } } } expect(type) { const currentToken = this.tokens[this.currentTokenIndex]; if (currentToken.type === type) { this.currentTokenIndex++; } else { throw new Error("Expected: " + type.toString() + ". Received: " + currentToken.toString()); } } } exports.RegexParser = RegexParser; var RegexTokenType; (function (RegexTokenType) { RegexTokenType[RegexTokenType["LITERAL"] = 0] = "LITERAL"; RegexTokenType[RegexTokenType["PIPE"] = 1] = "PIPE"; RegexTokenType[RegexTokenType["ASTERISK"] = 2] = "ASTERISK"; RegexTokenType[RegexTokenType["EXCLAMATION"] = 3] = "EXCLAMATION"; RegexTokenType[RegexTokenType["PLUS"] = 4] = "PLUS"; RegexTokenType[RegexTokenType["VARIABLE"] = 5] = "VARIABLE"; RegexTokenType[RegexTokenType["LPAREN"] = 6] = "LPAREN"; RegexTokenType[RegexTokenType["RPAREN"] = 7] = "RPAREN"; })(RegexTokenType || (RegexTokenType = {})); class RegexToken { constructor(type, value) { this.type = type; this.value = value; } toString() { return "Type: " + this.type.toString() + ". Value: " + this.value; } } class RegexLexer { constructor(expression) { this.tokens = []; this.index = 0; this.expression = expression; } lex() { while (this.index < this.expression.length) { const currentCharacter = this.expression.charAt(this.index); if (Character.isAlphabetic(currentCharacter) || Character.isDigit(currentCharacter)) { this.tokens.push(new RegexToken(RegexTokenType.LITERAL, currentCharacter)); this.index++; } else if (currentCharacter === "|") { this.tokens.push(new RegexToken(RegexTokenType.PIPE, currentCharacter)); this.index++; } else if (currentCharacter === "+") { this.tokens.push(new RegexToken(RegexTokenType.PLUS, currentCharacter)); this.index++; } else if (currentCharacter === "*") { this.tokens.push(new RegexToken(RegexTokenType.ASTERISK, currentCharacter)); this.index++; } else if (currentCharacter === "!") { this.tokens.push(new RegexToken(RegexTokenType.EXCLAMATION, currentCharacter)); this.index++; } else if (currentCharacter === "(") { this.tokens.push(new RegexToken(RegexTokenType.LPAREN, currentCharacter)); this.index++; } else if (currentCharacter === ")") { this.tokens.push(new RegexToken(RegexTokenType.RPAREN, currentCharacter)); this.index++; } else if (currentCharacter === "$") { if ((this.expression.length > this.index + 1 && this.expression.charAt(this.index + 1) != "{") || this.expression.length === this.index + 2) { this.tokens.push(new RegexToken(RegexTokenType.LITERAL, this.expression.charAt(this.index + 1))); this.index += 2; } else { this.index += 2; let variableName = ""; if (this.index === this.expression.length) { this.tokens.push(new RegexToken(RegexTokenType.LITERAL, "}")); } else { while (this.expression.length > this.index && this.expression.charAt(this.index) != "}") { variableName += this.expression.charAt(this.index); this.index++; } this.tokens.push(new RegexToken(RegexTokenType.VARIABLE, variableName)); this.index++; } } } else if (Character.isWhitespace(currentCharacter)) this.index++; else { console.log("Was not able to handle ch: " + currentCharacter + " at index: " + this.index + " in expression: " + this.expression); } } return this.tokens; } } exports.RegexLexer = RegexLexer; //# sourceMappingURL=internal.js.map