UNPKG

@scinorandex/slex

Version:

No fuss lexer generator

165 lines 7.61 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.ColumnAndRow = exports.Token = exports.RegexEngine = exports.Slex = void 0;
const ColumnAndRow_1 = require("./ColumnAndRow");
Object.defineProperty(exports, "ColumnAndRow", { enumerable: true, get: function () { return ColumnAndRow_1.ColumnAndRow; } });
const internal_1 = require("./internal");
const Token_1 = require("./Token");
Object.defineProperty(exports, "Token", { enumerable: true, get: function () { return Token_1.Token; } });
const Character_1 = require("./utils/Character");

/**
 * Lexer generator: collects named rules into an environment and builds
 * {@link RegexEngine} instances that tokenize an input string with them.
 */
class Slex {
    /**
     * @param options Lexer options; stored as-is and handed to every engine
     *   created by {@link Slex#generate}.
     */
    constructor(options) {
        this.options = options;
        this.environment = new Map();
        this.Character = (0, Character_1.initializeCharacter)({});

        // [environment key, intrinsic node name, Character predicate].
        // NOTE: "__symbols" is registered with the node name "__symbolic";
        // both spellings are kept exactly as they were so that existing rule
        // expressions keep resolving.
        const intrinsics = [
            ["__decimal_digit", "__decimal_digit", "isDigit"],
            ["__letter", "__letter", "isAlphabetic"],
            ["__uppercase_letter", "__uppercase_letter", "isAlphabeticUppercase"],
            ["__lowercase_letter", "__lowercase_letter", "isAlphabeticLowercase"],
            ["__symbols", "__symbolic", "isSymbolic"],
            ["__control_character", "__control_character", "isControl"],
        ];
        for (const [key, nodeName, predicate] of intrinsics) {
            // Each intrinsic matches at most the first character of the
            // remaining input: one single-character match, or no match.
            this.environment.set(key, new internal_1.RegexIntrinsicNode(nodeName, (restString) => {
                const first = restString.charAt(0);
                return this.Character[predicate](first) ? [first] : [];
            }));
        }
    }

    /**
     * Compiles `expression` and registers it in the environment under `name`.
     *
     * @param name Key under which the rule is stored (replaces any existing
     *   entry with the same name).
     * @param expression Rule source, lexed by `RegexLexer` and parsed by
     *   `RegexParser`.
     * @param emit Optional token type; applied to the rule root only when it
     *   is not `undefined`.
     * @param transformer Optional lexeme transformer; applied to the rule
     *   root only when it is not `undefined`.
     */
    addRule(name, expression, emit, transformer) {
        const tokens = new internal_1.RegexLexer(expression).lex();
        const root = new internal_1.RegexParser(tokens).parse();
        if (emit !== undefined) {
            root.setTokenType(emit);
        }
        if (transformer !== undefined) {
            root.setTransformer(transformer);
        }
        this.environment.set(name, root);
    }

    /**
     * Creates an engine that tokenizes `input` with the rules registered so far.
     *
     * @param input String to tokenize.
     * @param metadataGenerator Function called once per produced token; its
     *   return value is attached to that token.
     * @returns {RegexEngine} A new engine sharing this instance's options and
     *   environment.
     */
    generate(input, metadataGenerator) {
        return new RegexEngine(this.options, this.environment, metadataGenerator, input);
    }
}
exports.Slex = Slex;

/**
 * Tokenizer over a single input string, driven by the rules in `environment`.
 */
class RegexEngine {
    /**
     * @param options Lexer options. This class reads `whitespaceCharacters`,
     *   `ignoreTokens`, `EOF_TYPE` and `isHigherPrecedence` from it.
     * @param environment Map of rule name to rule node.
     * @param metadataGenerator Function called once per produced token.
     * @param input String to tokenize.
     */
    constructor(options, environment, metadataGenerator, input) {
        this.options = options;
        this.environment = environment;
        this.metadataGenerator = metadataGenerator;
        this.input = input;
        this.currentCharacterIndex = 0;
        this.startCharacterIndex = 0;
        this.Character = (0, Character_1.initializeCharacter)({ whitespace: this.options.whitespaceCharacters });
    }

    /**
     * @returns {string} The character at the cursor, or "\0" when the cursor
     *   is at or past the end of the input.
     */
    peek() {
        return this.currentCharacterIndex >= this.input.length
            ? "\0"
            : this.input.charAt(this.currentCharacterIndex);
    }

    /** Advances the cursor past any run of whitespace characters. */
    ignoreWhitespace() {
        // Stop at the end of the input: the "\0" sentinel returned by peek()
        // is not real input and must never move the cursor past the end.
        while (this.currentCharacterIndex < this.input.length && this.Character.isWhitespace(this.peek())) {
            this.currentCharacterIndex++;
        }
    }

    /**
     * Returns the next token without consuming it.
     *
     * @throws {Error} With the failure reason when no rule matches.
     */
    peekNextToken() {
        const result = this.tryPeekNextToken();
        if (result.success === false) {
            throw new Error(result.reason);
        }
        return result.token;
    }

    /**
     * Like {@link RegexEngine#tryGetNextToken}, but rewinds the cursor
     * afterwards so the same result is produced by the next call.
     */
    tryPeekNextToken() {
        const savedIndex = this.currentCharacterIndex;
        try {
            return this.tryGetNextToken();
        }
        finally {
            // Rewind even if lexing throws (e.g. from a transformer), so a
            // peek never consumes input.
            this.currentCharacterIndex = savedIndex;
        }
    }

    /**
     * Consumes and returns the next token.
     *
     * @throws {Error} With the failure reason when no rule matches.
     */
    getNextToken() {
        const result = this.tryGetNextToken();
        if (result.success === false) {
            throw new Error(result.reason);
        }
        return result.token;
    }

    /**
     * Consumes the next token that is not listed in `options.ignoreTokens`,
     * skipping whitespace before each attempt.
     *
     * @returns A `{ success: true, token }` or
     *   `{ success: false, line, column, reason }` result object.
     */
    tryGetNextToken() {
        let result = null;
        do {
            this.ignoreWhitespace();
            result = this.tryGetNextNonSkippedToken();
        } while (result === null);
        return result;
    }

    /**
     * Lexes one token at the cursor.
     *
     * @returns The result object, or `null` when the token's type is listed
     *   in `options.ignoreTokens` and the caller should lex again.
     */
    tryGetNextNonSkippedToken() {
        // At end of input the only possible token is EOF. It is always
        // returned, even if its type is listed in ignoreTokens; skipping it
        // would make the caller loop forever.
        const atEndOfInput = this.hasNextToken() === false;
        const result = this._tryGetNextToken();
        if (result.success === false || atEndOfInput) {
            return result;
        }
        const ignored = this.options.ignoreTokens;
        if (ignored !== undefined && ignored.includes(result.token.type)) {
            return null;
        }
        return result;
    }

    /**
     * Core matching step: tries every rule that has a token type against the
     * remaining input and keeps the longest match, using
     * `options.isHigherPrecedence` to break ties between equal-length matches.
     * On failure the cursor is advanced by one character.
     */
    _tryGetNextToken() {
        this.startCharacterIndex = this.currentCharacterIndex;
        if (this.hasNextToken() === false) {
            const metadata = this.metadataGenerator();
            const position = ColumnAndRow_1.ColumnAndRow.calculate(this.startCharacterIndex, this.input);
            return { success: true, token: new Token_1.Token(this.options.EOF_TYPE, "", position, metadata) };
        }

        let best = new internal_1.RegexEngineParsingResult(false, "", null);
        let bestNode = null;
        const rest = this.input.substring(this.currentCharacterIndex);
        for (const [ruleName, candidate] of this.environment.entries()) {
            // Rules with a null token type are never matched directly here.
            if (candidate.getTokenType() === null) {
                continue;
            }
            const matches = candidate.getMatches(rest, this.environment, false);
            if (matches.length === 0) {
                continue;
            }
            let longest = "";
            for (const match of matches) {
                if (match.length > longest.length) {
                    longest = match;
                }
            }
            const isBetter = !best.success
                || best.lexeme.length < longest.length
                || (best.lexeme.length === longest.length
                    && bestNode != null
                    && this.options.isHigherPrecedence({ current: bestNode.getTokenType(), next: candidate.getTokenType() }));
            if (isBetter) {
                best = new internal_1.RegexEngineParsingResult(true, longest, ruleName);
                bestNode = candidate;
            }
        }

        if (bestNode != null && bestNode.getTokenType() != null) {
            this.currentCharacterIndex += best.lexeme.length;
            const transformer = bestNode.getTransformer();
            const lexeme = transformer != null ? transformer(best.lexeme) : best.lexeme;
            const token = new Token_1.Token(bestNode.getTokenType(), lexeme, ColumnAndRow_1.ColumnAndRow.calculate(this.startCharacterIndex, this.input), this.metadataGenerator());
            return { success: true, token };
        }

        // No rule matched: report the offending character and step over it.
        const nextChar = this.input.charAt(this.startCharacterIndex);
        const position = ColumnAndRow_1.ColumnAndRow.calculate(this.startCharacterIndex, this.input);
        this.currentCharacterIndex++;
        const line = position.getActualRow();
        const column = position.getActualColumn();
        return {
            success: false,
            line,
            column,
            reason: `Unexpected character '${nextChar}' at Line: ${line}, Column: ${column}`,
        };
    }

    /** @returns {boolean} Whether the cursor is still inside the input. */
    hasNextToken() {
        return this.currentCharacterIndex < this.input.length;
    }
}
exports.RegexEngine = RegexEngine;
//# sourceMappingURL=index.js.map