@making-sense/antlr-editor

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.RuleTokenizer = exports.rgxReplace = exports.rgxEscape = void 0; const antlr4ng_1 = require("@making-sense/antlr4ng"); const log_1 = require("../utils/log"); const ruleToken_1 = require("./ruleToken"); const tokenType_1 = require("./tokenType"); exports.rgxEscape = /[-/\\^$*+?.()|[\]{}]/g; exports.rgxReplace = "\\$&"; class RuleTokenizer { constructor(vocabulary) { this.context = { tokens: [], parens: [], atom: [], modifier: undefined }; this.vocabulary = vocabulary; } tokenize(declaration) { this.clearContext(); Array.from(declaration).forEach((value, index) => { if (!(this.isAtom(value, index) || this.isModifier(value, index))) { switch (value) { case tokenType_1.TokenType.Lparen: { const token = new ruleToken_1.RuleToken(tokenType_1.TokenType.Lparen, this.context.parens.length); this.context.parens.push(token); this.context.tokens.push(token); break; } case tokenType_1.TokenType.Rparen: { const lparen = this.context.parens.pop(); if (lparen) { const lindex = this.context.tokens.indexOf(lparen); if (lindex !== this.context.tokens.length - 2) { const rparen = new ruleToken_1.RuleToken(tokenType_1.TokenType.Rparen, this.context.parens.length); lparen.sibling = rparen; rparen.sibling = lparen; this.context.tokens.push(rparen); } else { const last = this.lastToken(); if (last) last.nested--; this.context.tokens.splice(lindex, 1); } } else this.unexpected(value, index); break; } case tokenType_1.TokenType.Pipe: { this.addToken(value); break; } case tokenType_1.TokenType.Assign: { const last = this.lastToken([tokenType_1.TokenType.Unknown]); if (last) { last.type = tokenType_1.TokenType.Identifier; this.addToken(value); log_1.Log.warn("Unknown operator token " + last.name + " recognized as identifier", "RuleTokenizer"); } break; } case tokenType_1.TokenType.Hash: { this.addToken(value); break; } case tokenType_1.TokenType.Space: { this.finishAtom(index); break; } default: { this.unexpected(value, index); } } } }); this.finishAtom(declaration.length); return this.context.tokens; } isAtom(value, index) { if (/[a-zA-Z_0-9]/.test(value)) { this.context.atom.push(value); return true; } this.finishAtom(index); return false; } isModifier(value, index) { // TODO: add plus assign and multipliers stacking if (/[*+?]/.test(value)) { const last = this.lastToken([ tokenType_1.TokenType.Keyword, tokenType_1.TokenType.Operator, tokenType_1.TokenType.Operand, tokenType_1.TokenType.Rule, tokenType_1.TokenType.Rparen, tokenType_1.TokenType.Unknown ]); if (this.context.modifier) { if (value === tokenType_1.TokenType.Question && last) last.greedy = false; else this.unexpected(value, index); this.finishModifier(); } else { if (last) { last.multiply(value); this.context.modifier = value; } else this.unexpected(value, index); } return true; } this.finishModifier(); return false; } addToken(type) { this.context.tokens.push(new ruleToken_1.RuleToken(type, this.context.parens.length)); } finishAtom(index) { if (this.context.atom.length > 0) { const name = this.context.atom.join(""); let value = undefined; index = index - this.context.atom.length; const last = this.lastToken([tokenType_1.TokenType.Hash]); let type; if (last) { type = tokenType_1.TokenType.Label; } else if (this.vocabulary.isSymbolicName(name)) { type = tokenType_1.TokenType.Operand; const symbolicIndex = this.vocabulary.symbolicIndex(name); if (this.vocabulary.hasKeyword(symbolicIndex)) { type = tokenType_1.TokenType.Keyword; value = 
this.vocabulary.keyword(symbolicIndex); } else if (this.vocabulary.hasOperator(symbolicIndex)) { type = tokenType_1.TokenType.Operator; value = this.vocabulary.operator(symbolicIndex); } } else if (name in antlr4ng_1.Lexer) { type = tokenType_1.TokenType.Operator; } else if (this.vocabulary.isRuleName(name)) { type = tokenType_1.TokenType.Rule; } else { type = tokenType_1.TokenType.Unknown; log_1.Log.warn("Unknown operator token " + name + " at " + index, "RuleTokenizer"); } this.context.tokens.push(new ruleToken_1.RuleToken(type, this.context.parens.length, name, value)); this.context.atom.length = 0; } } finishModifier() { this.context.modifier = undefined; } unexpected(token, index) { log_1.Log.error("Unexpected token " + token + " at " + index, "RuleTokenizer"); } clearContext() { this.context.tokens = []; this.context.parens = []; this.context.atom = []; this.context.modifier = undefined; } lastToken(filter) { if (this.context.tokens.length > 0) { const last = this.context.tokens[this.context.tokens.length - 1]; if (last && (!filter || filter.includes(last.type))) return last; } return undefined; } ruleName(declaration, index) { const ruleNames = this.vocabulary.getRuleNames(); if (!ruleNames[index]) return declaration; let rgx = new RegExp(ruleNames[index].replace(exports.rgxEscape, exports.rgxReplace), "g"); if (rgx.test(declaration)) { return ruleNames[index]; } else { log_1.Log.warn("Mismatched rule " + index + " name. Looking for alternatives.", "RuleTokenizer"); ruleNames.forEach((ruleName, other) => { rgx = new RegExp(ruleName.replace(exports.rgxEscape, exports.rgxReplace), "g"); if (rgx.test(declaration)) { log_1.Log.warn("Matched name of rule " + other + ".", "RuleTokenizer"); return ruleName; } }); log_1.Log.error("No alternative for mismatched rule name.", "RuleTokenizer"); } return declaration; } static alternatives(tokens) { let pipeIndex = -1; let statement = []; return tokens.reduce((statements, token, index, initial) => { if (token.nested === 0 && token.type === tokenType_1.TokenType.Pipe) { statement = initial.slice(pipeIndex + 1, index); if (statement.length !== 0) statements.push(statement); pipeIndex = index; } else if (index === initial.length - 1) { statement = initial.slice(pipeIndex + 1); if (statement.length !== 0) statements.push(statement); } return statements; }, []); } static parentheses(tokens) { let parenIndex = -1; let statement = []; return tokens.reduce((statements, token, index, initial) => { if (token.nested === 0 && (token.type === tokenType_1.TokenType.Lparen || token.type === tokenType_1.TokenType.Rparen)) { statement = initial.slice(parenIndex, index + (token.type === tokenType_1.TokenType.Rparen ? 1 : 0)); if (statement.length !== 0) statements.push(statement); parenIndex = index; } else if (index === initial.length - 1) { statement = initial.slice(parenIndex + 1); if (statement.length !== 0) statements.push(statement); } return statements; }, []); } static unnest(tokens) { if (!tokens) return []; const left = tokens[0]; const right = tokens[tokens.length - 1]; if (left.type === tokenType_1.TokenType.Lparen && right.type === tokenType_1.TokenType.Rparen && left.sibling === right) { tokens.pop(); tokens.shift(); tokens.forEach(token => token.nested--); } return tokens; } } exports.RuleTokenizer = RuleTokenizer; //# sourceMappingURL=ruleTokenizer.js.map
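// Illustrative usage sketch (not part of the published @making-sense/antlr-editor file):
// a minimal example of how RuleTokenizer might be driven, assuming a `vocabulary` object
// that implements the methods consumed above (isSymbolicName, symbolicIndex, hasKeyword,
// keyword, hasOperator, operator, isRuleName, getRuleNames). The declaration string and
// its token names are hypothetical.
//
//   const { RuleTokenizer } = require("./ruleTokenizer");
//   const tokenizer = new RuleTokenizer(vocabulary);
//   const tokens = tokenizer.tokenize("expr MUL expr | expr DIV expr #product");
//   const alternatives = RuleTokenizer.alternatives(tokens); // split on top-level "|"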