UNPKG

nerdamer-ts

Version:

javascript light-weight symbolic math expression evaluator

486 lines (485 loc) 20.4 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Tokenizer = void 0;
const Token_1 = require("./Token");
const Utils_1 = require("../Core/Utils");
const Math2_1 = require("../Functions/Math2");
const Settings_1 = require("../Settings");
const Node_1 = require("./Node");
const Errors_1 = require("../Core/Errors");

/**
 * Thrown by the tokenizer when brackets in an expression do not pair up.
 */
class ParityError extends Error {
    constructor() {
        super(...arguments);
        this.name = 'ParityError';
    }
}

/**
 * Public tokenizer facade. Holds the registered preprocessors, prepares raw
 * expression strings and delegates the actual scanning to InnerTokenizer.
 */
class Tokenizer {
    /**
     * @param functionsProvider object exposing getFunctionDescriptor(name)
     * @param operators object exposing getBrackets(), isOperator() and getOperator()
     * @param units lookup of unit names; tokens found in it are tagged as Token.UNIT
     */
    constructor(functionsProvider, operators, units) {
        this.preprocessors = {
            names: [],
            actions: []
        };
        this.functions = functionsProvider;
        this.brackets = operators.getBrackets();
        this.operators = operators;
        this.units = units;
    }

    /**
     * Tokenizes the string
     * @param {String} e
     * @param {boolean} shouldPrepare when true the string is first run through prepareExpression
     * @returns {Token[]}
     */
    tokenize(e, shouldPrepare = false) {
        if (shouldPrepare) {
            e = this.prepareExpression(e);
        }
        let t = new InnerTokenizer(this.functions, this.brackets, this.operators, this.units, e);
        return t.tokenize();
    }

    /**
     * Registers a preprocessor that prepareExpression runs on the raw string.
     * @param {String} name
     * @param {Function} action receives the expression string, returns the modified string
     * @param {int} order index at which to place the preprocessor. Appended when falsy.
     * @param {boolean} shift_cells when true the preprocessor is inserted at `order`,
     *        otherwise whatever is stored at `order` is overwritten
     * @throws {PreprocessorError} when action is not a function
     */
    addPreprocessor(name, action, order, shift_cells) {
        let names = this.preprocessors.names || [];
        let actions = this.preprocessors.actions || [];
        if ((typeof action !== 'function')) {
            //the person probably forgot to specify a name
            throw new Errors_1.PreprocessorError('Incorrect parameters. Function expected!');
        }
        // NOTE(review): an order of 0 is falsy and is therefore appended rather than
        // placed at index 0. Left as-is since callers may rely on it - confirm intent.
        if (!order) {
            names.push(name);
            actions.push(action);
        }
        else {
            if (shift_cells) {
                names.splice(order, 0, name);
                actions.splice(order, 0, action);
            }
            else {
                names[order] = name;
                actions[order] = action;
            }
        }
    }

    /**
     * Returns the registered preprocessors keyed by name.
     * @returns {Object} map of name -> { order, action }
     */
    getPreprocessors() {
        let preprocessors = {};
        const names = this.preprocessors.names;
        if (names != null && names.length) {
            for (let i = 0, l = names.length; i < l; i++) {
                //slots cleared by removePreprocessor are undefined and end up under the '' key
                let name = names[i] || '';
                preprocessors[name] = {
                    order: i,
                    action: this.preprocessors.actions[i]
                };
            }
        }
        return preprocessors;
    }

    /**
     * Removes a preprocessor by name. Does nothing when the name is not registered.
     * @param {String} name
     * @param {boolean} shift_cells when true the entry is removed and later entries move up,
     *        otherwise the slot is only set to undefined so the other indices stay put
     */
    removePreprocessor(name, shift_cells) {
        let i = this.preprocessors.names.indexOf(name);
        //indexOf yields -1 for an unknown name. Bail out so that we neither write a stray
        //"-1" property onto the arrays nor hand a negative index to remove()
        if (i === -1) {
            return;
        }
        if (shift_cells) {
            (0, Utils_1.remove)(this.preprocessors.names, i);
            (0, Utils_1.remove)(this.preprocessors.actions, i);
        }
        else {
            this.preprocessors.names[i] = undefined;
            this.preprocessors.actions[i] = undefined;
        }
    }

    /**
     * Performs preprocessing on the string. Useful for making early modification before
     * sending to the parser
     * @param {String} e
     * @returns {String}
     */
    prepareExpression(e) {
        /*
         * Since variables cannot start with a number, the assumption is made that when this occurs the
         * user intends for this to be a coefficient. The multiplication symbol is then added. The same goes for
         * a side-by-side close and open parenthesis
         */
        e = String(e);
        //apply preprocessors. Cleared slots are skipped and a falsy result keeps the current string
        for (let i = 0; i < this.preprocessors.actions.length; i++) {
            const action = this.preprocessors.actions[i];
            if (action !== null && action !== undefined) {
                e = action.call(this, e) || e;
            }
        }
        //replace multiple spaces with one space
        e = e.replace(/\s+/g, ' ');
        //only even bother to check if the string contains e. This regex is painfully slow and might need a better solution. e.g. hangs on (0.06/3650))^(365)
        if (/e/i.test(e)) {
            //replace scientific numbers
            e = e.replace(/-*\d+\.*\d*e\+?-?\d+/gi, x => {
                return Math2_1.Math2.scientificToDecimal(x);
            });
        }
        //allow omission of multiplication after coefficients
        e = e.replace(Settings_1.Settings.IMPLIED_MULTIPLICATION_REGEX, (match, group1, group2, start, str) => {
            let first = str.charAt(start), before = '', d = '*';
            if (!first.match(/[+\-\/*]/))
                before = str.charAt(start - 1);
            //a letter right before the match means the digits belong to a name, so no '*' is inserted
            if (before.match(/[a-z]/i))
                d = '';
            return group1 + d + group2;
        });
        //with single character variables every non-function, non-numeric word is split into a product
        e = e.replace(/([a-z0-9_]+)/gi, (match, a) => {
            if (!Settings_1.Settings.USE_MULTICHARACTER_VARS && !this.functions.getFunctionDescriptor(a)) {
                if (!isNaN(a))
                    return a;
                return a.split('').join('*');
            }
            return a;
        });
        //allow omission of multiplication sign between brackets
        e = e.replace(/\)\(/g, ')*(') || '0';
        //replace x(x+a) with x*(x+a)
        while (true) {
            let e_org = e; //store the original
            e = e.replace(/([a-z0-9_]+)(\()|(\))([a-z0-9]+)/gi, (match, a, b, c, d) => {
                let g1 = a || c, g2 = b || d;
                if (this.functions.getFunctionDescriptor(g1)) //create a passthrough for functions
                    return g1 + g2;
                return g1 + '*' + g2;
            });
            //if the original equals the replace we're done
            if (e_org === e)
                break;
        }
        return e;
    }

    /**
     * Folds a token list into a tree of Nodes using a stack. Operators pop their
     * operand(s) off the stack, functions pop their arguments.
     * @param {Token[]} tokens nested arrays are treated as sub-scopes and folded recursively
     * @returns {Node} the first entry left on the stack (undefined for an empty list)
     */
    tree(tokens) {
        let Q = [];
        for (let i = 0; i < tokens.length; i++) {
            let e = tokens[i];
            //Arrays indicate a new scope so parse that out
            if (Array.isArray(e)) {
                e = this.tree(e);
                //if it's a comma then it's just arguments
                Q.push(e);
                continue;
            }
            if (e.type === Token_1.Token.OPERATOR) {
                if (e.is_prefix || e.postfix) {
                    //prefixes go to the left, postfix to the right
                    let location = e.is_prefix ? 'left' : 'right';
                    let last = Q.pop();
                    e = new Node_1.Node(e);
                    e[location] = last;
                    Q.push(e);
                }
                else {
                    e = new Node_1.Node(e);
                    //the right operand was pushed last so it comes off first
                    e.right = Q.pop();
                    e.left = Q.pop();
                    Q.push(e);
                }
            }
            else if (e.type === Token_1.Token.FUNCTION) {
                e = new Node_1.Node(e);
                let args = Q.pop();
                e.right = args;
                if (e.value === 'object') {
                    //check if Q has a value
                    let last = Q[Q.length - 1];
                    if (last) {
                        //hang the node at the end of the right-most chain instead of pushing it
                        while (last.right) {
                            last = last.right;
                        }
                        last.right = e;
                        continue;
                    }
                }
                Q.push(e);
            }
            else {
                Q.push(new Node_1.Node(e));
            }
        }
        return Q[0];
    }
}
exports.Tokenizer = Tokenizer;

/**
 * Single-use scanner that walks one expression string and builds the (nested)
 * token list. All scanning state lives on the instance.
 */
class InnerTokenizer {
    /**
     * @param functions object exposing getFunctionDescriptor(name)
     * @param brackets map of bracket character -> bracket descriptor
     * @param operators object exposing isOperator() and getOperator()
     * @param units lookup of unit names
     * @param expression the expression to tokenize
     */
    constructor(functions, brackets, operators, units, expression) {
        this.expression = expression;
        this.functions = functions;
        this.brackets = brackets;
        this.operators = operators;
        this.units = units;
        this.tokens = []; //the tokens container
        this.col = 0; //the column position
        this.lpos = 0; //marks beginning of next token
        this.scopes = [this.tokens]; //initiate with the tokens as the highest scope
        this.target = this.tokens; //the target to which the tokens are added. This can swing up or down
        this.depth = 0;
        //cast to String
        let e = String(this.expression);
        //remove multiple white spaces and spaces at beginning and end of string
        e = e.trim().replace(/\s+/g, ' ');
        //remove spaces before and after brackets
        for (let x in this.brackets) {
            //closing brackets lose the spaces in front of them, all other brackets the spaces behind them
            let regex = new RegExp(this.brackets[x].is_close ? '\\s+\\' + x : '\\' + x + '\\s+', 'g');
            e = e.replace(regex, x);
        }
        this.expression = e;
    }

    /**
     * Adds a scope to tokens
     * @param {String} scope_type
     * @param {int} column
     * @returns {undefined}
     */
    addScope(scope_type, column) {
        // TODO: need to refactor this mix of array and object
        let new_scope = []; //create a new scope
        if (scope_type !== undefined) {
            new_scope.type = scope_type;
        }
        new_scope.column = column; //mark the column of the scope
        this.scopes.push(new_scope); //add it to the list of scopes
        this.target.push(new_scope); //add it to the tokens list since now it's a scope
        this.target = new_scope; //point to it
        this.depth++; //go down one in scope
    }

    /**
     * Goes up in scope by one
     * @returns {undefined}
     */
    goUp() {
        this.scopes.pop(); //remove the scope from the scopes stack
        this.target = this.scopes[--this.depth]; //point to the above scope
    }

    /**
     * Extracts all the operators from the expression string starting at position start_at
     * @param e
     * @param {int} start_at
     * @returns {string}
     */
    get_operator_str(e, start_at) {
        //mark the end of the operator as the start since we're just going
        //to be walking along the string
        let end = start_at + 1;
        //just keep moving along
        while (this.operators.isOperator(e.charAt(end++))) { }
        //remember that we started at one position ahead. The beginning operator is what triggered
        //this function to be called in the first place. String.charAt is zero based so we now
        //have to correct two places. The initial increment + the extra++ at the end of end during
        //the last iteration.
        return e.substring(start_at, end - 1);
    }

    /**
     * Breaks operator up in to several different operators as defined in operators
     * @param {string} operator_str
     * @returns {Token[]}
     */
    chunkify(operator_str) {
        let start = this.col - operator_str.length; //start of operator
        let _operators = [];
        let operator = operator_str.charAt(0);
        //grab the largest possible chunks but start at 2 since we already know
        //that the first character is an operator
        let index = start + 1;
        let operatorLength = operator_str.length;
        for (let i = 1; i < operatorLength; i++, index++) {
            let ch = operator_str.charAt(i);
            let o = operator + ch;
            //since the operator now is undefined then the last operator
            //was the largest possible combination.
            if (!this.operators.isOperator(o)) {
                _operators.push(new Token_1.Token(operator, Token_1.Token.OPERATOR, index, this.operators.getOperator(operator)));
                operator = ch;
            }
            else
                operator = o; //now the operator is the larger chunk
        }
        //add the last operator
        _operators.push(new Token_1.Token(operator, Token_1.Token.OPERATOR, index, this.operators.getOperator(operator)));
        return _operators;
    }

    /**
     * Is used to add a token to the tokens array. Makes sure that no empty token is added
     * @param {int} at
     * @param {String} token
     * @returns {undefined}
     */
    add_token(at, token) {
        //grab the token if we're not supplied one
        if (token === undefined) {
            token = this.expression.substring(this.lpos, at);
        }
        //only own entries count as units. A plain `in` test would also match inherited
        //keys such as "constructor" or "toString" and would throw when no units were supplied
        let is_unit = this.units !== null && this.units !== undefined &&
            Object.prototype.hasOwnProperty.call(this.units, token);
        if (is_unit) {
            this.target.push(new Token_1.Token(token, Token_1.Token.UNIT, this.lpos));
        }
        else if (token !== '') {
            //only add it if it's not an empty string
            this.target.push(new Token_1.Token(token, Token_1.Token.VARIABLE_OR_LITERAL, this.lpos));
        }
    }

    /**
     * Adds a function to the output
     * @param {String} f
     * @returns {undefined}
     */
    add_function(f) {
        this.target.push(new Token_1.Token(f, Token_1.Token.FUNCTION, this.lpos));
    }

    /**
     * Tokens are found between operators so this marks the location of where the last token was found
     * @param {int} position
     * @returns {undefined}
     */
    set_last_position(position) {
        this.lpos = position + 1;
    }

    /**
     * When a operator is found and added, especially a combo operator, then the column location
     * has to be adjusted to the end of the operator
     * @param {String} operator_str
     * @returns {undefined}
     */
    adjust_column_position(operator_str) {
        this.lpos = this.lpos + operator_str.length - 2;
        this.col = this.lpos - 1;
    }

    /**
     * Tokenizes the expression handed to the constructor
     * @returns {Token[]}
     * @throws {ParityError} when a bracket is left unmatched or closes the wrong bracket type
     */
    tokenize() {
        let e = this.expression;
        let L = e.length; //expression length
        let open_brackets = [];
        let has_space = false; //marks if an open space character was found
        let SPACE = ' ';
        let EMPTY_STRING = '';
        let COMMA = ',';
        let MINUS = '-';
        let MULT = '*';
        for (; this.col < L; this.col++) {
            let ch = e.charAt(this.col);
            if (this.operators.isOperator(ch)) {
                this.add_token(this.col);
                //is the last token numeric?
                let last_token_is_numeric = this.target[0] && (0, Utils_1.isNumber)(this.target[0]);
                //is this character multiplication?
                let is_multiplication = last_token_is_numeric && ch === MULT;
                //if we're in a new scope then go up by one but if the space
                //is right before an operator then it makes no sense to go up in scope
                //consider sin -x. The last position = current position at the minus sign
                //this means that we're going for sin(x) -x which is wrong
                //Ignore comma since comma is still part of the existing scope.
                if (has_space && this.lpos < this.col && !(ch === COMMA || is_multiplication)) {
                    has_space = false;
                    this.goUp();
                }
                //mark the last position, then pull in the whole run of operator characters
                this.set_last_position(this.col + 1);
                let operator_str = this.get_operator_str(e, this.col);
                this.adjust_column_position(operator_str);
                this.target.push.apply(this.target, this.chunkify(operator_str));
            }
            else if (ch in this.brackets) {
                let bracket = this.brackets[ch];
                if (bracket.is_open) {
                    //mark the bracket
                    open_brackets.push([bracket, this.lpos]);
                    let f = e.substring(this.lpos, this.col);
                    if (this.functions.getFunctionDescriptor(f)) {
                        this.add_function(f);
                    }
                    else if (f !== '') {
                        //assume multiplication
                        //TODO: Add the multiplication to stack
                        this.target.push(new Token_1.Token(f, Token_1.Token.VARIABLE_OR_LITERAL, this.lpos));
                    }
                    //go down one in scope
                    this.addScope(bracket.maps_to, this.col);
                }
                else if (bracket.is_close) {
                    //get the matching bracket
                    let pair = open_brackets.pop();
                    //throw errors accordingly
                    //missing open bracket
                    if (!pair)
                        throw new ParityError('Missing open bracket for bracket at: ' + (this.col + 1));
                    //incorrect pair. A closing bracket is expected to carry the id following its opener's
                    else if (pair[0].id !== bracket.id - 1)
                        throw new ParityError('Parity error');
                    this.add_token(this.col);
                    this.goUp();
                }
                this.set_last_position(this.col);
            }
            else if (ch === SPACE) {
                let prev = e.substring(this.lpos, this.col); //look back
                let nxt = e.charAt(this.col + 1); //look forward
                if (has_space) {
                    if (this.operators.isOperator(prev)) {
                        this.target.push(new Token_1.Token(prev, Token_1.Token.OPERATOR, this.col, this.operators.getOperator(prev)));
                    }
                    else {
                        this.add_token(undefined, prev);
                        //we're at the closing space
                        this.goUp(); //go up in scope if we're at a space
                        //assume multiplication if it's not an operator except for minus
                        let is_operator = this.operators.isOperator(nxt);
                        if ((is_operator && this.operators.getOperator(nxt).value === MINUS) || !is_operator) {
                            this.target.push(new Token_1.Token(MULT, Token_1.Token.OPERATOR, this.col, this.operators.getOperator(MULT)));
                        }
                    }
                    has_space = false; //remove the space
                }
                else {
                    //check if it's a function
                    let f = e.substring(this.lpos, this.col);
                    if (this.functions.getFunctionDescriptor(f)) {
                        //there's no need to go up in scope if the next character is an operator
                        has_space = true; //mark that a space was found
                        this.add_function(f);
                        this.addScope();
                    }
                    else if (this.operators.isOperator(f)) {
                        this.target.push(new Token_1.Token(f, Token_1.Token.OPERATOR, this.col, this.operators.getOperator(f)));
                    }
                    else {
                        this.add_token(undefined, f);
                        //A space between two operands means multiplication. Add the operator to the stack
                        //but make sure that neither neighbour is an operator or the end of the string
                        if (prev !== EMPTY_STRING && nxt !== EMPTY_STRING && !this.operators.isOperator(prev) && !this.operators.isOperator(nxt))
                            this.target.push(new Token_1.Token(MULT, Token_1.Token.OPERATOR, this.col, this.operators.getOperator(MULT)));
                    }
                }
                this.set_last_position(this.col); //mark this location
            }
        }
        //check that all brackets were closed
        if (open_brackets.length) {
            const b = open_brackets.pop();
            throw new ParityError('Missing closed bracket for bracket at ' + (b ? b[1] + 1 : 'undefined'));
        }
        //add the last token
        this.add_token(this.col);
        return this.tokens;
    }
}
//# sourceMappingURL=Tokenizer.js.map