adder-script
Version:
Python like language to execute untrusted codes in browsers and Node.js.
315 lines (272 loc) • 8.55 kB
JavaScript
;
/**
* The parser takes tokens list and convert into a tree of expressions, optimized for evaluation.
* To put it simple: it converts a list of tokens into an AST.
*
* Author: Ronen Ness.
* Since: 2016
*/
// include errors
var Errors = require("./../errors");
// dictionary of symbols that are part of the AST
var symbols = {};
// include general utils
var Utils = require("./../utils");
// require some language defs
var LanguageDefs = require("./../language/defs");
// set of operators that are words
var wordOperators = Utils.toSet(Object.keys(LanguageDefs.keywords));
// get all token types
var TokenTypes = require("./tokens");
// global scope that holds the current tokens and position we are parsing.
// these are used internally in all the helper and parsing functions.
var gscope = {
i: 0,
tokens: [],
line: undefined,
};
// function to define a new symbol type
var defineSymbol = function (id, nud, lbp, led) {
var sym = symbols[id] || {};
symbols[id] = {
lbp: sym.lbp || lbp,
nud: sym.nud || nud,
led: sym.lef || led
};
};
// define an infix symbol type
var defineInfix = function (id, lbp, rbp, led) {
rbp = rbp || lbp;
defineSymbol(id, null, lbp, led || function (left) {
return {
type: id,
left: left,
right: parseExpression(rbp)
};
});
};
// define a prefix symbol type
var definePrefix = function (id, rbp) {
defineSymbol(id, function () {
return {
type: id,
right: parseExpression(rbp)
};
});
};
// define comma and colon symbols
defineSymbol(",");
defineSymbol("blockopen", function (val) {
return val;
});
// define number symbol
defineSymbol("number", function (number) {
return number;
});
// define string symbol
defineSymbol("string", function (str) {
return str;
});
// define identifier symbol
defineSymbol("identifier", function (name) {
var token = currToken();
if (token && token.type === "(" && !wordOperators.has(name.value)) {
var args = [];
if (gscope.tokens[gscope.i + 1].v === ")") {popToken();}
else {
do {
popToken();
if (!currToken()) {
throw new Errors.SyntaxError("Missing closing parenthesis ')'");
}
args.push(parseExpression(2));
} while (currToken() && currToken().type === ",");
if (!currToken() || currToken().type !== ")") {
throw new Errors.SyntaxError("Missing closing parenthesis ')'");
}
}
popToken();
return {
type: "call",
args: args,
name: name.value
};
}
return name;
});
// define the '(' symbol
defineSymbol("(", function () {
var value = parseExpression(2);
if (currToken().type !== ")") new Errors.SyntaxError("Missing closing parenthesis!");
popToken();
return value;
});
// define the ')' symbol
defineSymbol(")");
definePrefix("-", 10);
definePrefix("+", 10);
definePrefix("not", 3);
definePrefix("for", 10);
defineInfix("*", 7);
defineInfix("**", 9, 8);
defineInfix("/", 7);
defineInfix("+", 6);
defineInfix("-", 6);
defineInfix("%", 7);
defineInfix("|", 4, 4);
defineInfix("&", 5, 5);
defineInfix("or", 3);
defineInfix("and", 3);
defineInfix("in", 4);
defineInfix("not in", 4);
defineInfix("is", 4);
defineInfix("is not", 4);
defineInfix("==", 4);
defineInfix("!=", 4);
defineInfix(">=", 4);
defineInfix("<=", 4);
defineInfix("<", 4);
defineInfix(">", 4);
defineInfix(".", 13);
// assignment operator
defineInfix("=", 1, 2, function (left) {
if (left.type === "call") {
for (var i = 0; i < left.args.length; i++) {
if (left.args[i].type !== "identifier") throw new Errors.SyntaxError("Invalid argument name '" + left.args[i].value + "'!");
}
return {
type: "function",
name: left.name,
args: left.args,
value: parseExpression(2)
};
} else if (left.type === "identifier") {
return {
type: "assign",
name: left.value,
value: parseExpression(2)
};
}
else throw new Errors.SyntaxError("Can't assign to literal!");
});
// add all assignment+ operators
var assignmentWith = ["+", "-", "*", "/", "|", "&", "%"];
for (var i = 0; i < assignmentWith.length; ++i)
{
(function(operator){
defineInfix(operator + "=", 1, 2, function (left) {
if (left.type === "call") {
for (var i = 0; i < left.args.length; i++) {
if (left.args[i].type !== "identifier") throw new Errors.SyntaxError("Invalid argument name '" + left.args[i].value + "'!");
}
return {
type: "function",
name: left.name,
args: left.args,
value: parseExpression(2)
};
} else if (left.type === "identifier") {
return {
type: "assign+",
op: operator,
name: left.value,
value: parseExpression(2)
};
}
else throw new Errors.SyntaxError("Can't assign to literal!");
});
})(assignmentWith[i]);
}
// convert a token to a symbol instance
var tokenToSymbol = function (token) {
// convert from token types to symbol types
var type;
switch (token.t)
{
case TokenTypes.operator:
case TokenTypes.punctuation:
if (token.v === ':') {type = 'blockopen';}
else {type = token.v;}
break;
case TokenTypes.number:
type = 'number';
break;
case TokenTypes.identifier:
type = 'identifier';
break;
case TokenTypes.string:
type = 'string';
break;
default:
throw new Errors.SyntaxError("Unknown token type '" + token.t + "'!");
}
// create symbol and set its type and value
var sym = Object.create(symbols[type]);
sym.type = type;
sym.value = token.v;
// return the newly created symbol
return sym;
};
// start the actual parsing!
// function to get current token
var currToken = function () {
return gscope.tokens[gscope.i] ? tokenToSymbol(gscope.tokens[gscope.i]) : null;
};
// function to get current token and advance index to next token
var popToken = function () {
var ret = currToken();
gscope.i++;
return ret;
};
// parse an expression
var parseExpression = function (rbp) {
// default rbp
if (rbp === undefined) {rbp = 0;}
// get current token and advance to next
var t = popToken();
if (t === null) {return;}
// if current token is nud type
if (!t.nud) {throw new Errors.SyntaxError("Unexpected token: " + t.type);}
// get left operand
var left = t.nud(t);
// parse next gscope.tokens
while (currToken() && rbp < currToken().lbp) {
t = popToken();
if (!t.led) {throw new Errors.SyntaxError("Unexpected token: " + t.type);}
left = t.led(left);
}
// return left operand
return left;
};
// take a list of tokens and generate a parsed tree
// @param tokens - list of tokens, output of lexer.
// @param line - is the line we are parsing (optional, used for exceptions etc)
function parse(tokens, line)
{
// set global scope of current parsing - current tokens and index
gscope.tokens = tokens;
gscope.i = 0;
gscope.line = line;
try {
// parse all tokens and return AST
var parseTree = [];
while (currToken()) {
parseTree.push(parseExpression());
}
return parseTree;
}
catch(e) {
if (e.expectedError) {
if (e.line === undefined) {
e.message += " [at line: " + gscope.line + "]";
e.line = gscope.line;
}
throw e;
}
throw new Errors.SyntaxError("Unknown syntax error!", gscope.line);
}
}
// export the parser functions
module.exports = {
parse: parse,
};