/*
 * totorojs
 * Version:
 * 1,124 lines (986 loc) • 36.4 kB
 * JavaScript
 */
/* -----[ Tokenizer (constants) ]----- */
// Words that read_word() treats specially.  Anything NOT listed here is
// emitted as a plain "name" token -- that includes "true", "false", "null"
// and "undefined", because read_word() only consults OPERATORS and
// KEYWORDS_ATOM for words that are present in this table.  "NaN" is listed
// so that it is classified as an "atom".
var KEYWORDS = array_to_hash([
    "break",
    "case",
    "catch",
    "continue",
    "default",
    "delete",
    "do",
    "else",
    "finally",
    "for",
    "function",
    "if",
    "in",
    "instanceof",
    "new",
    "return",
    "switch",
    "throw",
    "try",
    "typeof",
    "var",
    "void",
    "while",
    "with",
    "NaN"
]);

// Future/reserved words.  Exported for other modules; not consulted by the
// tokenizer or parser in this file.
var RESERVED_WORDS = array_to_hash([
    "abstract",
    "boolean",
    "byte",
    "char",
    "class",
    "const",
    "debugger",
    "double",
    "enum",
    "export",
    "extends",
    "final",
    "float",
    "goto",
    "implements",
    "import",
    "int",
    "interface",
    "long",
    "native",
    "package",
    "private",
    "protected",
    "public",
    "short",
    "static",
    "super",
    "synchronized",
    "throws",
    "transient",
    "volatile"
]);

// Keyword tokens after which a "/" starts a regular expression literal
// rather than a division operator.
var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([
    "return",
    "new",
    "delete",
    "throw"
]);

// Keywords that are emitted as "atom" tokens (see the note on KEYWORDS:
// only entries that also appear there can actually be produced).
var KEYWORDS_ATOM = array_to_hash([
    "false",
    "null",
    "true",
    "undefined",
    "NaN"
]);

// Characters that can start (or continue) a punctuation-style operator.
var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^"));

var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;
var RE_OCT_NUMBER = /^0[0-7]+$/;
// The exponent may carry either sign ("1e-5", "1e+5").
var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[-+]?\d*(?:\d\.?|\.?\d)\d*)?$/i;

// Every operator the tokenizer may emit.  read_operator() grows an operator
// one character at a time for as long as the longer string is still listed
// here, so each compound assignment must appear explicitly.
var OPERATORS = array_to_hash([
    "in",
    "instanceof",
    "typeof",
    "new",
    "void",
    "delete",
    "++",
    "--",
    "+",
    "-",
    "!",
    "~",
    "&",
    "|",
    "^",
    "*",
    "/",
    "%",
    ">>",
    "<<",
    ">>>",
    "<",
    ">",
    "<=",
    ">=",
    "==",
    "===",
    "!=",
    "!==",
    "?",
    "=",
    "+=",
    "-=",
    "/=",
    "*=",
    "%=",
    ">>=",
    "<<=",
    ">>>=",
    "|=",
    "^=",
    "&=",
    "&&",
    "||"
]);

var WHITESPACE_CHARS = array_to_hash(characters(" \n\r\t"));

// Punctuation after which a "/" starts a regular expression literal.
var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{}(,.;:"));

// Single-character punctuation tokens ("." is handled separately because
// it may also start a number).
var PUNC_CHARS = array_to_hash(characters("[]{}(),;:"));

// Flag characters accepted after the closing "/" of a regexp literal.
var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));
/* -----[ Tokenizer ]----- */
/**
 * Tell whether the first character of `ch` is an ASCII digit or an ASCII
 * letter (either case).
 *
 * @param {string} ch - string whose first character is inspected
 * @returns {boolean}
 */
function is_alphanumeric_char(ch) {
    var code = ch.charCodeAt(0);
    var isDigit = 48 <= code && code <= 57;    // "0".."9"
    if (isDigit) return true;
    var isUpper = 65 <= code && code <= 90;    // "A".."Z"
    if (isUpper) return true;
    return 97 <= code && code <= 122;          // "a".."z"
}
/**
 * Tell whether `ch` may appear in an identifier: an ASCII letter or digit,
 * "$" or "_".
 *
 * @param {string} ch - character to classify
 * @returns {boolean}
 */
function is_identifier_char(ch) {
    if (is_alphanumeric_char(ch)) {
        return true;
    }
    return ch == "$" || ch == "_";
}
/**
 * Tell whether the first character of `ch` is an ASCII decimal digit.
 *
 * @param {string} ch - string whose first character is inspected
 * @returns {boolean}
 */
function is_digit(ch) {
    var code = ch.charCodeAt(0);
    // 48 is "0", 57 is "9"
    return 48 <= code && code <= 57;
}
/**
 * Convert the source text of a numeric literal into a number.
 *
 * The text is matched, in order, against the hexadecimal, octal and
 * decimal patterns; the first match decides how it is converted.
 *
 * @param {string} num - literal text, e.g. "0x1F", "017" or "1.5"
 * @returns {number|undefined} the numeric value, or `undefined` when the
 *     text matches none of the patterns
 */
function parse_js_number(num) {
    if (RE_HEX_NUMBER.test(num))
        return parseInt(num.substr(2), 16);   // drop the "0x" prefix
    if (RE_OCT_NUMBER.test(num))
        return parseInt(num.substr(1), 8);    // drop the leading "0"
    if (RE_DEC_NUMBER.test(num))
        return parseFloat(num);
    // no match: fall through and return undefined
}
/**
 * Error object thrown by the tokenizer and the parser.
 *
 * @constructor
 * @param {string} message - human readable description
 * @param {number} line - line of the offending token
 * @param {number} col - column of the offending token
 * @param {number} pos - character offset of the offending token
 */
function JS_Parse_Error(message, line, col, pos) {
    this.message = message;
    this.line = line;
    this.col = col;
    this.pos = pos;
    // Capture a stack trace for diagnostics.  A real Error is created for
    // this instead of provoking an unrelated TypeError, so that the trace
    // is headed by the actual parse message.
    this.stack = new Error(message).stack;
};

/**
 * Render the message, the source location and the captured stack trace.
 *
 * @returns {string}
 */
JS_Parse_Error.prototype.toString = function() {
    return this.message + " (line: " + this.line + ", col: " + this.col + ", pos: " + this.pos + ")" + "\n\n" + this.stack;
};
/**
 * Throw a JS_Parse_Error built from the given message and location.
 * This function never returns normally.
 *
 * @param {string} message
 * @param {number} line
 * @param {number} col
 * @param {number} pos
 * @throws {JS_Parse_Error} always
 */
function js_error(message, line, col, pos) {
    var failure = new JS_Parse_Error(message, line, col, pos);
    throw failure;
}
/**
 * Check a token against an expected type and, optionally, value.
 *
 * @param {{type: *, value: *}} token - token to inspect
 * @param {string} type - required token type
 * @param {*} [val] - required value; when null/undefined any value matches
 * @returns {boolean}
 */
function is_token(token, type, val) {
    if (token.type != type) {
        return false;
    }
    if (val == null) {
        return true;
    }
    return token.value == val;
}
// Sentinel thrown by next(true)/find(..., true) when the input ends in the
// middle of a token; with_eof_error() turns it into a proper parse error.
var EX_EOF = {};

/**
 * Create a tokenizer for a piece of JavaScript source.
 *
 * The returned function yields one token per call.  A token is an object
 * `{ type, value, line, col, pos, nlb }` where `line`/`col`/`pos` locate
 * the start of the token and `nlb` tells whether a newline was seen since
 * the previous token.  Token types produced here: "num", "string",
 * "regexp", "operator", "punc", "atom", "name", "keyword", "comment1",
 * "comment2" and "eof".
 *
 * The returned function also has a `context([state])` method that returns
 * the internal state object, replacing it first when one is passed.
 *
 * @param {string} $TEXT - source text
 * @param {boolean} [skip_comments] - when truthy, comments are consumed
 *     silently instead of being returned as tokens
 * @returns {function(): Object} the `next_token` function
 * @throws {JS_Parse_Error} (from the returned function) on invalid input
 */
function tokenizer($TEXT, skip_comments) {

    var S = {
        text           : $TEXT,
        pos            : 0,
        tokpos         : 0,
        line           : 0,
        tokline        : 0,
        col            : 0,
        tokcol         : 0,
        newline_before : false,
        regex_allowed  : false
    };

    // Current character without consuming it ("" at end of input).
    function peek() { return S.text.charAt(S.pos); };

    // Consume and return one character, keeping line/col/newline_before
    // up to date.  With signal_eof, running off the end throws EX_EOF.
    function next(signal_eof) {
        var ch = S.text.charAt(S.pos++);
        if (signal_eof && !ch)
            throw EX_EOF;
        if (ch == "\n") {
            S.newline_before = true;
            ++S.line;
            S.col = 0;
        } else {
            ++S.col;
        }
        return ch;
    };

    // True when the whole input has been consumed.
    function eof() {
        return !peek();
    };

    // Index of `what` at or after the current position, or -1; with
    // signal_eof a miss throws EX_EOF instead.
    function find(what, signal_eof) {
        var pos = S.text.indexOf(what, S.pos);
        if (signal_eof && pos == -1) throw EX_EOF;
        return pos;
    };

    // Remember where the token that is about to be read starts.
    function start_token() {
        S.tokline = S.line;
        S.tokcol = S.col;
        S.tokpos = S.pos;
    };

    // Build a token and update the state that depends on the previous
    // token (regex_allowed, newline_before).
    function token(type, value) {
        // After a postfix-capable "++"/"--" a "/" is a division
        // ("a++ / 2"), so those two operators do not allow a regexp.
        S.regex_allowed = ((type == "operator" && value != "++" && value != "--") ||
                           (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) ||
                           (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value)));
        var ret = {
            type  : type,
            value : value,
            line  : S.tokline,
            col   : S.tokcol,
            pos   : S.tokpos,
            nlb   : S.newline_before
        };
        S.newline_before = false;
        return ret;
    };

    function skip_whitespace() {
        while (HOP(WHITESPACE_CHARS, peek()))
            next();
    };

    // Consume characters for as long as pred(ch, index) is truthy and
    // return them as a string.
    function read_while(pred) {
        var ret = "", ch = peek(), i = 0;
        while (ch && pred(ch, i++)) {
            ret += next();
            ch = peek();
        }
        return ret;
    };

    // Throw a parse error located at the start of the current token.
    function parse_error(err) {
        js_error(err, S.tokline, S.tokcol, S.tokpos);
    };

    // Read a numeric literal.  `prefix` is text that was already consumed
    // (handle_dot() passes ".").
    function read_num(prefix) {
        var is_hex = false, prev = "";
        var num = read_while(function(ch, i){
            var ok;
            if (ch == "-" || ch == "+") {
                // A sign belongs to the number only right after the
                // exponent marker of a non-hex literal ("1e-5"); in
                // "0x1e-5" the "e" is a hex digit and "-" an operator.
                ok = (!is_hex && (prev == "e" || prev == "E")) || (i == 0 && ch == "-");
            } else {
                ok = is_alphanumeric_char(ch) || ch == ".";
                if (ch == "x" || ch == "X")
                    is_hex = true;
            }
            if (ok)
                prev = ch;
            return ok;
        });
        if (prefix)
            num = prefix + num;
        // parse_js_number() returns undefined for malformed text, which
        // isNaN() also reports as not-a-number.
        var valid = parse_js_number(num);
        if (!isNaN(valid)) {
            return token("num", valid);
        } else {
            parse_error("Invalid syntax: " + num);
        }
    };

    // Decode the character(s) following a backslash inside a string.
    function read_escaped_char() {
        var ch = next(true);
        switch (ch) {
          case "n" : return "\n";
          case "r" : return "\r";
          case "t" : return "\t";
          case "b" : return "\b";
          case "v" : return "\v";
          case "f" : return "\f";
          case "0" : return "\0";
          case "x" : return String.fromCharCode(hex_bytes(2));
          case "u" : return String.fromCharCode(hex_bytes(4));
          default  : return ch;
        }
    };

    // Read `n` hexadecimal digits and return their combined value.
    function hex_bytes(n) {
        var num = 0;
        for (; n > 0; --n) {
            var digit = parseInt(next(true), 16);
            if (isNaN(digit))
                parse_error("Invalid hex-character pattern in string");
            num = (num << 4) | digit;
        }
        return num;
    };

    function read_string() {
        return with_eof_error("Unterminated string constant", function(){
            var quote = next(), ret = "";
            for (;;) {
                var ch = next(true);
                if (ch == "\\") ch = read_escaped_char();
                else if (ch == quote) break;
                ret += ch;
            }
            return token("string", ret);
        });
    };

    // Read the rest of a "//" comment (the first "/" is already consumed).
    // The terminating "\n" is deliberately left in the input so that
    // skip_whitespace() -> next() performs the usual line counting and
    // newline_before bookkeeping for it.
    function read_line_comment() {
        next();
        var i = find("\n"), ret;
        if (i == -1) {
            ret = S.text.substr(S.pos);
            S.pos = S.text.length;
        } else {
            ret = S.text.substring(S.pos, i);
            S.pos = i;
        }
        S.col += ret.length;
        return token("comment1", ret);
    };

    // Read the rest of a "/* */" comment (the "/" is already consumed).
    function read_multiline_comment() {
        next();
        return with_eof_error("Unterminated multiline comment", function(){
            var i = find("*/", true), ret = S.text.substring(S.pos, i);
            var nlb = S.newline_before, end = i + 2;
            // Walk through the comment with next() so that line and col
            // stay correct when the comment spans several lines.
            while (S.pos < end)
                next();
            // The token's nlb describes what came BEFORE the comment ...
            S.newline_before = nlb;
            var tok = token("comment2", ret);
            // ... while newlines INSIDE it count for the following token.
            S.newline_before = ret.indexOf("\n") >= 0;
            return tok;
        });
    };

    function read_regexp() {
        return with_eof_error("Unterminated regular expression", function(){
            var prev_backslash = false, in_class = false, regexp = "", ch;
            while ((ch = next(true))) if (prev_backslash) {
                regexp += "\\" + ch;
                prev_backslash = false;
            } else if (ch == "[") {
                in_class = true;
                regexp += ch;
            } else if (ch == "]" && in_class) {
                in_class = false;
                regexp += ch;
            } else if (ch == "/" && !in_class) {
                // an unescaped "/" inside [...] does not end the literal
                break;
            } else if (ch == "\\") {
                prev_backslash = true;
            } else {
                regexp += ch;
            }
            var mods = read_while(function(ch){
                return HOP(REGEXP_MODIFIERS, ch);
            });
            return token("regexp", [ regexp, mods ]);
        });
    };

    // Read the longest operator listed in OPERATORS.  `prefix` is an
    // operator start that was already consumed.
    function read_operator(prefix) {
        function grow(op) {
            var bigger = op + peek();
            if (HOP(OPERATORS, bigger)) {
                next();
                return grow(bigger);
            } else {
                return op;
            }
        };
        return token("operator", grow(prefix || next()));
    };

    // A "/" starts a comment, a regexp literal or an operator.
    function handle_slash() {
        next();
        var reader = null;
        switch (peek()) {
          case "/": reader = read_line_comment; break;
          case "*": reader = read_multiline_comment; break;
        }
        if (!reader)
            return S.regex_allowed ? read_regexp() : read_operator("/");
        // A comment must be invisible to the surrounding syntax: whether
        // a regexp may follow, and whether a newline was seen, are carried
        // over from before the comment.
        var regex_allowed = S.regex_allowed, nlb = S.newline_before;
        var comment = reader();
        S.regex_allowed = regex_allowed;
        S.newline_before = S.newline_before || nlb;
        return skip_comments ? next_token() : comment;
    };

    // A "." either starts a number (".5") or is punctuation.
    function handle_dot() {
        next();
        return is_digit(peek())
            ? read_num(".")
            : token("punc", ".");
    };

    // Read an identifier-like word and classify it.
    function read_word() {
        var word = read_while(is_identifier_char);
        return !HOP(KEYWORDS, word)
            ? token("name", word)
            : HOP(OPERATORS, word)
            ? token("operator", word)
            : HOP(KEYWORDS_ATOM, word)
            ? token("atom", word)
            : token("keyword", word);
    };

    // Run cont(); if the input ends inside it, report `eof_error`.
    function with_eof_error(eof_error, cont) {
        try {
            return cont();
        } catch(ex) {
            if (ex === EX_EOF) parse_error(eof_error);
            else throw ex;
        }
    };

    function next_token() {
        skip_whitespace();
        start_token();
        var ch = peek();
        if (!ch) return token("eof");
        if (is_digit(ch)) return read_num();
        if (ch == '"' || ch == "'") return read_string();
        if (HOP(PUNC_CHARS, ch)) return token("punc", next());
        if (ch == ".") return handle_dot();
        if (ch == "/") return handle_slash();
        if (HOP(OPERATOR_CHARS, ch)) return read_operator();
        if (is_identifier_char(ch)) return read_word();
        parse_error("Unexpected character '" + ch + "'");
    };

    // Get the tokenizer state, optionally replacing it first.
    next_token.context = function(nc) {
        if (nc) S = nc;
        return S;
    };

    return next_token;

};
/* -----[ Parser (constants) ]----- */
// Operators accepted in front of an operand (see expr_atom).
var UNARY_PREFIX = array_to_hash([ "typeof", "void", "delete", "--", "++", "!", "~", "-", "+" ]);

// Operators accepted after an operand (see subscripts).
var UNARY_POSTFIX = array_to_hash([
    "--",
    "++"
]);
// Maps every assignment operator to the binary operator it applies:
// "+=" -> "+", ">>>=" -> ">>>", ...  Plain "=" maps to `true`.
// The list must contain every compound assignment of the language
// ("&=" included) and nothing else.
var ASSIGNMENT = (function(a, ret, i){
    while (i < a.length) {
        // strip the trailing "=" to obtain the underlying operator
        ret[a[i]] = a[i].substr(0, a[i].length - 1);
        i++;
    }
    return ret;
})(
    ["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&="],
    { "=": true },
    0
);
// Binary operator precedence table: operator -> level.  Levels start at 1
// for the first (loosest binding) group and grow by one per group.
var PRECEDENCE = (function(levels, table){
    levels.forEach(function(operators, index){
        var rank = index + 1;
        operators.forEach(function(op){
            table[op] = rank;
        });
    });
    return table;
})(
    [
        ["||"],
        ["&&"],
        ["|"],
        ["^"],
        ["&"],
        ["==", "===", "!=", "!=="],
        ["<", ">", "<=", ">=", "in", "instanceof"],
        [">>", "<<", ">>>"],
        ["+", "-"],
        ["*", "/", "%"]
    ],
    {}
);
// Statement kinds that labeled_statement() accepts after "label:".
var STATEMENTS_WITH_LABELS = array_to_hash([
    "for",
    "do",
    "while",
    "switch"
]);

// Token types that form an expression on their own (see expr_atom).
var ATOMIC_START_TOKEN = array_to_hash([
    "atom",
    "num",
    "string",
    "regexp",
    "name"
]);
/* -----[ Parser ]----- */
/**
 * Wrapper that stands in for a node's type name while also carrying the
 * first and last token of that node.  It converts to the plain name when
 * used as a string.
 *
 * @constructor
 * @param {string} str - node type name
 * @param {Object} start - first token of the node
 * @param {Object} end - last token of the node
 */
function NodeWithToken(str, start, end) {
    var node = this;
    node.name = str;
    node.start = start;
    node.end = end;
}

NodeWithToken.prototype.toString = function() {
    var label = this.name;
    return label;
};
/**
 * Parse JavaScript source into a tree of nested arrays.
 *
 * Every node is an array whose first element names the node kind
 * ("toplevel", "stat", "block", "if", "binary", ...) followed by the
 * node's operands.  With `embed_tokens`, the first element of statement
 * nodes is a NodeWithToken carrying the statement's first and last token;
 * it still converts to the kind name as a string.
 *
 * @param {string} $TEXT - source text
 * @param {boolean} [strict_semicolons] - when truthy, a missing ";" is
 *     always an error (no automatic semicolon insertion)
 * @param {boolean} [embed_tokens] - when truthy, statement nodes carry
 *     their start/end tokens
 * @returns {Array} `["toplevel", [statement, ...]]`
 * @throws {JS_Parse_Error} on a syntax error
 */
function parse($TEXT, strict_semicolons, embed_tokens) {

    var S = {
        input       : tokenizer($TEXT, true),
        token       : null,   // current token
        prev        : null,   // token before the current one
        peeked      : null,   // one-token lookahead, when requested
        in_function : 0,      // nesting depth of function bodies
        in_loop     : 0,      // nesting depth of loops/switches
        labels      : []      // labels currently in scope
    };

    S.token = next();

    // Does the current token have the given type (and value)?
    function is(type, value) {
        return is_token(S.token, type, value);
    };

    // Look at the token after the current one without consuming anything.
    function peek() { return S.peeked || (S.peeked = S.input()); };

    // Advance to the next token and return it.
    function next() {
        S.prev = S.token;
        if (S.peeked) {
            S.token = S.peeked;
            S.peeked = null;
        } else {
            S.token = S.input();
        }
        return S.token;
    };

    function prev() {
        return S.prev;
    };

    // Throw a parse error; location parts that are not given default to
    // the tokenizer's current token.
    function croak(msg, line, col, pos) {
        var ctx = S.input.context();
        js_error(msg,
                 line != null ? line : ctx.tokline,
                 col != null ? col : ctx.tokcol,
                 pos != null ? pos : ctx.tokpos);
    };

    // Throw a parse error located at `token`.  The token's own offset is
    // passed along so that line, col and pos all describe the same token
    // (the tokenizer may already be one token ahead because of peek()).
    function token_error(token, msg) {
        croak(msg, token.line, token.col, token.pos);
    };

    function unexpected(token) {
        if (token == null)
            token = S.token;
        token_error(token, "Unexpected token: " + token.type + " (" + token.value + ")");
    };

    // Consume the current token if it matches, otherwise fail.
    function expect_token(type, val) {
        if (is(type, val)) {
            return next();
        }
        token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type);
    };

    function expect(punc) { return expect_token("punc", punc); };

    // May a missing ";" be tolerated here?
    function can_insert_semicolon() {
        return !strict_semicolons && (
            S.token.nlb || is("eof") || is("punc", "}")
        );
    };

    function semicolon() {
        if (is("punc", ";")) next();
        else if (!can_insert_semicolon()) unexpected();
    };

    // Build a node from the arguments: as("if", c, b) -> ["if", c, b].
    function as() {
        return slice(arguments);
    };

    function parenthesised() {
        expect("(");
        var ex = expression();
        expect(")");
        return ex;
    };

    // Parse `func` followed by a semicolon.  If `func` fails without
    // having consumed anything and a semicolon is (or may be assumed)
    // there, the construct is simply empty and null is returned -- this
    // covers "return;" and the empty clauses of "for (;;)".
    function maybe_before_semicolon(func) {
        var start = S.token;
        try {
            return prog1(func, semicolon);
        } catch(ex) {
            if (ex instanceof JS_Parse_Error) {
                if ((S.token === start)) {
                    if (is("punc", ";"))
                        return next(), null;
                    if (can_insert_semicolon())
                        return null;
                }
            }
            throw ex;
        }
    };

    function add_tokens(str, start, end) {
        return new NodeWithToken(str, start, end);
    };

    // Statement parser; with embed_tokens the node kind is replaced by a
    // NodeWithToken spanning the statement.
    var statement = embed_tokens ? function(allow_case) {
        var start = S.token;
        var stmt = $statement(allow_case);
        stmt[0] = add_tokens(stmt[0], start, prev());
        return stmt;
    } : $statement;

    // Parse one statement.  `allow_case` permits "case"/"default", which
    // is only passed as true while parsing a switch body.
    function $statement(allow_case) {
        switch (S.token.type) {
          case "num":
          case "string":
          case "regexp":
          case "operator":
          case "atom":
            return simple_statement();

          case "name":
            // "name :" starts a labeled statement; both tokens are
            // consumed before the labeled statement itself is parsed.
            return is_token(peek(), "punc", ":")
                ? labeled_statement(prog1(S.token.value, next, next))
                : simple_statement();

          case "punc":
            switch (S.token.value) {
              case "{":
                next();
                return block();
              case "[":
              case "(":
                return simple_statement();
              case ";":
                next();
                return as("block");
              default:
                // unexpected() always throws, so control never falls
                // through into the "keyword" case below.
                unexpected();
            }

          case "keyword":
            switch (prog1(S.token.value, next)) {
              case "break":
                return break_cont("break");

              case "continue":
                return break_cont("continue");

              case "case":
                if (!allow_case)
                    unexpected();
                return as("case", prog1(expression, curry(expect, ":")));

              case "debugger":
                semicolon();
                return as("debugger");

              case "default":
                if (!allow_case)
                    unexpected();
                expect(":");
                return as("default");

              case "do":
                return (function(body){
                    expect_token("keyword", "while");
                    return as("do", prog1(parenthesised, semicolon), body);
                })(in_loop(statement));

              case "for":
                return for_();

              case "function":
                return function_(true);

              case "if":
                return if_();

              case "return":
                if (S.in_function == 0)
                    croak("'return' outside of function");
                return as("return", maybe_before_semicolon(expression));

              case "switch":
                return as("switch",
                          prog1(parenthesised, curry(expect, "{")),
                          prog1(curry(in_loop, function(){
                              var a = [];
                              while (!is("punc", "}"))
                                  a.push(statement(true));
                              return a;
                          }), next));

              case "throw":
                return as("throw", prog1(expression, semicolon));

              case "try":
                return try_();

              case "var":
                return prog1(var_, semicolon);

              case "while":
                return as("while", parenthesised(), in_loop(statement));

              case "with":
                return as("with", parenthesised(), statement());

              default:
                unexpected();
            }
        }
    };

    // The label and ":" are already consumed.  Only the statement kinds
    // in STATEMENTS_WITH_LABELS may carry a label.
    function labeled_statement(label) {
        S.labels.push(label);
        var start = S.token, stat = statement();
        if (!HOP(STATEMENTS_WITH_LABELS, stat[0]))
            unexpected(start);
        S.labels.pop();
        return as("label", label, stat);
    };

    function simple_statement() {
        return as("stat", prog1(expression, semicolon));
    };

    // "break"/"continue", optionally followed by a label in scope.
    function break_cont(type) {
        if (S.in_loop == 0)
            croak(type + " not inside a loop or switch");
        var name = is("name") ? S.token.value : null;
        if (name != null) {
            next();
            if (!member(name, S.labels))
                croak("Label " + name + " without matching loop or statement");
        }
        semicolon();
        return as(type, name);
    };

    // Body of a "{ ... }" block; the "{" is already consumed.
    function block() {
        var a = [];
        while (!is("punc", "}"))
            a.push(statement());
        next();
        return as("block", a);
    };

    function for_() {
        expect("(");
        var has_var = is("keyword", "var");
        if (has_var)
            next();
        if (is("name") && is_token(peek(), "operator", "in")) {
            // for (i in foo)
            var name = S.token.value;
            next(); next();
            var obj = expression();
            expect(")");
            return as("for-in", has_var, name, obj, in_loop(statement));
        } else {
            // classic for
            var init = maybe_before_semicolon(has_var ? var_ : expression);
            var test = maybe_before_semicolon(expression);
            var step = is("punc", ")") ? null : expression();
            expect(")");
            return as("for", init, test, step, in_loop(statement));
        }
    };

    // Function declaration (in_statement: name required, node "defun") or
    // function expression (node "function").  The "function" keyword is
    // already consumed.
    function function_(in_statement) {
        var name = is("name") ? prog1(S.token.value, next) : null;
        if (in_statement && !name)
            unexpected();
        expect("(");
        return as(in_statement ? "defun" : "function",
                  name,
                  // arguments
                  (function(first, a){
                      while (!is("punc", ")")) {
                          if (first) first = false; else expect(",");
                          if (!is("name")) unexpected();
                          a.push(S.token.value);
                          next();
                      }
                      next();
                      expect("{");
                      return a;
                  })(true, []),
                  // body
                  (function(a){
                      // "break"/"continue" and labels never reach across
                      // a function boundary, so the body is parsed with a
                      // clean loop/label context that is restored after.
                      var outer_loop = S.in_loop, outer_labels = S.labels;
                      ++S.in_function;
                      S.in_loop = 0;
                      S.labels = [];
                      while (!is("punc", "}"))
                          a.push(statement());
                      --S.in_function;
                      S.in_loop = outer_loop;
                      S.labels = outer_labels;
                      next();
                      return a;
                  })([]));
    };

    function if_() {
        var cond = parenthesised(), body = statement(), belse;
        if (is("keyword", "else")) {
            next();
            belse = statement();
        }
        return as("if", cond, body, belse);
    };

    function try_() {
        var body = statement(), bcatch, bfinally;
        if (is("keyword", "catch")) {
            next();
            expect("(");
            if (!is("name"))
                croak("Name expected");
            var name = S.token.value;
            next();
            expect(")");
            bcatch = [ name, statement() ];
        }
        if (is("keyword", "finally")) {
            next();
            bfinally = statement();
        }
        return as("try", body, bcatch, bfinally);
    };

    // Comma separated "name" / "name = value" list of a var statement.
    function vardefs() {
        var a = [];
        for (;;) {
            if (!is("name"))
                unexpected();
            var name = S.token.value;
            next();
            if (is("operator", "=")) {
                next();
                a.push([ name, expression(false) ]);
            } else {
                a.push([ name ]);
            }
            if (!is("punc", ","))
                break;
            next();
        }
        return a;
    };

    function var_() {
        return as("var", vardefs());
    };

    // "new" expression; the "new" operator is already consumed.  The
    // constructor is parsed without calls so that "(" starts the
    // argument list of the "new" itself.
    function new_() {
        var newexp = expr_atom(false), args;
        if (is("punc", "(")) {
            next();
            args = expr_list(")");
        } else {
            args = [];
        }
        return subscripts(as("new", newexp, args), true);
    };

    // Parse an operand together with its prefix operators and trailing
    // subscripts/calls.
    function expr_atom(allow_calls) {
        if (is("operator", "new")) {
            next();
            return new_();
        }
        if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) {
            return make_unary("unary-prefix",
                              prog1(S.token.value, next),
                              expr_atom(allow_calls));
        }
        if (is("punc")) {
            switch (S.token.value) {
              case "(":
                next();
                return subscripts(prog1(expression, curry(expect, ")")), allow_calls);
              case "[":
                next();
                return subscripts(array_(), allow_calls);
              case "{":
                next();
                return subscripts(object_(), allow_calls);
            }
            unexpected();
        }
        if (is("keyword", "function")) {
            next();
            return subscripts(function_(false), allow_calls);
        }
        if (HOP(ATOMIC_START_TOKEN, S.token.type)) {
            var atom = S.token.type == "regexp"
                ? as("regexp", S.token.value[0], S.token.value[1])
                : as(S.token.type, S.token.value);
            return subscripts(prog1(atom, next), allow_calls);
        }
        unexpected();
    };

    // Comma separated expressions up to (and including) `closing`.
    function expr_list(closing) {
        var first = true, a = [];
        while (!is("punc", closing)) {
            if (first) first = false; else expect(",");
            a.push(expression(false));
        }
        next();
        return a;
    };

    function array_() {
        return as("array", expr_list("]"));
    };

    // Object literal; the "{" is already consumed.
    function object_() {
        var first = true, a = [];
        while (!is("punc", "}")) {
            if (first) first = false; else expect(",");
            var name = as_property_name();
            expect(":");
            var value = expression(false);
            a.push([ name, value ]);
        }
        next();
        return as("object", a);
    };

    function as_property_name() {
        switch (S.token.type) {
          case "num":
          case "string":
            return prog1(S.token.value, next);
        }
        return as_name();
    };

    // Property names may also be words that tokenize as operators,
    // keywords or atoms.
    function as_name() {
        switch (S.token.type) {
          case "name":
          case "operator":
          case "keyword":
          case "atom":
            return prog1(S.token.value, next);
          default:
            unexpected();
        }
    };

    // Parse everything that may follow an operand: ".name", "[expr]",
    // and -- when allow_calls -- "(args)" and postfix "++"/"--".
    function subscripts(expr, allow_calls) {
        if (is("punc", ".")) {
            next();
            return subscripts(as("dot", expr, as_name()), allow_calls);
        }
        if (is("punc", "[")) {
            next();
            return subscripts(as("sub", expr, prog1(expression, curry(expect, "]"))), allow_calls);
        }
        if (allow_calls && is("punc", "(")) {
            next();
            return subscripts(as("call", expr, expr_list(")")), true);
        }
        if (allow_calls && is("operator") && HOP(UNARY_POSTFIX, S.token.value)) {
            return prog1(curry(make_unary, "unary-postfix", S.token.value, expr),
                         next);
        }
        return expr;
    };

    function make_unary(tag, op, expr) {
        if ((op == "++" || op == "--") && !is_assignable(expr))
            croak("Invalid use of " + op + " operator");
        return as(tag, op, expr);
    };

    // Precedence climbing: fold binary operators that bind tighter than
    // `min_prec` into `left`.
    function expr_op(left, min_prec) {
        var op = is("operator") ? S.token.value : null;
        var prec = op != null ? PRECEDENCE[op] : null;
        if (prec != null && prec > min_prec) {
            next();
            var right = expr_op(expr_atom(true), prec);
            return expr_op(as("binary", op, left, right), min_prec);
        }
        return left;
    };

    function expr_ops() {
        return expr_op(expr_atom(true), 0);
    };

    function maybe_conditional(commas) {
        if (arguments.length == 0)
            commas = true;
        var expr = expr_ops();
        if (is("operator", "?")) {
            next();
            var yes = expression();
            expect(":");
            return as("conditional", expr, yes, expression(commas));
        }
        return expr;
    };

    // Only names, "a.b" and "a[b]" may be assigned to or incremented.
    function is_assignable(expr) {
        expr = expr[0];
        return expr == "name" || expr == "dot" || expr == "sub";
    };

    function maybe_assign(commas) {
        if (arguments.length == 0)
            commas = true;
        var left = maybe_conditional(commas), val = S.token.value;
        if (is("operator") && HOP(ASSIGNMENT, val)) {
            if (is_assignable(left)) {
                next();
                return as("assign", ASSIGNMENT[val], left, maybe_assign(commas));
            }
            croak("Invalid assignment");
        }
        return left;
    };

    // Parse an expression.  `commas` (default true) tells whether a
    // top-level "," continues the expression as a sequence.
    function expression(commas) {
        if (arguments.length == 0)
            commas = true;
        var expr = maybe_assign(commas);
        if (commas && is("punc", ",")) {
            next();
            return as("seq", expr, expression());
        }
        return expr;
    };

    // Run `cont` with the loop depth raised by one.
    function in_loop(cont) {
        try {
            ++S.in_loop;
            return cont();
        } finally {
            --S.in_loop;
        }
    };

    return as("toplevel", (function(a){
        while (!is("eof"))
            a.push(statement());
        return a;
    })([]));

};
/* -----[ Utilities ]----- */
/**
 * Partially apply `f`: the returned function calls `f` with the extra
 * arguments given here followed by its own arguments, forwarding `this`.
 *
 * @param {Function} f - function to wrap
 * @param {...*} [bound] - leading arguments to fix
 * @returns {Function}
 */
function curry(f) {
    var toArray = Array.prototype.slice;
    var bound = toArray.call(arguments, 1);
    return function() {
        var rest = toArray.call(arguments, 0);
        var all = bound.concat(rest);
        return f.apply(this, all);
    };
}
/**
 * Produce a value, then run follow-up actions, then return the value.
 *
 * If the first argument is a function it is called and its result is the
 * value; otherwise the argument itself is the value.  Every further
 * argument is called, in order, with no arguments.
 *
 * @param {*} ret - value, or function producing the value
 * @param {...Function} [after] - functions to call afterwards
 * @returns {*} the value
 */
function prog1(ret) {
    var value = (ret instanceof Function) ? ret() : ret;
    var count = arguments.length;
    for (var idx = 1; idx < count; idx++) {
        arguments[idx]();
    }
    return value;
}
/**
 * Build a lookup table from a list: each element becomes a key whose
 * value is `true`.
 *
 * @param {Array|{length: number}} a - list of keys
 * @returns {Object}
 */
function array_to_hash(a) {
    var table = {};
    var index = 0;
    while (index < a.length) {
        table[a[index]] = true;
        index += 1;
    }
    return table;
}
/**
 * Copy an array-like object into a real array, optionally skipping
 * leading elements.
 *
 * @param {Array|{length: number}} a - array or array-like (e.g. `arguments`)
 * @param {number} [start] - index of the first element to keep; 0 when
 *     null or undefined
 * @returns {Array}
 */
function slice(a, start) {
    var from = 0;
    if (start != null) {
        from = start;
    }
    return Array.prototype.slice.call(a, from);
}
/**
 * Split a string into an array of its individual characters.
 *
 * @param {string} str
 * @returns {string[]}
 */
function characters(str) {
    var pieces = str.split("");
    return pieces;
}
/**
 * Tell whether `array` contains an element loosely equal (==) to `name`.
 *
 * @param {*} name - value to look for
 * @param {Array} array - list to search
 * @returns {boolean}
 */
function member(name, array) {
    var found = false;
    for (var idx = 0; idx < array.length && !found; idx++) {
        found = (array[idx] == name);
    }
    return found;
}
/**
 * "Has own property": true when `prop` is defined directly on `obj`
 * rather than inherited.  Works even if `obj` shadows `hasOwnProperty`.
 *
 * @param {Object} obj
 * @param {string} prop
 * @returns {boolean}
 */
function HOP(obj, prop) {
    var hasOwn = Object.prototype.hasOwnProperty;
    return hasOwn.call(obj, prop);
}
/* -----[ Exports ]----- */
// Public API of this module: the two entry points (tokenizer, parse), the
// small functional helpers, and the lookup tables defined above.
exports.tokenizer = tokenizer;
exports.parse = parse;
exports.slice = slice;
exports.curry = curry;
exports.array_to_hash = array_to_hash;
exports.PRECEDENCE = PRECEDENCE;
exports.KEYWORDS_ATOM = KEYWORDS_ATOM;
exports.RESERVED_WORDS = RESERVED_WORDS;
exports.KEYWORDS = KEYWORDS;
exports.ATOMIC_START_TOKEN = ATOMIC_START_TOKEN;
exports.is_alphanumeric_char = is_alphanumeric_char;