UNPKG

totorojs

Version:
1,124 lines (986 loc) 36.4 kB
/* -----[ Tokenizer (constants) ]----- */ var KEYWORDS = array_to_hash([ "break", "case", "catch", "continue", "default", "delete", "do", "else", "finally", "for", "function", "if", "in", "instanceof", "new", "return", "switch", "throw", "try", "typeof", "var", "void", "while", "with", "NaN" ]); var RESERVED_WORDS = array_to_hash([ "abstract", "boolean", "byte", "char", "class", "const", "debugger", "double", "enum", "export", "extends", "final", "float", "goto", "implements", "import", "int", "interface", "long", "native", "package", "private", "protected", "public", "short", "static", "super", "synchronized", "throws", "transient", "volatile" ]); var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([ "return", "new", "delete", "throw" ]); var KEYWORDS_ATOM = array_to_hash([ "false", "null", "true", "undefined", "NaN" ]); var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^")); var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i; var RE_OCT_NUMBER = /^0[0-7]+$/; var RE_DEC_NUMBER = /^\d*\.?\d*(?:e-?\d*(?:\d\.?|\.?\d)\d*)?$/i; var OPERATORS = array_to_hash([ "in", "instanceof", "typeof", "new", "void", "delete", "++", "--", "+", "-", "!", "~", "&", "|", "^", "*", "/", "%", ">>", "<<", ">>>", "<", ">", "<=", ">=", "==", "===", "!=", "!==", "?", "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "~=", "%=", "|=", "^=", "&&", "||" ]); var WHITESPACE_CHARS = array_to_hash(characters(" \n\r\t")); var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{}(,.;:")); var PUNC_CHARS = array_to_hash(characters("[]{}(),;:")); var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy")); /* -----[ Tokenizer ]----- */ function is_alphanumeric_char(ch) { ch = ch.charCodeAt(0); return (ch >= 48 && ch <= 57) || (ch >= 65 && ch <= 90) || (ch >= 97 && ch <= 122); }; function is_identifier_char(ch) { return is_alphanumeric_char(ch) || ch == "$" || ch == "_"; }; function is_digit(ch) { ch = ch.charCodeAt(0); return ch >= 48 && ch <= 57; }; function parse_js_number(num) { if (RE_HEX_NUMBER.test(num)) { return parseInt(num.substr(2), 16); } else if (RE_OCT_NUMBER.test(num)) { return parseInt(num.substr(1), 8); } else if (RE_DEC_NUMBER.test(num)) { return parseFloat(num); } }; function JS_Parse_Error(message, line, col, pos) { this.message = message; this.line = line; this.col = col; this.pos = pos; try { ({})(); } catch(ex) { this.stack = ex.stack; }; }; JS_Parse_Error.prototype.toString = function() { return this.message + " (line: " + this.line + ", col: " + this.col + ", pos: " + this.pos + ")" + "\n\n" + this.stack; }; function js_error(message, line, col, pos) { throw new JS_Parse_Error(message, line, col, pos); }; function is_token(token, type, val) { return token.type == type && (val == null || token.value == val); }; var EX_EOF = {}; function tokenizer($TEXT, skip_comments) { var S = { text : $TEXT, pos : 0, tokpos : 0, line : 0, tokline : 0, col : 0, tokcol : 0, newline_before : false, regex_allowed : false }; function peek() { return S.text.charAt(S.pos); }; function next(signal_eof) { var ch = S.text.charAt(S.pos++); if (signal_eof && !ch) throw EX_EOF; if (ch == "\n") { S.newline_before = true; ++S.line; S.col = 0; } else { ++S.col; } return ch; }; function eof() { return !S.peek(); }; function find(what, signal_eof) { var pos = S.text.indexOf(what, S.pos); if (signal_eof && pos == -1) throw EX_EOF; return pos; }; function start_token() { S.tokline = S.line; S.tokcol = S.col; S.tokpos = S.pos; }; function token(type, value) { S.regex_allowed = (type == "operator" || (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) || (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value))); var ret = { type : type, value : value, line : S.tokline, col : S.tokcol, pos : S.tokpos, nlb : S.newline_before }; S.newline_before = false; return ret; }; function skip_whitespace() { while (HOP(WHITESPACE_CHARS, peek())) next(); }; function read_while(pred) { var ret = "", ch = peek(), i = 0; while (ch && pred(ch, i++)) { ret += next(); ch = peek(); } return ret; }; function parse_error(err) { js_error(err, S.tokline, S.tokcol, S.tokpos); }; function read_num(prefix) { var num = read_while(function(ch, i){ return is_alphanumeric_char(ch) || ch == "." || (i == 0 && ch == "-"); }); if (prefix) num = prefix + num; var valid = parse_js_number(num); if (!isNaN(valid)) { return token("num", valid); } else { parse_error("Invalid syntax: " + num); } }; function read_escaped_char() { var ch = next(true); switch (ch) { case "n" : return "\n"; case "r" : return "\r"; case "t" : return "\t"; case "b" : return "\b"; case "v" : return "\v"; case "f" : return "\f"; case "0" : return "\0"; case "x" : return String.fromCharCode(hex_bytes(2)); case "u" : return String.fromCharCode(hex_bytes(4)); default : return ch; } }; function hex_bytes(n) { var num = 0; for (; n > 0; --n) { var digit = parseInt(next(true), 16); if (isNaN(digit)) parse_error("Invalid hex-character pattern in string"); num = (num << 4) | digit; } return num; }; function read_string() { return with_eof_error("Unterminated string constant", function(){ var quote = next(), ret = ""; for (;;) { var ch = next(true); if (ch == "\\") ch = read_escaped_char(); else if (ch == quote) break; ret += ch; } return token("string", ret); }); }; function read_line_comment() { next(); var i = find("\n"), ret; if (i == -1) { ret = S.text.substr(S.pos); S.pos = S.text.length; } else { ret = S.text.substring(S.pos, i); S.pos = i + 1; } return token("comment1", ret); }; function read_multiline_comment() { next(); return with_eof_error("Unterminated multiline comment", function(){ var i = find("*/", true), ret = S.text.substring(S.pos, i); S.pos = i + 2; return token("comment2", ret); }); }; function read_regexp() { return with_eof_error("Unterminated regular expression", function(){ var prev_backslash = false, regexp = "", ch; while ((ch = next(true))) if (prev_backslash) { regexp += "\\" + ch; prev_backslash = false; } else if (ch == "/") { break; } else if (ch == "\\") { prev_backslash = true; } else { regexp += ch; } var mods = read_while(function(ch){ return HOP(REGEXP_MODIFIERS, ch); }); return token("regexp", [ regexp, mods ]); }); }; function read_operator(prefix) { function grow(op) { var bigger = op + peek(); if (HOP(OPERATORS, bigger)) { next(); return grow(bigger); } else { return op; } }; return token("operator", grow(prefix || next())); }; var handle_slash = skip_comments ? function() { next(); switch (peek()) { case "/": read_line_comment(); return next_token(); case "*": read_multiline_comment(); return next_token(); } return S.regex_allowed ? read_regexp() : read_operator("/"); } : function() { next(); switch (peek()) { case "/": return read_line_comment(); case "*": return read_multiline_comment(); } return S.regex_allowed ? read_regexp() : read_operator("/"); }; function handle_dot() { next(); return is_digit(peek()) ? read_num(".") : token("punc", "."); }; function read_word() { var word = read_while(is_identifier_char); return !HOP(KEYWORDS, word) ? token("name", word) : HOP(OPERATORS, word) ? token("operator", word) : HOP(KEYWORDS_ATOM, word) ? token("atom", word) : token("keyword", word); }; function with_eof_error(eof_error, cont) { try { return cont(); } catch(ex) { if (ex === EX_EOF) parse_error(eof_error); else throw ex; } }; function next_token() { skip_whitespace(); start_token(); var ch = peek(); if (!ch) return token("eof"); if (is_digit(ch)) return read_num(); if (ch == '"' || ch == "'") return read_string(); if (HOP(PUNC_CHARS, ch)) return token("punc", next()); if (ch == ".") return handle_dot(); if (ch == "/") return handle_slash(); if (HOP(OPERATOR_CHARS, ch)) return read_operator(); if (is_identifier_char(ch)) return read_word(); parse_error("Unexpected character '" + ch + "'"); }; next_token.context = function(nc) { if (nc) S = nc; return S; }; return next_token; }; /* -----[ Parser (constants) ]----- */ var UNARY_PREFIX = array_to_hash([ "typeof", "void", "delete", "--", "++", "!", "~", "-", "+" ]); var UNARY_POSTFIX = array_to_hash([ "--", "++" ]); var ASSIGNMENT = (function(a, ret, i){ while (i < a.length) { ret[a[i]] = a[i].substr(0, a[i].length - 1); i++; } return ret; })( ["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "~=", "%=", "|=", "^="], { "=": true }, 0 ); var PRECEDENCE = (function(a, ret){ for (var i = 0, n = 1; i < a.length; ++i, ++n) { var b = a[i]; for (var j = 0; j < b.length; ++j) { ret[b[j]] = n; } } return ret; })( [ ["||"], ["&&"], ["|"], ["^"], ["&"], ["==", "===", "!=", "!=="], ["<", ">", "<=", ">=", "in", "instanceof"], [">>", "<<", ">>>"], ["+", "-"], ["*", "/", "%"] ], {} ); var STATEMENTS_WITH_LABELS = array_to_hash([ "for", "do", "while", "switch" ]); var ATOMIC_START_TOKEN = array_to_hash([ "atom", "num", "string", "regexp", "name" ]); /* -----[ Parser ]----- */ function NodeWithToken(str, start, end) { this.name = str; this.start = start; this.end = end; }; NodeWithToken.prototype.toString = function() { return this.name; }; function parse($TEXT, strict_semicolons, embed_tokens) { var S = { input: tokenizer($TEXT, true), token: null, prev: null, peeked: null, in_function: 0, in_loop: 0, labels: [] }; S.token = next(); function is(type, value) { return is_token(S.token, type, value); }; function peek() { return S.peeked || (S.peeked = S.input()); }; function next() { S.prev = S.token; if (S.peeked) { S.token = S.peeked; S.peeked = null; } else { S.token = S.input(); } return S.token; }; function prev() { return S.prev; }; function croak(msg, line, col, pos) { var ctx = S.input.context(); js_error(msg, line != null ? line : ctx.tokline, col != null ? col : ctx.tokcol, pos != null ? pos : ctx.tokpos); }; function token_error(token, msg) { croak(msg, token.line, token.col); }; function unexpected(token) { if (token == null) token = S.token; token_error(token, "Unexpected token: " + token.type + " (" + token.value + ")"); }; function expect_token(type, val) { if (is(type, val)) { return next(); } token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type); }; function expect(punc) { return expect_token("punc", punc); }; function can_insert_semicolon() { return !strict_semicolons && ( S.token.nlb || is("eof") || is("punc", "}") ); }; function semicolon() { if (is("punc", ";")) next(); else if (!can_insert_semicolon()) unexpected(); }; function as() { return slice(arguments); }; function parenthesised() { expect("("); var ex = expression(); expect(")"); return ex; }; function maybe_before_semicolon(func) { var start = S.token; try { return prog1(func, semicolon); } catch(ex) { if (ex instanceof JS_Parse_Error) { if ((S.token === start)) { if (is("punc", ";")) return next(), null; if (can_insert_semicolon()) return null; } } throw ex; } }; function add_tokens(str, start, end) { return new NodeWithToken(str, start, end); }; var statement = embed_tokens ? function(allow_case) { var start = S.token; var stmt = $statement(allow_case); stmt[0] = add_tokens(stmt[0], start, prev()); return stmt; } : $statement; function $statement(allow_case) { switch (S.token.type) { case "num": case "string": case "regexp": case "operator": case "atom": return simple_statement(); case "name": return is_token(peek(), "punc", ":") ? labeled_statement(prog1(S.token.value, next, next)) : simple_statement(); case "punc": switch (S.token.value) { case "{": next(); return block(); case "[": case "(": return simple_statement(); case ";": next(); return as("block"); default: unexpected(); } case "keyword": switch (prog1(S.token.value, next)) { case "break": return break_cont("break"); case "continue": return break_cont("continue"); case "case": if (!allow_case) unexpected(); return as("case", prog1(expression, curry(expect, ":"))); case "debugger": semicolon(); return as("debugger"); case "default": if (!allow_case) unexpected(); expect(":"); return as("default"); case "do": return (function(body){ expect_token("keyword", "while"); return as("do", prog1(parenthesised, semicolon), body); })(in_loop(statement)); case "for": return for_(); case "function": return function_(true); case "if": return if_(); case "return": if (S.in_function == 0) croak("'return' outside of function"); return as("return", maybe_before_semicolon(expression)); case "switch": return as("switch", prog1(parenthesised, curry(expect, "{")), prog1(curry(in_loop, function(){ var a = []; while (!is("punc", "}")) a.push(statement(true)); return a; }), next)); case "throw": return as("throw", prog1(expression, semicolon)); case "try": return try_(); case "var": return prog1(var_, semicolon); case "while": return as("while", parenthesised(), in_loop(statement)); case "with": return as("with", parenthesised(), statement()); default: unexpected(); } } }; function labeled_statement(label) { S.labels.push(label); var start = S.token, stat = statement(); if (!HOP(STATEMENTS_WITH_LABELS, stat[0])) unexpected(start); S.labels.pop(); return as("label", label, stat); }; function simple_statement() { return as("stat", prog1(expression, semicolon)); }; function break_cont(type) { if (S.in_loop == 0) croak(type + " not inside a loop or switch"); var name = is("name") ? S.token.value : null; if (name != null) { next(); if (!member(name, S.labels)) croak("Label " + name + " without matching loop or statement"); } semicolon(); return as(type, name); }; function block() { var a = []; while (!is("punc", "}")) a.push(statement()); next(); return as("block", a); }; function for_() { expect("("); var has_var = is("keyword", "var"); if (has_var) next(); if (is("name") && is_token(peek(), "operator", "in")) { // for (i in foo) var name = S.token.value; next(); next(); var obj = expression(); expect(")"); return as("for-in", has_var, name, obj, in_loop(statement)); } else { // classic for var init = maybe_before_semicolon(has_var ? var_ : expression); var test = maybe_before_semicolon(expression); var step = is("punc", ")") ? null : expression(); expect(")"); return as("for", init, test, step, in_loop(statement)); } }; function function_(in_statement) { var name = is("name") ? prog1(S.token.value, next) : null; if (in_statement && !name) unexpected(); expect("("); return as(in_statement ? "defun" : "function", name, // arguments (function(first, a){ while (!is("punc", ")")) { if (first) first = false; else expect(","); if (!is("name")) unexpected(); a.push(S.token.value); next(); } next(); expect("{"); return a; })(true, []), // body (function(a){ ++S.in_function; while (!is("punc", "}")) a.push(statement()); --S.in_function; next(); return a; })([])); }; function if_() { var cond = parenthesised(), body = statement(), belse; if (is("keyword", "else")) { next(); belse = statement(); } return as("if", cond, body, belse); }; function try_() { var body = statement(), bcatch, bfinally; if (is("keyword", "catch")) { next(); expect("("); if (!is("name")) croak("Name expected"); var name = S.token.value; next(); expect(")"); bcatch = [ name, statement() ]; } if (is("keyword", "finally")) { next(); bfinally = statement(); } return as("try", body, bcatch, bfinally); }; function vardefs() { var a = []; for (;;) { if (!is("name")) unexpected(); var name = S.token.value; next(); if (is("operator", "=")) { next(); a.push([ name, expression(false) ]); } else { a.push([ name ]); } if (!is("punc", ",")) break; next(); } return a; }; function var_() { return as("var", vardefs()); }; function new_() { var newexp = expr_atom(false), args; if (is("punc", "(")) { next(); args = expr_list(")"); } else { args = []; } return subscripts(as("new", newexp, args), true); }; function expr_atom(allow_calls) { if (is("operator", "new")) { next(); return new_(); } if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) { return make_unary("unary-prefix", prog1(S.token.value, next), expr_atom(allow_calls)); } if (is("punc")) { switch (S.token.value) { case "(": next(); return subscripts(prog1(expression, curry(expect, ")")), allow_calls); case "[": next(); return subscripts(array_(), allow_calls); case "{": next(); return subscripts(object_(), allow_calls); } unexpected(); } if (is("keyword", "function")) { next(); return subscripts(function_(false), allow_calls); } if (HOP(ATOMIC_START_TOKEN, S.token.type)) { var atom = S.token.type == "regexp" ? as("regexp", S.token.value[0], S.token.value[1]) : as(S.token.type, S.token.value); return subscripts(prog1(atom, next), allow_calls); } unexpected(); }; function expr_list(closing) { var first = true, a = []; while (!is("punc", closing)) { if (first) first = false; else expect(","); a.push(expression(false)); } next(); return a; }; function array_() { return as("array", expr_list("]")); }; function object_() { var first = true, a = []; while (!is("punc", "}")) { if (first) first = false; else expect(","); var name = as_property_name(); expect(":"); var value = expression(false); a.push([ name, value ]); } next(); return as("object", a); }; function as_property_name() { switch (S.token.type) { case "num": case "string": return prog1(S.token.value, next); } return as_name(); }; function as_name() { switch (S.token.type) { case "name": case "operator": case "keyword": case "atom": return prog1(S.token.value, next); default: unexpected(); } }; function subscripts(expr, allow_calls) { if (is("punc", ".")) { next(); return subscripts(as("dot", expr, as_name()), allow_calls); } if (is("punc", "[")) { next(); return subscripts(as("sub", expr, prog1(expression, curry(expect, "]"))), allow_calls); } if (allow_calls && is("punc", "(")) { next(); return subscripts(as("call", expr, expr_list(")")), true); } if (allow_calls && is("operator") && HOP(UNARY_POSTFIX, S.token.value)) { return prog1(curry(make_unary, "unary-postfix", S.token.value, expr), next); } return expr; }; function make_unary(tag, op, expr) { if ((op == "++" || op == "--") && !is_assignable(expr)) croak("Invalid use of " + op + " operator"); return as(tag, op, expr); }; function expr_op(left, min_prec) { var op = is("operator") ? S.token.value : null; var prec = op != null ? PRECEDENCE[op] : null; if (prec != null && prec > min_prec) { next(); var right = expr_op(expr_atom(true), prec); return expr_op(as("binary", op, left, right), min_prec); } return left; }; function expr_ops() { return expr_op(expr_atom(true), 0); }; function maybe_conditional(commas) { if (arguments.length == 0) commas = true; var expr = expr_ops(); if (is("operator", "?")) { next(); var yes = expression(); expect(":"); return as("conditional", expr, yes, expression(commas)); } return expr; }; function is_assignable(expr) { expr = expr[0]; return expr == "name" || expr == "dot" || expr == "sub"; }; function maybe_assign(commas) { if (arguments.length == 0) commas = true; var left = maybe_conditional(commas), val = S.token.value; if (is("operator") && HOP(ASSIGNMENT, val)) { if (is_assignable(left)) { next(); return as("assign", ASSIGNMENT[val], left, maybe_assign(commas)); } croak("Invalid assignment"); } return left; }; function expression(commas) { if (arguments.length == 0) commas = true; var expr = maybe_assign(commas); if (commas && is("punc", ",")) { next(); return as("seq", expr, expression()); } return expr; }; function in_loop(cont) { try { ++S.in_loop; return cont(); } finally { --S.in_loop; } }; return as("toplevel", (function(a){ while (!is("eof")) a.push(statement()); return a; })([])); }; /* -----[ Utilities ]----- */ function curry(f) { var args = slice(arguments, 1); return function() { return f.apply(this, args.concat(slice(arguments))); }; }; function prog1(ret) { if (ret instanceof Function) ret = ret(); for (var i = 1, n = arguments.length; --n > 0; ++i) arguments[i](); return ret; }; function array_to_hash(a) { var ret = {}; for (var i = 0; i < a.length; ++i) ret[a[i]] = true; return ret; }; function slice(a, start) { return Array.prototype.slice.call(a, start == null ? 0 : start); }; function characters(str) { return str.split(""); }; function member(name, array) { for (var i = array.length; --i >= 0;) if (array[i] == name) return true; return false; }; function HOP(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }; /* -----[ Exports ]----- */ exports.tokenizer = tokenizer; exports.parse = parse; exports.slice = slice; exports.curry = curry; exports.array_to_hash = array_to_hash; exports.PRECEDENCE = PRECEDENCE; exports.KEYWORDS_ATOM = KEYWORDS_ATOM; exports.RESERVED_WORDS = RESERVED_WORDS; exports.KEYWORDS = KEYWORDS; exports.ATOMIC_START_TOKEN = ATOMIC_START_TOKEN; exports.is_alphanumeric_char = is_alphanumeric_char;