UNPKG

ajs

Version:

Asynchronous templating in Node.js

470 lines (398 loc) 12.4 kB
'use strict'; // Thanks to mishoo/uglifyjs for most of this! // [&laquo; Back to Index](index.html) var g = require('./grammar'), util = require('util'); // AJS Lexer // ------------- // The lexer accepts raw AJS source and processes it character-by-character, // creating token objects that can be interpreted by the parser to // form an AST (abstract syntax tree). var Lexer = module.exports = function Lexer(source, opts) { opts = opts || {}; this.source = source; this.length = source.length; this.tokens = []; this._curToken = null; this._line = 1; this._col = 1; this._pos = 0; this._embedChar = opts.embedChar || "%"; this._includeComments = opts.includeComments == true; this._newLineBefore = false; this._commentsBefore = []; this._regexpAllowed = false; }; Lexer.prototype.tokenize = function () { this.tokens = []; this._curToken = null; this._line = 1; this._col = 1; this._pos = 0; this._inEmbed = false; this._stripNewLine = false; this.nextToken(); while (this._curToken.type != Token.EOF) { this.nextToken(); } return this.tokens; }; Lexer.prototype.nextToken = function () { if (this._inEmbed) this._skipWhitespace(); if (this._stripNewLine) this._skipNewline(); var ch = this._peek(); if (!ch) return this._token(Token.EOF); if (ch == '<' && this._peek(1) == this._embedChar) return this._embed();else if (this._inEmbed) { if (g.is_digit(ch)) return this._number(); if (ch == '"' || ch == "'") return this._string(); if (g.is_punctuation(ch)) return this._punctuation(); if (ch == ".") return this._dot(); if (ch == "/") return this._slash(); if (g.is_operator(ch)) { if (ch === "-" && this._peek(1) == this._embedChar && this._peek(2) == ">") { return this._embed(); } else if (ch == this._embedChar && this._peek(1) == ">") return this._embed();else return this._operator(); } if (ch == "\\" || g.is_identifier_start(ch)) return this._word(); this._error("Unexpected character '" + ch + "'"); } else { return this._output(); } }; Lexer.prototype._peek = function (i) { i = i || 0; return this.source.charAt(this._pos + i); }; Lexer.prototype._next = function (throwEof) { var ch = this.source.charAt(this._pos++); if (throwEof && !ch) throw EX_EOF; if (ch == "\n") { this._newLineBefore = true; this._line++; this._col = 0; } else { this._col++; } return ch; }; Lexer.prototype._skipWhitespace = function () { while (g.is_whitespace(this._peek())) { this._next(); } }; Lexer.prototype._skipNewline = function () { // TODO Handle other new line characters while (this._peek() === '\n') { this._next(); } }; Lexer.prototype._readWhile = function (pred) { var ret = "", ch = this._peek(), i = 0; while (ch && pred(ch, i++)) { ret += this._next(); ch = this._peek(); } return ret; }; Lexer.prototype._eof = function () { return !this._peek(); }; Lexer.prototype._find = function (ch, throwEof) { var pos = this.source.indexOf(ch, this._pos); if (throwEof && pos == -1) throw EX_EOF; return pos; }; Lexer.prototype._output = function () { var i = this._find("<" + this._embedChar), text; if (i == -1) { text = this.source.substr(this._pos); this._pos = this.source.length; } else { text = this.source.substring(this._pos, i); this._pos = i; } this._line += text.split("\n").length - 1; this._newlineBefore = text.indexOf("\n") >= 0; return this._token(Token.OUTPUT, text, true); }; Lexer.prototype._embed = function () { var tag = this._next() + this._next(), ch = this._peek(); if (tag == '<' + this._embedChar) { this._inEmbed = true; if (ch == '=' || ch == '-') return this._token(Token.EMBED, this._next());else return this.nextToken(); } else if (tag == this._embedChar + '>') { this._inEmbed = false; return this.nextToken(); } else if (tag + ch == "-" + this._embedChar + '>') { this._inEmbed = false; this._stripNewLine = true; this._next(); return this.nextToken(); } else this._error('invalid embed token "' + tag + '"'); }; Lexer.prototype._punctuation = function () { return this._token(Token.PUNCTUATION, this._next()); }; Lexer.prototype._number = function (prefix) { var hasE = false, afterE = false, hasX = false, hasDot = prefix == "."; var num = this._readWhile(function (ch, i) { if (ch == "x" || ch == "X") { if (hasX) return false; return hasX = true; } if (!hasX && (ch == "E" || ch == "e")) { if (hasE) return false; return hasE = afterE = true; } if (ch == "-") { if (afterE || i == 0 && !prefix) return true; return false; } if (ch == "+") return afterE; afterE = false; if (ch == ".") { if (!hasDot && !hasX) return hasDot = true; return false; } return g.is_alphanumeric(ch); }); if (prefix) num = prefix + num; var valid = parseJSNumber(num); if (!isNaN(valid)) return this._token(Token.NUMBER, valid);else this._error("Invalid syntax: " + num); }; Lexer.prototype._escapedChar = function () { var ch = this._next(true); switch (ch) { case "n": return "\n"; case "r": return "\r"; case "t": return "\t"; case "b": return "\b"; case "v": return "\v"; case "f": return "\f"; case "0": return "\0"; case "x": return String.fromCharCode(this._hexBytes(2)); case "u": return String.fromCharCode(this._hexBytes(4)); default: return ch; } }; Lexer.prototype._hexBytes = function (n) { var num = 0; for (; n > 0; --n) { var digit = parseInt(this._next(true), 16); if (isNaN(digit)) this._error("Invalid hex-character pattern in string"); num = num << 4 | digit; } return num; }; Lexer.prototype._string = function () { var self = this; return this._withEofError("Unterminated string constant", function () { var quote = self._next(), ret = ""; for (;;) { var ch = self._next(true); if (ch == "\\") ch = self._escapedChar();else if (ch == quote) break; ret += ch; } return self._token(Token.STRING, ret); }); }; Lexer.prototype._comment = function () { this._next(); var i = this._find("\n"), ret; if (i == -1) { ret = this.source.substr(this._pos); this._pos = this.source.length; } else { var j = this._find(this._embedChar + ">"); if (j > -1 && j < i) i = j; ret = this.source.substring(this._pos, i); this._pos = i; } return this._token(Token.COMMENT, ret, true); }; Lexer.prototype._commentBlock = function () { this._next(); var self = this; return this._withEofError("Unterminated multiline comment", function () { var i = self._find("*/", true), text = self.source.substring(self._pos, i), tok = self._token(Token.COMMENT_BLOCK, text, true); self._pos = i + 2; self._line += text.split("\n").length - 1; self._newlineBefore = text.indexOf("\n") >= 0; return tok; }); }; Lexer.prototype._name = function () { var backslash = false, name = "", ch; while ((ch = this._peek()) != null) { if (!backslash) { if (ch == "\\") backslash = true, this._next();else if (g.is_identifier_char(ch)) name += this._next();else break; } else { if (ch != "u") this._error("Expecting UnicodeEscapeSequence -- uXXXX"); ch = this._escapedChar(); if (!g.is_identifier_char(ch)) this._error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier"); name += ch; backslash = false; } } return name; }; Lexer.prototype._regexp = function () { var self = this; return this._withEofError("Unterminated regular expression", function () { var ch, regexp = "", inClass = false, prevBackslash = false; while (ch = self._next(true)) { if (prevBackslash) { regexp += "\\" + ch; prevBackslash = false; } else if (ch == "[") { inClass = true; regexp += ch; } else if (ch == "]" && inClass) { inClass = false; regexp += ch; } else if (ch == "/" && !inClass) { break; } else if (ch == "\\") { prevBackslash = true; } else { regexp += ch; } }var mods = self._name(); return self._token(Token.REGEXP, [regexp, mods]); }); }; Lexer.prototype._operator = function (prefix) { var self = this; function grow(op) { if (!self._peek()) return op; var bigger = op + self._peek(); if (g.is_operator(bigger)) { self._next(); return grow(bigger); } else return op; }; return this._token(Token.OPERATOR, grow(prefix || this._next())); }; Lexer.prototype._slash = function () { this._next(); var regexpAllowed = this._regexpAllowed; switch (this._peek()) { case "/": var comment = this._comment(); if (comment) this._commentsBefore.push(comment); this._regexpAllowed = regexpAllowed; return this.nextToken(); case "*": var comment = this._commentBlock(); if (comment) this._commentsBefore.push(comment); this._regexpAllowed = regexpAllowed; return this.nextToken(); } return this._regexpAllowed ? this._regexp() : this._operator("/"); }; Lexer.prototype._dot = function () { this._next(); return g.is_digit(this._peek()) ? this._number(".") : this._token(Token.PUNCTUATION, "."); }; Lexer.prototype._word = function () { var word = this._name(); if (g.is_keyword(word)) { if (g.is_operator(word)) return this._token(Token.OPERATOR, word);else if (g.is_keyword_atom(word)) return this._token(Token.ATOM, word);else return this._token(Token.KEYWORD, word); } else return this._token(Token.NAME, word); }; Lexer.prototype._withEofError = function (message, cont) { try { return cont(); } catch (ex) { if (ex === EX_EOF) this._error(message);else throw ex; } }; Lexer.prototype._token = function (type, value, isComment) { this._regexpAllowed = type == Token.OPERATOR && !g.is_unary_postfix(value) || type == Token.KEYWORD && g.is_keyword_before_expression(value) || type == Token.PUNCTUATION && g.is_punctuation_before_expression(value); this._curToken = new Token(type, value, this._line, this._col, this._pos); if (!isComment && this._commentsBefore.length) { this._curToken.commentsBefore = this._commentsBefore; this._commentsBefore = []; } if (this._newLineBefore) { this._curToken.newLineBefore = this._newLineBefore; this._newLineBefore = false; } if (!this._curToken.isComment() || this._includeComments) this.tokens.push(this._curToken); return this._curToken; }; Lexer.prototype._error = function (message) { throw new Error(message); // should be ParseError }; var Token = module.exports.Token = function (type, value, line, col, pos) { if (typeof type == 'undefined') throw new Error('undefined token type'); this.type = type; this.name = Token[type]; this.value = value; this.line = line; this.col = col; this.pos = pos; }; Token.prototype.isComment = function () { return this.type == Token.COMMENT || this.type == Token.COMMENT_BLOCK; }; Token.prototype.toString = function () { return '[' + this.type + ', ' + this.value + ']'; }; var tokenTypes = ['output', 'embed', 'operator', 'keyword', 'atom', 'name', 'punctuation', 'string', 'number', 'regexp', 'comment', 'comment_block', 'eof']; tokenTypes.forEach(function (name) { name = name.toUpperCase(); Token[name] = 'T_' + name; Token[Token[name]] = name; }); var ParseError = module.exports.ParseError = function (message, line, col, pos) { this.message = message; this.line = line; this.col = col; this.pos = pos; try { ({})(); } catch (ex) { this.stack = ex.stack; }; }; /* utilities */ var EX_EOF = {}; var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i; var RE_OCT_NUMBER = /^0[0-7]+$/; var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i; function parseJSNumber(num) { if (RE_HEX_NUMBER.test(num)) { return parseInt(num.substr(2), 16); } else if (RE_OCT_NUMBER.test(num)) { return parseInt(num.substr(1), 8); } else if (RE_DEC_NUMBER.test(num)) { return parseFloat(num); } };