// d2-ui
// Version:
// 1,860 lines (1,578 loc) • 45.9 kB
// JavaScript
/*
* Lexical analysis and token construction.
*/
"use strict";
var _ = require("lodash");
var events = require("events");
var reg = require("./reg.js");
var state = require("./state.js").state;
var unicodeData = require("../data/ascii-identifier-data.js");
var asciiIdentifierStartTable = unicodeData.asciiIdentifierStartTable;
var asciiIdentifierPartTable = unicodeData.asciiIdentifierPartTable;
var nonAsciiIdentifierStartTable = require("../data/non-ascii-identifier-start.js");
var nonAsciiIdentifierPartTable = require("../data/non-ascii-identifier-part-only.js");
// Some of these token types are from JavaScript Parser API
// while others are specific to JSHint parser.
// JS Parser API: https://developer.mozilla.org/en-US/docs/SpiderMonkey/Parser_API
// Numeric tags for every kind of raw token the scanners can return. Tags are
// consecutive integers starting at 1, assigned in the order listed below.
var Token = [
  "Identifier",
  "Punctuator",
  "NumericLiteral",
  "StringLiteral",
  "Comment",
  "Keyword",
  "NullLiteral",
  "BooleanLiteral",
  "RegExp",
  "TemplateHead",
  "TemplateMiddle",
  "TemplateTail",
  "NoSubstTemplate"
].reduce(function(tags, name, position) {
  tags[name] = position + 1;
  return tags;
}, {});
// Kinds of entries kept on the lexer's context stack.
var Context = {};
Context.Block = 1;
Context.Template = 2;
// Object that queues postponed lexing verifications: checks that inspect
// the parsed environment state once it is available.
function asyncTrigger() {
  // Callbacks waiting for the next check() call, in insertion order.
  var pending = [];

  // Queue a callback for later execution.
  function push(fn) {
    pending.push(fn);
  }

  // Run every queued callback in order, then empty the queue. The length is
  // re-read on each pass, so callbacks queued while checking also run.
  function check() {
    for (var position = 0; position < pending.length; position += 1) {
      pending[position]();
    }
    pending.length = 0;
  }

  return { push: push, check: check };
}
/*
* Lexer for JSHint.
*
* This object does a char-by-char scan of the provided source code
* and produces a sequence of tokens.
*
* var lex = new Lexer("var i = 0;");
* lex.start();
* lex.token(); // returns the next token
*
* You have to use the token() method to move the lexer forward
* but you don't have to use its return value to get tokens. In addition
* to token() method returning the next token, the Lexer object also
* emits events.
*
* lex.on("Identifier", function(data) {
* if (data.name.indexOf("_") >= 0) {
* // Produce a warning.
* }
* });
*
* Note that the token() method returns tokens in a JSLint-compatible
* format while the event emitter uses a slightly modified version of
* Mozilla's JavaScript Parser API. Eventually, we will move away from
* JSLint format.
*/
function Lexer(source) {
  // `source` is either the program text or an already split array of lines.
  // Text is split on \r\n, \r or \n; an array is used (and mutated) as is.
  var rows = (typeof source === "string") ? source.split(/\r\n|\r|\n/) : source;

  // A leading shebang line (#!) is blanked out so it is never tokenized.
  // Shebangs that mention "node" also switch on the `node` option.
  var firstRow = rows[0];
  if (firstRow && firstRow.slice(0, 2) === "#!") {
    if (firstRow.indexOf("node") !== -1) {
      state.option.node = true;
    }
    rows[0] = "";
  }

  this.emitter = new events.EventEmitter();
  this.source = source;
  this.setLines(rows);
  this.prereg = true;
  this.line = 0;
  this.char = 1;
  this.from = 1;
  this.input = "";
  this.inComment = false;
  this.context = [];
  this.templateStarts = [];

  // Append one space to state.tab per unit of the `indent` option.
  var appended = 0;
  while (appended < state.option.indent) {
    state.tab += " ";
    appended += 1;
  }
}
Lexer.prototype = {
_lines: [],
inContext: function(ctxType) {
return this.context.length > 0 && this.context[this.context.length - 1].type === ctxType;
},
pushContext: function(ctxType) {
this.context.push({ type: ctxType });
},
popContext: function() {
return this.context.pop();
},
isContext: function(context) {
return this.context.length > 0 && this.context[this.context.length - 1] === context;
},
currentContext: function() {
return this.context.length > 0 && this.context[this.context.length - 1];
},
getLines: function() {
this._lines = state.lines;
return this._lines;
},
setLines: function(val) {
this._lines = val;
state.lines = this._lines;
},
/*
* Return the character i positions ahead of the char pointer (the
* current character when i is omitted) without moving the pointer.
*/
peek: function(i) {
return this.input.charAt(i || 0);
},
/*
* Move the char pointer forward by i characters (one if i is omitted
* or zero).
*/
skip: function(i) {
i = i || 1;
this.char += i;
this.input = this.input.slice(i);
},
/*
* Subscribe to a token event. The API for this method is similar to
* Underscore.js, i.e. you can subscribe to multiple events with
* one call:
*
* lex.on("Identifier Number", function(data) {
* // ...
* });
*/
on: function(names, listener) {
names.split(" ").forEach(function(name) {
this.emitter.on(name, listener);
}.bind(this));
},
/*
* Trigger a token event. All arguments will be passed to each
* listener.
*/
trigger: function() {
this.emitter.emit.apply(this.emitter, Array.prototype.slice.call(arguments));
},
/*
* Postpone a token event. The condition to check is passed as the
* last parameter, and the trigger function is called from a stored
* callback, to be invoked later by the parser through the check()
* function. This prevents the parser's peek() from giving the lexer
* a false context.
*/
triggerAsync: function(type, args, checks, fn) {
checks.push(function() {
if (fn()) {
this.trigger(type, args);
}
}.bind(this));
},
/*
* Extract a punctuator out of the next sequence of characters
* or return 'null' if it's not possible.
*
* This method's implementation was heavily influenced by the
* scanPunctuator function in the Esprima parser's source code.
*/
scanPunctuator: function() {
var ch1 = this.peek();
var ch2, ch3, ch4;
switch (ch1) {
// Most common single-character punctuators
case ".":
if ((/^[0-9]$/).test(this.peek(1))) {
return null;
}
if (this.peek(1) === "." && this.peek(2) === ".") {
return {
type: Token.Punctuator,
value: "..."
};
}
/* falls through */
case "(":
case ")":
case ";":
case ",":
case "[":
case "]":
case ":":
case "~":
case "?":
return {
type: Token.Punctuator,
value: ch1
};
// A block/object opener
case "{":
this.pushContext(Context.Block);
return {
type: Token.Punctuator,
value: ch1
};
// A block/object closer
case "}":
if (this.inContext(Context.Block)) {
this.popContext();
}
return {
type: Token.Punctuator,
value: ch1
};
// A pound sign (for Node shebangs)
case "#":
return {
type: Token.Punctuator,
value: ch1
};
// We're at the end of input
case "":
return null;
}
// Peek more characters
ch2 = this.peek(1);
ch3 = this.peek(2);
ch4 = this.peek(3);
// 4-character punctuator: >>>=
if (ch1 === ">" && ch2 === ">" && ch3 === ">" && ch4 === "=") {
return {
type: Token.Punctuator,
value: ">>>="
};
}
// 3-character punctuators: === !== >>> <<= >>=
if (ch1 === "=" && ch2 === "=" && ch3 === "=") {
return {
type: Token.Punctuator,
value: "==="
};
}
if (ch1 === "!" && ch2 === "=" && ch3 === "=") {
return {
type: Token.Punctuator,
value: "!=="
};
}
if (ch1 === ">" && ch2 === ">" && ch3 === ">") {
return {
type: Token.Punctuator,
value: ">>>"
};
}
if (ch1 === "<" && ch2 === "<" && ch3 === "=") {
return {
type: Token.Punctuator,
value: "<<="
};
}
if (ch1 === ">" && ch2 === ">" && ch3 === "=") {
return {
type: Token.Punctuator,
value: ">>="
};
}
// Fat arrow punctuator
if (ch1 === "=" && ch2 === ">") {
return {
type: Token.Punctuator,
value: ch1 + ch2
};
}
// 2-character punctuators: <= >= == != ++ -- << >> && ||
// += -= *= %= &= |= ^= /=
if (ch1 === ch2 && ("+-<>&|".indexOf(ch1) >= 0)) {
return {
type: Token.Punctuator,
value: ch1 + ch2
};
}
if ("<>=!+-*%&|^/".indexOf(ch1) >= 0) {
if (ch2 === "=") {
return {
type: Token.Punctuator,
value: ch1 + ch2
};
}
return {
type: Token.Punctuator,
value: ch1
};
}
return null;
},
/*
* Extract a comment out of the next sequence of characters and/or
* lines or return 'null' if it's not possible. Since comments can
* span across multiple lines this method has to move the char
* pointer.
*
* In addition to normal JavaScript comments (// and /*) this method
* also recognizes JSHint- and JSLint-specific comments such as
* /*jshint, /*jslint, /*globals and so on.
*/
scanComments: function() {
// Recognises a comment at the current input position. Returns a
// Token.Comment descriptor built by commentToken() below, or null when the
// input does not start a comment. A stray "*/" is reported as E018, skipped,
// and also yields null.
var ch1 = this.peek();
var ch2 = this.peek(1);
// Everything after the two-character opener; this is the body of a
// single-line comment.
var rest = this.input.substr(2);
// Where the comment starts; used when reporting E017/E018.
var startLine = this.line;
var startChar = this.char;
var self = this;
// Create a comment token object and make sure it
// has all the data JSHint needs to work with special
// comments.
function commentToken(label, body, opt) {
var special = ["jshint", "jslint", "members", "member", "globals", "global", "exported"];
var isSpecial = false;
// `value` keeps the comment text as written (label + body, plus the closer
// for multi-line comments); it is computed before `body` is normalised.
var value = label + body;
var commentType = "plain";
opt = opt || {};
if (opt.isMultiline) {
value += "*/";
}
// Flatten newlines so the matching below also works on comments that
// span several lines.
body = body.replace(/\n/g, " ");
if (label === "/*" && reg.fallsThrough.test(body)) {
isSpecial = true;
commentType = "falls through";
}
// Try each directive name in turn; the first one that matches wins because
// every later iteration returns early once isSpecial is set.
special.forEach(function(str) {
if (isSpecial) {
return;
}
// Don't recognize any special comments other than jshint for single-line
// comments. This introduced many problems with legit comments.
if (label === "//" && str !== "jshint") {
return;
}
// Form 1: the directive name immediately follows the comment opener and is
// itself followed by a space. The name moves from `body` into `label`.
if (body.charAt(str.length) === " " && body.substr(0, str.length) === str) {
isSpecial = true;
label = label + str;
body = body.substr(str.length);
}
// Form 2: exactly one space between the opener and the directive name.
if (!isSpecial && body.charAt(0) === " " && body.charAt(str.length + 1) === " " &&
body.substr(1, str.length) === str) {
isSpecial = true;
label = label + " " + str;
body = body.substr(str.length + 1);
}
if (!isSpecial) {
return;
}
// Singular directive names are reported under their plural type.
switch (str) {
case "member":
commentType = "members";
break;
case "global":
commentType = "globals";
break;
default:
// A body of the form "ignore:start" / "ignore:end" toggles
// ignoringLinterErrors on the lexer, and the token is then not flagged as
// special.
var options = body.split(":").map(function(v) {
return v.replace(/^\s+/, "").replace(/\s+$/, "");
});
if (options.length === 2) {
switch (options[0]) {
case "ignore":
switch (options[1]) {
case "start":
self.ignoringLinterErrors = true;
isSpecial = false;
break;
case "end":
self.ignoringLinterErrors = false;
isSpecial = false;
break;
}
}
}
commentType = str;
}
});
return {
type: Token.Comment,
commentType: commentType,
value: value,
body: body,
isSpecial: isSpecial,
isMultiline: opt.isMultiline || false,
isMalformed: opt.isMalformed || false
};
}
// End of unbegun comment. Raise an error and skip that input.
if (ch1 === "*" && ch2 === "/") {
this.trigger("error", {
code: "E018",
line: startLine,
character: startChar
});
this.skip(2);
return null;
}
// Comments must start either with // or /*
if (ch1 !== "/" || (ch2 !== "*" && ch2 !== "/")) {
return null;
}
// One-line comment
if (ch2 === "/") {
this.skip(this.input.length); // Skip to the EOL.
return commentToken("//", rest);
}
var body = "";
/* Multi-line comment */
if (ch2 === "*") {
// inComment stays set while following lines are loaded via nextLine(),
// which reads this flag.
this.inComment = true;
this.skip(2);
// Accumulate characters until the closing "*/" is found.
while (this.peek() !== "*" || this.peek(1) !== "/") {
if (this.peek() === "") { // End of Line
body += "\n";
// If we hit EOF and our comment is still unclosed,
// trigger an error and end the comment implicitly.
if (!this.nextLine()) {
this.trigger("error", {
code: "E017",
line: startLine,
character: startChar
});
this.inComment = false;
return commentToken("/*", body, {
isMultiline: true,
isMalformed: true
});
}
} else {
body += this.peek();
this.skip();
}
}
// Consume the closing "*/".
this.skip(2);
this.inComment = false;
return commentToken("/*", body, { isMultiline: true });
}
},
/*
* Extract a keyword out of the next sequence of characters or
* return 'null' if it's not possible.
*/
scanKeyword: function() {
var result = /^[a-zA-Z_$][a-zA-Z0-9_$]*/.exec(this.input);
var keywords = [
"if", "in", "do", "var", "for", "new",
"try", "let", "this", "else", "case",
"void", "with", "enum", "while", "break",
"catch", "throw", "const", "yield", "class",
"super", "return", "typeof", "delete",
"switch", "export", "import", "default",
"finally", "extends", "function", "continue",
"debugger", "instanceof"
];
if (result && keywords.indexOf(result[0]) >= 0) {
return {
type: Token.Keyword,
value: result[0]
};
}
return null;
},
/*
* Extract a JavaScript identifier out of the next sequence of
* characters or return 'null' if it's not possible. In addition
* to Identifier, this method can also produce BooleanLiteral
* (true/false) and NullLiteral (null).
*/
scanIdentifier: function() {
var id = "";
var index = 0;
var type, char;
function isNonAsciiIdentifierStart(code) {
return nonAsciiIdentifierStartTable.indexOf(code) > -1;
}
function isNonAsciiIdentifierPart(code) {
return isNonAsciiIdentifierStart(code) || nonAsciiIdentifierPartTable.indexOf(code) > -1;
}
function isHexDigit(str) {
return (/^[0-9a-fA-F]$/).test(str);
}
var readUnicodeEscapeSequence = function() {
/*jshint validthis:true */
index += 1;
if (this.peek(index) !== "u") {
return null;
}
var ch1 = this.peek(index + 1);
var ch2 = this.peek(index + 2);
var ch3 = this.peek(index + 3);
var ch4 = this.peek(index + 4);
var code;
if (isHexDigit(ch1) && isHexDigit(ch2) && isHexDigit(ch3) && isHexDigit(ch4)) {
code = parseInt(ch1 + ch2 + ch3 + ch4, 16);
if (asciiIdentifierPartTable[code] || isNonAsciiIdentifierPart(code)) {
index += 5;
return "\\u" + ch1 + ch2 + ch3 + ch4;
}
return null;
}
return null;
}.bind(this);
var getIdentifierStart = function() {
/*jshint validthis:true */
var chr = this.peek(index);
var code = chr.charCodeAt(0);
if (code === 92) {
return readUnicodeEscapeSequence();
}
if (code < 128) {
if (asciiIdentifierStartTable[code]) {
index += 1;
return chr;
}
return null;
}
if (isNonAsciiIdentifierStart(code)) {
index += 1;
return chr;
}
return null;
}.bind(this);
var getIdentifierPart = function() {
/*jshint validthis:true */
var chr = this.peek(index);
var code = chr.charCodeAt(0);
if (code === 92) {
return readUnicodeEscapeSequence();
}
if (code < 128) {
if (asciiIdentifierPartTable[code]) {
index += 1;
return chr;
}
return null;
}
if (isNonAsciiIdentifierPart(code)) {
index += 1;
return chr;
}
return null;
}.bind(this);
function removeEscapeSequences(id) {
return id.replace(/\\u([0-9a-fA-F]{4})/g, function(m0, codepoint) {
return String.fromCharCode(parseInt(codepoint, 16));
});
}
char = getIdentifierStart();
if (char === null) {
return null;
}
id = char;
for (;;) {
char = getIdentifierPart();
if (char === null) {
break;
}
id += char;
}
switch (id) {
case "true":
case "false":
type = Token.BooleanLiteral;
break;
case "null":
type = Token.NullLiteral;
break;
default:
type = Token.Identifier;
}
return {
type: type,
value: removeEscapeSequences(id),
text: id,
tokenLength: id.length
};
},
/*
* Extract a numeric literal out of the next sequence of
* characters or return 'null' if it's not possible. This method
* supports all numeric literals described in section 7.8.3
* of the EcmaScript 5 specification.
*
* This method's implementation was heavily influenced by the
* scanNumericLiteral function in the Esprima parser's source code.
*/
scanNumericLiteral: function() {
// Returns a Token.NumericLiteral descriptor or null. Descriptors carry
// `value` (the literal text) and `isMalformed`; all but the "bare prefix"
// case also carry `base`, and prefixed/legacy-octal literals carry
// `isLegacy`. The char pointer is not moved.
var index = 0;
var value = "";
var length = this.input.length;
var char = this.peek(index);
var bad;
// isDecimalDigit is a function declaration further down, so it is already
// hoisted and usable here.
var isAllowedDigit = isDecimalDigit;
var base = 10;
var isLegacy = false;
function isDecimalDigit(str) {
return (/^[0-9]$/).test(str);
}
function isOctalDigit(str) {
return (/^[0-7]$/).test(str);
}
function isBinaryDigit(str) {
return (/^[01]$/).test(str);
}
function isHexDigit(str) {
return (/^[0-9a-fA-F]$/).test(str);
}
function isIdentifierStart(ch) {
return (ch === "$") || (ch === "_") || (ch === "\\") ||
(ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z");
}
// Numbers must start either with a decimal digit or a point.
if (char !== "." && !isDecimalDigit(char)) {
return null;
}
if (char !== ".") {
value = this.peek(index);
index += 1;
char = this.peek(index);
// A leading zero may introduce a base prefix. The checks below are
// independent `if`s; at most one of them matches a given second character.
if (value === "0") {
// Base-16 numbers.
if (char === "x" || char === "X") {
isAllowedDigit = isHexDigit;
base = 16;
index += 1;
value += char;
}
// Base-8 numbers.
if (char === "o" || char === "O") {
isAllowedDigit = isOctalDigit;
base = 8;
if (!state.inES6(true)) {
this.trigger("warning", {
code: "W119",
line: this.line,
character: this.char,
data: [ "Octal integer literal", "6" ]
});
}
index += 1;
value += char;
}
// Base-2 numbers.
if (char === "b" || char === "B") {
isAllowedDigit = isBinaryDigit;
base = 2;
if (!state.inES6(true)) {
this.trigger("warning", {
code: "W119",
line: this.line,
character: this.char,
data: [ "Binary integer literal", "6" ]
});
}
index += 1;
value += char;
}
// Legacy base-8 numbers.
if (isOctalDigit(char)) {
isAllowedDigit = isOctalDigit;
base = 8;
isLegacy = true;
bad = false;
index += 1;
value += char;
}
// Decimal numbers that start with '0' such as '09' are illegal
// but we still parse them and return as malformed.
if (!isOctalDigit(char) && isDecimalDigit(char)) {
index += 1;
value += char;
}
}
// Consume the remaining digits permitted by the selected base.
while (index < length) {
char = this.peek(index);
if (isLegacy && isDecimalDigit(char)) {
// Numbers like '019' (note the 9) are not valid octals
// but we still parse them and mark as malformed.
bad = true;
} else if (!isAllowedDigit(char)) {
break;
}
value += char;
index += 1;
}
// Non-decimal literals are complete at this point and are returned
// here; they take no fraction or exponent.
if (isAllowedDigit !== isDecimalDigit) {
// Only the prefix was read and no digits followed it.
if (!isLegacy && value.length <= 2) { // 0x
return {
type: Token.NumericLiteral,
value: value,
isMalformed: true
};
}
// An identifier-start character right after the digits means this is
// not a numeric literal.
if (index < length) {
char = this.peek(index);
if (isIdentifierStart(char)) {
return null;
}
}
return {
type: Token.NumericLiteral,
value: value,
base: base,
isLegacy: isLegacy,
isMalformed: false
};
}
}
// Decimal digits.
if (char === ".") {
value += char;
index += 1;
while (index < length) {
char = this.peek(index);
if (!isDecimalDigit(char)) {
break;
}
value += char;
index += 1;
}
}
// Exponent part.
if (char === "e" || char === "E") {
value += char;
index += 1;
char = this.peek(index);
if (char === "+" || char === "-") {
value += this.peek(index);
index += 1;
}
char = this.peek(index);
if (isDecimalDigit(char)) {
value += char;
index += 1;
while (index < length) {
char = this.peek(index);
if (!isDecimalDigit(char)) {
break;
}
value += char;
index += 1;
}
} else {
// An exponent marker with no digits after it: give up on this input.
return null;
}
}
// As above, a directly following identifier-start character rejects the
// whole literal.
if (index < length) {
char = this.peek(index);
if (isIdentifierStart(char)) {
return null;
}
}
// The global isFinite() converts the collected string to a number before
// testing it.
return {
type: Token.NumericLiteral,
value: value,
base: base,
isMalformed: !isFinite(value)
};
},
// Assumes previously parsed character was \ (=== '\\') and was not skipped.
scanEscapeSequence: function(checks) {
var allowNewLine = false;
var jump = 1;
this.skip();
var char = this.peek();
switch (char) {
case "'":
this.triggerAsync("warning", {
code: "W114",
line: this.line,
character: this.char,
data: [ "\\'" ]
}, checks, function() {return state.jsonMode; });
break;
case "b":
char = "\\b";
break;
case "f":
char = "\\f";
break;
case "n":
char = "\\n";
break;
case "r":
char = "\\r";
break;
case "t":
char = "\\t";
break;
case "0":
char = "\\0";
// Octal literals fail in strict mode.
// Check if the number is between 00 and 07.
var n = parseInt(this.peek(1), 10);
this.triggerAsync("warning", {
code: "W115",
line: this.line,
character: this.char
}, checks,
function() { return n >= 0 && n <= 7 && state.isStrict(); });
break;
case "u":
var hexCode = this.input.substr(1, 4);
var code = parseInt(hexCode, 16);
if (isNaN(code)) {
this.trigger("warning", {
code: "W052",
line: this.line,
character: this.char,
data: [ "u" + hexCode ]
});
}
char = String.fromCharCode(code);
jump = 5;
break;
case "v":
this.triggerAsync("warning", {
code: "W114",
line: this.line,
character: this.char,
data: [ "\\v" ]
}, checks, function() { return state.jsonMode; });
char = "\v";
break;
case "x":
var x = parseInt(this.input.substr(1, 2), 16);
this.triggerAsync("warning", {
code: "W114",
line: this.line,
character: this.char,
data: [ "\\x-" ]
}, checks, function() { return state.jsonMode; });
char = String.fromCharCode(x);
jump = 3;
break;
case "\\":
char = "\\\\";
break;
case "\"":
char = "\\\"";
break;
case "/":
break;
case "":
allowNewLine = true;
char = "";
break;
}
return { char: char, jump: jump, allowNewLine: allowNewLine };
},
/*
* Extract a template literal out of the next sequence of characters
* and/or lines or return 'null' if it's not possible. Since template
* literals can span across multiple lines, this method has to move
* the char pointer.
*/
scanTemplateLiteral: function(checks) {
var tokenType;
var value = "";
var ch;
var startLine = this.line;
var startChar = this.char;
var depth = this.templateStarts.length;
if (this.peek() === "`") {
if (!state.inES6(true)) {
this.trigger("warning", {
code: "W119",
line: this.line,
character: this.char,
data: ["template literal syntax", "6"]
});
}
// Template must start with a backtick.
tokenType = Token.TemplateHead;
this.templateStarts.push({ line: this.line, char: this.char });
depth = this.templateStarts.length;
this.skip(1);
this.pushContext(Context.Template);
} else if (this.inContext(Context.Template) && this.peek() === "}") {
// If we're in a template context, and we have a '}', lex a TemplateMiddle.
tokenType = Token.TemplateMiddle;
} else {
// Go lex something else.
return null;
}
while (this.peek() !== "`") {
while ((ch = this.peek()) === "") {
value += "\n";
if (!this.nextLine()) {
// Unclosed template literal --- point to the starting "`"
var startPos = this.templateStarts.pop();
this.trigger("error", {
code: "E052",
line: startPos.line,
character: startPos.char
});
return {
type: tokenType,
value: value,
startLine: startLine,
startChar: startChar,
isUnclosed: true,
depth: depth,
context: this.popContext()
};
}
}
if (ch === '$' && this.peek(1) === '{') {
value += '${';
this.skip(2);
return {
type: tokenType,
value: value,
startLine: startLine,
startChar: startChar,
isUnclosed: false,
depth: depth,
context: this.currentContext()
};
} else if (ch === '\\') {
var escape = this.scanEscapeSequence(checks);
value += escape.char;
this.skip(escape.jump);
} else if (ch !== '`') {
// Otherwise, append the value and continue.
value += ch;
this.skip(1);
}
}
// Final value is either NoSubstTemplate or TemplateTail
tokenType = tokenType === Token.TemplateHead ? Token.NoSubstTemplate : Token.TemplateTail;
this.skip(1);
this.templateStarts.pop();
return {
type: tokenType,
value: value,
startLine: startLine,
startChar: startChar,
isUnclosed: false,
depth: depth,
context: this.popContext()
};
},
/*
* Extract a string out of the next sequence of characters and/or
* lines or return 'null' if it's not possible. Since strings can
* span across multiple lines this method has to move the char
* pointer.
*
* This method recognizes pseudo-multiline JavaScript strings:
*
* var str = "hello\
* world";
*/
scanStringLiteral: function(checks) {
/*jshint loopfunc:true */
var quote = this.peek();
// String must start with a quote.
if (quote !== "\"" && quote !== "'") {
return null;
}
// In JSON strings must always use double quotes.
this.triggerAsync("warning", {
code: "W108",
line: this.line,
character: this.char // +1?
}, checks, function() { return state.jsonMode && quote !== "\""; });
var value = "";
var startLine = this.line;
var startChar = this.char;
var allowNewLine = false;
this.skip();
while (this.peek() !== quote) {
if (this.peek() === "") { // End Of Line
// If an EOL is not preceded by a backslash, show a warning
// and proceed like it was a legit multi-line string where
// author simply forgot to escape the newline symbol.
//
// Another approach is to implicitly close a string on EOL
// but it generates too many false positives.
if (!allowNewLine) {
this.trigger("warning", {
code: "W112",
line: this.line,
character: this.char
});
} else {
allowNewLine = false;
// Otherwise show a warning if multistr option was not set.
// For JSON, show warning no matter what.
this.triggerAsync("warning", {
code: "W043",
line: this.line,
character: this.char
}, checks, function() { return !state.option.multistr; });
this.triggerAsync("warning", {
code: "W042",
line: this.line,
character: this.char
}, checks, function() { return state.jsonMode && state.option.multistr; });
}
// If we get an EOF inside of an unclosed string, show an
// error and implicitly close it at the EOF point.
if (!this.nextLine()) {
this.trigger("error", {
code: "E029",
line: startLine,
character: startChar
});
return {
type: Token.StringLiteral,
value: value,
startLine: startLine,
startChar: startChar,
isUnclosed: true,
quote: quote
};
}
} else { // Any character other than End Of Line
allowNewLine = false;
var char = this.peek();
var jump = 1; // A length of a jump, after we're done
// parsing this character.
if (char < " ") {
// Warn about a control character in a string.
this.trigger("warning", {
code: "W113",
line: this.line,
character: this.char,
data: [ "<non-printable>" ]
});
}
// Special treatment for some escaped characters.
if (char === "\\") {
var parsed = this.scanEscapeSequence(checks);
char = parsed.char;
jump = parsed.jump;
allowNewLine = parsed.allowNewLine;
}
value += char;
this.skip(jump);
}
}
this.skip();
return {
type: Token.StringLiteral,
value: value,
startLine: startLine,
startChar: startChar,
isUnclosed: false,
quote: quote
};
},
/*
* Extract a regular expression out of the next sequence of
* characters and/or lines or return 'null' if it's not possible.
*
* This method is platform dependent: it accepts almost any
* regular expression values but then tries to compile and run
* them using system's RegExp object. This means that there are
* rare edge cases where one JavaScript engine complains about
* your regular expression while others don't.
*/
scanRegExp: function() {
// Returns { type: Token.RegExp, value, flags, isMalformed }, or null when a
// regular expression cannot start here (prereg is false or the input does
// not begin with "/"). An unterminated expression triggers E015 followed by
// a "fatal" event and yields undefined. The char pointer is not moved.
var index = 0;
var length = this.input.length;
var char = this.peek();
// `value` collects the literal as written (slashes and flags included);
// `body` collects only the pattern between the slashes.
var value = char;
var body = "";
var flags = [];
var malformed = false;
// True while scanning inside a [...] character set.
var isCharSet = false;
var terminated;
// Inspects the character that follows a backslash (held in `char`).
var scanUnexpectedChars = function() {
// Unexpected control character
if (char < " ") {
malformed = true;
this.trigger("warning", {
code: "W048",
line: this.line,
character: this.char
});
}
// Unexpected escaped character
if (char === "<") {
malformed = true;
this.trigger("warning", {
code: "W049",
line: this.line,
character: this.char,
data: [ char ]
});
}
}.bind(this);
// Regular expressions must start with '/'
if (!this.prereg || char !== "/") {
return null;
}
index += 1;
terminated = false;
// Try to get everything in between slashes. A couple of
// cases aside (see scanUnexpectedChars) we don't really
// care whether the resulting expression is valid or not.
// We will check that later using the RegExp object.
while (index < length) {
char = this.peek(index);
value += char;
body += char;
if (isCharSet) {
// Inside a character set a "/" does not terminate the expression. The set
// ends at a "]" that is not escaped; the second test covers a "]" that
// follows an escaped backslash.
if (char === "]") {
if (this.peek(index - 1) !== "\\" || this.peek(index - 2) === "\\") {
isCharSet = false;
}
}
// Take the escaped character together with its backslash.
if (char === "\\") {
index += 1;
char = this.peek(index);
body += char;
value += char;
scanUnexpectedChars();
}
index += 1;
continue;
}
if (char === "\\") {
index += 1;
char = this.peek(index);
body += char;
value += char;
scanUnexpectedChars();
// An escaped "/" or "[" is literal: skip it so the checks below do not
// treat it as a terminator or as the start of a character set.
if (char === "/") {
index += 1;
continue;
}
if (char === "[") {
index += 1;
continue;
}
}
if (char === "[") {
isCharSet = true;
index += 1;
continue;
}
if (char === "/") {
// The closing slash was appended above; drop it from the pattern.
body = body.substr(0, body.length - 1);
terminated = true;
index += 1;
break;
}
index += 1;
}
// A regular expression that was never closed is an
// error from which we cannot recover.
if (!terminated) {
this.trigger("error", {
code: "E015",
line: this.line,
character: this.from
});
return void this.trigger("fatal", {
line: this.line,
from: this.from
});
}
// Parse flags (if any).
// Only g, i and m are accepted; any other character ends the flag list.
while (index < length) {
char = this.peek(index);
if (!/[gim]/.test(char)) {
break;
}
flags.push(char);
value += char;
index += 1;
}
// Check regular expression for correctness.
try {
new RegExp(body, flags.join(""));
} catch (err) {
malformed = true;
this.trigger("error", {
code: "E016",
line: this.line,
character: this.char,
data: [ err.message ] // Platform dependent!
});
}
return {
type: Token.RegExp,
value: value,
flags: flags,
isMalformed: malformed
};
},
/*
* Scan for any occurrence of non-breaking spaces. Non-breaking spaces
* can be mistakenly typed on OS X with option-space. Non UTF-8 web
* pages with non-breaking spaces produce syntax errors.
*/
scanNonBreakingSpaces: function() {
return state.option.nonbsp ?
this.input.search(/(\u00A0)/) : -1;
},
/*
* Scan for characters that get silently deleted by one or more browsers.
*/
scanUnsafeChars: function() {
return this.input.search(reg.unsafeChars);
},
/*
* Produce the next raw token or return 'null' if no tokens can be matched.
* This method skips over all space characters.
*/
next: function(checks) {
this.from = this.char;
// Move to the next non-space character.
while (/\s/.test(this.peek())) {
this.from += 1;
this.skip();
}
// Methods that work with multi-line structures and move the
// character pointer.
var match = this.scanComments() ||
this.scanStringLiteral(checks) ||
this.scanTemplateLiteral(checks);
if (match) {
return match;
}
// Methods that don't move the character pointer.
match =
this.scanRegExp() ||
this.scanPunctuator() ||
this.scanKeyword() ||
this.scanIdentifier() ||
this.scanNumericLiteral();
if (match) {
this.skip(match.tokenLength || match.value.length);
return match;
}
// No token could be matched, give up.
return null;
},
/*
* Switch to the next line and reset all char pointers. Once
* switched, this method also checks for other minor warnings.
*/
nextLine: function() {
var char;
if (this.line >= this.getLines().length) {
return false;
}
this.input = this.getLines()[this.line];
this.line += 1;
this.char = 1;
this.from = 1;
var inputTrimmed = this.input.trim();
var startsWith = function() {
return _.some(arguments, function(prefix) {
return inputTrimmed.indexOf(prefix) === 0;
});
};
var endsWith = function() {
return _.some(arguments, function(suffix) {
return inputTrimmed.indexOf(suffix, inputTrimmed.length - suffix.length) !== -1;
});
};
// If we are ignoring linter errors, replace the input with empty string
// if it doesn't already at least start or end a multi-line comment
if (this.ignoringLinterErrors === true) {
if (!startsWith("/*", "//") && !(this.inComment && endsWith("*/"))) {
this.input = "";
}
}
char = this.scanNonBreakingSpaces();
if (char >= 0) {
this.trigger("warning", { code: "W125", line: this.line, character: char + 1 });
}
this.input = this.input.replace(/\t/g, state.tab);
char = this.scanUnsafeChars();
if (char >= 0) {
this.trigger("warning", { code: "W100", line: this.line, character: char });
}
// If there is a limit on line length, warn when lines get too
// long.
if (!this.ignoringLinterErrors && state.option.maxlen &&
state.option.maxlen < this.input.length) {
var inComment = this.inComment ||
startsWith.call(inputTrimmed, "//") ||
startsWith.call(inputTrimmed, "/*");
var shouldTriggerError = !inComment || !reg.maxlenException.test(inputTrimmed);
if (shouldTriggerError) {
this.trigger("warning", { code: "W101", line: this.line, character: this.input.length });
}
}
return true;
},
/*
* This is simply a synonym for nextLine() method with a friendlier
* public name.
*/
start: function() {
this.nextLine();
},
/*
* Produce the next token. This function is called by advance() to get
* the next token. It returns a token in a JSLint-compatible format.
*/
token: function() {
/*jshint loopfunc:true */
var checks = asyncTrigger();
var token;
function isReserved(token, isProperty) {
if (!token.reserved) {
return false;
}
var meta = token.meta;
if (meta && meta.isFutureReservedWord && state.inES5()) {
// ES3 FutureReservedWord in an ES5 environment.
if (!meta.es5) {
return false;
}
// Some ES5 FutureReservedWord identifiers are active only
// within a strict mode environment.
if (meta.strictOnly) {
if (!state.option.strict && !state.isStrict()) {
return false;
}
}
if (isProperty) {
return false;
}
}
return true;
}
// Produce a token object.
var create = function(type, value, isProperty, token) {
/*jshint validthis:true */
var obj;
if (type !== "(endline)" && type !== "(end)") {
this.prereg = false;
}
if (type === "(punctuator)") {
switch (value) {
case ".":
case ")":
case "~":
case "#":
case "]":
case "++":
case "--":
this.prereg = false;
break;
default:
this.prereg = true;
}
obj = Object.create(state.syntax[value] || state.syntax["(error)"]);
}
if (type === "(identifier)") {
if (value === "return" || value === "case" ||
value === "typeof" || value === "instanceof") {
this.prereg = true;
}
if (_.has(state.syntax, value)) {
obj = Object.create(state.syntax[value] || state.syntax["(error)"]);
// If this can't be a reserved keyword, reset the object.
if (!isReserved(obj, isProperty && type === "(identifier)")) {
obj = null;
}
}
}
if (type === "(template)" || type === "(template middle)") {
this.prereg = true;
}
if (!obj) {
obj = Object.create(state.syntax[type]);
}
obj.identifier = (type === "(identifier)");
obj.type = obj.type || type;
obj.value = value;
obj.line = this.line;
obj.character = this.char;
obj.from = this.from;
if (obj.identifier && token) obj.raw_text = token.text || token.value;
if (token && token.startLine && token.startLine !== this.line) {
obj.startLine = token.startLine;
}
if (token && token.context) {
// Context of current token
obj.context = token.context;
}
if (token && token.depth) {
// Nested template depth
obj.depth = token.depth;
}
if (token && token.isUnclosed) {
// Mark token as unclosed string / template literal
obj.isUnclosed = token.isUnclosed;
}
if (isProperty && obj.identifier) {
obj.isProperty = isProperty;
}
obj.check = checks.check;
return obj;
}.bind(this);
for (;;) {
if (!this.input.length) {
if (this.nextLine()) {
return create("(endline)", "");
}
if (this.exhausted) {
return null;
}
this.exhausted = true;
return create("(end)", "");
}
token = this.next(checks);
if (!token) {
if (this.input.length) {
// Unexpected character.
this.trigger("error", {
code: "E024",
line: this.line,
character: this.char,
data: [ this.peek() ]
});
this.input = "";
}
continue;
}
switch (token.type) {
case Token.StringLiteral:
this.triggerAsync("String", {
line: this.line,
char: this.char,
from: this.from,
startLine: token.startLine,
startChar: token.startChar,
value: token.value,
quote: token.quote
}, checks, function() { return true; });
return create("(string)", token.value, null, token);
case Token.TemplateHead:
this.trigger("TemplateHead", {
line: this.line,
char: this.char,
from: this.from,
startLine: token.startLine,
startChar: token.startChar,
value: token.value
});
return create("(template)", token.value, null, token);
case Token.TemplateMiddle:
this.trigger("TemplateMiddle", {
line: this.line,
char: this.char,
from: this.from,
startLine: token.startLine,
startChar: token.startChar,
value: token.value
});
return create("(template middle)", token.value, null, token);
case Token.TemplateTail:
this.trigger("TemplateTail", {
line: this.line,
char: this.char,
from: this.from,
startLine: token.startLine,
startChar: token.startChar,
value: token.value
});
return create("(template tail)", token.value, null, token);
case Token.NoSubstTemplate:
this.trigger("NoSubstTemplate", {
line: this.line,
char: this.char,
from: this.from,
startLine: token.startLine,
startChar: token.startChar,
value: token.value
});
return create("(no subst template)", token.value, null, token);
case Token.Identifier:
this.triggerAsync("Identifier", {
line: this.line,
char: this.char,
from: this.from,
name: token.value,
raw_name: token.text,
isProperty: state.tokens.curr.id === "."
}, checks, function() { return true; });
/* falls through */
case Token.Keyword:
case Token.NullLiteral:
case Token.BooleanLiteral:
return create("(identifier)", token.value, state.tokens.curr.id === ".", token);
case Token.NumericLiteral:
if (token.isMalformed) {
this.trigger("warning", {
code: "W045",
line: this.line,
character: this.char,
data: [ token.value ]
});
}
this.triggerAsync("warning", {
code: "W114",
line: this.line,
character: this.char,
data: [ "0x-" ]
}, checks, function() { return token.base === 16 && state.jsonMode; });
this.triggerAsync("warning", {
code: "W115",
line: this.line,
character: this.char
}, checks, function() {
return state.isStrict() && token.base === 8 && token.isLegacy;
});
this.trigger("Number", {
line: this.line,
char: this.char,
from: this.from,
value: token.value,
base: token.base,
isMalformed: token.malformed
});
return create("(number)", token.value);
case Token.RegExp:
return create("(regexp)", token.value);
case Token.Comment:
state.tokens.curr.comment = true;
if (token.isSpecial) {
return {
id: '(comment)',
value: token.value,
body: token.body,
type: token.commentType,
isSpecial: token.isSpecial,
line: this.line,
character: this.char,
from: this.from
};
}
break;
case "":
break;
default:
return create("(punctuator)", token.value);
}
}
}
};
// Public interface of this module: the Lexer constructor and the Context
// kinds used on its context stack.
exports.Lexer = Lexer;
exports.Context = Context;