luaparse
Version:
A Lua parser in JavaScript
1,659 lines (1,426 loc) • 82.3 kB
JavaScript
/* global exports:true, module:true, require:true, define:true, global:true */
(function (root, name, factory) {
'use strict';
// Universal module definition: work out which module system (if any) is
// present and hand `factory` an object to populate. The candidates are tried
// in this order: AMD (`define`), CommonJS (`module.exports` / `exports`), and
// finally a property called `name` on `root`.
// Used to determine if values are of the language type `Object`
var objectTypes = {
'function': true
, 'object': true
}
// Detect free variable `exports`
, freeExports = objectTypes[typeof exports] && exports && !exports.nodeType && exports
// Detect free variable `module`
, freeModule = objectTypes[typeof module] && module && !module.nodeType && module
// Detect free variable `global`, from Node.js or Browserified code, and
// use it as `window`
, freeGlobal = freeExports && freeModule && typeof global === 'object' && global
// Detect the popular CommonJS extension `module.exports`
, moduleExports = freeModule && freeModule.exports === freeExports && freeExports;
// Only adopt `freeGlobal` as the root when it refers back to itself through
// one of its `global`, `window` or `self` properties.
/* istanbul ignore else */
if (freeGlobal && (freeGlobal.global === freeGlobal ||
/* istanbul ignore next */ freeGlobal.window === freeGlobal ||
/* istanbul ignore next */ freeGlobal.self === freeGlobal)) {
root = freeGlobal;
}
// Some AMD build optimizers, like r.js, check for specific condition
// patterns like the following:
/* istanbul ignore if */
if (typeof define === 'function' &&
/* istanbul ignore next */ typeof define.amd === 'object' &&
/* istanbul ignore next */ define.amd) {
// defined as an anonymous module.
define(['exports'], factory);
// In case the source has been processed and wrapped in a define module use
// the supplied `exports` object.
if (freeExports && moduleExports) factory(freeModule.exports);
}
// check for `exports` after `define` in case a build optimizer adds an
// `exports` object
else /* istanbul ignore else */ if (freeExports && freeModule) {
// in Node.js or RingoJS v0.8.0+
/* istanbul ignore else */
if (moduleExports) factory(freeModule.exports);
// in RingoJS v0.7.0-
else factory(freeExports);
}
// in a browser or Rhino
else {
// No module system was detected: publish the API as `root[name]`.
factory((root[name] = {}));
}
}(this, 'luaparse', function (exports) {
'use strict';
// Version string exposed on the public API object.
exports.version = "0.3.1";
// State shared by the lexer and parser functions below: `input` is the source
// string being scanned and `length` bounds the scan position; `options`,
// `features` and `encodingMode` hold the active configuration.
// NOTE(review): these are assigned outside the visible part of the file —
// confirm where they are initialized.
var input, options, length, features, encodingMode;
// Options can be set either globally on the parser object through
// defaultOptions, or during the parse call.
var defaultOptions = exports.defaultOptions = {
// Explicitly tell the parser when the input ends.
wait: false
// Store comments as an array in the chunk object.
, comments: true
// Track identifier scopes by adding an isLocal attribute to each
// identifier-node.
, scope: false
// Store location information on each syntax node as
// `loc: { start: { line, column }, end: { line, column } }`.
, locations: false
// Store the start and end character locations on each syntax node as
// `range: [start, end]`.
, ranges: false
// A callback which will be invoked when a syntax node has been completed.
// The node which has been created will be passed as the only parameter.
, onCreateNode: null
// A callback which will be invoked when a new scope is created.
, onCreateScope: null
// A callback which will be invoked when the current scope is destroyed.
, onDestroyScope: null
// A callback which will be invoked when a local variable is declared in the current scope.
// The variable's name will be passed as the only parameter
, onLocalDeclaration: null
// The version of Lua targeted by the parser (string; allowed values are
// '5.1', '5.2', '5.3').
, luaVersion: '5.1'
// Encoding mode: how to interpret code units higher than U+007F in input.
// Presumably one of the keys of `encodingModes` ('pseudo-latin1',
// 'x-user-defined' or 'none') — confirm against the option-handling code.
, encodingMode: 'none'
};
// Encode `codepoint` as a UTF-8 byte sequence and return it as a string that
// holds one code unit per byte.
//
// `highMask` (default 0) is OR-ed into every byte of a multi-byte sequence;
// single-byte (ASCII) results are returned unmasked. Code points of 0x110000
// and above cannot be encoded and yield `null`.
function encodeUTF8(codepoint, highMask) {
  var mask = highMask || 0;

  // Build a continuation byte (10xxxxxx) from the six bits found at `shift`.
  var continuation = function (shift) {
    return mask | 0x80 | ((codepoint >> shift) & 0x3f);
  };

  if (codepoint < 0x80)
    return String.fromCharCode(codepoint);

  if (codepoint < 0x800)
    return String.fromCharCode(
      mask | 0xc0 | (codepoint >> 6),
      continuation(0)
    );

  if (codepoint < 0x10000)
    return String.fromCharCode(
      mask | 0xe0 | (codepoint >> 12),
      continuation(6),
      continuation(0)
    );

  /* istanbul ignore else */
  if (codepoint < 0x110000)
    return String.fromCharCode(
      mask | 0xf0 | (codepoint >> 18),
      continuation(12),
      continuation(6),
      continuation(0)
    );

  // TODO: Lua 5.4 allows up to six-byte sequences, as in UTF-8:1993
  return null;
}
// Format `num` in lowercase hexadecimal, left-padded with zeros until it is
// at least `digits` characters long.
function toHex(num, digits) {
  var hex = num.toString(16);
  for (var missing = digits - hex.length; missing > 0; --missing)
    hex = '0' + hex;
  return hex;
}
// Build a validator for strings: the returned function hands its argument
// back unchanged unless `rx` matches somewhere in it, in which case an
// `invalidCodeUnit` error naming the first matched code unit (as four
// uppercase hex digits) is raised.
function checkChars(rx) {
  return function (s) {
    var hit = rx.exec(s);
    if (hit) {
      var unit = hit[0].charCodeAt(0);
      raise(null, errors.invalidCodeUnit, toHex(unit, 4).toUpperCase());
      return;
    }
    return s;
  };
}
// Encoding modes. Each mode provides:
//   fixup(s)             - validate a piece of source text and return it
//   encodeByte(value)    - turn a byte value (or `null`) into string content
//   encodeUTF8(codepoint) - turn a code point into string content
var encodingModes = {
  // `pseudo-latin1` encoding mode: assume the input was decoded with the latin1 encoding
  // WARNING: latin1 does **NOT** mean cp1252 here like in the bone-headed WHATWG standard;
  // it means true ISO/IEC 8859-1 identity-mapped to Basic Latin and Latin-1 Supplement blocks
  'pseudo-latin1': {
    fixup: checkChars(/[^\x00-\xff]/),
    encodeByte: function (value) {
      return value === null ? '' : String.fromCharCode(value);
    },
    encodeUTF8: function (codepoint) {
      return encodeUTF8(codepoint);
    }
  },

  // `x-user-defined` encoding mode: assume the input was decoded with the WHATWG `x-user-defined` encoding
  'x-user-defined': {
    fixup: checkChars(/[^\x00-\x7f\uf780-\uf7ff]/),
    encodeByte: function (value) {
      if (value === null)
        return '';
      // Bytes 0x80 and above are moved up into the U+F780..U+F7FF range.
      return String.fromCharCode(value >= 0x80 ? value | 0xf700 : value);
    },
    encodeUTF8: function (codepoint) {
      return encodeUTF8(codepoint, 0xf700);
    }
  },

  // `none` encoding mode: disregard interpretation of string literals, leave identifiers as-is
  'none': {
    discardStrings: true,
    fixup: function (s) {
      return s;
    },
    encodeByte: function (value) {
      return '';
    },
    encodeUTF8: function (codepoint) {
      return '';
    }
  }
};
// The available tokens expressed as enum flags so they can be checked with
// bitwise operations.
var EOF = 1, StringLiteral = 2, Keyword = 4, Identifier = 8
, NumericLiteral = 16, Punctuator = 32, BooleanLiteral = 64
, NilLiteral = 128, VarargLiteral = 256;
exports.tokenTypes = { EOF: EOF, StringLiteral: StringLiteral
, Keyword: Keyword, Identifier: Identifier, NumericLiteral: NumericLiteral
, Punctuator: Punctuator, BooleanLiteral: BooleanLiteral
, NilLiteral: NilLiteral, VarargLiteral: VarargLiteral
};
// As this parser is a bit different from Lua's own, the error messages
// will be different in some situations.
//
// Each entry is a format string for `sprintf`: `%1`, `%2`, ... are replaced
// by the corresponding extra arguments passed to `raise`.
var errors = exports.errors = {
unexpected: 'unexpected %1 \'%2\' near \'%3\''
, unexpectedEOF: 'unexpected symbol near \'<eof>\''
, expected: '\'%1\' expected near \'%2\''
, expectedToken: '%1 expected near \'%2\''
, unfinishedString: 'unfinished string near \'%1\''
, malformedNumber: 'malformed number near \'%1\''
, decimalEscapeTooLarge: 'decimal escape too large near \'%1\''
, invalidEscape: 'invalid escape sequence near \'%1\''
, hexadecimalDigitExpected: 'hexadecimal digit expected near \'%1\''
, braceExpected: 'missing \'%1\' near \'%2\''
, tooLargeCodepoint: 'UTF-8 value too large near \'%1\''
, unfinishedLongString: 'unfinished long string (starting at line %1) near \'%2\''
, unfinishedLongComment: 'unfinished long comment (starting at line %1) near \'%2\''
, ambiguousSyntax: 'ambiguous syntax (function call x new statement) near \'%1\''
, noLoopToBreak: 'no loop to break near \'%1\''
, labelAlreadyDefined: 'label \'%1\' already defined on line %2'
, labelNotVisible: 'no visible label \'%1\' for <goto>'
, gotoJumpInLocalScope: '<goto %1> jumps into the scope of local \'%2\''
, cannotUseVararg: 'cannot use \'...\' outside a vararg function near \'%1\''
, invalidCodeUnit: 'code unit U+%1 is not allowed in the current encoding mode'
};
// ### Abstract Syntax Tree
//
// Node factories. Each one returns a plain object whose `type` names the
// node kind and whose remaining properties carry the supplied children. The
// default structure is inspired by the Mozilla Parser API; the factories are
// published as `exports.ast` so they can be overridden.
var ast = exports.ast = {
  // --- Statements ---
  labelStatement: function (label) {
    return { type: 'LabelStatement', label: label };
  },
  breakStatement: function () {
    return { type: 'BreakStatement' };
  },
  gotoStatement: function (label) {
    return { type: 'GotoStatement', label: label };
  },
  returnStatement: function (args) {
    return { type: 'ReturnStatement', 'arguments': args };
  },
  ifStatement: function (clauses) {
    return { type: 'IfStatement', clauses: clauses };
  },
  ifClause: function (condition, body) {
    return { type: 'IfClause', condition: condition, body: body };
  },
  elseifClause: function (condition, body) {
    return { type: 'ElseifClause', condition: condition, body: body };
  },
  elseClause: function (body) {
    return { type: 'ElseClause', body: body };
  },
  whileStatement: function (condition, body) {
    return { type: 'WhileStatement', condition: condition, body: body };
  },
  doStatement: function (body) {
    return { type: 'DoStatement', body: body };
  },
  repeatStatement: function (condition, body) {
    return { type: 'RepeatStatement', condition: condition, body: body };
  },
  localStatement: function (variables, init) {
    return { type: 'LocalStatement', variables: variables, init: init };
  },
  assignmentStatement: function (variables, init) {
    return { type: 'AssignmentStatement', variables: variables, init: init };
  },
  callStatement: function (expression) {
    return { type: 'CallStatement', expression: expression };
  },
  // Note the argument order (parameters before isLocal) differs from the
  // property order of the resulting node.
  functionStatement: function (identifier, parameters, isLocal, body) {
    return {
      type: 'FunctionDeclaration',
      identifier: identifier,
      isLocal: isLocal,
      parameters: parameters,
      body: body
    };
  },
  forNumericStatement: function (variable, start, end, step, body) {
    return {
      type: 'ForNumericStatement',
      variable: variable,
      start: start,
      end: end,
      step: step,
      body: body
    };
  },
  forGenericStatement: function (variables, iterators, body) {
    return {
      type: 'ForGenericStatement',
      variables: variables,
      iterators: iterators,
      body: body
    };
  },
  chunk: function (body) {
    return { type: 'Chunk', body: body };
  },

  // --- Expressions ---
  identifier: function (name) {
    return { type: 'Identifier', name: name };
  },
  // `type` is one of the literal token-type constants; anything that is not
  // a string, numeric, boolean or nil literal is treated as a vararg literal.
  literal: function (type, value, raw) {
    var typeName;
    switch (type) {
      case StringLiteral: typeName = 'StringLiteral'; break;
      case NumericLiteral: typeName = 'NumericLiteral'; break;
      case BooleanLiteral: typeName = 'BooleanLiteral'; break;
      case NilLiteral: typeName = 'NilLiteral'; break;
      default: typeName = 'VarargLiteral';
    }
    return { type: typeName, value: value, raw: raw };
  },
  tableKey: function (key, value) {
    return { type: 'TableKey', key: key, value: value };
  },
  tableKeyString: function (key, value) {
    return { type: 'TableKeyString', key: key, value: value };
  },
  tableValue: function (value) {
    return { type: 'TableValue', value: value };
  },
  tableConstructorExpression: function (fields) {
    return { type: 'TableConstructorExpression', fields: fields };
  },
  // `and` / `or` produce a LogicalExpression, every other operator a
  // BinaryExpression.
  binaryExpression: function (operator, left, right) {
    var isLogical = 'and' === operator || 'or' === operator;
    return {
      type: isLogical ? 'LogicalExpression' : 'BinaryExpression',
      operator: operator,
      left: left,
      right: right
    };
  },
  unaryExpression: function (operator, argument) {
    return { type: 'UnaryExpression', operator: operator, argument: argument };
  },
  memberExpression: function (base, indexer, identifier) {
    return {
      type: 'MemberExpression',
      indexer: indexer,
      identifier: identifier,
      base: base
    };
  },
  indexExpression: function (base, index) {
    return { type: 'IndexExpression', base: base, index: index };
  },
  callExpression: function (base, args) {
    return { type: 'CallExpression', base: base, 'arguments': args };
  },
  tableCallExpression: function (base, args) {
    return { type: 'TableCallExpression', base: base, 'arguments': args };
  },
  stringCallExpression: function (base, argument) {
    return { type: 'StringCallExpression', base: base, argument: argument };
  },

  // --- Comments ---
  comment: function (value, raw) {
    return { type: 'Comment', value: value, raw: raw };
  }
};
// Finalize a freshly built syntax node and return it.
//
// With location tracking enabled, the most recently pushed marker is taken
// off `locations`, completed, and used to annotate the node. The
// `onCreateNode` option, when set, is then called with the node.
function finishNode(node) {
  if (trackLocations) {
    var marker = locations.pop();
    marker.complete();
    marker.bless(node);
  }
  options.onCreateNode && options.onCreateNode(node);
  return node;
}
// Helpers
// -------
// Cached references to built-in prototype methods.
var slice = Array.prototype.slice;
var toString = Object.prototype.toString;

// `indexOf(array, element)`: position of the first entry strictly equal to
// `element`, or -1. The native array method is used when it exists; the
// manual scan only serves engines that lack it.
var indexOf = /* istanbul ignore else */ Array.prototype.indexOf ?
  function (array, element) {
    return array.indexOf(element);
  } :
  /* istanbul ignore next */ function (array, element) {
    var count = array.length;
    var position = 0;
    while (position < count) {
      if (array[position] === element) return position;
      ++position;
    }
    return -1;
  };
// Find the first object in `array` whose `property` is strictly equal to
// `element` and return its position, or -1 when no entry matches.
function indexOfObject(array, property, element) {
  var count = array.length;
  var position = 0;
  while (position < count) {
    if (array[position][property] === element) return position;
    ++position;
  }
  return -1;
}
// A sprintf implementation using %index (beginning at 1) to input
// arguments in the format string. Only single-digit indices are recognized.
//
// A placeholder whose argument was not supplied (or is `undefined`) is
// replaced by the empty string; every other value is converted to a string.
//
// Example:
//
// // Unexpected function in token
// sprintf('Unexpected %2 in %1.', 'token', 'function');
function sprintf(format) {
  var args = slice.call(arguments, 1);
  return format.replace(/%(\d)/g, function (match, index) {
    var arg = args[index - 1];
    // The previous `'' + arg || ''` stringified first, so its fallback never
    // applied and a missing argument showed up as the text "undefined".
    return typeof arg === 'undefined' ? '' : '' + arg;
  });
}
// `assign(dest, ...sources)`: copy the own enumerable properties of every
// source onto `dest`, later sources winning, and return `dest`. The native
// `Object.assign` is preferred; the fallback covers engines without it.
var assign = /* istanbul ignore else */ Object.assign ||
  /* istanbul ignore next */ function (dest) {
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var position = 1; position < arguments.length; ++position) {
      var source = arguments[position];
      for (var key in source) {
        /* istanbul ignore else */
        if (hasOwn.call(source, key)) dest[key] = source[key];
      }
    }
    return dest;
  };
// ### Error functions
// Expose the built-in `SyntaxError` constructor on the API object; errors
// created by `raise` are built from `new SyntaxError(...)`.
exports.SyntaxError = SyntaxError;
// XXX: Eliminate this function and change the error type to be different from SyntaxError.
// This will unfortunately be a breaking change, because some downstream users depend
// on the error thrown being an instance of SyntaxError. For example, the Ace editor:
// <https://github.com/ajaxorg/ace/blob/4c7e5eb3f5d5ca9434847be51834a4e41661b852/lib/ace/mode/lua_worker.js#L55>
//
// Return an object that inherits from the error `e` and carries its own
// writable `line`, `index` and `column` properties, seeded from `e`. Engines
// without `Object.create` get `e` back unchanged.
function fixupError(e) {
  /* istanbul ignore if */
  if (!Object.create)
    return e;
  var fields = ['line', 'index', 'column'];
  var descriptors = {};
  for (var i = 0; i < fields.length; ++i) {
    var field = fields[i];
    descriptors[field] = { 'writable': true, value: e[field] };
  }
  return Object.create(e, descriptors);
}
// #### Raise an exception.
//
// Throw a syntax error built from a token, a format string and its
// parameters. The message is prefixed with `[line:column]`.
//
// When `token` carries a `line`, the error is positioned at the start of
// that token. When `token` is `null` or has no `line`, the lexer's current
// position is used instead.
//
// Example:
//
// // [1:0] expected [ near (
// raise(token, "expected %1 near %2", '[', token.value);
function raise(token) {
  var message = sprintf.apply(null, slice.call(arguments, 1));
  var fromLexer = token === null || typeof token.line === 'undefined';

  var errLine = fromLexer ? line : token.line;
  var errIndex = fromLexer ? index : token.range[0];
  var col = fromLexer ? index - lineStart + 1 : token.range[0] - token.lineStart;

  var error = fixupError(new SyntaxError(sprintf('[%1:%2] %3', errLine, col, message)));
  if (fromLexer) {
    error.index = errIndex;
    error.line = errLine;
  } else {
    error.line = errLine;
    error.index = errIndex;
  }
  error.column = col;
  throw error;
}
// Text used to describe a token in messages: the slice of `input` covered by
// the token's range, or the token's `value` when that slice is empty.
function tokenValue(token) {
  var bounds = token.range;
  return input.slice(bounds[0], bounds[1]) || token.value;
}
// #### Raise an unexpected token error.
//
// Reports that `type` was expected near the text of `token`.
//
// Example:
//
// // expected <name> near '0'
// raiseUnexpectedToken('<name>', token);
function raiseUnexpectedToken(type, token) {
  var format = errors.expectedToken;
  var near = tokenValue(token);
  raise(token, format, type, near);
}
// #### Raise a general unexpected error
//
// `found` is either a token object or a plain symbol string. The message
// names what was found and the text of the current lookahead token.
//
// Example:
//
// // Unexpected symbol 'end' near '<eof>'
// unexpected(token);
//
// An EOF token produces the dedicated end-of-input message.
function unexpected(found) {
  var near = tokenValue(lookahead);
  var kind = found.type;

  // Not a token: report the raw symbol.
  if ('undefined' === typeof kind)
    return raise(found, errors.unexpected, 'symbol', found, near);

  if (kind === NilLiteral)
    return raise(found, errors.unexpected, 'symbol', 'nil', near);
  if (kind === EOF)
    return raise(found, errors.unexpectedEOF);

  // Any token type not listed here is reported without a type label.
  var label = kind === StringLiteral ? 'string'
    : kind === Keyword ? 'keyword'
    : kind === Identifier ? 'identifier'
    : kind === NumericLiteral ? 'number'
    : kind === Punctuator ? 'symbol'
    : kind === BooleanLiteral ? 'boolean'
    : undefined;
  return raise(found, errors.unexpected, label, tokenValue(found), near);
}
// Lexer
// -----
//
// The lexer, or the tokenizer, reads the input string character by character
// and derives a token left-right. To be as efficient as possible the lexer
// prioritizes the common cases such as identifiers. It also works with
// character codes instead of characters, as string comparisons were the
// biggest bottleneck of the parser.
//
// If `options.comments` is enabled, all comments encountered will be stored
// in an array which later will be appended to the chunk object. If disabled,
// they will simply be disregarded.
//
// When the lexer has derived a valid token, it will be returned as an object
// containing its value as well as its position in the input string (this
// is always enabled to provide proper debug messages).
//
// `lex()` starts lexing and returns the following token in the stream.
//
// Lexer state:
//   index      - current scan position in `input`
//   tokenStart - position where the token being scanned begins
//   line       - current line number, incremented by `consumeEOL`
//   lineStart  - position of the first character of the current line
//   comments   - array that `scanComment` pushes comment nodes onto
//   lookahead  - token whose text is quoted as "near ..." by `unexpected`
//   token, previousToken - presumably the parser's current and previous
//                tokens; they are maintained outside this part of the file
//                (TODO confirm)
var index
, token
, previousToken
, lookahead
, comments
, tokenStart
, line
, lineStart;
exports.lex = lex;
// Scan and return the next token of `input`, starting at `index`.
//
// Leading whitespace and `--` comments are skipped first. At the end of the
// input an EOF token with an empty range is returned. Otherwise the first
// character (and, where needed, the one or two after it) selects the scanner
// to run. A character that cannot start a token is reported via `unexpected`.
//
// Several punctuators are only recognized when the matching `features` flag
// is set (bitwise operators, labels, integer division).
function lex() {
skipWhiteSpace();
// Skip comments beginning with --
while (45 === input.charCodeAt(index) &&
45 === input.charCodeAt(index + 1)) {
scanComment();
skipWhiteSpace();
}
if (index >= length) return {
type : EOF
, value: '<eof>'
, line: line
, lineStart: lineStart
, range: [index, index]
};
var charCode = input.charCodeAt(index)
, next = input.charCodeAt(index + 1);
// Memorize the range index where the token begins.
tokenStart = index;
if (isIdentifierStart(charCode)) return scanIdentifierOrKeyword();
switch (charCode) {
case 39: case 34: // '"
return scanStringLiteral();
case 48: case 49: case 50: case 51: case 52: case 53:
case 54: case 55: case 56: case 57: // 0-9
return scanNumericLiteral();
case 46: // .
// If the dot is followed by a digit it's a float.
if (isDecDigit(next)) return scanNumericLiteral();
if (46 === next) {
// Three dots form a vararg literal, two dots the `..` punctuator.
if (46 === input.charCodeAt(index + 2)) return scanVarargLiteral();
return scanPunctuator('..');
}
return scanPunctuator('.');
case 61: // =
if (61 === next) return scanPunctuator('==');
return scanPunctuator('=');
case 62: // >
// `>>` only exists when bitwise operators are enabled.
if (features.bitwiseOperators)
if (62 === next) return scanPunctuator('>>');
if (61 === next) return scanPunctuator('>=');
return scanPunctuator('>');
case 60: // <
// `<<` only exists when bitwise operators are enabled.
if (features.bitwiseOperators)
if (60 === next) return scanPunctuator('<<');
if (61 === next) return scanPunctuator('<=');
return scanPunctuator('<');
case 126: // ~
if (61 === next) return scanPunctuator('~=');
// A lone `~` is only valid with bitwise operators; otherwise leave the
// switch so the character is reported as unexpected.
if (!features.bitwiseOperators)
break;
return scanPunctuator('~');
case 58: // :
if (features.labels)
if (58 === next) return scanPunctuator('::');
return scanPunctuator(':');
case 91: // [
// Check for a multiline string, they begin with [= or [[
if (91 === next || 61 === next) return scanLongStringLiteral();
return scanPunctuator('[');
case 47: // /
// Check for integer division op (//)
if (features.integerDivision)
if (47 === next) return scanPunctuator('//');
return scanPunctuator('/');
case 38: case 124: // & |
// Without bitwise operators these characters are reported as unexpected.
if (!features.bitwiseOperators)
break;
/* fall through */
case 42: case 94: case 37: case 44: case 123: case 125:
case 93: case 40: case 41: case 59: case 35: case 45:
case 43: // * ^ % , { } ] ( ) ; # - +
return scanPunctuator(input.charAt(index));
}
// Reached for characters with no case above and for the `break`s above.
return unexpected(input.charAt(index));
}
// Consume one end-of-line sequence at the current position, if there is one.
//
// A line terminator followed by its counterpart (LF CR or CR LF) is treated
// as a single newline. On success the line counter is incremented,
// `lineStart` is moved to the first character of the new line, and `true` is
// returned; otherwise nothing changes and `false` is returned.
function consumeEOL() {
  var current = input.charCodeAt(index);
  if (!isLineTerminator(current)) return false;

  var following = input.charCodeAt(index + 1);
  var isPair = (10 === current && 13 === following) ||
    (13 === current && 10 === following);
  if (isPair) ++index;

  ++line;
  lineStart = ++index;
  return true;
}
// Advance past whitespace and line breaks. Line breaks go through
// `consumeEOL` so that line tracking stays correct. Scanning stops at the
// first other character or at the end of the input.
function skipWhiteSpace() {
  for (;;) {
    if (index >= length) return;
    if (isWhiteSpace(input.charCodeAt(index))) {
      ++index;
      continue;
    }
    if (!consumeEOL()) return;
  }
}
// Scan a word and classify it. Identifiers, keywords, booleans and nil share
// the same syntax, so the word is read first and then checked against each
// category, falling back to a plain identifier. Boolean and nil tokens carry
// the corresponding JavaScript value instead of their text.
function scanIdentifierOrKeyword() {
  // The first character was already vetted by the caller; slicing the input
  // afterwards is cheaper than concatenating character by character.
  do {
    ++index;
  } while (isIdentifierPart(input.charCodeAt(index)));

  var text = encodingMode.fixup(input.slice(tokenStart, index));
  var type = Identifier;
  var value = text;

  if (isKeyword(text)) {
    type = Keyword;
  } else if ('true' === text || 'false' === text) {
    type = BooleanLiteral;
    value = 'true' === text;
  } else if ('nil' === text) {
    type = NilLiteral;
    value = null;
  }

  return {
    type: type,
    value: value,
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Produce a punctuator token for `value` and move past it. No validation
// happens here: the caller has already decided which punctuator it is.
function scanPunctuator(value) {
  var end = index + value.length;
  index = end;
  return {
    type: Punctuator,
    value: value,
    line: line,
    lineStart: lineStart,
    range: [tokenStart, end]
  };
}
// Produce the token for a vararg literal (`...`) and move past its three
// dots.
function scanVarargLiteral() {
  var end = index + 3;
  index = end;
  return {
    type: VarargLiteral,
    value: '...',
    line: line,
    lineStart: lineStart,
    range: [tokenStart, end]
  };
}
// Find the string literal by matching the delimiter marks used.
//
// Scans a quoted string starting at the opening delimiter at `index` and
// returns a StringLiteral token. The token's `value` is the interpreted
// string content, or `null` when the encoding mode discards strings. The
// token records both the position where the literal begins (`line`,
// `lineStart`) and where it ends (`lastLine`, `lastLineStart`).
//
// Raises `unfinishedString` when the input or the line ends before the
// closing delimiter.
function scanStringLiteral() {
var delimiter = input.charCodeAt(index++)
, beginLine = line
, beginLineStart = lineStart
// Start of the run of literal characters not yet appended to `string`.
, stringStart = index
, string = encodingMode.discardStrings ? null : ''
, charCode;
for (;;) {
charCode = input.charCodeAt(index++);
if (delimiter === charCode) break;
// EOF or `\n` terminates a string literal. If we haven't found the
// ending delimiter by now, raise an exception.
if (index > length || isLineTerminator(charCode)) {
// NOTE(review): `raise` always throws, so this append has no observable
// effect.
string += input.slice(stringStart, index - 1);
raise(null, errors.unfinishedString, input.slice(tokenStart, index - 1));
}
if (92 === charCode) { // backslash
if (!encodingMode.discardStrings) {
// Flush the literal run that precedes the backslash.
var beforeEscape = input.slice(stringStart, index - 1);
string += encodingMode.fixup(beforeEscape);
}
// Advances `index` past the escape sequence; the escape is still read in
// discard mode, only its value is dropped.
var escapeValue = readEscapeSequence();
if (!encodingMode.discardStrings)
string += escapeValue;
stringStart = index;
}
}
if (!encodingMode.discardStrings) {
string += encodingMode.encodeByte(null);
// Append the final literal run, excluding the closing delimiter.
string += encodingMode.fixup(input.slice(stringStart, index - 1));
}
return {
type: StringLiteral
, value: string
, line: beginLine
, lineStart: beginLineStart
, lastLine: line
, lastLineStart: lineStart
, range: [tokenStart, index]
};
}
// Scan a long-bracket string and return it as an ordinary StringLiteral
// token. The token's `value` is `null` when the encoding mode discards
// strings. If the text turns out not to be a long string, an "expected"
// error is raised against the current `token`.
function scanLongStringLiteral() {
  // The literal may span several lines, so remember where it started.
  var startLine = line;
  var startLineStart = lineStart;
  var body = readLongString(false);

  // Fail if it's not a multiline literal.
  if (false === body) raise(token, errors.expected, '[', tokenValue(token));

  var value = null;
  if (!encodingMode.discardStrings) value = encodingMode.fixup(body);

  return {
    type: StringLiteral,
    value: value,
    line: startLine,
    lineStart: startLineStart,
    lastLine: line,
    lastLineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Scan a numeric literal and return a NumericLiteral token whose `value` is
// a JavaScript number. The raw text can be recovered later by slicing the
// input with the token's range.
//
// A `0x` / `0X` prefix selects the hexadecimal reader, anything else the
// decimal one. Optional imaginary and 64-bit integer suffixes are consumed
// afterwards; an integer suffix on an imaginary literal or on a literal with
// a fraction or exponent part is malformed.
function scanNumericLiteral() {
  var second = input.charAt(index + 1);
  var isHex = '0' === input.charAt(index) && ('x' === second || 'X' === second);
  var literal = isHex ? readHexLiteral() : readDecLiteral();

  var isImaginary = readImaginaryUnitSuffix();
  var int64Suffix = readInt64Suffix();
  if (int64Suffix && (isImaginary || literal.hasFractionPart)) {
    raise(null, errors.malformedNumber, input.slice(tokenStart, index));
  }

  return {
    type: NumericLiteral,
    value: literal.value,
    line: line,
    lineStart: lineStart,
    range: [tokenStart, index]
  };
}
// Consume an optional imaginary unit suffix (`i` or `I`) after a number.
// Returns `true` when one was consumed and `false` when none is present.
// Nothing is returned when imaginary numbers are not enabled.
// See http://luajit.org/ext_ffi_api.html#literals
function readImaginaryUnitSuffix() {
  if (!features.imaginaryNumbers) return;
  var ch = input.charAt(index);
  var found = 'i' === ch || 'I' === ch;
  if (found) ++index;
  return found;
}
// Consume an optional 64-bit integer suffix after a number. Letter case is
// ignored. Returns 'ULL' for an unsigned suffix and 'LL' for a signed one.
// Nothing is returned when no suffix is present or the feature is disabled.
// A suffix that starts but is incomplete (`U`, `UL`, `L`) is malformed.
// See http://luajit.org/ext_ffi_api.html#literals
function readInt64Suffix() {
  if (!features.integerSuffixes) return;

  var kind;
  var ch = input.charAt(index);
  if ('u' === ch || 'U' === ch) {
    ++index;
    kind = 'ULL';
  } else if ('l' === ch || 'L' === ch) {
    kind = 'LL';
  } else {
    return;
  }

  // Both kinds must now continue with exactly two L's.
  for (var remaining = 2; remaining > 0; --remaining) {
    ch = input.charAt(index);
    if ('l' !== ch && 'L' !== ch)
      raise(null, errors.malformedNumber, input.slice(tokenStart, index));
    ++index;
  }
  return kind;
}
// Read a hexadecimal literal that starts at `index` with its `0x` prefix.
//
// Lua hexadecimals have an optional fraction part and an optional binary
// exponent part. JavaScript cannot parse these natively, so the three parts
// are computed separately and combined:
//
// Digit := toDec(digit)
// Fraction := toDec(fraction) / 16 ^ fractionCount
// BinaryExp := 2 ^ binaryExp
// Number := ( Digit + Fraction ) * BinaryExp
//
// Returns `{ value, hasFractionPart }`, where `hasFractionPart` is true when
// a fraction or an exponent was present.
function readHexLiteral() {
  function skipHexDigits() {
    while (isHexDigit(input.charCodeAt(index))) ++index;
  }

  index += 2; // Skip 0x part
  var integerStart = index;

  // A minimum of one hex digit is required.
  if (!isHexDigit(input.charCodeAt(index)))
    raise(null, errors.malformedNumber, input.slice(tokenStart, index));
  skipHexDigits();
  var integerPart = parseInt(input.slice(integerStart, index), 16);

  // Optional fraction; an empty one contributes 0.
  var fractionPart = 0;
  var hasFraction = '.' === input.charAt(index);
  if (hasFraction) {
    var fractionStart = ++index;
    skipHexDigits();
    var fractionLength = index - fractionStart;
    if (fractionLength !== 0)
      fractionPart = parseInt(input.slice(fractionStart, index), 16) / Math.pow(16, fractionLength);
  }

  // Optional binary exponent: p / P, optional sign, decimal digits.
  var scale = 1;
  var marker = input.charAt(index);
  var hasExponent = 'p' === marker || 'P' === marker;
  if (hasExponent) {
    ++index;
    var sign = 1;
    var signChar = input.charAt(index);
    if ('+' === signChar || '-' === signChar) {
      ++index;
      if ('-' === signChar) sign = -1;
    }
    var exponentStart = index;
    // The exponent requires at least one decimal digit.
    if (!isDecDigit(input.charCodeAt(index)))
      raise(null, errors.malformedNumber, input.slice(tokenStart, index));
    while (isDecDigit(input.charCodeAt(index))) ++index;
    scale = Math.pow(2, input.slice(exponentStart, index) * sign);
  }

  return {
    value: (integerPart + fractionPart) * scale,
    hasFractionPart: hasFraction || hasExponent
  };
}
// Read a decimal literal. Decimal numbers look the same in Lua and in
// JavaScript, so this function only determines where the literal ends and
// lets `parseFloat` convert the text from `tokenStart` up to that point.
//
// Returns `{ value, hasFractionPart }`, where `hasFractionPart` is true when
// a fraction or an exponent was present.
function readDecLiteral() {
  function skipDigits() {
    while (isDecDigit(input.charCodeAt(index))) ++index;
  }

  skipDigits();

  // Optional fraction; its digits may be absent.
  var hasFraction = '.' === input.charAt(index);
  if (hasFraction) {
    ++index;
    skipDigits();
  }

  // Optional exponent: e / E, optional sign, at least one digit.
  var marker = input.charAt(index);
  var hasExponent = 'e' === marker || 'E' === marker;
  if (hasExponent) {
    ++index;
    var signChar = input.charAt(index);
    if ('+' === signChar || '-' === signChar) ++index;
    if (!isDecDigit(input.charCodeAt(index)))
      raise(null, errors.malformedNumber, input.slice(tokenStart, index));
    skipDigits();
  }

  return {
    value: parseFloat(input.slice(tokenStart, index)),
    hasFractionPart: hasFraction || hasExponent
  };
}
// Read a `\u{XXX}` escape sequence and return its encoding as produced by
// the active encoding mode's `encodeUTF8`.
//
// On entry `index` points at the `u` following the backslash; on return it
// points just past the closing `}`. Raises `braceExpected`,
// `hexadecimalDigitExpected` or `tooLargeCodepoint` for malformed input.
function readUnicodeEscapeSequence() {
// `sequenceStart` is the position of the `u`; error fragments are sliced from
// here and prefixed with a backslash.
var sequenceStart = index++;
if (input.charAt(index++) !== '{')
raise(null, errors.braceExpected, '{', '\\' + input.slice(sequenceStart, index));
if (!isHexDigit(input.charCodeAt(index)))
raise(null, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index));
// Leading zeros do not count towards the digit limit below.
while (input.charCodeAt(index) === 0x30) ++index;
var escStart = index;
while (isHexDigit(input.charCodeAt(index))) {
++index;
// More than six significant hex digits cannot be a valid code point.
if (index - escStart > 6)
raise(null, errors.tooLargeCodepoint, '\\' + input.slice(sequenceStart, index));
}
var b = input.charAt(index++);
if (b !== '}') {
if ((b === '"') || (b === "'"))
// The `index--` runs while the arguments are evaluated, so `raise` sees the
// position of the quote rather than the one after it.
raise(null, errors.braceExpected, '}', '\\' + input.slice(sequenceStart, index--));
else
raise(null, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index));
}
// An all-zero escape leaves an empty digit slice, hence the '0' fallback.
var codepoint = parseInt(input.slice(escStart, index - 1) || '0', 16);
var frag = '\\' + input.slice(sequenceStart, index);
if (codepoint > 0x10ffff) {
raise(null, errors.tooLargeCodepoint, frag);
}
// NOTE(review): the `encodeUTF8` implementations in `encodingModes` declare a
// single parameter, so `frag` appears to be unused by them.
return encodingMode.encodeUTF8(codepoint, frag);
}
// Translate escape sequences to the actual characters.
//
// Called with `index` on the character that follows the backslash. Returns
// the string the escape stands for and leaves `index` just past the escape.
//
// Escapes guarded by a `features` flag (`\z`, `\x`, `\u`) fall out of the
// switch when the flag is off. They are then handled like any unknown
// escape: an `invalidEscape` error under `features.strictEscapes`, otherwise
// the escaped character itself is returned.
function readEscapeSequence() {
var sequenceStart = index;
switch (input.charAt(index)) {
// Lua allow the following escape sequences.
case 'a': ++index; return '\x07';
case 'n': ++index; return '\n';
case 'r': ++index; return '\r';
case 't': ++index; return '\t';
case 'v': ++index; return '\x0b';
case 'b': ++index; return '\b';
case 'f': ++index; return '\f';
// Backslash at the end of the line. We treat all line endings as equivalent,
// and as representing the [LF] character (code 10). Lua 5.1 through 5.3
// have been verified to behave the same way.
case '\r':
case '\n':
consumeEOL();
return '\n';
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
// \ddd, where ddd is a sequence of up to three decimal digits.
while (isDecDigit(input.charCodeAt(index)) && index - sequenceStart < 3) ++index;
var frag = input.slice(sequenceStart, index);
var ddd = parseInt(frag, 10);
// The value must fit in a single byte.
if (ddd > 255) {
raise(null, errors.decimalEscapeTooLarge, '\\' + ddd);
}
return encodingMode.encodeByte(ddd, '\\' + frag);
case 'z':
// \z skips the whitespace (including line breaks) that follows it.
if (features.skipWhitespaceEscape) {
++index;
skipWhiteSpace();
return '';
}
break;
case 'x':
if (features.hexEscapes) {
// \xXX, where XX is a sequence of exactly two hexadecimal digits
if (isHexDigit(input.charCodeAt(index + 1)) &&
isHexDigit(input.charCodeAt(index + 2))) {
index += 3;
return encodingMode.encodeByte(parseInt(input.slice(sequenceStart + 1, index), 16), '\\' + input.slice(sequenceStart, index));
}
raise(null, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index + 2));
}
break;
case 'u':
if (features.unicodeEscapes)
return readUnicodeEscapeSequence();
break;
case '\\': case '"': case "'":
return input.charAt(index++);
}
// Unknown escape, or a known one whose feature is disabled.
if (features.strictEscapes)
raise(null, errors.invalidEscape, '\\' + input.slice(sequenceStart, index + 1));
return input.charAt(index++);
}
// Comments begin with -- after which it will be decided if they are
// multiline comments or not.
//
// The multiline functionality works the exact same way as with string
// literals so we reuse the functionality.
function scanComment() {
  // Lexes the comment whose leading `--` sits at `index`. Moves `index` past
  // the comment and, when `options.comments` is set, appends a comment node
  // (with optional `loc` / `range`) to `comments`.
  tokenStart = index;
  index += 2; // step over `--`

  // Remember where the comment body starts; `line` and `lineStart` are read
  // again after scanning to produce the end location.
  var bodyStart = index;
  var firstChar = input.charAt(index);
  var startLine = line;
  var startLineOffset = lineStart;
  var text = '';
  var isLong = false;

  if (firstChar === '[') {
    var longText = readLongString(true);
    if (longText === false) {
      // `[` did not open a long bracket: treat it as a single-line comment.
      text = firstChar;
    } else {
      text = longText;
      isLong = true;
    }
  }

  // A single-line comment runs up to (not including) the next line terminator.
  if (!isLong) {
    while (index < length && !isLineTerminator(input.charCodeAt(index))) {
      ++index;
    }
    if (options.comments) text = input.slice(bodyStart, index);
  }

  // Nothing is recorded unless the caller asked for comments.
  if (!options.comments) return;

  var node = ast.comment(text, input.slice(tokenStart, index));

  // Comments are intercepted here in the lexer, so location data is filled
  // in by hand instead of going through a `Marker`.
  if (options.locations) {
    node.loc = {
      start: { line: startLine, column: tokenStart - startLineOffset },
      end: { line: line, column: index - lineStart }
    };
  }
  if (options.ranges) {
    node.range = [tokenStart, index];
  }
  if (options.onCreateNode) options.onCreateNode(node);
  comments.push(node);
}
// Read a multiline string by calculating the depth of `=` characters and
// then appending until an equal depth is found.
function readLongString(isComment) {
  // Reads a long-bracket literal (`[[...]]`, `[==[...]==]`, ...) whose first
  // `[` is at `index`.
  //
  // Returns the text between the brackets, or `false` when the input at
  // `index` is not a long-bracket opener (in that case `index` has only been
  // moved past the first `[`). Reaching the end of input without a closing
  // bracket is reported through `raise`, using the long-comment error when
  // `isComment` is truthy and the long-string error otherwise.
  var openingLine = line;
  var level = 0;
  var contentStart, closes, i;

  ++index; // step over the first `[`

  // The number of `=` characters determines the bracket level.
  while (input.charAt(index + level) === '=') ++level;

  // Without a second `[` this is not a long bracket at all.
  if (input.charAt(index + level) !== '[') return false;

  index += level + 1;

  // A line break directly after the opening bracket is not part of the text.
  if (isLineTerminator(input.charCodeAt(index))) consumeEOL();
  contentStart = index;

  while (index < length) {
    // Line breaks go through `consumeEOL()` so line tracking stays correct.
    while (isLineTerminator(input.charCodeAt(index))) consumeEOL();

    // Only a `]` can begin the closing bracket.
    if (input.charAt(index++) !== ']') continue;

    // The closer is `level` `=` characters followed by another `]`.
    closes = input.charAt(index + level) === ']';
    for (i = 0; closes && i < level; ++i) {
      closes = input.charAt(index + i) === '=';
    }

    if (closes) {
      var text = input.slice(contentStart, index - 1);
      index += level + 1;
      return text;
    }
  }

  raise(null, isComment ?
              errors.unfinishedLongComment :
              errors.unfinishedLongString,
        openingLine, '<eof>');
}
// ## Lex functions and helpers.
// Read the next token.
//
// This is actually done by setting the current token to the lookahead and
// reading in the new lookahead token.
function next() {
  // Shifts the token window forward by one: the current token becomes the
  // previous one, the lookahead becomes current, and a fresh lookahead is
  // lexed last.
  var upcoming = lookahead;
  previousToken = token;
  token = upcoming;
  lookahead = lex();
}
// Consume a token if its value matches. Once consumed or not, return the
// success of the operation.
function consume(value) {
  // Advances to the next token only when the current token's value is
  // strictly equal to `value`; returns whether it advanced.
  var matched = (token.value === value);
  if (matched) next();
  return matched;
}
// Expect the next token value to match. If not, throw an exception.
function expect(value) {
  // Requires the current token's value to be `value`. On a match the token
  // is consumed; otherwise the mismatch is reported through `raise` with the
  // expected value and a rendering of the current token.
  if (token.value !== value) {
    raise(token, errors.expected, value, tokenValue(token));
    return;
  }
  next();
}
// ### Validation functions
function isWhiteSpace(charCode) {
  // True for horizontal tab (9), vertical tab (11), form feed (12) and
  // space (32). Line terminators are not included here.
  switch (charCode) {
    case 9:
    case 11:
    case 12:
    case 32:
      return true;
    default:
      return false;
  }
}
function isLineTerminator(charCode) {
  // True for line feed (0x0A) and carriage return (0x0D) only.
  return charCode === 0x0A || charCode === 0x0D;
}
function isDecDigit(charCode) {
  // True when `charCode` lies in the range for '0' (48) through '9' (57).
  // Written as a positive range test so that NaN yields false.
  var ZERO = 48;
  var NINE = 57;
  return ZERO <= charCode && charCode <= NINE;
}
function isHexDigit(charCode) {
  // True for the character codes of 0-9, A-F and a-f.
  if (charCode >= 48 && charCode <= 57) return true;  // '0'..'9'
  if (charCode >= 65 && charCode <= 70) return true;  // 'A'..'F'
  return charCode >= 97 && charCode <= 102;           // 'a'..'f'
}
// From [Lua 5.2](http://www.lua.org/manual/5.2/manual.html#8.1) onwards
// identifiers cannot use 'locale-dependent' letters (i.e. dependent on the C locale).
// On the other hand, LuaJIT allows arbitrary octets ≥ 128 in identifiers.
function isIdentifierStart(charCode) {
  // True when `charCode` may begin an identifier: A-Z, a-z or `_`. Codes of
  // 128 and above are accepted only when `features.extendedIdentifiers` is
  // enabled.
  var isUpper = charCode >= 65 && charCode <= 90;
  var isLower = charCode >= 97 && charCode <= 122;
  if (isUpper || isLower || charCode === 95) return true;
  return !!(features.extendedIdentifiers && charCode >= 128);
}
function isIdentifierPart(charCode) {
  // True when `charCode` may appear inside an identifier: A-Z, a-z, `_` or
  // 0-9. Codes of 128 and above are accepted only when
  // `features.extendedIdentifiers` is enabled.
  var isUpper = charCode >= 65 && charCode <= 90;
  var isLower = charCode >= 97 && charCode <= 122;
  var isDigit = charCode >= 48 && charCode <= 57;
  if (isUpper || isLower || charCode === 95 || isDigit) return true;
  return !!(features.extendedIdentifiers && charCode >= 128);
}
// [3.1 Lexical Conventions](http://www.lua.org/manual/5.2/manual.html#3.1)
//
// `true`, `false` and `nil` will not be considered keywords, but literals.
function isKeyword(id) {
  // Reports whether `id` is a reserved word. Candidates are bucketed by
  // length first so that only a few comparisons are needed. `goto` counts
  // as a keyword only when `features.labels` is on and
  // `features.contextualGoto` is off.
  var size = id.length;

  if (size === 2) {
    return id === 'do' || id === 'if' || id === 'in' || id === 'or';
  }
  if (size === 3) {
    return id === 'and' || id === 'end' || id === 'for' || id === 'not';
  }
  if (size === 4) {
    if (id === 'else' || id === 'then') return true;
    return (features.labels && !features.contextualGoto) ? id === 'goto' : false;
  }
  if (size === 5) {
    return id === 'break' || id === 'local' || id === 'until' || id === 'while';
  }
  if (size === 6) {
    return id === 'elseif' || id === 'repeat' || id === 'return';
  }
  if (size === 8) {
    return id === 'function';
  }
  return false;
}
function isUnary(token) {
  // Reports whether `token` is a unary operator: the punctuators `#`, `-`
  // and `~`, or the keyword `not`.
  //
  // The punctuator value is compared exactly. A substring search such as
  // `'#-~'.indexOf(value) >= 0` would also accept the empty string and
  // multi-character values like `'#-'`, which are not unary operators.
  if (Punctuator === token.type) {
    var value = token.value;
    return '#' === value || '-' === value || '~' === value;
  }
  if (Keyword === token.type) return 'not' === token.value;
  return false;
}
// Check if the token syntactically closes a block.
function isBlockFollow(token) {
  // A block is closed by end of input or by one of the keywords `else`,
  // `elseif`, `end` and `until`.
  if (token.type === EOF) return true;
  if (token.type !== Keyword) return false;

  var word = token.value;
  return word === 'else' || word === 'elseif' || word === 'end' || word === 'until';
}
// Scope
// -----
// Store each block scope as an array of identifier names. Each scope is
// stored in a FILO-array.
// Stack of scopes; each entry is an array of identifier names.
var scopes;
// Index of the current scope within `scopes`.
var scopeDepth;
// A list of all global identifier nodes.
var globals;
// Create a new scope inheriting all declarations from the previous scope.
function createScope() {
  // Opens a scope that starts out as a copy of the current scope's names,
  // makes it the current scope, then notifies `options.onCreateScope` when
  // that hook is set.
  var parent = scopes[scopeDepth];
  scopeDepth += 1;
  scopes.push(parent.slice());
  if (options.onCreateScope) options.onCreateScope();
}
// Exit and remove the current scope.
function destroyScope() {
  // Drops the innermost scope and steps `scopeDepth` back by one, then
  // notifies `options.onDestroyScope` when that hook is set. The popped
  // scope itself is not needed, so it is not kept in a local.
  scopes.pop();
  --scopeDepth;
  if (options.onDestroyScope) options.onDestroyScope();
}
// Add identifier name to the current scope if it doesn't already exist.
function scopeIdentifierName(name) {
  // Records `name` in the current scope unless it is already listed there.
  // The `options.onLocalDeclaration` hook, when set, is invoked for every
  // call, including repeated declarations of the same name.
  if (options.onLocalDeclaration) options.onLocalDeclaration(name);

  var current = scopes[scopeDepth];
  if (indexOf(current, name) === -1) current.push(name);
}
// Add identifier to the current scope
function scopeIdentifier(node) {
  // Declares the identifier node's name in the current scope and marks the
  // node itself as local.
  var declaredName = node.name;
  scopeIdentifierName(declaredName);
  attachScope(node, true);
}
// Attach scope information to node. If the node is global, store it in the
// globals array so we can return the information to the user.
function attachScope(node, isLocal) {
  // Stamps `node.isLocal` with `isLocal`. A non-local node is also added to
  // `globals`, unless a node with the same `name` is already stored there.
  if (!isLocal) {
    var alreadyListed = indexOfObject(globals, 'name', node.name) !== -1;
    if (!alreadyListed) globals.push(node);
  }
  node.isLocal = isLocal;
}
// Is the identifier name available in this scope.
function scopeHasName(name) {
  // True when `name` is listed in the current scope.
  var visibleNames = scopes[scopeDepth];
  return indexOf(visibleNames, name) !== -1;
}
// Location tracking
// -----------------
//
// Locations are stored in a FILO-array as `Marker` objects consisting of both
// `loc` and `range` data. Once a `Marker` is popped off the list an end
// location is added and the data is attached to a syntax node.
// Stack of `Marker` objects.
var locations = [];
// When truthy, `markLocation` and `pushLocation` add markers to `locations`.
var trackLocations;
function createLocationMarker() {
  // Builds a `Marker` positioned at the current token.
  var marker = new Marker(token);
  return marker;
}
function Marker(token) {
  // Captures the start position of `token`. The end position is left as a
  // zeroed placeholder. `loc` is only created when `options.locations` is
  // set, and `range` only when `options.ranges` is set.
  if (options.locations) {
    var start = {
      line: token.line,
      column: token.range[0] - token.lineStart
    };
    var end = { line: 0, column: 0 };
    this.loc = { start: start, end: end };
  }
  if (options.ranges) {
    this.range = [token.range[0], 0];
  }
}
// Complete the location data stored in the `Marker` by adding the location
// of the *previous token* as an end location.
Marker.prototype.complete = function () {
  // Fills in the end position from `previousToken`. The `lastLine` and
  // `lastLineStart` fields are used when they are truthy; otherwise the
  // token's own `line` and `lineStart` are used.
  var endToken;

  if (options.locations) {
    endToken = previousToken;
    var end = this.loc.end;
    end.line = endToken.lastLine || endToken.line;
    end.column = endToken.range[1] - (endToken.lastLineStart || endToken.lineStart);
  }
  if (options.ranges) {
    endToken = previousToken;
    this.range[1] = endToken.range[1];
  }
};
Marker.prototype.bless = function (node) {
  // Copies this marker's position data onto `node`. The node receives fresh
  // `loc` / `range` objects, so it shares no structure with the marker.
  // Fields the marker does not have are left untouched on the node.
  var loc = this.loc;
  if (loc) {
    var start = { line: loc.start.line, column: loc.start.column };
    var end = { line: loc.end.line, column: loc.end.column };
    node.loc = { start: start, end: end };
  }

  var range = this.range;
  if (range) {
    node.range = [range[0], range[1]];
  }
};
// Create a new `Marker` and add it to the FILO-array.
function markLocation() {
  // Pushes a marker for the current token onto `locations`; does nothing
  // while `trackLocations` is falsy.
  if (!trackLocations) return;
  var marker = createLocationMarker();
  locations.push(marker);
}
// Push an arbitrary `Marker` object onto the FILO-array.
function pushLocation(marker) {
  // Pushes the supplied marker onto `locations`; does nothing while
  // `trackLocations` is falsy.
  if (!trackLocations) return;
  locations.push(marker);
}
// Control flow tracking
// ---------------------
// A context object that validates loop breaks and `goto`-based control flow.
function FullFlowContext() {
// Stack of flow scopes, innermost last. Entries are the objects created by
// `pushScope` (`labels`, `locals`, `deferredGotos`, `isLoop`).
this.scopes = [];
// Records created by `addGoto` for `goto` statements whose target label was
// not found in any open scope at the time of the jump.
this.pendingGotos = [];
}
FullFlowContext.prototype.isInLoop = function () {
  // True when any open scope, searched from the innermost outwards, is
  // flagged as a loop.
  for (var depth = this.scopes.length - 1; depth >= 0; --depth) {
    if (this.scopes[depth].isLoop) return true;
  }
  return false;
};
FullFlowContext.prototype.pushScope = function (isLoop) {
  // Opens a new, empty flow scope. `isLoop` is coerced to a boolean and
  // stored on the scope so `isInLoop` can find it later.
  this.scopes.push({
    labels: {},
    locals: [],
    deferredGotos: [],
    isLoop: Boolean(isLoop)
  });
};
FullFlowContext.prototype.popScope = function () {
  // Closes the innermost flow scope. Every pending goto whose `maxDepth`
  // reaches at least the current depth has its `maxDepth` lowered by one;
  // when that drops to zero or below, the goto is reported through `raise`
  // as targeting a label that is not visible.
  var depth = this.scopes.length;
  var pending = this.pendingGotos;

  for (var i = 0; i < pending.length; ++i) {
    var theGoto = pending[i];
    if (theGoto.maxDepth >= depth && --theGoto.maxDepth <= 0) {
      raise(theGoto.token, errors.labelNotVisible, theGoto.target);
    }
  }
  this.scopes.pop();
};
FullFlowContext.prototype.addGoto = function (target, token) {
  // Registers a `goto target`. If any open scope already has `target` as
  // an own key of its `labels`, nothing is recorded. Otherwise a pending
  // entry is stored that carries the current nesting depth and a snapshot
  // of how many locals each open scope held at the time of the jump.
  var hasOwn = Object.prototype.hasOwnProperty;
  var depth = this.scopes.length;
  var localCounts = [];

  for (var i = 0; i < depth; ++i) {
    var scope = this.scopes[i];
    localCounts[i] = scope.locals.length;
    // Own-property check so inherited names such as `toString` never count
    // as labels.
    if (hasOwn.call(scope.labels, target)) return;
  }

  var pendingEntry = {
    maxDepth: depth,
    target: target,
    token: token,
    localCounts: localCounts
  };
  this.pendingGotos.push(pendingEntry);
};
FullFlowContext.prototype.addLabel = function (name, token) {
var scope = this.currentScope();
if (Object.prototype.hasOwnProperty.call(scope.labels, name)) {
raise(token, errors.labelAlreadyDefined, name, scope.labels[name].line);
} else {
var newGotos = [];
for (var i = 0; i < this.pendingGotos.length; ++i) {
var theGoto = this.pendingGotos[i];
if (theGoto.maxDepth >= this.scopes.length && theGoto.target === name) {
if (theGoto.localCounts[this.scopes.length - 1] < scope.locals.length) {
scope.deferredGotos.push(theGoto);
}
continue;
}
newGotos.push(theGoto);
}
this.pendingGoto