UNPKG

luaparse

Version:
1,659 lines (1,426 loc) 82.3 kB
/* global exports:true, module:true, require:true, define:true, global:true */ (function (root, name, factory) { 'use strict'; // Used to determine if values are of the language type `Object` var objectTypes = { 'function': true , 'object': true } // Detect free variable `exports` , freeExports = objectTypes[typeof exports] && exports && !exports.nodeType && exports // Detect free variable `module` , freeModule = objectTypes[typeof module] && module && !module.nodeType && module // Detect free variable `global`, from Node.js or Browserified code, and // use it as `window` , freeGlobal = freeExports && freeModule && typeof global === 'object' && global // Detect the popular CommonJS extension `module.exports` , moduleExports = freeModule && freeModule.exports === freeExports && freeExports; /* istanbul ignore else */ if (freeGlobal && (freeGlobal.global === freeGlobal || /* istanbul ignore next */ freeGlobal.window === freeGlobal || /* istanbul ignore next */ freeGlobal.self === freeGlobal)) { root = freeGlobal; } // Some AMD build optimizers, like r.js, check for specific condition // patterns like the following: /* istanbul ignore if */ if (typeof define === 'function' && /* istanbul ignore next */ typeof define.amd === 'object' && /* istanbul ignore next */ define.amd) { // defined as an anonymous module. define(['exports'], factory); // In case the source has been processed and wrapped in a define module use // the supplied `exports` object. 
if (freeExports && moduleExports) factory(freeModule.exports); } // check for `exports` after `define` in case a build optimizer adds an // `exports` object else /* istanbul ignore else */ if (freeExports && freeModule) { // in Node.js or RingoJS v0.8.0+ /* istanbul ignore else */ if (moduleExports) factory(freeModule.exports); // in RingoJS v0.7.0- else factory(freeExports); } // in a browser or Rhino else { factory((root[name] = {})); } }(this, 'luaparse', function (exports) { 'use strict'; exports.version = "0.3.1"; var input, options, length, features, encodingMode; // Options can be set either globally on the parser object through // defaultOptions, or during the parse call. var defaultOptions = exports.defaultOptions = { // Explicitly tell the parser when the input ends. wait: false // Store comments as an array in the chunk object. , comments: true // Track identifier scopes by adding an isLocal attribute to each // identifier-node. , scope: false // Store location information on each syntax node as // `loc: { start: { line, column }, end: { line, column } }`. , locations: false // Store the start and end character locations on each syntax node as // `range: [start, end]`. , ranges: false // A callback which will be invoked when a syntax node has been completed. // The node which has been created will be passed as the only parameter. , onCreateNode: null // A callback which will be invoked when a new scope is created. , onCreateScope: null // A callback which will be invoked when the current scope is destroyed. , onDestroyScope: null // A callback which will be invoked when a local variable is declared in the current scope. // The variable's name will be passed as the only parameter , onLocalDeclaration: null // The version of Lua targeted by the parser (string; allowed values are // '5.1', '5.2', '5.3'). 
, luaVersion: '5.1' // Encoding mode: how to interpret code units higher than U+007F in input , encodingMode: 'none' }; function encodeUTF8(codepoint, highMask) { highMask = highMask || 0; if (codepoint < 0x80) { return String.fromCharCode(codepoint); } else if (codepoint < 0x800) { return String.fromCharCode( highMask | 0xc0 | (codepoint >> 6) , highMask | 0x80 | ( codepoint & 0x3f) ); } else if (codepoint < 0x10000) { return String.fromCharCode( highMask | 0xe0 | (codepoint >> 12) , highMask | 0x80 | ((codepoint >> 6) & 0x3f), highMask | 0x80 | ( codepoint & 0x3f) ); } else /* istanbul ignore else */ if (codepoint < 0x110000) { return String.fromCharCode( highMask | 0xf0 | (codepoint >> 18) , highMask | 0x80 | ((codepoint >> 12) & 0x3f), highMask | 0x80 | ((codepoint >> 6) & 0x3f), highMask | 0x80 | ( codepoint & 0x3f) ); } else { // TODO: Lua 5.4 allows up to six-byte sequences, as in UTF-8:1993 return null; } } function toHex(num, digits) { var result = num.toString(16); while (result.length < digits) result = '0' + result; return result; } function checkChars(rx) { return function (s) { var m = rx.exec(s); if (!m) return s; raise(null, errors.invalidCodeUnit, toHex(m[0].charCodeAt(0), 4).toUpperCase()); }; } var encodingModes = { // `pseudo-latin1` encoding mode: assume the input was decoded with the latin1 encoding // WARNING: latin1 does **NOT** mean cp1252 here like in the bone-headed WHATWG standard; // it means true ISO/IEC 8859-1 identity-mapped to Basic Latin and Latin-1 Supplement blocks 'pseudo-latin1': { fixup: checkChars(/[^\x00-\xff]/), encodeByte: function (value) { if (value === null) return ''; return String.fromCharCode(value); }, encodeUTF8: function (codepoint) { return encodeUTF8(codepoint); }, }, // `x-user-defined` encoding mode: assume the input was decoded with the WHATWG `x-user-defined` encoding 'x-user-defined': { fixup: checkChars(/[^\x00-\x7f\uf780-\uf7ff]/), encodeByte: function (value) { if (value === null) return ''; if (value >= 
0x80) return String.fromCharCode(value | 0xf700); return String.fromCharCode(value); }, encodeUTF8: function (codepoint) { return encodeUTF8(codepoint, 0xf700); } }, // `none` encoding mode: disregard intrepretation of string literals, leave identifiers as-is 'none': { discardStrings: true, fixup: function (s) { return s; }, encodeByte: function (value) { return ''; }, encodeUTF8: function (codepoint) { return ''; } } }; // The available tokens expressed as enum flags so they can be checked with // bitwise operations. var EOF = 1, StringLiteral = 2, Keyword = 4, Identifier = 8 , NumericLiteral = 16, Punctuator = 32, BooleanLiteral = 64 , NilLiteral = 128, VarargLiteral = 256; exports.tokenTypes = { EOF: EOF, StringLiteral: StringLiteral , Keyword: Keyword, Identifier: Identifier, NumericLiteral: NumericLiteral , Punctuator: Punctuator, BooleanLiteral: BooleanLiteral , NilLiteral: NilLiteral, VarargLiteral: VarargLiteral }; // As this parser is a bit different from luas own, the error messages // will be different in some situations. 
// Message templates for every error the lexer/parser can raise. The `%N`
// placeholders are 1-based positional arguments filled in by `sprintf`.
var errors = exports.errors = {
  unexpected: "unexpected %1 '%2' near '%3'",
  unexpectedEOF: "unexpected symbol near '<eof>'",
  expected: "'%1' expected near '%2'",
  expectedToken: "%1 expected near '%2'",
  unfinishedString: "unfinished string near '%1'",
  malformedNumber: "malformed number near '%1'",
  decimalEscapeTooLarge: "decimal escape too large near '%1'",
  invalidEscape: "invalid escape sequence near '%1'",
  hexadecimalDigitExpected: "hexadecimal digit expected near '%1'",
  braceExpected: "missing '%1' near '%2'",
  tooLargeCodepoint: "UTF-8 value too large near '%1'",
  unfinishedLongString: "unfinished long string (starting at line %1) near '%2'",
  unfinishedLongComment: "unfinished long comment (starting at line %1) near '%2'",
  ambiguousSyntax: "ambiguous syntax (function call x new statement) near '%1'",
  noLoopToBreak: "no loop to break near '%1'",
  labelAlreadyDefined: "label '%1' already defined on line %2",
  labelNotVisible: "no visible label '%1' for <goto>",
  gotoJumpInLocalScope: "<goto %1> jumps into the scope of local '%2'",
  cannotUseVararg: "cannot use '...' outside a vararg function near '%1'",
  invalidCodeUnit: "code unit U+%1 is not allowed in the current encoding mode"
};

// ### Abstract Syntax Tree
//
// The default AST structure is inspired by the Mozilla Parser API but can
// easily be customized by overriding these functions.
// Node factories. Each one returns a plain object tagged with a `type`
// string; they are exposed on `exports.ast` so callers can override them.
var ast = exports.ast = {
    labelStatement: function(label) {
    return {
        type: 'LabelStatement'
      , label: label
    };
  }

  , breakStatement: function() {
    return {
        type: 'BreakStatement'
    };
  }

  , gotoStatement: function(label) {
    return {
        type: 'GotoStatement'
      , label: label
    };
  }

  , returnStatement: function(args) {
    return {
        type: 'ReturnStatement'
      , 'arguments': args
    };
  }

  , ifStatement: function(clauses) {
    return {
        type: 'IfStatement'
      , clauses: clauses
    };
  }

  , ifClause: function(condition, body) {
    return {
        type: 'IfClause'
      , condition: condition
      , body: body
    };
  }

  , elseifClause: function(condition, body) {
    return {
        type: 'ElseifClause'
      , condition: condition
      , body: body
    };
  }

  , elseClause: function(body) {
    return {
        type: 'ElseClause'
      , body: body
    };
  }

  , whileStatement: function(condition, body) {
    return {
        type: 'WhileStatement'
      , condition: condition
      , body: body
    };
  }

  , doStatement: function(body) {
    return {
        type: 'DoStatement'
      , body: body
    };
  }

  , repeatStatement: function(condition, body) {
    return {
        type: 'RepeatStatement'
      , condition: condition
      , body: body
    };
  }

  , localStatement: function(variables, init) {
    return {
        type: 'LocalStatement'
      , variables: variables
      , init: init
    };
  }

  , assignmentStatement: function(variables, init) {
    return {
        type: 'AssignmentStatement'
      , variables: variables
      , init: init
    };
  }

  , callStatement: function(expression) {
    return {
        type: 'CallStatement'
      , expression: expression
    };
  }

  , functionStatement: function(identifier, parameters, isLocal, body) {
    return {
        type: 'FunctionDeclaration'
      , identifier: identifier
      , isLocal: isLocal
      , parameters: parameters
      , body: body
    };
  }

  , forNumericStatement: function(variable, start, end, step, body) {
    return {
        type: 'ForNumericStatement'
      , variable: variable
      , start: start
      , end: end
      , step: step
      , body: body
    };
  }

  , forGenericStatement: function(variables, iterators, body) {
    return {
        type: 'ForGenericStatement'
      , variables: variables
      , iterators: iterators
      , body: body
    };
  }

  , chunk: function(body) {
    return {
        type: 'Chunk'
      , body: body
    };
  }

  , identifier: function(name) {
    return {
        type: 'Identifier'
      , name: name
    };
  }

  // `type` arrives as one of the numeric token-type flags and is mapped to
  // the node type name; anything unrecognised becomes 'VarargLiteral'.
  , literal: function(type, value, raw) {
    type = (type === StringLiteral) ? 'StringLiteral'
      : (type === NumericLiteral) ? 'NumericLiteral'
      : (type === BooleanLiteral) ? 'BooleanLiteral'
      : (type === NilLiteral) ? 'NilLiteral'
      : 'VarargLiteral';

    return {
        type: type
      , value: value
      , raw: raw
    };
  }

  , tableKey: function(key, value) {
    return {
        type: 'TableKey'
      , key: key
      , value: value
    };
  }

  , tableKeyString: function(key, value) {
    return {
        type: 'TableKeyString'
      , key: key
      , value: value
    };
  }

  , tableValue: function(value) {
    return {
        type: 'TableValue'
      , value: value
    };
  }

  , tableConstructorExpression: function(fields) {
    return {
        type: 'TableConstructorExpression'
      , fields: fields
    };
  }

  // `and` / `or` produce a LogicalExpression; every other operator produces
  // a BinaryExpression.
  , binaryExpression: function(operator, left, right) {
    var type = ('and' === operator || 'or' === operator) ?
      'LogicalExpression' :
      'BinaryExpression';

    return {
        type: type
      , operator: operator
      , left: left
      , right: right
    };
  }

  , unaryExpression: function(operator, argument) {
    return {
        type: 'UnaryExpression'
      , operator: operator
      , argument: argument
    };
  }

  , memberExpression: function(base, indexer, identifier) {
    return {
        type: 'MemberExpression'
      , indexer: indexer
      , identifier: identifier
      , base: base
    };
  }

  , indexExpression: function(base, index) {
    return {
        type: 'IndexExpression'
      , base: base
      , index: index
    };
  }

  , callExpression: function(base, args) {
    return {
        type: 'CallExpression'
      , base: base
      , 'arguments': args
    };
  }

  , tableCallExpression: function(base, args) {
    return {
        type: 'TableCallExpression'
      , base: base
      , 'arguments': args
    };
  }

  , stringCallExpression: function(base, argument) {
    return {
        type: 'StringCallExpression'
      , base: base
      , argument: argument
    };
  }

  , comment: function(value, raw) {
    return {
        type: 'Comment'
      , value: value
      , raw: raw
    };
  }
};

// Wrap up the node object.

function finishNode(node) {
  // Pop a `Marker` off the location-array and attach its location data.
  if (trackLocations) {
    var location = locations.pop();
    location.complete();
    location.bless(node);
  }
  if (options.onCreateNode) options.onCreateNode(node);
  return node;
}


// Helpers
// -------

var slice = Array.prototype.slice
  , toString = Object.prototype.toString
  ;

// Linear-search fallback; replaced just below by the native
// `Array.prototype.indexOf` whenever the runtime provides one.
var indexOf = /* istanbul ignore next */ function (array, element) {
  for (var i = 0, length = array.length; i < length; ++i) {
    if (array[i] === element) return i;
  }
  return -1;
};

/* istanbul ignore else */
if (Array.prototype.indexOf)
  indexOf = function (array, element) {
    return array.indexOf(element);
  };

// Iterate through an array of objects and return the index of an object
// with a matching property.

function indexOfObject(array, property, element) {
  for (var i = 0, length = array.length; i < length; ++i) {
    if (array[i][property] === element) return i;
  }
  return -1;
}

// A sprintf implementation using %index (beginning at 1) to input
// arguments in the format string.
//
// Example:
//
//     // Unexpected function in token
//     sprintf('Unexpected %2 in %1.', 'token', 'function');

function sprintf(format) {
  var args = slice.call(arguments, 1);
  format = format.replace(/%(\d)/g, function (match, index) {
    // NOTE: `+` binds tighter than `||`, so the argument is stringified first
    // (a column of 0 stays "0"); the `''` fallback only applies when the
    // stringified argument is itself empty.
    return '' + args[index - 1] || /* istanbul ignore next */ '';
  });
  return format;
}

// Polyfill for `Object.assign`.

var assign = /* istanbul ignore next */ function (dest) {
  var args = slice.call(arguments, 1)
    , src, prop;

  for (var i = 0, length = args.length; i < length; ++i) {
    src = args[i];
    for (prop in src)
      /* istanbul ignore else */
      if (Object.prototype.hasOwnProperty.call(src, prop)) {
        dest[prop] = src[prop];
      }
  }

  return dest;
};

/* istanbul ignore else */
if (Object.assign)
  assign = Object.assign;

// ### Error functions

exports.SyntaxError = SyntaxError;

// XXX: Eliminate this function and change the error type to be different from SyntaxError.
// This will unfortunately be a breaking change, because some downstream users depend
// on the error thrown being an instance of SyntaxError.
// For example, the Ace editor:
// <https://github.com/ajaxorg/ace/blob/4c7e5eb3f5d5ca9434847be51834a4e41661b852/lib/ace/mode/lua_worker.js#L55>

// Wrap `e` in an object that inherits from it (so `instanceof SyntaxError`
// still holds) while owning writable `line`, `index` and `column` properties.
// Returns `e` itself when `Object.create` is unavailable.
function fixupError(e) {
  /* istanbul ignore if */
  if (!Object.create)
    return e;
  return Object.create(e, {
    'line': { 'writable': true, value: e.line },
    'index': { 'writable': true, value: e.index },
    'column': { 'writable': true, value: e.column }
  });
}

// #### Raise an exception.
//
// Raise an exception by passing a token, a string format and its parameters.
//
// The passed tokens location will automatically be added to the error
// message if it exists, if not it will default to the lexers current
// position.
//
// Example:
//
//     // [1:0] expected [ near (
//     raise(token, "expected %1 near %2", '[', token.value);

function raise(token) {
  var message = sprintf.apply(null, slice.call(arguments, 1))
    , error, col;

  // `== null` also covers `undefined`, so a missing token falls back to the
  // lexer position instead of failing with a TypeError on `token.line`.
  if (token == null || typeof token.line === 'undefined') {
    // Lexer position: columns are reported 1-based here.
    col = index - lineStart + 1;
    error = fixupError(new SyntaxError(sprintf('[%1:%2] %3', line, col, message)));
    error.index = index;
    error.line = line;
    error.column = col;
  } else {
    // Token position: column is the offset of the token within its line.
    col = token.range[0] - token.lineStart;
    error = fixupError(new SyntaxError(sprintf('[%1:%2] %3', token.line, col, message)));
    error.line = token.line;
    error.index = token.range[0];
    error.column = col;
  }
  throw error;
}

// Return the source text covered by `token`, or `token.value` when the
// covered range is empty (as it is for the `<eof>` token).
function tokenValue(token) {
  var raw = input.slice(token.range[0], token.range[1]);
  if (raw)
    return raw;
  return token.value;
}

// #### Raise an unexpected token error.
//
// Example:
//
//     // expected <name> near '0'
//     raiseUnexpectedToken('<name>', token);

function raiseUnexpectedToken(type, token) {
  raise(token, errors.expectedToken, type, tokenValue(token));
}

// #### Raise a general unexpected error
//
// Usage should pass either a token object or a symbol string which was
// expected. We can also specify a nearby token such as <eof>, this will
// default to the currently active token.
//
// Example:
//
//     // Unexpected symbol 'end' near '<eof>'
//     unexpected(token);
//
// If there's no token in the buffer it means we have reached <eof>.

// `found` is either a token object (anything with a `type` property) or a
// plain string naming the offending symbol. The "near" text is always taken
// from the current `lookahead` token.
function unexpected(found) {
  var near = tokenValue(lookahead);
  if ('undefined' !== typeof found.type) {
    var type;
    switch (found.type) {
      case StringLiteral:   type = 'string';      break;
      case Keyword:         type = 'keyword';     break;
      case Identifier:      type = 'identifier';  break;
      case NumericLiteral:  type = 'number';      break;
      case Punctuator:      type = 'symbol';      break;
      case BooleanLiteral:  type = 'boolean';     break;
      // `...` had no case of its own, so the message used to read
      // "unexpected undefined '...'"; report it as a symbol.
      case VarargLiteral:   type = 'symbol';      break;
      case NilLiteral:
        return raise(found, errors.unexpected, 'symbol', 'nil', near);
      case EOF:
        return raise(found, errors.unexpectedEOF);
    }
    return raise(found, errors.unexpected, type, tokenValue(found), near);
  }
  return raise(found, errors.unexpected, 'symbol', found, near);
}

// Lexer
// -----
//
// The lexer, or the tokenizer reads the input string character by character
// and derives a token left-right. To be as efficient as possible the lexer
// prioritizes the common cases such as identifiers. It also works with
// character codes instead of characters as string comparisons was the
// biggest bottleneck of the parser.
//
// If `options.comments` is enabled, all comments encountered will be stored
// in an array which later will be appended to the chunk object. If disabled,
// they will simply be disregarded.
//
// When the lexer has derived a valid token, it will be returned as an object
// containing its value and as well as its position in the input string (this
// is always enabled to provide proper debug messages).
//
// `lex()` starts lexing and returns the following token in the stream.
// Lexer state shared by every scan/read helper below.
var index
  , token
  , previousToken
  , lookahead
  , comments
  , tokenStart
  , line
  , lineStart;

exports.lex = lex;

// Skip whitespace and comments, then scan and return the next token object
// (`type`, `value`, `line`, `lineStart`, `range`).
function lex() {
  skipWhiteSpace();

  // Skip comments beginning with --
  while (45 === input.charCodeAt(index) &&
         45 === input.charCodeAt(index + 1)) {
    scanComment();
    skipWhiteSpace();
  }
  if (index >= length) return {
      type : EOF
    , value: '<eof>'
    , line: line
    , lineStart: lineStart
    , range: [index, index]
  };

  var charCode = input.charCodeAt(index)
    , next = input.charCodeAt(index + 1);

  // Memorize the range index where the token begins.
  tokenStart = index;
  if (isIdentifierStart(charCode)) return scanIdentifierOrKeyword();

  switch (charCode) {
    case 39: case 34: // '"
      return scanStringLiteral();

    case 48: case 49: case 50: case 51: case 52: case 53:
    case 54: case 55: case 56: case 57: // 0-9
      return scanNumericLiteral();

    case 46: // .
      // If the dot is followed by a digit it's a float.
      if (isDecDigit(next)) return scanNumericLiteral();
      if (46 === next) {
        if (46 === input.charCodeAt(index + 2)) return scanVarargLiteral();
        return scanPunctuator('..');
      }
      return scanPunctuator('.');

    case 61: // =
      if (61 === next) return scanPunctuator('==');
      return scanPunctuator('=');

    case 62: // >
      if (features.bitwiseOperators && 62 === next) return scanPunctuator('>>');
      if (61 === next) return scanPunctuator('>=');
      return scanPunctuator('>');

    case 60: // <
      if (features.bitwiseOperators && 60 === next) return scanPunctuator('<<');
      if (61 === next) return scanPunctuator('<=');
      return scanPunctuator('<');

    case 126: // ~
      if (61 === next) return scanPunctuator('~=');
      if (!features.bitwiseOperators)
        break;
      return scanPunctuator('~');

    case 58: // :
      if (features.labels && 58 === next) return scanPunctuator('::');
      return scanPunctuator(':');

    case 91: // [
      // Check for a multiline string, they begin with [= or [[
      if (91 === next || 61 === next) return scanLongStringLiteral();
      return scanPunctuator('[');

    case 47: // /
      // Check for integer division op (//)
      if (features.integerDivision && 47 === next) return scanPunctuator('//');
      return scanPunctuator('/');

    case 38: case 124: // & |
      if (!features.bitwiseOperators)
        break;

      /* fall through */
    case 42: case 94: case 37: case 44: case 123: case 125:
    case 93: case 40: case 41: case 59: case 35: case 45:
    case 43: // * ^ % , { } ] ( ) ; # - +
      return scanPunctuator(input.charAt(index));
  }

  return unexpected(input.charAt(index));
}

// Whitespace has no semantic meaning in lua so simply skip ahead while
// tracking the encountered newlines. Any kind of eol sequence is counted as a
// single line.

function consumeEOL() {
  var charCode = input.charCodeAt(index)
    , peekCharCode = input.charCodeAt(index + 1);

  if (isLineTerminator(charCode)) {
    // Count \n\r and \r\n as one newline.
    if (10 === charCode && 13 === peekCharCode) ++index;
    if (13 === charCode && 10 === peekCharCode) ++index;
    ++line;
    lineStart = ++index;

    return true;
  }
  return false;
}

function skipWhiteSpace() {
  while (index < length) {
    var charCode = input.charCodeAt(index);
    if (isWhiteSpace(charCode)) {
      ++index;
    } else if (!consumeEOL()) {
      break;
    }
  }
}

// Identifiers, keywords, booleans and nil all look the same syntax wise. We
// simply go through them one by one and defaulting to an identifier if no
// previous case matched.

function scanIdentifierOrKeyword() {
  var value, type;

  // Slicing the input string is prefered before string concatenation in a
  // loop for performance reasons.
  while (isIdentifierPart(input.charCodeAt(++index)));
  value = encodingMode.fixup(input.slice(tokenStart, index));

  // Decide on the token type and possibly cast the value.
  if (isKeyword(value)) {
    type = Keyword;
  } else if ('true' === value || 'false' === value) {
    type = BooleanLiteral;
    value = ('true' === value);
  } else if ('nil' === value) {
    type = NilLiteral;
    value = null;
  } else {
    type = Identifier;
  }

  return {
      type: type
    , value: value
    , line: line
    , lineStart: lineStart
    , range: [tokenStart, index]
  };
}

// Once a punctuator reaches this function it should already have been
// validated so we simply return it as a token.

function scanPunctuator(value) {
  index += value.length;
  return {
      type: Punctuator
    , value: value
    , line: line
    , lineStart: lineStart
    , range: [tokenStart, index]
  };
}

// A vararg literal consists of three dots.

function scanVarargLiteral() {
  index += 3;
  return {
      type: VarargLiteral
    , value: '...'
    , line: line
    , lineStart: lineStart
    , range: [tokenStart, index]
  };
}

// Find the string literal by matching the delimiter marks used.

function scanStringLiteral() {
  var delimiter = input.charCodeAt(index++)
    , beginLine = line
    , beginLineStart = lineStart
    , stringStart = index
    , string = encodingMode.discardStrings ? null : ''
    , charCode;

  for (;;) {
    charCode = input.charCodeAt(index++);
    if (delimiter === charCode) break;
    // EOF or `\n` terminates a string literal. If we haven't found the
    // ending delimiter by now, raise an exception. (`raise` always throws,
    // so nothing is appended to `string` here.)
    if (index > length || isLineTerminator(charCode)) {
      raise(null, errors.unfinishedString, input.slice(tokenStart, index - 1));
    }
    if (92 === charCode) { // backslash
      if (!encodingMode.discardStrings) {
        var beforeEscape = input.slice(stringStart, index - 1);
        string += encodingMode.fixup(beforeEscape);
      }
      var escapeValue = readEscapeSequence();
      if (!encodingMode.discardStrings)
        string += escapeValue;
      stringStart = index;
    }
  }
  if (!encodingMode.discardStrings) {
    string += encodingMode.encodeByte(null);
    string += encodingMode.fixup(input.slice(stringStart, index - 1));
  }

  return {
      type: StringLiteral
    , value: string
    , line: beginLine
    , lineStart: beginLineStart
    , lastLine: line
    , lastLineStart: lineStart
    , range: [tokenStart, index]
  };
}

// Expect a multiline string literal and return it as a regular string
// literal, if it doesn't validate into a valid multiline string, throw an
// exception.

function scanLongStringLiteral() {
  var beginLine = line
    , beginLineStart = lineStart
    , string = readLongString(false);
  // Fail if it's not a multiline literal.
  if (false === string) {
    // The error is positioned at the current `token`. That variable may still
    // be unset while the very first token is being lexed, in which case we
    // fall back to the lexer position instead of dereferencing `undefined`.
    if (token) raise(token, errors.expected, '[', tokenValue(token));
    raise(null, errors.expected, '[', input.slice(tokenStart, index));
  }

  return {
      type: StringLiteral
    , value: encodingMode.discardStrings ? null : encodingMode.fixup(string)
    , line: beginLine
    , lineStart: beginLineStart
    , lastLine: line
    , lastLineStart: lineStart
    , range: [tokenStart, index]
  };
}

// Numeric literals will be returned as floating-point numbers instead of
// strings. The raw value should be retrieved from slicing the input string
// later on in the process.
//
// If a hexadecimal number is encountered, it will be converted.

function scanNumericLiteral() {
  var character = input.charAt(index)
    , next = input.charAt(index + 1);

  // `charAt` yields '' past the end of input and `'xX'.indexOf('')` is 0, so
  // the `|| null` turns the empty string into a value that never matches.
  var literal = ('0' === character && 'xX'.indexOf(next || null) >= 0) ?
    readHexLiteral() : readDecLiteral();

  var foundImaginaryUnit = readImaginaryUnitSuffix()
    , foundInt64Suffix = readInt64Suffix();

  if (foundInt64Suffix && (foundImaginaryUnit || literal.hasFractionPart)) {
    raise(null, errors.malformedNumber, input.slice(tokenStart, index));
  }

  return {
      type: NumericLiteral
    , value: literal.value
    , line: line
    , lineStart: lineStart
    , range: [tokenStart, index]
  };
}

function readImaginaryUnitSuffix() {
  if (!features.imaginaryNumbers) return;

  // Imaginary unit number suffix is optional.
  // See http://luajit.org/ext_ffi_api.html#literals
  if ('iI'.indexOf(input.charAt(index) || null) >= 0) {
    ++index;
    return true;
  } else {
    return false;
  }
}

function readInt64Suffix() {
  if (!features.integerSuffixes) return;

  // Int64/uint64 number suffix is optional.
  // See http://luajit.org/ext_ffi_api.html#literals
  if ('uU'.indexOf(input.charAt(index) || null) >= 0) {
    ++index;
    if ('lL'.indexOf(input.charAt(index) || null) >= 0) {
      ++index;
      if ('lL'.indexOf(input.charAt(index) || null) >= 0) {
        ++index;
        return 'ULL';
      } else {
        // UL but no L
        raise(null, errors.malformedNumber, input.slice(tokenStart, index));
      }
    } else {
      // U but no L
      raise(null, errors.malformedNumber, input.slice(tokenStart, index));
    }
  } else if ('lL'.indexOf(input.charAt(index) || null) >= 0) {
    ++index;
    if ('lL'.indexOf(input.charAt(index) || null) >= 0) {
      ++index;
      return 'LL';
    } else {
      // First L but no second L
      raise(null, errors.malformedNumber, input.slice(tokenStart, index));
    }
  }
}

// Lua hexadecimals have an optional fraction part and an optional binary
// exponent part. These are not included in JavaScript so we will compute
// all three parts separately and then sum them up at the end of the function
// with the following algorithm.
//
//     Digit := toDec(digit)
//     Fraction := toDec(fraction) / 16 ^ fractionCount
//     BinaryExp := 2 ^ binaryExp
//     Number := ( Digit + Fraction ) * BinaryExp

function readHexLiteral() {
  var fraction = 0 // defaults to 0 as it gets summed
    , binaryExponent = 1 // defaults to 1 as it gets multiplied
    , binarySign = 1 // positive
    , digit, fractionStart, exponentStart, digitStart;

  digitStart = index += 2; // Skip 0x part

  // A minimum of one hex digit is required.
  if (!isHexDigit(input.charCodeAt(index)))
    raise(null, errors.malformedNumber, input.slice(tokenStart, index));

  while (isHexDigit(input.charCodeAt(index))) ++index;
  // Convert the hexadecimal digit to base 10.
  digit = parseInt(input.slice(digitStart, index), 16);

  // Fraction part is optional.
  var foundFraction = false;
  if ('.' === input.charAt(index)) {
    foundFraction = true;
    fractionStart = ++index;

    while (isHexDigit(input.charCodeAt(index))) ++index;
    fraction = input.slice(fractionStart, index);

    // Empty fraction parts should default to 0, others should be converted
    // 0.x form so we can use summation at the end.
    fraction = (fractionStart === index) ? 0
      : parseInt(fraction, 16) / Math.pow(16, index - fractionStart);
  }

  // Binary exponents are optional
  var foundBinaryExponent = false;
  if ('pP'.indexOf(input.charAt(index) || null) >= 0) {
    foundBinaryExponent = true;
    ++index;

    // Sign part is optional and defaults to 1 (positive).
    if ('+-'.indexOf(input.charAt(index) || null) >= 0)
      binarySign = ('+' === input.charAt(index++)) ? 1 : -1;

    exponentStart = index;

    // The binary exponent sign requires a decimal digit.
    if (!isDecDigit(input.charCodeAt(index)))
      raise(null, errors.malformedNumber, input.slice(tokenStart, index));

    while (isDecDigit(input.charCodeAt(index))) ++index;
    binaryExponent = input.slice(exponentStart, index);

    // Calculate the binary exponent of the number.
    binaryExponent = Math.pow(2, binaryExponent * binarySign);
  }

  return {
    value: (digit + fraction) * binaryExponent,
    hasFractionPart: foundFraction || foundBinaryExponent
  };
}

// Decimal numbers are exactly the same in Lua and in JavaScript, because of
// this we check where the token ends and then parse it with native
// functions.

function readDecLiteral() {
  while (isDecDigit(input.charCodeAt(index))) ++index;
  // Fraction part is optional
  var foundFraction = false;
  if ('.' === input.charAt(index)) {
    foundFraction = true;
    ++index;
    // Fraction part defaults to 0
    while (isDecDigit(input.charCodeAt(index))) ++index;
  }

  // Exponent part is optional.
  var foundExponent = false;
  if ('eE'.indexOf(input.charAt(index) || null) >= 0) {
    foundExponent = true;
    ++index;
    // Sign part is optional.
    if ('+-'.indexOf(input.charAt(index) || null) >= 0) ++index;
    // An exponent is required to contain at least one decimal digit.
    if (!isDecDigit(input.charCodeAt(index)))
      raise(null, errors.malformedNumber, input.slice(tokenStart, index));

    while (isDecDigit(input.charCodeAt(index))) ++index;
  }

  return {
    value: parseFloat(input.slice(tokenStart, index)),
    hasFractionPart: foundFraction || foundExponent
  };
}

// Read a `\u{XXX}` escape. On entry `index` points at the `u`; on success it
// ends just past the closing brace and the encoded code point is returned.
function readUnicodeEscapeSequence() {
  var sequenceStart = index++;

  if (input.charAt(index++) !== '{')
    raise(null, errors.braceExpected, '{', '\\' + input.slice(sequenceStart, index));
  if (!isHexDigit(input.charCodeAt(index)))
    raise(null, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index));

  // Leading zeros do not count towards the digit limit checked below.
  while (input.charCodeAt(index) === 0x30) ++index;
  var escStart = index;

  while (isHexDigit(input.charCodeAt(index))) {
    ++index;
    if (index - escStart > 6)
      raise(null, errors.tooLargeCodepoint, '\\' + input.slice(sequenceStart, index));
  }

  var b = input.charAt(index++);
  if (b !== '}') {
    if ((b === '"') || (b === "'"))
      raise(null, errors.braceExpected, '}', '\\' + input.slice(sequenceStart, index--));
    else
      raise(null, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index));
  }

  var codepoint = parseInt(input.slice(escStart, index - 1) || '0', 16);
  var frag = '\\' + input.slice(sequenceStart, index);

  if (codepoint > 0x10ffff) {
    raise(null, errors.tooLargeCodepoint, frag);
  }

  return encodingMode.encodeUTF8(codepoint, frag);
}

// Translate escape sequences to the actual characters.
function readEscapeSequence() {
  var sequenceStart = index;
  switch (input.charAt(index)) {
    // Lua allow the following escape sequences.
    case 'a': ++index; return '\x07';
    case 'n': ++index; return '\n';
    case 'r': ++index; return '\r';
    case 't': ++index; return '\t';
    case 'v': ++index; return '\x0b';
    case 'b': ++index; return '\b';
    case 'f': ++index; return '\f';

    // Backslash at the end of the line. We treat all line endings as equivalent,
    // and as representing the [LF] character (code 10). Lua 5.1 through 5.3
    // have been verified to behave the same way.
    case '\r':
    case '\n':
      consumeEOL();
      return '\n';

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      // \ddd, where ddd is a sequence of up to three decimal digits.
      while (isDecDigit(input.charCodeAt(index)) && index - sequenceStart < 3) ++index;

      var frag = input.slice(sequenceStart, index);
      var ddd = parseInt(frag, 10);
      if (ddd > 255) {
        raise(null, errors.decimalEscapeTooLarge, '\\' + ddd);
      }
      return encodingMode.encodeByte(ddd, '\\' + frag);

    case 'z':
      if (features.skipWhitespaceEscape) {
        ++index;
        skipWhiteSpace();
        return '';
      }
      break;

    case 'x':
      if (features.hexEscapes) {
        // \xXX, where XX is a sequence of exactly two hexadecimal digits
        if (isHexDigit(input.charCodeAt(index + 1)) &&
            isHexDigit(input.charCodeAt(index + 2))) {
          index += 3;
          return encodingMode.encodeByte(parseInt(input.slice(sequenceStart + 1, index), 16), '\\' + input.slice(sequenceStart, index));
        }
        raise(null, errors.hexadecimalDigitExpected, '\\' + input.slice(sequenceStart, index + 2));
      }
      break;

    case 'u':
      if (features.unicodeEscapes)
        return readUnicodeEscapeSequence();
      break;

    case '\\': case '"': case "'":
      return input.charAt(index++);
  }

  if (features.strictEscapes)
    raise(null, errors.invalidEscape, '\\' + input.slice(sequenceStart, index + 1));
  return input.charAt(index++);
}

// Comments begin with -- after which it will be decided if they are
// multiline comments or not.
//
// The multiline functionality works the exact same way as with string
// literals so we reuse the functionality.

function scanComment() {
  tokenStart = index;
  index += 2; // --

  var character = input.charAt(index)
    , content = ''
    , isLong = false
    , commentStart = index
    , lineStartComment = lineStart
    , lineComment = line;

  if ('[' === character) {
    content = readLongString(true);
    // This wasn't a multiline comment after all.
    if (false === content) content = character;
    else isLong = true;
  }
  // Scan until next line as long as it's not a multiline comment.
  if (!isLong) {
    while (index < length) {
      if (isLineTerminator(input.charCodeAt(index))) break;
      ++index;
    }
    if (options.comments) content = input.slice(commentStart, index);
  }

  if (options.comments) {
    var node = ast.comment(content, input.slice(tokenStart, index));

    // `Marker`s depend on tokens available in the parser and as comments are
    // intercepted in the lexer all location data is set manually.
    if (options.locations) {
      node.loc = {
          start: { line: lineComment, column: tokenStart - lineStartComment }
        , end: { line: line, column: index - lineStart }
      };
    }
    if (options.ranges) {
      node.range = [tokenStart, index];
    }
    if (options.onCreateNode) options.onCreateNode(node);
    comments.push(node);
  }
}

// Read a multiline string by calculating the depth of `=` characters and
// then appending until an equal depth is found.

function readLongString(isComment) {
  var level = 0
    , content = ''
    , terminator = false
    , character, stringStart, firstLine = line;

  ++index; // [

  // Calculate the depth of the comment.
  while ('=' === input.charAt(index + level)) ++level;
  // Exit, this is not a long string afterall.
  if ('[' !== input.charAt(index + level)) return false;

  index += level + 1;

  // If the first character is a newline, ignore it and begin on next line.
  if (isLineTerminator(input.charCodeAt(index))) consumeEOL();

  stringStart = index;
  while (index < length) {
    // To keep track of line numbers run the `consumeEOL()` which increments
    // its counter.
    while (isLineTerminator(input.charCodeAt(index))) consumeEOL();

    character = input.charAt(index++);

    // Once the delimiter is found, iterate through the depth count and see
    // if it matches.
    if (']' === character) {
      terminator = true;
      for (var i = 0; i < level; ++i) {
        if ('=' !== input.charAt(index + i)) terminator = false;
      }
      if (']' !== input.charAt(index + level)) terminator = false;
    }

    // We reached the end of the multiline string. Get out now.
    if (terminator) {
      content += input.slice(stringStart, index - 1);
      index += level + 1;
      return content;
    }
  }

  raise(null, isComment ?
              errors.unfinishedLongComment :
              errors.unfinishedLongString,
        firstLine, '<eof>');
}

// ## Lex functions and helpers.

// Read the next token.
//
// This is actually done by setting the current token to the lookahead and
// reading in the new lookahead token.

function next() {
  previousToken = token;
  token = lookahead;
  lookahead = lex();
}

// Consume a token if its value matches. Once consumed or not, return the
// success of the operation.

function consume(value) {
  if (value === token.value) {
    next();
    return true;
  }
  return false;
}

// Expect the next token value to match. If not, throw an exception.

function expect(value) {
  if (value === token.value) next();
  else raise(token, errors.expected, value, tokenValue(token));
}

// ### Validation functions

function isWhiteSpace(charCode) {
  return 9 === charCode || 32 === charCode || 0xB === charCode || 0xC === charCode;
}

function isLineTerminator(charCode) {
  return 10 === charCode || 13 === charCode;
}

function isDecDigit(charCode) {
  return charCode >= 48 && charCode <= 57;
}

function isHexDigit(charCode) {
  return (charCode >= 48 && charCode <= 57) || (charCode >= 97 && charCode <= 102) || (charCode >= 65 && charCode <= 70);
}

// From [Lua 5.2](http://www.lua.org/manual/5.2/manual.html#8.1) onwards
// identifiers cannot use 'locale-dependent' letters (i.e. dependent on the C locale).
// On the other hand, LuaJIT allows arbitrary octets ≥ 128 in identifiers.
function isIdentifierStart(charCode) {
  // ASCII letters (`A`-`Z`, `a`-`z`) and the underscore can always begin an
  // identifier.
  var isUpper = charCode >= 65 && charCode <= 90
    , isLower = charCode >= 97 && charCode <= 122;

  if (isUpper || isLower || 95 === charCode) return true;

  // Anything from 128 upwards only counts when the active feature set
  // enables extended identifiers.
  return !!(features.extendedIdentifiers && charCode >= 128);
}

// Same rules as `isIdentifierStart`, except that the decimal digits
// (`0`-`9`) are accepted as well.
function isIdentifierPart(charCode) {
  var isLetter = (charCode >= 65 && charCode <= 90) || (charCode >= 97 && charCode <= 122)
    , isDigit = charCode >= 48 && charCode <= 57;

  if (isLetter || 95 === charCode || isDigit) return true;

  return !!(features.extendedIdentifiers && charCode >= 128);
}

// [3.1 Lexical Conventions](http://www.lua.org/manual/5.2/manual.html#3.1)
//
// `true`, `false` and `nil` are deliberately absent: they are treated as
// literals rather than keywords. Candidates are grouped by length so that at
// most a handful of string comparisons is needed per call.

function isKeyword(id) {
  var size = id.length;

  if (2 === size) return 'do' === id || 'if' === id || 'in' === id || 'or' === id;
  if (3 === size) return 'and' === id || 'end' === id || 'for' === id || 'not' === id;
  if (4 === size) {
    if ('else' === id || 'then' === id) return true;
    // `goto` is reserved only when labels are supported and `goto` is not
    // handled contextually.
    return !!(features.labels && !features.contextualGoto) && 'goto' === id;
  }
  if (5 === size) return 'break' === id || 'local' === id || 'until' === id || 'while' === id;
  if (6 === size) return 'elseif' === id || 'repeat' === id || 'return' === id;

  return 8 === size && 'function' === id;
}

// Whether the token is a unary operator: one of the punctuators `#`, `-`,
// `~`, or the keyword `not`.
function isUnary(token) {
  switch (token.type) {
    case Punctuator:
      return '#-~'.indexOf(token.value) >= 0;
    case Keyword:
      return 'not' === token.value;
    default:
      return false;
  }
}

// Check if the token syntactically closes a block: either the end of the
// input or one of the keywords `else`, `elseif`, `end`, `until`.
function isBlockFollow(token) {
  var value;

  if (EOF === token.type) return true;
  if (Keyword !== token.type) return false;

  value = token.value;
  return 'else' === value || 'elseif' === value || 'end' === value || 'until' === value;
}

// Scope
// -----

// Each block scope is stored as an array of identifier names; the scopes
// themselves are kept in a FILO-array.
var scopes;
// The current scope index.
var scopeDepth;
// A list of all global identifier nodes.
var globals;

// Create a new scope inheriting all declarations from the previous scope.
function createScope() {
  // The new scope starts out as a copy of the current one, so every name
  // declared so far stays visible inside it.
  var parent = scopes[scopeDepth];
  ++scopeDepth;
  scopes.push(parent.slice());
  if (options.onCreateScope) options.onCreateScope();
}

// Exit and remove the current scope.
function destroyScope() {
  scopes.pop();
  scopeDepth--;
  if (options.onDestroyScope) options.onDestroyScope();
}

// Add identifier name to the current scope if it doesn't already exist.
// The `onLocalDeclaration` callback is invoked for every declaration, even
// when the name is already present.
function scopeIdentifierName(name) {
  var current;

  if (options.onLocalDeclaration) options.onLocalDeclaration(name);

  current = scopes[scopeDepth];
  if (-1 === indexOf(current, name)) current.push(name);
}

// Add identifier to the current scope and flag its node as local.
function scopeIdentifier(node) {
  scopeIdentifierName(node.name);
  attachScope(node, true);
}

// Attach scope information to node. If the node is global, store it in the
// globals array (once per name) so we can return the information to the
// user.
function attachScope(node, isLocal) {
  if (!isLocal) {
    if (-1 === indexOfObject(globals, 'name', node.name)) globals.push(node);
  }

  node.isLocal = isLocal;
}

// Is the identifier name available in this scope.
function scopeHasName(name) {
  return indexOf(scopes[scopeDepth], name) !== -1;
}

// Location tracking
// -----------------
//
// Locations are stored in FILO-array as a `Marker` object consisting of both
// `loc` and `range` data. Once a `Marker` is popped off the list an end
// location is added and the data is attached to a syntax node.

var locations = [];
var trackLocations;

// Build a `Marker` anchored at the current token.
function createLocationMarker() {
  return new Marker(token);
}

// Records where `token` starts. The end position is left zeroed here. Only
// the pieces enabled through `options.locations` / `options.ranges` are
// created.
function Marker(token) {
  if (options.locations) {
    var start = { line: token.line, column: token.range[0] - token.lineStart }
      , end = { line: 0, column: 0 };

    this.loc = { start: start, end: end };
  }

  if (options.ranges) {
    this.range = [token.range[0], 0];
  }
}

// Complete the location data stored in the `Marker` by adding the location
// of the *previous token* as an end location.
Marker.prototype.complete = function() { if (options.locations) { this.loc.end.line = previousToken.lastLine || previousToken.line; this.loc.end.column = previousToken.range[1] - (previousToken.lastLineStart || previousToken.lineStart); } if (options.ranges) { this.range[1] = previousToken.range[1]; } }; Marker.prototype.bless = function (node) { if (this.loc) { var loc = this.loc; node.loc = { start: { line: loc.start.line, column: loc.start.column }, end: { line: loc.end.line, column: loc.end.column } }; } if (this.range) { node.range = [ this.range[0], this.range[1] ]; } }; // Create a new `Marker` and add it to the FILO-array. function markLocation() { if (trackLocations) locations.push(createLocationMarker()); } // Push an arbitrary `Marker` object onto the FILO-array. function pushLocation(marker) { if (trackLocations) locations.push(marker); } // Control flow tracking // --------------------- // A context object that validates loop breaks and `goto`-based control flow. function FullFlowContext() { this.scopes = []; this.pendingGotos = []; } FullFlowContext.prototype.isInLoop = function () { var i = this.scopes.length; while (i --> 0) { if (this.scopes[i].isLoop) return true; } return false; }; FullFlowContext.prototype.pushScope = function (isLoop) { var scope = { labels: {}, locals: [], deferredGotos: [], isLoop: !!isLoop }; this.scopes.push(scope); }; FullFlowContext.prototype.popScope = function () { for (var i = 0; i < this.pendingGotos.length; ++i) { var theGoto = this.pendingGotos[i]; if (theGoto.maxDepth >= this.scopes.length) if (--theGoto.maxDepth <= 0) raise(theGoto.token, errors.labelNotVisible, theGoto.target); } this.scopes.pop(); }; FullFlowContext.prototype.addGoto = function (target, token) { var localCounts = []; for (var i = 0; i < this.scopes.length; ++i) { var scope = this.scopes[i]; localCounts.push(scope.locals.length); if (Object.prototype.hasOwnProperty.call(scope.labels, target)) return; } this.pendingGotos.push({ maxDepth: 
this.scopes.length, target: target, token: token, localCounts: localCounts }); }; FullFlowContext.prototype.addLabel = function (name, token) { var scope = this.currentScope(); if (Object.prototype.hasOwnProperty.call(scope.labels, name)) { raise(token, errors.labelAlreadyDefined, name, scope.labels[name].line); } else { var newGotos = []; for (var i = 0; i < this.pendingGotos.length; ++i) { var theGoto = this.pendingGotos[i]; if (theGoto.maxDepth >= this.scopes.length && theGoto.target === name) { if (theGoto.localCounts[this.scopes.length - 1] < scope.locals.length) { scope.deferredGotos.push(theGoto); } continue; } newGotos.push(theGoto); } this.pendingGoto