UNPKG

phpjs

Version:

php.js offers community built php functions in javascript

704 lines (701 loc) • 21.3 kB

JavaScript

function token_get_all (source) {
    // Split the given source into PHP tokens
    //
    // +   original by: Marco Marchiò
    // +   improved by: Brett Zamir (http://brett-zamir.me)
    // %        note 1: Token numbers depend on the PHP version; the numbers used here are
    // %        note 1: the ones listed in the `tokens` table below.
    // %        note 2: Everything outside of PHP open/close tags is returned as a single
    // %        note 2: T_INLINE_HTML token.
    // *     example 1: token_get_all('/'+'* comment *'+'/');
    // *     returns 1: [[310, '/* comment */', 1]]
    // *     example 2: token_get_all('<?php echo 1; ?>');
    // *     returns 2: [[367, '<?php ', 1], [315, 'echo', 1], [370, ' ', 1], [304, '1', 1], ';', [370, ' ', 1], [369, '?>', 1]]
    //
    // @param  {string} source  Text to tokenize
    // @return {Array}          List of tokens. A token is either a plain string (symbol without a
    //                          token code) or an array [tokenCode, text, lineNumber]

    // Token to number conversion
    var tokens = {
            T_REQUIRE_ONCE: 261,
            T_REQUIRE: 260,
            T_EVAL: 259,
            T_INCLUDE_ONCE: 258,
            T_INCLUDE: 257,
            T_LOGICAL_OR: 262,
            T_LOGICAL_XOR: 263,
            T_LOGICAL_AND: 264,
            T_PRINT: 265,
            T_SR_EQUAL: 276,
            T_SL_EQUAL: 275,
            T_XOR_EQUAL: 274,
            T_OR_EQUAL: 273,
            T_AND_EQUAL: 272,
            T_MOD_EQUAL: 271,
            T_CONCAT_EQUAL: 270,
            T_DIV_EQUAL: 269,
            T_MUL_EQUAL: 268,
            T_MINUS_EQUAL: 267,
            T_PLUS_EQUAL: 266,
            T_BOOLEAN_OR: 277,
            T_BOOLEAN_AND: 278,
            T_IS_NOT_IDENTICAL: 282,
            T_IS_IDENTICAL: 281,
            T_IS_NOT_EQUAL: 280,
            T_IS_EQUAL: 279,
            T_IS_GREATER_OR_EQUAL: 284,
            T_IS_SMALLER_OR_EQUAL: 283,
            T_SR: 286,
            T_SL: 285,
            T_INSTANCEOF: 287,
            T_UNSET_CAST: 296,
            T_BOOL_CAST: 295,
            T_OBJECT_CAST: 294,
            T_ARRAY_CAST: 293,
            T_STRING_CAST: 292,
            T_DOUBLE_CAST: 291,
            T_INT_CAST: 290,
            T_DEC: 289,
            T_INC: 288,
            T_CLONE: 298,
            T_NEW: 297,
            T_EXIT: 299,
            T_IF: 300,
            T_ELSEIF: 301,
            T_ELSE: 302,
            T_ENDIF: 303,
            T_LNUMBER: 304,
            T_DNUMBER: 305,
            T_STRING: 306,
            T_STRING_VARNAME: 307,
            T_VARIABLE: 308,
            T_NUM_STRING: 309,
            T_INLINE_HTML: 310,
            T_CHARACTER: 311,
            T_BAD_CHARACTER: 312,
            T_ENCAPSED_AND_WHITESPACE: 313,
            T_CONSTANT_ENCAPSED_STRING: 314,
            T_ECHO: 315,
            T_DO: 316,
            T_WHILE: 317,
            T_ENDWHILE: 318,
            T_FOR: 319,
            T_ENDFOR: 320,
            T_FOREACH: 321,
            T_ENDFOREACH: 322,
            T_DECLARE: 323,
            T_ENDDECLARE: 324,
            T_AS: 325,
            T_SWITCH: 326,
            T_ENDSWITCH: 327,
            T_CASE: 328,
            T_DEFAULT: 329,
            T_BREAK: 330,
            T_CONTINUE: 331,
            T_GOTO: 332,
            T_FUNCTION: 333,
            T_CONST: 334,
            T_RETURN: 335,
            T_TRY: 336,
            T_CATCH: 337,
            T_THROW: 338,
            T_USE: 339,
            T_GLOBAL: 340,
            T_PUBLIC: 346,
            T_PROTECTED: 345,
            T_PRIVATE: 344,
            T_FINAL: 343,
            T_ABSTRACT: 342,
            T_STATIC: 341,
            T_VAR: 347,
            T_UNSET: 348,
            T_ISSET: 349,
            T_EMPTY: 350,
            T_HALT_COMPILER: 351,
            T_CLASS: 352,
            T_INTERFACE: 353,
            T_EXTENDS: 354,
            T_IMPLEMENTS: 355,
            T_OBJECT_OPERATOR: 356,
            T_DOUBLE_ARROW: 357,
            T_LIST: 358,
            T_ARRAY: 359,
            T_CLASS_C: 360,
            T_METHOD_C: 361,
            T_FUNC_C: 362,
            T_LINE: 363,
            T_FILE: 364,
            T_COMMENT: 365,
            T_DOC_COMMENT: 366,
            T_OPEN_TAG: 367,
            T_OPEN_TAG_WITH_ECHO: 368,
            T_CLOSE_TAG: 369,
            T_WHITESPACE: 370,
            T_START_HEREDOC: 371,
            T_END_HEREDOC: 372,
            T_DOLLAR_OPEN_CURLY_BRACES: 373,
            T_CURLY_OPEN: 374,
            T_PAAMAYIM_NEKUDOTAYIM: 375,
            T_NAMESPACE: 376,
            T_NS_C: 377,
            T_DIR: 378,
            T_NS_SEPARATOR: 379
        },
        // Keywords tokens (looked up first with the exact word, then lower-cased)
        keywordsToken = {
            "abstract": tokens.T_ABSTRACT,
            "array": tokens.T_ARRAY,
            "as": tokens.T_AS,
            "break": tokens.T_BREAK,
            "case": tokens.T_CASE,
            "catch": tokens.T_CATCH,
            "class": tokens.T_CLASS,
            "__CLASS__": tokens.T_CLASS_C,
            "clone": tokens.T_CLONE,
            "const": tokens.T_CONST,
            "continue": tokens.T_CONTINUE,
            "declare": tokens.T_DECLARE,
            "default": tokens.T_DEFAULT,
            "__DIR__": tokens.T_DIR,
            "die": tokens.T_EXIT,
            "do": tokens.T_DO,
            "echo": tokens.T_ECHO,
            "else": tokens.T_ELSE,
            "elseif": tokens.T_ELSEIF,
            "empty": tokens.T_EMPTY,
            "enddeclare": tokens.T_ENDDECLARE,
            "endfor": tokens.T_ENDFOR,
            "endforeach": tokens.T_ENDFOREACH,
            "endif": tokens.T_ENDIF,
            "endswitch": tokens.T_ENDSWITCH,
            "endwhile": tokens.T_ENDWHILE,
            "eval": tokens.T_EVAL,
            "exit": tokens.T_EXIT,
            "extends": tokens.T_EXTENDS,
            "__FILE__": tokens.T_FILE,
            "final": tokens.T_FINAL,
            "for": tokens.T_FOR,
            "foreach": tokens.T_FOREACH,
            "function": tokens.T_FUNCTION,
            "__FUNCTION__": tokens.T_FUNC_C,
            "global": tokens.T_GLOBAL,
            "goto": tokens.T_GOTO,
            "__halt_compiler": tokens.T_HALT_COMPILER,
            "if": tokens.T_IF,
            "implements": tokens.T_IMPLEMENTS,
            "include": tokens.T_INCLUDE,
            "include_once": tokens.T_INCLUDE_ONCE,
            "instanceof": tokens.T_INSTANCEOF,
            "interface": tokens.T_INTERFACE,
            "isset": tokens.T_ISSET,
            "__LINE__": tokens.T_LINE,
            "list": tokens.T_LIST,
            "and": tokens.T_LOGICAL_AND,
            "or": tokens.T_LOGICAL_OR,
            "xor": tokens.T_LOGICAL_XOR,
            "__METHOD__": tokens.T_METHOD_C,
            "namespace": tokens.T_NAMESPACE,
            "__NAMESPACE__": tokens.T_NS_C,
            "new": tokens.T_NEW,
            "print": tokens.T_PRINT,
            "private": tokens.T_PRIVATE,
            "public": tokens.T_PUBLIC,
            "protected": tokens.T_PROTECTED,
            "require": tokens.T_REQUIRE,
            "require_once": tokens.T_REQUIRE_ONCE,
            "return": tokens.T_RETURN,
            "static": tokens.T_STATIC,
            "switch": tokens.T_SWITCH,
            "throw": tokens.T_THROW,
            "try": tokens.T_TRY,
            "unset": tokens.T_UNSET,
            "use": tokens.T_USE,
            "var": tokens.T_VAR,
            "while": tokens.T_WHILE
        },
        // Type casting tokens
        typeCasting = {
            "array": tokens.T_ARRAY_CAST,
            "bool": tokens.T_BOOL_CAST,
            "boolean": tokens.T_BOOL_CAST,
            "real": tokens.T_DOUBLE_CAST,
            "double": tokens.T_DOUBLE_CAST,
            "float": tokens.T_DOUBLE_CAST,
            "int": tokens.T_INT_CAST,
            "integer": tokens.T_INT_CAST,
            "object": tokens.T_OBJECT_CAST,
            "string": tokens.T_STRING_CAST,
            "unset": tokens.T_UNSET_CAST,
            "binary": tokens.T_STRING_CAST
        },
        // Symbols tokens with 2 characters
        symbols2chars = {
            "&=": tokens.T_AND_EQUAL,
            "&&": tokens.T_BOOLEAN_AND,
            "||": tokens.T_BOOLEAN_OR,
            "?>": tokens.T_CLOSE_TAG,
            "%>": tokens.T_CLOSE_TAG,
            ".=": tokens.T_CONCAT_EQUAL,
            "--": tokens.T_DEC,
            "/=": tokens.T_DIV_EQUAL,
            "=>": tokens.T_DOUBLE_ARROW,
            "::": tokens.T_PAAMAYIM_NEKUDOTAYIM,
            "++": tokens.T_INC,
            "==": tokens.T_IS_EQUAL,
            ">=": tokens.T_IS_GREATER_OR_EQUAL,
            "!=": tokens.T_IS_NOT_EQUAL,
            "<>": tokens.T_IS_NOT_EQUAL,
            "<=": tokens.T_IS_SMALLER_OR_EQUAL,
            "-=": tokens.T_MINUS_EQUAL,
            "%=": tokens.T_MOD_EQUAL,
            "*=": tokens.T_MUL_EQUAL,
            "->": tokens.T_OBJECT_OPERATOR,
            "|=": tokens.T_OR_EQUAL,
            "+=": tokens.T_PLUS_EQUAL,
            "<<": tokens.T_SL,
            ">>": tokens.T_SR,
            "^=": tokens.T_XOR_EQUAL,
            "<?": tokens.T_OPEN_TAG
        },
        // Symbols tokens with 3 characters
        symbols3chars = {
            "===": tokens.T_IS_IDENTICAL,
            "!==": tokens.T_IS_NOT_IDENTICAL,
            "<<=": tokens.T_SL_EQUAL,
            ">>=": tokens.T_SR_EQUAL,
            "<?=": tokens.T_OPEN_TAG_WITH_ECHO,
            "<%=": tokens.T_OPEN_TAG_WITH_ECHO
        },
        // Token code emitted for each kind of buffer when the buffer is closed
        bufferTokens = {
            "html": tokens.T_INLINE_HTML,
            "inlineComment": tokens.T_COMMENT,
            "comment": tokens.T_COMMENT,
            "docComment": tokens.T_DOC_COMMENT,
            "singleQuote": tokens.T_CONSTANT_ENCAPSED_STRING,
            "doubleQuotes": tokens.T_CONSTANT_ENCAPSED_STRING,
            "nowdoc": tokens.T_ENCAPSED_AND_WHITESPACE,
            "heredoc": tokens.T_ENCAPSED_AND_WHITESPACE
        },
        // Characters that are emitted as tokens without a code
        singleTokenChars = ";(){}[],~@`=+/-*.$|^&<>%!?:\"'\\",
        // Buffer type. Start an html buffer immediately.
        bufferType = "html",
        // Buffer content
        buffer = "",
        // Last emitted token (null while tokens are being appended to a buffer)
        lastToken,
        // Results array
        ret = [],
        // Word that started the heredoc or nowdoc buffer
        heredocWord,
        // Line number
        line = 1,
        // Line at which the buffer begins
        lineBuffer = 1,
        // Flag that indicates if the current double quoted string has been split
        split,
        // Previous buffer type of the tokenizer, stored while parsing a complex variable syntax
        complexVarPrevBuffer,
        // Number of open brackets inside a complex variable syntax
        openBrackets,
        // Emit a token. While a buffer is active (and preventBuffer is falsy) the text is only
        // appended to the buffer; otherwise it is pushed to the results, as [code, token, line]
        // when a code is given or as a bare string when it is not.
        emitToken = function (token, code, preventBuffer, l) {
            if (!preventBuffer && bufferType) {
                buffer += token;
                lastToken = null;
            } else {
                lastToken = code ? code : token;
                ret.push(code ? [code, token, l || line] : token);
            }
        },
        // Emit the current buffer (if not empty) with the code of its type, then close it
        emitBuffer = function () {
            buffer && emitToken(buffer, bufferTokens[bufferType], true, lineBuffer);
            buffer = "";
            bufferType = null;
        },
        // Check if the character at the current index is escaped, i.e. preceded by an odd
        // number of backslashes
        isEscaped = function () {
            var escaped = false,
                c = i - 1;
            for (; c >= 0; c--) {
                if (source.charAt(c) !== "\\") {
                    break;
                }
                escaped = !escaped;
            }
            return escaped;
        },
        // Split a double quoted string or a heredoc buffer after a variable has been found
        // inside it
        splitString = function () {
            // Don't emit empty buffers
            if (!buffer) {
                return;
            }
            // If the buffer is a double quoted string and it has not yet been split, emit the
            // opening double quotes as a token without an associated code
            if (bufferType === "doubleQuotes" && !split) {
                split = true;
                emitToken('"', null, true);
                buffer = buffer.substr(1);
            }
            buffer && emitToken(buffer, tokens.T_ENCAPSED_AND_WHITESPACE, true, lineBuffer);
            buffer = "";
            lineBuffer = line;
        },
        // Return the number of line feed characters in the given string
        getNewLines = function (str) {
            var count = 0;
            str.replace(/\n/g, function () {
                count++;
            });
            return count;
        },
        // Check if the given character code identifies a whitespace (tab, LF, CR or space)
        isWhitespace = function (ASCII) {
            return ASCII === 9 || ASCII === 10 || ASCII === 13 || ASCII === 32;
        },
        // Get the run of whitespace characters that follows the current index
        getWhitespaces = function () {
            // `c` must be a local: the original assigned it without declaring it, which leaks a
            // global in sloppy mode and throws a ReferenceError in strict mode / ES modules
            var as, chr, c, spaces = "";
            for (c = i + 1; c < length; c++) {
                chr = source.charAt(c);
                as = chr.charCodeAt(0);
                if (isWhitespace(as)) {
                    spaces += chr;
                } else {
                    break;
                }
            }
            return spaces;
        },
        // Get the word (identifier) that starts at the given index, or null
        getWord = function (i) {
            var match = /^[a-zA-Z_]\w*/.exec(source.substr(i));
            return match ? match[0] : null;
        },
        // Get the heredoc/nowdoc declaration that starts at the current index (regexp match or null)
        getHeredocWord = function () {
            return /^<<< *(['"]?[a-zA-Z]\w*)['"]?\r?\n/.exec(source.substr(i));
        },
        // Get the type casting declaration that starts at the current index (regexp match or null)
        getTypeCasting = function () {
            var match = /^\( *([a-zA-Z]+) *\)/.exec(source.substr(i));
            return match && match[1] && (match[1].toLowerCase()) in typeCasting ? match : null;
        },
        // Get the "php" part of a long open tag (with one optional trailing whitespace)
        getLongOpenDeclaration = function (i) {
            return /^php(?:\r?\s)?/i.exec(source.substr(i));
        },
        // Get the integer or float number that starts at the current index as [text, tokenCode]
        getNumber = function () {
            var rnum = /^(?:((?:\d+(?:\.\d*)?|\d*\.\d+)[eE][\+\-]?\d+|\d*\.\d+|\d+\.\d*)|(\d+(?:x[0-9a-fA-F]+)?))/,
                match = rnum.exec(source.substr(i)),
                isHex;
            if (!match) {
                return null;
            }
            if (match[2]) {
                isHex = match[2].toLowerCase().indexOf("x") > -1;
                // Integers that do not fit below 2147483648 are considered floating point numbers
                if (parseInt(isHex ? parseInt(match[2], 16) : match[2], 10) < 2147483648) {
                    return [match[2], tokens.T_LNUMBER];
                }
                return [match[2], tokens.T_DNUMBER];
            }
            return [match[1], tokens.T_DNUMBER];
        },
        // Regexp to check if the characters that follow a word are valid as heredoc end declaration
        heredocEndFollowing = /^;?\r?\n/,
        i = 0,
        num,
        length = source.length,
        nextch,
        word,
        wordLower,
        ch,
        parts,
        sym,
        ASCII;

    for (; i < length; i++) {
        ch = source.charAt(i);
        ASCII = ch.charCodeAt(0);

        if (isWhitespace(ASCII)) {
            // Whitespaces: get the following whitespaces too
            ch += getWhitespaces();
            // PHP closing tags and inline comments include the following new line characters
            if ((bufferType === "inlineComment" || lastToken === tokens.T_CLOSE_TAG) && ch.indexOf("\n") > -1) {
                parts = ch.split("\n");
                line++;
                if (bufferType === "inlineComment") {
                    // Close the inline comment buffer
                    buffer += parts[0] + "\n";
                    emitBuffer();
                } else {
                    // Add the new line characters to the previous token
                    ret[ret.length - 1][1] += parts[0] + "\n";
                    lineBuffer = line;
                }
                i += parts[0].length;
                parts.shift();
                ch = parts.join("\n");
                if (ch === "") {
                    continue;
                } else {
                    i++;
                }
            }
            emitToken(ch, tokens.T_WHITESPACE);
            line += getNewLines(ch);
            i += ch.length - 1;
        } else if (ch === "#" || ch === "/" && ((nextch = source.charAt(i + 1)) === "*" || nextch === "/")) {
            // Comment signs. Change the buffer only if there's no active buffer
            if (!bufferType) {
                if (ch === "#") {
                    bufferType = "inlineComment";
                } else if (ch + nextch === "//") {
                    bufferType = "inlineComment";
                    ch += nextch;
                    i++;
                } else if ((ch + nextch + source.charAt(i + 2)) === "/**") {
                    ch += "**";
                    i += 2;
                    // It's a doc comment only if it's followed by a whitespace
                    if (isWhitespace(source.charCodeAt(i + 1))) {
                        bufferType = "docComment";
                    } else {
                        bufferType = "comment";
                    }
                } else {
                    ch += "*";
                    bufferType = "comment";
                    i++;
                }
                lineBuffer = line;
            }
            emitToken(ch);
        } else if (ch === "*" && source.charAt(i + 1) === "/") {
            // Multiline comments closing signs
            ch += "/";
            emitToken(ch);
            if (bufferType === "comment" || bufferType === "docComment") {
                emitBuffer();
            }
            i++;
        } else if (ch === "$" && (word = getWord(i + 1))) {
            // Variable
            if ((bufferType === "heredoc" || bufferType === "doubleQuotes") && !isEscaped()) {
                splitString();
                emitToken(ch + word, tokens.T_VARIABLE, true);
            } else {
                emitToken(ch + word, tokens.T_VARIABLE);
            }
            i += word.length;
        } else if (ch === "<" && source.substr(i + 1, 2) === "<<" && (word = getHeredocWord())) {
            // Heredoc and nowdoc start declaration
            emitToken(word[0], tokens.T_START_HEREDOC);
            line++;
            if (!bufferType) {
                heredocWord = word[1];
                // If the first character is a quote then it's a nowdoc otherwise it's an heredoc
                if (heredocWord.charAt(0) === "'") {
                    // Strip the leading quote
                    heredocWord = heredocWord.substr(1);
                    bufferType = "nowdoc";
                } else {
                    if (heredocWord.charAt(0) === '"') {
                        heredocWord = heredocWord.substr(1);
                    }
                    bufferType = "heredoc";
                }
                lineBuffer = line;
            }
            i += word[0].length - 1;
        } else if (ch === "(" && (word = getTypeCasting())) {
            // Type-casting
            emitToken(word[0], typeCasting[word[1].toLowerCase()]);
            i += word[0].length - 1;
        } else if ((ch === "." || (ch >= "0" && ch <= "9")) && (num = getNumber())) {
            // Numbers
            if (lastToken === "[" && (bufferType === "heredoc" || bufferType === "doubleQuotes")) {
                // Numeric array index inside a heredoc or a double quoted string
                emitToken(num[0], tokens.T_NUM_STRING, true);
            } else {
                emitToken(num[0], num[1]);
            }
            i += String(num[0]).length - 1;
        } else if (singleTokenChars.indexOf(ch) > -1) {
            // Symbols: try the 3 characters symbols first
            sym = source.substr(i, 3);
            if (sym in symbols3chars) {
                i += 2;
                // If it's a php open tag emit the html buffer
                if (bufferType === "html" && symbols3chars[sym] === tokens.T_OPEN_TAG_WITH_ECHO) {
                    emitBuffer();
                }
                emitToken(sym, symbols3chars[sym]);
                continue;
            }
            sym = ch + source.charAt(i + 1);
            if (sym in symbols2chars) {
                // If it's a php open tag check if it's written in the long form and emit the
                // html buffer
                if (symbols2chars[sym] === tokens.T_OPEN_TAG && bufferType === "html") {
                    emitBuffer();
                    i++;
                    if (word = getLongOpenDeclaration(i + 1)) {
                        i += word[0].length;
                        sym += word[0];
                    }
                    emitToken(sym, tokens.T_OPEN_TAG);
                    if (sym.indexOf("\n") > -1) {
                        line++;
                    }
                    continue;
                }
                i++;
                // Syntax $obj->prop inside strings and heredoc
                if (sym === "->" && lastToken === tokens.T_VARIABLE &&
                    (bufferType === "heredoc" || bufferType === "doubleQuotes")) {
                    emitToken(sym, symbols2chars[sym], true);
                    continue;
                }
                emitToken(sym, symbols2chars[sym]);
                // If the token is a PHP close tag and there isn't an active buffer start an
                // html buffer
                if (!bufferType && symbols2chars[sym] === tokens.T_CLOSE_TAG) {
                    bufferType = "html";
                    lineBuffer = line;
                }
                continue;
            }
            if (!bufferType && (ch === "'" || ch === '"')) {
                // Start string buffers if there isn't an active buffer and the character is a quote
                if (ch === "'") {
                    bufferType = "singleQuote";
                } else {
                    split = false;
                    bufferType = "doubleQuotes";
                }
                lineBuffer = line;
                // Add the token to the buffer and continue to skip next checks
                emitToken(ch);
                continue;
            } else if (ch === '"' && bufferType === "doubleQuotes" && !isEscaped()) {
                // If the string has been split emit the current buffer and the double quotes
                // as separate tokens
                if (split) {
                    splitString();
                    bufferType = null;
                    emitToken('"');
                } else {
                    emitToken('"');
                    emitBuffer();
                }
                continue;
            } else if (bufferType === "heredoc" || bufferType === "doubleQuotes") {
                if ((ch === "[" && lastToken === tokens.T_VARIABLE) ||
                    (ch === "]" && (lastToken === tokens.T_NUM_STRING || lastToken === tokens.T_STRING))) {
                    // Array index delimiters inside heredoc or double quotes
                    emitToken(ch, null, true);
                    continue;
                } else if (((ch === "$" && source.charAt(i + 1) === "{") ||
                    (ch === "{" && source.charAt(i + 1) === "$")) && !isEscaped()) {
                    // Complex variable syntax ${varname} or {$varname}. Store the current
                    // buffer type and evaluate next tokens as there's no active buffer.
                    // The current buffer will be reset when the declaration is closed
                    splitString();
                    complexVarPrevBuffer = bufferType;
                    bufferType = null;
                    if (ch === "$") {
                        emitToken(ch + "{", tokens.T_DOLLAR_OPEN_CURLY_BRACES);
                        i++;
                    } else {
                        emitToken(ch, tokens.T_CURLY_OPEN);
                    }
                    openBrackets = 1;
                    continue;
                }
            } else if (ch === "\\") {
                // Namespace separator
                emitToken(ch, tokens.T_NS_SEPARATOR);
                continue;
            }
            emitToken(ch);
            if (complexVarPrevBuffer && (ch === "{" || ch === "}")) {
                // Increment or decrement the number of open brackets inside a complex
                // variable syntax
                if (ch === "{") {
                    openBrackets++;
                } else if (!--openBrackets) {
                    // If every bracket has been closed reset the previous buffer
                    bufferType = complexVarPrevBuffer;
                    complexVarPrevBuffer = null;
                }
            } else if (ch === "'" && bufferType === "singleQuote" && !isEscaped()) {
                // Stop the single quoted string buffer if the character is a quote,
                // there's an open single quoted string buffer and the character is
                // not escaped
                emitBuffer();
            }
        } else if (word = getWord(i)) {
            // Words
            wordLower = word.toLowerCase();
            // Check to see if it's a keyword
            if (keywordsToken.hasOwnProperty(word) || keywordsToken.hasOwnProperty(wordLower)) {
                // If it's preceded by -> then it's an object property and it must be tokenized
                // as T_STRING
                emitToken(
                    word,
                    lastToken === tokens.T_OBJECT_OPERATOR ?
                        tokens.T_STRING :
                        keywordsToken[word] || keywordsToken[wordLower]
                );
                i += word.length - 1;
                continue;
            }
            if ((bufferType === "nowdoc" || bufferType === "heredoc") && word === heredocWord &&
                source.charAt(i - 1) === "\n" &&
                heredocEndFollowing.test(source.substr(i + word.length))) {
                // Stop the heredoc or the nowdoc if it's the word that has generated it
                emitBuffer();
                emitToken(word, tokens.T_END_HEREDOC);
                i += word.length - 1;
                continue;
            } else if ((bufferType === "heredoc" || bufferType === "doubleQuotes")) {
                if (lastToken === "[") {
                    // Literal array index inside a heredoc or a double quoted string
                    emitToken(word, tokens.T_STRING, true);
                    i += word.length - 1;
                    continue;
                } else if (lastToken === tokens.T_OBJECT_OPERATOR) {
                    // Syntax $obj->prop inside strings and heredoc
                    emitToken(word, tokens.T_STRING, true);
                    i += word.length - 1;
                    continue;
                }
            } else if (complexVarPrevBuffer && lastToken === tokens.T_DOLLAR_OPEN_CURLY_BRACES) {
                // Complex variable syntax ${varname}
                emitToken(word, tokens.T_STRING_VARNAME);
                i += word.length - 1;
                continue;
            }
            emitToken(word, tokens.T_STRING);
            i += word.length - 1;
        } else if (ASCII < 32) {
            // If below ASCII 32 it's a bad character
            emitToken(ch, tokens.T_BAD_CHARACTER);
        } else {
            // If there isn't an open buffer there should be a syntax error, but we don't care
            // so it will be emitted as a simple string
            emitToken(ch, tokens.T_STRING);
        }
    }

    // If there's an open buffer emit it
    if (bufferType && (bufferType !== "doubleQuotes" || !split)) {
        emitBuffer();
    } else {
        splitString();
    }
    return ret;
}