UNPKG

phpjs

Version:

php.js offers community built php functions in javascript

865 lines (861 loc) 32.4 kB
function token_get_all(source) { // Split given source into PHP tokens // + original by: Marco Marchiò // + improved by: Brett Zamir (http://brett-zamir.me) // - depends on: token_name // % note 1: Token numbers depend on the PHP version // % note 2: token_name is only necessary for a non-standard php.js-specific use of this function; // % note 2: if you define an object on this.php_js.phpParser (where "this" is the scope of the // % note 2: token_get_all function (either a namespaced php.js object or the window object)), // % note 2: this function will call that object's methods if they have the same names as the tokens, // % note 2: passing them the string, line number, and token number (in that order) // * example 1: token_get_all('/'+'* comment *'+'/'); // * returns 1: [[311, '/* comment */', 1]] // Token to number conversion var tokens = { T_REQUIRE_ONCE:261, T_REQUIRE:260, T_EVAL:259, T_INCLUDE_ONCE:258, T_INCLUDE:257, T_LOGICAL_OR:262, T_LOGICAL_XOR:263, T_LOGICAL_AND:264, T_PRINT:265, T_SR_EQUAL:276, T_SL_EQUAL:275, T_XOR_EQUAL:274, T_OR_EQUAL:273, T_AND_EQUAL:272, T_MOD_EQUAL:271, T_CONCAT_EQUAL:270, T_DIV_EQUAL:269, T_MUL_EQUAL:268, T_MINUS_EQUAL:267, T_PLUS_EQUAL:266, T_BOOLEAN_OR:277, T_BOOLEAN_AND:278, T_IS_NOT_IDENTICAL:282, T_IS_IDENTICAL:281, T_IS_NOT_EQUAL:280, T_IS_EQUAL:279, T_IS_GREATER_OR_EQUAL:284, T_IS_SMALLER_OR_EQUAL:283, T_SR:286, T_SL:285, T_INSTANCEOF:287, T_UNSET_CAST:296, T_BOOL_CAST:295, T_OBJECT_CAST:294, T_ARRAY_CAST:293, T_STRING_CAST:292, T_DOUBLE_CAST:291, T_INT_CAST:290, T_DEC:289, T_INC:288, T_CLONE:298, T_NEW:297, T_EXIT:299, T_IF:300, T_ELSEIF:301, T_ELSE:302, T_ENDIF:303, T_LNUMBER:304, T_DNUMBER:305, T_STRING:306, T_STRING_VARNAME:307, T_VARIABLE:308, T_NUM_STRING:309, T_INLINE_HTML:310, T_CHARACTER:311, T_BAD_CHARACTER:312, T_ENCAPSED_AND_WHITESPACE:313, T_CONSTANT_ENCAPSED_STRING:314, T_ECHO:315, T_DO:316, T_WHILE:317, T_ENDWHILE:318, T_FOR:319, T_ENDFOR:320, T_FOREACH:321, T_ENDFOREACH:322, T_DECLARE:323, T_ENDDECLARE:324, T_AS:325, T_SWITCH:326, T_ENDSWITCH:327, T_CASE:328, T_DEFAULT:329, T_BREAK:330, T_CONTINUE:331, T_GOTO:332, T_FUNCTION:333, T_CONST:334, T_RETURN:335, T_TRY:336, T_CATCH:337, T_THROW:338, T_USE:339, T_GLOBAL:340, T_PUBLIC:346, T_PROTECTED:345, T_PRIVATE:344, T_FINAL:343, T_ABSTRACT:342, T_STATIC:341, T_VAR:347, T_UNSET:348, T_ISSET:349, T_EMPTY:350, T_HALT_COMPILER:351, T_CLASS:352, T_INTERFACE:353, T_EXTENDS:354, T_IMPLEMENTS:355, T_OBJECT_OPERATOR:356, T_DOUBLE_ARROW:357, T_LIST:358, T_ARRAY:359, T_CLASS_C:360, T_METHOD_C:361, T_FUNC_C:362, T_LINE:363, T_FILE:364, T_COMMENT:365, T_DOC_COMMENT:366, T_OPEN_TAG:367, T_OPEN_TAG_WITH_ECHO:368, T_CLOSE_TAG:369, T_WHITESPACE:370, T_START_HEREDOC:371, T_END_HEREDOC:372, T_DOLLAR_OPEN_CURLY_BRACES:373, T_CURLY_OPEN:374, T_PAAMAYIM_NEKUDOTAYIM:375, T_NAMESPACE:376, T_NS_C:377, T_DIR:378, T_NS_SEPARATOR:379 }, /** tokens = { // using PHP 5.2.6 on Windows, I get these values for token_name() T_REQUIRE_ONCE:258, T_REQUIRE:259, T_EVAL:260, T_INCLUDE_ONCE:261, T_INCLUDE:262, T_LOGICAL_OR:263, T_LOGICAL_XOR:264, T_LOGICAL_AND:265, T_PRINT:266, T_SR_EQUAL:267, T_SL_EQUAL:268, T_XOR_EQUAL:269, T_OR_EQUAL:270, T_AND_EQUAL:271, T_MOD_EQUAL:272, T_CONCAT_EQUAL:273, T_DIV_EQUAL:274, T_MUL_EQUAL:275, T_MINUS_EQUAL:276, T_PLUS_EQUAL:277, T_BOOLEAN_OR:278, T_BOOLEAN_AND:279, T_IS_NOT_IDENTICAL:280, T_IS_IDENTICAL:281, T_IS_NOT_EQUAL:282, T_IS_EQUAL:283, T_IS_GREATER_OR_EQUAL:284, T_IS_SMALLER_OR_EQUAL:285, T_SR:286, T_SL:287, T_INSTANCEOF:288, T_UNSET_CAST:289, T_BOOL_CAST:290, T_OBJECT_CAST:291, T_ARRAY_CAST:292, T_STRING_CAST:293, T_DOUBLE_CAST:294, T_INT_CAST:295, T_DEC:296, T_INC:297, T_CLONE:298, T_NEW:299, T_EXIT:300, T_IF:301, T_ELSEIF:302, T_ELSE:303, T_ENDIF:304, T_LNUMBER:305, T_DNUMBER:306, T_STRING:307, T_STRING_VARNAME:308, T_VARIABLE:309, T_NUM_STRING:310, T_INLINE_HTML:311, T_CHARACTER:312, T_BAD_CHARACTER:313, T_ENCAPSED_AND_WHITESPACE:314, T_CONSTANT_ENCAPSED_STRING:315, T_ECHO:316, T_DO:317, T_WHILE:318, T_ENDWHILE:319, T_FOR:320, T_ENDFOR:321, T_FOREACH:322, T_ENDFOREACH:323, T_DECLARE:324, T_ENDDECLARE:325, T_AS:326, T_SWITCH:327, T_ENDSWITCH:328, T_CASE:329, T_DEFAULT:330, T_BREAK:331, T_CONTINUE:332, T_FUNCTION:333, T_CONST:334, T_RETURN:335, T_TRY:336, T_CATCH:337, T_THROW:338, T_USE:339, T_GLOBAL:340, T_PUBLIC:341, T_PROTECTED:342, T_PRIVATE:343, T_FINAL:344, T_ABSTRACT:345, T_STATIC:346, T_VAR:347, T_UNSET:348, T_ISSET:349, T_EMPTY:350, T_HALT_COMPILER:351, T_CLASS:352, T_INTERFACE:353, T_EXTENDS:354, T_IMPLEMENTS:355, T_OBJECT_OPERATOR:356, T_DOUBLE_ARROW:357, T_LIST:358, T_ARRAY:359, T_CLASS_C:360, T_METHOD_C:361, T_FUNC_C:362, T_LINE:363, T_FILE:364, T_COMMENT:365, T_DOC_COMMENT:366, T_OPEN_TAG:367, T_OPEN_TAG_WITH_ECHO:368, T_CLOSE_TAG:369, T_WHITESPACE:370, T_START_HEREDOC:371, T_END_HEREDOC:372, T_DOLLAR_OPEN_CURLY_BRACES:373, T_CURLY_OPEN:374, T_DOUBLE_COLON:375 /*,UNKNOWN:376, UNKNOWN:377, UNKNOWN:378, UNKNOWN:379, UNKNOWN:380,*/ }, //*/ // Tokens indentified by a keyword keywordsTokens = { 'abstract':'T_ABSTRACT', 'as':'T_AS', 'break':'T_BREAK', 'case':'T_CASE', 'catch':'T_CATCH', 'class':'T_CLASS', '__CLASS__':'T_CLASS_C', 'clone':'T_CLONE', 'const':'T_CONST', 'continue':'T_CONTINUE', 'default':'T_DEFAULT', '__DIR__':'T_DIR', 'do':'T_DO', 'else':'T_ELSE', 'enddeclare':'T_ENDDECLARE', 'endfor':'T_ENDFOR', 'endforeach':'T_ENDFOREACH', 'endif':'T_ENDIF', 'endswitch':'T_ENDSWITCH', 'endwhile':'T_ENDWHILE', 'extends':'T_EXTENDS', '__FILE__':'T_FILE', 'final':'T_FINAL', 'function':'T_FUNCTION', '__FUNCTION__':'T_FUNC_C', 'global':'T_GLOBAL', 'goto':'T_GOTO', 'implements':'T_IMPLEMENTS', 'instanceof':'T_INSTANCEOF', 'interface':'T_INTERFACE', '__LINE__':'T_LINE', 'and':'T_LOGICAL_AND', 'or':'T_LOGICAL_OR', 'xor':'T_LOGICAL_XOR', '__METHOD__':'T_METHOD_C', '__NAMESPACE__':'T_NS_C', 'new':'T_NEW', 'namespace':'T_NAMESPACE', 'private':'T_PRIVATE', 'public':'T_PUBLIC', 'protected':'T_PROTECTED', 'return':'T_RETURN', 'static':'T_STATIC', 'throw':'T_THROW', 'try':'T_TRY', 'use':'T_USE', 'var':'T_VAR', 'echo':'T_ECHO', 'exit':'T_EXIT', 'die':'T_EXIT', 'include':'T_INCLUDE', 'include_once':'T_INCLUDE_ONCE', 'print':'T_PRINT', 'require':'T_REQUIRE', 'require_once':'T_REQUIRE_ONCE' }, // Tokens indentified by a keyword followed by a ( funcLoopCondTokens = { 'array':'T_ARRAY', 'declare':'T_DECLARE', 'elseif':'T_ELSEIF', 'empty':'T_EMPTY', 'eval':'T_EVAL', 'for':'T_FOR', 'foreach':'T_FOREACH', '__halt_compiler':'T_HALT_COMPILER', 'if':'T_IF', 'isset':'T_ISSET', 'list':'T_LIST', 'switch':'T_SWITCH', 'unset':'T_UNSET', 'while':'T_WHILE' }, // Type casting tokens castingTokens = { 'unset':'T_UNSET_CAST', 'bool':'T_BOOL_CAST', 'boolean':'T_BOOL_CAST', 'object':'T_OBJECT_CAST', 'array':'T_ARRAY_CAST', 'string':'T_STRING_CAST', 'binary':'T_STRING_CAST', 'real':'T_DOUBLE_CAST', 'double':'T_DOUBLE_CAST', 'float':'T_DOUBLE_CAST', 'int':'T_INT_CAST', 'integer':'T_INT_CAST' }, // 2 chars tokens twoCharsTokens = { '&&':tokens.T_BOOLEAN_AND, '&=':tokens.T_AND_EQUAL, '||':tokens.T_BOOLEAN_OR, '|=':tokens.T_OR_EQUAL, '.=':tokens.T_CONCAT_EQUAL, '--':tokens.T_DEC, '-=':tokens.T_MINUS_EQUAL, '->':tokens.T_OBJECT_OPERATOR, '%=':tokens.T_MOD_EQUAL, '=>':tokens.T_DOUBLE_ARROW, '::':tokens.T_PAAMAYIM_NEKUDOTAYIM, '/=':tokens.T_DIV_EQUAL, '++':tokens.T_INC, '+=':tokens.T_PLUS_EQUAL, '<>':tokens.T_IS_NOT_EQUAL, '<=':tokens.T_IS_SMALLER_OR_EQUAL, '*=':tokens.T_MUL_EQUAL, '<%':tokens.T_OPEN_TAG, '>=':tokens.T_IS_GREATER_OR_EQUAL, '^=':tokens.T_XOR_EQUAL, '==':tokens.T_IS_EQUAL, '!=':tokens.T_IS_NOT_EQUAL, '>>':tokens.T_SR, '<<':tokens.T_SL }, // 3 chars tokens threeCharsTokens = { '===':tokens.T_IS_IDENTICAL, '!==':tokens.T_IS_NOT_IDENTICAL, '>>=':tokens.T_SR_EQUAL, '<<=':tokens.T_SL_EQUAL, '<?=':tokens.T_OPEN_TAG_WITH_ECHO, '<%=':tokens.T_OPEN_TAG_WITH_ECHO }, // These two variables contain a set of char without an associated token nonTokensChar = ';(){}[],~@`', charNoToken = '=+/-*.$|^&<>%!?:', // Immediately start an HTML buffer buffer = '', bufferType = 'HTML', line = 1, isEncapsed, hdlabel, ret = [], // Get a word in the code starting from the given index getCurrentWord = function (start) { var match = (/^([\w]+)\s*(\()?/).exec(source.substr(start)); return match; }, // Get a type cast construct in the code starting from the given index getCurrentCasting = function (start) { var match = (/^\(\s*(\w+)\s*\)/).exec(source.substr(start)); if (match && match[1]) { match[1] = match[1].toLowerCase(); } return match; }, // Get a decimal or integer number in the code starting from the given index checkCurrentNumber = function (start) { var match = (/^\d*\.?\d+(?:x[\da-f]+|e\-?\d+)?/i).exec(source.substr(start)); if (match) { var at; if ((/^\d+(?:x[\da-f]+)?$/i).test(match[0])) {at = tokens.T_LNUMBER;} else {at = tokens.T_DNUMBER;} return [at, match[0]]; } else {return null;} }, // Check if the char at the given index is escaped isEscaped = function (start) { if (source.charAt(start-1) !== '\\') {return false;} var count = 1; for (var c = start-2; c>=0; c--) { if (source.charAt(c) !== '\\') {break;} else {count++;} } return (count % 2 !== 0); }, // Get the heredoc starting label getHeredoc = function (start) { var match = (/^(\s*(.*)?)(\r?\n)/i).exec(source.substr(start)); return match; }, // Get heredoc closing label getHeredocClose = function(start, lab) { var s = start - 1; if (source.charAt(s) !== '\n') {return null;} var reg = new RegExp('^' + lab + ';\\r?\\n'), match = reg.exec(source.substr(start)); return match; }, // Get whitespaces at the given position // Mode: 0 every whitespace, 1 only next new line, 2 only next space or new line getCurrentWhitespaces = function(start, mode) { var ascii = source.charCodeAt(start), sp = ''; if (!mode) { while (ascii === 9 || ascii === 10 || ascii === 13 || ascii === 32) { sp += source.charAt(start); start++; ascii = source.charCodeAt(start); } return sp; } else if (mode === 1) { if (ascii === 10 || (ascii === 13 && source.charCodeAt(start + 1) === 10)) { return (ascii === 13 ? source.charAt(start) + source.charAt(start + 1) : source.charAt(start)); } else {return '';} } else { if (ascii === 32 || ascii === 10 || (ascii === 13 && source.charCodeAt(start + 1) === 10)) { return (ascii === 13 ? source.charAt(start) + source.charAt(start + 1) : source.charAt(start)); } else {return '';} } }, // Count the number of substrings in a given string countSubstrings = function (str, sub) { if (!str.length || !sub.length) {return 0;} var ind = str.indexOf(sub), count = 0; while (ind>-1) { count++; ind = str.indexOf(sub, ind + 1); } return count; }, // Add a token to the result array pushOnRet = function (token, string) { if (string === undefined) {ret.push(token);} else {ret.push([token, string, line]);} }, oldPushOnRet = pushOnRet; var that = this; if (this.php_js && this.php_js.phpParser) { pushOnRet = function (token, string) { var action = that.php_js.phpParser[typeof token === 'number' ? that.token_name(token) : token]; if (typeof action === 'function') { action.call(that.php_js.phpParser, string, line, token); } oldPushOnRet(token, string); }; } // Loop through every character in the string for (var i = 0; i < source.length; i++) { // Get the current character and its ascii code var ch = source.charAt(i), ASCII = source.charCodeAt(i); // If is set a buffer then manage it if (buffer !== undefined) { switch (bufferType) { // HTML case 'HTML': // If there's no php open tag add the char to the buffer and continue if (ch === '<' && (source.charAt(i + 1) === '?' || source.charAt(i + 1) === '%')) { if (buffer.length) {pushOnRet(tokens.T_INLINE_HTML, buffer);} line += countSubstrings(buffer, '\n'); bufferType = undefined; buffer = undefined; } else { buffer += ch; continue; } break; // Inline comments case 'inlineComment': // Stop it if the current char is a new line char otherwise add the char to the buffer buffer += ch; if (ASCII === 10) { pushOnRet(tokens.T_COMMENT, buffer); bufferType = undefined; buffer = undefined; line++; } continue; // Multiline e doc comments case 'DOCComment': case 'multilineComment': // Add the char to the buffer and stop it if there's the close comments sign buffer += ch; if (ch === '*' && source.charAt(i + 1) === '/') { buffer += source.charAt(i + 1); if (bufferType === 'multilineComment') {pushOnRet(tokens.T_COMMENT, buffer);} else { pushOnRet(tokens.T_DOC_COMMENT, buffer); } line += countSubstrings(buffer, '\n'); bufferType = undefined; buffer = undefined; i++; } continue; // Single quoted strings and double quoted strings case 'doubleQuote': case 'singleQuote': // If the buffer is a double quote string and the current char is a dollar sign // or a curly bracket and it's not escaped don't skip this part if (bufferType === 'singleQuote' || (ch !== '$' && ch !== '{') || isEscaped(i)) { // Heredoc. If there's a heredoc open and this can close it, close the buffer if (hdlabel && ch === hdlabel.charAt(0) && getHeredocClose(i, hdlabel)) { if (buffer.length) { // Is the fact that token_get_all does report a line break at // the end of a HEREDOC, despite it not being counted as // part of the HEREDOC, a PHP bug? pushOnRet(tokens.T_ENCAPSED_AND_WHITESPACE, buffer); line += countSubstrings(buffer, '\n'); } pushOnRet(tokens.T_END_HEREDOC, hdlabel); i += hdlabel.length - 1; hdlabel = null; bufferType = undefined; buffer = undefined; continue; } else {buffer += ch;} // If the current char is a quote (for single quoted string) or a double quote(for double quoted string) // and it's not escaped close the buffer if (!hdlabel && ((ch === "'" && bufferType === 'singleQuote') || (ch === '"' && bufferType === 'doubleQuote')) && !isEscaped(i)) { // If the isEncapsed is true add it as a T_ENCAPSED_AND_WHITESPACE otherwise add it as a normal string if (isEncapsed) { if (buffer.length>1) { pushOnRet(tokens.T_ENCAPSED_AND_WHITESPACE, buffer.substr(0, buffer.length-1)); } pushOnRet('"'); } else { pushOnRet(tokens.T_CONSTANT_ENCAPSED_STRING, buffer); } line += countSubstrings(buffer, '\n'); bufferType = undefined; buffer = undefined; } continue; } break; // This buffer is activated when {$ is found so if the char is a closed bracket and it's not escaped stop the // buffer and reset the double quoted string buffer case 'curlyInString': if (ch === '}' && !isEscaped(i)) { pushOnRet('}'); bufferType = 'doubleQuote'; buffer = ''; } break; } } var ws; if (bufferType !== 'doubleQuote') { // Whitespaces if (ASCII === 9 || ASCII === 10 || ASCII === 13 || ASCII === 32) { ws = getCurrentWhitespaces(i + 1); ch += ws; pushOnRet(tokens.T_WHITESPACE, ch); // If it's new line character increment the line variable if (ASCII === 10) {line++;} if (ws) {line += countSubstrings(ws, '\n');} i += ch.length-1; continue; } // Bad char else if (ASCII < 32) { pushOnRet(tokens.T_BAD_CHARACTER, ch); continue; } // Char without token: (){}[] else if (nonTokensChar.indexOf(ch) !== -1) { if (ch === '(') { // Type casting var cast = getCurrentCasting(i); if (cast && castingTokens[cast[1]]) { pushOnRet(castingTokens[cast[1]], cast[0]); i += cast[0].length - 1; continue; } } pushOnRet(ch); continue; } // Start a comment (with #), single or double quoted string buffer else if (ch === '#' || ch === "'" || ch === '"') { buffer = ch; bufferType = ch === '#' ? 'inlineComment' : (ch === "'" ? 'singleQuote' : 'doubleQuote'); isEncapsed = false; continue; } // Namespace separator else if (ch === '\\') { pushOnRet(tokens.T_NS_SEPARATOR, ch); continue; } } // Get the current word var word = getCurrentWord(i), lowWord = word ? word[1].toLowerCase() : '', nextCharWord = getCurrentWord(i + 1); // Keyword if (word && (keywordsTokens[word[1]] || keywordsTokens[lowWord])) { pushOnRet((keywordsTokens[lowWord] ? tokens[keywordsTokens[lowWord]] : tokens[keywordsTokens[word[1]]]), word[1]); i += lowWord.length - 1; continue; } // Functions, loops and condition: every keyword followed by ( else if (word && word[2] === '(' && funcLoopCondTokens[lowWord]) { pushOnRet(tokens[funcLoopCondTokens[lowWord]], word[1]); i += lowWord.length - 1; continue; } // Variables else if (bufferType !== 'doubleQuote' && ch === '$' && nextCharWord) { pushOnRet(tokens.T_VARIABLE, ch + nextCharWord[1]); i += nextCharWord[1].length; continue; } // Variables inside strings else if (bufferType === 'doubleQuote' && (ch === '$' || ch === '{')) { var toInsert = [], changeBuffer = false; if (ch === '$') { // ${a} if (source.charAt(i + 1) === '{') { nextCharWord = getCurrentWord(i + 2); if (nextCharWord) { // Get the next word and check that it is followed by a } var afterChar = source.charAt(i + nextCharWord[0].length + 2); if (afterChar === '}') { toInsert.push([tokens.T_DOLLAR_OPEN_CURLY_BRACES, '${']); toInsert.push([tokens.T_STRING_VARNAME, nextCharWord[0]]); toInsert.push('}'); i += nextCharWord[0].length + 2; } // ${a[0]}, ${a[b]} else if (afterChar === '[') { // If it's followed by a [ get the array index var nextNextCharWord = getCurrentWord(i + nextCharWord[0].length + 3); // Check also that it's followed by a ] and a } if (nextNextCharWord && source.charAt(i + nextCharWord[0].length + 3 + nextNextCharWord[0].length) === ']' && source.charAt(i + nextCharWord[0].length + 3 + nextNextCharWord[0].length + 1) === '}') { toInsert.push([tokens.T_DOLLAR_OPEN_CURLY_BRACES, '${']); toInsert.push([tokens.T_STRING_VARNAME, nextCharWord[0]]); toInsert.push('['); if ((/^\d+$/).test(nextNextCharWord[0])) {toInsert.push([tokens.T_LNUMBER, nextNextCharWord[0]]);} else {toInsert.push([tokens.T_STRING, nextNextCharWord[0]]);} toInsert.push(']'); toInsert.push('}'); i += nextCharWord[0].length + 3 + nextNextCharWord[0].length + 1; } } } } // $a else { nextCharWord = getCurrentWord(i + 1); if (nextCharWord) { toInsert.push([tokens.T_VARIABLE, ch + nextCharWord[1]]); i += nextCharWord[1].length; // $a[0], $a[b] if (source.charAt(i + 1) === '[') { // If it's an array get its index and check that it's followed by a ] nextCharWord = getCurrentWord(i + 2); if (nextCharWord && source.charAt(i + nextCharWord[0].length + 2) === ']') { toInsert.push('['); if ((/^\d+$/).test(nextCharWord[0])) { toInsert.push([tokens.T_NUM_STRING, nextCharWord[0]]); } else { toInsert.push([tokens.T_STRING, nextCharWord[0]]); } toInsert.push(']'); i += nextCharWord[0].length + 2; } } } } } // {$a} else if (source.charAt(i + 1) === '$') { // If there are variables inside brackets parse them as normal code by changing the buffer toInsert.push([tokens.T_CURLY_OPEN, ch]); changeBuffer = true; } // If there's nothing to insert it means that it's not a string variable sintax if (!toInsert.length) { buffer += ch; continue; } // Insert the buffer as with the T_ENCAPSED_AND_WHITESPACE token if (!isEncapsed && buffer.charAt(0) === '"') { pushOnRet('"'); buffer = buffer.substr(1); isEncapsed = true; } if (buffer.length) { pushOnRet(tokens.T_ENCAPSED_AND_WHITESPACE, buffer); line += countSubstrings(buffer, '\n'); buffer = ''; } // Insert every token found for (var ind = 0; ind < toInsert.length; ind++) { if (Object.prototype.toString.call(toInsert[ind]) === '[object Array]') { pushOnRet(toInsert[ind][0], toInsert[ind][1]); } else { pushOnRet(toInsert[ind]); } } // Change the buffer if necessary if (changeBuffer) {bufferType = 'curlyInString';} continue; } // Concat the current char with the following var couple = ch + source.charAt(i + 1), triplet = couple + source.charAt(i + 2), insString; // If it's a three chars token add it and continue if (threeCharsTokens[triplet]) { pushOnRet(threeCharsTokens[triplet], triplet); i += 2; continue; // If it's a two chars token add it and continue } else if (triplet === '<<<') { // Avoid being treated as '<<' shift by couple check (handle instead in switch below) } else if (twoCharsTokens[couple]) { pushOnRet(twoCharsTokens[couple], couple); i++; continue; } // Other symbols switch (couple) { // If it's a php closing tag start an HTML buffer case '?>': case '%>': ws = getCurrentWhitespaces(i + 2, 1); couple += ws; pushOnRet(tokens.T_CLOSE_TAG, couple); if (ws && ws.indexOf('\n') !== -1) {line++;} i += couple.length - 1; buffer = ''; bufferType = 'HTML'; continue; case '<<': // If <<< check for heredoc start nextCharWord = getHeredoc(i + 3); if (source.charAt(i + 2) === '<' && nextCharWord) { // If there's a heredoc start a double quoted string buffer // because they have the same behaviour bufferType = 'doubleQuote'; isEncapsed = true; buffer = ''; i += nextCharWord[0].length + 2; hdlabel = nextCharWord[1]; pushOnRet(tokens.T_START_HEREDOC, '<<<'+nextCharWord[0]); line++; continue; } break; case '<%': case '<?': insString = couple; if (couple === '<?' && source.charAt(i + 2) === 'p' && source.charAt(i + 3) === 'h' && source.charAt(i + 4) === 'p') { insString += 'php'; } ws = getCurrentWhitespaces(i + 2 + (insString.length>2 ? 3 : 0), 2); insString += ws; pushOnRet(tokens.T_OPEN_TAG, insString); i += insString.length - 1; if (ws && ws.indexOf('\n') !== -1) {line++;} continue; // Start a multiline comment buffer case '/*': buffer = couple; if (source.charAt(i + 2) === '*' && (/\s/).test(source.charAt(i + 3))) { bufferType = 'DOCComment'; buffer += source.charAt(i + 2) + source.charAt(i + 3); i += 2; } else {bufferType = 'multilineComment';} i++; continue; // Start a comment buffer case '//': buffer = couple; bufferType = 'inlineComment'; i++; continue; default: insString = checkCurrentNumber(i); // Other characters without tokens if (charNoToken.indexOf(ch) !== -1) { pushOnRet(ch); continue; } // Integer and decimal numbers else if (insString) { pushOnRet(insString[0], insString[1]); i += insString[1].length - 1; continue; } break; } // If a word was found insert it as a T_STRING if (word && word[1]) { pushOnRet(tokens.T_STRING, word[1]); i += word[1].length - 1; } } // Close the HTML buffer if there's one open if (buffer !== undefined && bufferType === 'HTML' && buffer.length) { pushOnRet(tokens.T_INLINE_HTML, buffer); } // Return the token array return ret; }