UNPKG

php-parser

Version:

Parse PHP code from JS and return its AST

749 lines (724 loc) 19.6 kB
/**
 * Copyright (C) 2018 Glayzzle (BSD3 License)
 * @authors https://github.com/glayzzle/php-parser/graphs/contributors
 * @url http://glayzzle.com
 */
"use strict";

const Position = require("./ast/position");

/**
 * Checks whether a value looks numeric.
 *
 * "." and "," are rejected explicitly; any other value must parse to a
 * number with `parseFloat` and be finite. The parser uses this to decide
 * whether a token should be looked up in the lexer's token table (see
 * `getTokenName`) or printed as a literal character.
 *
 * @private
 * @param {*} n - the value to test
 * @return {boolean}
 */
function isNumber(n) {
  return n != "." && n != "," && !isNaN(parseFloat(n)) && isFinite(n);
}

/**
 * The PHP Parser class that builds the AST tree from the lexer
 *
 * @constructor Parser
 * @memberOf module:php-parser
 * @tutorial Parser
 * @param {Lexer} lexer - the lexer instance; its `tok` and `EOF` members are copied onto the parser
 * @param {AST} ast - the AST factory instance
 * @property {Lexer} lexer - current lexer instance
 * @property {AST} ast - the AST factory instance
 * @property {number|string} token - current token
 * @property {boolean} extractDoc - should extract documentation as AST node
 * @property {boolean} extractTokens - should extract each token
 * @property {boolean} suppressErrors - should ignore parsing errors and continue
 * @property {boolean} debug - should output debug information
 */
const Parser = function (lexer, ast) {
  this.lexer = lexer;
  this.ast = ast;
  this.tok = lexer.tok;
  this.EOF = lexer.EOF;
  this.token = null;
  this.prev = null;
  this.debug = false;
  // NOTE(review): presumably the targeted PHP version encoded as
  // major * 100 + minor (803 => 8.3) - confirm against the engine options
  this.version = 803;
  this.extractDoc = false;
  this.extractTokens = false;
  this.suppressErrors = false;
  // turns a token into a [key, value] pair so each list below can feed a Map
  const mapIt = function (item) {
    return [item, null];
  };
  // token groups, queried through `Parser#is(groupName)`
  this.entries = {
    // reserved_non_modifiers
    IDENTIFIER: new Map(
      [
        this.tok.T_ABSTRACT,
        this.tok.T_ARRAY,
        this.tok.T_AS,
        this.tok.T_BREAK,
        this.tok.T_CALLABLE,
        this.tok.T_CASE,
        this.tok.T_CATCH,
        this.tok.T_CLASS,
        this.tok.T_CLASS_C,
        this.tok.T_CLONE,
        this.tok.T_CONST,
        this.tok.T_CONTINUE,
        this.tok.T_DECLARE,
        this.tok.T_DEFAULT,
        this.tok.T_DIR,
        this.tok.T_DO,
        this.tok.T_ECHO,
        this.tok.T_ELSE,
        this.tok.T_ELSEIF,
        this.tok.T_EMPTY,
        this.tok.T_ENDDECLARE,
        this.tok.T_ENDFOR,
        this.tok.T_ENDFOREACH,
        this.tok.T_ENDIF,
        this.tok.T_ENDSWITCH,
        this.tok.T_ENDWHILE,
        this.tok.T_ENUM,
        this.tok.T_EVAL,
        this.tok.T_EXIT,
        this.tok.T_EXTENDS,
        this.tok.T_FILE,
        this.tok.T_FINAL,
        this.tok.T_FINALLY,
        this.tok.T_FN,
        this.tok.T_FOR,
        this.tok.T_FOREACH,
        this.tok.T_FUNC_C,
        this.tok.T_FUNCTION,
        this.tok.T_GLOBAL,
        this.tok.T_GOTO,
        this.tok.T_IF,
        this.tok.T_IMPLEMENTS,
        this.tok.T_INCLUDE,
        this.tok.T_INCLUDE_ONCE,
        this.tok.T_INSTANCEOF,
        this.tok.T_INSTEADOF,
        this.tok.T_INTERFACE,
        this.tok.T_ISSET,
        this.tok.T_LINE,
        this.tok.T_LIST,
        this.tok.T_LOGICAL_AND,
        this.tok.T_LOGICAL_OR,
        this.tok.T_LOGICAL_XOR,
        this.tok.T_MATCH,
        this.tok.T_METHOD_C,
        this.tok.T_NAMESPACE,
        this.tok.T_NEW,
        this.tok.T_NS_C,
        this.tok.T_PRINT,
        this.tok.T_PRIVATE,
        this.tok.T_PROTECTED,
        this.tok.T_PUBLIC,
        this.tok.T_READ_ONLY,
        this.tok.T_REQUIRE,
        this.tok.T_REQUIRE_ONCE,
        this.tok.T_RETURN,
        this.tok.T_STATIC,
        this.tok.T_SWITCH,
        this.tok.T_THROW,
        this.tok.T_TRAIT,
        this.tok.T_TRY,
        this.tok.T_UNSET,
        this.tok.T_USE,
        this.tok.T_VAR,
        this.tok.T_WHILE,
        this.tok.T_YIELD,
      ].map(mapIt),
    ),
    VARIABLE: new Map(
      [
        this.tok.T_VARIABLE,
        "$",
        "&",
        this.tok.T_STRING,
        this.tok.T_NAME_RELATIVE,
        this.tok.T_NAME_QUALIFIED,
        this.tok.T_NAME_FULLY_QUALIFIED,
        this.tok.T_NAMESPACE,
        this.tok.T_STATIC,
      ].map(mapIt),
    ),
    SCALAR: new Map(
      [
        this.tok.T_CONSTANT_ENCAPSED_STRING,
        this.tok.T_START_HEREDOC,
        this.tok.T_LNUMBER,
        this.tok.T_DNUMBER,
        this.tok.T_ARRAY,
        "[",
        this.tok.T_CLASS_C,
        this.tok.T_TRAIT_C,
        this.tok.T_FUNC_C,
        this.tok.T_METHOD_C,
        this.tok.T_LINE,
        this.tok.T_FILE,
        this.tok.T_DIR,
        this.tok.T_NS_C,
        '"',
        'b"',
        'B"',
        "-",
        this.tok.T_NS_SEPARATOR,
      ].map(mapIt),
    ),
    T_MAGIC_CONST: new Map(
      [
        this.tok.T_CLASS_C,
        this.tok.T_TRAIT_C,
        this.tok.T_FUNC_C,
        this.tok.T_METHOD_C,
        this.tok.T_LINE,
        this.tok.T_FILE,
        this.tok.T_DIR,
        this.tok.T_NS_C,
      ].map(mapIt),
    ),
    T_MEMBER_FLAGS: new Map(
      [
        this.tok.T_PUBLIC,
        this.tok.T_PRIVATE,
        this.tok.T_PROTECTED,
        this.tok.T_STATIC,
        this.tok.T_ABSTRACT,
        this.tok.T_FINAL,
      ].map(mapIt),
    ),
    EOS: new Map([";", this.EOF, this.tok.T_INLINE_HTML].map(mapIt)),
    EXPR: new Map(
      [
        "@",
        "-",
        "+",
        "!",
        "~",
        "(",
        "`",
        this.tok.T_LIST,
        this.tok.T_CLONE,
        this.tok.T_INC,
        this.tok.T_DEC,
        this.tok.T_NEW,
        this.tok.T_ISSET,
        this.tok.T_EMPTY,
        this.tok.T_MATCH,
        this.tok.T_INCLUDE,
        this.tok.T_INCLUDE_ONCE,
        this.tok.T_REQUIRE,
        this.tok.T_REQUIRE_ONCE,
        this.tok.T_EVAL,
        this.tok.T_INT_CAST,
        this.tok.T_DOUBLE_CAST,
        this.tok.T_STRING_CAST,
        this.tok.T_ARRAY_CAST,
        this.tok.T_OBJECT_CAST,
        this.tok.T_BOOL_CAST,
        this.tok.T_UNSET_CAST,
        this.tok.T_EXIT,
        this.tok.T_PRINT,
        this.tok.T_YIELD,
        this.tok.T_STATIC,
        this.tok.T_FUNCTION,
        this.tok.T_FN,
        // using VARIABLES :
        this.tok.T_VARIABLE,
        "$",
        this.tok.T_NS_SEPARATOR,
        this.tok.T_STRING,
        this.tok.T_NAME_RELATIVE,
        this.tok.T_NAME_QUALIFIED,
        this.tok.T_NAME_FULLY_QUALIFIED,
        // using SCALAR :
        this.tok.T_STRING, // @see variable.js line 45 > conflict with variable = shift/reduce :)
        this.tok.T_CONSTANT_ENCAPSED_STRING,
        this.tok.T_START_HEREDOC,
        this.tok.T_LNUMBER,
        this.tok.T_DNUMBER,
        this.tok.T_ARRAY,
        "[",
        this.tok.T_CLASS_C,
        this.tok.T_TRAIT_C,
        this.tok.T_FUNC_C,
        this.tok.T_METHOD_C,
        this.tok.T_LINE,
        this.tok.T_FILE,
        this.tok.T_DIR,
        this.tok.T_NS_C,
        '"',
        'b"',
        'B"',
        "-",
        this.tok.T_NS_SEPARATOR,
      ].map(mapIt),
    ),
  };
};

/**
 * helper : gets a token name
 *
 * Non-numeric tokens are returned wrapped in single quotes; numeric tokens
 * are resolved through `lexer.engine.tokens.values`, except EOF which gets
 * a fixed human readable label.
 *
 * @function Parser#getTokenName
 * @memberOf module:php-parser
 * @param {String|Number} token
 * @return {String}
 */
Parser.prototype.getTokenName = function (token) {
  if (!isNumber(token)) {
    return "'" + token + "'";
  } else {
    if (token == this.EOF) return "the end of file (EOF)";
    return this.lexer.engine.tokens.values[token];
  }
};

/**
 * main entry point : converts a source code to AST
 *
 * Resets the per-run state (errors, docs, tokens, namespace), feeds the code
 * to the lexer, then reads statements with `read_start` until EOF.
 *
 * @function Parser#parse
 * @memberOf module:php-parser
 * @param {String} code - the source code to parse
 * @param {String} [filename] - name used in raised errors, defaults to "eval"
 * @return {*} the node built by the "program" node factory
 * @throws {Error} in debug mode, when `ast.checkNodes()` reports unclosed nodes
 */
Parser.prototype.parse = function (code, filename) {
  this._errors = [];
  this.filename = filename || "eval";
  this.currentNamespace = [""];
  if (this.extractDoc) {
    this._docs = [];
  } else {
    this._docs = null;
  }
  if (this.extractTokens) {
    this._tokens = [];
  } else {
    this._tokens = null;
  }
  this._docIndex = 0;
  this._lastNode = null;
  this.lexer.setInput(code);
  this.lexer.all_tokens = this.extractTokens;
  this.lexer.comment_tokens = this.extractDoc;
  this.length = this.lexer._input.length;
  this.innerList = false;
  this.innerListForm = false;
  const program = this.node("program");
  const childs = [];
  this.next();
  while (this.token != this.EOF) {
    childs.push(this.read_start());
  }
  // append last comment
  if (
    childs.length === 0 &&
    this.extractDoc &&
    this._docs.length > this._docIndex
  ) {
    childs.push(this.node("noop")());
  }
  // #176 : register latest position
  this.prev = [
    this.lexer.yylloc.last_line,
    this.lexer.yylloc.last_column,
    this.lexer.offset,
  ];
  const result = program(childs, this._errors, this._docs, this._tokens);
  if (this.debug) {
    const errors = this.ast.checkNodes();
    /* istanbul ignore next */
    if (errors.length > 0) {
      errors.forEach(function (error) {
        if (error.position) {
          // eslint-disable-next-line no-console
          console.log(
            "Node at line " +
              error.position.line +
              ", column " +
              error.position.column,
          );
        }
        // eslint-disable-next-line no-console
        console.log(error.stack.join("\n"));
      });
      throw new Error("Some nodes are not closed");
    }
  }
  return result;
};

/**
 * Raise an error
 *
 * The current lexer line is appended to the message. Unless
 * `suppressErrors` is set, a `SyntaxError` carrying `lineNumber`,
 * `fileName` and `columnNumber` is thrown; otherwise an "error" node is
 * built, recorded in the parser error list and returned.
 *
 * @function Parser#raiseError
 * @memberOf module:php-parser
 * @param {String} message - the error message (without the line suffix)
 * @param {String} msgExpect - accepted for API compatibility, not used here
 * @param {String|Number|Array} expect - the expected token(s), stored on the error node
 * @param {String} token - printable name of the unexpected token
 * @return {*} the error node (only when `suppressErrors` is enabled)
 * @throws {SyntaxError} when `suppressErrors` is disabled
 */
Parser.prototype.raiseError = function (message, msgExpect, expect, token) {
  message += " on line " + this.lexer.yylloc.first_line;
  if (!this.suppressErrors) {
    const err = new SyntaxError(
      message,
      this.filename,
      this.lexer.yylloc.first_line,
    );
    err.lineNumber = this.lexer.yylloc.first_line;
    err.fileName = this.filename;
    err.columnNumber = this.lexer.yylloc.first_column;
    throw err;
  }
  // Error node :
  const node = this.ast.prepare("error", null, this)(
    message,
    token,
    this.lexer.yylloc.first_line,
    expect,
  );
  this._errors.push(node);
  return node;
};

/**
 * handling errors
 *
 * Builds the "syntax error, unexpected X[, expecting Y]" message from the
 * current token and delegates to `raiseError`. The ", expecting" part is
 * only produced when `expect` is a single token (not an array).
 *
 * @function Parser#error
 * @memberOf module:php-parser
 * @param {String|Number|Array} [expect] - the expected token(s)
 * @return {*} the value returned by `raiseError`
 * @throws {SyntaxError} when `suppressErrors` is disabled
 */
Parser.prototype.error = function (expect) {
  let msg = "Parse Error : syntax error";
  let token = this.getTokenName(this.token);
  let msgExpect = "";

  if (this.token !== this.EOF) {
    if (isNumber(this.token)) {
      let symbol = this.text();
      /* istanbul ignore next */
      if (symbol.length > 10) {
        // keep the message short : truncate long token texts
        symbol = symbol.substring(0, 7) + "...";
      }
      token = "'" + symbol + "' (" + token + ")";
    }
    msg += ", unexpected " + token;
  }
  if (expect && !Array.isArray(expect)) {
    if (isNumber(expect) || expect.length === 1) {
      msgExpect = ", expecting " + this.getTokenName(expect);
    }
    msg += msgExpect;
  }
  return this.raiseError(msg, msgExpect, expect, token);
};

/**
 * Create a position node from the lexer's position
 *
 * @function Parser#position
 * @memberOf module:php-parser
 * @return {Position}
 */
Parser.prototype.position = function () {
  return new Position(
    this.lexer.yylloc.first_line,
    this.lexer.yylloc.first_column,
    this.lexer.yylloc.first_offset,
  );
};

/**
 * Creates a new AST node
 *
 * When `extractDoc` is enabled, the comments collected since the last node
 * are handed to the node as its docs, and a `postBuild` hook is installed
 * to distribute trailing comments (see the algorithm below).
 *
 * @function Parser#node
 * @memberOf module:php-parser
 * @param {String} name - the node kind passed to `ast.prepare`
 * @return {*} the value returned by `ast.prepare`
 */
Parser.prototype.node = function (name) {
  if (this.extractDoc) {
    let docs = null;
    if (this._docIndex < this._docs.length) {
      docs = this._docs.slice(this._docIndex);
      this._docIndex = this._docs.length;
      /* istanbul ignore next */
      if (this.debug) {
        // eslint-disable-next-line no-console
        console.log(new Error("Append docs on " + name));
        // eslint-disable-next-line no-console
        console.log(docs);
      }
    }
    const node = this.ast.prepare(name, docs, this);
    /*
     * TOKENS :
     * node1 commentA token commentB node2 commentC token commentD node3 commentE token
     *
     * AST :
     * structure:S1 [
     *    left: node1 ( trail: commentA ),
     *    right: structure:S2 [
     *       node2 (lead: commentB, trail: commentC),
     *       node3 (lead: commentD)
     *    ],
     *    trail: commentE
     * ]
     *
     * Algorithm :
     *
     * Attach the last comments on parent of current node
     * If a new node is started and the parent has a trailing comment
     * then move it on previous node
     *
     * start S2
     * start node1
     * consume node1 & set commentA as trailingComment on S2
     * start S2
     * S1 has a trailingComment, attach it on node1
     * ...
     * NOTE : As the trailingComment behavior depends on AST, it will be built on
     * the AST layer - last child node will keep its trailingComment nodes
     */
    node.postBuild = function (self) {
      if (this._docIndex < this._docs.length) {
        if (this._lastNode) {
          // comments located up to the previous token offset belong to
          // the previously built node
          const offset = this.prev[2];
          let max = this._docIndex;
          for (; max < this._docs.length; max++) {
            if (this._docs[max].offset > offset) {
              break;
            }
          }
          if (max > this._docIndex) {
            // inject trailing comment on child node
            this._lastNode.setTrailingComments(
              this._docs.slice(this._docIndex, max),
            );
            this._docIndex = max;
          }
        } else if (this.token === this.EOF) {
          // end of content
          self.setTrailingComments(this._docs.slice(this._docIndex));
          this._docIndex = this._docs.length;
        }
      }
      this._lastNode = self;
    }.bind(this);
    return node;
  }
  return this.ast.prepare(name, null, this);
};

/**
 * expects an end of statement or end of file
 *
 * Accepts ";" (which is consumed into `node` only when it is a real ";"
 * in the source), inline HTML or EOF. Anything else raises an error.
 *
 * @function Parser#expectEndOfStatement
 * @memberOf module:php-parser
 * @param {*} [node] - node that should include the ";" token, if any
 * @return {boolean} `false` when an error was recorded, `true` otherwise
 * @throws {SyntaxError} when `suppressErrors` is disabled and the token is unexpected
 */
Parser.prototype.expectEndOfStatement = function (node) {
  if (this.token === ";") {
    // include only real ';' statements
    // https://github.com/glayzzle/php-parser/issues/164
    if (node && this.lexer.yytext === ";") {
      node.includeToken(this);
    }
  } else if (this.token !== this.tok.T_INLINE_HTML && this.token !== this.EOF) {
    this.error(";");
    return false;
  }
  this.next();
  return true;
};

// stack frames (as they appear after the "at " prefix) skipped by showlog
const ignoreStack = ["parser.next", "parser.node", "parser.showlog"];
/**
 * outputs some debug information on current token
 *
 * Walks the current stack trace to find the first frame that is not one of
 * the `ignoreStack` entries and prints it next to the current token.
 *
 * @private
 * @function Parser#showlog
 * @memberOf module:php-parser
 * @return {Parser} this
 */
Parser.prototype.showlog = function () {
  const stack = new Error().stack.split("\n");
  let line;
  for (let offset = 2; offset < stack.length; offset++) {
    line = stack[offset].trim();
    let found = false;
    for (let i = 0; i < ignoreStack.length; i++) {
      /* istanbul ignore next */
      if (line.substring(3, 3 + ignoreStack[i].length) === ignoreStack[i]) {
        found = true;
        break;
      }
    }
    /* istanbul ignore next */
    if (!found) {
      break;
    }
  }
  // eslint-disable-next-line no-console
  console.log(
    "Line " +
      this.lexer.yylloc.first_line +
      " : " +
      this.getTokenName(this.token) +
      ">" +
      this.lexer.yytext +
      "<" +
      " @-->" +
      line,
  );
  return this;
};

/**
 * Force the parser to check the current token.
 *
 * If the current token does not match the expected token,
 * then an error will be raised.
 *
 * If the suppressErrors mode is activated, then the error will
 * be added to the program error stack and this function will return `false`.
 *
 * @function Parser#expect
 * @memberOf module:php-parser
 * @param {String|Number|Array} token - the expected token, or a list of accepted tokens
 * @return {boolean}
 * @throws {SyntaxError} when the token does not match and `suppressErrors` is disabled
 */
Parser.prototype.expect = function (token) {
  if (Array.isArray(token)) {
    if (token.indexOf(this.token) === -1) {
      this.error(token);
      return false;
    }
  } else if (this.token != token) {
    this.error(token);
    return false;
  }
  return true;
};

/**
 * Returns the current token contents
 * @function Parser#text
 * @memberOf module:php-parser
 * @return {String}
 */
Parser.prototype.text = function () {
  return this.lexer.yytext;
};

/**
 * consume the next token
 *
 * Records the end position of the current token in `this.prev`, reads the
 * next token and, when `extractDoc` is enabled, collects any following
 * comment tokens into `this._docs`.
 *
 * @function Parser#next
 * @memberOf module:php-parser
 * @return {Parser} this
 */
Parser.prototype.next = function () {
  // prepare the back command
  if (this.token !== ";" || this.lexer.yytext === ";") {
    // ignore '?>' from automated resolution
    // https://github.com/glayzzle/php-parser/issues/168
    this.prev = [
      this.lexer.yylloc.last_line,
      this.lexer.yylloc.last_column,
      this.lexer.offset,
    ];
  }

  // eating the token
  this.lex();

  // showing the debug
  if (this.debug) {
    this.showlog();
  }

  // handling comments
  if (this.extractDoc) {
    while (
      this.token === this.tok.T_COMMENT ||
      this.token === this.tok.T_DOC_COMMENT
    ) {
      // APPEND COMMENTS
      if (this.token === this.tok.T_COMMENT) {
        this._docs.push(this.read_comment());
      } else {
        this._docs.push(this.read_doc_comment());
      }
    }
  }

  return this;
};

/**
 * Peek at the next token.
 *
 * The lexer state is saved before reading and restored afterwards, so the
 * parser position is left untouched.
 *
 * @function Parser#peek
 * @memberOf module:php-parser
 * @returns {string|number} Next Token
 */
Parser.prototype.peek = function () {
  const lexerState = this.lexer.getState();
  const nextToken = this.lexer.lex();
  this.lexer.setState(lexerState);
  return nextToken;
};

/**
 * Eating a token
 *
 * Without `extractTokens` this simply takes the lexer's next token (EOF
 * when the lexer returns a falsy value). With `extractTokens` every token
 * read is recorded in `this._tokens` as
 * `[name|null, text, first_line, first_offset, offset]`, and whitespace,
 * open tags and (unless `extractDoc` is set) comments are skipped.
 *
 * @function Parser#lex
 * @memberOf module:php-parser
 * @return {Parser} this
 */
Parser.prototype.lex = function () {
  // append on token stack
  if (this.extractTokens) {
    do {
      // the token
      this.token = this.lexer.lex() || /* istanbul ignore next */ this.EOF;
      if (this.token === this.EOF) return this;
      let entry = this.lexer.yytext;
      if (
        Object.prototype.hasOwnProperty.call(
          this.lexer.engine.tokens.values,
          this.token,
        )
      ) {
        entry = [
          this.lexer.engine.tokens.values[this.token],
          entry,
          this.lexer.yylloc.first_line,
          this.lexer.yylloc.first_offset,
          this.lexer.offset,
        ];
      } else {
        entry = [
          null,
          entry,
          this.lexer.yylloc.first_line,
          this.lexer.yylloc.first_offset,
          this.lexer.offset,
        ];
      }
      this._tokens.push(entry);
      if (this.token === this.tok.T_CLOSE_TAG) {
        // https://github.com/php/php-src/blob/7ff186434e82ee7be7c59d0db9a976641cf7b09c/Zend/zend_compile.c#L1680
        this.token = ";";
        return this;
      } else if (this.token === this.tok.T_OPEN_TAG_WITH_ECHO) {
        this.token = this.tok.T_ECHO;
        return this;
      }
    } while (
      this.token === this.tok.T_WHITESPACE || // ignore white space
      (!this.extractDoc &&
        (this.token === this.tok.T_COMMENT || // ignore single lines comments
          this.token === this.tok.T_DOC_COMMENT)) || // ignore doc comments
      // ignore open tags
      this.token === this.tok.T_OPEN_TAG
    );
  } else {
    this.token = this.lexer.lex() || /* istanbul ignore next */ this.EOF;
  }
  return this;
};

/**
 * Check if token is of specified type
 *
 * @function Parser#is
 * @memberOf module:php-parser
 * @param {String|Array} type - a key of `this.entries` (e.g. "EXPR", "EOS"), or an explicit list of tokens
 * @return {boolean} true when the current token belongs to the group/list
 */
Parser.prototype.is = function (type) {
  if (Array.isArray(type)) {
    return type.indexOf(this.token) !== -1;
  }
  return this.entries[type].has(this.token);
};

// extends the parser with syntax files
[
  require("./parser/array.js"),
  require("./parser/class.js"),
  require("./parser/comment.js"),
  require("./parser/expr.js"),
  require("./parser/enum.js"),
  require("./parser/function.js"),
  require("./parser/if.js"),
  require("./parser/loops.js"),
  require("./parser/main.js"),
  require("./parser/namespace.js"),
  require("./parser/scalar.js"),
  require("./parser/statement.js"),
  require("./parser/switch.js"),
  require("./parser/try.js"),
  require("./parser/utils.js"),
  require("./parser/variable.js"),
].forEach(function (ext) {
  for (const k in ext) {
    /* istanbul ignore next */
    if (Object.prototype.hasOwnProperty.call(Parser.prototype, k)) {
      // @see https://github.com/glayzzle/php-parser/issues/234
      throw new Error("Function " + k + " is already defined - collision");
    }
    Parser.prototype[k] = ext[k];
  }
});

module.exports = Parser;