php-parser
Version:
Parse PHP code from JS and return its AST
749 lines (724 loc) • 19.6 kB
JavaScript
/**
* Copyright (C) 2018 Glayzzle (BSD3 License)
* @authors https://github.com/glayzzle/php-parser/graphs/contributors
* @url http://glayzzle.com
*/
"use strict";
const Position = require("./ast/position");
/**
 * Tells whether a value can be read as a finite number.
 *
 * The lone punctuation characters "." and "," are rejected up front, then the
 * value must both parse as a float and be finite once coerced.
 *
 * @private
 * @param {*} n - value to test (typically a token)
 * @return {boolean}
 */
function isNumber(n) {
  // loose comparison kept on purpose: it mirrors the coercing checks below
  if (n == "." || n == ",") {
    return false;
  }
  if (isNaN(parseFloat(n))) {
    return false;
  }
  return isFinite(n);
}
/**
 * The PHP Parser class that builds the AST tree from the lexer
 *
 * Besides copying the lexer/AST references and resetting its flags, the
 * constructor builds `this.entries`: named lookup tables (Map instances whose
 * keys are tokens and whose values are always `null`) that are queried by
 * `Parser#is`.
 *
 * @constructor Parser
 * @memberOf module:php-parser
 * @tutorial Parser
 * @property {Lexer} lexer - current lexer instance
 * @property {AST} ast - the AST factory instance
 * @property {number|string} token - current token
 * @property {boolean} extractDoc - should extract documentation as AST node
 * @property {boolean} extractTokens - should extract each token
 * @property {boolean} suppressErrors - should ignore parsing errors and continue
 * @property {boolean} debug - should output debug information
 */
const Parser = function (lexer, ast) {
  this.lexer = lexer;
  this.ast = ast;
  this.tok = lexer.tok;
  this.EOF = lexer.EOF;
  this.token = null;
  this.prev = null;
  this.debug = false;
  this.version = 803;
  this.extractDoc = false;
  this.extractTokens = false;
  this.suppressErrors = false;

  const tok = this.tok;

  // Turns a list of tokens into a Map keyed by token (values are unused).
  // Tokens listed more than once simply collapse onto the same key.
  const toLookup = function (tokens) {
    return new Map(
      tokens.map(function (item) {
        return [item, null];
      }),
    );
  };

  // magic constants, shared by the SCALAR, T_MAGIC_CONST and EXPR tables
  const magicConstants = [
    tok.T_CLASS_C,
    tok.T_TRAIT_C,
    tok.T_FUNC_C,
    tok.T_METHOD_C,
    tok.T_LINE,
    tok.T_FILE,
    tok.T_DIR,
    tok.T_NS_C,
  ];

  // tokens of the SCALAR table, reused as the tail of EXPR
  const scalarTokens = [
    tok.T_CONSTANT_ENCAPSED_STRING,
    tok.T_START_HEREDOC,
    tok.T_LNUMBER,
    tok.T_DNUMBER,
    tok.T_ARRAY,
    "[",
    ...magicConstants,
    '"',
    'b"',
    'B"',
    "-",
    tok.T_NS_SEPARATOR,
  ];

  this.entries = {
    // reserved_non_modifiers
    IDENTIFIER: toLookup([
      tok.T_ABSTRACT,
      tok.T_ARRAY,
      tok.T_AS,
      tok.T_BREAK,
      tok.T_CALLABLE,
      tok.T_CASE,
      tok.T_CATCH,
      tok.T_CLASS,
      tok.T_CLASS_C,
      tok.T_CLONE,
      tok.T_CONST,
      tok.T_CONTINUE,
      tok.T_DECLARE,
      tok.T_DEFAULT,
      tok.T_DIR,
      tok.T_DO,
      tok.T_ECHO,
      tok.T_ELSE,
      tok.T_ELSEIF,
      tok.T_EMPTY,
      tok.T_ENDDECLARE,
      tok.T_ENDFOR,
      tok.T_ENDFOREACH,
      tok.T_ENDIF,
      tok.T_ENDSWITCH,
      tok.T_ENDWHILE,
      tok.T_ENUM,
      tok.T_EVAL,
      tok.T_EXIT,
      tok.T_EXTENDS,
      tok.T_FILE,
      tok.T_FINAL,
      tok.T_FINALLY,
      tok.T_FN,
      tok.T_FOR,
      tok.T_FOREACH,
      tok.T_FUNC_C,
      tok.T_FUNCTION,
      tok.T_GLOBAL,
      tok.T_GOTO,
      tok.T_IF,
      tok.T_IMPLEMENTS,
      tok.T_INCLUDE,
      tok.T_INCLUDE_ONCE,
      tok.T_INSTANCEOF,
      tok.T_INSTEADOF,
      tok.T_INTERFACE,
      tok.T_ISSET,
      tok.T_LINE,
      tok.T_LIST,
      tok.T_LOGICAL_AND,
      tok.T_LOGICAL_OR,
      tok.T_LOGICAL_XOR,
      tok.T_MATCH,
      tok.T_METHOD_C,
      tok.T_NAMESPACE,
      tok.T_NEW,
      tok.T_NS_C,
      tok.T_PRINT,
      tok.T_PRIVATE,
      tok.T_PROTECTED,
      tok.T_PUBLIC,
      tok.T_READ_ONLY,
      tok.T_REQUIRE,
      tok.T_REQUIRE_ONCE,
      tok.T_RETURN,
      tok.T_STATIC,
      tok.T_SWITCH,
      tok.T_THROW,
      tok.T_TRAIT,
      tok.T_TRY,
      tok.T_UNSET,
      tok.T_USE,
      tok.T_VAR,
      tok.T_WHILE,
      tok.T_YIELD,
    ]),
    VARIABLE: toLookup([
      tok.T_VARIABLE,
      "$",
      "&",
      tok.T_STRING,
      tok.T_NAME_RELATIVE,
      tok.T_NAME_QUALIFIED,
      tok.T_NAME_FULLY_QUALIFIED,
      tok.T_NAMESPACE,
      tok.T_STATIC,
    ]),
    SCALAR: toLookup(scalarTokens),
    T_MAGIC_CONST: toLookup(magicConstants),
    T_MEMBER_FLAGS: toLookup([
      tok.T_PUBLIC,
      tok.T_PRIVATE,
      tok.T_PROTECTED,
      tok.T_STATIC,
      tok.T_ABSTRACT,
      tok.T_FINAL,
    ]),
    EOS: toLookup([";", this.EOF, tok.T_INLINE_HTML]),
    EXPR: toLookup([
      "@",
      "-",
      "+",
      "!",
      "~",
      "(",
      "`",
      tok.T_LIST,
      tok.T_CLONE,
      tok.T_INC,
      tok.T_DEC,
      tok.T_NEW,
      tok.T_ISSET,
      tok.T_EMPTY,
      tok.T_MATCH,
      tok.T_INCLUDE,
      tok.T_INCLUDE_ONCE,
      tok.T_REQUIRE,
      tok.T_REQUIRE_ONCE,
      tok.T_EVAL,
      tok.T_INT_CAST,
      tok.T_DOUBLE_CAST,
      tok.T_STRING_CAST,
      tok.T_ARRAY_CAST,
      tok.T_OBJECT_CAST,
      tok.T_BOOL_CAST,
      tok.T_UNSET_CAST,
      tok.T_EXIT,
      tok.T_PRINT,
      tok.T_YIELD,
      tok.T_STATIC,
      tok.T_FUNCTION,
      tok.T_FN,
      // using VARIABLES :
      tok.T_VARIABLE,
      "$",
      tok.T_NS_SEPARATOR,
      tok.T_STRING,
      tok.T_NAME_RELATIVE,
      tok.T_NAME_QUALIFIED,
      tok.T_NAME_FULLY_QUALIFIED,
      // using SCALAR :
      tok.T_STRING, // @see variable.js line 45 > conflict with variable = shift/reduce :)
      ...scalarTokens,
    ]),
  };
};
/**
 * helper : gets a printable name for a token
 *
 * Numeric tokens are resolved through the lexer engine's token table (the
 * EOF token gets a dedicated label); anything else is returned wrapped in
 * single quotes.
 *
 * @function Parser#getTokenName
 * @memberOf module:php-parser
 * @param {String|Number} token
 * @return {String}
 */
Parser.prototype.getTokenName = function (token) {
  if (isNumber(token)) {
    return token == this.EOF
      ? "the end of file (EOF)"
      : this.lexer.engine.tokens.values[token];
  }
  return "'" + token + "'";
};
/**
 * main entry point : converts a source code to AST
 *
 * Resets the per-run state (errors, docs, tokens, namespace), feeds the code
 * to the lexer, then reads statements with `read_start` until EOF and wraps
 * them into a "program" node.
 *
 * @function Parser#parse
 * @memberOf module:php-parser
 * @param {String} code - the source to parse
 * @param {String} [filename] - name used in error reports, defaults to "eval"
 * @return {*} the node built by the "program" node factory
 * @throws {Error} in debug mode, when `ast.checkNodes()` reports unclosed nodes
 */
Parser.prototype.parse = function (code, filename) {
  // reset the state of the previous run
  this._errors = [];
  this.filename = filename || "eval";
  this.currentNamespace = [""];
  this._docs = this.extractDoc ? [] : null;
  this._tokens = this.extractTokens ? [] : null;
  this._docIndex = 0;
  this._lastNode = null;

  // prepare the lexer
  this.lexer.setInput(code);
  this.lexer.all_tokens = this.extractTokens;
  this.lexer.comment_tokens = this.extractDoc;
  this.length = this.lexer._input.length;
  this.innerList = false;
  this.innerListForm = false;

  // the program node is opened before the first token is consumed
  const buildProgram = this.node("program");
  const children = [];
  for (this.next(); this.token != this.EOF; ) {
    children.push(this.read_start());
  }

  // append last comment : an otherwise empty program still carries its docs
  const hasOrphanDocs =
    children.length === 0 &&
    this.extractDoc &&
    this._docs.length > this._docIndex;
  if (hasOrphanDocs) {
    children.push(this.node("noop")());
  }

  // #176 : register latest position
  const { last_line, last_column } = this.lexer.yylloc;
  this.prev = [last_line, last_column, this.lexer.offset];

  const result = buildProgram(
    children,
    this._errors,
    this._docs,
    this._tokens,
  );
  if (!this.debug) {
    return result;
  }

  // debug mode : report every node that was started but never closed
  const unclosed = this.ast.checkNodes();
  /* istanbul ignore next */
  if (unclosed.length > 0) {
    for (const failure of unclosed) {
      if (failure.position) {
        // eslint-disable-next-line no-console
        console.log(
          "Node at line " +
            failure.position.line +
            ", column " +
            failure.position.column,
        );
      }
      // eslint-disable-next-line no-console
      console.log(failure.stack.join("\n"));
    }
    throw new Error("Some nodes are not closed");
  }
  return result;
};
/**
 * Raise an error
 *
 * The current lexer line is appended to the message. Unless `suppressErrors`
 * is set, a SyntaxError decorated with file/line/column is thrown; otherwise
 * an "error" AST node is built, recorded in `this._errors` and returned.
 *
 * @function Parser#raiseError
 * @memberOf module:php-parser
 * @param {String} message - error text (without location)
 * @param {String} msgExpect - unused by this function
 * @param {*} expect - expected token(s), forwarded to the error node
 * @param {String} token - printable name of the offending token
 * @return {*} the error node (only when errors are suppressed)
 * @throws {SyntaxError} when errors are not suppressed
 */
Parser.prototype.raiseError = function (message, msgExpect, expect, token) {
  const located = message + " on line " + this.lexer.yylloc.first_line;

  if (this.suppressErrors) {
    // Error node :
    const buildError = this.ast.prepare("error", null, this);
    const node = buildError(
      located,
      token,
      this.lexer.yylloc.first_line,
      expect,
    );
    this._errors.push(node);
    return node;
  }

  const err = new SyntaxError(
    located,
    this.filename,
    this.lexer.yylloc.first_line,
  );
  Object.assign(err, {
    lineNumber: this.lexer.yylloc.first_line,
    fileName: this.filename,
    columnNumber: this.lexer.yylloc.first_column,
  });
  throw err;
};
/**
 * handling errors
 *
 * Builds a "Parse Error : syntax error" message describing the current token
 * (unless it is EOF) and, when a single expected token is given, what was
 * expected instead; the result is handed to `raiseError`.
 *
 * @function Parser#error
 * @memberOf module:php-parser
 * @param {String|Number|Array} [expect] - expected token(s); arrays are
 *   forwarded but never described in the message
 * @return {*} whatever `raiseError` returns
 */
Parser.prototype.error = function (expect) {
  let found = this.getTokenName(this.token);
  let unexpected = "";
  if (this.token !== this.EOF) {
    if (isNumber(this.token)) {
      // named token : show its (possibly shortened) text next to its name
      const raw = this.text();
      /* istanbul ignore next */
      const shown = raw.length > 10 ? raw.substring(0, 7) + "..." : raw;
      found = "'" + shown + "' (" + found + ")";
    }
    unexpected = ", unexpected " + found;
  }

  // only a numeric token or a one-character string is spelled out
  let msgExpect = "";
  if (
    expect &&
    !Array.isArray(expect) &&
    (isNumber(expect) || expect.length === 1)
  ) {
    msgExpect = ", expecting " + this.getTokenName(expect);
  }

  return this.raiseError(
    "Parse Error : syntax error" + unexpected + msgExpect,
    msgExpect,
    expect,
    found,
  );
};
/**
 * Create a position node from the lexer's current location
 * (first line, first column and first offset of `lexer.yylloc`).
 *
 * @function Parser#position
 * @memberOf module:php-parser
 * @return {Position}
 */
Parser.prototype.position = function () {
  const { first_line, first_column, first_offset } = this.lexer.yylloc;
  return new Position(first_line, first_column, first_offset);
};
/**
 * Creates a new AST node
 *
 * Without doc extraction this is a plain `ast.prepare(name, null, this)`.
 * With doc extraction, every comment collected since the last node is handed
 * to `ast.prepare` as leading docs, and a `postBuild` hook is installed on the
 * returned node to deal with the comments that show up afterwards.
 *
 * @function Parser#node
 * @memberOf module:php-parser
 * @param {String} name - kind of node to prepare
 * @return {*} the value returned by `ast.prepare`
 */
Parser.prototype.node = function (name) {
  if (!this.extractDoc) {
    return this.ast.prepare(name, null, this);
  }

  // consume every pending comment as leading docs of the new node
  let docs = null;
  if (this._docIndex < this._docs.length) {
    docs = this._docs.slice(this._docIndex);
    this._docIndex = this._docs.length;
    /* istanbul ignore next */
    if (this.debug) {
      // eslint-disable-next-line no-console
      console.log(new Error("Append docs on " + name));
      // eslint-disable-next-line no-console
      console.log(docs);
    }
  }

  const node = this.ast.prepare(name, docs, this);

  /*
   * TOKENS :
   * node1 commentA token commentB node2 commentC token commentD node3 commentE token
   *
   * AST :
   * structure:S1 [
   *    left: node1 ( trail: commentA ),
   *    right: structure:S2 [
   *       node2 (lead: commentB, trail: commentC),
   *       node3 (lead: commentD)
   *    ],
   *    trail: commentE
   * ]
   *
   * Hook behaviour when comments are still unattached :
   *  - a node was built before : the comments whose offset is not beyond the
   *    previous token end (`prev[2]`) become trailing comments of that node
   *  - no node was built before and the parser is at EOF : all of them become
   *    trailing comments of the node being built
   * In every case the node being built becomes the new "last node".
   *
   * NOTE(review): presumably invoked by the AST layer once the node is
   * complete - confirm against ast.prepare.
   */
  node.postBuild = (self) => {
    const hasPending = this._docIndex < this._docs.length;
    if (hasPending && this._lastNode) {
      const limit = this.prev[2];
      let end = this._docIndex;
      while (end < this._docs.length && !(this._docs[end].offset > limit)) {
        end++;
      }
      if (end > this._docIndex) {
        // inject trailing comment on child node
        this._lastNode.setTrailingComments(
          this._docs.slice(this._docIndex, end),
        );
        this._docIndex = end;
      }
    } else if (hasPending && this.token === this.EOF) {
      // end of content
      self.setTrailingComments(this._docs.slice(this._docIndex));
      this._docIndex = this._docs.length;
    }
    this._lastNode = self;
  };
  return node;
};
/**
 * expects an end of statement or end of file
 *
 * A ';' token, inline HTML and EOF are all accepted and consumed; anything
 * else is reported through `error(";")` without consuming the token.
 *
 * @function Parser#expectEndOfStatement
 * @memberOf module:php-parser
 * @param {*} [node] - node that should include the ';' token, if any
 * @return {boolean} false when an error was reported, true otherwise
 */
Parser.prototype.expectEndOfStatement = function (node) {
  if (this.token !== ";") {
    const tolerated =
      this.token === this.tok.T_INLINE_HTML || this.token === this.EOF;
    if (!tolerated) {
      this.error(";");
      return false;
    }
  } else if (node && this.lexer.yytext === ";") {
    // include only real ';' statements
    // https://github.com/glayzzle/php-parser/issues/164
    node.includeToken(this);
  }
  this.next();
  return true;
};
// Frame-name prefixes (lower case) of the parser plumbing that must be
// skipped when looking for the function that triggered the log.
const ignoreStack = ["parser.next", "parser.node", "parser.showlog"];
/**
 * outputs some debug information on current token
 *
 * Prints the current line, token name and token text, followed by the first
 * stack frame that is not one of the `ignoreStack` helpers.
 *
 * Frame names are compared case-insensitively: V8 reports these prototype
 * methods as `Parser.next`, `Parser.node`, ... while `ignoreStack` holds
 * lower-case names, so a case-sensitive comparison never matched and the log
 * always pointed at `Parser.next` itself.
 *
 * @private
 * @function Parser#showlog
 * @memberOf module:php-parser
 * @return {Parser} the parser itself
 */
Parser.prototype.showlog = function () {
  const stack = new Error().stack.split("\n");
  let line;
  // stack[0] is the error header and stack[1] is showlog itself
  for (let offset = 2; offset < stack.length; offset++) {
    line = stack[offset].trim();
    // drop the leading "at " and normalise the case before matching
    const frame = line.substring(3).toLowerCase();
    const found = ignoreStack.some(function (prefix) {
      return frame.substring(0, prefix.length) === prefix;
    });
    /* istanbul ignore next */
    if (!found) {
      break;
    }
  }
  // eslint-disable-next-line no-console
  console.log(
    "Line " +
      this.lexer.yylloc.first_line +
      " : " +
      this.getTokenName(this.token) +
      ">" +
      this.lexer.yytext +
      "<" +
      " @-->" +
      line,
  );
  return this;
};
/**
 * Force the parser to check the current token.
 *
 * If the current token does not match the expected token (or none of the
 * expected tokens when an array is given), the mismatch is reported through
 * `Parser#error` and this function returns `false`.
 *
 * @function Parser#expect
 * @memberOf module:php-parser
 * @param {String|Number|Array} token - expected token, or list of accepted tokens
 * @return {boolean} true when the current token matches
 */
Parser.prototype.expect = function (token) {
  const matches = Array.isArray(token)
    ? token.indexOf(this.token) !== -1
    : this.token == token;
  if (!matches) {
    this.error(token);
    return false;
  }
  return true;
};
/**
 * Returns the current token contents (the lexer's `yytext`)
 * @function Parser#text
 * @memberOf module:php-parser
 * @return {String}
 */
Parser.prototype.text = function () {
  const { yytext } = this.lexer;
  return yytext;
};
/**
 * consume the next token
 *
 * Remembers where the current token ended (`this.prev`), reads the next token
 * through `lex()`, optionally logs it, and - when docs are extracted -
 * collects every comment token found at that point into `this._docs`.
 *
 * @function Parser#next
 * @memberOf module:php-parser
 * @return {Parser} the parser itself
 */
Parser.prototype.next = function () {
  // prepare the back command
  // ignore '?>' from automated resolution : a ';' token whose text is not ';'
  // must not move the previous position
  // https://github.com/glayzzle/php-parser/issues/168
  const isSyntheticEnd = this.token === ";" && this.lexer.yytext !== ";";
  if (!isSyntheticEnd) {
    const { last_line, last_column } = this.lexer.yylloc;
    this.prev = [last_line, last_column, this.lexer.offset];
  }

  // eating the token
  this.lex();

  // showing the debug
  if (this.debug) {
    this.showlog();
  }

  // handling comments
  if (this.extractDoc) {
    for (;;) {
      if (this.token === this.tok.T_COMMENT) {
        this._docs.push(this.read_comment());
      } else if (this.token === this.tok.T_DOC_COMMENT) {
        this._docs.push(this.read_doc_comment());
      } else {
        break;
      }
    }
  }
  return this;
};
/**
 * Peek at the next token.
 *
 * The lexer state is saved before reading and restored afterwards; the
 * parser's own current token is left untouched.
 *
 * @function Parser#peek
 * @memberOf module:php-parser
 * @returns {string|number} Next Token
 */
Parser.prototype.peek = function () {
  const { lexer } = this;
  const savedState = lexer.getState();
  const upcoming = lexer.lex();
  lexer.setState(savedState);
  return upcoming;
};
/**
 * Eating a token
 *
 * Without token extraction the next lexer token (or EOF) simply becomes the
 * current token. With token extraction every token read is also recorded in
 * `this._tokens` as `[name|null, text, line, startOffset, endOffset]`, white
 * space / open tags (and comments unless docs are extracted) are skipped,
 * a close tag is turned into ';' and an open-tag-with-echo into T_ECHO.
 *
 * @function Parser#lex
 * @memberOf module:php-parser
 * @return {Parser} the parser itself
 */
Parser.prototype.lex = function () {
  if (!this.extractTokens) {
    this.token = this.lexer.lex() || /* istanbul ignore next */ this.EOF;
    return this;
  }

  // append on token stack
  do {
    // the token
    this.token = this.lexer.lex() || /* istanbul ignore next */ this.EOF;
    if (this.token === this.EOF) {
      return this;
    }

    // named tokens are recorded with their name, raw characters with null
    const names = this.lexer.engine.tokens.values;
    const label = Object.prototype.hasOwnProperty.call(names, this.token)
      ? names[this.token]
      : null;
    this._tokens.push([
      label,
      this.lexer.yytext,
      this.lexer.yylloc.first_line,
      this.lexer.yylloc.first_offset,
      this.lexer.offset,
    ]);

    if (this.token === this.tok.T_CLOSE_TAG) {
      // https://github.com/php/php-src/blob/7ff186434e82ee7be7c59d0db9a976641cf7b09c/Zend/zend_compile.c#L1680
      this.token = ";";
      return this;
    }
    if (this.token === this.tok.T_OPEN_TAG_WITH_ECHO) {
      this.token = this.tok.T_ECHO;
      return this;
    }
  } while (
    // ignore white space
    this.token === this.tok.T_WHITESPACE ||
    // ignore open tags
    this.token === this.tok.T_OPEN_TAG ||
    // ignore single line and doc comments when they are not extracted
    (!this.extractDoc &&
      (this.token === this.tok.T_COMMENT ||
        this.token === this.tok.T_DOC_COMMENT))
  );
  return this;
};
/**
 * Check if the current token is of the specified type
 *
 * @function Parser#is
 * @memberOf module:php-parser
 * @param {String|Array} type - name of a lookup table in `this.entries`
 *   (e.g. "EXPR"), or an explicit list of accepted tokens
 * @return {boolean}
 */
Parser.prototype.is = function (type) {
  return Array.isArray(type)
    ? type.indexOf(this.token) !== -1
    : this.entries[type].has(this.token);
};
// extends the parser with syntax files : every enumerable member of each
// grammar module is copied onto Parser.prototype, and a name that already
// exists on the prototype aborts the load
for (const extension of [
  require("./parser/array.js"),
  require("./parser/class.js"),
  require("./parser/comment.js"),
  require("./parser/expr.js"),
  require("./parser/enum.js"),
  require("./parser/function.js"),
  require("./parser/if.js"),
  require("./parser/loops.js"),
  require("./parser/main.js"),
  require("./parser/namespace.js"),
  require("./parser/scalar.js"),
  require("./parser/statement.js"),
  require("./parser/switch.js"),
  require("./parser/try.js"),
  require("./parser/utils.js"),
  require("./parser/variable.js"),
]) {
  for (const member in extension) {
    /* istanbul ignore next */
    if (Object.prototype.hasOwnProperty.call(Parser.prototype, member)) {
      // @see https://github.com/glayzzle/php-parser/issues/234
      throw new Error(
        "Function " + member + " is already defined - collision",
      );
    }
    Parser.prototype[member] = extension[member];
  }
}
module.exports = Parser;