php-parser
Version:
Parses PHP code from JS and returns its AST
562 lines (543 loc) • 15.7 kB
JavaScript
/**
* Copyright (C) 2018 Glayzzle (BSD3 License)
* @authors https://github.com/glayzzle/php-parser/graphs/contributors
* @url http://glayzzle.com
*/
"use strict";
/**
 * The PHP lexer. It splits the input string into tokens so that the
 * parser can build the AST from its grammar.
 *
 * @constructor Lexer
 * @memberOf module:php-parser
 * @param {object} engine owning engine; `engine.tokens.names` provides the token ids
 * @property {number} EOF token returned once the input is exhausted
 * @property {boolean} all_tokens defines if all tokens must be retrieved (used by token_get_all only)
 * @property {boolean} comment_tokens extracts comments tokens
 * @property {boolean} mode_eval enables the eval mode (ignore opening tags)
 * @property {boolean} asp_tags asp tags mode flag (false by default)
 * @property {boolean} short_tags short tags mode flag (false by default)
 * @property {object} keywords maps each (lower-cased) php keyword to its token
 * @property {object} castKeywords maps each php type-cast keyword to its token
 */
const Lexer = function (engine) {
  this.engine = engine;
  const names = engine.tokens.names;
  this.tok = names;

  // Default settings; key order mirrors the order the properties are created in.
  Object.assign(this, {
    EOF: 1,
    debug: false,
    all_tokens: true,
    comment_tokens: false,
    mode_eval: false,
    asp_tags: false,
    short_tags: false,
    version: 803,
    yyprevcol: 0,
  });

  this.keywords = {
    // magic constants
    __class__: names.T_CLASS_C,
    __trait__: names.T_TRAIT_C,
    __function__: names.T_FUNC_C,
    __method__: names.T_METHOD_C,
    __line__: names.T_LINE,
    __file__: names.T_FILE,
    __dir__: names.T_DIR,
    __namespace__: names.T_NS_C,
    // statements and control flow
    exit: names.T_EXIT,
    die: names.T_EXIT,
    function: names.T_FUNCTION,
    const: names.T_CONST,
    return: names.T_RETURN,
    try: names.T_TRY,
    catch: names.T_CATCH,
    finally: names.T_FINALLY,
    throw: names.T_THROW,
    if: names.T_IF,
    elseif: names.T_ELSEIF,
    endif: names.T_ENDIF,
    else: names.T_ELSE,
    while: names.T_WHILE,
    endwhile: names.T_ENDWHILE,
    do: names.T_DO,
    for: names.T_FOR,
    endfor: names.T_ENDFOR,
    foreach: names.T_FOREACH,
    endforeach: names.T_ENDFOREACH,
    declare: names.T_DECLARE,
    enddeclare: names.T_ENDDECLARE,
    instanceof: names.T_INSTANCEOF,
    as: names.T_AS,
    switch: names.T_SWITCH,
    endswitch: names.T_ENDSWITCH,
    case: names.T_CASE,
    default: names.T_DEFAULT,
    break: names.T_BREAK,
    continue: names.T_CONTINUE,
    goto: names.T_GOTO,
    echo: names.T_ECHO,
    print: names.T_PRINT,
    // declarations
    class: names.T_CLASS,
    interface: names.T_INTERFACE,
    trait: names.T_TRAIT,
    enum: names.T_ENUM,
    extends: names.T_EXTENDS,
    implements: names.T_IMPLEMENTS,
    new: names.T_NEW,
    clone: names.T_CLONE,
    var: names.T_VAR,
    eval: names.T_EVAL,
    include: names.T_INCLUDE,
    include_once: names.T_INCLUDE_ONCE,
    require: names.T_REQUIRE,
    require_once: names.T_REQUIRE_ONCE,
    namespace: names.T_NAMESPACE,
    use: names.T_USE,
    insteadof: names.T_INSTEADOF,
    global: names.T_GLOBAL,
    isset: names.T_ISSET,
    empty: names.T_EMPTY,
    __halt_compiler: names.T_HALT_COMPILER,
    // modifiers
    static: names.T_STATIC,
    abstract: names.T_ABSTRACT,
    final: names.T_FINAL,
    private: names.T_PRIVATE,
    protected: names.T_PROTECTED,
    public: names.T_PUBLIC,
    unset: names.T_UNSET,
    list: names.T_LIST,
    array: names.T_ARRAY,
    callable: names.T_CALLABLE,
    // word operators
    or: names.T_LOGICAL_OR,
    and: names.T_LOGICAL_AND,
    xor: names.T_LOGICAL_XOR,
    match: names.T_MATCH,
    readonly: names.T_READ_ONLY,
  };

  this.castKeywords = {
    int: names.T_INT_CAST,
    integer: names.T_INT_CAST,
    real: names.T_DOUBLE_CAST,
    double: names.T_DOUBLE_CAST,
    float: names.T_DOUBLE_CAST,
    string: names.T_STRING_CAST,
    binary: names.T_STRING_CAST,
    array: names.T_ARRAY_CAST,
    object: names.T_OBJECT_CAST,
    bool: names.T_BOOL_CAST,
    boolean: names.T_BOOL_CAST,
    unset: names.T_UNSET_CAST,
  };
};
/**
 * Initialize the lexer with the specified input: resets the cursor, the
 * location tracking, the pending tokens queue, the condition stack and the
 * heredoc state, and (un)registers the `fn` keyword depending on `version`.
 * @function Lexer#setInput
 * @memberOf module:php-parser
 * @param {string} input the source code to tokenize
 * @returns {Lexer} this lexer
 */
Lexer.prototype.setInput = function (input) {
  this._input = input;
  this.size = input.length;
  this.yylineno = 1;
  this.offset = 0;
  this.yyprevcol = 0;
  this.yytext = "";
  this.yylloc = {
    first_offset: 0,
    first_line: 1,
    first_column: 0,
    prev_offset: 0,
    prev_line: 1,
    prev_column: 0,
    last_line: 1,
    last_column: 0,
  };
  this.tokens = [];
  // `fn` is only registered as a keyword when `version` is above 703
  if (this.version > 703) {
    this.keywords.fn = this.tok.T_FN;
  } else {
    delete this.keywords.fn;
  }
  this.done = this.offset >= this.size;
  if (!this.all_tokens && this.mode_eval) {
    // eval mode: start directly in scripting state, with INITIAL below it
    this.conditionStack = ["INITIAL"];
    this.begin("ST_IN_SCRIPTING");
  } else {
    this.conditionStack = [];
    this.begin("INITIAL");
  }
  // https://github.com/php/php-src/blob/999e32b65a8a4bb59e27e538fa68ffae4b99d863/Zend/zend_language_scanner.h#L59
  // Used for heredoc and nowdoc
  this.heredoc_label = {
    label: "",
    length: 0,
    indentation: 0,
    indentation_uses_spaces: false,
    finished: false,
    /*
     * Used by the parser to determine whether the current node segment is the
     * first encaps node. If true, the indentation is removed from the
     * beginning. If false, the previous node might be a variable '}', and the
     * leading spaces should not be removed until the first \n is met.
     */
    first_encaps_node: false,
    // for backward compatibility: the object stringifies to its label
    /* istanbul ignore next */
    toString: function () {
      return this.label;
    },
  };
  return this;
};
/**
 * Consumes one char from the input, appends it to `yytext` and updates the
 * line / column tracking. A "\r\n" pair is consumed as a whole, but only
 * the leading "\r" is returned.
 * @function Lexer#input
 * @memberOf module:php-parser
 * @returns {string} the consumed char, or an empty string when nothing is left
 */
Lexer.prototype.input = function () {
  const current = this._input[this.offset];
  if (!current) {
    return "";
  }

  this.yytext += current;
  this.offset++;

  const isCarriageReturn = current === "\r";
  if (isCarriageReturn && this._input[this.offset] === "\n") {
    // swallow the line feed of a CRLF pair together with the "\r"
    this.yytext += "\n";
    this.offset++;
  }

  if (isCarriageReturn || current === "\n") {
    this.yylineno++;
    this.yylloc.last_line = this.yylineno;
    // remember the column we are leaving
    this.yyprevcol = this.yylloc.last_column;
    this.yylloc.last_column = 0;
  } else {
    this.yylloc.last_column++;
  }
  return current;
};
/**
* Reverts the consumption of the last `size` chars: moves `offset` back,
* truncates `yytext` and restores `yylloc.last_line`, `yylloc.last_column`
* and `yylineno` accordingly.
* @function Lexer#unput
* @memberOf module:php-parser
* @param {number} size number of chars to give back (nothing is changed when it is not > 0)
* @returns {Lexer} this lexer
*/
Lexer.prototype.unput = function (size) {
if (size === 1) {
// 1 char unput (most cases)
this.offset--;
// when the char given back is the "\n" of a "\r\n" pair, give back the "\r" too
if (
this._input[this.offset] === "\n" &&
this._input[this.offset - 1] === "\r"
) {
this.offset--;
size++;
}
if (
this._input[this.offset] === "\r" ||
this._input[this.offset] === "\n"
) {
// going back over a line break: return to the previous line and to the
// column that was saved in yyprevcol
this.yylloc.last_line--;
this.yylineno--;
this.yylloc.last_column = this.yyprevcol;
} else {
this.yylloc.last_column--;
}
this.yytext = this.yytext.substring(0, this.yytext.length - size);
} else if (size > 0) {
this.offset -= size;
if (size < this.yytext.length) {
this.yytext = this.yytext.substring(0, this.yytext.length - size);
// re-calculate position: restart from the token start location and
// re-scan the text that is kept
this.yylloc.last_line = this.yylloc.first_line;
this.yylloc.last_column = this.yyprevcol = this.yylloc.first_column;
for (let i = 0; i < this.yytext.length; i++) {
let c = this.yytext[i];
if (c === "\r") {
// look at the char following the "\r" (it is skipped by the loop)
c = this.yytext[++i];
this.yyprevcol = this.yylloc.last_column;
this.yylloc.last_line++;
this.yylloc.last_column = 0;
if (c !== "\n") {
// lone "\r": the following char is either another line break or a
// regular char starting the new line
// NOTE(review): when "\r" is the last kept char, c is undefined here and
// last_column is still incremented - confirm this is intended
if (c === "\r") {
this.yylloc.last_line++;
} else {
this.yylloc.last_column++;
}
}
} else if (c === "\n") {
this.yyprevcol = this.yylloc.last_column;
this.yylloc.last_line++;
this.yylloc.last_column = 0;
} else {
this.yylloc.last_column++;
}
}
this.yylineno = this.yylloc.last_line;
} else {
// reset full text: everything read for the current token is given back,
// so the position returns to the token start
this.yytext = "";
this.yylloc.last_line = this.yylineno = this.yylloc.first_line;
this.yylloc.last_column = this.yylloc.first_column;
}
}
return this;
};
/**
 * Checks (without consuming anything) if the upcoming input is exactly `text`.
 * @function Lexer#tryMatch
 * @memberOf module:php-parser
 * @param {string} text the expected text
 * @returns {boolean} true when the next `text.length` chars equal `text`
 */
Lexer.prototype.tryMatch = function (text) {
  const upcoming = this.ahead(text.length);
  return upcoming === text;
};
/**
 * Checks (without consuming anything) if the upcoming input matches `text`,
 * ignoring the case of the input. Only the input is lower-cased, so `text`
 * has to be provided in lower case to be able to match.
 * @function Lexer#tryMatchCaseless
 * @memberOf module:php-parser
 * @param {string} text the expected text, in lower case
 * @returns {boolean} true when the lower-cased upcoming chars equal `text`
 */
Lexer.prototype.tryMatchCaseless = function (text) {
  const upcoming = this.ahead(text.length);
  return upcoming.toLowerCase() === text;
};
/**
 * Looks ahead: returns the next `size` chars without consuming them.
 * When the returned text ends with a "\r" that is immediately followed by a
 * "\n" in the input, the "\n" is appended so a "\r\n" pair is never split.
 * @function Lexer#ahead
 * @memberOf module:php-parser
 * @param {number} size number of chars to read
 * @returns {string} the upcoming text (shorter than `size` near the end of the input)
 */
Lexer.prototype.ahead = function (size) {
  const end = this.offset + size;
  let text = this._input.substring(this.offset, end);
  // substring() excludes `end`, so the char right after the slice is at
  // index `end` (not `end + 1`)
  if (text[text.length - 1] === "\r" && this._input[end] === "\n") {
    text += "\n";
  }
  return text;
};
/**
 * Consumes up to `size` chars: appends them to `yytext`, moves `offset`
 * forward and keeps the line / column tracking up to date. A "\r\n" pair
 * is consumed at once and counts for two chars. Stops early when the end of
 * the input is reached.
 * @function Lexer#consume
 * @memberOf module:php-parser
 * @param {number} size number of chars to consume
 * @returns {Lexer} this lexer
 */
Lexer.prototype.consume = function (size) {
  let consumed = 0;
  while (consumed < size) {
    const current = this._input[this.offset];
    if (!current) {
      break;
    }
    consumed++;
    this.yytext += current;
    this.offset++;

    const isCarriageReturn = current === "\r";
    if (isCarriageReturn && this._input[this.offset] === "\n") {
      // take the line feed of the CRLF pair as well
      this.yytext += "\n";
      this.offset++;
      consumed++;
    }

    if (isCarriageReturn || current === "\n") {
      this.yylineno++;
      this.yylloc.last_line = this.yylineno;
      this.yyprevcol = this.yylloc.last_column;
      this.yylloc.last_column = 0;
    } else {
      this.yylloc.last_column++;
    }
  }
  return this;
};
/**
 * Gets a snapshot of the current state (to be restored with `setState`).
 * The `first_*` and `last_*` location fields are copied into a new object
 * (the `prev_*` fields are not part of the snapshot); `heredoc_label` is
 * passed by reference.
 * @function Lexer#getState
 * @memberOf module:php-parser
 * @returns {object} the state snapshot
 */
Lexer.prototype.getState = function () {
  const { yytext, offset, yylineno, yyprevcol, heredoc_label } = this;
  const { first_offset, first_line, first_column, last_line, last_column } =
    this.yylloc;
  return {
    yytext,
    offset,
    yylineno,
    yyprevcol,
    yylloc: { first_offset, first_line, first_column, last_line, last_column },
    heredoc_label,
  };
};
/**
 * Restores a lexer state (as produced by `getState`). The `yylloc` object
 * of the state is adopted as is (not copied), and `heredoc_label` is only
 * replaced when the state provides one.
 * @function Lexer#setState
 * @memberOf module:php-parser
 * @param {object} state the state to restore
 * @returns {Lexer} this lexer
 */
Lexer.prototype.setState = function (state) {
  const { yytext, offset, yylineno, yyprevcol, yylloc, heredoc_label } = state;
  this.yytext = yytext;
  this.offset = offset;
  this.yylineno = yylineno;
  this.yyprevcol = yyprevcol;
  this.yylloc = yylloc;
  if (heredoc_label) {
    this.heredoc_label = heredoc_label;
  }
  return this;
};
/**
 * Queues a token at the end of the pending tokens list; `next` delivers the
 * queued tokens (first in, first out) before scanning the input again.
 * @function Lexer#appendToken
 * @memberOf module:php-parser
 * @param {*} value the token to deliver
 * @param {*} ahead either a state object (restored with `setState`) or a number of chars to consume when the token is delivered
 * @returns {Lexer} this lexer
 */
Lexer.prototype.appendToken = function (value, ahead) {
  const queued = [value, ahead];
  this.tokens.push(queued);
  return this;
};
/**
 * Returns the next meaningful token. Falsy results of `next` are skipped.
 * Unless `all_tokens` is enabled, white spaces, open tags and (unless
 * `comment_tokens` is enabled) comments are skipped too, an open tag with
 * echo is reported as `T_ECHO` and a close tag as an implicit ";".
 * Also records in `yylloc.prev_*` the location where the lexing started.
 * @function Lexer#lex
 * @memberOf module:php-parser
 * @returns {number|string} the token
 */
Lexer.prototype.lex = function () {
  this.yylloc.prev_offset = this.offset;
  this.yylloc.prev_line = this.yylloc.last_line;
  this.yylloc.prev_column = this.yylloc.last_column;
  let token = this.next() || this.lex();
  if (!this.all_tokens) {
    while (
      token === this.tok.T_WHITESPACE || // ignore white space
      (!this.comment_tokens &&
        (token === this.tok.T_COMMENT || // ignore single lines comments
          token === this.tok.T_DOC_COMMENT)) || // ignore doc comments
      // ignore open tags
      token === this.tok.T_OPEN_TAG
    ) {
      token = this.next() || this.lex();
    }
    // strict comparison, like every other token check of this function
    if (token === this.tok.T_OPEN_TAG_WITH_ECHO) {
      // https://github.com/php/php-src/blob/7ff186434e82ee7be7c59d0db9a976641cf7b09c/Zend/zend_compile.c#L1683
      // open tag with echo statement
      return this.tok.T_ECHO;
    } else if (token === this.tok.T_CLOSE_TAG) {
      // https://github.com/php/php-src/blob/7ff186434e82ee7be7c59d0db9a976641cf7b09c/Zend/zend_compile.c#L1680
      return ";"; /* implicit ; */
    }
  }
  // no previous offset recorded (0): fall back on the token start location
  if (!this.yylloc.prev_offset) {
    this.yylloc.prev_offset = this.yylloc.first_offset;
    this.yylloc.prev_line = this.yylloc.first_line;
    this.yylloc.prev_column = this.yylloc.first_column;
  }
  return token;
};
/**
 * Activates a new lexer condition state: pushes it onto the condition stack
 * and selects the `match<condition>` method as the current state callback.
 * @function Lexer#begin
 * @memberOf module:php-parser
 * @param {*} condition name of the condition state
 * @returns {Lexer} this lexer
 * @throws {Error} when the lexer has no `match<condition>` function (the condition is already pushed at that point)
 */
Lexer.prototype.begin = function (condition) {
  this.conditionStack.push(condition);
  this.curCondition = condition;

  const handler = this["match" + condition];
  this.stateCb = handler;
  /* istanbul ignore next */
  if (typeof handler !== "function") {
    throw new Error('Undefined condition state "' + condition + '"');
  }
  return this;
};
/**
 * Pops the active lexer condition state off the condition stack and
 * re-activates the one below it. The last remaining entry is never removed:
 * it is returned and stays active.
 * @function Lexer#popState
 * @memberOf module:php-parser
 * @returns {string|*} the condition that was active before the call
 * @throws {Error} when the lexer has no `match<condition>` function for the new active condition
 */
Lexer.prototype.popState = function () {
  const stack = this.conditionStack;
  let condition;
  if (stack.length > 1) {
    condition = stack.pop();
  } else {
    // keep the bottom of the stack in place
    condition = stack[0];
  }

  this.curCondition = stack[stack.length - 1];
  const handler = this["match" + this.curCondition];
  this.stateCb = handler;
  /* istanbul ignore next */
  if (typeof handler !== "function") {
    throw new Error('Undefined condition state "' + this.curCondition + '"');
  }
  return condition;
};
/**
 * Returns the next match in the input: starts a new token at the current
 * location, then delivers either the oldest queued token or the result of
 * the current state callback. Returns `EOF` once the lexer is done.
 * @function Lexer#next
 * @memberOf module:php-parser
 * @returns {number|*} the token (may be falsy when the state callback matched nothing to report)
 */
Lexer.prototype.next = function () {
  if (!this._input) {
    this.done = true;
  }

  // the new token starts where the previous one ended
  const start = this.yylloc;
  start.first_offset = this.offset;
  start.first_line = start.last_line;
  start.first_column = start.last_column;
  this.yytext = "";

  if (this.done) {
    start.prev_offset = start.first_offset;
    start.prev_line = start.first_line;
    start.prev_column = start.first_column;
    return this.EOF;
  }

  let token;
  if (this.tokens.length > 0) {
    // deliver a queued token: jump to its recorded state, or consume its size
    const queued = this.tokens.shift();
    const ahead = queued[1];
    if (typeof ahead === "object") {
      this.setState(ahead);
    } else {
      this.consume(ahead);
    }
    token = queued[0];
  } else {
    token = this.stateCb();
  }

  if (this.offset >= this.size && this.tokens.length === 0) {
    this.done = true;
  }

  /* istanbul ignore next */
  if (this.debug) {
    const tName =
      typeof token === "number"
        ? this.engine.tokens.values[token]
        : '"' + token + '"';
    // read yylloc again: setState may have replaced the object
    const { first_line, first_column, last_line, last_column } = this.yylloc;
    const e = new Error(
      `${tName}\tfrom ${first_line},${first_column}\t - to ${last_line},${last_column}\t"${this.yytext}"`,
    );
    // eslint-disable-next-line no-console
    console.error(e.stack);
  }
  return token;
};
// Extends the lexer with its states and helpers: every enumerable property
// of each extension module is copied onto the prototype (modules listed
// later override properties of the earlier ones).
for (const extension of [
  require("./lexer/attribute.js"),
  require("./lexer/comments.js"),
  require("./lexer/initial.js"),
  require("./lexer/numbers.js"),
  require("./lexer/property.js"),
  require("./lexer/scripting.js"),
  require("./lexer/strings.js"),
  require("./lexer/tokens.js"),
  require("./lexer/utils.js"),
]) {
  for (const key in extension) {
    Lexer.prototype[key] = extension[key];
  }
}

module.exports = Lexer;