@rightcapital/phpdoc-parser
Version:
TypeScript version of PHPDoc parser with support for intersection types and generics
144 lines (143 loc) • 6.45 kB
JavaScript
// CommonJS module prelude.
// NOTE(review): this looks like compiler-emitted output (TypeScript -> CommonJS) — confirm before hand-editing.
"use strict";
// Tag the exports object with `__esModule: true`.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the `Lexer` export up front as `undefined`; the class is assigned to it after its definition.
exports.Lexer = void 0;
/**
 * Regex-driven tokenizer for PHPDoc text.
 *
 * A single combined regular expression is compiled per instance. Each
 * alternative is wrapped in a named capture group whose name is the token
 * type, so the group that participated in a match identifies the token.
 *
 * The token type names (`Lexer.TOKEN_*`) are static properties assigned to
 * the class after its definition.
 */
class Lexer {
    constructor() {
        // Build the combined pattern once and reuse it for every tokenize() call.
        this.regexp = this.generateRegexp();
    }
    /**
     * Split `source` into a flat list of tokens.
     *
     * @param {string} source Text to tokenize.
     * @returns {Array<[string, string, number]>} `[value, type, line]` tuples.
     *   `line` is 1-based and is bumped after every TOKEN_PHPDOC_EOL token
     *   (the EOL token itself carries the line it terminates). The list always
     *   ends with `['', Lexer.TOKEN_END, line]`.
     */
    tokenize(source) {
        const tokens = [];
        let line = 1;
        for (const match of source.matchAll(this.regexp)) {
            // The top-level alternatives are mutually exclusive, so the first
            // named group with a value is the token type. The unnamed groups
            // inside the numeric patterns never show up in `match.groups`.
            const [type] = Object.entries(match.groups).find(([, value]) => value !== undefined);
            tokens.push([match[0], type, line]);
            if (type === Lexer.TOKEN_PHPDOC_EOL) {
                line++;
            }
        }
        tokens.push(['', Lexer.TOKEN_END, line]);
        return tokens;
    }
    /**
     * Build the combined tokenizer expression.
     *
     * Insertion order of `patterns` is the alternation order, and therefore the
     * match priority: e.g. TOKEN_REFERENCE must precede TOKEN_INTERSECTION,
     * TOKEN_DOUBLE_ARROW / TOKEN_ARROW must precede TOKEN_EQUAL, TOKEN_FLOAT
     * must precede TOKEN_INTEGER, and TOKEN_CLOSE_PHPDOC must precede
     * TOKEN_WILDCARD.
     *
     * @returns {RegExp} Global, case-insensitive, dot-all expression with one
     *   named group per token type.
     */
    generateRegexp() {
        const patterns = {
            [Lexer.TOKEN_HORIZONTAL_WS]: '[\\x09\\x20]+',
            [Lexer.TOKEN_IDENTIFIER]: '(?:[\\\\]?[a-z_\\x80-\\xFF][0-9a-z_\\x80-\\xFF-]*)+',
            [Lexer.TOKEN_THIS_VARIABLE]: '\\$this(?![0-9a-z_\\x80-\\xFF])',
            [Lexer.TOKEN_VARIABLE]: '\\$[a-z_\\x80-\\xFF][0-9a-z_\\x80-\\xFF]*',
            // '&' counts as a reference only when followed (after optional
            // whitespace) by one of `. , = )` or by a variable other than `$this`.
            [Lexer.TOKEN_REFERENCE]: '&(?=\\s*(?:[.,=)]|(?:\\$(?!this(?![0-9a-z_\\x80-\\xFF])))))',
            [Lexer.TOKEN_UNION]: '\\|',
            [Lexer.TOKEN_INTERSECTION]: '&',
            [Lexer.TOKEN_NULLABLE]: '\\?',
            [Lexer.TOKEN_NEGATED]: '!',
            [Lexer.TOKEN_OPEN_PARENTHESES]: '\\(',
            [Lexer.TOKEN_CLOSE_PARENTHESES]: '\\)',
            [Lexer.TOKEN_OPEN_ANGLE_BRACKET]: '<',
            [Lexer.TOKEN_CLOSE_ANGLE_BRACKET]: '>',
            [Lexer.TOKEN_OPEN_SQUARE_BRACKET]: '\\[',
            [Lexer.TOKEN_CLOSE_SQUARE_BRACKET]: '\\]',
            [Lexer.TOKEN_OPEN_CURLY_BRACKET]: '\\{',
            [Lexer.TOKEN_CLOSE_CURLY_BRACKET]: '\\}',
            [Lexer.TOKEN_COMMA]: ',',
            [Lexer.TOKEN_VARIADIC]: '\\.\\.\\.',
            [Lexer.TOKEN_DOUBLE_COLON]: '::',
            [Lexer.TOKEN_DOUBLE_ARROW]: '=>',
            [Lexer.TOKEN_ARROW]: '->',
            [Lexer.TOKEN_EQUAL]: '=',
            [Lexer.TOKEN_COLON]: ':',
            [Lexer.TOKEN_OPEN_PHPDOC]: '\\/\\*\\*(?=\\s)\\x20?',
            [Lexer.TOKEN_CLOSE_PHPDOC]: '\\*\\/',
            [Lexer.TOKEN_PHPDOC_TAG]: '@(?:[a-z][a-z0-9-\\\\]+:)?[a-z][a-z0-9-\\\\]*',
            // A line break plus the following indentation and optional leading
            // '*' (but not the '*' of a closing '*/').
            [Lexer.TOKEN_PHPDOC_EOL]: '\\r?\\n[\\x09\\x20]*(?:\\*(?!\\/)\\x20?)?',
            [Lexer.TOKEN_FLOAT]: '[+-]?(?:(?:[0-9]+(_[0-9]+)*\\.[0-9]*(_[0-9]+)*(?:e[+-]?[0-9]+(_[0-9]+)*)?)|(?:[0-9]*(_[0-9]+)*\\.[0-9]+(_[0-9]+)*(?:e[+-]?[0-9]+(_[0-9]+)*)?)|(?:[0-9]+(_[0-9]+)*e[+-]?[0-9]+(_[0-9]+)*))',
            [Lexer.TOKEN_INTEGER]: '[+-]?(?:(?:0b[0-1]+(_[0-1]+)*)|(?:0o[0-7]+(_[0-7]+)*)|(?:0x[0-9a-f]+(_[0-9a-f]+)*)|(?:[0-9]+(_[0-9]+)*))',
            [Lexer.TOKEN_SINGLE_QUOTED_STRING]: "'(?:\\\\[^\\r\\n]|[^'\\r\\n\\\\])*'",
            [Lexer.TOKEN_DOUBLE_QUOTED_STRING]: '"(?:\\\\[^\\r\\n]|[^"\\r\\n\\\\])*"',
            [Lexer.TOKEN_WILDCARD]: '\\*',
        };
        // Catch-all, deliberately last: any run of non-whitespace characters
        // that does not run into a closing '*/'.
        patterns[Lexer.TOKEN_OTHER] = '(?:(?!\\*\\/)[^\\s])+';
        const combinedRegExp = Object.entries(patterns)
            .map(([key, pattern]) => `(?<${key}>${pattern})`)
            .join('|');
        return new RegExp(combinedRegExp, 'sig');
    }
}
exports.Lexer = Lexer;
// Token type identifiers. Each constant's value is its own name; the lexer
// reports these strings as the `type` element of every token it emits.
Object.assign(Lexer, {
    TOKEN_REFERENCE: 'TOKEN_REFERENCE',
    TOKEN_UNION: 'TOKEN_UNION',
    TOKEN_INTERSECTION: 'TOKEN_INTERSECTION',
    TOKEN_NULLABLE: 'TOKEN_NULLABLE',
    TOKEN_OPEN_PARENTHESES: 'TOKEN_OPEN_PARENTHESES',
    TOKEN_CLOSE_PARENTHESES: 'TOKEN_CLOSE_PARENTHESES',
    TOKEN_OPEN_ANGLE_BRACKET: 'TOKEN_OPEN_ANGLE_BRACKET',
    TOKEN_CLOSE_ANGLE_BRACKET: 'TOKEN_CLOSE_ANGLE_BRACKET',
    TOKEN_OPEN_SQUARE_BRACKET: 'TOKEN_OPEN_SQUARE_BRACKET',
    TOKEN_CLOSE_SQUARE_BRACKET: 'TOKEN_CLOSE_SQUARE_BRACKET',
    TOKEN_COMMA: 'TOKEN_COMMA',
    TOKEN_VARIADIC: 'TOKEN_VARIADIC',
    TOKEN_DOUBLE_COLON: 'TOKEN_DOUBLE_COLON',
    TOKEN_DOUBLE_ARROW: 'TOKEN_DOUBLE_ARROW',
    TOKEN_EQUAL: 'TOKEN_EQUAL',
    TOKEN_OPEN_PHPDOC: 'TOKEN_OPEN_PHPDOC',
    TOKEN_CLOSE_PHPDOC: 'TOKEN_CLOSE_PHPDOC',
    TOKEN_PHPDOC_TAG: 'TOKEN_PHPDOC_TAG',
    TOKEN_FLOAT: 'TOKEN_FLOAT',
    TOKEN_INTEGER: 'TOKEN_INTEGER',
    TOKEN_SINGLE_QUOTED_STRING: 'TOKEN_SINGLE_QUOTED_STRING',
    TOKEN_DOUBLE_QUOTED_STRING: 'TOKEN_DOUBLE_QUOTED_STRING',
    TOKEN_IDENTIFIER: 'TOKEN_IDENTIFIER',
    TOKEN_THIS_VARIABLE: 'TOKEN_THIS_VARIABLE',
    TOKEN_VARIABLE: 'TOKEN_VARIABLE',
    TOKEN_HORIZONTAL_WS: 'TOKEN_HORIZONTAL_WS',
    TOKEN_PHPDOC_EOL: 'TOKEN_PHPDOC_EOL',
    TOKEN_OTHER: 'TOKEN_OTHER',
    TOKEN_END: 'TOKEN_END',
    TOKEN_COLON: 'TOKEN_COLON',
    TOKEN_WILDCARD: 'TOKEN_WILDCARD',
    TOKEN_OPEN_CURLY_BRACKET: 'TOKEN_OPEN_CURLY_BRACKET',
    TOKEN_CLOSE_CURLY_BRACKET: 'TOKEN_CLOSE_CURLY_BRACKET',
    TOKEN_NEGATED: 'TOKEN_NEGATED',
    TOKEN_ARROW: 'TOKEN_ARROW',
});
// Display label for each token type, keyed by the token type string.
// Punctuation tokens map to their literal text; the remaining tokens map to a
// descriptive word or to the token type name itself.
// NOTE(review): presumably consumed when building parser error messages — confirm against callers.
Lexer.TOKEN_LABELS = {
    [Lexer.TOKEN_REFERENCE]: '&',
    [Lexer.TOKEN_UNION]: '|',
    [Lexer.TOKEN_INTERSECTION]: '&',
    [Lexer.TOKEN_NULLABLE]: '?',
    [Lexer.TOKEN_NEGATED]: '!',
    [Lexer.TOKEN_OPEN_PARENTHESES]: '(',
    [Lexer.TOKEN_CLOSE_PARENTHESES]: ')',
    [Lexer.TOKEN_OPEN_ANGLE_BRACKET]: '<',
    [Lexer.TOKEN_CLOSE_ANGLE_BRACKET]: '>',
    [Lexer.TOKEN_OPEN_SQUARE_BRACKET]: '[',
    [Lexer.TOKEN_CLOSE_SQUARE_BRACKET]: ']',
    [Lexer.TOKEN_OPEN_CURLY_BRACKET]: '{',
    [Lexer.TOKEN_CLOSE_CURLY_BRACKET]: '}',
    [Lexer.TOKEN_COMMA]: ',',
    [Lexer.TOKEN_COLON]: ':',
    [Lexer.TOKEN_VARIADIC]: '...',
    [Lexer.TOKEN_DOUBLE_COLON]: '::',
    [Lexer.TOKEN_DOUBLE_ARROW]: '=>',
    [Lexer.TOKEN_ARROW]: '->',
    [Lexer.TOKEN_EQUAL]: '=',
    [Lexer.TOKEN_OPEN_PHPDOC]: '/**',
    [Lexer.TOKEN_CLOSE_PHPDOC]: '*/',
    [Lexer.TOKEN_PHPDOC_TAG]: 'TOKEN_PHPDOC_TAG',
    [Lexer.TOKEN_PHPDOC_EOL]: 'TOKEN_PHPDOC_EOL',
    [Lexer.TOKEN_FLOAT]: 'TOKEN_FLOAT',
    [Lexer.TOKEN_INTEGER]: 'TOKEN_INTEGER',
    [Lexer.TOKEN_SINGLE_QUOTED_STRING]: 'TOKEN_SINGLE_QUOTED_STRING',
    [Lexer.TOKEN_DOUBLE_QUOTED_STRING]: 'TOKEN_DOUBLE_QUOTED_STRING',
    [Lexer.TOKEN_IDENTIFIER]: 'type',
    [Lexer.TOKEN_THIS_VARIABLE]: '$this',
    [Lexer.TOKEN_VARIABLE]: 'variable',
    [Lexer.TOKEN_HORIZONTAL_WS]: 'TOKEN_HORIZONTAL_WS',
    [Lexer.TOKEN_OTHER]: 'TOKEN_OTHER',
    [Lexer.TOKEN_END]: 'TOKEN_END',
    [Lexer.TOKEN_WILDCARD]: '*',
};
// Positions of the fields inside a token tuple, which the lexer emits as
// [matched text, token type, line number].
Object.assign(Lexer, {
    VALUE_OFFSET: 0,
    TYPE_OFFSET: 1,
    LINE_OFFSET: 2,
});