@rightcapital/phpdoc-parser

A TypeScript version of the PHPDoc parser, with support for intersection types and generics.

144 lines (143 loc) 6.45 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Lexer = void 0; class Lexer { constructor() { this.regexp = this.generateRegexp(); } tokenize(source) { const matchArray = source.matchAll(this.regexp); const tokens = []; let line = 1; for (const match of matchArray) { const type = Object.entries(match.groups).filter(([_, value]) => typeof value !== 'undefined')[0][0]; tokens.push([match[0], type, line]); if (type !== Lexer.TOKEN_PHPDOC_EOL) { continue; } line++; } tokens.push(['', Lexer.TOKEN_END, line]); return tokens; } generateRegexp() { const patterns = { [Lexer.TOKEN_HORIZONTAL_WS]: '[\\x09\\x20]+', [Lexer.TOKEN_IDENTIFIER]: '(?:[\\\\]?[a-z_\\x80-\\xFF][0-9a-z_\\x80-\\xFF-]*)+', [Lexer.TOKEN_THIS_VARIABLE]: '\\$this(?![0-9a-z_\\x80-\\xFF])', [Lexer.TOKEN_VARIABLE]: '\\$[a-z_\\x80-\\xFF][0-9a-z_\\x80-\\xFF]*', [Lexer.TOKEN_REFERENCE]: '&(?=\\s*(?:[.,=)]|(?:\\$(?!this(?![0-9a-z_\\x80-\\xFF])))))', [Lexer.TOKEN_UNION]: '\\|', [Lexer.TOKEN_INTERSECTION]: '&', [Lexer.TOKEN_NULLABLE]: '\\?', [Lexer.TOKEN_NEGATED]: '!', [Lexer.TOKEN_OPEN_PARENTHESES]: '\\(', [Lexer.TOKEN_CLOSE_PARENTHESES]: '\\)', [Lexer.TOKEN_OPEN_ANGLE_BRACKET]: '<', [Lexer.TOKEN_CLOSE_ANGLE_BRACKET]: '>', [Lexer.TOKEN_OPEN_SQUARE_BRACKET]: '\\[', [Lexer.TOKEN_CLOSE_SQUARE_BRACKET]: '\\]', [Lexer.TOKEN_OPEN_CURLY_BRACKET]: '\\{', [Lexer.TOKEN_CLOSE_CURLY_BRACKET]: '\\}', [Lexer.TOKEN_COMMA]: ',', [Lexer.TOKEN_VARIADIC]: '\\.\\.\\.', [Lexer.TOKEN_DOUBLE_COLON]: '::', [Lexer.TOKEN_DOUBLE_ARROW]: '=>', [Lexer.TOKEN_ARROW]: '->', [Lexer.TOKEN_EQUAL]: '=', [Lexer.TOKEN_COLON]: ':', [Lexer.TOKEN_OPEN_PHPDOC]: '\\/\\*\\*(?=\\s)\\x20?', [Lexer.TOKEN_CLOSE_PHPDOC]: '\\*\\/', [Lexer.TOKEN_PHPDOC_TAG]: '@(?:[a-z][a-z0-9-\\\\]+:)?[a-z][a-z0-9-\\\\]*', [Lexer.TOKEN_PHPDOC_EOL]: '\\r?\\n[\\x09\\x20]*(?:\\*(?!\\/)\\x20?)?', [Lexer.TOKEN_FLOAT]: '[+-]?(?:(?:[0-9]+(_[0-9]+)*\\.[0-9]*(_[0-9]+)*(?:e[+-]?[0-9]+(_[0-9]+)*)?)|(?:[0-9]*(_[0-9]+)*\\.[0-9]+(_[0-9]+)*(?:e[+-]?[0-9]+(_[0-9]+)*)?)|(?:[0-9]+(_[0-9]+)*e[+-]?[0-9]+(_[0-9]+)*))', [Lexer.TOKEN_INTEGER]: '[+-]?(?:(?:0b[0-1]+(_[0-1]+)*)|(?:0o[0-7]+(_[0-7]+)*)|(?:0x[0-9a-f]+(_[0-9a-f]+)*)|(?:[0-9]+(_[0-9]+)*))', [Lexer.TOKEN_SINGLE_QUOTED_STRING]: "'(?:\\\\[^\\r\\n]|[^'\\r\\n\\\\])*'", [Lexer.TOKEN_DOUBLE_QUOTED_STRING]: '"(?:\\\\[^\\r\\n]|[^"\\r\\n\\\\])*"', [Lexer.TOKEN_WILDCARD]: '\\*', }; patterns[Lexer.TOKEN_OTHER] = '(?:(?!\\*\\/)[^\\s])+'; const combinedRegExp = Object.entries(patterns) .map(([key, pattern]) => { return `(?<${key}>${pattern})`; }) .join('|'); return new RegExp(combinedRegExp, 'sig'); } } exports.Lexer = Lexer; Lexer.TOKEN_REFERENCE = 'TOKEN_REFERENCE'; Lexer.TOKEN_UNION = 'TOKEN_UNION'; Lexer.TOKEN_INTERSECTION = 'TOKEN_INTERSECTION'; Lexer.TOKEN_NULLABLE = 'TOKEN_NULLABLE'; Lexer.TOKEN_OPEN_PARENTHESES = 'TOKEN_OPEN_PARENTHESES'; Lexer.TOKEN_CLOSE_PARENTHESES = 'TOKEN_CLOSE_PARENTHESES'; Lexer.TOKEN_OPEN_ANGLE_BRACKET = 'TOKEN_OPEN_ANGLE_BRACKET'; Lexer.TOKEN_CLOSE_ANGLE_BRACKET = 'TOKEN_CLOSE_ANGLE_BRACKET'; Lexer.TOKEN_OPEN_SQUARE_BRACKET = 'TOKEN_OPEN_SQUARE_BRACKET'; Lexer.TOKEN_CLOSE_SQUARE_BRACKET = 'TOKEN_CLOSE_SQUARE_BRACKET'; Lexer.TOKEN_COMMA = 'TOKEN_COMMA'; Lexer.TOKEN_VARIADIC = 'TOKEN_VARIADIC'; Lexer.TOKEN_DOUBLE_COLON = 'TOKEN_DOUBLE_COLON'; Lexer.TOKEN_DOUBLE_ARROW = 'TOKEN_DOUBLE_ARROW'; Lexer.TOKEN_EQUAL = 'TOKEN_EQUAL'; Lexer.TOKEN_OPEN_PHPDOC = 'TOKEN_OPEN_PHPDOC'; Lexer.TOKEN_CLOSE_PHPDOC = 'TOKEN_CLOSE_PHPDOC'; Lexer.TOKEN_PHPDOC_TAG = 'TOKEN_PHPDOC_TAG'; Lexer.TOKEN_FLOAT = 'TOKEN_FLOAT'; Lexer.TOKEN_INTEGER 
= 'TOKEN_INTEGER'; Lexer.TOKEN_SINGLE_QUOTED_STRING = 'TOKEN_SINGLE_QUOTED_STRING'; Lexer.TOKEN_DOUBLE_QUOTED_STRING = 'TOKEN_DOUBLE_QUOTED_STRING'; Lexer.TOKEN_IDENTIFIER = 'TOKEN_IDENTIFIER'; Lexer.TOKEN_THIS_VARIABLE = 'TOKEN_THIS_VARIABLE'; Lexer.TOKEN_VARIABLE = 'TOKEN_VARIABLE'; Lexer.TOKEN_HORIZONTAL_WS = 'TOKEN_HORIZONTAL_WS'; Lexer.TOKEN_PHPDOC_EOL = 'TOKEN_PHPDOC_EOL'; Lexer.TOKEN_OTHER = 'TOKEN_OTHER'; Lexer.TOKEN_END = 'TOKEN_END'; Lexer.TOKEN_COLON = 'TOKEN_COLON'; Lexer.TOKEN_WILDCARD = 'TOKEN_WILDCARD'; Lexer.TOKEN_OPEN_CURLY_BRACKET = 'TOKEN_OPEN_CURLY_BRACKET'; Lexer.TOKEN_CLOSE_CURLY_BRACKET = 'TOKEN_CLOSE_CURLY_BRACKET'; Lexer.TOKEN_NEGATED = 'TOKEN_NEGATED'; Lexer.TOKEN_ARROW = 'TOKEN_ARROW'; Lexer.TOKEN_LABELS = { [Lexer.TOKEN_REFERENCE]: '&', [Lexer.TOKEN_UNION]: '|', [Lexer.TOKEN_INTERSECTION]: '&', [Lexer.TOKEN_NULLABLE]: '?', [Lexer.TOKEN_NEGATED]: '!', [Lexer.TOKEN_OPEN_PARENTHESES]: '(', [Lexer.TOKEN_CLOSE_PARENTHESES]: ')', [Lexer.TOKEN_OPEN_ANGLE_BRACKET]: '<', [Lexer.TOKEN_CLOSE_ANGLE_BRACKET]: '>', [Lexer.TOKEN_OPEN_SQUARE_BRACKET]: '[', [Lexer.TOKEN_CLOSE_SQUARE_BRACKET]: ']', [Lexer.TOKEN_OPEN_CURLY_BRACKET]: '{', [Lexer.TOKEN_CLOSE_CURLY_BRACKET]: '}', [Lexer.TOKEN_COMMA]: ',', [Lexer.TOKEN_COLON]: ':', [Lexer.TOKEN_VARIADIC]: '...', [Lexer.TOKEN_DOUBLE_COLON]: '::', [Lexer.TOKEN_DOUBLE_ARROW]: '=>', [Lexer.TOKEN_ARROW]: '->', [Lexer.TOKEN_EQUAL]: '=', [Lexer.TOKEN_OPEN_PHPDOC]: '/**', [Lexer.TOKEN_CLOSE_PHPDOC]: '*/', [Lexer.TOKEN_PHPDOC_TAG]: 'TOKEN_PHPDOC_TAG', [Lexer.TOKEN_PHPDOC_EOL]: 'TOKEN_PHPDOC_EOL', [Lexer.TOKEN_FLOAT]: 'TOKEN_FLOAT', [Lexer.TOKEN_INTEGER]: 'TOKEN_INTEGER', [Lexer.TOKEN_SINGLE_QUOTED_STRING]: 'TOKEN_SINGLE_QUOTED_STRING', [Lexer.TOKEN_DOUBLE_QUOTED_STRING]: 'TOKEN_DOUBLE_QUOTED_STRING', [Lexer.TOKEN_IDENTIFIER]: 'type', [Lexer.TOKEN_THIS_VARIABLE]: '$this', [Lexer.TOKEN_VARIABLE]: 'variable', [Lexer.TOKEN_HORIZONTAL_WS]: 'TOKEN_HORIZONTAL_WS', [Lexer.TOKEN_OTHER]: 'TOKEN_OTHER', [Lexer.TOKEN_END]: 'TOKEN_END', [Lexer.TOKEN_WILDCARD]: '*', }; Lexer.VALUE_OFFSET = 0; Lexer.TYPE_OFFSET = 1; Lexer.LINE_OFFSET = 2;
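
A minimal usage sketch of the lexer above, assuming the package's entry point re-exports the Lexer class (the exact import path in the published build may differ). It tokenizes a short docblock and reads each token back through the VALUE_OFFSET, TYPE_OFFSET, and LINE_OFFSET constants defined on the class.

const { Lexer } = require('@rightcapital/phpdoc-parser');

const lexer = new Lexer();
const tokens = lexer.tokenize('/** @param array<int, string> $items */');

for (const token of tokens) {
    // Each token is a [value, type, line] tuple; the offsets are static constants on Lexer.
    const value = token[Lexer.VALUE_OFFSET];
    const type = token[Lexer.TYPE_OFFSET];
    const line = token[Lexer.LINE_OFFSET];
    console.log(`${line}: ${type} ${JSON.stringify(value)}`);
}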