buntis
Version:
A 100% compliant, self-hosted TypeScript parser that emits an ESTree-compatible abstract syntax tree
555 lines (488 loc) • 20.2 kB
text/typescript
import { Token } from '../token';
import { Chars } from '../chars';
import { ParserState, Context } from '../common';
import { scanNumber, scanLeadingZero } from './numeric';
import { scanStringLiteral, scanTemplate } from './string';
import { scanIdentifierOrKeyword, scanIdentifierSlowPath, scanUnicodeEscapeIdStart } from './identifier';
import { advance, isExoticECMAScriptWhitespace } from './common';
import { report, Errors } from '../errors';
import { skipSingleLineComment, skipMultiLineComment } from './comments';
import { scanRegularExpression } from './regexp';
import { unicodeLookup } from './unicode';
/**
 * Lookup table mapping an ASCII code (the array index, 0-127) to the `Token`
 * kind used to dispatch on the first character of a token.
 *
 * `scanSingleToken` indexes this table with the current code point when it is
 * `<= 0x7e`, so the final entry (127 - Delete) is not consulted by that function.
 *
 * Lowercase letters map to `Token.IdentifierOrKeyword` except `h`, `j`, `q` and
 * `x`, which map to `Token.Identifier` like the uppercase letters, `$` and `_`.
 * `scanSingleToken` scans the former with `canBeKeyword = 1` and the latter with
 * `canBeKeyword = 0` (presumably because no keyword starts with those letters -
 * TODO confirm against the keyword table).
 *
 * The order of the entries is significant: each position must match its ASCII code.
 */
export const firstCharKinds = [
/* 0 - Null */ Token.Error,
/* 1 - Start of Heading */ Token.Error,
/* 2 - Start of Text */ Token.Error,
/* 3 - End of Text */ Token.Error,
/* 4 - End of Transm. */ Token.Error,
/* 5 - Enquiry */ Token.Error,
/* 6 - Acknowledgment */ Token.Error,
/* 7 - Bell */ Token.Error,
/* 8 - Backspace */ Token.Error,
/* 9 - Horizontal Tab */ Token.WhiteSpace,
/* 10 - Line Feed */ Token.LineFeed,
/* 11 - Vertical Tab */ Token.WhiteSpace,
/* 12 - Form Feed */ Token.WhiteSpace,
/* 13 - Carriage Return */ Token.CarriageReturn,
/* 14 - Shift Out */ Token.Error,
/* 15 - Shift In */ Token.Error,
/* 16 - Data Link Escape */ Token.Error,
/* 17 - Device Control 1 */ Token.Error,
/* 18 - Device Control 2 */ Token.Error,
/* 19 - Device Control 3 */ Token.Error,
/* 20 - Device Control 4 */ Token.Error,
/* 21 - Negative Ack. */ Token.Error,
/* 22 - Synchronous Idle */ Token.Error,
/* 23 - End of Transmit */ Token.Error,
/* 24 - Cancel */ Token.Error,
/* 25 - End of Medium */ Token.Error,
/* 26 - Substitute */ Token.Error,
/* 27 - Escape */ Token.Error,
/* 28 - File Separator */ Token.Error,
/* 29 - Group Separator */ Token.Error,
/* 30 - Record Separator */ Token.Error,
/* 31 - Unit Separator */ Token.Error,
/* 32 - Space */ Token.WhiteSpace,
/* 33 - ! */ Token.Negate,
/* 34 - " */ Token.StringLiteral,
/* 35 - # */ Token.PrivateField,
/* 36 - $ */ Token.Identifier,
/* 37 - % */ Token.Modulo,
/* 38 - & */ Token.BitwiseAnd,
/* 39 - ' */ Token.StringLiteral,
/* 40 - ( */ Token.LeftParen,
/* 41 - ) */ Token.RightParen,
/* 42 - * */ Token.Multiply,
/* 43 - + */ Token.Add,
/* 44 - , */ Token.Comma,
/* 45 - - */ Token.Subtract,
/* 46 - . */ Token.Period,
/* 47 - / */ Token.Divide,
/* 48 - 0 */ Token.LeadingZero,
/* 49 - 1 */ Token.NumericLiteral,
/* 50 - 2 */ Token.NumericLiteral,
/* 51 - 3 */ Token.NumericLiteral,
/* 52 - 4 */ Token.NumericLiteral,
/* 53 - 5 */ Token.NumericLiteral,
/* 54 - 6 */ Token.NumericLiteral,
/* 55 - 7 */ Token.NumericLiteral,
/* 56 - 8 */ Token.NumericLiteral,
/* 57 - 9 */ Token.NumericLiteral,
/* 58 - : */ Token.Colon,
/* 59 - ; */ Token.Semicolon,
/* 60 - < */ Token.LessThan,
/* 61 - = */ Token.Assign,
/* 62 - > */ Token.GreaterThan,
/* 63 - ? */ Token.QuestionMark,
/* 64 - @ */ Token.At,
/* 65 - A */ Token.Identifier,
/* 66 - B */ Token.Identifier,
/* 67 - C */ Token.Identifier,
/* 68 - D */ Token.Identifier,
/* 69 - E */ Token.Identifier,
/* 70 - F */ Token.Identifier,
/* 71 - G */ Token.Identifier,
/* 72 - H */ Token.Identifier,
/* 73 - I */ Token.Identifier,
/* 74 - J */ Token.Identifier,
/* 75 - K */ Token.Identifier,
/* 76 - L */ Token.Identifier,
/* 77 - M */ Token.Identifier,
/* 78 - N */ Token.Identifier,
/* 79 - O */ Token.Identifier,
/* 80 - P */ Token.Identifier,
/* 81 - Q */ Token.Identifier,
/* 82 - R */ Token.Identifier,
/* 83 - S */ Token.Identifier,
/* 84 - T */ Token.Identifier,
/* 85 - U */ Token.Identifier,
/* 86 - V */ Token.Identifier,
/* 87 - W */ Token.Identifier,
/* 88 - X */ Token.Identifier,
/* 89 - Y */ Token.Identifier,
/* 90 - Z */ Token.Identifier,
/* 91 - [ */ Token.LeftBracket,
/* 92 - \ */ Token.Backslash,
/* 93 - ] */ Token.RightBracket,
/* 94 - ^ */ Token.BitwiseXor,
/* 95 - _ */ Token.Identifier,
/* 96 - ` */ Token.TemplateTail,
/* 97 - a */ Token.IdentifierOrKeyword,
/* 98 - b */ Token.IdentifierOrKeyword,
/* 99 - c */ Token.IdentifierOrKeyword,
/* 100 - d */ Token.IdentifierOrKeyword,
/* 101 - e */ Token.IdentifierOrKeyword,
/* 102 - f */ Token.IdentifierOrKeyword,
/* 103 - g */ Token.IdentifierOrKeyword,
/* 104 - h */ Token.Identifier,
/* 105 - i */ Token.IdentifierOrKeyword,
/* 106 - j */ Token.Identifier,
/* 107 - k */ Token.IdentifierOrKeyword,
/* 108 - l */ Token.IdentifierOrKeyword,
/* 109 - m */ Token.IdentifierOrKeyword,
/* 110 - n */ Token.IdentifierOrKeyword,
/* 111 - o */ Token.IdentifierOrKeyword,
/* 112 - p */ Token.IdentifierOrKeyword,
/* 113 - q */ Token.Identifier,
/* 114 - r */ Token.IdentifierOrKeyword,
/* 115 - s */ Token.IdentifierOrKeyword,
/* 116 - t */ Token.IdentifierOrKeyword,
/* 117 - u */ Token.IdentifierOrKeyword,
/* 118 - v */ Token.IdentifierOrKeyword,
/* 119 - w */ Token.IdentifierOrKeyword,
/* 120 - x */ Token.Identifier,
/* 121 - y */ Token.IdentifierOrKeyword,
/* 122 - z */ Token.IdentifierOrKeyword,
/* 123 - { */ Token.LeftBrace,
/* 124 - | */ Token.BitwiseOr,
/* 125 - } */ Token.RightBrace,
/* 126 - ~ */ Token.Complement,
/* 127 - Delete */ Token.Error
];
/**
 * Scans a single token starting at `parser.index`, skipping any whitespace,
 * line terminators and comments that precede it.
 *
 * Code points `<= 0x7e` are dispatched through the `firstCharKinds` table;
 * everything else goes through the line-separator / identifier-start /
 * exotic-whitespace checks at the bottom of the loop.
 *
 * On every loop iteration `parser.tokenPos`, `parser.endColumn` and
 * `parser.endLine` are set to the current position. `parser.precedingLineBreak`
 * is set to `1` whenever a line terminator is consumed, and `parser.line` /
 * `parser.column` are updated accordingly.
 *
 * @param parser Parser object
 * @param context Context masks
 * @returns The scanned token, `Token.Error` for an unrecognised character or a
 * failed multi-line comment, or `Token.EndOfSource` when the input is exhausted.
 */
export function scanSingleToken(parser: ParserState, context: Context): Token {
  // Captured before anything is consumed: `-->` is only treated as a comment
  // at the very start of the input or after a line break.
  const isStartOfLine = parser.index === 0;

  while (parser.index < parser.length) {
    parser.tokenPos = parser.index;
    parser.endColumn = parser.column;
    parser.endLine = parser.line;

    let char = parser.nextCodePoint;

    if (char <= 0x7e) {
      const token = firstCharKinds[char];

      switch (token) {
        // Single-character punctuators (and ASCII control characters mapped to `Token.Error`)
        case Token.RightBrace:
        case Token.LeftBrace:
        case Token.Comma:
        case Token.Colon:
        case Token.Complement:
        case Token.LeftParen:
        case Token.RightParen:
        case Token.Semicolon:
        case Token.LeftBracket:
        case Token.RightBracket:
        case Token.At:
        case Token.Error:
          advance(parser);
          return token;

        case Token.WhiteSpace:
          advance(parser);
          continue;

        // `\r` and `\r\n`
        case Token.CarriageReturn:
          parser.precedingLineBreak = 1;
          parser.nextCodePoint = parser.source.charCodeAt(++parser.index);
          // A line feed (0x0a) directly after the carriage return belongs to the
          // same line terminator, so consume it here and count only one line.
          if (parser.index < parser.length && parser.nextCodePoint === 0x0a) {
            parser.nextCodePoint = parser.source.charCodeAt(++parser.index);
          }
          parser.column = 0;
          parser.line++;
          continue;

        // `\n`
        case Token.LineFeed:
          parser.precedingLineBreak = 1;
          parser.nextCodePoint = parser.source.charCodeAt(++parser.index);
          parser.column = 0;
          parser.line++;
          continue;

        // `a`...`z`
        case Token.IdentifierOrKeyword:
          return scanIdentifierOrKeyword(parser, context, /* canBeKeyword */ 1);

        // `A`...`Z`, `_var`, `$var`
        case Token.Identifier:
          return scanIdentifierOrKeyword(parser, context, /* canBeKeyword */ 0);

        // `1`...`9`
        case Token.NumericLiteral:
          return scanNumber(parser, context, /* nonOctalDecimalInteger */ 0, 0);

        // `'string'`, `"string"`
        case Token.StringLiteral:
          return scanStringLiteral(parser, context, char);

        // `0`
        case Token.LeadingZero:
          return scanLeadingZero(parser, context, char);

        // `\\u{N}var`
        case Token.Backslash:
          return scanUnicodeEscapeIdStart(parser, context);

        // ``string``
        case Token.TemplateTail:
          return scanTemplate(parser, context);

        // `.`, `...`, `.123` (numeric literal)
        case Token.Period: {
          const next = advance(parser);
          if (next >= Chars.Zero && next <= Chars.Nine) {
            return scanNumber(parser, context, /* nonOctalDecimalInteger */ 0, 1);
          }
          if (next === Chars.Period) {
            const index = parser.index + 1;
            if (index < parser.source.length && parser.source.charCodeAt(index) === Chars.Period) {
              // Skip the two remaining dots of `...` in one step
              parser.column += 2;
              parser.nextCodePoint = parser.source.charCodeAt((parser.index += 2));
              return Token.Ellipsis;
            }
          }
          return Token.Period;
        }

        // `<`, `<=`, `<<`, `<<=`, `</`, `<!--`
        case Token.LessThan: {
          advance(parser);
          if (parser.index < parser.length) {
            const next = parser.nextCodePoint;
            if (next === Chars.LessThan) {
              if (advance(parser) === Chars.EqualSign) {
                advance(parser);
                return Token.ShiftLeftAssign;
              }
              return Token.ShiftLeft;
            }
            if (next === Chars.EqualSign) {
              advance(parser);
              return Token.LessThanOrEqual;
            }
            if (next === Chars.Slash) {
              if ((context & Context.OptionsJSX) < 1) return Token.LessThan;
              const index = parser.index + 1;
              // Check that it's not a comment start. If it is, only the `<` that
              // was already consumed forms this token; the comment is left in
              // place so the next scan skips it.
              if (index < parser.length) {
                const following = parser.source.charCodeAt(index);
                if (following === Chars.Asterisk || following === Chars.Slash) return Token.LessThan;
              }
              advance(parser);
              return Token.JSXClose;
            }
            if (next === Chars.Exclamation) {
              // Treat HTML begin-comment as comment-till-end-of-line.
              if (
                parser.source.charCodeAt(parser.index + 2) === Chars.Hyphen &&
                parser.source.charCodeAt(parser.index + 1) === Chars.Hyphen &&
                (context & (Context.DisableWebCompat | Context.Module)) === 0
              ) {
                // NOTE(review): index moves by 2 but column by 3, and
                // `parser.nextCodePoint` is not refreshed here - confirm that
                // `skipSingleLineComment` does not depend on either.
                parser.index += 2;
                parser.column += 3;
                skipSingleLineComment(parser);
                continue;
              }
            }
          }
          return Token.LessThan;
        }

        // `?`, `??`, `?.`
        case Token.QuestionMark: {
          advance(parser);
          if (context & Context.OptionsNext) {
            let ch = parser.nextCodePoint;
            if (ch === Chars.QuestionMark) {
              advance(parser);
              return Token.Coalesce;
            }
            if (ch === Chars.Period) {
              // `?.` is only a punctuator when it is not followed by a decimal
              // digit (`0`-`9`), so that `a?.5:1` still scans as a conditional.
              ch = parser.source.charCodeAt(parser.index + 1) | 0;
              if (ch > Chars.Nine || ch < Chars.Zero) {
                advance(parser);
                return Token.QuestionMarkPeriod;
              }
            }
          }
          return Token.QuestionMark;
        }

        // `=`, `==`, `===`, `=>`
        case Token.Assign: {
          advance(parser);
          if (parser.index >= parser.length) return Token.Assign;
          const next = parser.nextCodePoint;
          if (next === Chars.EqualSign) {
            if (advance(parser) === Chars.EqualSign) {
              advance(parser);
              return Token.StrictEqual;
            }
            return Token.LooseEqual;
          }
          if (next === Chars.GreaterThan) {
            advance(parser);
            return Token.Arrow;
          }
          return Token.Assign;
        }

        // `!`, `!=`, `!==`
        case Token.Negate:
          if (advance(parser) !== Chars.EqualSign) return Token.Negate;
          if (advance(parser) !== Chars.EqualSign) return Token.LooseNotEqual;
          advance(parser);
          return Token.StrictNotEqual;

        // `%`, `%=`
        case Token.Modulo:
          if (advance(parser) !== Chars.EqualSign) return Token.Modulo;
          advance(parser);
          return Token.ModuloAssign;

        // `*`, `**`, `*=`, `**=`
        case Token.Multiply: {
          advance(parser);
          if (parser.index >= parser.length) return Token.Multiply;
          const next = parser.nextCodePoint;
          if (next === Chars.EqualSign) {
            advance(parser);
            return Token.MultiplyAssign;
          }
          if (next !== Chars.Asterisk) return Token.Multiply;
          advance(parser);
          if (parser.nextCodePoint !== Chars.EqualSign) return Token.Exponentiate;
          advance(parser);
          return Token.ExponentiateAssign;
        }

        // `^`, `^=`
        case Token.BitwiseXor:
          if (advance(parser) !== Chars.EqualSign) return Token.BitwiseXor;
          advance(parser);
          return Token.BitwiseXorAssign;

        // `+`, `++`, `+=`
        case Token.Add: {
          advance(parser);
          if (parser.index >= parser.length) return Token.Add;
          const next = parser.nextCodePoint;
          if (next === Chars.Plus) {
            advance(parser);
            return Token.Increment;
          }
          if (next === Chars.EqualSign) {
            advance(parser);
            return Token.AddAssign;
          }
          return Token.Add;
        }

        // `-`, `--`, `-=`, `-->`
        case Token.Subtract: {
          advance(parser);
          if (parser.index >= parser.length) return Token.Subtract;
          const next = parser.nextCodePoint;
          if (next === Chars.Hyphen) {
            // `-->` is skipped as a single-line comment only outside module code,
            // with web compatibility enabled, and at the start of a line.
            if (
              advance(parser) === Chars.GreaterThan &&
              (context & (Context.DisableWebCompat | Context.Module)) === 0 &&
              (parser.precedingLineBreak || isStartOfLine)
            ) {
              skipSingleLineComment(parser);
              continue;
            }
            return Token.Decrement;
          }
          if (next === Chars.EqualSign) {
            advance(parser);
            return Token.SubtractAssign;
          }
          return Token.Subtract;
        }

        // `/`, `/=`, `/>`, '/*..*/'
        case Token.Divide: {
          const next = advance(parser);
          if (next === Chars.Slash) {
            advance(parser);
            skipSingleLineComment(parser);
            continue;
          }
          if (next === Chars.Asterisk) {
            advance(parser);
            const state = skipMultiLineComment(parser, context);
            // A result below 1 is treated as a failed comment scan
            if (state < 1) return Token.Error;
            continue;
          }
          if (context & Context.AllowRegExp) {
            return scanRegularExpression(parser, context);
          }
          if (next === Chars.EqualSign) {
            advance(parser);
            return Token.DivideAssign;
          }
          return Token.Divide;
        }

        // `|`, `||`, `|=`
        case Token.BitwiseOr: {
          advance(parser);
          if (parser.index >= parser.length) return Token.BitwiseOr;
          const next = parser.nextCodePoint;
          if (next === Chars.VerticalBar) {
            advance(parser);
            return Token.LogicalOr;
          }
          if (next === Chars.EqualSign) {
            advance(parser);
            return Token.BitwiseOrAssign;
          }
          return Token.BitwiseOr;
        }

        // `>`, `>=`, `>>`, `>>>`, `>>=`, `>>>=`
        case Token.GreaterThan: {
          advance(parser);
          // Inside TS types a `>` is always returned on its own
          if (context & Context.InTSTypes || parser.index >= parser.length) return Token.GreaterThan;
          let next = parser.nextCodePoint;
          if (next === Chars.EqualSign) {
            advance(parser);
            return Token.GreaterThanOrEqual;
          }
          if (next !== Chars.GreaterThan) return Token.GreaterThan;
          next = advance(parser);
          if (next === Chars.GreaterThan) {
            if (advance(parser) !== Chars.EqualSign) return Token.LogicalShiftRight;
            advance(parser);
            return Token.LogicalShiftRightAssign;
          }
          if (next === Chars.EqualSign) {
            advance(parser);
            return Token.ShiftRightAssign;
          }
          return Token.ShiftRight;
        }

        // `&`, `&&`, `&=`
        case Token.BitwiseAnd: {
          advance(parser);
          if (parser.index >= parser.source.length) return Token.BitwiseAnd;
          const next = parser.nextCodePoint;
          if (next === Chars.Ampersand) {
            advance(parser);
            return Token.LogicalAnd;
          }
          if (next === Chars.EqualSign) {
            advance(parser);
            return Token.BitwiseAndAssign;
          }
          return Token.BitwiseAnd;
        }

        default: // ignore - unhandled kinds fall through to the checks below
      }
    }

    // Matches `Chars.LineSeparator` and the code point that differs from it only
    // in the lowest bit (presumably U+2028 / U+2029 - confirm against `Chars`).
    if ((char ^ Chars.LineSeparator) <= 1) {
      parser.precedingLineBreak = 1;
      parser.nextCodePoint = parser.source.charCodeAt(++parser.index);
      parser.column = 0;
      parser.line++;
      continue;
    }

    // Lead surrogate, or a code unit flagged in `unicodeLookup`
    // (presumably the identifier-start table - TODO confirm).
    if ((char & 0xfc00) === 0xd800 || ((unicodeLookup[(char >>> 5) + 34816] >>> char) & 31 & 1) !== 0) {
      // NOTE(review): this tests `char` for the trail-surrogate range and then
      // combines `char` with itself rather than with the following code unit.
      // It looks like surrogate pairs are not decoded as intended; behaviour is
      // left unchanged because it depends on `scanIdentifierSlowPath`.
      if ((char & 0xfc00) === 0xdc00) {
        char = ((char & 0x3ff) << 10) | (char & 0x3ff) | 0x10000;
        if (((unicodeLookup[(char >>> 5) + 0] >>> char) & 31 & 1) === 0) {
          report(parser, context, Errors.InvalidSMPCharacter, /* early */ 0);
          return Token.Error;
        }
        parser.index++;
        parser.nextCodePoint = char;
      }
      return scanIdentifierSlowPath(parser, context, '', /* canBeKeyword */ 0);
    }

    if (isExoticECMAScriptWhitespace(char)) {
      advance(parser);
      continue;
    }

    // Invalid ASCII code point/unit
    report(parser, context, Errors.InvalidCharacter, /* early */ 0);
    return Token.Error;
  }

  return Token.EndOfSource;
}
/**
 * Moves the parser on to the next token.
 *
 * Clears the preceding-line-break flag, records the current index, column and
 * line as the start position, then stores the result of `scanSingleToken` in
 * `parser.token`.
 *
 * @param parser Parser object
 * @param context Context masks
 * @returns The token that was scanned (also available as `parser.token`)
 */
export function nextToken(parser: ParserState, context: Context): Token {
  // Snapshot where the parser currently stands before scanning moves it on
  parser.startLine = parser.line;
  parser.startColumn = parser.column;
  parser.startPos = parser.index;

  // The scan sets this flag again if it consumes a line terminator
  parser.precedingLineBreak = 0;

  return (parser.token = scanSingleToken(parser, context));
}