UNPKG

buntis

Version:

A 100% compliant, self-hosted TypeScript parser that emits an ESTree-compatible abstract syntax tree

342 lines (282 loc) 10.3 kB
import { CharTypes, CharFlags } from './charClassifier';
import { Token } from '../token';
import { Chars } from '../chars';
import { ParserState, Context } from '../common';
import { advance, toHex, fromCodePoint } from './common';
import { report, Errors } from '../errors';
import { handleEscapeError, UnicodeEscape } from './recovery';

// NOTE(review): every lookahead in this file (`parser.index + 1` after `advance`) treats
// `parser.index` as the position of the character most recently returned by `advance`,
// i.e. `advance` is assumed to pre-increment the index — confirm against './common'.

/**
 * Scans a string literal. On entry `parser.index` is at the opening quote.
 *
 * On success stores the cooked value in `parser.tokenValue` (and the raw slice,
 * quotes included, in `parser.tokenRaw` when `Context.OptionsRaw` is set).
 *
 * @param parser Parser object
 * @param context Context masks
 * @param quote Code unit of the opening quote; the literal ends at the next unescaped occurrence
 * @returns `Token.StringLiteral`, or `Token.Error` after an error has been reported
 */
export function scanStringLiteral(parser: ParserState, context: Context, quote: number): Token {
  let ret = '';
  const { index: start } = parser;
  let ch = advance(parser);

  // NOTE(review): the loop stops on any character flagged `IsWhiteSpaceOrLineTerminator`;
  // whether that flag also covers plain white space cannot be seen from here — confirm.
  while ((CharTypes[ch] & CharFlags.IsWhiteSpaceOrLineTerminator) === 0) {
    if (ch === quote) {
      advance(parser); // Consume the quote
      if (context & Context.OptionsRaw) parser.tokenRaw = parser.source.slice(start, parser.index);
      parser.tokenValue = ret;
      return Token.StringLiteral;
    }

    if ((ch & 8) === 8 && ch === Chars.Backslash) {
      ch = advance(parser);
      if (ch >= 128) {
        ret += fromCodePoint(ch);
      } else {
        parser.nextCodePoint = ch;
        const code = parseEscape(parser, context, ch);
        if (code >= 0) ret += fromCodePoint(code);
        else if (code !== UnicodeEscape.Empty) {
          report(parser, context, handleEscapeError(code as UnicodeEscape, /* isTemplate */ 0), /* early */ 1);
          return Token.Error;
        }
      }
    } else if ((ch ^ Chars.LineSeparator) <= 1) {
      // U+2028 / U+2029 are legal inside string literals (ES2019) and are part of the
      // string value. Only line tracking is updated here; the `advance` below moves
      // past the separator, so the index must not be bumped a second time.
      ret += fromCodePoint(ch);
      parser.column = 0;
      parser.line++;
    } else {
      ret += fromCodePoint(ch);
    }

    ch = advance(parser);
    if (parser.index >= parser.length) {
      report(parser, context, Errors.UnterminatedString, /* early */ 0);
      return Token.Error;
    }
  }

  report(parser, context, Errors.InvalidASCIILineBreak, /* early */ 1);
  return Token.Error;
}

/**
 * Scans a template chunk up to the closing backtick or the next `${`.
 *
 * Stores the cooked value in `parser.tokenValue` (`null` when a tagged template
 * contains an invalid escape) and the raw source slice in `parser.tokenRaw`.
 *
 * @param parser Parser object
 * @param context Context masks
 * @returns `Token.TemplateTail` when the chunk ends at a backtick, `Token.TemplateCont`
 * when it ends at `${`, or `Token.Error` after an error has been reported
 */
export function scanTemplate(parser: ParserState, context: Context): Token {
  const { index: start } = parser;
  let ret: string | null = '';
  let tail = true;
  let ch = advance(parser);

  while (ch !== Chars.Backtick) {
    if (ch === Chars.Dollar) {
      const index = parser.index + 1;
      if (index < parser.source.length && parser.source.charCodeAt(index) === Chars.LeftBrace) {
        // `${` ends this chunk; leave the index on the `{`.
        parser.index = index;
        parser.column++;
        tail = false;
        break;
      }
      ret += '$';
    } else if (ch === Chars.Backslash) {
      ch = advance(parser);
      if (ch >= 128) {
        ret += fromCodePoint(ch);
      } else {
        parser.nextCodePoint = ch;
        // Escapes in templates are always parsed with the strict flag set.
        const code = parseEscape(parser, context | Context.Strict, ch);
        if (code >= 0) {
          ret += fromCodePoint(code);
        } else if (code !== UnicodeEscape.Empty && context & Context.TaggedTemplate) {
          // Invalid escape in a tagged template: no cooked value, skip to the chunk end.
          ret = null;
          ch = scanLooserTemplateSegment(parser, context, parser.nextCodePoint);
          if (ch < 0) {
            // Negative result: the segment ended at `${`.
            tail = false;
            break;
          } else if (ch === 0x11000) {
            // Sentinel: unterminated template, already reported by the helper.
            return Token.Error;
          }
          break;
        } else if (code !== UnicodeEscape.Empty) {
          report(parser, context, handleEscapeError(code as UnicodeEscape, /* isTemplate */ 1), /* early */ 1);
          return Token.Error;
        }
      }
    } else if (
      ((ch & 83) < 3 && CharTypes[ch] & CharFlags.IsWhiteSpaceOrLineTerminator) ||
      (ch ^ Chars.LineSeparator) <= 1
    ) {
      if (ch === Chars.CarriageReturn) {
        // Treat CR LF as a single line break. The LF sits one past the current
        // character, so the lookahead must be at `index + 1`.
        const next = parser.index + 1;
        if (next < parser.length && parser.source.charCodeAt(next) === Chars.LineFeed) {
          ret += fromCodePoint(ch);
          ch = parser.source.charCodeAt(next);
          parser.index = next;
        }
      }
      parser.column = -1;
      parser.line++;
      ret += fromCodePoint(ch);
    } else {
      ret += fromCodePoint(ch);
    }

    ch = advance(parser);
    if (parser.index >= parser.length) {
      report(parser, context, Errors.UnterminatedTemplate, /* early */ 0);
      return Token.Error;
    }
  }

  advance(parser); // Consume the quote or opening brace
  parser.tokenValue = ret;
  if (tail) {
    parser.tokenRaw = parser.source.slice(start + 1, parser.index - 1);
    return Token.TemplateTail;
  } else {
    parser.tokenRaw = parser.source.slice(start + 1, parser.index - 2);
    return Token.TemplateCont;
  }
}

/**
 * Scan looser template segment
 *
 * Skips characters without cooking them until the closing backtick or a `${`.
 *
 * @param parser Parser object
 * @param context Context masks
 * @param ch codepoint
 * @returns the backtick code unit when the segment ends at a backtick, a negative
 * value when it ends at `${`, or `0x11000` after reporting an unterminated template
 */
function scanLooserTemplateSegment(parser: ParserState, context: Context, ch: number): number {
  while (ch !== Chars.Backtick) {
    if (ch === Chars.Dollar) {
      const index = parser.index + 1;
      if (index < parser.source.length && parser.source.charCodeAt(index) === Chars.LeftBrace) {
        parser.index = index;
        parser.column++;
        return -ch;
      }
    }

    ch = parser.source.charCodeAt(++parser.index);
    if (parser.index >= parser.length) {
      report(parser, context, Errors.UnterminatedTemplate, /* early */ 0);
      return 0x11000;
    }
  }

  return ch;
}

/**
 * Re-scans a template continuation: steps the index back by one and delegates
 * to `scanTemplate`.
 *
 * @param parser Parser object
 * @param context Context masks
 * @returns whatever `scanTemplate` returns
 */
export function scanTemplateTail(parser: ParserState, context: Context): Token {
  // if (parser.index >= parser.end) report(parser, Errors.Unexpected);
  parser.index--;
  //parser.column--;
  return scanTemplate(parser, context);
}

/**
 * Parses the escape sequence whose first character after the backslash is `first`.
 * On entry `parser.index` is at `first`; on return it is at the last character
 * consumed by the escape.
 *
 * @param parser Parser object
 * @param context Context masks
 * @param first Code unit following the backslash
 * @returns the decoded code point (>= 0), or one of the `UnicodeEscape` values —
 * `UnicodeEscape.Empty` for a line continuation, otherwise an error code
 * (callers treat any negative result as a non-value)
 */
export function parseEscape(parser: ParserState, context: Context, first: number): number {
  switch (first) {
    case Chars.LowerB:
      return Chars.Backspace;
    case Chars.LowerF:
      return Chars.FormFeed;
    case Chars.LowerR:
      return Chars.CarriageReturn;
    case Chars.LowerN:
      return Chars.LineFeed;
    case Chars.LowerT:
      return Chars.Tab;
    case Chars.LowerV:
      return Chars.VerticalTab;

    // Line continuations
    case Chars.CarriageReturn: {
      // `\` CR LF is one continuation: consume the LF that follows the CR.
      // `parser.index` is at the CR, so the LF (if any) is at `index + 1`.
      const next = parser.index + 1;
      if (next < parser.source.length) {
        const ch = parser.source.charCodeAt(next);
        if (ch === Chars.LineFeed) {
          parser.nextCodePoint = ch;
          parser.index = next;
        }
      }
    }
    // falls through
    case Chars.LineFeed:
    case Chars.LineSeparator:
    case Chars.ParagraphSeparator:
      parser.column = -1;
      parser.line++;
      return UnicodeEscape.Empty;

    // Null character, octals
    case Chars.Zero:
    case Chars.One:
    case Chars.Two:
    case Chars.Three: {
      let code = first - Chars.Zero;
      let index = parser.index + 1;
      let column = parser.column + 1;

      if (index < parser.length) {
        const next = parser.source.charCodeAt(index);

        if (next < Chars.Zero || next > Chars.Seven) {
          // Single digit: only a `\0` not followed by a digit is allowed in strict mode.
          if (
            (code !== 0 || CharTypes[next] & CharFlags.ImplicitOctalDigits) &&
            context & (Context.Strict | Context.DisableWebCompat)
          )
            return UnicodeEscape.StrictOctal;
        } else if (context & (Context.Strict | Context.DisableWebCompat)) {
          return UnicodeEscape.StrictOctal;
        } else {
          // Legacy octal escape: up to three digits when the first is 0-3.
          parser.nextCodePoint = next;
          code = (code << 3) | (next - Chars.Zero);
          index++;
          column++;

          if (index < parser.length) {
            const next = parser.source.charCodeAt(index);

            if (next >= Chars.Zero && next <= Chars.Seven) {
              parser.nextCodePoint = next;
              code = (code << 3) | (next - Chars.Zero);
              index++;
              column++;
            }
          }

          // Leave the index on the last digit consumed.
          parser.index = index - 1;
          parser.column = column - 1;
        }
      }

      return code;
    }

    case Chars.Four:
    case Chars.Five:
    case Chars.Six:
    case Chars.Seven: {
      if (context & (Context.Strict | Context.DisableWebCompat)) return UnicodeEscape.StrictOctal;

      // Legacy octal escape: at most two digits when the first is 4-7.
      let code = first - Chars.Zero;
      const index = parser.index + 1;
      const column = parser.column + 1;

      if (index < parser.length) {
        const next = parser.source.charCodeAt(index);

        if (next >= Chars.Zero && next <= Chars.Seven) {
          code = (code << 3) | (next - Chars.Zero);
          parser.nextCodePoint = next;
          parser.index = index;
          parser.column = column;
        }
      }

      return code;
    }

    // `8`, `9` (invalid escapes)
    case Chars.Eight:
    case Chars.Nine:
      return UnicodeEscape.EightOrNine;

    // ASCII escapes
    case Chars.LowerX: {
      const hi = advance(parser);
      if ((CharTypes[hi] & CharFlags.Hex) === 0) return UnicodeEscape.InvalidHex;
      const lo = advance(parser);
      if ((CharTypes[lo] & CharFlags.Hex) === 0) return UnicodeEscape.InvalidHex;

      return (toHex(hi) << 4) | toHex(lo);
    }

    // UCS-2/Unicode escapes
    case Chars.LowerU: {
      let ch = advance(parser);

      if (ch === Chars.LeftBrace) {
        ch = advance(parser); // skip: '{'
        let code = 0;
        let digits = 0;
        while (CharTypes[ch] & CharFlags.Hex) {
          code = (code << 4) | toHex(ch);
          if (code > 0x10ffff) return UnicodeEscape.OutOfRange;
          ch = parser.source.charCodeAt(++parser.index);
          parser.column++;
          digits++;
        }

        // `\u{CodePoint}` needs at least one hex digit; there is no four-digit
        // minimum (e.g. `\u{41}` and `\u{0}` are valid).
        if (digits < 1) return UnicodeEscape.InvalidHex;
        if (ch !== Chars.RightBrace) return UnicodeEscape.MissingCurlyBrace;
        return code;
      }

      if ((CharTypes[ch] & CharFlags.Hex) === 0) return UnicodeEscape.InvalidHex; // first one is mandatory
      const ch2 = parser.source.charCodeAt(parser.index + 1);
      if ((CharTypes[ch2] & CharFlags.Hex) === 0) return UnicodeEscape.InvalidHex;
      const ch3 = parser.source.charCodeAt(parser.index + 2);
      if ((CharTypes[ch3] & CharFlags.Hex) === 0) return UnicodeEscape.InvalidHex;
      const ch4 = parser.source.charCodeAt(parser.index + 3);
      if ((CharTypes[ch4] & CharFlags.Hex) === 0) return UnicodeEscape.InvalidHex;

      // Leave the index on the fourth hex digit.
      parser.column += 3;
      parser.nextCodePoint = parser.source.charCodeAt((parser.index += 3));

      return (toHex(ch) << 12) | (toHex(ch2) << 8) | (toHex(ch3) << 4) | toHex(ch4);
    }

    default:
      // Any other character escapes to itself.
      return parser.source.charCodeAt(parser.index);
  }
}