/*
 * buntis
 * Version:
 * A 100% compliant, self-hosted TypeScript parser that emits an ESTree-compatible abstract syntax tree
 * 100 lines (92 loc) • 2.98 kB
 * text/typescript
 */
import { ParserState } from '../common';
import { Chars } from '../chars';
import { unicodeLookup } from './unicode';
import { Escape } from './recovery';
/**
 * Steps the parser forward by one UTF-16 code unit.
 *
 * Increments both `index` and `column`, then caches the code unit found at the
 * new index in `nextCodePoint`.
 *
 * @param parser Parser state to mutate.
 * @returns The code unit at the new index (`NaN` once the index is past the
 * end of `source`, as produced by `charCodeAt`).
 */
export function advance(parser: ParserState): number {
  parser.index += 1;
  parser.column += 1;
  const next = parser.source.charCodeAt(parser.index);
  parser.nextCodePoint = next;
  return next;
}
/**
 * Lightweight replacement for `String.fromCodePoint` for a single code point.
 *
 * Code points in the Basic Multilingual Plane map to one UTF-16 code unit.
 * Supplementary code points (above 0xFFFF) are encoded as a surrogate pair:
 * 0x10000 is subtracted first, then the upper 10 bits are added to 0xD800
 * (lead surrogate) and the lower 10 bits to 0xDC00 (trail surrogate).
 *
 * No range validation is performed; the caller is expected to pass a value
 * in 0..0x10FFFF.
 *
 * @param codePoint Unicode code point to convert.
 * @returns The UTF-16 string for `codePoint`.
 */
export function fromCodePoint(codePoint: number): string {
  if (codePoint <= 0xffff) {
    return String.fromCharCode(codePoint);
  }
  // Offset into the supplementary planes (20 significant bits).
  const offset = codePoint - 0x10000;
  return String.fromCharCode(0xd800 + (offset >> 10), 0xdc00 + (offset & 0x3ff));
}
/**
 * Maps the character code of a hexadecimal digit to its numeric value.
 *
 * Codes below `Chars.UpperA` are measured from `Chars.Zero`. Every other code
 * is measured from `Chars.UpperA`, offset by 10 and masked to the low four
 * bits. No validation is done, so the result is only meaningful when `code`
 * really is a hex digit.
 *
 * NOTE(review): the `& 0xf` mask presumably lets lowercase letters share the
 * letter branch with uppercase ones — confirm against the `Chars` values.
 *
 * @param code Character code of the digit.
 * @returns The digit's numeric value.
 */
export function toHex(code: number): number {
  if (code < Chars.UpperA) {
    return code - Chars.Zero;
  }
  const letterValue = code - Chars.UpperA + 10;
  return letterValue & 0xf;
}
/**
 * Tries to combine the UTF-16 code unit `hi` with the code unit that follows
 * the current index into a single supplementary code point.
 *
 * Outcomes:
 * - `0`: `hi` is not a lead surrogate, or the following code unit is not a
 *   trail surrogate. The parser state is left untouched.
 * - `Escape.InvalidCodePoint`: the pair decodes, but the code point's bit is
 *   clear in `unicodeLookup`. `nextCodePoint` already holds the decoded value;
 *   `index` is not moved.
 * - `1`: the bit is set. `nextCodePoint` holds the decoded value and `index`
 *   has been stepped by one.
 *
 * NOTE(review): unlike `advance`, this does not touch `parser.column` —
 * confirm that is intended.
 *
 * @param parser Parser state; `nextCodePoint` and `index` may be updated.
 * @param hi Code unit to test as the leading half of a surrogate pair.
 */
export function consumeMultiUnitCodePoint(parser: ParserState, hi: number): Escape {
  // See: https://tc39.github.io/ecma262/#sec-ecmascript-language-types-string-type
  const isLeadSurrogate = (hi & 0xfc00) === 0xd800;
  if (!isLeadSurrogate) return 0;

  const trail = parser.source.charCodeAt(parser.index + 1);
  const isTrailSurrogate = (trail & 0xfc00) === 0xdc00;
  if (!isTrailSurrogate) return 0;

  // Reassemble the code point from the two 10-bit payloads.
  const codePoint = 0x10000 + ((hi & 0x3ff) << 10) + (trail & 0x3ff);
  parser.nextCodePoint = codePoint;

  // The table holds one bit per code point: 32-bit word `codePoint >>> 5`,
  // bit `codePoint & 31` (the shift operator only uses the low five bits).
  const word = unicodeLookup[codePoint >>> 5];
  if (((word >>> codePoint) & 1) === 0) {
    return Escape.InvalidCodePoint;
  }

  parser.index += 1;
  return 1;
}
/**
 * Moves past the current code unit, treating it as a line feed.
 *
 * Always steps `index`, refreshes `nextCodePoint` and sets
 * `precedingLineBreak` to 1. The line counter is bumped and the column reset
 * only when `lastIsCR` is 0.
 *
 * NOTE(review): presumably `lastIsCR === 1` means a preceding carriage return
 * already counted this line break — confirm against the caller.
 *
 * @param parser Parser state to mutate.
 * @param lastIsCR 1 to leave `line` and `column` alone, 0 to start a new line.
 */
export function consumeLineFeed(parser: ParserState, lastIsCR: 0 | 1) {
  const startsNewLine = lastIsCR === 0;

  parser.index += 1;
  parser.nextCodePoint = parser.source.charCodeAt(parser.index);
  parser.precedingLineBreak = 1;

  if (!startsNewLine) return;

  parser.line += 1;
  parser.column = 0;
}
/**
 * Moves past the current code unit and starts a new line.
 *
 * Steps `index`, refreshes `nextCodePoint`, sets `precedingLineBreak` to 1,
 * increments `line` and resets `column` to 0.
 *
 * @param parser Parser state to mutate.
 */
export function advanceNewline(parser: ParserState) {
  parser.index += 1;
  parser.line += 1;
  parser.column = 0;
  parser.precedingLineBreak = 1;
  parser.nextCodePoint = parser.source.charCodeAt(parser.index);
}
// ECMA-262 11.2 White Space
/**
 * Reports whether `code` is one of the non-ASCII white space characters this
 * lexer recognises.
 *
 * Accepted values are the inclusive range `Chars.EnQuad`..`Chars.ZeroWidthSpace`
 * plus the individual members `NonBreakingSpace`, `ZeroWidthNoBreakSpace`,
 * `NextLine`, `Ogham`, `NarrowNoBreakSpace`, `MathematicalSpace`,
 * `IdeographicSpace` and `ByteOrderMark`.
 *
 * NOTE(review): no line-separator / paragraph-separator (U+2028 / U+2029)
 * member is tested here; presumably line terminators are classified by the
 * caller — confirm.
 * NOTE(review): if `Chars.ZeroWidthSpace` is U+200B, the range is one code
 * point wider than the "Zs" run U+2000..U+200A — confirm this is intended.
 *
 * @param code Code point to classify.
 * @returns `true` when `code` matches one of the values above.
 */
export function isExoticECMAScriptWhitespace(code: number): boolean {
  // Contiguous run of space characters.
  if (code >= Chars.EnQuad && code <= Chars.ZeroWidthSpace) {
    return true;
  }

  // Isolated code points.
  switch (code) {
    case Chars.NonBreakingSpace:
    case Chars.ZeroWidthNoBreakSpace:
    case Chars.NextLine:
    case Chars.Ogham:
    case Chars.NarrowNoBreakSpace:
    case Chars.MathematicalSpace:
    case Chars.IdeographicSpace:
    case Chars.ByteOrderMark:
      return true;
    default:
      return false;
  }
}