UNPKG

mancha

Version:

JavaScript HTML rendering engine

250 lines 8.39 kB
/*
 * @license
 * Portions Copyright (c) 2013, the Dart project authors.
 */
import { KEYWORDS, POSTFIX_PRECEDENCE, PRECEDENCE, WORD_OPERATORS } from "./constants.js";

/** Operators made of exactly two characters ("=>" is handled separately as an arrow). */
const _TWO_CHAR_OPS = new Set(["==", "!=", "<=", ">=", "||", "&&", "??", "?."]);

/** Operators made of exactly three characters. */
const _THREE_CHAR_OPS = new Set(["===", "!=="]);

/**
 * Token kinds produced by the tokenizer. The object maps both ways:
 * `Kind.STRING === 1` and `Kind[1] === "STRING"`.
 */
export var Kind;
(function (Kind) {
  Kind[Kind["STRING"] = 1] = "STRING";
  Kind[Kind["IDENTIFIER"] = 2] = "IDENTIFIER";
  Kind[Kind["DOT"] = 3] = "DOT";
  Kind[Kind["COMMA"] = 4] = "COMMA";
  Kind[Kind["COLON"] = 5] = "COLON";
  Kind[Kind["INTEGER"] = 6] = "INTEGER";
  Kind[Kind["DECIMAL"] = 7] = "DECIMAL";
  Kind[Kind["OPERATOR"] = 8] = "OPERATOR";
  Kind[Kind["GROUPER"] = 9] = "GROUPER";
  Kind[Kind["KEYWORD"] = 10] = "KEYWORD";
  Kind[Kind["ARROW"] = 11] = "ARROW";
  Kind[Kind["OPTIONAL_DOT"] = 12] = "OPTIONAL_DOT";
  Kind[Kind["SPREAD"] = 13] = "SPREAD";
})(Kind || (Kind = {}));

/**
 * Builds a token record.
 *
 * @param kind One of the `Kind` values.
 * @param value The token text.
 * @param precedence Binding precedence; defaults to 0 when omitted or `undefined`.
 * @returns An object `{ kind, value, precedence }`.
 */
export const token = (kind, value, precedence = 0) => ({
  kind,
  value,
  precedence,
});

// All character predicates below take a UTF-16 code unit (a number), not a string.

const _isWhitespace = (ch) =>
  ch === 9 /* \t */ ||
  ch === 10 /* \n */ ||
  ch === 13 /* \r */ ||
  ch === 32; /* space */

// TODO(justinfagnani): allow code points > 127
const _isIdentOrKeywordStart = (ch) => {
  if (ch === 95 /* _ */ || ch === 36 /* $ */) return true;
  // Clearing bit 5 folds ASCII lowercase letters onto their uppercase counterparts.
  const upper = ch & ~32;
  return 65 /* A */ <= upper && upper <= 90; /* Z */
};

const _isNumber = (ch) => 48 /* 0 */ <= ch && ch <= 57; /* 9 */

// TODO(justinfagnani): allow code points > 127
const _isIdentifier = (ch) => _isIdentOrKeywordStart(ch) || _isNumber(ch);

const _isKeyword = (str) => KEYWORDS.has(str);

const _isQuote = (ch) => ch === 34 /* " */ || ch === 39; /* ' */

const _isOperator = (ch) =>
  ch === 43 /* + */ ||
  ch === 45 /* - */ ||
  ch === 42 /* * */ ||
  ch === 47 /* / */ ||
  ch === 33 /* ! */ ||
  ch === 38 /* & */ ||
  ch === 37 /* % */ ||
  ch === 60 /* < */ ||
  ch === 61 /* = */ ||
  ch === 62 /* > */ ||
  ch === 63 /* ? */ ||
  ch === 94 /* ^ */ ||
  ch === 124; /* | */

const _isGrouper = (ch) =>
  ch === 40 /* ( */ ||
  ch === 41 /* ) */ ||
  ch === 91 /* [ */ ||
  ch === 93 /* ] */ ||
  ch === 123 /* { */ ||
  ch === 125; /* } */

/**
 * Resolves backslash escapes in the raw text of a string literal.
 * `\n`, `\r`, `\t`, `\b` and `\f` become their control characters; any other
 * escaped character is replaced by the character itself.
 */
const _escapeString = (str) =>
  str.replace(/\\(.)/g, (_match, group) => {
    switch (group) {
      case "n":
        return "\n";
      case "r":
        return "\r";
      case "t":
        return "\t";
      case "b":
        return "\b";
      case "f":
        return "\f";
      default:
        return group;
    }
  });

/**
 * Splits an expression string into tokens, one per `nextToken()` call.
 *
 * State:
 *  - `_index` is the position of the current character in `_input`.
 *  - `_next` is the code unit at `_index`, or `undefined` past the end.
 *  - `_tokenStart` marks where the token being read begins; the token text is
 *    `_input.substring(_tokenStart, _index)`.
 */
export class Tokenizer {
  _input;
  _index = -1;
  _tokenStart = 0;
  _next;

  /**
   * @param input The expression source text to tokenize.
   */
  constructor(input) {
    this._input = input;
    this._advance();
  }

  /**
   * Reads the next token.
   *
   * @returns A token object, or `undefined` when no further token can be read.
   * @throws {Error} When a character that starts no token is followed by more
   *     input, or when a sub-tokenizer rejects the input (unterminated string,
   *     a lone "..").
   */
  nextToken() {
    while (_isWhitespace(this._next ?? -1)) {
      this._advance(true);
    }
    if (_isQuote(this._next ?? -1)) return this._tokenizeString();
    if (_isIdentOrKeywordStart(this._next ?? -1)) {
      return this._tokenizeIdentOrKeyword();
    }
    if (_isNumber(this._next ?? -1)) return this._tokenizeNumber();
    if (this._next === 46 /* . */) return this._tokenizeDot();
    if (this._next === 44 /* , */) return this._tokenizeComma();
    if (this._next === 58 /* : */) return this._tokenizeColon();
    if (_isOperator(this._next ?? -1)) return this._tokenizeOperator();
    if (_isGrouper(this._next ?? -1)) return this._tokenizeGrouper();
    // No match, should be end of input. Note that the current character is
    // skipped first, so the check (and the code reported in the error) applies
    // to the character that follows it.
    this._advance();
    if (this._next !== undefined) {
      throw new Error(`Expected end of input, got ${this._next}`);
    }
    return undefined;
  }

  /**
   * Moves to the next character and refreshes `_next`.
   *
   * @param resetTokenStart When exactly `true` and the new position is still
   *     inside the input, the token start is moved to the new position.
   */
  _advance(resetTokenStart) {
    this._index++;
    if (this._index < this._input.length) {
      this._next = this._input.charCodeAt(this._index);
      if (resetTokenStart === true) {
        this._tokenStart = this._index;
      }
    } else {
      this._next = undefined;
    }
  }

  /**
   * Returns the text from the token start up to the current position plus
   * `lookahead` characters.
   *
   * @param lookahead Extra characters to include beyond the current position.
   *     With the default of 0 the token start is also moved to the current
   *     position (the token is consumed); a non-zero lookahead only peeks.
   */
  _getValue(lookahead = 0) {
    const v = this._input.substring(this._tokenStart, this._index + lookahead);
    if (lookahead === 0) {
      this._clearValue();
    }
    return v;
  }

  /** Starts a fresh token at the current position. */
  _clearValue() {
    this._tokenStart = this._index;
  }

  /**
   * Reads a quoted string; must be called with `_next` on the opening quote.
   *
   * @returns A STRING token whose value has its escapes resolved.
   * @throws {Error} "unterminated string" when the input ends before the
   *     matching closing quote.
   */
  _tokenizeString() {
    const _us = "unterminated string";
    const quoteChar = this._next;
    this._advance(true);
    while (this._next !== quoteChar) {
      if (this._next === undefined) throw new Error(_us);
      if (this._next === 92 /* \ */) {
        // Skip the escaped character so an escaped quote does not end the string.
        this._advance();
        if (this._next === undefined) throw new Error(_us);
      }
      this._advance();
    }
    const t = token(Kind.STRING, _escapeString(this._getValue()));
    // Step past the closing quote and restart the token window there;
    // otherwise the quote would leak into a token that follows immediately
    // (e.g. the "+" in 'a'+'b' would be read as "'+").
    this._advance();
    this._clearValue();
    return t;
  }

  /**
   * Reads an identifier, keyword or word operator.
   *
   * @returns A KEYWORD, OPERATOR or IDENTIFIER token, with the precedence
   *     listed for its text in PRECEDENCE (0 when there is none).
   */
  _tokenizeIdentOrKeyword() {
    // This do/while loop assumes _isIdentifier(this._next!), so it must only
    // be called if _isIdentOrKeywordStart(this._next!) has returned true.
    do {
      this._advance();
    } while (_isIdentifier(this._next ?? -1));
    const value = this._getValue();
    const kind = _isKeyword(value)
      ? Kind.KEYWORD
      : WORD_OPERATORS.has(value)
        ? Kind.OPERATOR
        : Kind.IDENTIFIER;
    return token(kind, value, PRECEDENCE[value] ?? 0);
  }

  /**
   * Reads a number starting at a digit.
   *
   * @returns A DECIMAL token when the digits are followed by "." and another
   *     digit, otherwise an INTEGER token.
   */
  _tokenizeNumber() {
    // This do/while loop assumes _isNumber(this._next!), so it must only
    // be called if _isNumber(this._next!) has returned true.
    do {
      this._advance();
    } while (_isNumber(this._next ?? -1));
    // Only treat "." as a decimal point when a digit follows it. Otherwise the
    // integer is emitted on its own and the "." (member access or spread) is
    // left for the next call, rather than the integer being discarded.
    // charCodeAt() yields NaN past the end of input, which _isNumber rejects.
    if (
      this._next === 46 /* . */ &&
      _isNumber(this._input.charCodeAt(this._index + 1))
    ) {
      this._advance();
      return this._tokenizeFraction();
    }
    return token(Kind.INTEGER, this._getValue());
  }

  /**
   * Reads a token starting at ".".
   *
   * @returns A DECIMAL token for ".5"-style numbers, a SPREAD token for "...",
   *     otherwise a DOT token.
   * @throws {Error} "Unexpected token .." when exactly two dots are found.
   */
  _tokenizeDot() {
    this._advance();
    if (_isNumber(this._next ?? -1)) return this._tokenizeFraction();
    if (this._next === 46 /* . */) {
      this._advance();
      if (this._next === 46 /* . */) {
        this._advance();
        this._clearValue();
        return token(Kind.SPREAD, "...");
      }
      throw new Error("Unexpected token ..");
    }
    this._clearValue();
    return token(Kind.DOT, ".", POSTFIX_PRECEDENCE);
  }

  /** Consumes "," and returns a COMMA token. */
  _tokenizeComma() {
    this._advance(true);
    return token(Kind.COMMA, ",");
  }

  /** Consumes ":" and returns a COLON token. */
  _tokenizeColon() {
    this._advance(true);
    return token(Kind.COLON, ":");
  }

  /**
   * Reads the digits after a decimal point.
   *
   * @returns A DECIMAL token whose value spans from the token start (including
   *     any integer part and the ".") to the last digit.
   */
  _tokenizeFraction() {
    // This do/while loop assumes _isNumber(this._next!), so it must only
    // be called if _isNumber(this._next!) has returned true.
    do {
      this._advance();
    } while (_isNumber(this._next ?? -1));
    return token(Kind.DECIMAL, this._getValue());
  }

  /**
   * Reads an operator of one, two or three characters, preferring the longest
   * match.
   *
   * @returns An ARROW token for "=>", an OPTIONAL_DOT token for "?.",
   *     otherwise an OPERATOR token with the precedence listed in PRECEDENCE.
   */
  _tokenizeOperator() {
    this._advance();
    // Peek at the first character plus the two that follow it.
    let op = this._getValue(2);
    if (_THREE_CHAR_OPS.has(op)) {
      this._advance();
      this._advance();
    } else {
      // Peek at the first character plus the one that follows it.
      op = this._getValue(1);
      if (op === "=>") {
        this._advance();
        // Restart the token window after the arrow; otherwise "=>" would be
        // prepended to a token that follows immediately (e.g. "x=>y").
        this._clearValue();
        return token(Kind.ARROW, op);
      }
      if (_TWO_CHAR_OPS.has(op)) {
        this._advance();
      }
      // Otherwise it is a 1-char operator, already consumed by the first advance.
    }
    // Consume the operator text now that _index sits just past it.
    op = this._getValue();
    if (op === "?.") {
      return token(Kind.OPTIONAL_DOT, op, POSTFIX_PRECEDENCE);
    }
    return token(Kind.OPERATOR, op, PRECEDENCE[op]);
  }

  /**
   * Consumes a single bracket, brace or parenthesis.
   *
   * @returns A GROUPER token with the precedence listed in PRECEDENCE.
   */
  _tokenizeGrouper() {
    const value = String.fromCharCode(this._next ?? 0);
    const t = token(Kind.GROUPER, value, PRECEDENCE[value]);
    this._advance(true);
    return t;
  }
}
//# sourceMappingURL=tokenizer.js.map