angular-html-parser
Version:
A HTML parser extracted from Angular with some modifications
1,178 lines • 54.7 kB
JavaScript
/**
* @license
* Copyright Google LLC All Rights Reserved.
*
* Use of this source code is governed by an MIT-style license that can be
* found in the LICENSE file at https://angular.dev/license
*/
import * as chars from "../chars.js";
import { ParseError, ParseLocation, ParseSourceFile, ParseSourceSpan } from "../parse_util.js";
import { DEFAULT_INTERPOLATION_CONFIG } from "./defaults.js";
import { NAMED_ENTITIES } from "./entities.js";
import { mergeNsAndName, TagContentType } from "./tags.js";
export class TokenError extends ParseError {
constructor(errorMsg, tokenType, span) {
super(span, errorMsg);
this.tokenType = tokenType;
}
}
export class TokenizeResult {
constructor(tokens, errors, nonNormalizedIcuExpressions) {
this.tokens = tokens;
this.errors = errors;
this.nonNormalizedIcuExpressions = nonNormalizedIcuExpressions;
}
}
export function tokenize(source, url, getTagContentType, options = {}) {
const tokenizer = new _Tokenizer(new ParseSourceFile(source, url), getTagContentType, options);
tokenizer.tokenize();
return new TokenizeResult(mergeTextTokens(tokenizer.tokens), tokenizer.errors, tokenizer.nonNormalizedIcuExpressions);
}
const _CR_OR_CRLF_REGEXP = /\r\n?/g;
function _unexpectedCharacterErrorMsg(charCode) {
const char = charCode === chars.$EOF ? 'EOF' : String.fromCharCode(charCode);
return `Unexpected character "${char}"`;
}
function _unknownEntityErrorMsg(entitySrc) {
return `Unknown entity "${entitySrc}" - use the "&#<decimal>;" or "&#x<hex>;" syntax`;
}
function _unparsableEntityErrorMsg(type, entityStr) {
return `Unable to parse entity "${entityStr}" - ${type} character reference entities must end with ";"`;
}
var CharacterReferenceType;
(function (CharacterReferenceType) {
CharacterReferenceType["HEX"] = "hexadecimal";
CharacterReferenceType["DEC"] = "decimal";
})(CharacterReferenceType || (CharacterReferenceType = {}));
class _ControlFlowError {
constructor(error) {
this.error = error;
}
}
// See https://www.w3.org/TR/html51/syntax.html#writing-html-documents
class _Tokenizer {
/**
* @param _file The html source file being tokenized.
* @param _getTagContentType A function that will retrieve a tag content type for a given tag
* name.
* @param options Configuration of the tokenization.
*/
constructor(_file, _getTagContentType, options) {
this._getTagContentType = _getTagContentType;
this._currentTokenStart = null;
this._currentTokenType = null;
this._expansionCaseStack = [];
this._inInterpolation = false;
this._fullNameStack = [];
this.tokens = [];
this.errors = [];
this.nonNormalizedIcuExpressions = [];
this._tokenizeIcu = options.tokenizeExpansionForms || false;
this._interpolationConfig = options.interpolationConfig || DEFAULT_INTERPOLATION_CONFIG;
this._leadingTriviaCodePoints =
options.leadingTriviaChars && options.leadingTriviaChars.map((c) => c.codePointAt(0) || 0);
this._canSelfClose = options.canSelfClose || false;
this._allowHtmComponentClosingTags = options.allowHtmComponentClosingTags || false;
const range = options.range || {
endPos: _file.content.length,
startPos: 0,
startLine: 0,
startCol: 0,
};
this._cursor = options.escapedString
? new EscapedCharacterCursor(_file, range)
: new PlainCharacterCursor(_file, range);
this._preserveLineEndings = options.preserveLineEndings || false;
this._i18nNormalizeLineEndingsInICUs = options.i18nNormalizeLineEndingsInICUs || false;
this._tokenizeBlocks = options.tokenizeBlocks ?? true;
this._tokenizeLet = options.tokenizeLet ?? true;
try {
this._cursor.init();
}
catch (e) {
this.handleError(e);
}
}
_processCarriageReturns(content) {
if (this._preserveLineEndings) {
return content;
}
// https://www.w3.org/TR/html51/syntax.html#preprocessing-the-input-stream
// In order to keep the original position in the source, we can not
// pre-process it.
// Instead CRs are processed right before instantiating the tokens.
return content.replace(_CR_OR_CRLF_REGEXP, '\n');
}
tokenize() {
while (this._cursor.peek() !== chars.$EOF) {
const start = this._cursor.clone();
try {
if (this._attemptCharCode(chars.$LT)) {
if (this._attemptCharCode(chars.$BANG)) {
if (this._attemptStr('[CDATA[')) {
this._consumeCdata(start);
}
else if (this._attemptStr('--')) {
this._consumeComment(start);
}
else if (this._attemptStrCaseInsensitive('doctype')) {
this._consumeDocType(start);
}
else {
this._consumeBogusComment(start);
}
}
else if (this._attemptCharCode(chars.$SLASH)) {
this._consumeTagClose(start);
}
else {
const savedPos = this._cursor.clone();
if (this._attemptCharCode(chars.$QUESTION)) {
this._cursor = savedPos;
this._consumeBogusComment(start);
}
else {
this._consumeTagOpen(start);
}
}
}
else if (this._tokenizeLet &&
// Use `peek` instead of `attempCharCode` since we
// don't want to advance in case it's not `@let`.
this._cursor.peek() === chars.$AT &&
!this._inInterpolation &&
this._attemptStr('@let')) {
this._consumeLetDeclaration(start);
}
else if (this._tokenizeBlocks && this._attemptCharCode(chars.$AT)) {
this._consumeBlockStart(start);
}
else if (this._tokenizeBlocks &&
!this._inInterpolation &&
!this._isInExpansionCase() &&
!this._isInExpansionForm() &&
this._attemptCharCode(chars.$RBRACE)) {
this._consumeBlockEnd(start);
}
else if (!(this._tokenizeIcu && this._tokenizeExpansionForm())) {
// In (possibly interpolated) text the end of the text is given by `isTextEnd()`, while
// the premature end of an interpolation is given by the start of a new HTML element.
this._consumeWithInterpolation(5 /* TokenType.TEXT */, 8 /* TokenType.INTERPOLATION */, () => this._isTextEnd(), () => this._isTagStart());
}
}
catch (e) {
this.handleError(e);
}
}
this._beginToken(34 /* TokenType.EOF */);
this._endToken([]);
}
_getBlockName() {
// This allows us to capture up something like `@else if`, but not `@ if`.
let spacesInNameAllowed = false;
const nameCursor = this._cursor.clone();
this._attemptCharCodeUntilFn((code) => {
if (chars.isWhitespace(code)) {
return !spacesInNameAllowed;
}
if (isBlockNameChar(code)) {
spacesInNameAllowed = true;
return false;
}
return true;
});
return this._cursor.getChars(nameCursor).trim();
}
_consumeBlockStart(start) {
this._beginToken(25 /* TokenType.BLOCK_OPEN_START */, start);
const startToken = this._endToken([this._getBlockName()]);
if (this._cursor.peek() === chars.$LPAREN) {
// Advance past the opening paren.
this._cursor.advance();
// Capture the parameters.
this._consumeBlockParameters();
// Allow spaces before the closing paren.
this._attemptCharCodeUntilFn(isNotWhitespace);
if (this._attemptCharCode(chars.$RPAREN)) {
// Allow spaces after the paren.
this._attemptCharCodeUntilFn(isNotWhitespace);
}
else {
startToken.type = 29 /* TokenType.INCOMPLETE_BLOCK_OPEN */;
return;
}
}
if (this._attemptCharCode(chars.$LBRACE)) {
this._beginToken(26 /* TokenType.BLOCK_OPEN_END */);
this._endToken([]);
}
else {
startToken.type = 29 /* TokenType.INCOMPLETE_BLOCK_OPEN */;
}
}
_consumeBlockEnd(start) {
this._beginToken(27 /* TokenType.BLOCK_CLOSE */, start);
this._endToken([]);
}
_consumeBlockParameters() {
// Trim the whitespace until the first parameter.
this._attemptCharCodeUntilFn(isBlockParameterChar);
while (this._cursor.peek() !== chars.$RPAREN && this._cursor.peek() !== chars.$EOF) {
this._beginToken(28 /* TokenType.BLOCK_PARAMETER */);
const start = this._cursor.clone();
let inQuote = null;
let openParens = 0;
// Consume the parameter until the next semicolon or brace.
// Note that we skip over semicolons/braces inside of strings.
while ((this._cursor.peek() !== chars.$SEMICOLON && this._cursor.peek() !== chars.$EOF) ||
inQuote !== null) {
const char = this._cursor.peek();
// Skip to the next character if it was escaped.
if (char === chars.$BACKSLASH) {
this._cursor.advance();
}
else if (char === inQuote) {
inQuote = null;
}
else if (inQuote === null && chars.isQuote(char)) {
inQuote = char;
}
else if (char === chars.$LPAREN && inQuote === null) {
openParens++;
}
else if (char === chars.$RPAREN && inQuote === null) {
if (openParens === 0) {
break;
}
else if (openParens > 0) {
openParens--;
}
}
this._cursor.advance();
}
this._endToken([this._cursor.getChars(start)]);
// Skip to the next parameter.
this._attemptCharCodeUntilFn(isBlockParameterChar);
}
}
_consumeLetDeclaration(start) {
this._beginToken(30 /* TokenType.LET_START */, start);
// Require at least one white space after the `@let`.
if (chars.isWhitespace(this._cursor.peek())) {
this._attemptCharCodeUntilFn(isNotWhitespace);
}
else {
const token = this._endToken([this._cursor.getChars(start)]);
token.type = 33 /* TokenType.INCOMPLETE_LET */;
return;
}
const startToken = this._endToken([this._getLetDeclarationName()]);
// Skip over white space before the equals character.
this._attemptCharCodeUntilFn(isNotWhitespace);
// Expect an equals sign.
if (!this._attemptCharCode(chars.$EQ)) {
startToken.type = 33 /* TokenType.INCOMPLETE_LET */;
return;
}
// Skip spaces after the equals.
this._attemptCharCodeUntilFn((code) => isNotWhitespace(code) && !chars.isNewLine(code));
this._consumeLetDeclarationValue();
// Terminate the `@let` with a semicolon.
const endChar = this._cursor.peek();
if (endChar === chars.$SEMICOLON) {
this._beginToken(32 /* TokenType.LET_END */);
this._endToken([]);
this._cursor.advance();
}
else {
startToken.type = 33 /* TokenType.INCOMPLETE_LET */;
startToken.sourceSpan = this._cursor.getSpan(start);
}
}
_getLetDeclarationName() {
const nameCursor = this._cursor.clone();
let allowDigit = false;
this._attemptCharCodeUntilFn((code) => {
if (chars.isAsciiLetter(code) ||
code === chars.$$ ||
code === chars.$_ ||
// `@let` names can't start with a digit, but digits are valid anywhere else in the name.
(allowDigit && chars.isDigit(code))) {
allowDigit = true;
return false;
}
return true;
});
return this._cursor.getChars(nameCursor).trim();
}
_consumeLetDeclarationValue() {
const start = this._cursor.clone();
this._beginToken(31 /* TokenType.LET_VALUE */, start);
while (this._cursor.peek() !== chars.$EOF) {
const char = this._cursor.peek();
// `@let` declarations terminate with a semicolon.
if (char === chars.$SEMICOLON) {
break;
}
// If we hit a quote, skip over its content since we don't care what's inside.
if (chars.isQuote(char)) {
this._cursor.advance();
this._attemptCharCodeUntilFn((inner) => {
if (inner === chars.$BACKSLASH) {
this._cursor.advance();
return false;
}
return inner === char;
});
}
this._cursor.advance();
}
this._endToken([this._cursor.getChars(start)]);
}
/**
* @returns whether an ICU token has been created
* @internal
*/
_tokenizeExpansionForm() {
if (this.isExpansionFormStart()) {
this._consumeExpansionFormStart();
return true;
}
if (isExpansionCaseStart(this._cursor.peek()) && this._isInExpansionForm()) {
this._consumeExpansionCaseStart();
return true;
}
if (this._cursor.peek() === chars.$RBRACE) {
if (this._isInExpansionCase()) {
this._consumeExpansionCaseEnd();
return true;
}
if (this._isInExpansionForm()) {
this._consumeExpansionFormEnd();
return true;
}
}
return false;
}
_beginToken(type, start = this._cursor.clone()) {
this._currentTokenStart = start;
this._currentTokenType = type;
}
_endToken(parts, end) {
if (this._currentTokenStart === null) {
throw new TokenError('Programming error - attempted to end a token when there was no start to the token', this._currentTokenType, this._cursor.getSpan(end));
}
if (this._currentTokenType === null) {
throw new TokenError('Programming error - attempted to end a token which has no token type', null, this._cursor.getSpan(this._currentTokenStart));
}
const token = {
type: this._currentTokenType,
parts,
sourceSpan: (end ?? this._cursor).getSpan(this._currentTokenStart, this._leadingTriviaCodePoints),
};
this.tokens.push(token);
this._currentTokenStart = null;
this._currentTokenType = null;
return token;
}
_createError(msg, span) {
if (this._isInExpansionForm()) {
msg += ` (Do you have an unescaped "{" in your template? Use "{{ '{' }}") to escape it.)`;
}
const error = new TokenError(msg, this._currentTokenType, span);
this._currentTokenStart = null;
this._currentTokenType = null;
return new _ControlFlowError(error);
}
handleError(e) {
if (e instanceof CursorError) {
e = this._createError(e.msg, this._cursor.getSpan(e.cursor));
}
if (e instanceof _ControlFlowError) {
this.errors.push(e.error);
}
else {
throw e;
}
}
_attemptCharCode(charCode) {
if (this._cursor.peek() === charCode) {
this._cursor.advance();
return true;
}
return false;
}
_attemptCharCodeCaseInsensitive(charCode) {
if (compareCharCodeCaseInsensitive(this._cursor.peek(), charCode)) {
this._cursor.advance();
return true;
}
return false;
}
_requireCharCode(charCode) {
const location = this._cursor.clone();
if (!this._attemptCharCode(charCode)) {
throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(location));
}
}
_attemptStr(chars) {
const len = chars.length;
if (this._cursor.charsLeft() < len) {
return false;
}
const initialPosition = this._cursor.clone();
for (let i = 0; i < len; i++) {
if (!this._attemptCharCode(chars.charCodeAt(i))) {
// If attempting to parse the string fails, we want to reset the parser
// to where it was before the attempt
this._cursor = initialPosition;
return false;
}
}
return true;
}
_attemptStrCaseInsensitive(chars) {
for (let i = 0; i < chars.length; i++) {
if (!this._attemptCharCodeCaseInsensitive(chars.charCodeAt(i))) {
return false;
}
}
return true;
}
_requireStr(chars) {
const location = this._cursor.clone();
if (!this._attemptStr(chars)) {
throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(location));
}
}
_requireStrCaseInsensitive(chars) {
const location = this._cursor.clone();
if (!this._attemptStrCaseInsensitive(chars)) {
throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(location));
}
}
_attemptCharCodeUntilFn(predicate) {
while (!predicate(this._cursor.peek())) {
this._cursor.advance();
}
}
_requireCharCodeUntilFn(predicate, len) {
const start = this._cursor.clone();
this._attemptCharCodeUntilFn(predicate);
if (this._cursor.diff(start) < len) {
throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(start));
}
}
_attemptUntilChar(char) {
while (this._cursor.peek() !== char) {
this._cursor.advance();
}
}
_readChar() {
// Don't rely upon reading directly from `_input` as the actual char value
// may have been generated from an escape sequence.
const char = String.fromCodePoint(this._cursor.peek());
this._cursor.advance();
return char;
}
_consumeEntity(textTokenType) {
this._beginToken(9 /* TokenType.ENCODED_ENTITY */);
const start = this._cursor.clone();
this._cursor.advance();
if (this._attemptCharCode(chars.$HASH)) {
const isHex = this._attemptCharCode(chars.$x) || this._attemptCharCode(chars.$X);
const codeStart = this._cursor.clone();
this._attemptCharCodeUntilFn(isDigitEntityEnd);
if (this._cursor.peek() != chars.$SEMICOLON) {
// Advance cursor to include the peeked character in the string provided to the error
// message.
this._cursor.advance();
const entityType = isHex ? CharacterReferenceType.HEX : CharacterReferenceType.DEC;
throw this._createError(_unparsableEntityErrorMsg(entityType, this._cursor.getChars(start)), this._cursor.getSpan());
}
const strNum = this._cursor.getChars(codeStart);
this._cursor.advance();
try {
const charCode = parseInt(strNum, isHex ? 16 : 10);
this._endToken([String.fromCharCode(charCode), this._cursor.getChars(start)]);
}
catch {
throw this._createError(_unknownEntityErrorMsg(this._cursor.getChars(start)), this._cursor.getSpan());
}
}
else {
const nameStart = this._cursor.clone();
this._attemptCharCodeUntilFn(isNamedEntityEnd);
if (this._cursor.peek() != chars.$SEMICOLON) {
// No semicolon was found so abort the encoded entity token that was in progress, and treat
// this as a text token
this._beginToken(textTokenType, start);
this._cursor = nameStart;
this._endToken(['&']);
}
else {
const name = this._cursor.getChars(nameStart);
this._cursor.advance();
const char = NAMED_ENTITIES[name];
if (!char) {
throw this._createError(_unknownEntityErrorMsg(name), this._cursor.getSpan(start));
}
this._endToken([char, `&${name};`]);
}
}
}
_consumeRawText(consumeEntities, endMarkerPredicate) {
this._beginToken(consumeEntities ? 6 /* TokenType.ESCAPABLE_RAW_TEXT */ : 7 /* TokenType.RAW_TEXT */);
const parts = [];
while (true) {
const tagCloseStart = this._cursor.clone();
const foundEndMarker = endMarkerPredicate();
this._cursor = tagCloseStart;
if (foundEndMarker) {
break;
}
if (consumeEntities && this._cursor.peek() === chars.$AMPERSAND) {
this._endToken([this._processCarriageReturns(parts.join(''))]);
parts.length = 0;
this._consumeEntity(6 /* TokenType.ESCAPABLE_RAW_TEXT */);
this._beginToken(6 /* TokenType.ESCAPABLE_RAW_TEXT */);
}
else {
parts.push(this._readChar());
}
}
this._endToken([this._processCarriageReturns(parts.join(''))]);
}
_consumeComment(start) {
this._beginToken(10 /* TokenType.COMMENT_START */, start);
this._endToken([]);
this._consumeRawText(false, () => this._attemptStr('-->'));
this._beginToken(11 /* TokenType.COMMENT_END */);
this._requireStr('-->');
this._endToken([]);
}
// https://www.w3.org/TR/html5/syntax.html#bogus-comment-state
_consumeBogusComment(start) {
this._beginToken(10 /* TokenType.COMMENT_START */, start);
this._endToken([]);
this._consumeRawText(false, () => this._cursor.peek() === chars.$GT);
this._beginToken(11 /* TokenType.COMMENT_END */);
this._cursor.advance();
this._endToken([]);
}
_consumeCdata(start) {
this._beginToken(12 /* TokenType.CDATA_START */, start);
this._endToken([]);
this._consumeRawText(false, () => this._attemptStr(']]>'));
this._beginToken(13 /* TokenType.CDATA_END */);
this._requireStr(']]>');
this._endToken([]);
}
_consumeDocType(start) {
this._beginToken(18 /* TokenType.DOC_TYPE_START */, start);
this._endToken([]);
this._consumeRawText(false, () => this._cursor.peek() === chars.$GT);
this._beginToken(19 /* TokenType.DOC_TYPE_END */);
this._cursor.advance();
this._endToken([]);
}
_consumePrefixAndName() {
const nameOrPrefixStart = this._cursor.clone();
let prefix = '';
while (this._cursor.peek() !== chars.$COLON && !isPrefixEnd(this._cursor.peek())) {
this._cursor.advance();
}
let nameStart;
if (this._cursor.peek() === chars.$COLON) {
prefix = this._cursor.getChars(nameOrPrefixStart);
this._cursor.advance();
nameStart = this._cursor.clone();
}
else {
nameStart = nameOrPrefixStart;
}
this._requireCharCodeUntilFn(isNameEnd, prefix === '' ? 0 : 1);
const name = this._cursor.getChars(nameStart);
return [prefix, name];
}
_consumeTagOpen(start) {
let tagName;
let prefix;
let openTagToken;
const attrs = [];
try {
if (!chars.isAsciiLetter(this._cursor.peek())) {
throw this._createError(_unexpectedCharacterErrorMsg(this._cursor.peek()), this._cursor.getSpan(start));
}
openTagToken = this._consumeTagOpenStart(start);
prefix = openTagToken.parts[0];
tagName = openTagToken.parts[1];
this._attemptCharCodeUntilFn(isNotWhitespace);
while (this._cursor.peek() !== chars.$SLASH &&
this._cursor.peek() !== chars.$GT &&
this._cursor.peek() !== chars.$LT &&
this._cursor.peek() !== chars.$EOF) {
const [prefix, name] = this._consumeAttributeName();
this._attemptCharCodeUntilFn(isNotWhitespace);
if (this._attemptCharCode(chars.$EQ)) {
this._attemptCharCodeUntilFn(isNotWhitespace);
const value = this._consumeAttributeValue();
attrs.push({ prefix, name, value });
}
else {
attrs.push({ prefix, name });
}
this._attemptCharCodeUntilFn(isNotWhitespace);
}
this._consumeTagOpenEnd();
}
catch (e) {
if (e instanceof _ControlFlowError) {
if (openTagToken) {
// We errored before we could close the opening tag, so it is incomplete.
openTagToken.type = 4 /* TokenType.INCOMPLETE_TAG_OPEN */;
}
else {
// When the start tag is invalid, assume we want a "<" as text.
// Back to back text tokens are merged at the end.
this._beginToken(5 /* TokenType.TEXT */, start);
this._endToken(['<']);
}
return;
}
throw e;
}
if (this._canSelfClose &&
this.tokens[this.tokens.length - 1].type === 2 /* TokenType.TAG_OPEN_END_VOID */) {
return;
}
const contentTokenType = this._getTagContentType(tagName, prefix, this._fullNameStack.length > 0, attrs);
this._handleFullNameStackForTagOpen(prefix, tagName);
if (contentTokenType === TagContentType.RAW_TEXT) {
this._consumeRawTextWithTagClose(prefix, tagName, false);
}
else if (contentTokenType === TagContentType.ESCAPABLE_RAW_TEXT) {
this._consumeRawTextWithTagClose(prefix, tagName, true);
}
}
_consumeRawTextWithTagClose(prefix, tagName, consumeEntities) {
this._consumeRawText(consumeEntities, () => {
if (!this._attemptCharCode(chars.$LT))
return false;
if (!this._attemptCharCode(chars.$SLASH))
return false;
this._attemptCharCodeUntilFn(isNotWhitespace);
if (!this._attemptStrCaseInsensitive(prefix ? `${prefix}:${tagName}` : tagName))
return false;
this._attemptCharCodeUntilFn(isNotWhitespace);
return this._attemptCharCode(chars.$GT);
});
this._beginToken(3 /* TokenType.TAG_CLOSE */);
this._requireCharCodeUntilFn((code) => code === chars.$GT, 3);
this._cursor.advance(); // Consume the `>`
this._endToken([prefix, tagName]);
this._handleFullNameStackForTagClose(prefix, tagName);
}
_consumeTagOpenStart(start) {
this._beginToken(0 /* TokenType.TAG_OPEN_START */, start);
const parts = this._consumePrefixAndName();
return this._endToken(parts);
}
_consumeAttributeName() {
const attrNameStart = this._cursor.peek();
if (attrNameStart === chars.$SQ || attrNameStart === chars.$DQ) {
throw this._createError(_unexpectedCharacterErrorMsg(attrNameStart), this._cursor.getSpan());
}
this._beginToken(14 /* TokenType.ATTR_NAME */);
const prefixAndName = this._consumePrefixAndName();
this._endToken(prefixAndName);
return prefixAndName;
}
_consumeAttributeValue() {
let value;
if (this._cursor.peek() === chars.$SQ || this._cursor.peek() === chars.$DQ) {
const quoteChar = this._cursor.peek();
this._consumeQuote(quoteChar);
// In an attribute then end of the attribute value and the premature end to an interpolation
// are both triggered by the `quoteChar`.
const endPredicate = () => this._cursor.peek() === quoteChar;
value = this._consumeWithInterpolation(16 /* TokenType.ATTR_VALUE_TEXT */, 17 /* TokenType.ATTR_VALUE_INTERPOLATION */, endPredicate, endPredicate);
this._consumeQuote(quoteChar);
}
else {
const endPredicate = () => isNameEnd(this._cursor.peek());
value = this._consumeWithInterpolation(16 /* TokenType.ATTR_VALUE_TEXT */, 17 /* TokenType.ATTR_VALUE_INTERPOLATION */, endPredicate, endPredicate);
}
return value;
}
_consumeQuote(quoteChar) {
this._beginToken(15 /* TokenType.ATTR_QUOTE */);
this._requireCharCode(quoteChar);
this._endToken([String.fromCodePoint(quoteChar)]);
}
_consumeTagOpenEnd() {
const tokenType = this._attemptCharCode(chars.$SLASH)
? 2 /* TokenType.TAG_OPEN_END_VOID */
: 1 /* TokenType.TAG_OPEN_END */;
this._beginToken(tokenType);
this._requireCharCode(chars.$GT);
this._endToken([]);
}
_consumeTagClose(start) {
this._beginToken(3 /* TokenType.TAG_CLOSE */, start);
this._attemptCharCodeUntilFn(isNotWhitespace);
// https://github.com/developit/htm
if (this._allowHtmComponentClosingTags && this._attemptCharCode(chars.$SLASH)) {
this._attemptCharCodeUntilFn(isNotWhitespace);
this._requireCharCode(chars.$GT);
this._endToken([]);
}
else {
const [prefix, name] = this._consumePrefixAndName();
this._attemptCharCodeUntilFn(isNotWhitespace);
this._requireCharCode(chars.$GT);
this._endToken([prefix, name]);
this._handleFullNameStackForTagClose(prefix, name);
}
}
_consumeExpansionFormStart() {
this._beginToken(20 /* TokenType.EXPANSION_FORM_START */);
this._requireCharCode(chars.$LBRACE);
this._endToken([]);
this._expansionCaseStack.push(20 /* TokenType.EXPANSION_FORM_START */);
this._beginToken(7 /* TokenType.RAW_TEXT */);
const condition = this._readUntil(chars.$COMMA);
const normalizedCondition = this._processCarriageReturns(condition);
if (this._i18nNormalizeLineEndingsInICUs) {
// We explicitly want to normalize line endings for this text.
this._endToken([normalizedCondition]);
}
else {
// We are not normalizing line endings.
const conditionToken = this._endToken([condition]);
if (normalizedCondition !== condition) {
this.nonNormalizedIcuExpressions.push(conditionToken);
}
}
this._requireCharCode(chars.$COMMA);
this._attemptCharCodeUntilFn(isNotWhitespace);
this._beginToken(7 /* TokenType.RAW_TEXT */);
const type = this._readUntil(chars.$COMMA);
this._endToken([type]);
this._requireCharCode(chars.$COMMA);
this._attemptCharCodeUntilFn(isNotWhitespace);
}
_consumeExpansionCaseStart() {
this._beginToken(21 /* TokenType.EXPANSION_CASE_VALUE */);
const value = this._readUntil(chars.$LBRACE).trim();
this._endToken([value]);
this._attemptCharCodeUntilFn(isNotWhitespace);
this._beginToken(22 /* TokenType.EXPANSION_CASE_EXP_START */);
this._requireCharCode(chars.$LBRACE);
this._endToken([]);
this._attemptCharCodeUntilFn(isNotWhitespace);
this._expansionCaseStack.push(22 /* TokenType.EXPANSION_CASE_EXP_START */);
}
_consumeExpansionCaseEnd() {
this._beginToken(23 /* TokenType.EXPANSION_CASE_EXP_END */);
this._requireCharCode(chars.$RBRACE);
this._endToken([]);
this._attemptCharCodeUntilFn(isNotWhitespace);
this._expansionCaseStack.pop();
}
_consumeExpansionFormEnd() {
this._beginToken(24 /* TokenType.EXPANSION_FORM_END */);
this._requireCharCode(chars.$RBRACE);
this._endToken([]);
this._expansionCaseStack.pop();
}
/**
* Consume a string that may contain interpolation expressions.
*
* The first token consumed will be of `tokenType` and then there will be alternating
* `interpolationTokenType` and `tokenType` tokens until the `endPredicate()` returns true.
*
* If an interpolation token ends prematurely it will have no end marker in its `parts` array.
*
* @param textTokenType the kind of tokens to interleave around interpolation tokens.
* @param interpolationTokenType the kind of tokens that contain interpolation.
* @param endPredicate a function that should return true when we should stop consuming.
* @param endInterpolation a function that should return true if there is a premature end to an
* interpolation expression - i.e. before we get to the normal interpolation closing marker.
*/
_consumeWithInterpolation(textTokenType, interpolationTokenType, endPredicate, endInterpolation) {
this._beginToken(textTokenType);
const parts = [];
while (!endPredicate()) {
const current = this._cursor.clone();
if (this._interpolationConfig && this._attemptStr(this._interpolationConfig.start)) {
this._endToken([this._processCarriageReturns(parts.join(''))], current);
parts.length = 0;
this._consumeInterpolation(interpolationTokenType, current, endInterpolation);
this._beginToken(textTokenType);
}
else if (this._cursor.peek() === chars.$AMPERSAND) {
this._endToken([this._processCarriageReturns(parts.join(''))]);
parts.length = 0;
this._consumeEntity(textTokenType);
this._beginToken(textTokenType);
}
else {
parts.push(this._readChar());
}
}
// It is possible that an interpolation was started but not ended inside this text token.
// Make sure that we reset the state of the lexer correctly.
this._inInterpolation = false;
const value = this._processCarriageReturns(parts.join(''));
this._endToken([value]);
return value;
}
/**
* Consume a block of text that has been interpreted as an Angular interpolation.
*
* @param interpolationTokenType the type of the interpolation token to generate.
* @param interpolationStart a cursor that points to the start of this interpolation.
* @param prematureEndPredicate a function that should return true if the next characters indicate
* an end to the interpolation before its normal closing marker.
*/
_consumeInterpolation(interpolationTokenType, interpolationStart, prematureEndPredicate) {
const parts = [];
this._beginToken(interpolationTokenType, interpolationStart);
parts.push(this._interpolationConfig.start);
// Find the end of the interpolation, ignoring content inside quotes.
const expressionStart = this._cursor.clone();
let inQuote = null;
let inComment = false;
while (this._cursor.peek() !== chars.$EOF &&
(prematureEndPredicate === null || !prematureEndPredicate())) {
const current = this._cursor.clone();
if (this._isTagStart()) {
// We are starting what looks like an HTML element in the middle of this interpolation.
// Reset the cursor to before the `<` character and end the interpolation token.
// (This is actually wrong but here for backward compatibility).
this._cursor = current;
parts.push(this._getProcessedChars(expressionStart, current));
this._endToken(parts);
return;
}
if (inQuote === null) {
if (this._attemptStr(this._interpolationConfig.end)) {
// We are not in a string, and we hit the end interpolation marker
parts.push(this._getProcessedChars(expressionStart, current));
parts.push(this._interpolationConfig.end);
this._endToken(parts);
return;
}
else if (this._attemptStr('//')) {
// Once we are in a comment we ignore any quotes
inComment = true;
}
}
const char = this._cursor.peek();
this._cursor.advance();
if (char === chars.$BACKSLASH) {
// Skip the next character because it was escaped.
this._cursor.advance();
}
else if (char === inQuote) {
// Exiting the current quoted string
inQuote = null;
}
else if (!inComment && inQuote === null && chars.isQuote(char)) {
// Entering a new quoted string
inQuote = char;
}
}
// We hit EOF without finding a closing interpolation marker
parts.push(this._getProcessedChars(expressionStart, this._cursor));
this._endToken(parts);
}
_getProcessedChars(start, end) {
return this._processCarriageReturns(end.getChars(start));
}
_isTextEnd() {
if (this._isTagStart() || this._cursor.peek() === chars.$EOF) {
return true;
}
if (this._tokenizeIcu && !this._inInterpolation) {
if (this.isExpansionFormStart()) {
// start of an expansion form
return true;
}
if (this._cursor.peek() === chars.$RBRACE && this._isInExpansionCase()) {
// end of and expansion case
return true;
}
}
if (this._tokenizeBlocks &&
!this._inInterpolation &&
!this._isInExpansion() &&
(this._isBlockStart() || this._cursor.peek() === chars.$AT || this._cursor.peek() === chars.$RBRACE)) {
return true;
}
return false;
}
/**
* Returns true if the current cursor is pointing to the start of a tag
* (opening/closing/comments/cdata/etc).
*/
_isTagStart() {
if (this._cursor.peek() === chars.$LT) {
// We assume that `<` followed by whitespace is not the start of an HTML element.
const tmp = this._cursor.clone();
tmp.advance();
// If the next character is alphabetic, ! nor / then it is a tag start
const code = tmp.peek();
if ((chars.$a <= code && code <= chars.$z) ||
(chars.$A <= code && code <= chars.$Z) ||
code === chars.$SLASH ||
code === chars.$BANG) {
return true;
}
}
return false;
}
_isBlockStart() {
if (this._tokenizeBlocks && this._cursor.peek() === chars.$AT) {
const tmp = this._cursor.clone();
// If it is, also verify that the next character is a valid block identifier.
tmp.advance();
if (isBlockNameChar(tmp.peek())) {
return true;
}
}
return false;
}
_readUntil(char) {
const start = this._cursor.clone();
this._attemptUntilChar(char);
return this._cursor.getChars(start);
}
_isInExpansion() {
return this._isInExpansionCase() || this._isInExpansionForm();
}
_isInExpansionCase() {
return (this._expansionCaseStack.length > 0 && this._expansionCaseStack[this._expansionCaseStack.length - 1] ===
22) /* TokenType.EXPANSION_CASE_EXP_START */;
}
_isInExpansionForm() {
return (this._expansionCaseStack.length > 0 && this._expansionCaseStack[this._expansionCaseStack.length - 1] ===
20) /* TokenType.EXPANSION_FORM_START */;
}
isExpansionFormStart() {
if (this._cursor.peek() !== chars.$LBRACE) {
return false;
}
if (this._interpolationConfig) {
const start = this._cursor.clone();
const isInterpolation = this._attemptStr(this._interpolationConfig.start);
this._cursor = start;
return !isInterpolation;
}
return true;
}
_handleFullNameStackForTagOpen(prefix, tagName) {
const fullName = mergeNsAndName(prefix, tagName);
if (this._fullNameStack.length === 0 ||
this._fullNameStack[this._fullNameStack.length - 1] === fullName) {
this._fullNameStack.push(fullName);
}
}
_handleFullNameStackForTagClose(prefix, tagName) {
const fullName = mergeNsAndName(prefix, tagName);
if (this._fullNameStack.length !== 0 &&
this._fullNameStack[this._fullNameStack.length - 1] === fullName) {
this._fullNameStack.pop();
}
}
}
function isNotWhitespace(code) {
return !chars.isWhitespace(code) || code === chars.$EOF;
}
function isNameEnd(code) {
return (chars.isWhitespace(code) ||
code === chars.$GT ||
code === chars.$LT ||
code === chars.$SLASH ||
code === chars.$SQ ||
code === chars.$DQ ||
code === chars.$EQ ||
code === chars.$EOF);
}
function isPrefixEnd(code) {
return ((code < chars.$a || chars.$z < code) &&
(code < chars.$A || chars.$Z < code) &&
(code < chars.$0 || code > chars.$9));
}
function isDigitEntityEnd(code) {
return code === chars.$SEMICOLON || code === chars.$EOF || !chars.isAsciiHexDigit(code);
}
function isNamedEntityEnd(code) {
return code === chars.$SEMICOLON || code === chars.$EOF || !chars.isAsciiLetter(code);
}
function isExpansionCaseStart(peek) {
return peek !== chars.$RBRACE;
}
function compareCharCodeCaseInsensitive(code1, code2) {
return toUpperCaseCharCode(code1) === toUpperCaseCharCode(code2);
}
function toUpperCaseCharCode(code) {
return code >= chars.$a && code <= chars.$z ? code - chars.$a + chars.$A : code;
}
function isBlockNameChar(code) {
return chars.isAsciiLetter(code) || chars.isDigit(code) || code === chars.$_;
}
function isBlockParameterChar(code) {
return code !== chars.$SEMICOLON && isNotWhitespace(code);
}
function mergeTextTokens(srcTokens) {
const dstTokens = [];
let lastDstToken = undefined;
for (let i = 0; i < srcTokens.length; i++) {
const token = srcTokens[i];
if (((lastDstToken && lastDstToken.type === 5 /* TokenType.TEXT */ && token.type === 5) /* TokenType.TEXT */) ||
((lastDstToken &&
lastDstToken.type === 16 /* TokenType.ATTR_VALUE_TEXT */ && token.type === 16) /* TokenType.ATTR_VALUE_TEXT */)) {
lastDstToken.parts[0] += token.parts[0];
lastDstToken.sourceSpan.end = token.sourceSpan.end;
}
else {
lastDstToken = token;
dstTokens.push(lastDstToken);
}
}
return dstTokens;
}
class PlainCharacterCursor {
constructor(fileOrCursor, range) {
if (fileOrCursor instanceof PlainCharacterCursor) {
this.file = fileOrCursor.file;
this.input = fileOrCursor.input;
this.end = fileOrCursor.end;
const state = fileOrCursor.state;
// Note: avoid using `{...fileOrCursor.state}` here as that has a severe performance penalty.
// In ES5 bundles the object spread operator is translated into the `__assign` helper, which
// is not optimized by VMs as efficiently as a raw object literal. Since this constructor is
// called in tight loops, this difference matters.
this.state = {
peek: state.peek,
offset: state.offset,
line: state.line,
column: state.column,
};
}
else {
if (!range) {
throw new Error('Programming error: the range argument must be provided with a file argument.');
}
this.file = fileOrCursor;
this.input = fileOrCursor.content;
this.end = range.endPos;
this.state = {
peek: -1,
offset: range.startPos,
line: range.startLine,
column: range.startCol,
};
}
}
clone() {
return new PlainCharacterCursor(this);
}
peek() {
return this.state.peek;
}
charsLeft() {
return this.end - this.state.offset;
}
diff(other) {
return this.state.offset - other.state.offset;
}
advance() {
this.advanceState(this.state);
}
init() {
this.updatePeek(this.state);
}
getSpan(start, leadingTriviaCodePoints) {
start = start || this;
let fullStart = start;
if (leadingTriviaCodePoints) {
while (this.diff(start) > 0 && leadingTriviaCodePoints.indexOf(start.peek()) !== -1) {
if (fullStart === start) {
start = start.clone();
}
start.advance();
}
}
const startLocation = this.locationFromCursor(start);
const endLocation = this.locationFromCursor(this);
const fullStartLocation = fullStart !== start ? this.locationFromCursor(fullStart) : startLocation;
return new ParseSourceSpan(startLocation, endLocation, fullStartLocation);
}
getChars(start) {
return this.input.substring(start.state.offset, this.state.offset);
}
charAt(pos) {
return this.input.charCodeAt(pos);
}
advanceState(state) {
if (state.offset >= this.end) {
this.state = state;
throw new CursorError('Unexpected character "EOF"', this);
}
const currentChar = this.charAt(state.offset);
if (currentChar === chars.$LF) {
state.line++;
state.column = 0;
}
else if (!chars.isNewLine(currentChar)) {
state.column++;
}
state.offset++;
this.updatePeek(state);
}
updatePeek(state) {
state.peek = state.offset >= this.end ? chars.$EOF : this.charAt(state.offset);
}
locationFromCursor(cursor) {
return new ParseLocation(cursor.file, cursor.state.offset, cursor.state.line, cursor.state.column);
}
}
class EscapedCharacterCursor extends PlainCharacterCursor {
constructor(fileOrCursor, range) {
if (fileOrCursor instanceof EscapedCharacterCursor) {
super(fileOrCursor);
this.internalState = { ...fileOrCursor.internalState };
}
else {
super(fileOrCursor, range);
this.internalState = this.state;
}
}
advance() {
this.state = this.internalState;
super.advance();
this.processEscapeSequence();
}
init() {
super.init();
this.processEscapeSequence();
}
clone() {
return new EscapedCharacterCursor(this);
}
getChars(start) {
const cursor = start.clone();
let chars = '';
while (cursor.internalState.offset < this.internalState.offset) {
chars += String.fromCodePoint(cursor.peek());
cursor.advance();
}
return chars;
}
/**
* Process the escape sequence that starts at the current position in the text.
*
* This method is calle