UNPKG

angular2

Version:

Angular 2 - a web framework for modern web apps

662 lines (661 loc) 26 kB
import { StringWrapper, NumberWrapper, isPresent, isBlank } from 'angular2/src/facade/lang';
import { ListWrapper } from 'angular2/src/facade/collection';
import { ParseLocation, ParseError, ParseSourceFile, ParseSourceSpan } from './parse_util';
import { getHtmlTagDefinition, HtmlTagContentType, NAMED_ENTITIES } from './html_tags';

/**
 * Kinds of tokens emitted by the HTML tokenizer.
 * The object maps names to numeric ids and numeric ids back to names.
 */
export var HtmlTokenType;
(function (HtmlTokenType) {
    HtmlTokenType[HtmlTokenType["TAG_OPEN_START"] = 0] = "TAG_OPEN_START";
    HtmlTokenType[HtmlTokenType["TAG_OPEN_END"] = 1] = "TAG_OPEN_END";
    HtmlTokenType[HtmlTokenType["TAG_OPEN_END_VOID"] = 2] = "TAG_OPEN_END_VOID";
    HtmlTokenType[HtmlTokenType["TAG_CLOSE"] = 3] = "TAG_CLOSE";
    HtmlTokenType[HtmlTokenType["TEXT"] = 4] = "TEXT";
    HtmlTokenType[HtmlTokenType["ESCAPABLE_RAW_TEXT"] = 5] = "ESCAPABLE_RAW_TEXT";
    HtmlTokenType[HtmlTokenType["RAW_TEXT"] = 6] = "RAW_TEXT";
    HtmlTokenType[HtmlTokenType["COMMENT_START"] = 7] = "COMMENT_START";
    HtmlTokenType[HtmlTokenType["COMMENT_END"] = 8] = "COMMENT_END";
    HtmlTokenType[HtmlTokenType["CDATA_START"] = 9] = "CDATA_START";
    HtmlTokenType[HtmlTokenType["CDATA_END"] = 10] = "CDATA_END";
    HtmlTokenType[HtmlTokenType["ATTR_NAME"] = 11] = "ATTR_NAME";
    HtmlTokenType[HtmlTokenType["ATTR_VALUE"] = 12] = "ATTR_VALUE";
    HtmlTokenType[HtmlTokenType["DOC_TYPE"] = 13] = "DOC_TYPE";
    HtmlTokenType[HtmlTokenType["EXPANSION_FORM_START"] = 14] = "EXPANSION_FORM_START";
    HtmlTokenType[HtmlTokenType["EXPANSION_CASE_VALUE"] = 15] = "EXPANSION_CASE_VALUE";
    HtmlTokenType[HtmlTokenType["EXPANSION_CASE_EXP_START"] = 16] = "EXPANSION_CASE_EXP_START";
    HtmlTokenType[HtmlTokenType["EXPANSION_CASE_EXP_END"] = 17] = "EXPANSION_CASE_EXP_END";
    HtmlTokenType[HtmlTokenType["EXPANSION_FORM_END"] = 18] = "EXPANSION_FORM_END";
    HtmlTokenType[HtmlTokenType["EOF"] = 19] = "EOF";
})(HtmlTokenType || (HtmlTokenType = {}));

/**
 * A single lexical token: its type, its string parts and the source span it covers.
 */
export class HtmlToken {
    /**
     * @param type One of the `HtmlTokenType` values.
     * @param parts Array of string parts (e.g. `[prefix, name]` for tag and attribute names).
     * @param sourceSpan `ParseSourceSpan` from the token start to the token end.
     */
    constructor(type, parts, sourceSpan) {
        this.type = type;
        this.parts = parts;
        this.sourceSpan = sourceSpan;
    }
}

/**
 * A tokenization error. Extends `ParseError` and additionally records the type
 * of the token that was being built when the error was created.
 */
export class HtmlTokenError extends ParseError {
    /**
     * @param errorMsg Human readable message.
     * @param tokenType Type of the token in progress (may be null when none was started).
     * @param span Source span the error refers to.
     */
    constructor(errorMsg, tokenType, span) {
        super(span, errorMsg);
        this.tokenType = tokenType;
    }
}

/**
 * Outcome of a tokenizer run: the produced tokens plus every error collected on the way.
 */
export class HtmlTokenizeResult {
    constructor(tokens, errors) {
        this.tokens = tokens;
        this.errors = errors;
    }
}

/**
 * Tokenizes an HTML source string.
 *
 * @param sourceContent The HTML text.
 * @param sourceUrl Url handed to `ParseSourceFile` together with the content.
 * @param tokenizeExpansionForms When true, `{ ... }` expansion forms are tokenized
 *     instead of being treated as plain text. Defaults to false.
 * @returns An `HtmlTokenizeResult`.
 */
export function tokenizeHtml(sourceContent, sourceUrl, tokenizeExpansionForms = false) {
    const sourceFile = new ParseSourceFile(sourceContent, sourceUrl);
    return new _HtmlTokenizer(sourceFile, tokenizeExpansionForms).tokenize();
}

// Character codes used by the tokenizer. `$EOF` doubles as the "no more input" marker
// stored in `peek` / `nextPeek`.
const $EOF = 0;
const $TAB = 9;
const $LF = 10;
const $FF = 12;
const $CR = 13;
const $SPACE = 32;
const $BANG = 33;
const $DQ = 34;
const $HASH = 35;
const $$ = 36;
const $AMPERSAND = 38;
const $SQ = 39;
const $MINUS = 45;
const $SLASH = 47;
const $0 = 48;
const $SEMICOLON = 59;
const $9 = 57;
const $COLON = 58;
const $LT = 60;
const $EQ = 61;
const $GT = 62;
const $QUESTION = 63;
const $LBRACKET = 91;
const $RBRACKET = 93;
const $LBRACE = 123;
const $RBRACE = 125;
const $COMMA = 44;
const $A = 65;
const $F = 70;
const $X = 88;
const $Z = 90;
const $a = 97;
const $f = 102;
const $z = 122;
const $x = 120;
const $NBSP = 160;

// Matches a lone CR or a CRLF pair; both are normalized to LF in token text.
var CR_OR_CRLF_REGEXP = /\r\n?/g;

/** Builds the message for an unexpected character; the EOF code is rendered as "EOF". */
function unexpectedCharacterErrorMsg(charCode) {
    const char = charCode === $EOF ? 'EOF' : StringWrapper.fromCharCode(charCode);
    return `Unexpected character "${char}"`;
}

/** Builds the message for an entity that could not be resolved. */
function unknownEntityErrorMsg(entitySrc) {
    return `Unknown entity "${entitySrc}" - use the "&#<decimal>;" or "&#x<hex>;" syntax`;
}

/**
 * Internal wrapper thrown to unwind the tokenizer when an `HtmlTokenError` occurs.
 * `tokenize()` and `_consumeTagOpen()` catch it; anything else is rethrown untouched.
 */
class ControlFlowError {
    constructor(error) {
        this.error = error;
    }
}

// See http://www.w3.org/TR/html51/syntax.html#writing
/**
 * Character-by-character HTML tokenizer.
 *
 * State:
 *  - `peek` / `nextPeek`: char codes at `index` and `index + 1` (`$EOF` past the end),
 *    taken verbatim from the input (no case folding).
 *  - `index`, `line`, `column`: current position in `input`.
 *  - `expansionCaseStack`: stack of EXPANSION_FORM_START / EXPANSION_CASE_EXP_START markers
 *    describing the currently open expansion constructs.
 *  - `tokens`, `errors`: accumulated output.
 */
class _HtmlTokenizer {
    /**
     * @param file Source file object; its `content` string is the text to tokenize.
     * @param tokenizeExpansionForms Whether `{...}` expansion forms are recognized.
     */
    constructor(file, tokenizeExpansionForms) {
        this.file = file;
        this.tokenizeExpansionForms = tokenizeExpansionForms;
        this.peek = -1;
        this.nextPeek = -1;
        this.index = -1;
        this.line = 0;
        this.column = -1;
        this.expansionCaseStack = [];
        this.tokens = [];
        this.errors = [];
        this.input = file.content;
        this.length = file.content.length;
        // Prime `peek` / `nextPeek` with the first characters of the input.
        this._advance();
    }

    /**
     * Replaces every CR or CRLF in `content` with LF.
     */
    _processCarriageReturns(content) {
        // http://www.w3.org/TR/html5/syntax.html#preprocessing-the-input-stream
        // In order to keep the original position in the source, we can not
        // pre-process it.
        // Instead CRs are processed right before instantiating the tokens.
        return StringWrapper.replaceAll(content, CR_OR_CRLF_REGEXP, '\n');
    }

    /**
     * Runs the tokenizer over the whole input.
     *
     * Errors raised as `ControlFlowError` are recorded in `errors` and scanning resumes
     * from wherever the failing consumer stopped. An EOF token is always appended, and
     * adjacent TEXT tokens are merged before returning.
     *
     * @returns An `HtmlTokenizeResult`.
     */
    tokenize() {
        while (this.peek !== $EOF) {
            const start = this._getLocation();
            try {
                if (this._attemptCharCode($LT)) {
                    if (this._attemptCharCode($BANG)) {
                        if (this._attemptCharCode($LBRACKET)) {
                            this._consumeCdata(start);
                        } else if (this._attemptCharCode($MINUS)) {
                            this._consumeComment(start);
                        } else {
                            this._consumeDocType(start);
                        }
                    } else if (this._attemptCharCode($SLASH)) {
                        this._consumeTagClose(start);
                    } else {
                        this._consumeTagOpen(start);
                    }
                } else if (isSpecialFormStart(this.peek, this.nextPeek) && this.tokenizeExpansionForms) {
                    this._consumeExpansionFormStart();
                } else if (this.peek === $EQ && this.tokenizeExpansionForms) {
                    this._consumeExpansionCaseStart();
                } else if (this.peek === $RBRACE && this.isInExpansionCase() &&
                    this.tokenizeExpansionForms) {
                    this._consumeExpansionCaseEnd();
                } else if (this.peek === $RBRACE && this.isInExpansionForm() &&
                    this.tokenizeExpansionForms) {
                    this._consumeExpansionFormEnd();
                } else {
                    this._consumeText();
                }
            } catch (e) {
                if (e instanceof ControlFlowError) {
                    this.errors.push(e.error);
                } else {
                    throw e;
                }
            }
        }
        this._beginToken(HtmlTokenType.EOF);
        this._endToken([]);
        return new HtmlTokenizeResult(mergeTextTokens(this.tokens), this.errors);
    }

    /** Returns a `ParseLocation` for the current index / line / column. */
    _getLocation() {
        return new ParseLocation(this.file, this.index, this.line, this.column);
    }

    /**
     * Builds a source span; a blank `start` or `end` defaults to the current location.
     */
    _getSpan(start, end) {
        if (isBlank(start)) {
            start = this._getLocation();
        }
        if (isBlank(end)) {
            end = this._getLocation();
        }
        return new ParseSourceSpan(start, end);
    }

    /**
     * Records the type and start of the token being built.
     * A blank `start` defaults to the current location.
     */
    _beginToken(type, start = null) {
        if (isBlank(start)) {
            start = this._getLocation();
        }
        this.currentTokenStart = start;
        this.currentTokenType = type;
    }

    /**
     * Finishes the token started by `_beginToken`, appends it to `tokens` and returns it.
     * A blank `end` defaults to the current location.
     */
    _endToken(parts, end = null) {
        if (isBlank(end)) {
            end = this._getLocation();
        }
        const span = new ParseSourceSpan(this.currentTokenStart, end);
        const token = new HtmlToken(this.currentTokenType, parts, span);
        this.tokens.push(token);
        this.currentTokenStart = null;
        this.currentTokenType = null;
        return token;
    }

    /**
     * Creates an `HtmlTokenError` tagged with the in-progress token type, discards the
     * in-progress token state and returns the error wrapped in a `ControlFlowError`
     * (the caller is expected to throw it).
     */
    _createError(msg, span) {
        const error = new HtmlTokenError(msg, this.currentTokenType, span);
        this.currentTokenStart = null;
        this.currentTokenType = null;
        return new ControlFlowError(error);
    }

    /**
     * Moves one character forward and refreshes `peek` / `nextPeek`.
     * Throws a `ControlFlowError` ("Unexpected character "EOF"") when already at the end.
     */
    _advance() {
        if (this.index >= this.length) {
            throw this._createError(unexpectedCharacterErrorMsg($EOF), this._getSpan());
        }
        if (this.peek === $LF) {
            this.line++;
            this.column = 0;
        } else if (this.peek !== $CR) {
            // A CR does not move the column.
            this.column++;
        }
        this.index++;
        this.peek = this.index >= this.length ?
            $EOF :
            StringWrapper.charCodeAt(this.input, this.index);
        this.nextPeek = this.index + 1 >= this.length ?
            $EOF :
            StringWrapper.charCodeAt(this.input, this.index + 1);
    }

    /** Consumes the current character if it equals `charCode`; returns whether it did. */
    _attemptCharCode(charCode) {
        if (this.peek === charCode) {
            this._advance();
            return true;
        }
        return false;
    }

    /** Like `_attemptCharCode`, but ASCII letters are compared case-insensitively. */
    _attemptCharCodeCaseInsensitive(charCode) {
        if (compareCharCodeCaseInsensitive(this.peek, charCode)) {
            this._advance();
            return true;
        }
        return false;
    }

    /** Consumes `charCode` or throws an "unexpected character" `ControlFlowError`. */
    _requireCharCode(charCode) {
        const location = this._getLocation();
        if (!this._attemptCharCode(charCode)) {
            throw this._createError(unexpectedCharacterErrorMsg(this.peek), this._getSpan(location, location));
        }
    }

    /**
     * Tries to consume `chars` one character at a time.
     * On a mismatch the characters matched so far stay consumed and false is returned.
     */
    _attemptStr(chars) {
        for (let i = 0; i < chars.length; i++) {
            if (!this._attemptCharCode(StringWrapper.charCodeAt(chars, i))) {
                return false;
            }
        }
        return true;
    }

    /** Case-insensitive variant of `_attemptStr` (same partial-consumption behavior). */
    _attemptStrCaseInsensitive(chars) {
        for (let i = 0; i < chars.length; i++) {
            if (!this._attemptCharCodeCaseInsensitive(StringWrapper.charCodeAt(chars, i))) {
                return false;
            }
        }
        return true;
    }

    /** Consumes `chars` or throws an "unexpected character" `ControlFlowError`. */
    _requireStr(chars) {
        const location = this._getLocation();
        if (!this._attemptStr(chars)) {
            throw this._createError(unexpectedCharacterErrorMsg(this.peek), this._getSpan(location));
        }
    }

    /** Advances until `predicate(peek)` is true. */
    _attemptCharCodeUntilFn(predicate) {
        while (!predicate(this.peek)) {
            this._advance();
        }
    }

    /**
     * Advances until `predicate(peek)` is true and throws when fewer than `len`
     * characters were consumed.
     */
    _requireCharCodeUntilFn(predicate, len) {
        const start = this._getLocation();
        this._attemptCharCodeUntilFn(predicate);
        if (this.index - start.offset < len) {
            throw this._createError(unexpectedCharacterErrorMsg(this.peek), this._getSpan(start, start));
        }
    }

    /** Advances until `peek` equals `char` (reaching EOF first makes `_advance` throw). */
    _attemptUntilChar(char) {
        while (this.peek !== char) {
            this._advance();
        }
    }

    /**
     * Reads one character and returns it as a string. When `decodeEntities` is set and
     * the character is `&`, a whole entity is decoded instead.
     */
    _readChar(decodeEntities) {
        if (decodeEntities && this.peek === $AMPERSAND) {
            return this._decodeEntity();
        } else {
            const index = this.index;
            this._advance();
            return this.input[index];
        }
    }

    /**
     * Decodes the entity starting at the current `&`.
     *
     * - `&#<digits>;` / `&#x<hex>;`: returns the character for that code; throws when the
     *   terminating `;` is missing or the number cannot be parsed.
     * - `&name;`: returns the `NAMED_ENTITIES` entry or throws when there is none.
     * - `&` followed by letters without a terminating `;`: rewinds and returns a literal `&`.
     */
    _decodeEntity() {
        const start = this._getLocation();
        this._advance();
        if (this._attemptCharCode($HASH)) {
            const isHex = this._attemptCharCode($x) || this._attemptCharCode($X);
            const numberStart = this._getLocation().offset;
            this._attemptCharCodeUntilFn(isDigitEntityEnd);
            if (this.peek !== $SEMICOLON) {
                throw this._createError(unexpectedCharacterErrorMsg(this.peek), this._getSpan());
            }
            this._advance();
            const strNum = this.input.substring(numberStart, this.index - 1);
            try {
                const charCode = NumberWrapper.parseInt(strNum, isHex ? 16 : 10);
                return StringWrapper.fromCharCode(charCode);
            } catch (e) {
                const entity = this.input.substring(start.offset + 1, this.index - 1);
                throw this._createError(unknownEntityErrorMsg(entity), this._getSpan(start));
            }
        } else {
            const startPosition = this._savePosition();
            this._attemptCharCodeUntilFn(isNamedEntityEnd);
            if (this.peek !== $SEMICOLON) {
                this._restorePosition(startPosition);
                return '&';
            }
            this._advance();
            const name = this.input.substring(start.offset + 1, this.index - 1);
            const char = NAMED_ENTITIES[name];
            if (isBlank(char)) {
                throw this._createError(unknownEntityErrorMsg(name), this._getSpan(start));
            }
            return char;
        }
    }

    /**
     * Consumes text up to (and including) an end marker and emits it as a RAW_TEXT or
     * ESCAPABLE_RAW_TEXT token whose span ends where the end marker starts.
     *
     * @param decodeEntities When true entities are decoded and the token type is
     *     ESCAPABLE_RAW_TEXT; otherwise RAW_TEXT.
     * @param firstCharOfEnd Char code of the first character of the end marker.
     * @param attemptEndRest Callback that tries to consume the remainder of the end marker
     *     and returns whether it matched.
     * @returns The emitted token.
     */
    _consumeRawText(decodeEntities, firstCharOfEnd, attemptEndRest) {
        let tagCloseStart;
        const textStart = this._getLocation();
        this._beginToken(decodeEntities ? HtmlTokenType.ESCAPABLE_RAW_TEXT : HtmlTokenType.RAW_TEXT, textStart);
        const parts = [];
        while (true) {
            tagCloseStart = this._getLocation();
            const candidateStart = this._savePosition();
            if (this._attemptCharCode(firstCharOfEnd)) {
                if (attemptEndRest()) {
                    break;
                }
                // The marker only matched partially. Rewind and keep just its first
                // character as text so that a marker overlapping the consumed characters
                // (e.g. the "-->" inside "--->", or "]]>" inside "]]]>") is still found.
                this._restorePosition(candidateStart);
                const firstCharIndex = this.index;
                this._advance();
                parts.push(this.input[firstCharIndex]);
            }
            while (this.peek !== firstCharOfEnd) {
                parts.push(this._readChar(decodeEntities));
            }
        }
        return this._endToken([this._processCarriageReturns(parts.join(''))], tagCloseStart);
    }

    /**
     * Consumes a comment. Called once `<!-` has been consumed; `start` is the location of `<`.
     * Emits COMMENT_START, a RAW_TEXT token with the comment body, and COMMENT_END.
     */
    _consumeComment(start) {
        this._beginToken(HtmlTokenType.COMMENT_START, start);
        this._requireCharCode($MINUS);
        this._endToken([]);
        const textToken = this._consumeRawText(false, $MINUS, () => this._attemptStr('->'));
        this._beginToken(HtmlTokenType.COMMENT_END, textToken.sourceSpan.end);
        this._endToken([]);
    }

    /**
     * Consumes a CDATA section. Called once `<![` has been consumed; `start` is the
     * location of `<`. Emits CDATA_START, a RAW_TEXT token and CDATA_END.
     */
    _consumeCdata(start) {
        this._beginToken(HtmlTokenType.CDATA_START, start);
        this._requireStr('CDATA[');
        this._endToken([]);
        const textToken = this._consumeRawText(false, $RBRACKET, () => this._attemptStr(']>'));
        this._beginToken(HtmlTokenType.CDATA_END, textToken.sourceSpan.end);
        this._endToken([]);
    }

    /**
     * Consumes a `<!...>` declaration and emits a DOC_TYPE token whose single part is the
     * text between `<!` and `>`. `start` is the location of `<`.
     */
    _consumeDocType(start) {
        this._beginToken(HtmlTokenType.DOC_TYPE, start);
        this._attemptUntilChar($GT);
        this._advance();
        this._endToken([this.input.substring(start.offset + 2, this.index - 1)]);
    }

    /**
     * Consumes a `prefix:name` or plain `name`.
     *
     * @returns `[prefix, name]` where `prefix` is null when no `:` was found.
     */
    _consumePrefixAndName() {
        const nameOrPrefixStart = this.index;
        let prefix = null;
        while (this.peek !== $COLON && !isPrefixEnd(this.peek)) {
            this._advance();
        }
        let nameStart;
        if (this.peek === $COLON) {
            this._advance();
            prefix = this.input.substring(nameOrPrefixStart, this.index - 1);
            nameStart = this.index;
        } else {
            nameStart = nameOrPrefixStart;
        }
        // At least one more character is required only when nothing of the name has been
        // consumed yet.
        this._requireCharCodeUntilFn(isNameEnd, this.index === nameStart ? 1 : 0);
        const name = this.input.substring(nameStart, this.index);
        return [prefix, name];
    }

    /**
     * Consumes an opening tag with its attributes. Called once `<` has been consumed;
     * `start` is the location of `<`.
     *
     * When the tag is malformed the position and the tokens are rolled back and a TEXT
     * token holding `<` is emitted instead. For tags whose definition declares raw or
     * escapable raw text content, the content and the matching close tag are consumed too.
     */
    _consumeTagOpen(start) {
        const savedPos = this._savePosition();
        let lowercaseTagName;
        try {
            if (!isAsciiLetter(this.peek)) {
                throw this._createError(unexpectedCharacterErrorMsg(this.peek), this._getSpan());
            }
            const nameStart = this.index;
            this._consumeTagOpenStart(start);
            lowercaseTagName = this.input.substring(nameStart, this.index).toLowerCase();
            this._attemptCharCodeUntilFn(isNotWhitespace);
            while (this.peek !== $SLASH && this.peek !== $GT) {
                this._consumeAttributeName();
                this._attemptCharCodeUntilFn(isNotWhitespace);
                if (this._attemptCharCode($EQ)) {
                    this._attemptCharCodeUntilFn(isNotWhitespace);
                    this._consumeAttributeValue();
                }
                this._attemptCharCodeUntilFn(isNotWhitespace);
            }
            this._consumeTagOpenEnd();
        } catch (e) {
            if (e instanceof ControlFlowError) {
                // When the start tag is invalid, assume we want a "<"
                this._restorePosition(savedPos);
                // Back to back text tokens are merged at the end
                this._beginToken(HtmlTokenType.TEXT, start);
                this._endToken(['<']);
                return;
            }
            throw e;
        }
        const contentTokenType = getHtmlTagDefinition(lowercaseTagName).contentType;
        if (contentTokenType === HtmlTagContentType.RAW_TEXT) {
            this._consumeRawTextWithTagClose(lowercaseTagName, false);
        } else if (contentTokenType === HtmlTagContentType.ESCAPABLE_RAW_TEXT) {
            this._consumeRawTextWithTagClose(lowercaseTagName, true);
        }
    }

    /**
     * Consumes raw text up to `</tagName>` (whitespace allowed after `</` and before `>`,
     * tag name matched case-insensitively) and emits the text token followed by a
     * TAG_CLOSE token with parts `[null, lowercaseTagName]`.
     */
    _consumeRawTextWithTagClose(lowercaseTagName, decodeEntities) {
        const textToken = this._consumeRawText(decodeEntities, $LT, () => {
            if (!this._attemptCharCode($SLASH)) {
                return false;
            }
            this._attemptCharCodeUntilFn(isNotWhitespace);
            if (!this._attemptStrCaseInsensitive(lowercaseTagName)) {
                return false;
            }
            this._attemptCharCodeUntilFn(isNotWhitespace);
            if (!this._attemptCharCode($GT)) {
                return false;
            }
            return true;
        });
        this._beginToken(HtmlTokenType.TAG_CLOSE, textToken.sourceSpan.end);
        this._endToken([null, lowercaseTagName]);
    }

    /** Emits TAG_OPEN_START with parts `[prefix, name]`; `start` is the location of `<`. */
    _consumeTagOpenStart(start) {
        this._beginToken(HtmlTokenType.TAG_OPEN_START, start);
        const parts = this._consumePrefixAndName();
        this._endToken(parts);
    }

    /** Emits ATTR_NAME with parts `[prefix, name]`. */
    _consumeAttributeName() {
        this._beginToken(HtmlTokenType.ATTR_NAME);
        const prefixAndName = this._consumePrefixAndName();
        this._endToken(prefixAndName);
    }

    /**
     * Emits ATTR_VALUE. Quoted values (single or double quotes) have entities decoded;
     * unquoted values run up to the next name-end character, are taken verbatim and must
     * be at least one character long. CR / CRLF are normalized to LF in both cases.
     */
    _consumeAttributeValue() {
        this._beginToken(HtmlTokenType.ATTR_VALUE);
        let value;
        if (this.peek === $SQ || this.peek === $DQ) {
            const quoteChar = this.peek;
            this._advance();
            const parts = [];
            while (this.peek !== quoteChar) {
                parts.push(this._readChar(true));
            }
            value = parts.join('');
            this._advance();
        } else {
            const valueStart = this.index;
            this._requireCharCodeUntilFn(isNameEnd, 1);
            value = this.input.substring(valueStart, this.index);
        }
        this._endToken([this._processCarriageReturns(value)]);
    }

    /**
     * Consumes `>` or `/>` and emits TAG_OPEN_END or TAG_OPEN_END_VOID respectively.
     * The token span starts after the optional `/`.
     */
    _consumeTagOpenEnd() {
        const tokenType = this._attemptCharCode($SLASH) ?
            HtmlTokenType.TAG_OPEN_END_VOID :
            HtmlTokenType.TAG_OPEN_END;
        this._beginToken(tokenType);
        this._requireCharCode($GT);
        this._endToken([]);
    }

    /**
     * Consumes a closing tag. Called once `</` has been consumed; `start` is the location
     * of `<`. Emits TAG_CLOSE with parts `[prefix, name]`.
     */
    _consumeTagClose(start) {
        this._beginToken(HtmlTokenType.TAG_CLOSE, start);
        this._attemptCharCodeUntilFn(isNotWhitespace);
        const prefixAndName = this._consumePrefixAndName();
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this._requireCharCode($GT);
        this._endToken(prefixAndName);
    }

    /**
     * Consumes the head of an expansion form: `{`, text up to the first `,`, then text up
     * to the second `,`. Emits EXPANSION_FORM_START and one RAW_TEXT token per text
     * segment, then pushes EXPANSION_FORM_START on the expansion stack.
     */
    _consumeExpansionFormStart() {
        this._beginToken(HtmlTokenType.EXPANSION_FORM_START, this._getLocation());
        this._requireCharCode($LBRACE);
        this._endToken([]);

        this._beginToken(HtmlTokenType.RAW_TEXT, this._getLocation());
        const condition = this._readUntil($COMMA);
        this._endToken([condition], this._getLocation());
        this._requireCharCode($COMMA);
        this._attemptCharCodeUntilFn(isNotWhitespace);

        this._beginToken(HtmlTokenType.RAW_TEXT, this._getLocation());
        const type = this._readUntil($COMMA);
        this._endToken([type], this._getLocation());
        this._requireCharCode($COMMA);
        this._attemptCharCodeUntilFn(isNotWhitespace);

        this.expansionCaseStack.push(HtmlTokenType.EXPANSION_FORM_START);
    }

    /**
     * Consumes `=value {`. Emits EXPANSION_CASE_VALUE (trimmed text between `=` and `{`)
     * and EXPANSION_CASE_EXP_START, then pushes EXPANSION_CASE_EXP_START on the stack.
     */
    _consumeExpansionCaseStart() {
        this._requireCharCode($EQ);

        this._beginToken(HtmlTokenType.EXPANSION_CASE_VALUE, this._getLocation());
        const value = this._readUntil($LBRACE).trim();
        this._endToken([value], this._getLocation());
        this._attemptCharCodeUntilFn(isNotWhitespace);

        this._beginToken(HtmlTokenType.EXPANSION_CASE_EXP_START, this._getLocation());
        this._requireCharCode($LBRACE);
        this._endToken([], this._getLocation());
        this._attemptCharCodeUntilFn(isNotWhitespace);

        this.expansionCaseStack.push(HtmlTokenType.EXPANSION_CASE_EXP_START);
    }

    /** Consumes the `}` closing an expansion case, skips whitespace and pops the stack. */
    _consumeExpansionCaseEnd() {
        this._beginToken(HtmlTokenType.EXPANSION_CASE_EXP_END, this._getLocation());
        this._requireCharCode($RBRACE);
        this._endToken([], this._getLocation());
        this._attemptCharCodeUntilFn(isNotWhitespace);
        this.expansionCaseStack.pop();
    }

    /** Consumes the `}` closing an expansion form and pops the stack. */
    _consumeExpansionFormEnd() {
        this._beginToken(HtmlTokenType.EXPANSION_FORM_END, this._getLocation());
        this._requireCharCode($RBRACE);
        this._endToken([]);
        this.expansionCaseStack.pop();
    }

    /**
     * Consumes a run of text (entities decoded, CR / CRLF normalized) and emits a TEXT
     * token. At least one character is always consumed. `{{` / `}}` pairs are tracked so
     * that a `}` inside an interpolation does not end the text.
     */
    _consumeText() {
        const start = this._getLocation();
        this._beginToken(HtmlTokenType.TEXT, start);
        const parts = [];
        let interpolation = false;
        if (this.peek === $LBRACE && this.nextPeek === $LBRACE) {
            parts.push(this._readChar(true));
            parts.push(this._readChar(true));
            interpolation = true;
        } else {
            parts.push(this._readChar(true));
        }
        while (!this.isTextEnd(interpolation)) {
            if (this.peek === $LBRACE && this.nextPeek === $LBRACE) {
                parts.push(this._readChar(true));
                parts.push(this._readChar(true));
                interpolation = true;
            } else if (this.peek === $RBRACE && this.nextPeek === $RBRACE && interpolation) {
                parts.push(this._readChar(true));
                parts.push(this._readChar(true));
                interpolation = false;
            } else {
                parts.push(this._readChar(true));
            }
        }
        this._endToken([this._processCarriageReturns(parts.join(''))]);
    }

    /**
     * Tells whether the current character ends a text run: `<`, EOF, or (only when
     * expansion forms are enabled) the start of an expansion form or a `}` that closes
     * an open expansion case / form while not inside an interpolation.
     */
    isTextEnd(interpolation) {
        if (this.peek === $LT || this.peek === $EOF) {
            return true;
        }
        if (this.tokenizeExpansionForms) {
            if (isSpecialFormStart(this.peek, this.nextPeek)) {
                return true;
            }
            if (this.peek === $RBRACE && !interpolation &&
                (this.isInExpansionCase() || this.isInExpansionForm())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Snapshots the scanner state as `[peek, index, column, line, tokens.length]`
     * for a later `_restorePosition`.
     */
    _savePosition() {
        return [this.peek, this.index, this.column, this.line, this.tokens.length];
    }

    /**
     * Advances until `peek` equals `char` and returns the text that was skipped
     * (the terminating character is not consumed).
     */
    _readUntil(char) {
        const start = this.index;
        this._attemptUntilChar(char);
        return this.input.substring(start, this.index);
    }

    /**
     * Rewinds the scanner to a snapshot taken by `_savePosition` and drops every token
     * emitted since then.
     */
    _restorePosition(position) {
        this.peek = position[0];
        this.index = position[1];
        this.column = position[2];
        this.line = position[3];
        // The snapshot does not carry the look-ahead character, so derive it from the
        // restored index to keep `peek` and `nextPeek` consistent.
        this.nextPeek = this.index + 1 >= this.length ?
            $EOF :
            StringWrapper.charCodeAt(this.input, this.index + 1);
        const nbTokens = position[4];
        if (nbTokens < this.tokens.length) {
            // remove any extra tokens
            this.tokens = ListWrapper.slice(this.tokens, 0, nbTokens);
        }
    }

    /** True when the innermost open expansion construct is a case body. */
    isInExpansionCase() {
        return this.expansionCaseStack.length > 0 &&
            this.expansionCaseStack[this.expansionCaseStack.length - 1] ===
                HtmlTokenType.EXPANSION_CASE_EXP_START;
    }

    /** True when the innermost open expansion construct is an expansion form. */
    isInExpansionForm() {
        return this.expansionCaseStack.length > 0 &&
            this.expansionCaseStack[this.expansionCaseStack.length - 1] ===
                HtmlTokenType.EXPANSION_FORM_START;
    }
}

/** True for anything that is not whitespace; EOF counts as "not whitespace" so scans stop there. */
function isNotWhitespace(code) {
    return !isWhitespace(code) || code === $EOF;
}

/** True for char codes from TAB (9) through SPACE (32) and for NBSP (160). */
function isWhitespace(code) {
    return (code >= $TAB && code <= $SPACE) || (code === $NBSP);
}

/** True for characters that terminate a tag / attribute name or an unquoted value. */
function isNameEnd(code) {
    return isWhitespace(code) || code === $GT || code === $SLASH || code === $SQ || code === $DQ ||
        code === $EQ;
}

/** True for anything other than an ASCII letter or digit. */
function isPrefixEnd(code) {
    return (code < $a || $z < code) && (code < $A || $Z < code) && (code < $0 || code > $9);
}

/** True when `code` ends the digits of a numeric entity (`;`, EOF, or a non hex digit). */
function isDigitEntityEnd(code) {
    return code === $SEMICOLON || code === $EOF || !isAsciiHexDigit(code);
}

/** True when `code` ends the name of a named entity (`;`, EOF, or a non letter). */
function isNamedEntityEnd(code) {
    return code === $SEMICOLON || code === $EOF || !isAsciiLetter(code);
}

/** True for a `{` that is not immediately followed by another `{`. */
function isSpecialFormStart(peek, nextPeek) {
    return peek === $LBRACE && nextPeek !== $LBRACE;
}

/** True for `a`-`z` and `A`-`Z`. */
function isAsciiLetter(code) {
    return code >= $a && code <= $z || code >= $A && code <= $Z;
}

/** True for `0`-`9`, `a`-`f` and `A`-`F`. */
function isAsciiHexDigit(code) {
    return code >= $a && code <= $f || code >= $A && code <= $F || code >= $0 && code <= $9;
}

/** Compares two char codes, ignoring the case of ASCII letters. */
function compareCharCodeCaseInsensitive(code1, code2) {
    return toUpperCaseCharCode(code1) === toUpperCaseCharCode(code2);
}

/** Maps `a`-`z` to `A`-`Z`; every other code is returned unchanged. */
function toUpperCaseCharCode(code) {
    return code >= $a && code <= $z ? code - $a + $A : code;
}

/**
 * Collapses runs of consecutive TEXT tokens into one token.
 * The first token of each run is updated in place: its first part receives the
 * concatenated text and its span end is moved to the end of the last merged token.
 *
 * @param srcTokens Tokens in source order.
 * @returns A new array with the merged tokens.
 */
function mergeTextTokens(srcTokens) {
    const dstTokens = [];
    let lastDstToken;
    for (let i = 0; i < srcTokens.length; i++) {
        const token = srcTokens[i];
        if (isPresent(lastDstToken) && lastDstToken.type === HtmlTokenType.TEXT &&
            token.type === HtmlTokenType.TEXT) {
            lastDstToken.parts[0] += token.parts[0];
            lastDstToken.sourceSpan.end = token.sourceSpan.end;
        } else {
            lastDstToken = token;
            dstTokens.push(lastDstToken);
        }
    }
    return dstTokens;
}