tag-soup
Version:
The fastest pure JS SAX/DOM XML/HTML parser.
578 lines (577 loc) • 26.8 kB
JavaScript
/**
* The error thrown by a parser if a {@link ParserError.input input} is malformed.
*
* @group Parser
*/
export class ParserError extends SyntaxError {
/**
* Creates a new {@link ParserError} instance.
*
* @param message The error message.
* @param input The text where an error was detected.
* @param startIndex The index of the first char in text where an error was detected, inclusive.
* @param endIndex The index of the last char in text where an error was detected, exclusive.
*/
constructor(message, input, startIndex = -1, endIndex = startIndex) {
super(message);
this.input = input;
this.startIndex = startIndex;
this.endIndex = endIndex;
}
}
/**
* @internal
*/
ParserError.prototype.name = 'ParserError';
/**
* Reads tokens from text and returns them by invoking a callback.
*
* Tokens are _guaranteed_ to be returned in correct order. Missing tokens are inserted to restore the correct order if
* needed.
*
* @example
* tokenizeMarkup(
* 'Hello, <b>Bob</b>!',
* (token, startIndex, endIndex) => {
* // Handle token here
* },
* resolveTokenizerOptions(htmlTokenizerOptions)
* );
*
* @param input The text to read tokens from.
* @param callback The callback that is invoked when a token is read.
* @param options Tokenizer options prepared by {@link resolveTokenizerOptions}.
*/
export function tokenizeMarkup(input, callback, options = {}) {
const { readTag = getCaseSensitiveHashCode, voidTags, implicitlyClosedTags, implicitlyOpenedTags, areUnbalancedStartTagsImplicitlyClosed = false, areUnbalancedEndTagsIgnored = false, } = options;
const tagStack = [0, 0, 0, 0, 0, 0, 0, 0];
let tagStackCursor = -1;
const tokenCallback = (token, startIndex, endIndex) => {
switch (token) {
case TOKEN_START_TAG_NAME:
const startTag = readTag(input, startIndex, endIndex);
if (implicitlyClosedTags !== undefined) {
tagStackCursor = insertEndTags(implicitlyClosedTags.get(startTag), tagStack, tagStackCursor, callback, startIndex - 1);
}
callback(TOKEN_START_TAG_NAME, startIndex, endIndex);
tagStack[++tagStackCursor] = startTag;
break;
case TOKEN_START_TAG_CLOSING:
callback(TOKEN_START_TAG_CLOSING, startIndex, endIndex);
if (voidTags !== undefined && voidTags.has(tagStack[tagStackCursor])) {
callback(TOKEN_END_TAG_NAME, endIndex, endIndex);
--tagStackCursor;
}
break;
case TOKEN_START_TAG_SELF_CLOSING:
callback(TOKEN_START_TAG_SELF_CLOSING, startIndex, endIndex);
--tagStackCursor;
break;
case TOKEN_END_TAG_NAME:
const endTag = readTag(input, startIndex, endIndex);
if (tagStackCursor !== -1 && tagStack[tagStackCursor] === endTag) {
// Correctly closed parent tag
callback(TOKEN_END_TAG_NAME, startIndex, endIndex);
--tagStackCursor;
break;
}
// Include "</"
const endTagStartIndex = startIndex - 2;
const startTagIndex = tagStackCursor !== -1 ? tagStack.lastIndexOf(endTag, tagStackCursor) : -1;
// Found a start tag
if (startTagIndex !== -1) {
if (!areUnbalancedStartTagsImplicitlyClosed && startTagIndex !== tagStackCursor) {
throw new ParserError('Expected an end tag.', input, endTagStartIndex);
}
// Insert unbalanced end tags before the opened start tag
while (startTagIndex < tagStackCursor) {
callback(TOKEN_END_TAG_NAME, endTagStartIndex, endTagStartIndex);
--tagStackCursor;
}
callback(TOKEN_END_TAG_NAME, startIndex, endIndex);
--tagStackCursor;
break;
}
if (implicitlyOpenedTags === undefined || !implicitlyOpenedTags.has(endTag)) {
if (!areUnbalancedEndTagsIgnored) {
throw new ParserError('Unexpected end tag.', input, startIndex, endIndex);
}
break;
}
if (implicitlyClosedTags !== undefined) {
tagStackCursor = insertEndTags(implicitlyClosedTags.get(endTag), tagStack, tagStackCursor, callback, endTagStartIndex);
}
callback(TOKEN_START_TAG_NAME, startIndex, endIndex);
callback(TOKEN_START_TAG_CLOSING, endIndex, endIndex + 1);
callback(TOKEN_END_TAG_NAME, startIndex, endIndex);
break;
default:
callback(token, startIndex, endIndex);
break;
}
};
readTokens(input, tokenCallback, options);
if (tagStackCursor !== -1 && !areUnbalancedStartTagsImplicitlyClosed) {
throw new ParserError('Unexpected end of the document.', input, input.length);
}
while (tagStackCursor-- !== -1) {
callback(TOKEN_END_TAG_NAME, input.length, input.length);
}
}
function insertEndTags(tagsToClose, tagStack, tagStackCursor, callback, insertionIndex) {
if (tagsToClose === undefined) {
return tagStackCursor;
}
let index = 0;
while (index <= tagStackCursor && !tagsToClose.has(tagStack[index])) {
++index;
}
while (index <= tagStackCursor) {
callback(TOKEN_END_TAG_NAME, insertionIndex, insertionIndex);
--tagStackCursor;
}
return tagStackCursor;
}
const SCOPE_PROLOGUE = 0;
const SCOPE_TEXT = 1;
const SCOPE_START_TAG = 2;
const TOKEN_TEXT = 'TEXT';
const TOKEN_START_TAG_NAME = 'START_TAG_NAME';
const TOKEN_START_TAG_CLOSING = 'START_TAG_CLOSING';
const TOKEN_START_TAG_SELF_CLOSING = 'START_TAG_SELF_CLOSING';
const TOKEN_END_TAG_NAME = 'END_TAG_NAME';
const TOKEN_ATTRIBUTE_NAME = 'ATTRIBUTE_NAME';
const TOKEN_ATTRIBUTE_VALUE = 'ATTRIBUTE_VALUE';
const TOKEN_COMMENT = 'COMMENT';
const TOKEN_PROCESSING_INSTRUCTION_TARGET = 'PROCESSING_INSTRUCTION_TARGET';
const TOKEN_PROCESSING_INSTRUCTION_DATA = 'PROCESSING_INSTRUCTION_DATA';
const TOKEN_CDATA_SECTION = 'CDATA_SECTION';
const TOKEN_DOCTYPE_NAME = 'DOCTYPE_NAME';
/**
* Reads tokens from the text and returns tokens by invoking a callback.
*
* Tokens returned in the same order they are listed in text.
*/
export function readTokens(input, callback, options = {}) {
const { readTag = getCaseSensitiveHashCode, rawTextTags, isFragment, isStrict = false } = options;
let scope = isFragment ? SCOPE_TEXT : SCOPE_PROLOGUE;
let textStartIndex = isFragment ? 0 : skipSpaces(input, 0);
const inputLength = input.length;
const skipName = isStrict ? skipXMLName : skipHTMLName;
const skipAttributeName = isStrict ? skipXMLName : skipHTMLAttributeName;
const foreignTagStack = [0, 0, 0, 0];
let foreignTagStackCursor = -1;
let enclosingRawTextTag = 0;
for (let index = textStartIndex, nextIndex = index; index < inputLength; index = nextIndex) {
let charCode = input.charCodeAt(index);
if (scope === SCOPE_START_TAG) {
// ----------------------------------------------------------
// Self-closing start tag
// ----------------------------------------------------------
if (options.areSelfClosingTagsRecognized &&
charCode === /* / */ 47 &&
getCharCodeAt(input, index + 1) === /* > */ 62) {
// Skip "/>"
nextIndex += 2;
callback(TOKEN_START_TAG_SELF_CLOSING, index, nextIndex);
scope = SCOPE_TEXT;
textStartIndex = nextIndex;
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
// ----------------------------------------------------------
// Closing of a start tag
// ----------------------------------------------------------
if (charCode === /* > */ 62) {
// Skip ">"
++nextIndex;
callback(TOKEN_START_TAG_CLOSING, index, nextIndex);
scope = SCOPE_TEXT;
textStartIndex = nextIndex;
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
// ----------------------------------------------------------
// Attribute
// ----------------------------------------------------------
nextIndex = skipAttributeName(input, index);
// No attribute name
if (nextIndex === index) {
// Skip illegal char
if (!isStrict) {
textStartIndex = ++nextIndex;
continue;
}
throw new ParserError('Expected an attribute name' +
(options.areSelfClosingTagsRecognized ? ", a self-closing start tag ('/>')," : '') +
" or a start tag closing ('>').", input, index, index + 1);
}
callback(TOKEN_ATTRIBUTE_NAME, index, nextIndex);
nextIndex = skipSpaces(input, nextIndex);
// No attribute value
if (getCharCodeAt(input, nextIndex) !== /* = */ 61) {
if (!isStrict) {
callback(TOKEN_ATTRIBUTE_VALUE, nextIndex, nextIndex);
continue;
}
throw new ParserError("Expected an attribute value separated by an equals sign ('=').", input, nextIndex, nextIndex + 1);
}
// Skip "="
++nextIndex;
nextIndex = skipSpaces(input, nextIndex);
const quoteCharCode = getCharCodeAt(input, nextIndex);
if (isStrict && quoteCharCode !== /* " */ 34) {
throw new ParserError("Expected a double-quoted attribute value ('\"').", input, nextIndex, nextIndex + 1);
}
// ----------------------------------------------------------
// Unquoted attribute value
// ----------------------------------------------------------
if (quoteCharCode !== /* " */ 34 && quoteCharCode !== /* ' */ 39) {
callback(TOKEN_ATTRIBUTE_VALUE, nextIndex, (nextIndex = skipChars(input, nextIndex, isHTMLAttributeNameChar)));
textStartIndex = nextIndex = skipSpaces(input, nextIndex);
continue;
}
// ----------------------------------------------------------
// Quoted attribute value
// ----------------------------------------------------------
// Skip opening quote char
const attributeValueStartIndex = ++nextIndex;
nextIndex = getIndexOfOrLength(input, quoteCharCode === /* " */ 34 ? '"' : "'", nextIndex);
if (isStrict && nextIndex === inputLength) {
throw new ParserError("Expected the attribute value to be closed with a double-quote ('\"').", input, attributeValueStartIndex, inputLength);
}
callback(TOKEN_ATTRIBUTE_VALUE, attributeValueStartIndex, nextIndex);
// Skip closing quote char
++nextIndex;
textStartIndex = nextIndex = skipSpaces(input, nextIndex);
continue;
}
// ----------------------------------------------------------
// Skip to the next tag
// ----------------------------------------------------------
if (charCode !== /* < */ 60) {
scope = SCOPE_TEXT;
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
// Skip "<"
++nextIndex;
charCode = getCharCodeAt(input, nextIndex);
// ----------------------------------------------------------
// Processing instruction
// ----------------------------------------------------------
if (options.areProcessingInstructionsRecognized && enclosingRawTextTag === 0 && charCode === /* ? */ 63) {
if (textStartIndex !== index) {
scope = SCOPE_TEXT;
callback(TOKEN_TEXT, textStartIndex, index);
}
// Skip "?"
++nextIndex;
const targetStartIndex = nextIndex;
nextIndex = skipName(input, nextIndex);
// No target
if (isStrict && targetStartIndex === nextIndex) {
throw new ParserError('Expected a processing instruction target.', input, targetStartIndex, targetStartIndex + 1);
}
callback(TOKEN_PROCESSING_INSTRUCTION_TARGET, targetStartIndex, nextIndex);
// https://www.w3.org/TR/xml/#sec-pi
nextIndex = skipSpaces(input, nextIndex);
callback(TOKEN_PROCESSING_INSTRUCTION_DATA, nextIndex, (nextIndex = getIndexOfOrLength(input, '?>', nextIndex)));
// Skip "?>"
nextIndex += 2;
textStartIndex = nextIndex =
scope == SCOPE_PROLOGUE ? skipSpaces(input, nextIndex) : skipUntilTagOpening(input, nextIndex);
continue;
}
// ----------------------------------------------------------
// DOCTYPE
// ----------------------------------------------------------
if (scope === SCOPE_PROLOGUE &&
enclosingRawTextTag === 0 &&
charCode === /* ! */ 33 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 1) === /* d */ 100 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 2) === /* o */ 111 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 3) === /* c */ 99 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 4) === /* t */ 116 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 5) === /* y */ 121 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 6) === /* p */ 112 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 7) === /* e */ 101) {
// Skip "!DOCTYPE"
nextIndex += 8;
nextIndex = skipSpaces(input, nextIndex);
callback(TOKEN_DOCTYPE_NAME, nextIndex, (nextIndex = skipName(input, nextIndex)));
textStartIndex = nextIndex = skipSpaces(input, skipUntilTagClosing(input, nextIndex) + 1);
continue;
}
// ----------------------------------------------------------
// CDATA section
// ----------------------------------------------------------
if (options.areCDATASectionsRecognized &&
enclosingRawTextTag === 0 &&
getCharCodeAt(input, nextIndex + 1) === /* [ */ 91 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 2) === /* c */ 99 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 3) === /* d */ 100 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 4) === /* a */ 97 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 5) === /* t */ 116 &&
getCaseInsensitiveCharCodeAt(input, nextIndex + 6) === /* a */ 97 &&
getCharCodeAt(input, nextIndex + 7) === /* [ */ 91) {
if (textStartIndex !== index) {
callback(TOKEN_TEXT, textStartIndex, index);
}
// Skip "![CDATA["
nextIndex += 8;
callback(TOKEN_CDATA_SECTION, nextIndex, (nextIndex = getIndexOfOrLength(input, ']]>', nextIndex)));
scope = SCOPE_TEXT;
textStartIndex = nextIndex += 3;
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
// ----------------------------------------------------------
// Comment
// ----------------------------------------------------------
if (enclosingRawTextTag === 0 &&
charCode === /* ! */ 33 &&
getCharCodeAt(input, nextIndex + 1) === /* - */ 45 &&
getCharCodeAt(input, nextIndex + 2) === /* - */ 45) {
if (textStartIndex !== index) {
scope = SCOPE_TEXT;
callback(TOKEN_TEXT, textStartIndex, index);
}
// Skip "!--"
nextIndex += 3;
callback(TOKEN_COMMENT, nextIndex, (nextIndex = getIndexOfOrLength(input, '-->', nextIndex)));
// Skip "-->"
textStartIndex = nextIndex += 3;
if (scope === SCOPE_PROLOGUE) {
textStartIndex = nextIndex = skipSpaces(input, nextIndex);
continue;
}
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
// ----------------------------------------------------------
// Quirky comment
// ----------------------------------------------------------
if (enclosingRawTextTag === 0 && (charCode === /* ? */ 63 || charCode === /* ! */ 33)) {
if (textStartIndex !== index) {
scope = SCOPE_TEXT;
callback(TOKEN_TEXT, textStartIndex, index);
}
if (isStrict) {
throw new ParserError(charCode === /* ? */ 63
? 'Processing instructions are forbidden.'
: "Expected a comment ('<!--')" +
(isFragment || scope !== SCOPE_PROLOGUE ? '' : ", a doctype declaration ('<!DOCTYPE')") +
(options.areCDATASectionsRecognized ? ", or a CDATA section ('<![CDATA[[')" : '') +
'.', input, nextIndex - 1, nextIndex + 1);
}
callback(TOKEN_COMMENT, nextIndex, (nextIndex = skipUntilTagClosing(input, nextIndex)));
// Skip ">"
textStartIndex = ++nextIndex;
if (scope === SCOPE_PROLOGUE) {
textStartIndex = nextIndex = skipSpaces(input, nextIndex);
continue;
}
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
// ----------------------------------------------------------
// End of prolog
// ----------------------------------------------------------
scope = SCOPE_TEXT;
// ----------------------------------------------------------
// End tag
// ----------------------------------------------------------
if (charCode === /* / */ 47) {
// Skip "/"
const tagNameStartIndex = ++nextIndex;
nextIndex = skipName(input, nextIndex);
// No tag name
if (tagNameStartIndex === nextIndex) {
if (isStrict) {
new ParserError('Expected a name for the end tag.', input, tagNameStartIndex, tagNameStartIndex + 1);
}
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
// Doesn't match the current raw text tag
if (enclosingRawTextTag !== 0 && enclosingRawTextTag !== readTag(input, tagNameStartIndex, nextIndex)) {
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
if (textStartIndex !== index) {
callback(TOKEN_TEXT, textStartIndex, index);
}
let nextForeignTagStackCursor = -1;
if (enclosingRawTextTag === 0 &&
foreignTagStackCursor !== -1 &&
(nextForeignTagStackCursor = foreignTagStack.lastIndexOf(readTag(input, tagNameStartIndex, nextIndex), foreignTagStackCursor)) !== -1) {
// Pop foreign tags off the stack
while (foreignTagStackCursor !== nextForeignTagStackCursor - 1) {
--foreignTagStackCursor;
options = options.parentOptions;
}
}
callback(TOKEN_END_TAG_NAME, tagNameStartIndex, nextIndex);
// Close the raw text tag
enclosingRawTextTag = 0;
if (!isStrict) {
// Skip unparsable characters after the tag name
textStartIndex = nextIndex = skipUntilTagClosing(input, nextIndex) + 1;
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
nextIndex = skipSpaces(input, nextIndex);
if (getCharCodeAt(input, nextIndex) !== /* > */ 62) {
throw new ParserError("Expected a closing angle bracket ('>').", input, nextIndex, nextIndex + 1);
}
// Skip ">"
textStartIndex = ++nextIndex;
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
// ----------------------------------------------------------
// Start tag
// ----------------------------------------------------------
// Start tags are ignored inside raw text tags
if (enclosingRawTextTag !== 0) {
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
const tagNameStartIndex = nextIndex;
nextIndex = skipName(input, nextIndex);
// No tag name
if (tagNameStartIndex === nextIndex) {
if (isStrict) {
new ParserError('Expected a name for the start tag.', input, tagNameStartIndex, tagNameStartIndex + 1);
}
nextIndex = skipUntilTagOpening(input, nextIndex);
continue;
}
if (textStartIndex !== index) {
callback(TOKEN_TEXT, textStartIndex, index);
}
enclosingRawTextTag = 0;
let startTag;
let nextOptions;
if (rawTextTags !== undefined && rawTextTags.has((startTag = readTag(input, tagNameStartIndex, nextIndex)))) {
// Start of the raw text tag
enclosingRawTextTag = startTag;
}
if (enclosingRawTextTag === 0 &&
options.foreignTags !== undefined &&
(nextOptions = options.foreignTags.get((startTag = startTag !== undefined ? startTag : readTag(input, tagNameStartIndex, nextIndex)))) !== undefined) {
// Start of the foreign tag
foreignTagStack[++foreignTagStackCursor] = startTag;
options = nextOptions;
}
callback(TOKEN_START_TAG_NAME, tagNameStartIndex, nextIndex);
scope = SCOPE_START_TAG;
textStartIndex = nextIndex = skipSpaces(input, nextIndex);
}
if (textStartIndex < inputLength) {
callback(TOKEN_TEXT, textStartIndex, inputLength);
}
}
/**
* Returns case-insensitive djb2 hash of a substring.
*/
export function getCaseInsensitiveHashCode(input, startIndex, endIndex) {
let hashCode = 0;
for (let i = startIndex; i < endIndex; ++i) {
const charCode = input.charCodeAt(i);
hashCode = (hashCode << 5) - hashCode + (charCode < 65 || charCode > 90 ? charCode : charCode + 32);
}
return hashCode;
}
/**
* Returns case-sensitive djb2 hash of a substring.
*/
export function getCaseSensitiveHashCode(input, startIndex, endIndex) {
let hashCode = 0;
for (let i = startIndex; i < endIndex; ++i) {
hashCode = (hashCode << 5) - hashCode + input.charCodeAt(i);
}
return hashCode;
}
function getCharCodeAt(input, index) {
return index < input.length ? input.charCodeAt(index) : -1;
}
function getCaseInsensitiveCharCodeAt(input, index) {
const charCode = getCharCodeAt(input, index);
return charCode < 65 || charCode > 90 ? charCode : charCode + 32;
}
/**
* Skips chars until they match a predicate.
*/
function skipChars(input, index, predicate) {
while (index < input.length && predicate(input.charCodeAt(index))) {
++index;
}
return index;
}
/**
* Skips whitespace chars.
*/
function skipSpaces(input, index) {
return skipChars(input, index, isSpaceChar);
}
// https://www.w3.org/TR/xml/#NT-S
function isSpaceChar(charCode) {
return charCode === /* \s */ 32 || charCode === /* \n */ 10 || charCode === /* \t */ 9 || charCode === /* \r */ 13;
}
// https://www.w3.org/TR/xml/#NT-NameStartChar
function isXMLNameStartChar(charCode) {
return ((charCode >= /* a */ 97 && charCode <= /* z */ 122) ||
(charCode >= /* A */ 65 && charCode <= /* Z */ 90) ||
charCode === /* _ */ 95 ||
charCode === /* : */ 58 ||
(charCode >= 0x000c0 && charCode <= 0x000d6) ||
(charCode >= 0x000d8 && charCode <= 0x000f6) ||
(charCode >= 0x000f8 && charCode <= 0x002ff) ||
(charCode >= 0x00370 && charCode <= 0x0037d) ||
(charCode >= 0x0037f && charCode <= 0x01fff) ||
(charCode >= 0x0200c && charCode <= 0x0200d) ||
(charCode >= 0x02070 && charCode <= 0x0218f) ||
(charCode >= 0x02c00 && charCode <= 0x02fef) ||
(charCode >= 0x03001 && charCode <= 0x0d7ff) ||
(charCode >= 0x0f900 && charCode <= 0x0fdcf) ||
(charCode >= 0x0fdf0 && charCode <= 0x0fffd) ||
(charCode >= 0x10000 && charCode <= 0xeffff));
}
// https://www.w3.org/TR/xml/#NT-NameChar
function isXMLNameChar(charCode) {
return (isXMLNameStartChar(charCode) ||
charCode === /* - */ 45 ||
charCode === /* . */ 46 ||
charCode === 0xb7 ||
(charCode >= /* 0 */ 48 && charCode <= /* 9 */ 57) ||
(charCode >= 0x0300 && charCode <= 0x036f) ||
(charCode >= 0x203f && charCode <= 0x2040));
}
function isHTMLNameChar(charCode) {
return !(charCode === /* / */ 47 || charCode === /* > */ 62 || isSpaceChar(charCode));
}
function isHTMLAttributeNameChar(charCode) {
return !(charCode === /* / */ 47 || charCode === /* > */ 62 || charCode === /* = */ 61 || isSpaceChar(charCode));
}
// https://www.w3.org/TR/xml/#NT-Name
function skipXMLName(input, index) {
return isXMLNameStartChar(getCharCodeAt(input, index)) ? skipChars(input, index + 1, isXMLNameChar) : index;
}
function skipHTMLName(input, index) {
return isXMLNameStartChar(getCharCodeAt(input, index)) ? skipChars(input, index + 1, isHTMLNameChar) : index;
}
function skipHTMLAttributeName(input, index) {
return skipChars(input, index, isHTMLAttributeNameChar);
}
function getIndexOfOrLength(input, searchString, index) {
index = input.indexOf(searchString, index);
return index !== -1 ? index : input.length;
}
function skipUntilTagOpening(input, index) {
return getIndexOfOrLength(input, '<', index);
}
function skipUntilTagClosing(input, index) {
return getIndexOfOrLength(input, '>', index);
}