@nfrasser/simple-html-tokenizer
Version: (not recorded in this extract)
Simple HTML Tokenizer is a lightweight JavaScript library that can be used to tokenize the kind of HTML normally found in templates.
1,187 lines (1,166 loc) • 49.6 kB
JavaScript
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(require('simple-html-tokenizer')) :
typeof define === 'function' && define.amd ? define(['simple-html-tokenizer'], factory) :
(factory(global.HTML5Tokenizer));
}(this, (function (simpleHtmlTokenizer) { 'use strict';
QUnit.module('simple-html-tokenizer - tokenizer');
// Regression test: tokenizing input containing a <!DOCTYPE ...> declaration
// must not throw when the delegate implements none of the doctype-specific
// callbacks. The delegate below records every callback it receives in `steps`.
QUnit.test('does not fail if delegate does not include doctype methods', function (assert) {
var steps = [];
// TS-compiled delegate class: every handler simply logs its own name (plus
// its argument, where one exists) into the shared `steps` array.
var MissingDoctypeTokenizerDelegate = /** @class */ (function () {
function MissingDoctypeTokenizerDelegate() {
}
MissingDoctypeTokenizerDelegate.prototype.reset = function () {
steps.push(['reset']);
};
MissingDoctypeTokenizerDelegate.prototype.finishData = function () {
steps.push(['finishData']);
};
MissingDoctypeTokenizerDelegate.prototype.tagOpen = function () {
steps.push(['tagOpen']);
};
MissingDoctypeTokenizerDelegate.prototype.beginData = function () {
steps.push(['beginData']);
};
MissingDoctypeTokenizerDelegate.prototype.appendToData = function (char) {
steps.push(['appendToData', char]);
};
MissingDoctypeTokenizerDelegate.prototype.beginStartTag = function () {
steps.push(['beginStartTag']);
};
MissingDoctypeTokenizerDelegate.prototype.appendToTagName = function (char) {
steps.push(['appendToTagName', char]);
};
MissingDoctypeTokenizerDelegate.prototype.beginAttribute = function () {
steps.push(['beginAttribute']);
};
MissingDoctypeTokenizerDelegate.prototype.appendToAttributeName = function (char) {
steps.push(['appendToAttributeName', char]);
};
MissingDoctypeTokenizerDelegate.prototype.beginAttributeValue = function (quoted) {
steps.push(['beginAttributeValue', "" + quoted]);
};
MissingDoctypeTokenizerDelegate.prototype.appendToAttributeValue = function (char) {
steps.push(['appendToAttributeValue', char]);
};
MissingDoctypeTokenizerDelegate.prototype.finishAttributeValue = function () {
steps.push(['finishAttributeValue']);
};
MissingDoctypeTokenizerDelegate.prototype.markTagAsSelfClosing = function () {
steps.push(['markTagAsSelfClosing']);
};
MissingDoctypeTokenizerDelegate.prototype.beginEndTag = function () {
steps.push(['beginEndTag']);
};
MissingDoctypeTokenizerDelegate.prototype.finishTag = function () {
steps.push(['finishTag']);
};
MissingDoctypeTokenizerDelegate.prototype.beginComment = function () {
steps.push(['beginComment']);
};
MissingDoctypeTokenizerDelegate.prototype.appendToCommentData = function (char) {
steps.push(['appendToCommentData', char]);
};
MissingDoctypeTokenizerDelegate.prototype.finishComment = function () {
steps.push(['finishComment']);
};
MissingDoctypeTokenizerDelegate.prototype.reportSyntaxError = function (error) {
steps.push(['reportSyntaxError', error]);
};
return MissingDoctypeTokenizerDelegate;
}());
var delegate = new MissingDoctypeTokenizerDelegate();
var tokenizer = new simpleHtmlTokenizer.EventedTokenizer(delegate, new simpleHtmlTokenizer.EntityParser({}));
// Input: newline, comment, DOCTYPE, newline, comment.
tokenizer.tokenize('\n<!-- comment here --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">\n<!-- comment here -->');
assert.deepEqual(steps, [
// 'reset' appears twice: the EventedTokenizer constructor calls reset(),
// and tokenize() calls reset() again before processing input.
["reset"],
["reset"],
["beginData"],
["appendToData", "\n"],
["finishData"],
// First "<!-- comment here -->".
["tagOpen"],
["beginComment"],
["appendToCommentData", " "],
["appendToCommentData", "c"],
["appendToCommentData", "o"],
["appendToCommentData", "m"],
["appendToCommentData", "m"],
["appendToCommentData", "e"],
["appendToCommentData", "n"],
["appendToCommentData", "t"],
["appendToCommentData", " "],
["appendToCommentData", "h"],
["appendToCommentData", "e"],
["appendToCommentData", "r"],
["appendToCommentData", "e"],
["appendToCommentData", " "],
["finishComment"],
// The <!DOCTYPE ...> section reports only tagOpen: since the delegate
// implements no doctype callbacks, no further events are expected for it.
["tagOpen"],
["beginData"],
["appendToData", "\n"],
["finishData"],
// Second "<!-- comment here -->".
["tagOpen"],
["beginComment"],
["appendToCommentData", " "],
["appendToCommentData", "c"],
["appendToCommentData", "o"],
["appendToCommentData", "m"],
["appendToCommentData", "m"],
["appendToCommentData", "e"],
["appendToCommentData", "n"],
["appendToCommentData", "t"],
["appendToCommentData", " "],
["appendToCommentData", "h"],
["appendToCommentData", "e"],
["appendToCommentData", "r"],
["appendToCommentData", "e"],
["appendToCommentData", " "],
["finishComment"]
]);
});
// Doctype declarations are tokenized into Doctype tokens (see the doctype()
// helper below for the expected token shape).
QUnit.test('Doctype', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">');
assert.deepEqual(tokens, [doctype('-//W3C//DTD HTML 4.01//EN', 'http://www.w3.org/TR/html4/strict.dtd')], 'Standard HTML 4.01 Strict doctype');
tokens = simpleHtmlTokenizer.tokenize('<!DOCTYPE html><html><body></body></html>');
assert.deepEqual(tokens, [
doctype(),
startTag('html'),
startTag('body'),
endTag('body'),
endTag('html'),
], 'DOCTYPE is included in tokens');
tokens = simpleHtmlTokenizer.tokenize('<!-- comment --><!DOCTYPE html>');
assert.deepEqual(tokens, [comment(' comment '), doctype()], 'DOCTYPE after comments is valid');
tokens = simpleHtmlTokenizer.tokenize('<!-- comment --><!DOCTYPE html PUBLIC >');
assert.deepEqual(tokens, [comment(' comment '), doctype()], 'DOCTYPE after comments is valid');
});
QUnit.test('Simple content', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('hello');
assert.deepEqual(tokens, [chars('hello')]);
});
QUnit.test('A simple tag', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<div>');
assert.deepEqual(tokens, [startTag('div')]);
});
QUnit.test('A simple tag with trailing spaces', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<div \t\n>');
assert.deepEqual(tokens, [startTag('div')]);
});
QUnit.test('A simple closing tag', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('</div>');
assert.deepEqual(tokens, [endTag('div')]);
});
QUnit.test('A simple closing tag with trailing spaces', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('</div \t\n>');
assert.deepEqual(tokens, [endTag('div')]);
});
QUnit.test('A pair of hyphenated tags', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<x-foo></x-foo>');
assert.deepEqual(tokens, [startTag('x-foo'), endTag('x-foo')]);
});
// Attribute triples are [name, value, quoted] -- see the startTag() helper.
QUnit.test('A tag with a single-quoted attribute', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("<div id='foo'>");
assert.deepEqual(tokens, [startTag('div', [['id', 'foo', true]])]);
});
QUnit.test('A tag with a double-quoted attribute', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<div id="foo">');
assert.deepEqual(tokens, [startTag('div', [['id', 'foo', true]])]);
});
QUnit.test('A tag with a double-quoted empty', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<div id="">');
assert.deepEqual(tokens, [startTag('div', [['id', '', true]])]);
});
QUnit.test('A tag with unquoted attribute', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<div id=foo>');
assert.deepEqual(tokens, [startTag('div', [['id', 'foo', false]])]);
});
QUnit.test('A tag with valueless attributes', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<div foo bar>');
assert.deepEqual(tokens, [
startTag('div', [['foo', '', false], ['bar', '', false]])
]);
});
// Malformed attributes are still tokenized, but the offending token carries a
// `syntaxError` message (attached via the withSyntaxError() helper below).
QUnit.test('Missing attribute name', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<div =foo>');
assert.deepEqual(tokens, [
withSyntaxError('attribute name cannot start with equals sign', startTag('div', [['=foo', '', false]]))
]);
});
QUnit.test('Invalid character in attribute name', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<div ">');
assert.deepEqual(tokens, [
withSyntaxError('" is not a valid character within attribute names', startTag('div', [['"', '', false]]))
]);
});
QUnit.test('A tag with multiple attributes', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<div id=foo class="bar baz" href=\'bat\'>');
assert.deepEqual(tokens, [
startTag('div', [
['id', 'foo', false],
['class', 'bar baz', true],
['href', 'bat', true]
])
]);
});
// Attribute-name case is preserved (important for SVG's camelCased names).
QUnit.test('A tag with capitalization in attributes', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<svg viewBox="0 0 0 0">');
assert.deepEqual(tokens, [startTag('svg', [['viewBox', '0 0 0 0', true]])]);
});
QUnit.test('A tag with capitalization in the tag', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<linearGradient>');
assert.deepEqual(tokens, [startTag('linearGradient', [])]);
});
QUnit.test('A self-closing tag', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<img />');
assert.deepEqual(tokens, [startTag('img', [], true)]);
});
QUnit.test('A self-closing tag with valueless attributes (regression)', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<input disabled />');
assert.deepEqual(tokens, [
startTag('input', [['disabled', '', false]], true)
]);
});
QUnit.test('A self-closing tag with valueless attributes without space before closing (regression)', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<input disabled/>');
assert.deepEqual(tokens, [
startTag('input', [['disabled', '', false]], true)
]);
});
QUnit.test('A self-closing tag with an attribute with unquoted value without space before closing (regression)', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<input data-foo=bar/>');
assert.deepEqual(tokens, [
startTag('input', [['data-foo', 'bar', false]], true)
]);
});
// A stray "/" before other attributes does not mark the tag self-closing.
QUnit.test('A tag with a / in the middle', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<img / src="foo.png">');
assert.deepEqual(tokens, [startTag('img', [['src', 'foo.png', true]])]);
});
QUnit.test('An opening and closing tag with some content', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("<div id='foo' class='{{bar}} baz'>Some content</div>");
assert.deepEqual(tokens, [
startTag('div', [['id', 'foo', true], ['class', '{{bar}} baz', true]]),
chars('Some content'),
endTag('div')
]);
});
QUnit.test('A comment', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<!-- hello -->');
assert.deepEqual(tokens, [comment(' hello ')]);
});
QUnit.test('A (buggy) comment with no ending --', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<!-->');
assert.deepEqual(tokens, [comment()]);
});
QUnit.test('A comment that immediately closes', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<!---->');
assert.deepEqual(tokens, [comment()]);
});
QUnit.test('A comment that contains a -', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<!-- A perfectly legal - appears -->');
assert.deepEqual(tokens, [comment(' A perfectly legal - appears ')]);
tokens = simpleHtmlTokenizer.tokenize('<!-- A perfectly legal - -->');
assert.deepEqual(tokens, [comment(' A perfectly legal - ')]);
tokens = simpleHtmlTokenizer.tokenize('<!-- A perfectly legal- -->');
assert.deepEqual(tokens, [comment(' A perfectly legal- ')]);
});
QUnit.test('A (buggy) comment that contains two --', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable -- appears -->');
assert.deepEqual(tokens, [comment(' A questionable -- appears ')]);
tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable -- -->');
assert.deepEqual(tokens, [comment(' A questionable -- ')]);
tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable-- -->');
assert.deepEqual(tokens, [comment(' A questionable-- ')]);
});
// Extra dashes before ">" belong to the comment data, minus the closing "--".
QUnit.test('A (buggy) comment ending with more than two --', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable but legal comment --->');
assert.deepEqual(tokens, [comment(' A questionable but legal comment -')]);
tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable but legal comment--->');
assert.deepEqual(tokens, [comment(' A questionable but legal comment-')]);
tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable but legal comment - --->');
assert.deepEqual(tokens, [comment(' A questionable but legal comment - -')]);
tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable but legal comment -- --->');
assert.deepEqual(tokens, [comment(' A questionable but legal comment -- -')]);
tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable but legal comment ------>');
assert.deepEqual(tokens, [comment(' A questionable but legal comment ----')]);
});
QUnit.test('A (buggy) comment starting with more than two --', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<!--- Questionable but legal -->');
assert.deepEqual(tokens, [comment('- Questionable but legal ')]);
tokens = simpleHtmlTokenizer.tokenize('<!---Questionable but legal -->');
assert.deepEqual(tokens, [comment('-Questionable but legal ')]);
});
// NOTE(review): this skipped test's input (below) and expected output are now
// byte-identical -- the escaped character references in the input (e.g.
// &quot;, &amp;, &lt;) appear to have been decoded during extraction of this
// file. Restore the escaped input from upstream before un-skipping.
QUnit.skip('Character references are expanded', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('"Foo & Bar" < << < < ≧̸ &Borksnorlax; ≦̸');
assert.deepEqual(tokens, [chars('"Foo & Bar" < << < < ≧̸ &Borksnorlax; ≦̸')]);
tokens = simpleHtmlTokenizer.tokenize("<div title='"Foo & Bar" ▒ < << < < ≧̸ &Borksnorlax; ≦̸'>");
assert.deepEqual(tokens, [
startTag('div', [
['title', '"Foo & Bar" ▒ < << < < ≧̸ &Borksnorlax; ≦̸', true]
])
]);
});
// https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions
// Per the HTML spec, a newline immediately after <pre>/<textarea> is ignored.
// In the default mode the tokenizer strips it; in 'codemod' mode (further
// below) it is preserved so source text can be round-tripped verbatim.
QUnit.test('A newline immediately following a <pre> tag is stripped', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("<pre>\nhello</pre>");
assert.deepEqual(tokens, [startTag('pre'), chars('hello'), endTag('pre')]);
});
QUnit.test('A newline immediately following a closing </pre> tag is not stripped', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("\n<pre>\nhello</pre>\n");
assert.deepEqual(tokens, [chars('\n'), startTag('pre'), chars('hello'), endTag('pre'), chars('\n')]);
});
// https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions
QUnit.test('A newline immediately following a <PRE> tag is stripped', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("<PRE>\nhello</PRE>");
assert.deepEqual(tokens, [startTag('PRE'), chars('hello'), endTag('PRE')]);
});
// https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions
QUnit.test('A newline immediately following a <textarea> tag is stripped', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("<textarea>\nhello</textarea>");
assert.deepEqual(tokens, [startTag('textarea'), chars('hello'), endTag('textarea')]);
});
// https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions
// NOTE: despite the copied test titles saying "is stripped", the codemod-mode
// expectations below all keep the leading "\n" in the Chars token.
QUnit.test('codemod: A newline immediately following a <pre> tag is stripped', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("<pre>\nhello</pre>", { mode: 'codemod' });
assert.deepEqual(tokens, [startTag('pre'), chars('\nhello'), endTag('pre')]);
});
QUnit.test('codemod: A newline immediately following a closing </pre> tag is not stripped', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("\n<pre>\nhello</pre>\n", { mode: 'codemod' });
assert.deepEqual(tokens, [chars('\n'), startTag('pre'), chars('\nhello'), endTag('pre'), chars('\n')]);
});
// https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions
QUnit.test('codemod: A newline immediately following a <PRE> tag is stripped', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("<PRE>\nhello</PRE>", { mode: 'codemod' });
assert.deepEqual(tokens, [startTag('PRE'), chars('\nhello'), endTag('PRE')]);
});
// https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions
QUnit.test('codemod: A newline immediately following a <textarea> tag is stripped', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("<textarea>\nhello</textarea>", { mode: 'codemod' });
assert.deepEqual(tokens, [startTag('textarea'), chars('\nhello'), endTag('textarea')]);
});
QUnit.test('The title element content is always text', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("<title>"hey <b>there</b><!-- comment --></title>");
assert.deepEqual(tokens, [startTag('title'), chars('"hey <b>there</b><!-- comment -->'), endTag('title')]);
});
// https://github.com/emberjs/ember.js/issues/18530
// The RCDATA special-casing only applies to the all-lowercase tag name
// "title": a capitalized <Title> is tokenized like any normal element, so the
// comment inside it is a real Comment token rather than text.
QUnit.test('Title element content is not text', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("<Title><!-- hello --></Title>");
assert.deepEqual(tokens, [startTag('Title'), comment(' hello '), endTag('Title')]);
});
// https://html.spec.whatwg.org/multipage/semantics.html#the-style-element
QUnit.test('The style element content is always text', function (assert) {
// <style> is RAWTEXT: neither markup nor character references are
// interpreted, so &quot; must survive verbatim in the Chars token.
// (Both the input and expected strings had been entity-decoded during
// extraction, which broke the input literal; restored per the RAWTEXT rules
// -- confirm against the upstream test source.)
var tokens = simpleHtmlTokenizer.tokenize("<style>&quot;hey <b>there</b><!-- comment --></style>");
assert.deepEqual(tokens, [startTag('style'), chars('&quot;hey <b>there</b><!-- comment -->'), endTag('style')]);
});
// https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements
QUnit.test('The script element content restrictions', function (assert) {
// <script> content is raw text like <style>: character references are NOT
// expanded, so &quot; stays verbatim in the Chars token. (The input literal
// had been entity-decoded during extraction, breaking the string; restored
// -- confirm against the upstream test source.)
var tokens = simpleHtmlTokenizer.tokenize("<script>&quot;hey <b>there</b><!-- comment --></script>");
assert.deepEqual(tokens, [startTag('script'), chars('&quot;hey <b>there</b><!-- comment -->'), endTag('script')]);
});
// The raw-text mode must end at the matching </script>, not swallow the rest
// of the document: the second script's content is tokenized independently.
QUnit.test('Two following script tags', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize("<script><!-- comment --></script> <script>second</script>");
assert.deepEqual(tokens, [
startTag('script'),
chars('<!-- comment -->'),
endTag('script'),
chars(' '),
startTag('script'),
chars('second'),
endTag('script')
]);
});
// https://github.com/emberjs/rfcs/blob/master/text/0311-angle-bracket-invocation.md#dynamic-invocations
// Glimmer/Ember templates allow "@"-prefixed tag names; the tokenizer keeps
// the "@" as part of the tag name.
QUnit.test('An Emberish named arg invocation', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<@foo></@foo>');
assert.deepEqual(tokens, [startTag('@foo'), endTag('@foo')]);
});
QUnit.test('Parsing <script>s out of a complext HTML document [stefanpenner/find-scripts-srcs-in-document#1]', function (assert) {
var input = "<!DOCTYPE html><html><head><script src=\"/foo.js\"></script><script src=\"/bar.js\"></script><script src=\"/baz.js\"></script></head></html>";
var tokens = simpleHtmlTokenizer.tokenize(input);
assert.deepEqual(tokens, [
doctype(),
startTag('html'),
startTag('head'),
startTag('script', [['src', '/foo.js', true]]),
endTag('script'),
startTag('script', [['src', '/bar.js', true]]),
endTag('script'),
startTag('script', [['src', '/baz.js', true]]),
endTag('script'),
endTag('head'),
endTag('html'),
]);
});
QUnit.module('simple-html-tokenizer - preprocessing');
// Input preprocessing normalizes line endings: each "\r" and "\r\n" becomes
// a single "\n" before tokenization.
QUnit.test('Carriage returns are replaced with line feeds', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('\r\r\n\r\r\n\n');
assert.deepEqual(tokens, [chars('\n\n\n\n\n')]);
});
QUnit.module('simple-html-tokenizer - location info');
// With { loc: true } each token carries a loc record; the locInfo() helper
// takes (token, startLine, startColumn, endLine, endColumn). Lines are
// 1-based, columns 0-based.
QUnit.test('lines are counted correctly', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('\r\r\n\r\r\n\n', { loc: true });
assert.deepEqual(tokens, [locInfo(chars('\n\n\n\n\n'), 1, 0, 6, 0)]);
});
QUnit.test('tokens: Chars', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('Chars', { loc: true });
assert.deepEqual(tokens, [locInfo(chars('Chars'), 1, 0, 1, 5)]);
});
QUnit.test('tokens: Chars start-tag Chars', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('Chars<div>Chars', { loc: true });
assert.deepEqual(tokens, [
locInfo(chars('Chars'), 1, 0, 1, 5),
locInfo(startTag('div'), 1, 5, 1, 10),
locInfo(chars('Chars'), 1, 10, 1, 15)
]);
});
QUnit.test('tokens: start-tag start-tag', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<div><div>', { loc: true });
assert.deepEqual(tokens, [
locInfo(startTag('div'), 1, 0, 1, 5),
locInfo(startTag('div'), 1, 5, 1, 10)
]);
});
QUnit.test('tokens: html char ref start-tag', function (assert) {
// The 4-column span expected for the first Chars token (columns 0..4) shows
// the input must be the 4-character reference "&gt;", which the tokenizer
// expands to ">". (The entity had been decoded in-place in this file,
// leaving a bare ">" that contradicted the recorded source offsets.)
var tokens = simpleHtmlTokenizer.tokenize('&gt;<div>', { loc: true });
assert.deepEqual(tokens, [
locInfo(chars('>'), 1, 0, 1, 4),
locInfo(startTag('div'), 1, 4, 1, 9)
]);
});
// Multiline tokens: a Chars token containing "\n" ends at column 0 of the
// next line, and subsequent tokens continue from there.
QUnit.test('tokens: Chars start-tag Chars start-tag', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('Chars\n<div>Chars\n<div>', {
loc: true
});
assert.deepEqual(tokens, [
locInfo(chars('Chars\n'), 1, 0, 2, 0),
locInfo(startTag('div'), 2, 0, 2, 5),
locInfo(chars('Chars\n'), 2, 5, 3, 0),
locInfo(startTag('div'), 3, 0, 3, 5)
]);
});
QUnit.test('tokens: comment start-tag Chars end-tag', function (assert) {
var tokens = simpleHtmlTokenizer.tokenize('<!-- multline\ncomment --><div foo=bar>Chars\n</div>', { loc: true });
assert.deepEqual(tokens, [
locInfo(comment(' multline\ncomment '), 1, 0, 2, 11),
locInfo(startTag('div', [['foo', 'bar', false]]), 2, 11, 2, 24),
locInfo(chars('Chars\n'), 2, 24, 3, 0),
locInfo(endTag('div'), 3, 0, 3, 6)
]);
});
// Builds a Chars token fixture; a missing argument yields an empty string.
function chars(s) {
const text = s === undefined ? '' : s;
return { type: 'Chars', chars: text };
}
// Builds a Comment token fixture; a missing argument yields an empty string.
function comment(s) {
const data = s === undefined ? '' : s;
return { type: 'Comment', chars: data };
}
// Builds a StartTag token fixture. `attributes` is an array of
// [name, value, quoted] triples (defaults to []); `selfClosing` defaults to
// false. Only `undefined` triggers the defaults, matching the original.
function startTag(tagName, attributes, selfClosing) {
const attrs = attributes === undefined ? [] : attributes;
const closed = selfClosing === undefined ? false : selfClosing;
return {
type: 'StartTag',
tagName,
attributes: attrs,
selfClosing: closed
};
}
// Builds an EndTag token fixture for the given tag name.
function endTag(tagName) {
return { type: 'EndTag', tagName };
}
// Builds a Doctype token fixture (name is always 'html'). The public and
// system identifier properties are only present when truthy values are given.
function doctype(publicIdentifier, systemIdentifier) {
const token = { type: 'Doctype', name: 'html' };
if (publicIdentifier) {
token.publicIdentifier = publicIdentifier;
}
if (systemIdentifier) {
token.systemIdentifier = systemIdentifier;
}
return token;
}
// Attaches a source-location record to a token fixture. Mutates `token` in
// place and returns the same object. Lines are 1-based, columns 0-based.
function locInfo(token, startLine, startColumn, endLine, endColumn) {
const start = { line: startLine, column: startColumn };
const end = { line: endLine, column: endColumn };
token.loc = { start, end };
return token;
}
// Tags a token fixture with an expected `syntaxError` message. Mutates
// `result` and returns the same object.
function withSyntaxError(message, result) {
return Object.assign(result, { syntaxError: message });
}
})));
// Generated TSLint placeholder tests: the build records lint results by
// emitting one trivially-passing QUnit test per linted source file.
QUnit.module('TSLint - src');
QUnit.test('src/entity-parser.ts should pass tslint', function(assert) {
assert.expect(1);
assert.ok(true, 'src/entity-parser.ts should pass tslint.');
});
QUnit.module('TSLint - src');
QUnit.test('src/evented-tokenizer.ts should pass tslint', function(assert) {
assert.expect(1);
assert.ok(true, 'src/evented-tokenizer.ts should pass tslint.');
});
QUnit.module('TSLint - src/generated');
QUnit.test('src/generated/html5-named-char-refs.ts should pass tslint', function(assert) {
assert.expect(1);
assert.ok(true, 'src/generated/html5-named-char-refs.ts should pass tslint.');
});
QUnit.module('TSLint - src/generated');
QUnit.test('src/generated/tokenizer-states.ts should pass tslint', function(assert) {
assert.expect(1);
assert.ok(true, 'src/generated/tokenizer-states.ts should pass tslint.');
});
QUnit.module('TSLint - src');
QUnit.test('src/index.ts should pass tslint', function(assert) {
assert.expect(1);
assert.ok(true, 'src/index.ts should pass tslint.');
});
// Matches a hexadecimal numeric character reference body, e.g. "#x26".
var HEXCHARCODE = /^#[xX]([A-Fa-f0-9]+)$/;
// Matches a decimal numeric character reference body, e.g. "#38".
var CHARCODE = /^#([0-9]+)$/;
// Matches a named character reference body, e.g. "amp" or "nbsp".
var NAMED = /^([A-Za-z0-9]+)$/;
/**
 * Resolves the body of an HTML character reference (the text between "&" and
 * ";"). Numeric references are normalized back into "&#...;"/"&#x...;" form;
 * named references are looked up in the map given to the constructor, and
 * unknown names are re-emitted verbatim as "&name;".
 */
var EntityParser = (function () {
/**
 * @param {Object} named - map of entity name -> replacement string.
 */
function EntityParser(named) {
this.named = named;
}
/**
 * @param {string} entity - reference body without the surrounding "&"/";".
 * @returns {string|undefined} the replacement text, or undefined when the
 *   input is empty or not a recognizable reference.
 */
EntityParser.prototype.parse = function (entity) {
if (!entity) {
return;
}
var matches = entity.match(HEXCHARCODE);
if (matches) {
return "&#x" + matches[1] + ";";
}
matches = entity.match(CHARCODE);
if (matches) {
return "&#" + matches[1] + ";";
}
matches = entity.match(NAMED);
if (matches) {
var name = matches[1];
// Only consult own properties: a bare `this.named[name]` lookup walks
// the prototype chain, so entities like "&constructor;" would resolve
// to Object.prototype members instead of falling through to "&name;".
if (Object.prototype.hasOwnProperty.call(this.named, name)) {
return this.named[name] || "&" + name + ";";
}
return "&" + name + ";";
}
};
return EntityParser;
}());
export default EntityParser;
import { preprocessInput, isAlpha, isSpace } from './utils';
var EventedTokenizer = (function () {
function EventedTokenizer(delegate, entityParser) {
this.delegate = delegate;
this.entityParser = entityParser;
this.state = null;
this.input = null;
this.index = -1;
this.tagLine = -1;
this.tagColumn = -1;
this.line = -1;
this.column = -1;
this.states = {
beforeData: function () {
var char = this.peek();
if (char === "<") {
this.state = 'tagOpen';
this.markTagStart();
this.consume();
}
else {
this.state = 'data';
this.delegate.beginData();
}
},
data: function () {
var char = this.peek();
if (char === "<") {
this.delegate.finishData();
this.state = 'tagOpen';
this.markTagStart();
this.consume();
}
else if (char === "&") {
this.consume();
this.delegate.appendToData(this.consumeCharRef() || "&");
}
else {
this.consume();
this.delegate.appendToData(char);
}
},
tagOpen: function () {
var char = this.consume();
if (char === "!") {
this.state = 'markupDeclaration';
}
else if (char === "/") {
this.state = 'endTagOpen';
}
else if (isAlpha(char)) {
this.state = 'tagName';
this.delegate.beginStartTag();
this.delegate.appendToTagName(char.toLowerCase());
}
},
markupDeclaration: function () {
var char = this.consume();
if (char === "-" && this.input.charAt(this.index) === "-") {
this.consume();
this.state = 'commentStart';
this.delegate.beginComment();
}
},
commentStart: function () {
var char = this.consume();
if (char === "-") {
this.state = 'commentStartDash';
}
else if (char === ">") {
this.delegate.finishComment();
this.state = 'beforeData';
}
else {
this.delegate.appendToCommentData(char);
this.state = 'comment';
}
},
commentStartDash: function () {
var char = this.consume();
if (char === "-") {
this.state = 'commentEnd';
}
else if (char === ">") {
this.delegate.finishComment();
this.state = 'beforeData';
}
else {
this.delegate.appendToCommentData("-");
this.state = 'comment';
}
},
comment: function () {
var char = this.consume();
if (char === "-") {
this.state = 'commentEndDash';
}
else {
this.delegate.appendToCommentData(char);
}
},
commentEndDash: function () {
var char = this.consume();
if (char === "-") {
this.state = 'commentEnd';
}
else {
this.delegate.appendToCommentData("-" + char);
this.state = 'comment';
}
},
commentEnd: function () {
var char = this.consume();
if (char === ">") {
this.delegate.finishComment();
this.state = 'beforeData';
}
else {
this.delegate.appendToCommentData("--" + char);
this.state = 'comment';
}
},
tagName: function () {
var char = this.consume();
if (isSpace(char)) {
this.state = 'beforeAttributeName';
}
else if (char === "/") {
this.state = 'selfClosingStartTag';
}
else if (char === ">") {
this.delegate.finishTag();
this.state = 'beforeData';
}
else {
this.delegate.appendToTagName(char);
}
},
beforeAttributeName: function () {
var char = this.peek();
if (isSpace(char)) {
this.consume();
return;
}
else if (char === "/") {
this.state = 'selfClosingStartTag';
this.consume();
}
else if (char === ">") {
this.consume();
this.delegate.finishTag();
this.state = 'beforeData';
}
else if (char === '=') {
this.delegate.reportSyntaxError("attribute name cannot start with equals sign");
this.state = 'attributeName';
this.delegate.beginAttribute();
this.consume();
this.delegate.appendToAttributeName(char);
}
else {
this.state = 'attributeName';
this.delegate.beginAttribute();
}
},
attributeName: function () {
var char = this.peek();
if (isSpace(char)) {
this.state = 'afterAttributeName';
this.consume();
}
else if (char === "/") {
this.delegate.beginAttributeValue(false);
this.delegate.finishAttributeValue();
this.consume();
this.state = 'selfClosingStartTag';
}
else if (char === "=") {
this.state = 'beforeAttributeValue';
this.consume();
}
else if (char === ">") {
this.delegate.beginAttributeValue(false);
this.delegate.finishAttributeValue();
this.consume();
this.delegate.finishTag();
this.state = 'beforeData';
}
else if (char === '"' || char === "'" || char === '<') {
this.delegate.reportSyntaxError(char + " is not a valid character within attribute names");
this.consume();
this.delegate.appendToAttributeName(char);
}
else {
this.consume();
this.delegate.appendToAttributeName(char);
}
},
afterAttributeName: function () {
var char = this.peek();
if (isSpace(char)) {
this.consume();
return;
}
else if (char === "/") {
this.delegate.beginAttributeValue(false);
this.delegate.finishAttributeValue();
this.consume();
this.state = 'selfClosingStartTag';
}
else if (char === "=") {
this.consume();
this.state = 'beforeAttributeValue';
}
else if (char === ">") {
this.delegate.beginAttributeValue(false);
this.delegate.finishAttributeValue();
this.consume();
this.delegate.finishTag();
this.state = 'beforeData';
}
else {
this.delegate.beginAttributeValue(false);
this.delegate.finishAttributeValue();
this.consume();
this.state = 'attributeName';
this.delegate.beginAttribute();
this.delegate.appendToAttributeName(char);
}
},
beforeAttributeValue: function () {
var char = this.peek();
if (isSpace(char)) {
this.consume();
}
else if (char === '"') {
this.state = 'attributeValueDoubleQuoted';
this.delegate.beginAttributeValue(true);
this.consume();
}
else if (char === "'") {
this.state = 'attributeValueSingleQuoted';
this.delegate.beginAttributeValue(true);
this.consume();
}
else if (char === ">") {
this.delegate.beginAttributeValue(false);
this.delegate.finishAttributeValue();
this.consume();
this.delegate.finishTag();
this.state = 'beforeData';
}
else {
this.state = 'attributeValueUnquoted';
this.delegate.beginAttributeValue(false);
this.consume();
this.delegate.appendToAttributeValue(char);
}
},
attributeValueDoubleQuoted: function () {
var char = this.consume();
if (char === '"') {
this.delegate.finishAttributeValue();
this.state = 'afterAttributeValueQuoted';
}
else if (char === "&") {
this.delegate.appendToAttributeValue(this.consumeCharRef('"') || "&");
}
else {
this.delegate.appendToAttributeValue(char);
}
},
attributeValueSingleQuoted: function () {
var char = this.consume();
if (char === "'") {
this.delegate.finishAttributeValue();
this.state = 'afterAttributeValueQuoted';
}
else if (char === "&") {
this.delegate.appendToAttributeValue(this.consumeCharRef("'") || "&");
}
else {
this.delegate.appendToAttributeValue(char);
}
},
attributeValueUnquoted: function () {
var char = this.peek();
if (isSpace(char)) {
this.delegate.finishAttributeValue();
this.consume();
this.state = 'beforeAttributeName';
}
else if (char === "&") {
this.consume();
this.delegate.appendToAttributeValue(this.consumeCharRef(">") || "&");
}
else if (char === ">") {
this.delegate.finishAttributeValue();
this.consume();
this.delegate.finishTag();
this.state = 'beforeData';
}
else {
this.consume();
this.delegate.appendToAttributeValue(char);
}
},
afterAttributeValueQuoted: function () {
var char = this.peek();
if (isSpace(char)) {
this.consume();
this.state = 'beforeAttributeName';
}
else if (char === "/") {
this.consume();
this.state = 'selfClosingStartTag';
}
else if (char === ">") {
this.consume();
this.delegate.finishTag();
this.state = 'beforeData';
}
else {
this.state = 'beforeAttributeName';
}
},
selfClosingStartTag: function () {
var char = this.peek();
if (char === ">") {
this.consume();
this.delegate.markTagAsSelfClosing();
this.delegate.finishTag();
this.state = 'beforeData';
}
else {
this.state = 'beforeAttributeName';
}
},
endTagOpen: function () {
var char = this.consume();
if (isAlpha(char)) {
this.state = 'tagName';
this.delegate.beginEndTag();
this.delegate.appendToTagName(char.toLowerCase());
}
}
};
this.reset();
}
EventedTokenizer.prototype.reset = function () {
this.state = 'beforeData';
this.input = '';
this.index = 0;
this.line = 1;
this.column = 0;
this.tagLine = -1;
this.tagColumn = -1;
this.delegate.reset();
};
EventedTokenizer.prototype.tokenize = function (input) {
this.reset();
this.tokenizePart(input);
this.tokenizeEOF();
};
EventedTokenizer.prototype.tokenizePart = function (input) {
this.input += preprocessInput(input);
while (this.index < this.input.length) {
this.states[this.state].call(this);
}
};
EventedTokenizer.prototype.tokenizeEOF = function () {
this.flushData();
};
EventedTokenizer.prototype.flushData = function () {
if (this.state === 'data') {
this.delegate.finishData();
this.state = 'beforeData';
}
};
EventedTokenizer.prototype.peek = function () {
return this.input.charAt(this.index);
};
EventedTokenizer.prototype.consume = function () {
var char = this.peek();
this.index++;
if (char === "\n") {
this.line++;
this.column = 0;
}
else {
this.column++;
}
return char;
};
/**
 * Try to consume a character reference (e.g. `amp;` after a `&` was seen).
 * Returns the replacement string on success; returns undefined — consuming
 * nothing — when there is no terminating `;` or the entity is unknown.
 */
EventedTokenizer.prototype.consumeCharRef = function () {
    var endIndex = this.input.indexOf(';', this.index);
    if (endIndex === -1) {
        // Unterminated reference: leave the input untouched.
        return;
    }
    var entity = this.input.slice(this.index, endIndex);
    var chars = this.entityParser.parse(entity);
    if (!chars) {
        // Unrecognized entity: treat as plain text (caller decides).
        return;
    }
    // Consume the entity body plus the trailing `;` one character at a
    // time so line/column bookkeeping stays correct.
    for (var i = 0; i < entity.length + 1; i++) {
        this.consume();
    }
    return chars;
};
/**
 * Record where a tag begins and notify the delegate, if it cares.
 */
EventedTokenizer.prototype.markTagStart = function () {
    // these properties to be removed in next major bump
    this.tagLine = this.line;
    this.tagColumn = this.column;
    // tagOpen is optional on the delegate (see the MissingDoctype tests).
    var delegate = this.delegate;
    if (delegate.tagOpen) {
        delegate.tagOpen();
    }
};
return EventedTokenizer;
}());
export default EventedTokenizer;
export default {
    // We don't need the complete named character reference table because
    // linkifyHtml does not modify the escape sequences. We do need `nbsp`
    // so that whitespace is parsed properly. Other types of whitespace
    // should already be accounted for.
    nbsp: "\u00a0"
};
export { default as HTML5NamedCharRefs } from './html5-named-char-refs';
export { default as EntityParser } from './entity-parser';
export { default as EventedTokenizer } from './evented-tokenizer';
export { default as Tokenizer } from './tokenizer';
export { default as tokenize } from './tokenize';
import Tokenizer from './tokenizer';
import EntityParser from './entity-parser';
import namedCharRefs from './html5-named-char-refs';
/**
 * Convenience entry point: tokenize `input` in one shot using the
 * bundled named-character-reference table.
 *
 * @param {string} input - HTML source text.
 * @param {object} [options] - forwarded to Tokenizer (e.g. `{ loc: true }`).
 * @returns {Array} the array of token objects.
 */
export default function tokenize(input, options) {
    var entityParser = new EntityParser(namedCharRefs);
    return new Tokenizer(entityParser, options).tokenize(input);
}
import EventedTokenizer from './evented-tokenizer';
;
/**
 * Token-building delegate for EventedTokenizer: turns the event stream
 * into an array of plain token objects (Chars, Comment, StartTag, EndTag).
 */
class Tokenizer {
    /**
     * @param entityParser - resolves named/numeric character references.
     * @param options - `{ loc: true }` attaches start/end source locations
     *   to every token.
     */
    constructor(entityParser, options = {}) {
        this.options = options;
        this.token = null;
        this.startLine = 1;
        this.startColumn = 0;
        this.tokens = [];
        this.currentAttribute = null;
        this.tokenizer = new EventedTokenizer(this, entityParser);
    }
    // --- driving the underlying state machine ---------------------------
    tokenize(input) {
        this.tokens = [];
        this.tokenizer.tokenize(input);
        return this.tokens;
    }
    tokenizePart(input) {
        this.tokens = [];
        this.tokenizer.tokenizePart(input);
        return this.tokens;
    }
    tokenizeEOF() {
        this.tokens = [];
        this.tokenizer.tokenizeEOF();
        // At most one token (the flushed data run) can be produced at EOF.
        return this.tokens[0];
    }
    reset() {
        this.token = null;
        this.startLine = 1;
        this.startColumn = 0;
    }
    /**
     * Stamp the current token with its source span (when `options.loc`)
     * and remember where the next token starts.
     */
    addLocInfo() {
        if (this.options.loc) {
            this.token.loc = {
                start: {
                    line: this.startLine,
                    column: this.startColumn
                },
                end: {
                    line: this.tokenizer.line,
                    column: this.tokenizer.column
                }
            };
        }
        this.startLine = this.tokenizer.line;
        this.startColumn = this.tokenizer.column;
    }
    // --- character data --------------------------------------------------
    beginData() {
        this.token = {
            type: 'Chars',
            chars: ''
        };
        this.tokens.push(this.token);
    }
    appendToData(char) {
        this.token.chars += char;
    }
    finishData() {
        this.addLocInfo();
    }
    // --- comments --------------------------------------------------------
    beginComment() {
        this.token = {
            type: 'Comment',
            chars: ''
        };
        this.tokens.push(this.token);
    }
    appendToCommentData(char) {
        this.token.chars += char;
    }
    finishComment() {
        this.addLocInfo();
    }
    // --- tags ------------------------------------------------------------
    beginStartTag() {
        this.token = {
            type: 'StartTag',
            tagName: '',
            attributes: [],
            selfClosing: false
        };
        this.tokens.push(this.token);
    }
    beginEndTag() {
        this.token = {
            type: 'EndTag',
            tagName: ''
        };
        this.tokens.push(this.token);
    }
    finishTag() {
        this.addLocInfo();
    }
    markTagAsSelfClosing() {
        this.token.selfClosing = true;
    }
    appendToTagName(char) {
        this.token.tagName += char;
    }
    // --- attributes: stored as [name, value, isQuoted] triples -----------
    beginAttribute() {
        this.currentAttribute = ["", "", null];
        this.token.attributes.push(this.currentAttribute);
    }
    appendToAttributeName(char) {
        this.currentAttribute[0] += char;
    }
    beginAttributeValue(isQuoted) {
        this.currentAttribute[2] = isQuoted;
    }
    appendToAttributeValue(char) {
        this.currentAttribute[1] = this.currentAttribute[1] || "";
        this.currentAttribute[1] += char;
    }
    finishAttributeValue() {
        // Intentionally empty: the value is accumulated incrementally above.
    }
    reportSyntaxError(message) {
        this.token.syntaxError = message;
    }
}
export default Tokenizer;
// Character classes used by the tokenizer.
var WSP = /[\t\n\f ]/;   // HTML whitespace: tab, LF, FF, space
var ALPHA = /[A-Za-z]/;  // ASCII letters only
var CRLF = /\r\n?/g;     // CR or CRLF, to be normalized to LF

/**
 * True when `char` is an HTML whitespace character.
 */
export function isSpace(char) {
    if (WSP.test(char)) {
        return true;
    }
    return false;
}
/**
 * True when `char` is an ASCII letter.
 */
export function isAlpha(char) {
    if (ALPHA.test(char)) {
        return true;
    }
    return false;
}
/**
 * Normalize Windows (\r\n) and bare-CR (\r) line endings to "\n".
 */
export function preprocessInput(input) {
    var normalized = input.replace(CRLF, "\n");
    return normalized;
}
// Generated always-passing placeholder tests recording which files are
// covered by tslint. Registered data-driven instead of copy-pasted; the
// module/test names and messages are identical to the originals.
var tslintTargets = [
    ['TSLint - src', 'src/tokenize.ts'],
    ['TSLint - src', 'src/tokenizer.ts'],
    ['TSLint - src', 'src/types.ts'],
    ['TSLint - src', 'src/utils.ts'],
    ['TSLint - tests', 'tests/tokenizer-tests.ts']
];
tslintTargets.forEach(function (target) {
    var moduleName = target[0];
    var fileName = target[1];
    QUnit.module(moduleName);
    QUnit.test(fileName + ' should pass tslint', function (assert) {
        assert.expect(1);
        assert.ok(true, fileName + ' should pass tslint.');
    });
});
//# sourceMappingURL=tests.map