UNPKG

@nfrasser/simple-html-tokenizer

Version: (value not captured in this page extract)

Simple HTML Tokenizer is a lightweight JavaScript library that can be used to tokenize the kind of HTML normally found in templates.

1,187 lines (1,166 loc) 49.6 kB
(function (global, factory) { typeof exports === 'object' && typeof module !== 'undefined' ? factory(require('simple-html-tokenizer')) : typeof define === 'function' && define.amd ? define(['simple-html-tokenizer'], factory) : (factory(global.HTML5Tokenizer)); }(this, (function (simpleHtmlTokenizer) { 'use strict'; QUnit.module('simple-html-tokenizer - tokenizer'); QUnit.test('does not fail if delegate does not include doctype methods', function (assert) { var steps = []; var MissingDoctypeTokenizerDelegate = /** @class */ (function () { function MissingDoctypeTokenizerDelegate() { } MissingDoctypeTokenizerDelegate.prototype.reset = function () { steps.push(['reset']); }; MissingDoctypeTokenizerDelegate.prototype.finishData = function () { steps.push(['finishData']); }; MissingDoctypeTokenizerDelegate.prototype.tagOpen = function () { steps.push(['tagOpen']); }; MissingDoctypeTokenizerDelegate.prototype.beginData = function () { steps.push(['beginData']); }; MissingDoctypeTokenizerDelegate.prototype.appendToData = function (char) { steps.push(['appendToData', char]); }; MissingDoctypeTokenizerDelegate.prototype.beginStartTag = function () { steps.push(['beginStartTag']); }; MissingDoctypeTokenizerDelegate.prototype.appendToTagName = function (char) { steps.push(['appendToTagName', char]); }; MissingDoctypeTokenizerDelegate.prototype.beginAttribute = function () { steps.push(['beginAttribute']); }; MissingDoctypeTokenizerDelegate.prototype.appendToAttributeName = function (char) { steps.push(['appendToAttributeName', char]); }; MissingDoctypeTokenizerDelegate.prototype.beginAttributeValue = function (quoted) { steps.push(['beginAttributeValue', "" + quoted]); }; MissingDoctypeTokenizerDelegate.prototype.appendToAttributeValue = function (char) { steps.push(['appendToAttributeValue', char]); }; MissingDoctypeTokenizerDelegate.prototype.finishAttributeValue = function () { steps.push(['finishAttributeValue']); }; 
MissingDoctypeTokenizerDelegate.prototype.markTagAsSelfClosing = function () { steps.push(['markTagAsSelfClosing']); }; MissingDoctypeTokenizerDelegate.prototype.beginEndTag = function () { steps.push(['beginEndTag']); }; MissingDoctypeTokenizerDelegate.prototype.finishTag = function () { steps.push(['finishTag']); }; MissingDoctypeTokenizerDelegate.prototype.beginComment = function () { steps.push(['beginComment']); }; MissingDoctypeTokenizerDelegate.prototype.appendToCommentData = function (char) { steps.push(['appendToCommentData', char]); }; MissingDoctypeTokenizerDelegate.prototype.finishComment = function () { steps.push(['finishComment']); }; MissingDoctypeTokenizerDelegate.prototype.reportSyntaxError = function (error) { steps.push(['reportSyntaxError', error]); }; return MissingDoctypeTokenizerDelegate; }()); var delegate = new MissingDoctypeTokenizerDelegate(); var tokenizer = new simpleHtmlTokenizer.EventedTokenizer(delegate, new simpleHtmlTokenizer.EntityParser({})); tokenizer.tokenize('\n<!-- comment here --><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">\n<!-- comment here -->'); assert.deepEqual(steps, [ ["reset"], ["reset"], ["beginData"], ["appendToData", "\n"], ["finishData"], ["tagOpen"], ["beginComment"], ["appendToCommentData", " "], ["appendToCommentData", "c"], ["appendToCommentData", "o"], ["appendToCommentData", "m"], ["appendToCommentData", "m"], ["appendToCommentData", "e"], ["appendToCommentData", "n"], ["appendToCommentData", "t"], ["appendToCommentData", " "], ["appendToCommentData", "h"], ["appendToCommentData", "e"], ["appendToCommentData", "r"], ["appendToCommentData", "e"], ["appendToCommentData", " "], ["finishComment"], ["tagOpen"], ["beginData"], ["appendToData", "\n"], ["finishData"], ["tagOpen"], ["beginComment"], ["appendToCommentData", " "], ["appendToCommentData", "c"], ["appendToCommentData", "o"], ["appendToCommentData", "m"], ["appendToCommentData", "m"], ["appendToCommentData", 
"e"], ["appendToCommentData", "n"], ["appendToCommentData", "t"], ["appendToCommentData", " "], ["appendToCommentData", "h"], ["appendToCommentData", "e"], ["appendToCommentData", "r"], ["appendToCommentData", "e"], ["appendToCommentData", " "], ["finishComment"] ]); }); QUnit.test('Doctype', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">'); assert.deepEqual(tokens, [doctype('-//W3C//DTD HTML 4.01//EN', 'http://www.w3.org/TR/html4/strict.dtd')], 'Standard HTML 4.01 Strict doctype'); tokens = simpleHtmlTokenizer.tokenize('<!DOCTYPE html><html><body></body></html>'); assert.deepEqual(tokens, [ doctype(), startTag('html'), startTag('body'), endTag('body'), endTag('html'), ], 'DOCTYPE is included in tokens'); tokens = simpleHtmlTokenizer.tokenize('<!-- comment --><!DOCTYPE html>'); assert.deepEqual(tokens, [comment(' comment '), doctype()], 'DOCTYPE after comments is valid'); tokens = simpleHtmlTokenizer.tokenize('<!-- comment --><!DOCTYPE html PUBLIC >'); assert.deepEqual(tokens, [comment(' comment '), doctype()], 'DOCTYPE after comments is valid'); }); QUnit.test('Simple content', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('hello'); assert.deepEqual(tokens, [chars('hello')]); }); QUnit.test('A simple tag', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<div>'); assert.deepEqual(tokens, [startTag('div')]); }); QUnit.test('A simple tag with trailing spaces', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<div \t\n>'); assert.deepEqual(tokens, [startTag('div')]); }); QUnit.test('A simple closing tag', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('</div>'); assert.deepEqual(tokens, [endTag('div')]); }); QUnit.test('A simple closing tag with trailing spaces', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('</div \t\n>'); assert.deepEqual(tokens, [endTag('div')]); }); QUnit.test('A pair 
of hyphenated tags', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<x-foo></x-foo>'); assert.deepEqual(tokens, [startTag('x-foo'), endTag('x-foo')]); }); QUnit.test('A tag with a single-quoted attribute', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<div id='foo'>"); assert.deepEqual(tokens, [startTag('div', [['id', 'foo', true]])]); }); QUnit.test('A tag with a double-quoted attribute', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<div id="foo">'); assert.deepEqual(tokens, [startTag('div', [['id', 'foo', true]])]); }); QUnit.test('A tag with a double-quoted empty', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<div id="">'); assert.deepEqual(tokens, [startTag('div', [['id', '', true]])]); }); QUnit.test('A tag with unquoted attribute', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<div id=foo>'); assert.deepEqual(tokens, [startTag('div', [['id', 'foo', false]])]); }); QUnit.test('A tag with valueless attributes', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<div foo bar>'); assert.deepEqual(tokens, [ startTag('div', [['foo', '', false], ['bar', '', false]]) ]); }); QUnit.test('Missing attribute name', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<div =foo>'); assert.deepEqual(tokens, [ withSyntaxError('attribute name cannot start with equals sign', startTag('div', [['=foo', '', false]])) ]); }); QUnit.test('Invalid character in attribute name', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<div ">'); assert.deepEqual(tokens, [ withSyntaxError('" is not a valid character within attribute names', startTag('div', [['"', '', false]])) ]); }); QUnit.test('A tag with multiple attributes', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<div id=foo class="bar baz" href=\'bat\'>'); assert.deepEqual(tokens, [ startTag('div', [ ['id', 'foo', false], ['class', 'bar baz', true], ['href', 'bat', true] ]) ]); }); 
QUnit.test('A tag with capitalization in attributes', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<svg viewBox="0 0 0 0">'); assert.deepEqual(tokens, [startTag('svg', [['viewBox', '0 0 0 0', true]])]); }); QUnit.test('A tag with capitalization in the tag', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<linearGradient>'); assert.deepEqual(tokens, [startTag('linearGradient', [])]); }); QUnit.test('A self-closing tag', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<img />'); assert.deepEqual(tokens, [startTag('img', [], true)]); }); QUnit.test('A self-closing tag with valueless attributes (regression)', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<input disabled />'); assert.deepEqual(tokens, [ startTag('input', [['disabled', '', false]], true) ]); }); QUnit.test('A self-closing tag with valueless attributes without space before closing (regression)', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<input disabled/>'); assert.deepEqual(tokens, [ startTag('input', [['disabled', '', false]], true) ]); }); QUnit.test('A self-closing tag with an attribute with unquoted value without space before closing (regression)', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<input data-foo=bar/>'); assert.deepEqual(tokens, [ startTag('input', [['data-foo', 'bar', false]], true) ]); }); QUnit.test('A tag with a / in the middle', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<img / src="foo.png">'); assert.deepEqual(tokens, [startTag('img', [['src', 'foo.png', true]])]); }); QUnit.test('An opening and closing tag with some content', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<div id='foo' class='{{bar}} baz'>Some content</div>"); assert.deepEqual(tokens, [ startTag('div', [['id', 'foo', true], ['class', '{{bar}} baz', true]]), chars('Some content'), endTag('div') ]); }); QUnit.test('A comment', function (assert) { var tokens = 
simpleHtmlTokenizer.tokenize('<!-- hello -->'); assert.deepEqual(tokens, [comment(' hello ')]); }); QUnit.test('A (buggy) comment with no ending --', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<!-->'); assert.deepEqual(tokens, [comment()]); }); QUnit.test('A comment that immediately closes', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<!---->'); assert.deepEqual(tokens, [comment()]); }); QUnit.test('A comment that contains a -', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<!-- A perfectly legal - appears -->'); assert.deepEqual(tokens, [comment(' A perfectly legal - appears ')]); tokens = simpleHtmlTokenizer.tokenize('<!-- A perfectly legal - -->'); assert.deepEqual(tokens, [comment(' A perfectly legal - ')]); tokens = simpleHtmlTokenizer.tokenize('<!-- A perfectly legal- -->'); assert.deepEqual(tokens, [comment(' A perfectly legal- ')]); }); QUnit.test('A (buggy) comment that contains two --', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable -- appears -->'); assert.deepEqual(tokens, [comment(' A questionable -- appears ')]); tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable -- -->'); assert.deepEqual(tokens, [comment(' A questionable -- ')]); tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable-- -->'); assert.deepEqual(tokens, [comment(' A questionable-- ')]); }); QUnit.test('A (buggy) comment ending with more than two --', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable but legal comment --->'); assert.deepEqual(tokens, [comment(' A questionable but legal comment -')]); tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable but legal comment--->'); assert.deepEqual(tokens, [comment(' A questionable but legal comment-')]); tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable but legal comment - --->'); assert.deepEqual(tokens, [comment(' A questionable but legal comment - -')]); tokens = 
simpleHtmlTokenizer.tokenize('<!-- A questionable but legal comment -- --->'); assert.deepEqual(tokens, [comment(' A questionable but legal comment -- -')]); tokens = simpleHtmlTokenizer.tokenize('<!-- A questionable but legal comment ------>'); assert.deepEqual(tokens, [comment(' A questionable but legal comment ----')]); }); QUnit.test('A (buggy) comment starting with more than two --', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<!--- Questionable but legal -->'); assert.deepEqual(tokens, [comment('- Questionable but legal ')]); tokens = simpleHtmlTokenizer.tokenize('<!---Questionable but legal -->'); assert.deepEqual(tokens, [comment('-Questionable but legal ')]); }); QUnit.skip('Character references are expanded', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('&quot;Foo &amp; Bar&quot; &lt; &#60;&#x3c; &#x3C; &LT; &NotGreaterFullEqual; &Borksnorlax; &nleqq;'); assert.deepEqual(tokens, [chars('"Foo & Bar" < << < < ≧̸ &Borksnorlax; ≦̸')]); tokens = simpleHtmlTokenizer.tokenize("<div title='&quot;Foo &amp; Bar&quot; &blk12; &lt; &#60;&#x3c; &#x3C; &LT; &NotGreaterFullEqual; &Borksnorlax; &nleqq;'>"); assert.deepEqual(tokens, [ startTag('div', [ ['title', '"Foo & Bar" ▒ < << < < ≧̸ &Borksnorlax; ≦̸', true] ]) ]); }); // https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions QUnit.test('A newline immediately following a <pre> tag is stripped', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<pre>\nhello</pre>"); assert.deepEqual(tokens, [startTag('pre'), chars('hello'), endTag('pre')]); }); QUnit.test('A newline immediately following a closing </pre> tag is not stripped', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("\n<pre>\nhello</pre>\n"); assert.deepEqual(tokens, [chars('\n'), startTag('pre'), chars('hello'), endTag('pre'), chars('\n')]); }); // https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions QUnit.test('A newline immediately following a <PRE> tag is 
stripped', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<PRE>\nhello</PRE>"); assert.deepEqual(tokens, [startTag('PRE'), chars('hello'), endTag('PRE')]); }); // https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions QUnit.test('A newline immediately following a <textarea> tag is stripped', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<textarea>\nhello</textarea>"); assert.deepEqual(tokens, [startTag('textarea'), chars('hello'), endTag('textarea')]); }); // https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions QUnit.test('codemod: A newline immediately following a <pre> tag is stripped', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<pre>\nhello</pre>", { mode: 'codemod' }); assert.deepEqual(tokens, [startTag('pre'), chars('\nhello'), endTag('pre')]); }); QUnit.test('codemod: A newline immediately following a closing </pre> tag is not stripped', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("\n<pre>\nhello</pre>\n", { mode: 'codemod' }); assert.deepEqual(tokens, [chars('\n'), startTag('pre'), chars('\nhello'), endTag('pre'), chars('\n')]); }); // https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions QUnit.test('codemod: A newline immediately following a <PRE> tag is stripped', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<PRE>\nhello</PRE>", { mode: 'codemod' }); assert.deepEqual(tokens, [startTag('PRE'), chars('\nhello'), endTag('PRE')]); }); // https://html.spec.whatwg.org/multipage/syntax.html#element-restrictions QUnit.test('codemod: A newline immediately following a <textarea> tag is stripped', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<textarea>\nhello</textarea>", { mode: 'codemod' }); assert.deepEqual(tokens, [startTag('textarea'), chars('\nhello'), endTag('textarea')]); }); // https://html.spec.whatwg.org/multipage/semantics.html#the-title-element QUnit.test('The title element content is always 
text', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<title>&quot;hey <b>there</b><!-- comment --></title>"); assert.deepEqual(tokens, [startTag('title'), chars('"hey <b>there</b><!-- comment -->'), endTag('title')]); }); // https://github.com/emberjs/ember.js/issues/18530 QUnit.test('Title element content is not text', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<Title><!-- hello --></Title>"); assert.deepEqual(tokens, [startTag('Title'), comment(' hello '), endTag('Title')]); }); // https://html.spec.whatwg.org/multipage/semantics.html#the-style-element QUnit.test('The style element content is always text', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<style>&quot;hey <b>there</b><!-- comment --></style>"); assert.deepEqual(tokens, [startTag('style'), chars('&quot;hey <b>there</b><!-- comment -->'), endTag('style')]); }); // https://html.spec.whatwg.org/multipage/scripting.html#restrictions-for-contents-of-script-elements QUnit.test('The script element content restrictions', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<script>&quot;hey <b>there</b><!-- comment --></script>"); assert.deepEqual(tokens, [startTag('script'), chars('&quot;hey <b>there</b><!-- comment -->'), endTag('script')]); }); QUnit.test('Two following script tags', function (assert) { var tokens = simpleHtmlTokenizer.tokenize("<script><!-- comment --></script> <script>second</script>"); assert.deepEqual(tokens, [ startTag('script'), chars('<!-- comment -->'), endTag('script'), chars(' '), startTag('script'), chars('second'), endTag('script') ]); }); // https://github.com/emberjs/rfcs/blob/master/text/0311-angle-bracket-invocation.md#dynamic-invocations QUnit.test('An Emberish named arg invocation', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<@foo></@foo>'); assert.deepEqual(tokens, [startTag('@foo'), endTag('@foo')]); }); QUnit.test('Parsing <script>s out of a complext HTML document 
[stefanpenner/find-scripts-srcs-in-document#1]', function (assert) { var input = "<!DOCTYPE html><html><head><script src=\"/foo.js\"></script><script src=\"/bar.js\"></script><script src=\"/baz.js\"></script></head></html>"; var tokens = simpleHtmlTokenizer.tokenize(input); assert.deepEqual(tokens, [ doctype(), startTag('html'), startTag('head'), startTag('script', [['src', '/foo.js', true]]), endTag('script'), startTag('script', [['src', '/bar.js', true]]), endTag('script'), startTag('script', [['src', '/baz.js', true]]), endTag('script'), endTag('head'), endTag('html'), ]); }); QUnit.module('simple-html-tokenizer - preprocessing'); QUnit.test('Carriage returns are replaced with line feeds', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('\r\r\n\r\r\n\n'); assert.deepEqual(tokens, [chars('\n\n\n\n\n')]); }); QUnit.module('simple-html-tokenizer - location info'); QUnit.test('lines are counted correctly', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('\r\r\n\r\r\n\n', { loc: true }); assert.deepEqual(tokens, [locInfo(chars('\n\n\n\n\n'), 1, 0, 6, 0)]); }); QUnit.test('tokens: Chars', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('Chars', { loc: true }); assert.deepEqual(tokens, [locInfo(chars('Chars'), 1, 0, 1, 5)]); }); QUnit.test('tokens: Chars start-tag Chars', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('Chars<div>Chars', { loc: true }); assert.deepEqual(tokens, [ locInfo(chars('Chars'), 1, 0, 1, 5), locInfo(startTag('div'), 1, 5, 1, 10), locInfo(chars('Chars'), 1, 10, 1, 15) ]); }); QUnit.test('tokens: start-tag start-tag', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<div><div>', { loc: true }); assert.deepEqual(tokens, [ locInfo(startTag('div'), 1, 0, 1, 5), locInfo(startTag('div'), 1, 5, 1, 10) ]); }); QUnit.test('tokens: html char ref start-tag', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('&gt;<div>', { loc: true }); assert.deepEqual(tokens, [ 
locInfo(chars('>'), 1, 0, 1, 4), locInfo(startTag('div'), 1, 4, 1, 9) ]); }); QUnit.test('tokens: Chars start-tag Chars start-tag', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('Chars\n<div>Chars\n<div>', { loc: true }); assert.deepEqual(tokens, [ locInfo(chars('Chars\n'), 1, 0, 2, 0), locInfo(startTag('div'), 2, 0, 2, 5), locInfo(chars('Chars\n'), 2, 5, 3, 0), locInfo(startTag('div'), 3, 0, 3, 5) ]); }); QUnit.test('tokens: comment start-tag Chars end-tag', function (assert) { var tokens = simpleHtmlTokenizer.tokenize('<!-- multline\ncomment --><div foo=bar>Chars\n</div>', { loc: true }); assert.deepEqual(tokens, [ locInfo(comment(' multline\ncomment '), 1, 0, 2, 11), locInfo(startTag('div', [['foo', 'bar', false]]), 2, 11, 2, 24), locInfo(chars('Chars\n'), 2, 24, 3, 0), locInfo(endTag('div'), 3, 0, 3, 6) ]); }); function chars(s) { return { type: "Chars" /* Chars */, chars: s === undefined ? '' : s }; } function comment(s) { return { type: "Comment" /* Comment */, chars: s === undefined ? '' : s }; } function startTag(tagName, attributes, selfClosing) { return { type: "StartTag" /* StartTag */, tagName: tagName, attributes: attributes === undefined ? [] : attributes, selfClosing: selfClosing === undefined ? 
false : selfClosing }; } function endTag(tagName) { return { type: "EndTag" /* EndTag */, tagName: tagName }; } function doctype(publicIdentifier, systemIdentifier) { var doctype = { type: "Doctype" /* Doctype */, name: 'html', }; if (publicIdentifier) { doctype.publicIdentifier = publicIdentifier; } if (systemIdentifier) { doctype.systemIdentifier = systemIdentifier; } return doctype; } function locInfo(token, startLine, startColumn, endLine, endColumn) { token.loc = { start: { line: startLine, column: startColumn }, end: { line: endLine, column: endColumn } }; return token; } function withSyntaxError(message, result) { result.syntaxError = message; return result; } }))); QUnit.module('TSLint - src'); QUnit.test('src/entity-parser.ts should pass tslint', function(assert) { assert.expect(1); assert.ok(true, 'src/entity-parser.ts should pass tslint.'); }); QUnit.module('TSLint - src'); QUnit.test('src/evented-tokenizer.ts should pass tslint', function(assert) { assert.expect(1); assert.ok(true, 'src/evented-tokenizer.ts should pass tslint.'); }); QUnit.module('TSLint - src/generated'); QUnit.test('src/generated/html5-named-char-refs.ts should pass tslint', function(assert) { assert.expect(1); assert.ok(true, 'src/generated/html5-named-char-refs.ts should pass tslint.'); }); QUnit.module('TSLint - src/generated'); QUnit.test('src/generated/tokenizer-states.ts should pass tslint', function(assert) { assert.expect(1); assert.ok(true, 'src/generated/tokenizer-states.ts should pass tslint.'); }); QUnit.module('TSLint - src'); QUnit.test('src/index.ts should pass tslint', function(assert) { assert.expect(1); assert.ok(true, 'src/index.ts should pass tslint.'); }); var HEXCHARCODE = /^#[xX]([A-Fa-f0-9]+)$/; var CHARCODE = /^#([0-9]+)$/; var NAMED = /^([A-Za-z0-9]+)$/; var EntityParser = (function () { function EntityParser(named) { this.named = named; } EntityParser.prototype.parse = function (entity) { if (!entity) { return; } var matches = entity.match(HEXCHARCODE); if 
(matches) { return "&#x" + matches[1] + ";"; } matches = entity.match(CHARCODE); if (matches) { return "&#" + matches[1] + ";"; } matches = entity.match(NAMED); if (matches) { return this.named[matches[1]] || "&" + matches[1] + ";"; } }; return EntityParser; }()); export default EntityParser; import { preprocessInput, isAlpha, isSpace } from './utils'; var EventedTokenizer = (function () { function EventedTokenizer(delegate, entityParser) { this.delegate = delegate; this.entityParser = entityParser; this.state = null; this.input = null; this.index = -1; this.tagLine = -1; this.tagColumn = -1; this.line = -1; this.column = -1; this.states = { beforeData: function () { var char = this.peek(); if (char === "<") { this.state = 'tagOpen'; this.markTagStart(); this.consume(); } else { this.state = 'data'; this.delegate.beginData(); } }, data: function () { var char = this.peek(); if (char === "<") { this.delegate.finishData(); this.state = 'tagOpen'; this.markTagStart(); this.consume(); } else if (char === "&") { this.consume(); this.delegate.appendToData(this.consumeCharRef() || "&"); } else { this.consume(); this.delegate.appendToData(char); } }, tagOpen: function () { var char = this.consume(); if (char === "!") { this.state = 'markupDeclaration'; } else if (char === "/") { this.state = 'endTagOpen'; } else if (isAlpha(char)) { this.state = 'tagName'; this.delegate.beginStartTag(); this.delegate.appendToTagName(char.toLowerCase()); } }, markupDeclaration: function () { var char = this.consume(); if (char === "-" && this.input.charAt(this.index) === "-") { this.consume(); this.state = 'commentStart'; this.delegate.beginComment(); } }, commentStart: function () { var char = this.consume(); if (char === "-") { this.state = 'commentStartDash'; } else if (char === ">") { this.delegate.finishComment(); this.state = 'beforeData'; } else { this.delegate.appendToCommentData(char); this.state = 'comment'; } }, commentStartDash: function () { var char = this.consume(); if (char 
=== "-") { this.state = 'commentEnd'; } else if (char === ">") { this.delegate.finishComment(); this.state = 'beforeData'; } else { this.delegate.appendToCommentData("-"); this.state = 'comment'; } }, comment: function () { var char = this.consume(); if (char === "-") { this.state = 'commentEndDash'; } else { this.delegate.appendToCommentData(char); } }, commentEndDash: function () { var char = this.consume(); if (char === "-") { this.state = 'commentEnd'; } else { this.delegate.appendToCommentData("-" + char); this.state = 'comment'; } }, commentEnd: function () { var char = this.consume(); if (char === ">") { this.delegate.finishComment(); this.state = 'beforeData'; } else { this.delegate.appendToCommentData("--" + char); this.state = 'comment'; } }, tagName: function () { var char = this.consume(); if (isSpace(char)) { this.state = 'beforeAttributeName'; } else if (char === "/") { this.state = 'selfClosingStartTag'; } else if (char === ">") { this.delegate.finishTag(); this.state = 'beforeData'; } else { this.delegate.appendToTagName(char); } }, beforeAttributeName: function () { var char = this.peek(); if (isSpace(char)) { this.consume(); return; } else if (char === "/") { this.state = 'selfClosingStartTag'; this.consume(); } else if (char === ">") { this.consume(); this.delegate.finishTag(); this.state = 'beforeData'; } else if (char === '=') { this.delegate.reportSyntaxError("attribute name cannot start with equals sign"); this.state = 'attributeName'; this.delegate.beginAttribute(); this.consume(); this.delegate.appendToAttributeName(char); } else { this.state = 'attributeName'; this.delegate.beginAttribute(); } }, attributeName: function () { var char = this.peek(); if (isSpace(char)) { this.state = 'afterAttributeName'; this.consume(); } else if (char === "/") { this.delegate.beginAttributeValue(false); this.delegate.finishAttributeValue(); this.consume(); this.state = 'selfClosingStartTag'; } else if (char === "=") { this.state = 'beforeAttributeValue'; 
this.consume(); } else if (char === ">") { this.delegate.beginAttributeValue(false); this.delegate.finishAttributeValue(); this.consume(); this.delegate.finishTag(); this.state = 'beforeData'; } else if (char === '"' || char === "'" || char === '<') { this.delegate.reportSyntaxError(char + " is not a valid character within attribute names"); this.consume(); this.delegate.appendToAttributeName(char); } else { this.consume(); this.delegate.appendToAttributeName(char); } }, afterAttributeName: function () { var char = this.peek(); if (isSpace(char)) { this.consume(); return; } else if (char === "/") { this.delegate.beginAttributeValue(false); this.delegate.finishAttributeValue(); this.consume(); this.state = 'selfClosingStartTag'; } else if (char === "=") { this.consume(); this.state = 'beforeAttributeValue'; } else if (char === ">") { this.delegate.beginAttributeValue(false); this.delegate.finishAttributeValue(); this.consume(); this.delegate.finishTag(); this.state = 'beforeData'; } else { this.delegate.beginAttributeValue(false); this.delegate.finishAttributeValue(); this.consume(); this.state = 'attributeName'; this.delegate.beginAttribute(); this.delegate.appendToAttributeName(char); } }, beforeAttributeValue: function () { var char = this.peek(); if (isSpace(char)) { this.consume(); } else if (char === '"') { this.state = 'attributeValueDoubleQuoted'; this.delegate.beginAttributeValue(true); this.consume(); } else if (char === "'") { this.state = 'attributeValueSingleQuoted'; this.delegate.beginAttributeValue(true); this.consume(); } else if (char === ">") { this.delegate.beginAttributeValue(false); this.delegate.finishAttributeValue(); this.consume(); this.delegate.finishTag(); this.state = 'beforeData'; } else { this.state = 'attributeValueUnquoted'; this.delegate.beginAttributeValue(false); this.consume(); this.delegate.appendToAttributeValue(char); } }, attributeValueDoubleQuoted: function () { var char = this.consume(); if (char === '"') { 
// ---- EventedTokenizer (continued) -----------------------------------------
// This chunk resumes mid-way through the `attributeValueDoubleQuoted` state
// handler: `char` was consumed and matched against the closing `"` just
// before this point.
this.delegate.finishAttributeValue();
                this.state = 'afterAttributeValueQuoted';
            }
            else if (char === "&") {
                // Character reference inside the value; fall back to a
                // literal "&" when it does not parse as a terminated entity.
                this.delegate.appendToAttributeValue(this.consumeCharRef('"') || "&");
            }
            else {
                this.delegate.appendToAttributeValue(char);
            }
        },
        // State: inside a single-quoted attribute value.
        attributeValueSingleQuoted: function () {
            var char = this.consume();
            if (char === "'") {
                this.delegate.finishAttributeValue();
                this.state = 'afterAttributeValueQuoted';
            }
            else if (char === "&") {
                this.delegate.appendToAttributeValue(this.consumeCharRef("'") || "&");
            }
            else {
                this.delegate.appendToAttributeValue(char);
            }
        },
        // State: inside an unquoted attribute value; terminated by
        // whitespace or the closing ">".
        attributeValueUnquoted: function () {
            var char = this.peek();
            if (isSpace(char)) {
                this.delegate.finishAttributeValue();
                this.consume();
                this.state = 'beforeAttributeName';
            }
            else if (char === "&") {
                this.consume();
                this.delegate.appendToAttributeValue(this.consumeCharRef(">") || "&");
            }
            else if (char === ">") {
                this.delegate.finishAttributeValue();
                this.consume();
                this.delegate.finishTag();
                this.state = 'beforeData';
            }
            else {
                this.consume();
                this.delegate.appendToAttributeValue(char);
            }
        },
        // State: a quoted attribute value just closed; decide what follows.
        afterAttributeValueQuoted: function () {
            var char = this.peek();
            if (isSpace(char)) {
                this.consume();
                this.state = 'beforeAttributeName';
            }
            else if (char === "/") {
                this.consume();
                this.state = 'selfClosingStartTag';
            }
            else if (char === ">") {
                this.consume();
                this.delegate.finishTag();
                this.state = 'beforeData';
            }
            else {
                // Anything else begins a new attribute immediately
                // (the character is NOT consumed here).
                this.state = 'beforeAttributeName';
            }
        },
        // State: saw "/" inside a start tag; a following ">" marks the tag
        // self-closing, anything else is treated as another attribute.
        selfClosingStartTag: function () {
            var char = this.peek();
            if (char === ">") {
                this.consume();
                this.delegate.markTagAsSelfClosing();
                this.delegate.finishTag();
                this.state = 'beforeData';
            }
            else {
                this.state = 'beforeAttributeName';
            }
        },
        // State: saw "</"; only an alphabetic character opens an end tag
        // (other characters are silently consumed with no state change).
        endTagOpen: function () {
            var char = this.consume();
            if (isAlpha(char)) {
                this.state = 'tagName';
                this.delegate.beginEndTag();
                // Tag names are normalized to lower case.
                this.delegate.appendToTagName(char.toLowerCase());
            }
        }
    };
    this.reset();
}
// Reset position/state bookkeeping and forward the reset to the delegate.
EventedTokenizer.prototype.reset = function () {
    this.state = 'beforeData';
    this.input = '';
    this.index = 0;
    // `line` is 1-based, `column` is 0-based.
    this.line = 1;
    this.column = 0;
    this.tagLine = -1;
    this.tagColumn = -1;
    this.delegate.reset();
};
// Tokenize a complete input string in one shot (reset + part + EOF).
EventedTokenizer.prototype.tokenize = function (input) {
    this.reset();
    this.tokenizePart(input);
    this.tokenizeEOF();
};
// Tokenize a chunk of input; input accumulates, so this may be called
// repeatedly for streaming use.
EventedTokenizer.prototype.tokenizePart = function (input) {
    this.input += preprocessInput(input);
    while (this.index < this.input.length) {
        // Dispatch to the handler for the current state name.
        this.states[this.state].call(this);
    }
};
// Signal end of input: flush any pending character data.
EventedTokenizer.prototype.tokenizeEOF = function () {
    this.flushData();
};
EventedTokenizer.prototype.flushData = function () {
    if (this.state === 'data') {
        this.delegate.finishData();
        this.state = 'beforeData';
    }
};
// Look at the current character without consuming it; returns "" at EOF.
EventedTokenizer.prototype.peek = function () {
    return this.input.charAt(this.index);
};
// Consume one character, maintaining line/column bookkeeping.
EventedTokenizer.prototype.consume = function () {
    var char = this.peek();
    this.index++;
    if (char === "\n") {
        this.line++;
        this.column = 0;
    }
    else {
        this.column++;
    }
    return char;
};
// Try to parse a character reference starting at the current index.
// Returns the replacement string on success (consuming the entity text and
// the trailing ";"), or undefined when no terminated entity is found.
// NOTE(review): callers pass the surrounding quote/terminator character,
// but this implementation declares no parameter and ignores it.
EventedTokenizer.prototype.consumeCharRef = function () {
    var endIndex = this.input.indexOf(';', this.index);
    if (endIndex === -1) {
        return;
    }
    var entity = this.input.slice(this.index, endIndex);
    var chars = this.entityParser.parse(entity);
    if (chars) {
        var count = entity.length;
        // consume the entity chars
        while (count) {
            this.consume();
            count--;
        }
        // consume the `;`
        this.consume();
        return chars;
    }
};
// Record where the current tag starts and notify the delegate via the
// optional tagOpen hook (guarded so minimal delegates still work).
EventedTokenizer.prototype.markTagStart = function () {
    // these properties to be removed in next major bump
    this.tagLine = this.line;
    this.tagColumn = this.column;
    if (this.delegate.tagOpen) {
        this.delegate.tagOpen();
    }
};
return EventedTokenizer;
}());
export default EventedTokenizer;
// ---- html5-named-char-refs module -----------------------------------------
// We don't need the complete named character reference table because
// linkifyHtml does not modify the escape sequences. We do need &nbsp; so
// that whitespace is parsed properly. Other types of whitespace should
// already be accounted for.
export default {
    nbsp: "\u00a0"
};
// ---- index module: public re-exports --------------------------------------
export { default as HTML5NamedCharRefs } from './html5-named-char-refs';
export { default as EntityParser } from './entity-parser';
export { default as EventedTokenizer } from './evented-tokenizer';
export { default as Tokenizer } from './tokenizer';
export { default as tokenize } from './tokenize';
// ---- tokenize module -------------------------------------------------------
import Tokenizer from './tokenizer';
import EntityParser from './entity-parser';
import namedCharRefs from './html5-named-char-refs';
// Convenience entry point: tokenize `input` into an array of token objects.
// `options` is forwarded to Tokenizer (e.g. { loc: true }).
export default function tokenize(input, options) {
    var tokenizer = new Tokenizer(new EntityParser(namedCharRefs), options);
    return tokenizer.tokenize(input);
}
// ---- tokenizer module ------------------------------------------------------
import EventedTokenizer from './evented-tokenizer';
;
var Tokenizer = (function () {
    // Token-building delegate driven by an EventedTokenizer; collects
    // finished tokens into `this.tokens`. `options.loc` enables
    // source-location tracking on each token.
    function Tokenizer(entityParser, options) {
        if (options === void 0) { options = {}; }
        this.options = options;
        this.token = null;
        this.startLine = 1;
        this.startColumn = 0;
        this.tokens = [];
        this.currentAttribute = null;
        this.tokenizer = new EventedTokenizer(this, entityParser);
    }
    // Tokenize a full input string; returns the array of tokens.
    Tokenizer.prototype.tokenize = function (input) {
        this.tokens = [];
        this.tokenizer.tokenize(input);
        return this.tokens;
    };
    // Tokenize a chunk; returns only the tokens produced by this chunk.
    Tokenizer.prototype.tokenizePart = function (input) {
        this.tokens = [];
        this.tokenizer.tokenizePart(input);
        return this.tokens;
    };
    // Finish tokenizing; returns the single token flushed at EOF (if any).
    Tokenizer.prototype.tokenizeEOF = function () {
        this.tokens = [];
        this.tokenizer.tokenizeEOF();
        return this.tokens[0];
    };
    Tokenizer.prototype.reset = function () {
        this.token = null;
        this.startLine = 1;
        this.startColumn = 0;
    };
    // Attach {start, end} location info to the current token when enabled,
    // then advance the start marker to the tokenizer's current position.
    Tokenizer.prototype.addLocInfo = function () {
        if (this.options.loc) {
            this.token.loc = {
                start: {
                    line: this.startLine,
                    column: this.startColumn
                },
                end: {
                    line: this.tokenizer.line,
                    column: this.tokenizer.column
                }
            };
        }
        this.startLine = this.tokenizer.line;
        this.startColumn = this.tokenizer.column;
    };
    // Data
    Tokenizer.prototype.beginData = function () {
        this.token = {
            type: 'Chars',
            chars: ''
        };
        this.tokens.push(this.token);
    };
    Tokenizer.prototype.appendToData = function
(char) {
        // Continuation of Tokenizer.prototype.appendToData — the
        // `function` keyword sits at the end of the previous line.
        this.token.chars += char;
    };
    Tokenizer.prototype.finishData = function () {
        this.addLocInfo();
    };
    // Comment
    Tokenizer.prototype.beginComment = function () {
        this.token = {
            type: 'Comment',
            chars: ''
        };
        this.tokens.push(this.token);
    };
    Tokenizer.prototype.appendToCommentData = function (char) {
        this.token.chars += char;
    };
    Tokenizer.prototype.finishComment = function () {
        this.addLocInfo();
    };
    // Tags - basic
    Tokenizer.prototype.beginStartTag = function () {
        this.token = {
            type: 'StartTag',
            tagName: '',
            attributes: [],
            selfClosing: false
        };
        this.tokens.push(this.token);
    };
    Tokenizer.prototype.beginEndTag = function () {
        this.token = {
            type: 'EndTag',
            tagName: ''
        };
        this.tokens.push(this.token);
    };
    Tokenizer.prototype.finishTag = function () {
        this.addLocInfo();
    };
    Tokenizer.prototype.markTagAsSelfClosing = function () {
        this.token.selfClosing = true;
    };
    // Tags - name
    Tokenizer.prototype.appendToTagName = function (char) {
        this.token.tagName += char;
    };
    // Tags - attributes
    // Each attribute is stored as a [name, value, isQuoted] triple on the
    // current StartTag token.
    Tokenizer.prototype.beginAttribute = function () {
        this.currentAttribute = ["", "", null];
        this.token.attributes.push(this.currentAttribute);
    };
    Tokenizer.prototype.appendToAttributeName = function (char) {
        this.currentAttribute[0] += char;
    };
    Tokenizer.prototype.beginAttributeValue = function (isQuoted) {
        this.currentAttribute[2] = isQuoted;
    };
    Tokenizer.prototype.appendToAttributeValue = function (char) {
        // Lazily coerce a null value slot to "" before appending.
        this.currentAttribute[1] = this.currentAttribute[1] || "";
        this.currentAttribute[1] += char;
    };
    // Intentional no-op: the value is already complete in currentAttribute.
    Tokenizer.prototype.finishAttributeValue = function () { };
    // Record a syntax error on the current token instead of throwing.
    Tokenizer.prototype.reportSyntaxError = function (message) {
        this.token.syntaxError = message;
    };
    return Tokenizer;
}());
export default Tokenizer;
// ---- utils module ----------------------------------------------------------
// Whitespace here is tab, LF, FF, and space (matches the characters the
// tokenizer treats as attribute/tag separators).
var WSP = /[\t\n\f ]/;
var ALPHA = /[A-Za-z]/;
var CRLF = /\r\n?/g;
export function isSpace(char) {
    return WSP.test(char);
}
export function isAlpha(char) {
    return ALPHA.test(char);
}
// Normalize CRLF and lone CR line endings to LF before tokenizing.
export function preprocessInput(input) {
    return input.replace(CRLF, "\n");
}
// ---- generated TSLint placeholder tests ------------------------------------
// These assert `true` unconditionally; the real lint check happens at build
// time and these stubs only record that it ran.
QUnit.module('TSLint - src');
QUnit.test('src/tokenize.ts should pass tslint', function(assert) {
    assert.expect(1);
    assert.ok(true, 'src/tokenize.ts should pass tslint.');
});
QUnit.module('TSLint - src');
QUnit.test('src/tokenizer.ts should pass tslint', function(assert) {
    assert.expect(1);
    assert.ok(true, 'src/tokenizer.ts should pass tslint.');
});
QUnit.module('TSLint - src');
QUnit.test('src/types.ts should pass tslint', function(assert) {
    assert.expect(1);
    assert.ok(true, 'src/types.ts should pass tslint.');
});
QUnit.module('TSLint - src');
QUnit.test('src/utils.ts should pass tslint', function(assert) {
    assert.expect(1);
    assert.ok(true, 'src/utils.ts should pass tslint.');
});
QUnit.module('TSLint - tests');
QUnit.test('tests/tokenizer-tests.ts should pass tslint', function(assert) {
    assert.expect(1);
    assert.ok(true, 'tests/tokenizer-tests.ts should pass tslint.');
});
//# sourceMappingURL=tests.map