UNPKG

@nfrasser/simple-html-tokenizer

Version:

Simple HTML Tokenizer is a lightweight JavaScript library that can be used to tokenize the kind of HTML normally found in templates.

128 lines 13 kB
import EventedTokenizer from './evented-tokenizer';

/**
 * Collects tokens ('Chars', 'Comment', 'StartTag', 'EndTag') into an array.
 *
 * The instance hands itself to an EventedTokenizer as its first constructor
 * argument; the begin*, appendTo*, finish* and related methods below are
 * presumably the callbacks that tokenizer invokes (EventedTokenizer is defined
 * in another module - confirm there).
 */
var Tokenizer = (function () {
    /**
     * Makes `token` the token currently being built and records it in the
     * owner's output list.
     */
    function startToken(owner, token) {
        owner.token = token;
        owner.tokens.push(token);
    }

    /**
     * @param entityParser forwarded untouched to the EventedTokenizer.
     * @param options optional settings; defaults to an empty object. When
     *   `options.loc` is truthy, finished tokens receive a `loc` property.
     */
    function Tokenizer(entityParser, options) {
        this.options = options === undefined ? {} : options;
        // Token under construction, and where it started in the input.
        this.token = null;
        this.startLine = 1;
        this.startColumn = 0;
        // Tokens gathered by the current tokenize* call.
        this.tokens = [];
        // Attribute triple under construction: [name, value, isQuoted].
        this.currentAttribute = null;
        this.tokenizer = new EventedTokenizer(this, entityParser);
    }

    var proto = Tokenizer.prototype;

    /**
     * Starts a fresh token list, runs the underlying tokenizer's `tokenize`
     * on `input`, and returns the list.
     */
    proto.tokenize = function (input) {
        this.tokens = [];
        this.tokenizer.tokenize(input);
        return this.tokens;
    };

    /**
     * Starts a fresh token list, runs the underlying tokenizer's
     * `tokenizePart` on `input`, and returns the list.
     */
    proto.tokenizePart = function (input) {
        this.tokens = [];
        this.tokenizer.tokenizePart(input);
        return this.tokens;
    };

    /**
     * Starts a fresh token list, runs the underlying tokenizer's
     * `tokenizeEOF`, and returns only the first collected token
     * (undefined when none was collected).
     */
    proto.tokenizeEOF = function () {
        this.tokens = [];
        this.tokenizer.tokenizeEOF();
        return this.tokens[0];
    };

    /**
     * Drops the current token and rewinds the start position to line 1,
     * column 0. The token list and current attribute are left as they are.
     */
    proto.reset = function () {
        this.token = null;
        this.startLine = 1;
        this.startColumn = 0;
    };

    /**
     * When `options.loc` is truthy, stamps the current token with a `loc`
     * spanning the remembered start position to the underlying tokenizer's
     * current line/column. In every case the start position is then moved to
     * that current line/column.
     */
    proto.addLocInfo = function () {
        var source = this.tokenizer;
        if (this.options.loc) {
            this.token.loc = {
                start: { line: this.startLine, column: this.startColumn },
                end: { line: source.line, column: source.column }
            };
        }
        this.startLine = source.line;
        this.startColumn = source.column;
    };

    // ---- Character data ----

    /** Opens an empty 'Chars' token. */
    proto.beginData = function () {
        startToken(this, { type: 'Chars', chars: '' });
    };

    /** Appends `char` to the current token's `chars`. */
    proto.appendToData = function (char) {
        this.token.chars += char;
    };

    /** Closes the current data token by recording location info. */
    proto.finishData = function () {
        this.addLocInfo();
    };

    // ---- Comments ----

    /** Opens an empty 'Comment' token. */
    proto.beginComment = function () {
        startToken(this, { type: 'Comment', chars: '' });
    };

    /** Appends `char` to the current token's `chars`. */
    proto.appendToCommentData = function (char) {
        this.token.chars += char;
    };

    /** Closes the current comment token by recording location info. */
    proto.finishComment = function () {
        this.addLocInfo();
    };

    // ---- Tags: open / close ----

    /** Opens a 'StartTag' token with no name, no attributes, not self-closing. */
    proto.beginStartTag = function () {
        startToken(this, {
            type: 'StartTag',
            tagName: '',
            attributes: [],
            selfClosing: false
        });
    };

    /** Opens an 'EndTag' token with an empty name. */
    proto.beginEndTag = function () {
        startToken(this, { type: 'EndTag', tagName: '' });
    };

    /** Closes the current tag token by recording location info. */
    proto.finishTag = function () {
        this.addLocInfo();
    };

    /** Flags the current token as self-closing. */
    proto.markTagAsSelfClosing = function () {
        this.token.selfClosing = true;
    };

    // ---- Tags: name ----

    /** Appends `char` to the current token's `tagName`. */
    proto.appendToTagName = function (char) {
        this.token.tagName += char;
    };

    // ---- Tags: attributes ----

    /**
     * Creates a new ["", "", null] attribute triple, makes it current, and
     * adds it to the current token's `attributes`.
     */
    proto.beginAttribute = function () {
        var attribute = ["", "", null];
        this.currentAttribute = attribute;
        this.token.attributes.push(attribute);
    };

    /** Appends `char` to the current attribute's name (slot 0). */
    proto.appendToAttributeName = function (char) {
        this.currentAttribute[0] += char;
    };

    /** Stores `isQuoted` in the current attribute's third slot. */
    proto.beginAttributeValue = function (isQuoted) {
        this.currentAttribute[2] = isQuoted;
    };

    /**
     * Appends `char` to the current attribute's value (slot 1), treating a
     * falsy existing value as the empty string.
     */
    proto.appendToAttributeValue = function (char) {
        var attribute = this.currentAttribute;
        attribute[1] = (attribute[1] || "") + char;
    };

    /** Intentionally does nothing. */
    proto.finishAttributeValue = function () {
    };

    /** Records `message` on the current token as `syntaxError`. */
    proto.reportSyntaxError = function (message) {
        this.token.syntaxError = message;
    };

    return Tokenizer;
}());

export default Tokenizer;
// The source-map directive opened here is completed by the text on the next line of the file.
//# 
sourceMappingURL=data:application/json;base64,eyJ2ZXJzaW9uIjozLCJmaWxlIjoidG9rZW5pemVyLmpzIiwic291cmNlUm9vdCI6IiIsInNvdXJjZXMiOlsidG9rZW5pemVyLnRzIl0sIm5hbWVzIjpbXSwibWFwcGluZ3MiOiJBQUFBLE9BQU8sZ0JBQWdCLE1BQU0scUJBQXFCLENBQUM7QUFJbEQsQ0FBQztBQXVCRjtJQVFFLG1CQUFZLFlBQVksRUFBVSxPQUE4QjtRQUE5Qix3QkFBQSxFQUFBLFlBQThCO1FBQTlCLFlBQU8sR0FBUCxPQUFPLENBQXVCO1FBUHhELFVBQUssR0FBVSxJQUFJLENBQUM7UUFDcEIsY0FBUyxHQUFHLENBQUMsQ0FBQztRQUNkLGdCQUFXLEdBQUcsQ0FBQyxDQUFDO1FBRWhCLFdBQU0sR0FBWSxFQUFFLENBQUM7UUFDckIscUJBQWdCLEdBQWMsSUFBSSxDQUFDO1FBR3pDLElBQUksQ0FBQyxTQUFTLEdBQUcsSUFBSSxnQkFBZ0IsQ0FBQyxJQUFJLEVBQUUsWUFBWSxDQUFDLENBQUM7SUFDNUQsQ0FBQztJQUVELDRCQUFRLEdBQVIsVUFBUyxLQUFLO1FBQ1osSUFBSSxDQUFDLE1BQU0sR0FBRyxFQUFFLENBQUM7UUFDakIsSUFBSSxDQUFDLFNBQVMsQ0FBQyxRQUFRLENBQUMsS0FBSyxDQUFDLENBQUM7UUFDL0IsTUFBTSxDQUFDLElBQUksQ0FBQyxNQUFNLENBQUM7SUFDckIsQ0FBQztJQUVELGdDQUFZLEdBQVosVUFBYSxLQUFLO1FBQ2hCLElBQUksQ0FBQyxNQUFNLEdBQUcsRUFBRSxDQUFDO1FBQ2pCLElBQUksQ0FBQyxTQUFTLENBQUMsWUFBWSxDQUFDLEtBQUssQ0FBQyxDQUFDO1FBQ25DLE1BQU0sQ0FBQyxJQUFJLENBQUMsTUFBTSxDQUFDO0lBQ3JCLENBQUM7SUFFRCwrQkFBVyxHQUFYO1FBQ0UsSUFBSSxDQUFDLE1BQU0sR0FBRyxFQUFFLENBQUM7UUFDakIsSUFBSSxDQUFDLFNBQVMsQ0FBQyxXQUFXLEVBQUUsQ0FBQztRQUM3QixNQUFNLENBQUMsSUFBSSxDQUFDLE1BQU0sQ0FBQyxDQUFDLENBQUMsQ0FBQztJQUN4QixDQUFDO0lBRUQseUJBQUssR0FBTDtRQUNFLElBQUksQ0FBQyxLQUFLLEdBQUcsSUFBSSxDQUFDO1FBQ2xCLElBQUksQ0FBQyxTQUFTLEdBQUcsQ0FBQyxDQUFDO1FBQ25CLElBQUksQ0FBQyxXQUFXLEdBQUcsQ0FBQyxDQUFDO0lBQ3ZCLENBQUM7SUFFRCw4QkFBVSxHQUFWO1FBQ0UsRUFBRSxDQUFDLENBQUMsSUFBSSxDQUFDLE9BQU8sQ0FBQyxHQUFHLENBQUMsQ0FBQyxDQUFDO1lBQ3JCLElBQUksQ0FBQyxLQUFLLENBQUMsR0FBRyxHQUFHO2dCQUNmLEtBQUssRUFBRTtvQkFDTCxJQUFJLEVBQUUsSUFBSSxDQUFDLFNBQVM7b0JBQ3BCLE1BQU0sRUFBRSxJQUFJLENBQUMsV0FBVztpQkFDekI7Z0JBQ0QsR0FBRyxFQUFFO29CQUNILElBQUksRUFBRSxJQUFJLENBQUMsU0FBUyxDQUFDLElBQUk7b0JBQ3pCLE1BQU0sRUFBRSxJQUFJLENBQUMsU0FBUyxDQUFDLE1BQU07aUJBQzlCO2FBQ0YsQ0FBQztRQUNKLENBQUM7UUFDRCxJQUFJLENBQUMsU0FBUyxHQUFHLElBQUksQ0FBQyxTQUFTLENBQUMsSUFBSSxDQUFDO1FBQ3JDLElBQUksQ0FBQyxXQUFXLEdBQUcsSUFBSSxDQUFDLFNBQV
MsQ0FBQyxNQUFNLENBQUM7SUFDM0MsQ0FBQztJQUVELE9BQU87SUFFUCw2QkFBUyxHQUFUO1FBQ0UsSUFBSSxDQUFDLEtBQUssR0FBRztZQUNYLElBQUksRUFBRSxPQUFPO1lBQ2IsS0FBSyxFQUFFLEVBQUU7U0FDVixDQUFDO1FBQ0YsSUFBSSxDQUFDLE1BQU0sQ0FBQyxJQUFJLENBQUMsSUFBSSxDQUFDLEtBQUssQ0FBQyxDQUFDO0lBQy9CLENBQUM7SUFFRCxnQ0FBWSxHQUFaLFVBQWEsSUFBSTtRQUNmLElBQUksQ0FBQyxLQUFLLENBQUMsS0FBSyxJQUFJLElBQUksQ0FBQztJQUMzQixDQUFDO0lBRUQsOEJBQVUsR0FBVjtRQUNFLElBQUksQ0FBQyxVQUFVLEVBQUUsQ0FBQztJQUNwQixDQUFDO0lBRUQsVUFBVTtJQUVWLGdDQUFZLEdBQVo7UUFDRSxJQUFJLENBQUMsS0FBSyxHQUFHO1lBQ1gsSUFBSSxFQUFFLFNBQVM7WUFDZixLQUFLLEVBQUUsRUFBRTtTQUNWLENBQUM7UUFDRixJQUFJLENBQUMsTUFBTSxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsS0FBSyxDQUFDLENBQUM7SUFDL0IsQ0FBQztJQUVELHVDQUFtQixHQUFuQixVQUFvQixJQUFJO1FBQ3RCLElBQUksQ0FBQyxLQUFLLENBQUMsS0FBSyxJQUFJLElBQUksQ0FBQztJQUMzQixDQUFDO0lBRUQsaUNBQWEsR0FBYjtRQUNFLElBQUksQ0FBQyxVQUFVLEVBQUUsQ0FBQztJQUNwQixDQUFDO0lBRUQsZUFBZTtJQUVmLGlDQUFhLEdBQWI7UUFDRSxJQUFJLENBQUMsS0FBSyxHQUFHO1lBQ1gsSUFBSSxFQUFFLFVBQVU7WUFDaEIsT0FBTyxFQUFFLEVBQUU7WUFDWCxVQUFVLEVBQUUsRUFBRTtZQUNkLFdBQVcsRUFBRSxLQUFLO1NBQ25CLENBQUM7UUFDRixJQUFJLENBQUMsTUFBTSxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsS0FBSyxDQUFDLENBQUM7SUFDL0IsQ0FBQztJQUVELCtCQUFXLEdBQVg7UUFDRSxJQUFJLENBQUMsS0FBSyxHQUFHO1lBQ1gsSUFBSSxFQUFFLFFBQVE7WUFDZCxPQUFPLEVBQUUsRUFBRTtTQUNaLENBQUM7UUFDRixJQUFJLENBQUMsTUFBTSxDQUFDLElBQUksQ0FBQyxJQUFJLENBQUMsS0FBSyxDQUFDLENBQUM7SUFDL0IsQ0FBQztJQUVELDZCQUFTLEdBQVQ7UUFDRSxJQUFJLENBQUMsVUFBVSxFQUFFLENBQUM7SUFDcEIsQ0FBQztJQUVELHdDQUFvQixHQUFwQjtRQUNFLElBQUksQ0FBQyxLQUFLLENBQUMsV0FBVyxHQUFHLElBQUksQ0FBQztJQUNoQyxDQUFDO0lBRUQsY0FBYztJQUVkLG1DQUFlLEdBQWYsVUFBZ0IsSUFBSTtRQUNsQixJQUFJLENBQUMsS0FBSyxDQUFDLE9BQU8sSUFBSSxJQUFJLENBQUM7SUFDN0IsQ0FBQztJQUVELG9CQUFvQjtJQUVwQixrQ0FBYyxHQUFkO1FBQ0UsSUFBSSxDQUFDLGdCQUFnQixHQUFHLENBQUMsRUFBRSxFQUFFLEVBQUUsRUFBRSxJQUFJLENBQUMsQ0FBQztRQUN2QyxJQUFJLENBQUMsS0FBSyxDQUFDLFVBQVUsQ0FBQyxJQUFJLENBQUMsSUFBSSxDQUFDLGdCQUFnQixDQUFDLENBQUM7SUFDcEQsQ0FBQztJQUVELHlDQUFxQixHQUFyQixVQUFzQixJQUFJO1FBQ3hCLElBQUksQ0FBQyxnQkFBZ0IsQ0FBQyxDQUFDLENBQU
MsSUFBSSxJQUFJLENBQUM7SUFDbkMsQ0FBQztJQUVELHVDQUFtQixHQUFuQixVQUFvQixRQUFRO1FBQzFCLElBQUksQ0FBQyxnQkFBZ0IsQ0FBQyxDQUFDLENBQUMsR0FBRyxRQUFRLENBQUM7SUFDdEMsQ0FBQztJQUVELDBDQUFzQixHQUF0QixVQUF1QixJQUFJO1FBQ3pCLElBQUksQ0FBQyxnQkFBZ0IsQ0FBQyxDQUFDLENBQUMsR0FBRyxJQUFJLENBQUMsZ0JBQWdCLENBQUMsQ0FBQyxDQUFDLElBQUksRUFBRSxDQUFDO1FBQzFELElBQUksQ0FBQyxnQkFBZ0IsQ0FBQyxDQUFDLENBQUMsSUFBSSxJQUFJLENBQUM7SUFDbkMsQ0FBQztJQUVELHdDQUFvQixHQUFwQjtJQUNBLENBQUM7SUFFRCxxQ0FBaUIsR0FBakIsVUFBa0IsT0FBZTtRQUMvQixJQUFJLENBQUMsS0FBSyxDQUFDLFdBQVcsR0FBRyxPQUFPLENBQUM7SUFDbkMsQ0FBQztJQUNILGdCQUFDO0FBQUQsQ0FBQyxBQXJKRCxJQXFKQyIsInNvdXJjZXNDb250ZW50IjpbImltcG9ydCBFdmVudGVkVG9rZW5pemVyIGZyb20gJy4vZXZlbnRlZC10b2tlbml6ZXInO1xuXG5leHBvcnQgaW50ZXJmYWNlIFRva2VuaXplck9wdGlvbnMge1xuICBsb2M/OiBhbnk7XG59O1xuXG5leHBvcnQgdHlwZSBBdHRyaWJ1dGUgPSBbc3RyaW5nLCBzdHJpbmcsIGJvb2xlYW5dO1xuXG5leHBvcnQgaW50ZXJmYWNlIFRva2VuIHtcbiAgdHlwZTogc3RyaW5nO1xuICBjaGFycz86IHN0cmluZztcbiAgYXR0cmlidXRlcz86IEF0dHJpYnV0ZVtdO1xuICB0YWdOYW1lPzogc3RyaW5nO1xuICBzZWxmQ2xvc2luZz86IGJvb2xlYW47XG4gIGxvYz86IHtcbiAgICBzdGFydDoge1xuICAgICAgbGluZTogbnVtYmVyO1xuICAgICAgY29sdW1uOiBudW1iZXI7XG4gICAgfSxcbiAgICBlbmQ6IHtcbiAgICAgIGxpbmU6IG51bWJlcjtcbiAgICAgIGNvbHVtbjogbnVtYmVyO1xuICAgIH1cbiAgfTtcbiAgc3ludGF4RXJyb3I/OiBzdHJpbmc7XG59XG5cbmV4cG9ydCBkZWZhdWx0IGNsYXNzIFRva2VuaXplciB7XG4gIHByaXZhdGUgdG9rZW46IFRva2VuID0gbnVsbDtcbiAgcHJpdmF0ZSBzdGFydExpbmUgPSAxO1xuICBwcml2YXRlIHN0YXJ0Q29sdW1uID0gMDtcbiAgcHJpdmF0ZSB0b2tlbml6ZXI6IEV2ZW50ZWRUb2tlbml6ZXI7XG4gIHByaXZhdGUgdG9rZW5zOiBUb2tlbltdID0gW107XG4gIHByaXZhdGUgY3VycmVudEF0dHJpYnV0ZTogQXR0cmlidXRlID0gbnVsbDtcblxuICBjb25zdHJ1Y3RvcihlbnRpdHlQYXJzZXIsIHByaXZhdGUgb3B0aW9uczogVG9rZW5pemVyT3B0aW9ucyA9IHt9KSB7XG4gICAgdGhpcy50b2tlbml6ZXIgPSBuZXcgRXZlbnRlZFRva2VuaXplcih0aGlzLCBlbnRpdHlQYXJzZXIpO1xuICB9XG5cbiAgdG9rZW5pemUoaW5wdXQpIHtcbiAgICB0aGlzLnRva2VucyA9IFtdO1xuICAgIHRoaXMudG9rZW5pemVyLnRva2VuaXplKGlucHV0KTtcbiAgICByZXR1cm4gdGhpcy50b2tlbnM7XG4gIH1cblxuICB0b2tlbml6ZVBhcnQoaW5wdXQpIHtcbiAgICB0aGlzLnRva2VucyA9IFtdO1xuICAgIH
RoaXMudG9rZW5pemVyLnRva2VuaXplUGFydChpbnB1dCk7XG4gICAgcmV0dXJuIHRoaXMudG9rZW5zO1xuICB9XG5cbiAgdG9rZW5pemVFT0YoKSB7XG4gICAgdGhpcy50b2tlbnMgPSBbXTtcbiAgICB0aGlzLnRva2VuaXplci50b2tlbml6ZUVPRigpO1xuICAgIHJldHVybiB0aGlzLnRva2Vuc1swXTtcbiAgfVxuXG4gIHJlc2V0KCkge1xuICAgIHRoaXMudG9rZW4gPSBudWxsO1xuICAgIHRoaXMuc3RhcnRMaW5lID0gMTtcbiAgICB0aGlzLnN0YXJ0Q29sdW1uID0gMDtcbiAgfVxuXG4gIGFkZExvY0luZm8oKSB7XG4gICAgaWYgKHRoaXMub3B0aW9ucy5sb2MpIHtcbiAgICAgIHRoaXMudG9rZW4ubG9jID0ge1xuICAgICAgICBzdGFydDoge1xuICAgICAgICAgIGxpbmU6IHRoaXMuc3RhcnRMaW5lLFxuICAgICAgICAgIGNvbHVtbjogdGhpcy5zdGFydENvbHVtblxuICAgICAgICB9LFxuICAgICAgICBlbmQ6IHtcbiAgICAgICAgICBsaW5lOiB0aGlzLnRva2VuaXplci5saW5lLFxuICAgICAgICAgIGNvbHVtbjogdGhpcy50b2tlbml6ZXIuY29sdW1uXG4gICAgICAgIH1cbiAgICAgIH07XG4gICAgfVxuICAgIHRoaXMuc3RhcnRMaW5lID0gdGhpcy50b2tlbml6ZXIubGluZTtcbiAgICB0aGlzLnN0YXJ0Q29sdW1uID0gdGhpcy50b2tlbml6ZXIuY29sdW1uO1xuICB9XG5cbiAgLy8gRGF0YVxuXG4gIGJlZ2luRGF0YSgpIHtcbiAgICB0aGlzLnRva2VuID0ge1xuICAgICAgdHlwZTogJ0NoYXJzJyxcbiAgICAgIGNoYXJzOiAnJ1xuICAgIH07XG4gICAgdGhpcy50b2tlbnMucHVzaCh0aGlzLnRva2VuKTtcbiAgfVxuXG4gIGFwcGVuZFRvRGF0YShjaGFyKSB7XG4gICAgdGhpcy50b2tlbi5jaGFycyArPSBjaGFyO1xuICB9XG5cbiAgZmluaXNoRGF0YSgpIHtcbiAgICB0aGlzLmFkZExvY0luZm8oKTtcbiAgfVxuXG4gIC8vIENvbW1lbnRcblxuICBiZWdpbkNvbW1lbnQoKSB7XG4gICAgdGhpcy50b2tlbiA9IHtcbiAgICAgIHR5cGU6ICdDb21tZW50JyxcbiAgICAgIGNoYXJzOiAnJ1xuICAgIH07XG4gICAgdGhpcy50b2tlbnMucHVzaCh0aGlzLnRva2VuKTtcbiAgfVxuXG4gIGFwcGVuZFRvQ29tbWVudERhdGEoY2hhcikge1xuICAgIHRoaXMudG9rZW4uY2hhcnMgKz0gY2hhcjtcbiAgfVxuXG4gIGZpbmlzaENvbW1lbnQoKSB7XG4gICAgdGhpcy5hZGRMb2NJbmZvKCk7XG4gIH1cblxuICAvLyBUYWdzIC0gYmFzaWNcblxuICBiZWdpblN0YXJ0VGFnKCkge1xuICAgIHRoaXMudG9rZW4gPSB7XG4gICAgICB0eXBlOiAnU3RhcnRUYWcnLFxuICAgICAgdGFnTmFtZTogJycsXG4gICAgICBhdHRyaWJ1dGVzOiBbXSxcbiAgICAgIHNlbGZDbG9zaW5nOiBmYWxzZVxuICAgIH07XG4gICAgdGhpcy50b2tlbnMucHVzaCh0aGlzLnRva2VuKTtcbiAgfVxuXG4gIGJlZ2luRW5kVGFnKCkge1xuICAgIHRoaXMudG9rZW4gPSB7XG4gICAgICB0eXBlOiAnRW5kVGFnJyxcbiAgICAgIHRhZ05hbWU6ICcnXG4gICAgfTtcbiAgICB0aGlzLnRva2Vucy5wdX
NoKHRoaXMudG9rZW4pO1xuICB9XG5cbiAgZmluaXNoVGFnKCkge1xuICAgIHRoaXMuYWRkTG9jSW5mbygpO1xuICB9XG5cbiAgbWFya1RhZ0FzU2VsZkNsb3NpbmcoKSB7XG4gICAgdGhpcy50b2tlbi5zZWxmQ2xvc2luZyA9IHRydWU7XG4gIH1cblxuICAvLyBUYWdzIC0gbmFtZVxuXG4gIGFwcGVuZFRvVGFnTmFtZShjaGFyKSB7XG4gICAgdGhpcy50b2tlbi50YWdOYW1lICs9IGNoYXI7XG4gIH1cblxuICAvLyBUYWdzIC0gYXR0cmlidXRlc1xuXG4gIGJlZ2luQXR0cmlidXRlKCkge1xuICAgIHRoaXMuY3VycmVudEF0dHJpYnV0ZSA9IFtcIlwiLCBcIlwiLCBudWxsXTtcbiAgICB0aGlzLnRva2VuLmF0dHJpYnV0ZXMucHVzaCh0aGlzLmN1cnJlbnRBdHRyaWJ1dGUpO1xuICB9XG5cbiAgYXBwZW5kVG9BdHRyaWJ1dGVOYW1lKGNoYXIpIHtcbiAgICB0aGlzLmN1cnJlbnRBdHRyaWJ1dGVbMF0gKz0gY2hhcjtcbiAgfVxuXG4gIGJlZ2luQXR0cmlidXRlVmFsdWUoaXNRdW90ZWQpIHtcbiAgICB0aGlzLmN1cnJlbnRBdHRyaWJ1dGVbMl0gPSBpc1F1b3RlZDtcbiAgfVxuXG4gIGFwcGVuZFRvQXR0cmlidXRlVmFsdWUoY2hhcikge1xuICAgIHRoaXMuY3VycmVudEF0dHJpYnV0ZVsxXSA9IHRoaXMuY3VycmVudEF0dHJpYnV0ZVsxXSB8fCBcIlwiO1xuICAgIHRoaXMuY3VycmVudEF0dHJpYnV0ZVsxXSArPSBjaGFyO1xuICB9XG5cbiAgZmluaXNoQXR0cmlidXRlVmFsdWUoKSB7XG4gIH1cblxuICByZXBvcnRTeW50YXhFcnJvcihtZXNzYWdlOiBzdHJpbmcpIHtcbiAgICB0aGlzLnRva2VuLnN5bnRheEVycm9yID0gbWVzc2FnZTtcbiAgfVxufVxuIl19