UNPKG

html-tokenizer

Version:

Small, fast, event-driven, fault-tolerant html tokenizer. Works in node or browsers.

103 lines 3.94 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.Parser = void 0; const tokenizer_1 = require("./tokenizer"); const stack_1 = __importDefault(require("./stack")); const util_1 = require("./util"); /** * An object capable of parsing HTML. */ class Parser { constructor(opts) { this.tokenizer = tokenizer_1.Tokenizer.from(opts); Object.freeze(this); } /** * Static method to parse HTML without instantiating a Parser instance. * @param html HTML string to parse. * @param opts Optional parser configuration options. */ static parse(html, opts = {}) { const parser = new Parser(opts); return parser.parse(html); } /** * Static factory to create a parser. * @param opts Parser options. */ static from(opts) { return new Parser(opts); } /** * Parse an HTML string. Returns an iterator, thus allowing parse * tokens to be consumed via for/of or other iteration mechanisms. * @param html HTML string to parse. */ *parse(html) { const tkzr = this.tokenizer; const stack = new stack_1.default(); let pendingTag = undefined; for (const tkn of tkzr.tokenize(html)) { if (tkn.type === 'opening-tag') { pendingTag = { name: tkn.name, attributes: {} }; } else if (tkn.type === 'closing-tag') { const current = stack.peek(); const parent = stack.peek(1); if (current) { if (current.name === tkn.name) { stack.pop(); yield { type: 'close', name: current.name, selfClosing: false }; } else { if (parent && parent.name === tkn.name && util_1.isClosedByParent(current.name)) { stack.pop(); yield { type: 'close', name: current.name, selfClosing: false }; stack.pop(); yield { type: 'close', name: parent.name, selfClosing: false }; } } } } else if (tkn.type === 'opening-tag-end') { if (pendingTag) { const mightBeClosed = stack.peek(); const isSelfClose = tkn.token === '/>' || util_1.isSelfClosing(tkn.name); if (mightBeClosed && util_1.isClosedBy(mightBeClosed.name, pendingTag.name)) { stack.pop(); yield { type: 'close', name: mightBeClosed.name, selfClosing: false }; } yield { type: 'open', name: pendingTag.name, attributes: pendingTag.attributes, selfClosing: isSelfClose }; if (isSelfClose) { yield { type: 'close', name: pendingTag.name, selfClosing: true }; } else { stack.push(pendingTag); } } else { yield { type: 'text', text: tkn.token }; } } else if (tkn.type === 'text') { yield tkn; } else if (tkn.type === 'comment') { yield tkn; } else if (tkn.type === 'attribute') { if (pendingTag) { pendingTag.attributes[tkn.name] = tkn.value; } } } for (const next of stack.drain()) { yield { type: 'close', name: next.name, selfClosing: false }; } } } exports.Parser = Parser; //# sourceMappingURL=parser.js.map