html-tokenizer
Version:
Small, fast, event-driven, fault-tolerant HTML tokenizer. Works in Node or browsers.
103 lines • 3.94 kB
JavaScript
;
/**
 * Interop helper for default imports (appears to be compiler-generated).
 * Reuses an existing `this.__importDefault` when one is present; otherwise
 * returns modules flagged `__esModule` untouched and wraps anything else
 * in an object exposing it under the `default` key.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.Parser = void 0;
const tokenizer_1 = require("./tokenizer");
const stack_1 = __importDefault(require("./stack"));
const util_1 = require("./util");
/**
 * An object capable of parsing HTML.
 *
 * Wraps a tokenizer and converts its flat token stream into `open`, `close`,
 * `text` and `comment` events. Open elements are tracked on a stack so that
 * every `open` event is eventually followed by a matching `close` event.
 */
class Parser {
    /**
     * @param opts Parser options, forwarded as-is to `Tokenizer.from`.
     *   Defaults to an empty object, like the static `parse` helper does.
     */
    constructor(opts = {}) {
        this.tokenizer = tokenizer_1.Tokenizer.from(opts);
        // Instances carry no mutable state of their own.
        Object.freeze(this);
    }
    /**
     * Static method to parse HTML without instantiating a Parser instance.
     * @param html HTML string to parse.
     * @param opts Optional parser configuration options.
     * @returns An iterator of parse events (see the instance `parse` method).
     */
    static parse(html, opts = {}) {
        return new Parser(opts).parse(html);
    }
    /**
     * Static factory to create a parser.
     * @param opts Optional parser options; defaults to an empty object.
     */
    static from(opts = {}) {
        return new Parser(opts);
    }
    /**
     * Parse an HTML string. Returns an iterator, thus allowing parse
     * tokens to be consumed via for/of or other iteration mechanisms.
     *
     * Yielded events:
     * - `{ type: 'open', name, attributes, selfClosing }`
     * - `{ type: 'close', name, selfClosing }`
     * - `text` and `comment` tokens, passed through exactly as received
     *   from the tokenizer.
     *
     * A closing tag that matches neither the current element nor (for
     * elements that may be closed by their parent) the parent element is
     * ignored. Elements still open at the end of input are closed in
     * the order the stack drains them.
     *
     * @param html HTML string to parse.
     */
    *parse(html) {
        const stack = new stack_1.default();
        // Tag whose attributes are currently being collected, if any.
        let pendingTag;
        for (const tkn of this.tokenizer.tokenize(html)) {
            switch (tkn.type) {
                case 'opening-tag':
                    pendingTag = { name: tkn.name, attributes: {} };
                    break;
                case 'attribute':
                    if (pendingTag) {
                        // Define an own property rather than assigning, so a
                        // name such as `__proto__` is stored like any other
                        // attribute instead of hitting the inherited setter.
                        Object.defineProperty(pendingTag.attributes, tkn.name, {
                            value: tkn.value,
                            writable: true,
                            enumerable: true,
                            configurable: true,
                        });
                    }
                    break;
                case 'opening-tag-end': {
                    if (!pendingTag) {
                        // Tag end without a tag start: surface it as text.
                        yield { type: 'text', text: tkn.token };
                        break;
                    }
                    // Consume the pending tag so that a later stray token
                    // cannot re-emit or mutate an element already reported.
                    const tag = pendingTag;
                    pendingTag = undefined;
                    const mightBeClosed = stack.peek();
                    const isSelfClose = tkn.token === '/>' || util_1.isSelfClosing(tkn.name);
                    // Opening this tag may implicitly close the current one.
                    if (mightBeClosed && util_1.isClosedBy(mightBeClosed.name, tag.name)) {
                        stack.pop();
                        yield closeEvent(mightBeClosed.name, false);
                    }
                    yield { type: 'open', name: tag.name, attributes: tag.attributes, selfClosing: isSelfClose };
                    if (isSelfClose) {
                        yield closeEvent(tag.name, true);
                    }
                    else {
                        stack.push(tag);
                    }
                    break;
                }
                case 'closing-tag': {
                    const current = stack.peek();
                    const parent = stack.peek(1);
                    if (!current) {
                        break;
                    }
                    if (current.name === tkn.name) {
                        stack.pop();
                        yield closeEvent(current.name, false);
                    }
                    else if (parent && parent.name === tkn.name && util_1.isClosedByParent(current.name)) {
                        // The closing tag targets the parent: close the
                        // current element first, then the parent itself.
                        stack.pop();
                        yield closeEvent(current.name, false);
                        stack.pop();
                        yield closeEvent(parent.name, false);
                    }
                    break;
                }
                case 'text':
                case 'comment':
                    yield tkn;
                    break;
                default:
                    // Other token types produce no parse events.
                    break;
            }
        }
        // Close whatever is still open once the input is exhausted.
        for (const next of stack.drain()) {
            yield closeEvent(next.name, false);
        }
    }
}
/** Build a `close` parse event for the element called `name`. */
function closeEvent(name, selfClosing) {
    return { type: 'close', name, selfClosing };
}
exports.Parser = Parser;
//# sourceMappingURL=parser.js.map