UNPKG

domhandler

Version:

Handler for htmlparser2 that turns pages into a dom

156 lines (155 loc) 4.8 kB
import { ElementType } from "domelementtype"; import { CDATA, Comment, Document, Element, ProcessingInstruction, Text, } from "./node.js"; export * from "./node.js"; // Default options const defaultOptions = { withStartIndices: false, withEndIndices: false, xmlMode: false, }; /** * Event-based handler that builds a DOM tree from parser callbacks. */ export class DomHandler { /** The elements of the DOM */ dom = []; /** The root element for the DOM */ root = new Document(this.dom); /** Called once parsing has completed. */ callback; /** Settings for the handler. */ options; /** Callback whenever a tag is closed. */ elementCB; /** Indicated whether parsing has been completed. */ done = false; /** Stack of open tags. */ tagStack = [this.root]; /** A data node that is still being written to. */ lastNode = null; /** Reference to the parser instance. Used for location information. */ parser = null; /** * @param callback Called once parsing has completed. * @param options Settings for the handler. * @param elementCB Callback whenever a tag is closed. */ constructor(callback, options, elementCB) { // Make it possible to skip arguments, for backwards-compatibility if (typeof options === "function") { elementCB = options; options = defaultOptions; } if (typeof callback === "object") { options = callback; callback = undefined; } this.callback = callback ?? null; this.options = options ?? defaultOptions; this.elementCB = elementCB ?? null; } onparserinit(parser) { this.parser = parser; } // Resets the handler back to starting state onreset() { this.dom = []; this.root = new Document(this.dom); this.done = false; this.tagStack = [this.root]; this.lastNode = null; this.parser = null; } // Signals the handler that parsing is done onend() { if (this.done) return; this.done = true; this.parser = null; this.handleCallback(null); } onerror(error) { this.handleCallback(error); } onclosetag() { this.lastNode = null; const element = this.tagStack.pop(); if (this.options.withEndIndices && this.parser) { element.endIndex = this.parser.endIndex; } if (this.elementCB) this.elementCB(element); } onopentag(name, attribs) { const type = this.options.xmlMode ? ElementType.Tag : undefined; const element = new Element(name, attribs, undefined, type); this.addNode(element); this.tagStack.push(element); } ontext(data) { const { lastNode } = this; if (lastNode && lastNode.type === ElementType.Text) { lastNode.data += data; if (this.options.withEndIndices && this.parser) { lastNode.endIndex = this.parser.endIndex; } } else { const node = new Text(data); this.addNode(node); this.lastNode = node; } } oncomment(data) { if (this.lastNode && this.lastNode.type === ElementType.Comment) { this.lastNode.data += data; return; } const node = new Comment(data); this.addNode(node); this.lastNode = node; } oncommentend() { this.lastNode = null; } oncdatastart() { const text = new Text(""); const node = new CDATA([text]); this.addNode(node); text.parent = node; this.lastNode = text; } oncdataend() { this.lastNode = null; } onprocessinginstruction(name, data) { const node = new ProcessingInstruction(name, data); this.addNode(node); } handleCallback(error) { if (typeof this.callback === "function") { this.callback(error, this.dom); } else if (error) { throw error; } } addNode(node) { const parent = this.tagStack[this.tagStack.length - 1]; const previousSibling = parent.children[parent.children.length - 1]; if (this.options.withStartIndices && this.parser) { node.startIndex = this.parser.startIndex; } if (this.options.withEndIndices && this.parser) { node.endIndex = this.parser.endIndex; } parent.children.push(node); if (previousSibling) { node.prev = previousSibling; previousSibling.next = node; } node.parent = parent; this.lastNode = null; } } export default DomHandler;