UNPKG

tag-soup

Version:

The fastest pure JS SAX/DOM XML/HTML parser.

90 lines (89 loc) 3.4 kB
import { resolveTokenizerOptions } from './createTokenizer.js';
import { ParserError, tokenizeMarkup } from './tokenizeMarkup.js';
import {
  CDATASection,
  Comment,
  Document,
  DocumentFragment,
  DocumentType,
  Element,
  ProcessingInstruction,
  Text,
} from 'flyweight-dom';

/**
 * Creates a reusable parser that turns markup text into a DOM tree.
 *
 * The tokenizer options are resolved once, when the parser is created, and the
 * resulting option objects are shared by every subsequent parse call.
 *
 * @example
 * import { createDOMParser, htmlTokenizerOptions } from 'tag-soup';
 *
 * const parser = createDOMParser(htmlTokenizerOptions);
 *
 * parser.parseFragment('Hello, <b>Bob</b>!');
 * // ⮕ DocumentFragment
 *
 * @param options Parser options.
 * @returns An object with `parseDocument` and `parseFragment` methods.
 * @group DOM
 */
export function createDOMParser(options = {}) {
  const decodeText = options.decodeText;

  // `decodeText` is layered on top of the resolved tokenizer options so that
  // parseDOM can read it back from the same options object.
  const documentOptions = Object.assign({}, resolveTokenizerOptions(options), { decodeText });
  const fragmentOptions = Object.assign({}, documentOptions, { isFragment: true });

  return {
    parseDocument(input) {
      return parseDOM(input, documentOptions);
    },

    parseFragment(input) {
      return parseDOM(input, fragmentOptions);
    },
  };
}

/**
 * Builds a DOM tree from markup text by reacting to tokenizer callbacks.
 *
 * `decodeText` (identity when omitted) is applied to text nodes, attribute
 * values and comments. CDATA sections, doctype names, element names and
 * processing instructions are taken from the input verbatim.
 *
 * @param input The text to parse.
 * @param options Parser options. They are also forwarded to the tokenizer as-is.
 * @returns The document node, or a document fragment node if `isFragment` is set.
 * @throws ParserError In strict mode, if the insertion point has not returned
 * to the root node once tokenization is complete.
 */
export function parseDOM(input, options = {}) {
  const { isStrict, isFragment, decodeText = identity } = options;

  const root = createRoot(isFragment);

  // The node that currently receives children and attributes.
  let current = root;

  // Names are reported by the tokenizer before the value/data that completes them.
  let pendingAttributeName;
  let pendingInstructionTarget;

  const textBetween = (startIndex, endIndex) => input.substring(startIndex, endIndex);

  const handleToken = (token, startIndex, endIndex) => {
    switch (token) {
      case 'TEXT':
        current.appendChild(new Text(decodeText(textBetween(startIndex, endIndex))));
        break;

      case 'START_TAG_NAME': {
        // Attach the new element, then descend into it.
        const element = new Element(textBetween(startIndex, endIndex));
        current.appendChild(element);
        current = element;
        break;
      }

      case 'START_TAG_CLOSING':
        // Nothing to do: the element was already attached when its name was read.
        break;

      case 'START_TAG_SELF_CLOSING':
      case 'END_TAG_NAME':
        // The element is complete, step back up to its parent.
        current = current.parentNode;
        break;

      case 'ATTRIBUTE_NAME':
        pendingAttributeName = textBetween(startIndex, endIndex);
        break;

      case 'ATTRIBUTE_VALUE':
        current.setAttribute(pendingAttributeName, decodeText(textBetween(startIndex, endIndex)));
        break;

      case 'CDATA_SECTION':
        current.appendChild(new CDATASection(textBetween(startIndex, endIndex)));
        break;

      case 'COMMENT':
        current.appendChild(new Comment(decodeText(textBetween(startIndex, endIndex))));
        break;

      case 'DOCTYPE_NAME':
        current.appendChild(new DocumentType(textBetween(startIndex, endIndex)));
        break;

      case 'PROCESSING_INSTRUCTION_TARGET':
        pendingInstructionTarget = textBetween(startIndex, endIndex);
        break;

      case 'PROCESSING_INSTRUCTION_DATA':
        current.appendChild(
          new ProcessingInstruction(pendingInstructionTarget, textBetween(startIndex, endIndex))
        );
        break;
    }
  };

  tokenizeMarkup(input, handleToken, options);

  if (!isStrict || current === root) {
    return root;
  }

  throw new ParserError('Expected an end tag.', input, input.length);
}

/**
 * Creates the node that serves as the root of the parsed tree.
 *
 * @param isFragment If truthy, a document fragment is created instead of a document.
 */
function createRoot(isFragment) {
  if (isFragment) {
    return new DocumentFragment();
  }
  return new Document();
}

/**
 * Returns its argument unchanged. Used as the default `decodeText`.
 */
function identity(value) {
  return value;
}