UNPKG

html-to-article-json

Version:
82 lines (65 loc) 2.05 kB
import Set from 'es6-set';
import setupEmbed from './embeds';
import setupText from './text';
import setupBlockElement from './block-element';
import toDOM from 'query-dom';

// Lower-cased tag names that are never converted: an element with one of
// these names is skipped, and so is everything nested inside it.
const HEAD_NODE_NAMES = new Set([
  'title',
  'base',
  'link',
  'meta',
  'script',
  'noscript',
  'style'
]);

/**
 * Maps a `<br>` element to a linebreak item.
 *
 * @param {Object} elm - element node; its `tagName` is read.
 * @returns {?Object} `{ type: 'linebreak' }` for a `br` element, else `null`.
 */
const linebreak = elm => {
  if (elm.tagName.toLowerCase() === 'br') {
    return { type: 'linebreak' };
  }
  return null;
};

/**
 * Tells whether a node is a `#text` node whose `data` is empty.
 *
 * @param {Object} elm - node to inspect.
 * @returns {boolean}
 */
const isEmptyTextNode = elm => {
  if (elm.nodeName !== '#text') {
    return false;
  }
  return elm.data.length === 0;
};

/**
 * Tells whether an element is a `<mark>` with no content: either no child
 * nodes at all, or exactly one child that is an empty text node.
 *
 * @param {Object} elm - element node; `tagName` and `childNodes` are read.
 * @returns {boolean}
 */
const isEmptyMark = elm => {
  if (elm.tagName.toLowerCase() !== 'mark') {
    return false;
  }
  const children = elm.childNodes;
  if (children.length === 0) {
    return true;
  }
  return children.length === 1 && isEmptyTextNode(children[0]);
};

/**
 * Builds a converter from the given options.
 *
 * `opts` is handed unchanged to the text and embed setup helpers.
 * NOTE(review): the accepted option keys are defined by those helpers, not
 * here — check './text' and './embeds'.
 *
 * @param {*} opts - options forwarded to `setupText` and `setupEmbed`.
 * @returns {Function} converter accepting an HTML string, a single node
 *   (anything with a truthy `nodeName`), or an indexable list of nodes, and
 *   returning the array of items collected while walking them.
 */
export default opts => {
  const text = setupText(opts);
  const embed = setupEmbed(opts);

  /**
   * Walks `elms` in order and appends the items produced for each node to
   * `result`.
   *
   * @param {Object} elms - indexable collection with a `length`.
   * @param {*} textOpts - text options passed along to `text`.
   * @param {Array} result - output array, mutated in place.
   * @returns {Array} the same `result` array.
   */
  const parse = (elms, textOpts, result) => {
    for (let index = 0; index < elms.length; index++) {
      const node = elms[index];

      // Anything with a tag name is treated as an element, unless the tag
      // belongs to the head-only set.
      const isConvertibleElement =
        node.tagName && !HEAD_NODE_NAMES.has(node.tagName.toLowerCase());
      if (isConvertibleElement) {
        elementNode(node, textOpts, result);
      }

      // Text nodes contribute an item only when they carry data.
      const isNonEmptyText = node.nodeName === '#text' && node.data.length > 0;
      if (isNonEmptyText) {
        result.push(text(textOpts, node));
      }
    }
    return result;
  };

  const blockElement = setupBlockElement(parse, text);

  /**
   * Converts one element. The first converter that yields a value wins, in
   * the order: linebreak, embed, block element. Otherwise an empty `<mark>`
   * is emitted as a text item, and any other element has its children
   * parsed with the text options derived from the element itself.
   *
   * @param {Object} elm - element node to convert.
   * @param {*} textOpts - text options in effect for this element.
   * @param {Array} result - output array, mutated in place.
   */
  const elementNode = (elm, textOpts, result) => {
    // `||` short-circuits, so later converters only run when the earlier
    // ones returned nothing.
    const converted =
      linebreak(elm) || embed(elm) || blockElement(elm, textOpts);
    if (converted) {
      result.push(converted);
      return;
    }

    // An empty <mark> has no children to descend into, so it is pushed
    // directly as a text item.
    if (isEmptyMark(elm)) {
      result.push(text(textOpts, elm));
      return;
    }

    if (elm.childNodes.length) {
      // The value returned by `text` for this element becomes the text
      // options used for its children.
      parse(elm.childNodes, text(textOpts, elm), result);
    }
  };

  return input => {
    let nodes;
    if (typeof input === 'string') {
      // Strings are parsed into nodes first.
      nodes = toDOM(input);
    } else if (input.nodeName) {
      // A single node is wrapped so that it can be walked like a list.
      nodes = [input];
    } else {
      nodes = input;
    }
    return parse(nodes, {}, []);
  };
};