UNPKG

html-to-article-json

Version:
103 lines (75 loc) 2.63 kB
'use strict'; Object.defineProperty(exports, "__esModule", { value: true }); var _es6Set = require('es6-set'); var _es6Set2 = _interopRequireDefault(_es6Set); var _embeds = require('./embeds'); var _embeds2 = _interopRequireDefault(_embeds); var _text = require('./text'); var _text2 = _interopRequireDefault(_text); var _blockElement = require('./block-element'); var _blockElement2 = _interopRequireDefault(_blockElement); var _queryDom = require('query-dom'); var _queryDom2 = _interopRequireDefault(_queryDom); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } var HEAD_NODE_NAMES = new _es6Set2.default(['title', 'base', 'link', 'meta', 'script', 'noscript', 'style']); var linebreak = function linebreak(elm) { return elm.tagName.toLowerCase() === 'br' ? { type: 'linebreak' } : null; }; var isEmptyTextNode = function isEmptyTextNode(elm) { return elm.nodeName === '#text' && elm.data.length === 0; }; exports.default = function (opts) { var text = (0, _text2.default)(opts); var embed = (0, _embeds2.default)(opts); var parse = function parse(elms, textOpts, result) { for (var i = 0; i < elms.length; i++) { var elm = elms[i]; // ELEMENT_NODE if (elm.tagName && !HEAD_NODE_NAMES.has(elm.tagName.toLowerCase())) { elementNode(elm, textOpts, result); } // TEXT_NODE if (elm.nodeName === '#text' && elm.data.length > 0) { result.push(text(textOpts, elm)); } } return result; }; var blockElement = (0, _blockElement2.default)(parse, text); var elementNode = function elementNode(elm, textOpts, result) { var linebreakResult = linebreak(elm); if (linebreakResult) { result.push(linebreakResult); return; } var embedResult = embed(elm); if (embedResult) { result.push(embedResult); return; } var blockElementResult = blockElement(elm, textOpts); if (blockElementResult) { result.push(blockElementResult); return; } var emptyMark = elm.tagName.toLowerCase() === 'mark' && (elm.childNodes.length === 0 || elm.childNodes.length === 1 && isEmptyTextNode(elm.childNodes[0])); if (emptyMark) { result.push(text(textOpts, elm)); return; } if (elm.childNodes.length) { parse(elm.childNodes, text(textOpts, elm), result); } }; return function (input) { if (typeof input === 'string') { return parse((0, _queryDom2.default)(input), {}, []); } if (input.nodeName) { return parse([input], {}, []); } return parse(input, {}, []); }; };