html-to-article-json
Version:
Converting HTML to article-json
103 lines (75 loc) • 2.63 kB
JavaScript
;
// Flag this CommonJS module with `__esModule` so that interop helpers such as
// `_interopRequireDefault` (which checks `obj.__esModule`) return the exports
// object as-is instead of wrapping it in `{ default: ... }`.
Object.defineProperty(exports, "__esModule", {
value: true
});
var _es6Set = require('es6-set');
var _es6Set2 = _interopRequireDefault(_es6Set);
var _embeds = require('./embeds');
var _embeds2 = _interopRequireDefault(_embeds);
var _text = require('./text');
var _text2 = _interopRequireDefault(_text);
var _blockElement = require('./block-element');
var _blockElement2 = _interopRequireDefault(_blockElement);
var _queryDom = require('query-dom');
var _queryDom2 = _interopRequireDefault(_queryDom);
/**
 * Normalises the result of a `require` call so it can always be read through
 * a `.default` property.
 *
 * @param {*} obj - The value returned by `require`.
 * @returns {*} `obj` itself when it is truthy and carries a truthy
 *   `__esModule` flag; otherwise a new object `{ default: obj }`.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { default: obj };
}
// Lower-cased tag names that the parser skips entirely: elements with one of
// these names are never handed to the element handler.
var HEAD_NODE_NAMES = new _es6Set2.default([
  'title',
  'base',
  'link',
  'meta',
  'script',
  'noscript',
  'style'
]);
/**
 * Produces a linebreak item for `<br>` elements.
 *
 * @param {{tagName: string}} elm - Element node; `tagName` is compared
 *   case-insensitively.
 * @returns {?{type: string}} `{ type: 'linebreak' }` for a `br` element,
 *   otherwise `null`.
 */
var linebreak = function linebreak(elm) {
  if (elm.tagName.toLowerCase() !== 'br') {
    return null;
  }
  return { type: 'linebreak' };
};
/**
 * Tells whether a node is a text node carrying no characters.
 *
 * @param {{nodeName: string, data: string}} elm - Node to inspect; `data` is
 *   only read when `nodeName` is `'#text'`.
 * @returns {boolean} `true` for a `#text` node whose `data` has length 0.
 */
var isEmptyTextNode = function isEmptyTextNode(elm) {
  if (elm.nodeName !== '#text') {
    return false;
  }
  return elm.data.length === 0;
};
exports.default = function (opts) {
var text = (0, _text2.default)(opts);
var embed = (0, _embeds2.default)(opts);
var parse = function parse(elms, textOpts, result) {
for (var i = 0; i < elms.length; i++) {
var elm = elms[i];
// ELEMENT_NODE
if (elm.tagName && !HEAD_NODE_NAMES.has(elm.tagName.toLowerCase())) {
elementNode(elm, textOpts, result);
}
// TEXT_NODE
if (elm.nodeName === '#text' && elm.data.length > 0) {
result.push(text(textOpts, elm));
}
}
return result;
};
var blockElement = (0, _blockElement2.default)(parse, text);
var elementNode = function elementNode(elm, textOpts, result) {
var linebreakResult = linebreak(elm);
if (linebreakResult) {
result.push(linebreakResult);
return;
}
var embedResult = embed(elm);
if (embedResult) {
result.push(embedResult);
return;
}
var blockElementResult = blockElement(elm, textOpts);
if (blockElementResult) {
result.push(blockElementResult);
return;
}
var emptyMark = elm.tagName.toLowerCase() === 'mark' && (elm.childNodes.length === 0 || elm.childNodes.length === 1 && isEmptyTextNode(elm.childNodes[0]));
if (emptyMark) {
result.push(text(textOpts, elm));
return;
}
if (elm.childNodes.length) {
parse(elm.childNodes, text(textOpts, elm), result);
}
};
return function (input) {
if (typeof input === 'string') {
return parse((0, _queryDom2.default)(input), {}, []);
}
if (input.nodeName) {
return parse([input], {}, []);
}
return parse(input, {}, []);
};
};