datocms-html-to-structured-text
Version:
Convert HTML (or a `hast` syntax tree) to a valid DatoCMS Structured Text `dast` document
117 lines • 3.71 kB
JavaScript
/* eslint-disable @typescript-eslint/ban-ts-comment */
// @ts-nocheck
import extend from 'extend';
import convert from 'unist-util-is/convert';
// Predicate used throughout this module to recognise phrasing content.
// It is built with `convert` (from `unist-util-is`) out of the node types
// `span` and `link`; every other node type is treated as non-phrasing.
var isPhrasing = convert(['span', 'link']);
// Wrap runs of phrasing nodes in `paragraph` nodes.
//
// Grouping of `nodes` into runs is delegated to `runs`; this function only
// supplies the callback that decides what a single run becomes.
//
// `nodes` is the list of nodes to process. The return value is whatever
// `runs` produces for that list.
export function wrap(nodes) {
  return runs(nodes, toParagraph);

  // Convert one run of phrasing nodes into a paragraph.
  // A run consisting of a single `span` whose value is exactly one space or
  // one newline yields an empty list instead of a paragraph.
  function toParagraph(run) {
    const first = run[0];
    const isLoneWhitespace =
      run.length === 1 &&
      first.type === 'span' &&
      (first.value === ' ' || first.value === '\n');

    if (isLoneWhitespace) {
      return [];
    }

    return { type: 'paragraph', children: run };
  }
}
// Walk `nodes` (after `flatten`) and group consecutive phrasing nodes into
// runs.
//
// - `onphrasing(run)` is called once per run of phrasing nodes, with the run
//   as an array.
// - `onnonphrasing(node)` is called for every non-phrasing node; when it is
//   not supplied, `identity` is used and the node is kept as is.
//
// Whatever the callbacks return is collected, in order, into the returned
// list. A callback that returns an array has its items spliced in one level
// deep (so returning `[]` drops the run or node); any other value is added as
// a single item.
function runs(nodes, onphrasing, onnonphrasing) {
  const nonphrasing = onnonphrasing || identity;
  const flattened = flatten(nodes);
  const result = [];
  let queue;

  for (let index = 0; index < flattened.length; index += 1) {
    const node = flattened[index];

    if (isPhrasing(node)) {
      if (!queue) {
        queue = [];
      }
      queue.push(node);
      continue;
    }

    // A non-phrasing node ends the current run, if there is one.
    if (queue) {
      append(onphrasing(queue));
      queue = undefined;
    }

    append(nonphrasing(node));
  }

  // Emit the run that was still open when the list ended.
  if (queue) {
    append(onphrasing(queue));
  }

  return result;

  // Add `value` to `result` in place. Arrays are spliced in one level deep,
  // matching what `Array.prototype.concat` did here before, but without
  // copying the whole accumulated list on every step.
  function append(value) {
    if (Array.isArray(value)) {
      for (let i = 0; i < value.length; i += 1) {
        result.push(value[i]);
      }
    } else {
      result.push(value);
    }
  }
}
// Flatten a list of nodes.
//
// Straddling: some HTML elements are hybrids that may hold either phrasing or
// block content, namely `map`, `ins`, `del`, and `a`.
// See: <https://html.spec.whatwg.org/#paragraphs>, and the straddling fixture
// for more info.
// Per the notes this code was written against, `ins` is ignored and `map`
// maps to its content, so neither needs handling here; `del` maps to `delete`
// and `a` to `link`, so those two node types are handled.
//
// A `delete` or `link` node for which `needed(node.children)` is true is
// replaced by the result of `split(node)`, i.e. it is split over each of its
// children. Every other node is kept as is.
function flatten(nodes) {
  let flattened = [];

  for (let index = 0; index < nodes.length; index += 1) {
    const node = nodes[index];
    const isHybrid = node.type === 'delete' || node.type === 'link';

    // Only hybrid nodes that actually contain non-phrasing content are split.
    if (!isHybrid || !needed(node.children)) {
      flattened.push(node);
      continue;
    }

    flattened = flattened.concat(split(node));
  }

  return flattened;
}
// Report whether `nodes` contains non-phrasing content.
//
// Returns `true` as soon as a node fails `isPhrasing`, or a phrasing node has
// `children` that (recursively) contain such a node; returns `false`
// otherwise, including for an empty list.
//
// This is needed if a fragment is given, which could just be a sentence, and
// doesn’t need a wrapper paragraph.
export function needed(nodes) {
  for (let index = 0; index < nodes.length; index += 1) {
    const node = nodes[index];

    if (!isPhrasing(node)) {
      return true;
    }

    if (node.children && needed(node.children)) {
      return true;
    }
  }

  return false;
}
// Split `node` over each of its children.
//
// `node.children` is run through `runs`:
// - every run of phrasing children is placed inside a fresh copy of `node`;
// - every non-phrasing child is copied (without its children), receives a
//   fresh copy of `node` as its only child, and that copy of `node` takes
//   over the child's original children.
//
// Returns the list produced by `runs`.
function split(node) {
  return runs(node.children, wrapRun, hoistChild);

  // Deep copy of `node` without its `children`.
  function cloneShell() {
    return extend(true, {}, shallow(node));
  }

  // Use `child` as the outer node, put a copy of `node` inside it, and move
  // the child's original children into that copy.
  function hoistChild(child) {
    const inner = cloneShell();
    const outer = shallow(child);

    outer.children = [inner];
    inner.children = child.children;

    return outer;
  }

  // Put a run of phrasing nodes inside a copy of `node`.
  function wrapRun(run) {
    const inner = cloneShell();

    inner.children = run;

    return inner;
  }
}
// Return the argument unchanged.
// `runs` falls back to this as its handler for non-phrasing nodes when no
// `onnonphrasing` callback is supplied, so those nodes are kept as they are.
function identity(n) {
return n;
}
// Make a shallow copy of `node` that leaves out its `children`.
//
// Only own enumerable properties are copied, and values are copied by
// reference. The result is always a new plain object; `node` is not modified.
function shallow(node) {
  const hasOwn = Object.prototype.hasOwnProperty;
  const copy = {};

  for (const key in node) {
    // Skip inherited properties and the `children` list.
    if (key === 'children' || !hasOwn.call(node, key)) {
      continue;
    }

    copy[key] = node[key];
  }

  return copy;
}
//# sourceMappingURL=wrap.js.map