datocms-html-to-structured-text
Version:
Convert HTML (or a `hast` syntax tree) to a valid DatoCMS Structured Text `dast` document
459 lines • 16.5 kB
JavaScript
import { convertElement } from 'hast-util-is-element';
import { toText } from 'hast-util-to-text';
import { hasProperty } from 'hast-util-has-property';
import { allowedChildren, inlineNodeTypes, } from 'datocms-structured-text-utils';
import visitChildren from './visit-children.js';
import { wrap } from './wrap.js';
/**
 * Handler for the hast `root` node: produces the top-level `dast` `root` node.
 *
 * Children are visited with `parentNodeType` set to `'root'`. When at least one
 * converted child has a type that `allowedChildren.root` does not list, the whole
 * child list is passed through `wrap` before the root node is created.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @param node - Node whose children are visited.
 * @param context - Conversion context; copied into the child context.
 * @returns The created `root` node, or `null` when no children were produced.
 */
export const root = async function root(createNode, node, context) {
  const visited = await visitChildren(createNode, node, {
    ...context,
    parentNodeType: 'root',
  });
  if (!Array.isArray(visited)) {
    return null;
  }
  const hasDisallowedChild = visited.some(
    (child) => child && !allowedChildren.root.includes(child.type),
  );
  const children = hasDisallowedChild ? wrap(visited) : visited;
  if (!Array.isArray(children) || children.length === 0) {
    return null;
  }
  return createNode('root', { children });
};
/**
 * Handler for paragraph-like elements.
 *
 * If the current parent type accepts a `paragraph` child, the children are visited
 * with `parentNodeType: 'paragraph'` and wrapped in a `paragraph` node. Otherwise the
 * children are visited under the current parent type and returned unwrapped.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @param node - Node whose children are visited.
 * @param context - Conversion context; `parentNodeType` is read from it.
 * @returns A `paragraph` node, the bare children array, or `undefined` when the
 *   visit produced no children.
 */
export const paragraph = async function paragraph(createNode, node, context) {
  const { parentNodeType } = context;
  const canBeParagraph = allowedChildren[parentNodeType].includes('paragraph');
  const children = await visitChildren(createNode, node, {
    ...context,
    parentNodeType: canBeParagraph ? 'paragraph' : parentNodeType,
  });
  if (!Array.isArray(children) || children.length === 0) {
    return undefined;
  }
  if (!canBeParagraph) {
    return children;
  }
  return createNode('paragraph', { children });
};
/**
 * Handler that emits a `thematicBreak` node.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @param _node - Unused.
 * @param context - Conversion context; only `parentNodeType` is read.
 * @returns A `thematicBreak` node when the current parent type accepts one,
 *   otherwise `undefined`.
 */
export const thematicBreak = async function thematicBreak(createNode, _node, context) {
  const parentAccepts = allowedChildren[context.parentNodeType].includes('thematicBreak');
  if (!parentAccepts) {
    return undefined;
  }
  return createNode('thematicBreak', {});
};
/**
 * Handler for `h1`–`h6` elements.
 *
 * The level is taken from the second character of the tag name and falls back to 1
 * when that character is not a non-zero number. A `heading` node is produced only when
 * the parent type accepts headings, `'heading'` is in `context.allowedBlocks`, and the
 * level is in `context.allowedHeadingLevels`. In that case children are visited with
 * `parentNodeType: 'heading'` and `wrapText: false`. Otherwise the children are
 * visited under the current context values and returned unwrapped.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @param node - Node to convert; anything other than an element yields `undefined`.
 * @param context - Conversion context.
 * @returns A `heading` node, the bare children array, or `undefined` when there are
 *   no children.
 */
export const heading = async function heading(createNode, node, context) {
  if (node.type !== 'element') {
    return undefined;
  }
  const level = Number(node.tagName.charAt(1)) || 1;
  const keepAsHeading =
    allowedChildren[context.parentNodeType].includes('heading') &&
    context.allowedBlocks.includes('heading') &&
    context.allowedHeadingLevels.includes(level);
  const childContext = {
    ...context,
    parentNodeType: keepAsHeading ? 'heading' : context.parentNodeType,
    wrapText: keepAsHeading ? false : context.wrapText,
  };
  const children = await visitChildren(createNode, node, childContext);
  if (!Array.isArray(children) || children.length === 0) {
    return undefined;
  }
  if (!keepAsHeading) {
    return children;
  }
  return createNode('heading', { level, children });
};
/**
 * Handler for code-like elements.
 *
 * - When the parent type does not accept a `code` block, the node is delegated to
 *   `inlineCode`.
 * - When `'code'` is not in `context.allowedBlocks`, only the children are visited.
 * - Otherwise a `code` node is created from the node's text (via `toText`, passed
 *   through `wrapText`, with trailing newlines removed).
 *
 * The optional `language` is read from a `className` array: for a `pre` element it is
 * taken from the first `code` element child that has a `className` property; for a
 * `code` element it is taken from the element itself. The first class starting with
 * `context.codePrefix` (default `'language-'`) supplies the language, prefix removed.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @param node - Node to convert.
 * @param context - Conversion context.
 * @returns A `code` node, or whatever `inlineCode` / `visitChildren` returned.
 */
export const code = async function code(createNode, node, context) {
  if (!allowedChildren[context.parentNodeType].includes('code')) {
    return inlineCode(createNode, node, context);
  }
  if (!context.allowedBlocks.includes('code')) {
    return visitChildren(createNode, node, context);
  }
  const prefix = typeof context.codePrefix === 'string' ? context.codePrefix : 'language-';
  const isPre = convertElement('pre');
  const isCode = convertElement('code');
  const canHaveChildren = node.type === 'element' || node.type === 'root';
  const childNodes = canHaveChildren ? node.children || [] : [];

  // Pick the element whose `className` may carry the language marker.
  let classHolder;
  if (isPre(node)) {
    classHolder = childNodes.find(
      (child) =>
        typeof child === 'object' &&
        child.type === 'element' &&
        isCode(child) &&
        hasProperty(child, 'className'),
    );
  } else if (node.type === 'element' && isCode(node) && hasProperty(node, 'className')) {
    classHolder = node;
  }

  const classNames = classHolder?.properties?.className;
  const props = {};
  if (Array.isArray(classNames)) {
    for (const entry of classNames) {
      const className = String(entry);
      if (className.startsWith(prefix)) {
        props.language = className.slice(prefix.length);
        break;
      }
    }
  }
  const text = String(wrapText(context, toText(node)));
  props.code = text.replace(/\n+$/, '');
  return createNode('code', props);
};
/**
 * Handler for `blockquote` elements.
 *
 * A `blockquote` node is produced when the parent type accepts one and
 * `'blockquote'` is in `context.allowedBlocks`; its children are the visited children
 * passed through `wrap`. Otherwise the children are visited under the current parent
 * type and returned unwrapped.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @param node - Node whose children are visited.
 * @param context - Conversion context.
 * @returns A `blockquote` node, the bare children array, or `undefined` when there
 *   are no children.
 */
export const blockquote = async function blockquote(createNode, node, context) {
  const canBeBlockquote =
    allowedChildren[context.parentNodeType].includes('blockquote') &&
    context.allowedBlocks.includes('blockquote');
  const children = await visitChildren(createNode, node, {
    ...context,
    parentNodeType: canBeBlockquote ? 'blockquote' : context.parentNodeType,
  });
  if (!Array.isArray(children) || children.length === 0) {
    return undefined;
  }
  if (!canBeBlockquote) {
    return children;
  }
  return createNode('blockquote', { children: wrap(children) });
};
/**
 * Handler for list container elements.
 *
 * When the parent type does not accept a `list`, or `'list'` is not in
 * `context.allowedBlocks`, the children are visited with the unchanged context and
 * that result is returned. Otherwise the children are converted through
 * `wrapListItems` with `parentNodeType: 'list'` and wrapped in a `list` node whose
 * `style` is `'numbered'` for an `ol` element and `'bulleted'` for anything else.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @param node - Node to convert.
 * @param context - Conversion context.
 * @returns A `list` node, the result of `visitChildren`, or `undefined` when the
 *   list would be empty.
 */
export const list = async function list(createNode, node, context) {
  const canBeList =
    allowedChildren[context.parentNodeType].includes('list') &&
    context.allowedBlocks.includes('list');
  if (!canBeList) {
    return await visitChildren(createNode, node, context);
  }
  const items = await wrapListItems(createNode, node, {
    ...context,
    parentNodeType: 'list',
  });
  if (!Array.isArray(items) || items.length === 0) {
    return undefined;
  }
  const isOrdered = node.type === 'element' && node.tagName === 'ol';
  return createNode('list', {
    children: items,
    style: isOrdered ? 'numbered' : 'bulleted',
  });
};
/**
 * Handler for list item elements.
 *
 * A `listItem` node is produced when the parent type accepts one and `'list'` is in
 * `context.allowedBlocks`; its children are the visited children passed through
 * `wrap`. Otherwise the children are visited under the current parent type and
 * returned unwrapped.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @param node - Node whose children are visited.
 * @param context - Conversion context.
 * @returns A `listItem` node, the bare children array, or `undefined` when there are
 *   no children.
 */
export const listItem = async function listItem(createNode, node, context) {
  const canBeListItem =
    allowedChildren[context.parentNodeType].includes('listItem') &&
    context.allowedBlocks.includes('list');
  const children = await visitChildren(createNode, node, {
    ...context,
    parentNodeType: canBeListItem ? 'listItem' : context.parentNodeType,
  });
  if (!Array.isArray(children) || children.length === 0) {
    return undefined;
  }
  if (!canBeListItem) {
    return children;
  }
  return createNode('listItem', { children: wrap(children) });
};
/**
 * Handler for `a` elements.
 *
 * - When `'link'` is not in `context.allowedBlocks`, only the children are visited.
 * - A link is considered allowed when the parent type accepts `link` (directly, or
 *   through `inlineNodeTypes` when the parent accepts `'inlineNodes'`), or when the
 *   parent is `root`, `list` or `listItem`.
 * - When the link directly wraps heading-like elements, the relationship is inverted
 *   so that each heading wraps a copy of the link; runs of non-heading siblings are
 *   grouped into their own copy of the link. The restructured children are visited
 *   under the current parent type and returned without an outer `link` node.
 * - Otherwise a `link` node is created with the resolved `href` as `url` and, when
 *   present and non-empty, `target` / `rel` / `title` as `meta` entries.
 *
 * The `underline` mark is removed from `context.marks` for everything inside the link.
 * The input `node` is never mutated: the restructured tree is built on a shallow copy.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @param node - Node to convert; anything other than an element yields `undefined`.
 * @param context - Conversion context.
 * @returns A `link` node, the bare children array, the result of `visitChildren`,
 *   or `undefined` when there are no children.
 */
export const link = async function link(createNode, node, context) {
  if (!context.allowedBlocks.includes('link')) {
    return visitChildren(createNode, node, context);
  }
  const parentAllowed = allowedChildren[context.parentNodeType];
  let isAllowedChild = false;
  if (parentAllowed === 'inlineNodes') {
    isAllowedChild = inlineNodeTypes.includes('link');
  } else if (Array.isArray(parentAllowed)) {
    isAllowedChild = parentAllowed.includes('link');
  }
  if (!isAllowedChild) {
    // Links that aren't inside of an allowedChildren context
    // can still be valid `dast` nodes in the following contexts if wrapped.
    const allowedChildrenWrapped = ['root', 'list', 'listItem'];
    isAllowedChild = allowedChildrenWrapped.includes(context.parentNodeType);
  }
  if (node.type !== 'element') {
    return undefined;
  }
  const nodeChildren = node.children || [];
  // NOTE: `startsWith('h')` also matches other tag names beginning with `h`
  // (e.g. `hr`, `header`), not only `h1`–`h6`; kept as in the original logic.
  const isHeadingLike = (child) =>
    child.type === 'element' && child.tagName.startsWith('h');

  // When a link wraps headings we try to preserve the heading by inverting the
  // parent-child relationship, so that the heading wraps the link.
  //
  // @TODO this is only checking for headings that are direct descendants of links.
  // Decide if it is worth looking deeper.
  let linkNode = node;
  if (nodeChildren.some(isHeadingLike)) {
    const splitChildren = [];
    // Link copy currently collecting consecutive non-heading siblings, if any.
    // Tracking the group itself (instead of a numeric index) guarantees the
    // resulting array is dense even when several headings follow each other.
    let openLinkGroup = null;
    nodeChildren.forEach((child) => {
      if (isHeadingLike(child)) {
        splitChildren.push({
          ...child,
          children: [{ ...node, children: child.children }],
        });
        openLinkGroup = null;
      } else if (openLinkGroup) {
        openLinkGroup.children.push(child);
      } else {
        openLinkGroup = { ...node, children: [child] };
        splitChildren.push(openLinkGroup);
      }
    });
    // Visit a shallow copy so the caller's tree keeps its original children.
    linkNode = { ...node, children: splitChildren };
    isAllowedChild = false;
  }

  const children = await visitChildren(createNode, linkNode, {
    ...context,
    parentNodeType: isAllowedChild ? 'link' : context.parentNodeType,
    marks: Array.isArray(context.marks)
      ? context.marks.filter((m) => m !== 'underline')
      : context.marks,
  });
  if (!Array.isArray(children) || children.length === 0) {
    return undefined;
  }
  if (!isAllowedChild) {
    return children;
  }

  const nodeProps = node.properties;
  const meta = [];
  if (nodeProps) {
    ['target', 'rel', 'title'].forEach((attr) => {
      const raw = nodeProps[attr];
      // Array-valued properties are flattened to a space-separated string.
      const value = Array.isArray(raw) ? raw.join(' ') : raw;
      if (value !== undefined && value !== null && value !== false && value !== '') {
        meta.push({ id: attr, value: String(value) });
      }
    });
  }
  return createNode('link', {
    url: resolveUrl(context, nodeProps?.href),
    children,
    ...(meta.length > 0 ? { meta } : {}),
  });
};
/**
 * Handler for text nodes: produces a `span` node.
 *
 * The span's `value` is the node's text passed through `wrapText`. When
 * `context.marks` is an array, the marks that also appear in `context.allowedMarks`
 * are attached as `marks`; the `marks` key is omitted when none remain.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @param node - Node to convert; anything other than a text node yields `undefined`.
 * @param context - Conversion context.
 * @returns The created `span` node, or `undefined`.
 */
export const span = async function span(createNode, node, context) {
  if (node.type !== 'text') {
    return undefined;
  }
  const permittedMarks = Array.isArray(context.marks)
    ? context.marks.filter((mark) => context.allowedMarks.includes(mark))
    : [];
  const props = { value: wrapText(context, node.value) };
  if (permittedMarks.length > 0) {
    props.marks = permittedMarks;
  }
  return createNode('span', props);
};
/**
 * Handler that emits a line break as a `span` node whose value is a single newline.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @returns The created `span` node.
 */
export const newLine = async function newLine(createNode) {
  const lineBreak = { value: '\n' };
  return createNode('span', lineBreak);
};
// Mark handlers built with `withMark`. Each one visits the node's children with the
// given mark type added to `context.marks`, subject to the rules in `withMark`.
// `inlineCode` is also what the `code` handler delegates to when a `code` block is
// not allowed under the current parent.
export const inlineCode = withMark('code');
export const strong = withMark('strong');
// Exported as `italic`, but the mark type it applies is `'emphasis'`.
export const italic = withMark('emphasis');
export const underline = withMark('underline');
export const strikethrough = withMark('strikethrough');
export const highlight = withMark('highlight');
/**
 * Handler for the `head` element: forwards its first `base` element child, if any, to
 * `context.handlers.base`.
 *
 * @param createNode - Passed through to the `base` handler.
 * @param node - Node to inspect; anything other than an element yields `undefined`.
 * @param context - Conversion context; `handlers.base` is read from it.
 * @returns Whatever the `base` handler returns, or `undefined` when there is no
 *   `base` child.
 */
export const head = async function head(createNode, node, context) {
  if (node.type !== 'element') {
    return undefined;
  }
  const headChildren = node.children || [];
  const baseElement = headChildren.find(
    (child) => child.type === 'element' && child.tagName === 'base',
  );
  if (!baseElement) {
    return undefined;
  }
  return context.handlers.base(createNode, baseElement, context);
};
/**
 * Handler for the `base` element: records the document base URL.
 *
 * The first `base` element with a truthy `href` wins: its value, with a single
 * trailing slash removed, is stored in `context.global.baseUrl` and
 * `context.global.baseUrlFound` is set to `true`. Later calls leave it untouched.
 *
 * @param _createNode - Unused.
 * @param node - Node to inspect; anything other than an element is ignored.
 * @param context - Conversion context; `context.global` is mutated.
 * @returns Always `undefined` (no `dast` node is produced).
 */
export const base = async function base(_createNode, node, context) {
  if (node.type !== 'element') {
    return undefined;
  }
  const shared = context.global;
  if (shared.baseUrlFound) {
    return undefined;
  }
  if (typeof node.properties !== 'object' || !node.properties?.href) {
    return undefined;
  }
  shared.baseUrl = String(node.properties.href).replace(/\/$/, '');
  shared.baseUrlFound = true;
  return undefined;
};
/**
 * Handler that turns a subset of inline CSS into marks before visiting children.
 *
 * The element's `style` string is split into declarations and these are recognised:
 * - `font-weight` equal to `bold` or a number greater than 400 → `strong`
 * - `font-style` equal to `italic` → `emphasis`
 * - `text-decoration` equal to `underline` → `underline`
 *
 * A recognised mark is appended to a copy of `context.marks` only when it is not
 * already present, is listed in `context.allowedMarks`, and is not `underline`
 * directly under a `link` parent. When the resulting list is empty the context is
 * passed on unchanged.
 *
 * @param createNode - Passed through to `visitChildren`.
 * @param node - Node whose `style` property is inspected and whose children are visited.
 * @param context - Conversion context.
 * @returns Whatever `visitChildren` returns.
 */
export const extractInlineStyles = async function extractInlineStyles(createNode, node, context) {
  const marks = Array.isArray(context.marks) ? Array.from(context.marks) : [];
  const properties = node.type === 'element' ? node.properties : undefined;

  // Maps one CSS declaration to the mark it implies, or `null` when it implies none.
  const markFor = (prop, value) => {
    if (prop === 'font-weight') {
      return value === 'bold' || Number(value) > 400 ? 'strong' : null;
    }
    if (prop === 'font-style') {
      return value === 'italic' ? 'emphasis' : null;
    }
    if (prop === 'text-decoration') {
      return value === 'underline' ? 'underline' : null;
    }
    return null;
  };

  if (properties && typeof properties.style === 'string') {
    for (const declaration of properties.style.split(';')) {
      // Split on the first colon only, so values containing `:` stay intact.
      const colonAt = declaration.indexOf(':');
      const prop = (colonAt === -1 ? declaration : declaration.slice(0, colonAt)).trim();
      const value = colonAt === -1 ? '' : declaration.slice(colonAt + 1).trim();
      const mark = markFor(prop, value);
      if (mark === null || marks.includes(mark)) {
        continue;
      }
      const underlineInLink = context.parentNodeType === 'link' && mark === 'underline';
      if (context.allowedMarks.includes(mark) && !underlineInLink) {
        marks.push(mark);
      }
    }
  }

  const childContext = marks.length > 0 ? { ...context, marks } : { ...context };
  return visitChildren(createNode, node, childContext);
};
/**
 * Handler that does nothing: takes no parameters, produces no node and returns
 * `undefined`. Registered in `handlers` for node kinds such as `script` and `style`.
 */
export function noop() {
  return undefined;
}
/**
 * Builds a handler that visits a node's children with the mark `type` applied.
 *
 * The returned handler passes the context through unchanged when `type` is not in
 * `context.allowedMarks`, or when `type` is `'underline'` and the parent is a `link`.
 * Otherwise it visits the children with `marks` set to:
 * - `[type]` when `context.marks` is not an array,
 * - `context.marks` itself when it already contains `type`,
 * - a new array with `type` appended otherwise.
 *
 * @param type - Mark type to apply.
 * @returns A handler `(createNode, node, context)` returning what `visitChildren` returns.
 */
export function withMark(type) {
  return function markHandler(createNode, node, context) {
    const underlineInLink = context.parentNodeType === 'link' && type === 'underline';
    if (!context.allowedMarks.includes(type) || underlineInLink) {
      return visitChildren(createNode, node, context);
    }
    let marks;
    if (!Array.isArray(context.marks)) {
      marks = [type];
    } else if (context.marks.includes(type)) {
      marks = context.marks;
    } else {
      marks = context.marks.concat([type]);
    }
    return visitChildren(createNode, node, { ...context, marks });
  };
}
/**
 * Default handler table.
 *
 * Keys look like HTML tag names plus the node kinds `root`, `text` and `comment`;
 * the lookup itself happens outside this file (presumably by tag name / node type —
 * confirm against the visitor). Key order is kept as in the original table.
 */
export const handlers = {
  // Document root
  root,

  // Paragraph-like blocks
  p: paragraph,
  summary: paragraph,

  // Headings
  h1: heading,
  h2: heading,
  h3: heading,
  h4: heading,
  h5: heading,
  h6: heading,

  // Lists and list items
  ul: list,
  ol: list,
  dir: list,
  dt: listItem,
  dd: listItem,
  li: listItem,

  // Preformatted / code blocks
  listing: code,
  plaintext: code,
  pre: code,
  xmp: code,

  blockquote,
  a: link,

  // Code-like inline elements (the `code` handler falls back to the inline mark)
  code,
  kbd: code,
  samp: code,
  tt: code,
  var: code,

  // Marks
  strong,
  b: strong,
  em: italic,
  i: italic,
  u: underline,
  strike: strikethrough,
  s: strikethrough,
  mark: highlight,

  base,
  // `span` elements contribute marks through their inline `style` ...
  span: extractInlineStyles,
  // ... while text nodes become `span` nodes.
  text: span,
  br: newLine,
  hr: thematicBreak,
  head,

  // Ignored content
  comment: noop,
  script: noop,
  style: noop,
  title: noop,
  video: noop,
  audio: noop,
  embed: noop,
  iframe: noop,
};
/**
 * Visits a list's children and makes sure every defined result is a `listItem`.
 *
 * Results that are not `listItem` nodes are wrapped in one. If the result's type is
 * not listed in `allowedChildren.listItem`, it is first wrapped in a `paragraph`.
 * `undefined` entries are left in place. The array returned by `visitChildren` is
 * updated in place and returned.
 *
 * @param createNode - Factory invoked as `createNode(type, props)`.
 * @param node - List node whose children are visited.
 * @param context - Conversion context passed to `visitChildren`.
 * @returns The array of children, or a new empty array when `visitChildren` did not
 *   return an array.
 */
export async function wrapListItems(createNode, node, context) {
  const children = await visitChildren(createNode, node, context);
  if (!Array.isArray(children)) {
    return [];
  }
  for (const [position, child] of children.entries()) {
    if (typeof child === 'undefined' || child.type === 'listItem') {
      continue;
    }
    const content = allowedChildren.listItem.includes(child.type)
      ? child
      : createNode('paragraph', { children: [child] });
    children[position] = createNode('listItem', { children: [content] });
  }
  return children;
}
/**
 * Returns `value` untouched when `context.wrapText` is truthy; otherwise replaces
 * every line break (`\r\n`, `\n` or `\r`) with a single space.
 *
 * @param context - Conversion context; only `wrapText` is read.
 * @param value - Text to process.
 * @returns The processed text.
 */
export function wrapText(context, value) {
  if (context.wrapText) {
    return value;
  }
  return value.replace(/\r?\n|\r/g, ' ');
}
/**
 * Resolves a link target against the document base URL, when one is known.
 *
 * - `null` / `undefined` yields `''`.
 * - Without `context.global.baseUrl` (or without a global `URL` constructor) the
 *   value is returned as a string, unchanged.
 * - Otherwise the value is resolved with `new URL(url, baseUrl)`. For path-relative
 *   values (`/path` or `./path`) the base pathname is additionally prepended when the
 *   resolved pathname does not already start with it, so that a base such as
 *   `https://example.com/docs` keeps its `/docs` segment.
 * - Protocol-relative values (`//host/path`) point at another host and are therefore
 *   never given the base pathname.
 * - If either the base or the value cannot be parsed as a URL, the value is returned
 *   as a string, unchanged, instead of throwing.
 *
 * @param context - Conversion context; `context.global.baseUrl` is read.
 * @param url - Raw `href` value.
 * @returns The resolved URL string.
 */
export function resolveUrl(context, url) {
  if (url === null || url === undefined) {
    return '';
  }
  const urlString = String(url);
  const { baseUrl } = context.global;
  if (!baseUrl || typeof URL === 'undefined') {
    return urlString;
  }

  let parsedBase;
  let parsed;
  try {
    parsedBase = new URL(baseUrl);
    parsed = new URL(urlString, parsedBase);
  } catch {
    // Both values come from the HTML being converted; an unparseable one must not
    // abort the conversion, so fall back to the raw value.
    return urlString;
  }

  // `/path` or `./path`, but not `//host/path`.
  const isPathRelative = /^\.?\/(?!\/)/.test(urlString);
  if (isPathRelative && !parsed.pathname.startsWith(parsedBase.pathname)) {
    parsed.pathname = `${parsedBase.pathname}${parsed.pathname}`;
  }
  return parsed.toString();
}
//# sourceMappingURL=handlers.js.map