datocms-html-to-structured-text
Version:
Convert HTML (or a `hast` syntax tree) to a valid DatoCMS Structured Text `dast` document
610 lines (530 loc) • 15 kB
text/typescript
/* eslint-disable @typescript-eslint/ban-ts-comment */
// @ts-nocheck
import convert from 'hast-util-is-element/convert';
import toText from 'hast-util-to-text';
import has from 'hast-util-has-property';
import {
allowedChildren,
inlineNodeTypes,
} from 'datocms-structured-text-utils';
import {
Handler,
Mark,
Context,
HastNode,
HastTextNode,
HastElementNode,
HastRootNode,
} from './types';
import visitChildren from './visit-children';
import { wrap } from './wrap';
import { MetaEntry } from '../../utils/dist/types';
/**
 * Handler for the hast `root` node; builds the top-level `root` node of the
 * resulting document.
 *
 * The children are visited with `parentNodeType` set to `'root'`. When at
 * least one converted child has a type that `allowedChildren.root` does not
 * list, the whole child list is handed to `wrap`.
 *
 * @returns the node produced by `createNode('root', …)`, or `null` when the
 * conversion produced no children (or something that is not an array).
 */
export const root: Handler<HastRootNode> = async function root(
  createNode,
  node,
  context,
) {
  const rootContext = { ...context, parentNodeType: 'root' };
  const visited = await visitChildren(createNode, node, rootContext);

  // A non-array result can neither be wrapped nor used as children.
  if (!Array.isArray(visited)) {
    return null;
  }

  const hasDisallowedChild = visited.some(
    (child: HastNode) => child && !allowedChildren.root.includes(child.type),
  );
  const children = hasDisallowedChild ? wrap(visited) : visited;

  if (!Array.isArray(children) || children.length === 0) {
    return null;
  }

  return createNode('root', { children });
};
/**
 * Handler that turns an element into a `paragraph` node when the current
 * parent accepts paragraphs.
 *
 * When the parent does not list `'paragraph'` among its allowed children, the
 * element is still visited (with the parent type left unchanged) and its
 * converted children are returned unwrapped.
 *
 * @returns a `paragraph` node, the bare converted children, or `undefined`
 * when there are no converted children.
 */
export const paragraph: Handler<HastElementNode> = async function paragraph(
  createNode,
  node,
  context,
) {
  const canBeParagraph = allowedChildren[context.parentNodeType].includes(
    'paragraph',
  );
  const childParentType = canBeParagraph
    ? 'paragraph'
    : context.parentNodeType;

  const children = await visitChildren(createNode, node, {
    ...context,
    parentNodeType: childParentType,
  });

  if (!Array.isArray(children) || children.length === 0) {
    return undefined;
  }
  if (canBeParagraph) {
    return createNode('paragraph', { children });
  }
  return children;
};
/**
 * Handler that emits a `thematicBreak` node.
 *
 * The node is only created when the current parent lists `'thematicBreak'`
 * among its allowed children; otherwise the element is dropped and the
 * handler resolves to `undefined`.
 */
export const thematicBreak: Handler<HastElementNode> = async function thematicBreak(
  createNode,
  node,
  context,
) {
  const parentAccepts = allowedChildren[context.parentNodeType].includes(
    'thematicBreak',
  );
  if (!parentAccepts) {
    return undefined;
  }
  return createNode('thematicBreak', {});
};
/**
 * Handler that converts an element into a `heading` node.
 *
 * The level is taken from the second character of the tag name and falls back
 * to `1` when that character does not convert to a non-zero number. A
 * `heading` node is produced only when all of the following hold:
 *  - the current parent lists `'heading'` among its allowed children,
 *  - `context.allowedBlocks` includes `'heading'`,
 *  - `context.allowedHeadingLevels` includes the level.
 *
 * Otherwise the element's children are visited with the context's parent type
 * and `wrapText` left as they were, and returned unwrapped.
 *
 * @returns a `heading` node, the bare converted children, or `undefined` when
 * there are no converted children.
 */
export const heading: Handler<HastElementNode> = async function heading(
  createNode,
  node,
  context,
) {
  const level = Number(node.tagName.charAt(1)) || 1;

  const parentAccepts = allowedChildren[context.parentNodeType].includes(
    'heading',
  );
  const canBeHeading =
    parentAccepts &&
    context.allowedBlocks.includes('heading') &&
    context.allowedHeadingLevels.includes(level);

  let parentNodeType = context.parentNodeType;
  let wrapTextFlag = context.wrapText;
  if (canBeHeading) {
    // Inside a real heading the children are visited with `wrapText` off.
    parentNodeType = 'heading';
    wrapTextFlag = false;
  }

  const children = await visitChildren(createNode, node, {
    ...context,
    parentNodeType,
    wrapText: wrapTextFlag,
  });

  if (!Array.isArray(children) || children.length === 0) {
    return undefined;
  }
  if (!canBeHeading) {
    return children;
  }
  return createNode('heading', { level, children });
};
/**
 * Handler that converts an element into a block-level `code` node.
 *
 * Fallbacks, checked in this order:
 *  - when the current parent does not list `'code'` among its allowed
 *    children, the element is delegated to `inlineCode`;
 *  - when `context.allowedBlocks` does not include `'code'`, only the
 *    children are visited.
 *
 * The language is read from a class name that starts with `context.codePrefix`
 * (default `'language-'`). For a `pre` element the class list comes from its
 * first `code` child that has a `className`; for a `code` element it comes
 * from the element itself. The code text is the element's text content passed
 * through `wrapText`, with trailing newlines removed.
 */
export const code: Handler<HastElementNode> = async function code(
  createNode,
  node,
  context,
) {
  if (!allowedChildren[context.parentNodeType].includes('code')) {
    return inlineCode(createNode, node, context);
  }
  if (!context.allowedBlocks.includes('code')) {
    return visitChildren(createNode, node, context);
  }

  const languagePrefix =
    typeof context.codePrefix === 'string' ? context.codePrefix : 'language-';
  const isPre = convert('pre');
  const isCode = convert('code');

  let classNames = null;
  if (isPre(node)) {
    const codeChild = node.children.find(
      (child) =>
        typeof child === 'object' && isCode(child) && has(child, 'className'),
    );
    if (codeChild) {
      // `isCode` only matches elements, so `properties` is present here;
      // the ignore is needed because `HastNode` also covers text nodes.
      // @ts-ignore
      classNames = codeChild.properties.className;
    }
  } else if (isCode(node) && has(node, 'className')) {
    classNames = node.properties.className;
  }

  const languageProps: { language?: string } = {};
  if (Array.isArray(classNames)) {
    // Only the first class carrying the prefix is used.
    for (const className of classNames) {
      if (className.slice(0, languagePrefix.length) === languagePrefix) {
        languageProps.language = className.slice(languagePrefix.length);
        break;
      }
    }
  }

  const text = String(wrapText(context, toText(node))).replace(/\n+$/, '');
  return createNode('code', { ...languageProps, code: text });
};
/**
 * Handler that converts an element into a `blockquote` node.
 *
 * A `blockquote` is produced only when the current parent lists it among its
 * allowed children and `context.allowedBlocks` includes `'blockquote'`; its
 * children are passed through `wrap`. Otherwise the converted children are
 * returned unwrapped.
 *
 * @returns a `blockquote` node, the bare converted children, or `undefined`
 * when there are no converted children.
 */
export const blockquote: Handler<HastElementNode> = async function blockquote(
  createNode,
  node,
  context,
) {
  const canBeBlockquote =
    allowedChildren[context.parentNodeType].includes('blockquote') &&
    context.allowedBlocks.includes('blockquote');

  const childContext = {
    ...context,
    parentNodeType: canBeBlockquote ? 'blockquote' : context.parentNodeType,
  };
  const children = await visitChildren(createNode, node, childContext);

  if (!Array.isArray(children) || children.length === 0) {
    return undefined;
  }
  if (!canBeBlockquote) {
    return children;
  }
  return createNode('blockquote', { children: wrap(children) });
};
/**
 * Handler that converts an element into a `list` node.
 *
 * When the current parent does not accept lists, or `context.allowedBlocks`
 * does not include `'list'`, the element's children are visited with the
 * unchanged context and returned as they are. Otherwise the children are
 * collected through `wrapListItems` and wrapped in a `list` node whose
 * `style` is `'numbered'` for `ol` and `'bulleted'` for every other tag.
 *
 * @returns a `list` node, the plain visited children, or `undefined` when a
 * permitted list ends up with no items.
 */
export const list: Handler<HastElementNode> = async function list(
  createNode,
  node,
  context,
) {
  const canBeList =
    allowedChildren[context.parentNodeType].includes('list') &&
    context.allowedBlocks.includes('list');

  if (!canBeList) {
    return await visitChildren(createNode, node, context);
  }

  const items = await wrapListItems(createNode, node, {
    ...context,
    parentNodeType: 'list',
  });
  if (!Array.isArray(items) || items.length === 0) {
    return undefined;
  }

  const style = node.tagName === 'ol' ? 'numbered' : 'bulleted';
  return createNode('list', { children: items, style });
};
/**
 * Handler that converts an element into a `listItem` node.
 *
 * A `listItem` is produced only when the current parent lists it among its
 * allowed children and `context.allowedBlocks` includes `'list'`; its
 * children are passed through `wrap`. Otherwise the converted children are
 * returned unwrapped.
 *
 * @returns a `listItem` node, the bare converted children, or `undefined`
 * when there are no converted children.
 */
export const listItem: Handler<HastElementNode> = async function listItem(
  createNode,
  node,
  context,
) {
  const canBeListItem =
    allowedChildren[context.parentNodeType].includes('listItem') &&
    context.allowedBlocks.includes('list');

  const childContext = {
    ...context,
    parentNodeType: canBeListItem ? 'listItem' : context.parentNodeType,
  };
  const children = await visitChildren(createNode, node, childContext);

  if (!Array.isArray(children) || children.length === 0) {
    return undefined;
  }
  if (!canBeListItem) {
    return children;
  }
  return createNode('listItem', { children: wrap(children) });
};
/**
 * Re-groups the children of a link element so that heading-like children end
 * up wrapping the link instead of being wrapped by it.
 *
 * Every child element whose tag name starts with `h` becomes its own entry: a
 * copy of that element whose only child is a copy of the link holding the
 * element's former children. Each run of consecutive other children is
 * collected into one copy of the link. The input node is not modified and the
 * returned array is always dense.
 */
function splitLinkAroundHeadings(linkNode: HastElementNode): HastElementNode[] {
  const parts: HastElementNode[] = [];
  // Link copy currently collecting non-heading children; reset after a heading.
  let openLink: HastElementNode | null = null;

  linkNode.children.forEach((child) => {
    if (child.type === 'element' && child.tagName.startsWith('h')) {
      parts.push({
        ...child,
        children: [
          {
            ...linkNode,
            children: child.children,
          },
        ],
      });
      openLink = null;
      return;
    }
    if (openLink === null) {
      openLink = { ...linkNode, children: [] };
      parts.push(openLink);
    }
    openLink.children.push(child);
  });

  return parts;
}

/**
 * Handler that converts an element into a `link` node.
 *
 * - When `context.allowedBlocks` does not include `'link'`, only the children
 *   are visited.
 * - A link is allowed when the current parent accepts inline nodes (and
 *   `inlineNodeTypes` includes `'link'`), when the parent's allowed children
 *   list `'link'`, or when the parent is `root`, `list` or `listItem`.
 * - When the link directly wraps heading-like elements, the relationship is
 *   inverted (see `splitLinkAroundHeadings`) and the converted children are
 *   returned without an outer `link` node.
 * - `target`, `rel` and `title` properties, when set, are copied to `meta`;
 *   array values are joined with a space.
 *
 * The `url` is `resolveUrl(context, href)`; a node without `properties` is
 * treated as having no `href`.
 *
 * @returns a `link` node, the bare converted children, or `undefined` when
 * there are no converted children.
 */
export const link: Handler<HastElementNode> = async function link(
  createNode,
  node,
  context,
) {
  if (!context.allowedBlocks.includes('link')) {
    return visitChildren(createNode, node, context);
  }

  let isAllowedChild = false;
  if (allowedChildren[context.parentNodeType] === 'inlineNodes') {
    isAllowedChild = inlineNodeTypes.includes('link');
  } else if (Array.isArray(allowedChildren[context.parentNodeType])) {
    isAllowedChild = allowedChildren[context.parentNodeType].includes('link');
  }
  if (!isAllowedChild) {
    // Links that aren't inside of an allowedChildren context
    // can still be valid `dast` nodes in the following contexts if wrapped.
    const allowedChildrenWrapped = ['root', 'list', 'listItem'];
    isAllowedChild = allowedChildrenWrapped.includes(context.parentNodeType);
  }

  // When a link wraps headings we try to preserve the heading by inverting the
  // parent-child relationship, so that the heading wraps the link.
  //
  // @TODO this is only checking for headings that are direct descendants of links.
  // Decide if it is worth looking deeper.
  const wrapsHeadings = node.children.some(
    (child) => child.type === 'element' && child.tagName.startsWith('h'),
  );

  // Work on a copy so the caller's hast tree is left untouched.
  let visitedNode = node;
  if (wrapsHeadings) {
    visitedNode = { ...node, children: splitLinkAroundHeadings(node) };
    isAllowedChild = false;
  }

  const children = await visitChildren(createNode, visitedNode, {
    ...context,
    parentNodeType: isAllowedChild ? 'link' : context.parentNodeType,
  });

  if (!Array.isArray(children) || children.length === 0) {
    return undefined;
  }
  if (!isAllowedChild) {
    return children;
  }

  const properties = node.properties;
  const props: { url: string; children: unknown; meta?: Array<MetaEntry> } = {
    url: resolveUrl(context, properties ? properties.href : undefined),
    children,
  };

  const meta: Array<MetaEntry> = [];
  if (properties) {
    ['target', 'rel', 'title'].forEach((attr) => {
      const raw = properties[attr];
      const value = Array.isArray(raw) ? raw.join(' ') : raw;
      if (value) {
        meta.push({ id: attr, value });
      }
    });
  }
  if (meta.length > 0) {
    props.meta = meta;
  }

  return createNode('link', props);
};
/**
 * Handler for text nodes; produces a `span` node.
 *
 * The span's `value` is the node's text passed through `wrapText`. When
 * `context.marks` is an array, the entries that also appear in
 * `context.allowedMarks` are attached as `marks`; the `marks` key is left out
 * entirely when none remain.
 */
export const span: Handler<HastTextNode> = async function span(
  createNode,
  node,
  context,
) {
  const activeMarks: Mark[] = Array.isArray(context.marks)
    ? context.marks.filter((mark) => context.allowedMarks.includes(mark))
    : [];

  const value = wrapText(context, node.value);

  if (activeMarks.length === 0) {
    return createNode('span', { value });
  }
  return createNode('span', { value, marks: activeMarks });
};
/**
 * Handler that emits a `span` node whose value is a single newline
 * character. Only `createNode` is used; the node and context are ignored.
 */
export const newLine: Handler<HastTextNode> = async function newLine(
  createNode,
) {
  const lineBreak = { value: '\n' };
  return createNode('span', lineBreak);
};
// Inline mark handlers. Each one is built by `withMark`: when the mark is
// listed in `context.allowedMarks`, it is added to `context.marks` before the
// element's children are visited; otherwise the children are visited with the
// context unchanged.
export const inlineCode = withMark('code');
export const strong = withMark('strong');
export const italic = withMark('emphasis');
export const underline = withMark('underline');
export const strikethrough = withMark('strikethrough');
export const highlight = withMark('highlight');
/**
 * Handler for the `head` element.
 *
 * Looks for the first direct child whose tag name is `base` and forwards it
 * to `context.handlers.base`. Everything else inside the element is ignored.
 *
 * @returns whatever the `base` handler returns, or `undefined` when there is
 * no `base` child.
 */
export const head: Handler<HastElementNode> = async function head(
  createNode,
  node,
  context,
) {
  const baseElement = node.children.find((child) => child.tagName === 'base');
  return baseElement
    ? context.handlers.base(createNode, baseElement, context)
    : undefined;
};
/**
 * Handler for the `base` element; records the document's base URL.
 *
 * The first `base` element with a truthy `href` wins: its `href`, minus one
 * trailing slash, is stored in `context.global.baseUrl` and
 * `context.global.baseUrlFound` is set, so later `base` elements are ignored.
 * Produces no node.
 */
export const base: Handler<HastElementNode> = async function base(
  createNode,
  node,
  context,
) {
  const shared = context.global;
  if (shared.baseUrlFound) {
    return;
  }

  const properties = node.properties;
  if (typeof properties !== 'object' || !properties.href) {
    return;
  }

  shared.baseUrl = properties.href.replace(/\/$/, '');
  shared.baseUrlFound = true;
};
/**
 * Handler that derives marks from an element's inline `style` attribute and
 * then visits its children with those marks added to `context.marks`.
 *
 * Recognised declarations:
 *  - `font-weight: bold` or a numeric weight above 400 → `strong`
 *  - `font-style: italic` → `emphasis`
 *  - `text-decoration: underline` → `underline`
 *
 * Property names and keyword values are compared case-insensitively, as CSS
 * treats them. A mark is added only when it is listed in
 * `context.allowedMarks`, is not already inherited through `context.marks`,
 * and was not already produced by an earlier declaration of the same
 * element, so the resulting list never contains duplicates.
 *
 * When there are no marks at all, the context's `marks` entry is left as is.
 */
export const extractInlineStyles: Handler<HastElementNode> = async function extractInlineStyles(
  createNode,
  node,
  context,
) {
  const inheritedMarks: Mark[] = Array.isArray(context.marks)
    ? context.marks
    : [];
  const styleMarks: Mark[] = [];

  if (node.properties && typeof node.properties.style === 'string') {
    node.properties.style.split(';').forEach((declaration) => {
      // Re-join everything after the first colon so values containing ':'
      // stay intact.
      const [firstChunk, ...otherChunks] = declaration.split(':');
      const prop = firstChunk.trim().toLowerCase();
      const value = otherChunks.join(':').trim().toLowerCase();

      let mark: Mark | undefined;
      switch (prop) {
        case 'font-weight':
          if (value === 'bold' || Number(value) > 400) {
            mark = 'strong';
          }
          break;
        case 'font-style':
          if (value === 'italic') {
            mark = 'emphasis';
          }
          break;
        case 'text-decoration':
          if (value === 'underline') {
            mark = 'underline';
          }
          break;
        default:
          break;
      }

      if (
        mark !== undefined &&
        !inheritedMarks.includes(mark) &&
        !styleMarks.includes(mark) &&
        context.allowedMarks.includes(mark)
      ) {
        styleMarks.push(mark);
      }
    });
  }

  const marks =
    styleMarks.length > 0 ? inheritedMarks.concat(styleMarks) : inheritedMarks;

  return visitChildren(createNode, node, {
    ...context,
    ...(marks.length > 0 ? { marks } : {}),
  });
};
/**
 * Handler that intentionally does nothing and resolves to `undefined`.
 * The `handlers` map uses it for `comment`, `script`, `style`, `title`,
 * `video`, `audio`, `embed` and `iframe`.
 */
// eslint-disable-next-line @typescript-eslint/no-empty-function, @typescript-eslint/explicit-module-boundary-types
export async function noop() {}
/**
 * Builds a handler that applies the mark `type` to everything inside an
 * element.
 *
 * The returned handler visits the element's children with `type` added to
 * `context.marks`. If the mark is already present, the existing array is
 * reused as is; if `context.marks` is not an array, a fresh one-element array
 * is used. When `type` is not in `context.allowedMarks`, the children are
 * visited with the context unchanged.
 *
 * @param type the mark to apply
 */
export function withMark(type: Mark): Handler<HastElementNode> {
  return function markHandler(createNode, node, context) {
    if (!context.allowedMarks.includes(type)) {
      return visitChildren(createNode, node, context);
    }

    let activeMarks: Mark[];
    if (!Array.isArray(context.marks)) {
      activeMarks = [type];
    } else if (context.marks.includes(type)) {
      activeMarks = context.marks;
    } else {
      activeMarks = context.marks.concat([type]);
    }

    return visitChildren(createNode, node, { ...context, marks: activeMarks });
  };
}
/**
 * Lookup table from a hast tag name (or node type such as `root`, `text` and
 * `comment`) to the handler that converts it.
 */
export const handlers = {
  root,

  // Paragraph-like elements
  p: paragraph,
  summary: paragraph,

  // Headings
  h1: heading,
  h2: heading,
  h3: heading,
  h4: heading,
  h5: heading,
  h6: heading,

  // Lists and their items
  ul: list,
  ol: list,
  dir: list,
  dt: listItem,
  dd: listItem,
  li: listItem,

  // Code
  listing: code,
  plaintext: code,
  pre: code,
  xmp: code,

  blockquote,
  a: link,

  // Code-like inline elements
  code,
  kbd: code,
  samp: code,
  tt: code,
  var: code,

  // Marks
  strong,
  b: strong,
  em: italic,
  i: italic,
  u: underline,
  strike: strikethrough,
  s: strikethrough,
  mark: highlight,

  base,
  span: extractInlineStyles,
  text: span,
  br: newLine,
  hr: thematicBreak,
  head,

  // Ignored content
  comment: noop,
  script: noop,
  style: noop,
  title: noop,
  video: noop,
  audio: noop,
  embed: noop,
  iframe: noop,
};
/**
 * Visits the children of a list element and makes sure every converted child
 * is a `listItem`.
 *
 * Children that already are `listItem` nodes are kept. Any other child is
 * placed inside a new `listItem`; if its type is not listed in
 * `allowedChildren.listItem` it is first wrapped in a `paragraph`. Missing
 * entries (`undefined` or `null`) are passed through untouched.
 *
 * Both wrapper nodes are built through `createNode`, like every other node
 * this module produces.
 *
 * @returns the list of children, or an empty array when visiting the
 * children did not yield an array.
 */
export const wrapListItems: Handler<HastElementNode> = async function wrapListItems(
  createNode,
  node,
  context,
) {
  const children = await visitChildren(createNode, node, context);
  if (!Array.isArray(children)) {
    return [];
  }

  return children.map((child) => {
    if (child === undefined || child === null || child.type === 'listItem') {
      return child;
    }
    const content = allowedChildren.listItem.includes(child.type)
      ? child
      : createNode('paragraph', { children: [child] });
    return createNode('listItem', { children: [content] });
  });
};
/**
 * Normalises line breaks in a piece of text according to the context.
 *
 * @param context conversion context; only its `wrapText` flag is read
 * @param value the text to process
 * @returns `value` unchanged when `context.wrapText` is truthy; otherwise
 * `value` with every line break (`\r\n`, `\n` or `\r`) replaced by a space
 */
export function wrapText(context: Context, value: string): string {
  if (context.wrapText) {
    return value;
  }
  return value.replace(/\r?\n|\r/g, ' ');
}
/**
 * Resolves a link target against the document's base URL, if one was found.
 *
 * - `null` / `undefined` resolve to the empty string.
 * - Without `context.global.baseUrl`, or when the `URL` global is
 *   unavailable, the value is returned unchanged.
 * - Otherwise the value is resolved with `new URL(url, baseUrl)`. For URLs
 *   that start with `/` or `./`, the base URL's path is additionally
 *   prepended when the resolved path does not already start with it, i.e.
 *   the base path is treated as a directory.
 * - Protocol-relative URLs (`//host/path`) point at another host, so they
 *   are resolved normally and never get the base path prepended.
 * - When either the value or the base URL cannot be parsed, the value is
 *   returned unchanged instead of throwing.
 *
 * @param context conversion context; only `context.global.baseUrl` is read
 * @param url the raw `href` value
 */
export function resolveUrl(
  context: Context,
  url: string | null | undefined,
): string {
  if (url === null || url === undefined) {
    return '';
  }

  const baseUrl = context.global.baseUrl;
  if (!baseUrl || typeof URL === 'undefined') {
    return url;
  }

  try {
    const parsed = new URL(url, baseUrl);

    // `./x` or `/x`, but not the protocol-relative `//host/x`.
    const isRelative = /^(?:\.\/|\/(?!\/))/.test(url);
    if (isRelative) {
      const parsedBase = new URL(baseUrl);
      if (!parsed.pathname.startsWith(parsedBase.pathname)) {
        parsed.pathname = `${parsedBase.pathname}${parsed.pathname}`;
      }
    }

    return parsed.toString();
  } catch {
    // Malformed href or non-absolute base: keep the author's value.
    return url;
  }
}