@f-fjs/tidy-markdown
Fix ugly markdown.
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const front_matter_1 = __importDefault(require("front-matter"));
const js_yaml_1 = __importDefault(require("js-yaml"));
const lodash_1 = __importDefault(require("lodash"));
const marked_1 = __importDefault(require("marked"));
const parse5_1 = require("parse5");
const converters_1 = require("./converters");
const node_1 = require("./node");
const tree_adapter_1 = __importDefault(require("./tree-adapter"));
const utils_1 = require("./utils");
const { createElement, detachNode, getCommentNodeContent, getTextNodeContent, insertBefore, insertText, isCommentNode, isTextNode } = tree_adapter_1.default;
/**
 * Some people accidentally skip levels in their headers (like jumping from h1 to
* h3), which screws up things like tables of contents. This function fixes
* that.
* The algorithm assumes that relations between nearby headers are correct and
* will try to preserve them. For example, "h1, h3, h3" becomes "h1, h2, h2"
* rather than "h1, h2, h3".
*/
function fixHeaders(dom, ensureFirstHeaderIsH1) {
const topLevelHeaders = new Array(); // the headers that aren't nested in any other elements
if (utils_1.isParentNode(dom))
for (const child of Array.from(dom.childNodes)) {
if (utils_1.isElement(child) && /h[0-6]/.test(child.tagName)) {
topLevelHeaders.push(child);
}
}
// there are no headers in this document, so skip
if (topLevelHeaders.length === 0) {
return;
}
// by starting at 0, we force the first header to be an h1 (or an h0, but that
// doesn't exist)
let lastHeaderDepth = 0;
if (!ensureFirstHeaderIsH1) {
// set the depth to `firstHeaderDepth - 1` so the rest of the function will
// act as though that was the root
lastHeaderDepth = parseInt(topLevelHeaders[0].tagName.charAt(1), 10) - 1 || 0;
}
// we track the rootDepth to ensure that no headers go "below" the level of the
// first one. for example h3, h4, h2 would need to be corrected to h3, h4, h3.
// this is really only needed when the first header isn't an h1.
const rootDepth = lastHeaderDepth + 1;
let i = 0;
while (i < topLevelHeaders.length) {
const headerDepth = parseInt(topLevelHeaders[i].tagName.charAt(1), 10);
if (rootDepth <= headerDepth && headerDepth <= lastHeaderDepth + 1) {
lastHeaderDepth = headerDepth; // header follows all rules, move on to next
}
else {
// find all the children of that header and cut them down by the amount in
// the gap between the offending header and the last good header. For
// example, a jump from h1 to h3 would be `gap = 1` and all headers
// directly following that h3 which are h3 or greater would need to be
// reduced by 1 level. and of course the offending header is reduced too.
// if the issue is that the offending header is below the root header, then
// the same procedure is applied, but *increasing* the offending header &
// children to the nearest acceptable level.
const gap = headerDepth <= rootDepth
? headerDepth - rootDepth
: headerDepth - (lastHeaderDepth + 1);
for (let e = i; e < topLevelHeaders.length; e++) {
const childHeaderDepth = parseInt(topLevelHeaders[e].tagName.charAt(1), 10);
if (childHeaderDepth >= headerDepth) {
topLevelHeaders[e].tagName = `h${childHeaderDepth - gap}`;
}
else {
break;
}
}
// don't let it increment `i`. we need to get the offending header checked
// again so it sets the new `lastHeaderDepth`
continue;
}
i++;
}
}
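// Illustrative sketch (assumption, not part of the original compiled output):
// fixHeaders rewrites tagName in place on the parsed fragment. For a fragment
// parsed from '<h2>A</h2><h4>B</h4>', fixHeaders(fragment, false) keeps h2 as
// the root and pulls B up to h3, while fixHeaders(fragment, true) pulls the
// whole outline up to h1, h2.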
function convertCommentNode(node) {
const commentElement = createElement('_comment', null, []);
insertText(commentElement, getCommentNodeContent(node));
insertBefore(node.parent, commentElement, node);
detachNode(node);
return commentElement;
}
/**
 * Flattens the DOM tree into a single array in breadth-first order
*/
function bfsOrder(node) {
const inqueue = [node];
const outqueue = new Array();
while (inqueue.length > 0) {
const elem = inqueue.shift();
outqueue.push(elem);
if (utils_1.isParentNode(elem))
inqueue.push(...elem.childNodes
.map(child => isCommentNode(child)
? convertCommentNode(child)
: child)
.filter(utils_1.isElement));
}
outqueue.shift(); // remove root node
return outqueue;
}
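// Illustrative sketch (assumption): for a fragment parsed from
// '<p>one <em>two</em></p><ul><li>three</li></ul>', bfsOrder returns the
// element nodes [p, ul, em, li] -- breadth-first with the root excluded, text
// nodes filtered out, and comment nodes first rewritten into placeholder
// '_comment' elements by convertCommentNode.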
function getChildText(child) {
if (node_1.isConverterNode(child)) {
return child._replacement;
}
else if (isTextNode(child)) {
return utils_1.cleanText(child);
}
else {
throw new Error(`Unsupported node type: ${child.type}`);
}
}
/**
 * Constructs a Markdown string of replacement text for a given node
*/
function getContent(node) {
if (isTextNode(node)) {
return getTextNodeContent(node);
}
let content = '';
let previousSibling = null;
if (utils_1.isParentNode(node))
node.childNodes.forEach(child => {
var _a, _b;
let childText = getChildText(child);
// prevent extra whitespace around `<br>`s
if (utils_1.isElement(child) && child.tagName === 'br') {
content = content.trimRight();
}
if ((utils_1.isElement(previousSibling) ? previousSibling.tagName : undefined) === 'br') {
childText = childText.trimLeft();
}
if (previousSibling != null) {
const leading = node_1.isConverterNode(child) && ((_a = child._whitespace) === null || _a === void 0 ? void 0 : _a.leading) || '';
const trailing = node_1.isConverterNode(previousSibling) && ((_b = previousSibling._whitespace) === null || _b === void 0 ? void 0 : _b.trailing) || '';
content += `${leading}${trailing}`.replace(/\n{3,}/, '\n\n');
}
content += childText;
previousSibling = child;
});
return content;
}
function canConvert(node, filter) {
if (typeof filter === 'string') {
return utils_1.isElement(node) && filter === node.tagName;
}
else if (Array.isArray(filter)) {
return utils_1.isElement(node) && Array.from(filter).includes(node.tagName);
}
else if (typeof filter === 'function') {
return filter(node);
}
else {
throw new TypeError('`filter` needs to be a string, array, or function');
}
}
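// Illustrative sketch (assumption): the three filter shapes accepted above.
//
//   canConvert(node, 'p');                        // single tag name
//   canConvert(node, ['h1', 'h2', 'h3']);         // array of tag names
//   canConvert(node, n => utils_1.isElement(n)
//       && /^h[1-6]$/.test(n.tagName));           // arbitrary predicate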
function findConverter(node) {
return converters_1.Converters.find(converter => canConvert(node, converter.filter));
}
function isFlankedByWhitespace(side, node) {
let regExp;
let sibling;
if (side === 'left') {
sibling = node.previousSibling;
regExp = /\s$/;
}
else {
sibling = node.nextSibling;
regExp = /^\s/;
}
if (sibling && !utils_1.isBlock(sibling)) {
return regExp.test(getContent(sibling));
}
else {
return false;
}
}
function flankingWhitespace(node) {
var _a, _b, _c;
let leading = '';
let trailing = '';
if (!utils_1.isBlock(node)) {
const content = getContent(node);
const hasLeading = /^\s/.test(content);
const hasTrailing = /\s$/.test(content);
if (hasLeading && !isFlankedByWhitespace('left', node)) {
leading = ' ';
}
if (hasTrailing && !isFlankedByWhitespace('right', node)) {
trailing = ' ';
}
}
// add whitespace from leading / trailing whitespace attributes in first / last
// child nodes
if (utils_1.isParentNode(node)) {
const first = node.childNodes[0];
const last = (_a = node.childNodes.slice(-1)) === null || _a === void 0 ? void 0 : _a[0];
leading += node_1.isConverterNode(first) && ((_b = first._whitespace) === null || _b === void 0 ? void 0 : _b.leading) || '';
trailing += node_1.isConverterNode(last) && ((_c = last._whitespace) === null || _c === void 0 ? void 0 : _c.trailing) || '';
}
return { leading, trailing };
}
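// Illustrative sketch (assumption): in '<p>foo<em> bar</em>baz</p>' the <em>
// content is ' bar' and its left neighbour 'foo' does not end in whitespace, so
// flankingWhitespace returns { leading: ' ', trailing: '' }; the space is later
// re-attached outside the emphasis markers instead of being trapped inside them.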
/**
* Finds a Markdown converter, gets the replacement, and sets it on
* `_replacement`
*/
function process(node, links) {
node_1.assertIsConverterNode(node);
let whitespace = { leading: '', trailing: '' };
const content = getContent(node).trim();
const converter = node._converter;
if (converter.surroundingBlankLines) {
const surround = typeof converter.surroundingBlankLines === 'function'
? converter.surroundingBlankLines(node)
: converter.surroundingBlankLines;
if (typeof surround === 'object')
whitespace = surround;
else if (surround)
whitespace = { leading: '\n\n', trailing: '\n\n' };
}
else {
whitespace = flankingWhitespace(node);
if (converter.trailingWhitespace) {
whitespace.trailing += typeof converter.trailingWhitespace === 'function'
? converter.trailingWhitespace(node) || ''
: converter.trailingWhitespace || '';
}
}
if (utils_1.isElement(node) && node.tagName === 'li') {
// li isn't allowed to have leading whitespace
whitespace.leading = '';
}
node._replacement = converter.replacement(content, node, links);
node._whitespace = whitespace;
}
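// Illustrative sketch (assumption): a converter that asks for surrounding blank
// lines (typical for block-level output) leaves the node with
// _whitespace = { leading: '\n\n', trailing: '\n\n' }; inline converters instead
// get whitespace from flankingWhitespace plus any declared trailingWhitespace,
// and <li> nodes always have their leading whitespace cleared.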
/**
 * Removes whitespace-only text nodes that sit at the edges of their parent or
 * next to a block element
*/
function removeEmptyNodes(node) {
if (utils_1.isParentNode(node))
node.childNodes
.filter(child => {
if (isTextNode(child) && getTextNodeContent(child).trim() === '') {
const { previousSibling } = child;
const { nextSibling } = child;
if (!previousSibling || !nextSibling || utils_1.isBlock(previousSibling) || utils_1.isBlock(nextSibling)) {
return true;
}
}
})
.forEach(child => detachNode(child));
}
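// Illustrative sketch (assumption): given '<p>a</p>\n<p>b</p>', the '\n' text
// node sits between two block elements and is detached, while the single space
// in '<em>a</em> <strong>b</strong>' is flanked by inline siblings and survives.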
function default_1(dirtyMarkdown, options = {}) {
let content;
if (typeof dirtyMarkdown !== 'string') {
throw new TypeError('Markdown input is not a string');
}
const { ensureFirstHeaderIsH1 = true, alignHeaders = true } = options;
let out = '';
// handle yaml front-matter
try {
content = front_matter_1.default(dirtyMarkdown);
if (Object.keys(content.attributes).length !== 0) {
out += `---\n${js_yaml_1.default.safeDump(content.attributes).trim()}\n---\n\n`;
}
content = content.body;
}
catch (error) {
// parsing failed, just ignore front-matter
content = dirtyMarkdown;
}
const ast = marked_1.default.lexer(content);
const rawLinks = ast.links; // see issue: https://github.com/chjj/marked/issues/472
let links = Object.keys(rawLinks).map(link => ({
name: link.toLowerCase(),
url: rawLinks[link].href,
title: rawLinks[link].title || null
}));
links = lodash_1.default.sortBy(links, ['name', 'url']);
let html = marked_1.default.parser(ast);
// Escape potential ol triggers
html = html.replace(/(\d+)\. /g, '$1\\. ');
const root = parse5_1.parseFragment(html, { treeAdapter: tree_adapter_1.default });
// remove empty nodes that are direct children of the root first
removeEmptyNodes(root);
bfsOrder(root).forEach(removeEmptyNodes);
if (alignHeaders) {
fixHeaders(root, ensureFirstHeaderIsH1);
}
bfsOrder(root)
.map(node => {
const converter = findConverter(node);
if (converter) {
const converterNode = node;
converterNode._converter = converter;
return converterNode;
}
return node;
})
.reverse() // Process nodes in reverse (so deepest child elements are first).
.forEach(node => process(node, links));
out += getContent(root).trimRight() + '\n';
if (links.length > 0) {
out += '\n';
}
for (const { name, url, title } of Array.from(links)) {
        const optionalTitle = title ? ` "${title}"` : '';
out += `[${name}]: ${url}${optionalTitle}\n`;
}
return out;
}
exports.default = default_1;
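// Illustrative usage sketch (assumption, not part of the original compiled
// output): running this file directly tidies a small sample document with the
// same options documented above.
if (require.main === module) {
    const sample = '#First Header\n###Skipped a level\n\nSome text.\n';
    // With the defaults, headers are realigned so the document starts at h1.
    console.log(default_1(sample, { ensureFirstHeaderIsH1: true }));
}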
//# sourceMappingURL=index.js.map