@f-fjs/tidy-markdown
Fix ugly markdown.
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const front_matter_1 = __importDefault(require("front-matter"));
const js_yaml_1 = __importDefault(require("js-yaml"));
const lodash_1 = __importDefault(require("lodash"));
const marked_1 = __importDefault(require("marked"));
const parse5_1 = require("parse5");
const converters_1 = require("./converters");
const node_1 = require("./node");
const tree_adapter_1 = __importDefault(require("./tree-adapter"));
const utils_1 = require("./utils");
const { createElement, detachNode, getCommentNodeContent, getTextNodeContent, insertBefore, insertText, isCommentNode, isTextNode } = tree_adapter_1.default;
/**
 * Some people accidentally skip levels in their headers (like jumping from h1 to
* h3), which screws up things like tables of contents. This function fixes
* that.
* The algorithm assumes that relations between nearby headers are correct and
* will try to preserve them. For example, "h1, h3, h3" becomes "h1, h2, h2"
* rather than "h1, h2, h3".
*/
function fixHeaders(dom, ensureFirstHeaderIsH1) {
const topLevelHeaders = new Array(); // the headers that aren't nested in any other elements
if (utils_1.isParentNode(dom))
for (const child of Array.from(dom.childNodes)) {
if (utils_1.isElement(child) && /h[0-6]/.test(child.tagName)) {
topLevelHeaders.push(child);
}
}
// there are no headers in this document, so skip
if (topLevelHeaders.length === 0) {
return;
}
// by starting at 0, we force the first header to be an h1 (or an h0, but that
// doesn't exist)
let lastHeaderDepth = 0;
if (!ensureFirstHeaderIsH1) {
// set the depth to `firstHeaderDepth - 1` so the rest of the function will
// act as though that was the root
lastHeaderDepth = parseInt(topLevelHeaders[0].tagName.charAt(1), 10) - 1 || 0;
}
// we track the rootDepth to ensure that no headers go "below" the level of the
// first one. for example h3, h4, h2 would need to be corrected to h3, h4, h3.
// this is really only needed when the first header isn't an h1.
const rootDepth = lastHeaderDepth + 1;
let i = 0;
while (i < topLevelHeaders.length) {
const headerDepth = parseInt(topLevelHeaders[i].tagName.charAt(1), 10);
if (rootDepth <= headerDepth && headerDepth <= lastHeaderDepth + 1) {
lastHeaderDepth = headerDepth; // header follows all rules, move on to next
}
else {
// find all the children of that header and cut them down by the amount in
// the gap between the offending header and the last good header. For
// example, a jump from h1 to h3 would be `gap = 1` and all headers
// directly following that h3 which are h3 or greater would need to be
// reduced by 1 level. and of course the offending header is reduced too.
// if the issue is that the offending header is below the root header, then
// the same procedure is applied, but *increasing* the offending header &
// children to the nearest acceptable level.
const gap = headerDepth <= rootDepth
? headerDepth - rootDepth
: headerDepth - (lastHeaderDepth + 1);
for (let e = i; e < topLevelHeaders.length; e++) {
const childHeaderDepth = parseInt(topLevelHeaders[e].tagName.charAt(1), 10);
if (childHeaderDepth >= headerDepth) {
topLevelHeaders[e].tagName = `h${childHeaderDepth - gap}`;
}
else {
break;
}
}
// don't let it increment `i`. we need to get the offending header checked
// again so it sets the new `lastHeaderDepth`
continue;
}
i++;
}
}
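// Illustrative sketch (assumption, not part of the original compiled output):
// fixHeaders rewrites tagName in place on the parsed fragment. For a fragment
// parsed from '<h2>A</h2><h4>B</h4>', fixHeaders(fragment, false) keeps h2 as
// the root and pulls B up to h3, while fixHeaders(fragment, true) pulls the
// whole outline up to h1, h2.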
function convertCommentNode(node) {
const commentElement = createElement('_comment', null, []);
insertText(commentElement, getCommentNodeContent(node));
insertBefore(node.parent, commentElement, node);
detachNode(node);
return commentElement;
}
/**
 * Flattens the DOM tree into a single array in breadth-first order
*/
function bfsOrder(node) {
const inqueue = [node];
const outqueue = new Array();
while (inqueue.length > 0) {
const elem = inqueue.shift();
outqueue.push(elem);
if (utils_1.isParentNode(elem))
inqueue.push(...elem.childNodes
.map(child => isCommentNode(child)
? convertCommentNode(child)
: child)
.filter(utils_1.isElement));
}
outqueue.shift(); // remove root node
return outqueue;
}
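// Illustrative sketch (assumption): for a fragment parsed from
// '<p>one <em>two</em></p><ul><li>three</li></ul>', bfsOrder returns the
// element nodes [p, ul, em, li] -- breadth-first with the root excluded, text
// nodes filtered out, and comment nodes first rewritten into placeholder
// '_comment' elements by convertCommentNode.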
function getChildText(child) {
if (node_1.isConverterNode(child)) {
return child._replacement;
}
else if (isTextNode(child)) {
return utils_1.cleanText(child);
}
else {
throw new Error(`Unsupported node type: ${child.type}`);
}
}
/**
 * Constructs a Markdown string of replacement text for a given node
*/
function getContent(node) {
if (isTextNode(node)) {
return getTextNodeContent(node);
}
let content = '';
let previousSibling = null;
if (utils_1.isParentNode(node))
node.childNodes.forEach(child => {
var _a, _b;
let childText = getChildText(child);
// prevent extra whitespace around `<br>`s
if (utils_1.isElement(child) && child.tagName === 'br') {
content = content.trimRight();
}
if ((utils_1.isElement(previousSibling) ? previousSibling.tagName : undefined) === 'br') {
childText = childText.trimLeft();
}
if (previousSibling != null) {
const leading = node_1.isConverterNode(child) && ((_a = child._whitespace) === null || _a === void 0 ? void 0 : _a.leading) || '';
const trailing = node_1.isConverterNode(previousSibling) && ((_b = previousSibling._whitespace) === null || _b === void 0 ? void 0 : _b.trailing) || '';
content += `${leading}${trailing}`.replace(/\n{3,}/, '\n\n');
}
content += childText;
previousSibling = child;
});
return content;
}
function canConvert(node, filter) {
if (typeof filter === 'string') {
return utils_1.isElement(node) && filter === node.tagName;
}
else if (Array.isArray(filter)) {
return utils_1.isElement(node) && Array.from(filter).includes(node.tagName);
}
else if (typeof filter === 'function') {
return filter(node);
}
else {
throw new TypeError('`filter` needs to be a string, array, or function');
}
}
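// Illustrative sketch (assumption): the three filter shapes accepted above.
//
//   canConvert(node, 'p');                        // single tag name
//   canConvert(node, ['h1', 'h2', 'h3']);         // array of tag names
//   canConvert(node, n => utils_1.isElement(n)
//       && /^h[1-6]$/.test(n.tagName));           // arbitrary predicate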
function findConverter(node) {
return converters_1.Converters.find(converter => canConvert(node, converter.filter));
}
function isFlankedByWhitespace(side, node) {
let regExp;
let sibling;
if (side === 'left') {
sibling = node.previousSibling;
regExp = /\s$/;
}
else {
sibling = node.nextSibling;
regExp = /^\s/;
}
if (sibling && !utils_1.isBlock(sibling)) {
return regExp.test(getContent(sibling));
}
else {
return false;
}
}
function flankingWhitespace(node) {
var _a, _b, _c;
let leading = '';
let trailing = '';
if (!utils_1.isBlock(node)) {
const content = getContent(node);
const hasLeading = /^\s/.test(content);
const hasTrailing = /\s$/.test(content);
if (hasLeading && !isFlankedByWhitespace('left', node)) {
leading = ' ';
}
if (hasTrailing && !isFlankedByWhitespace('right', node)) {
trailing = ' ';
}
}
// add whitespace from leading / trailing whitespace attributes in first / last
// child nodes
if (utils_1.isParentNode(node)) {
const first = node.childNodes[0];
const last = (_a = node.childNodes.slice(-1)) === null || _a === void 0 ? void 0 : _a[0];
leading += node_1.isConverterNode(first) && ((_b = first._whitespace) === null || _b === void 0 ? void 0 : _b.leading) || '';
trailing += node_1.isConverterNode(last) && ((_c = last._whitespace) === null || _c === void 0 ? void 0 : _c.trailing) || '';
}
return { leading, trailing };
}
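// Illustrative sketch (assumption): in '<p>foo<em> bar</em>baz</p>' the <em>
// content is ' bar' and its left neighbour 'foo' does not end in whitespace, so
// flankingWhitespace returns { leading: ' ', trailing: '' }; the space is later
// re-attached outside the emphasis markers instead of being trapped inside them.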
/**
* Finds a Markdown converter, gets the replacement, and sets it on
* `_replacement`
*/
function process(node, links) {
node_1.assertIsConverterNode(node);
let whitespace = { leading: '', trailing: '' };
const content = getContent(node).trim();
const converter = node._converter;
if (converter.surroundingBlankLines) {
const surround = typeof converter.surroundingBlankLines === 'function'
? converter.surroundingBlankLines(node)
: converter.surroundingBlankLines;
if (typeof surround === 'object')
whitespace = surround;
else if (surround)
whitespace = { leading: '\n\n', trailing: '\n\n' };
}
else {
whitespace = flankingWhitespace(node);
if (converter.trailingWhitespace) {
whitespace.trailing += typeof converter.trailingWhitespace === 'function'
? converter.trailingWhitespace(node) || ''
: converter.trailingWhitespace || '';
}
}
if (utils_1.isElement(node) && node.tagName === 'li') {
// li isn't allowed to have leading whitespace
whitespace.leading = '';
}
node._replacement = converter.replacement(content, node, links);
node._whitespace = whitespace;
}
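// Illustrative sketch (assumption): a converter that asks for surrounding blank
// lines (typical for block-level output) leaves the node with
// _whitespace = { leading: '\n\n', trailing: '\n\n' }; inline converters instead
// get whitespace from flankingWhitespace plus any declared trailingWhitespace,
// and <li> nodes always have their leading whitespace cleared.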
/**
 * Removes whitespace-only text nodes that sit at the edges of their parent or
 * next to a block element
*/
function removeEmptyNodes(node) {
if (utils_1.isParentNode(node))
node.childNodes
.filter(child => {
if (isTextNode(child) && getTextNodeContent(child).trim() === '') {
const { previousSibling } = child;
const { nextSibling } = child;
if (!previousSibling || !nextSibling || utils_1.isBlock(previousSibling) || utils_1.isBlock(nextSibling)) {
return true;
}
}
})
.forEach(child => detachNode(child));
}
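// Illustrative sketch (assumption): given '<p>a</p>\n<p>b</p>', the '\n' text
// node sits between two block elements and is detached, while the single space
// in '<em>a</em> <strong>b</strong>' is flanked by inline siblings and survives.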
function default_1(dirtyMarkdown, options = {}) {
let content;
if (typeof dirtyMarkdown !== 'string') {
throw new TypeError('Markdown input is not a string');
}
const { ensureFirstHeaderIsH1 = true, alignHeaders = true } = options;
let out = '';
// handle yaml front-matter
try {
content = front_matter_1.default(dirtyMarkdown);
if (Object.keys(content.attributes).length !== 0) {
out += `---\n${js_yaml_1.default.safeDump(content.attributes).trim()}\n---\n\n`;
}
content = content.body;
}
catch (error) {
// parsing failed, just ignore front-matter
content = dirtyMarkdown;
}
const ast = marked_1.default.lexer(content);
const rawLinks = ast.links; // see issue: https://github.com/chjj/marked/issues/472
let links = Object.keys(rawLinks).map(link => ({
name: link.toLowerCase(),
url: rawLinks[link].href,
title: rawLinks[link].title || null
}));
links = lodash_1.default.sortBy(links, ['name', 'url']);
let html = marked_1.default.parser(ast);
// Escape potential ol triggers
html = html.replace(/(\d+)\. /g, '$1\\. ');
const root = parse5_1.parseFragment(html, { treeAdapter: tree_adapter_1.default });
// remove empty nodes that are direct children of the root first
removeEmptyNodes(root);
bfsOrder(root).forEach(removeEmptyNodes);
if (alignHeaders) {
fixHeaders(root, ensureFirstHeaderIsH1);
}
bfsOrder(root)
.map(node => {
const converter = findConverter(node);
if (converter) {
const converterNode = node;
converterNode._converter = converter;
return converterNode;
}
return node;
})
.reverse() // Process nodes in reverse (so deepest child elements are first).
.forEach(node => process(node, links));
out += getContent(root).trimRight() + '\n';
if (links.length > 0) {
out += '\n';
}
for (const { name, url, title } of Array.from(links)) {
        const optionalTitle = title ? ` "${title}"` : '';
out += `[${name}]: ${url}${optionalTitle}\n`;
}
return out;
}
exports.default = default_1;
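// Illustrative usage sketch (assumption, not part of the original compiled
// output): running this file directly tidies a small sample document with the
// same options documented above.
if (require.main === module) {
    const sample = '#First Header\n###Skipped a level\n\nSome text.\n';
    // With the defaults, headers are realigned so the document starts at h1.
    console.log(default_1(sample, { ensureFirstHeaderIsH1: true }));
}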
//# sourceMappingURL=index.js.map