@kingsword/node-html-markdown
Version:
Fast HTML to markdown cross-compiler, compatible with both node and the browser
251 lines • 11.2 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Visitor = void 0;
exports.getMarkdownForHtmlNodes = getMarkdownForHtmlNodes;
const nodes_1 = require("./nodes");
const utilities_1 = require("./utilities");
const translator_1 = require("./translator");
const config_1 = require("./config");
// endregion
/* ****************************************************************************************************************** */
// region: Visitor
/* ****************************************************************************************************************** */
/**
* Properties & methods are marked public because we may add middleware / transformer support in the future
*/
class Visitor {
    /**
     * Sets up the output state and immediately runs both passes over `rootNode`:
     * first `optimizeTree` (flags nodes with usable content), then `visitNode` (writes markdown
     * into `this.result.text`).
     *
     * @param instance - Owner object; its `options` and `translators` are read during the walk
     * @param rootNode - Root of the HTML node tree to convert
     * @param fileName - Stored on the visitor as-is; not otherwise used inside this class
     */
    constructor(instance, rootNode, fileName) {
        this.instance = instance;
        this.rootNode = rootNode;
        this.fileName = fileName;
        // Per-node metadata recorded while walking (list / pre / table state, escape flags, child translators)
        this.nodeMetadata = new Map();
        // URLs in first-seen order; a URL's 1-based position is its reference id
        this.urlDefinitions = [];
        this.result = {
            text: '',
            trailingNewlineStats: { whitespace: 0, newLines: 0 }
        };
        this.options = instance.options;
        this.optimizeTree(rootNode);
        this.visitNode(rootNode);
    }
    /* ********************************************************* */
    // region: Methods
    /* ********************************************************* */
    /**
     * Returns the 1-based id of `url` in `urlDefinitions`, registering the url first when it is new.
     *
     * @param url - URL to look up or register
     * @returns 1-based position of the url in `urlDefinitions`
     */
    addOrGetUrlDefinition(url) {
        const { urlDefinitions } = this;
        const knownIndex = urlDefinitions.indexOf(url);
        // `push` returns the new length, which is exactly the 1-based id of the entry just added
        return knownIndex >= 0 ? knownIndex + 1 : urlDefinitions.push(url);
    }
    /**
     * Appends `s` to the output and refreshes the trailing-whitespace statistics.
     *
     * @param s - Text to append; an empty value is a no-op unless `startPos` is given
     * @param startPos - When defined, the output is first cut back to this length
     * @param spaceIfRepeatingChar - When truthy, a single space is inserted if the output currently
     *   ends with the first character of `s`
     */
    appendResult(s, startPos, spaceIfRepeatingChar) {
        const shouldTruncate = startPos !== undefined;
        if (!s && !shouldTruncate)
            return;
        const { result } = this;
        if (shouldTruncate)
            result.text = result.text.substring(0, startPos);
        const separator = (spaceIfRepeatingChar && result.text.slice(-1) === s[0]) ? ' ' : '';
        result.text += separator + s;
        result.trailingNewlineStats = (0, utilities_1.getTrailingWhitespaceInfo)(result.text);
    }
    /**
     * Tops up the output with newlines, using the recorded trailing-newline count, so that
     * `count` newlines are requested in total.
     *
     * @param count - Requested number of trailing newlines (coerced to a number)
     */
    appendNewlines(count) {
        const missing = +count - this.result.trailingNewlineStats.newLines;
        this.appendResult('\n'.repeat(Math.max(0, missing)));
    }
    // endregion
    /* ********************************************************* */
    // region: Internal Methods
    /* ********************************************************* */
    /**
     * Optimize tree: sets a boolean `preserve` flag on every node reachable from `node`.
     *
     * A node is preserved when it is a text node, an element listed in `contentlessElements`,
     * a childless node whose translator is a function or has `preserveIfEmpty`, or when any
     * of its children is preserved. Every child is always visited so each one gets its own flag.
     */
    optimizeTree(node) {
        const { perfStart, perfStop, getChildNodes } = utilities_1;
        const { isTextNode, isElementNode } = nodes_1;
        const { contentlessElements } = config_1;
        const { translators } = this.instance;
        perfStart('Optimize tree');
        const markUsable = (current) => {
            let usable = false;
            const selfContained = isTextNode(current) ||
                (isElementNode(current) && contentlessElements.includes(current.tagName));
            if (selfContained) {
                usable = true;
            }
            else {
                const children = getChildNodes(current);
                if (children.length) {
                    for (const child of children) {
                        // Recurse unconditionally; only the accumulated flag short-circuits
                        const childUsable = markUsable(child);
                        usable = usable || childUsable;
                    }
                }
                else {
                    const translator = translators[current.tagName];
                    usable = Boolean(translator != null && translator.preserveIfEmpty) || typeof translator === 'function';
                }
            }
            current.preserve = usable;
            return usable;
        };
        markUsable(node);
        perfStop('Optimize tree');
    }
    /**
     * Apply escaping and custom replacement rules.
     *
     * Whitespace runs are collapsed to one space unless `metadata.preserveWhitespace` is set.
     * When `metadata.noEscape` is set the text is returned right after that step; otherwise the
     * `globalEscape`, `lineStartEscape` and (optional) `textReplace` option patterns are applied in order.
     *
     * @param text - Raw text to process
     * @param metadata - Optional node metadata carrying `preserveWhitespace` / `noEscape`
     * @returns The processed text
     */
    processText(text, metadata) {
        const keepWhitespace = metadata != null && metadata.preserveWhitespace;
        const normalized = keepWhitespace ? text : text.replace(/\s+/g, ' ');
        if (metadata != null && metadata.noEscape)
            return normalized;
        const { lineStartEscape, globalEscape, textReplace } = this.options;
        const [globalPattern, globalReplacement] = [globalEscape[0], globalEscape[1]];
        const [linePattern, lineReplacement] = [lineStartEscape[0], lineStartEscape[1]];
        let escaped = normalized.replace(globalPattern, globalReplacement);
        escaped = escaped.replace(linePattern, lineReplacement);
        /* Custom replacement patterns run last, in the order given */
        if (textReplace) {
            for (const [pattern, replacement] of textReplace)
                escaped = escaped.replace(pattern, replacement);
        }
        return escaped;
    }
    /**
     * Writes the markdown for `node` (and, through recursion, its subtree) into `this.result`.
     *
     * @param node - Node to visit; skipped entirely unless its `preserve` flag is set
     * @param textOnly - When truthy, only text nodes are written and elements are skipped
     * @param metadata - Metadata inherited from the parent; extended copies are made for
     *   UL / OL / PRE / TABLE and for translator `noEscape` / `childTranslators` settings
     */
    visitNode(node, textOnly, metadata) {
        if (!node.preserve)
            return;
        const { result } = this;
        /* Text node */
        if ((0, nodes_1.isTextNode)(node)) {
            if (node.wholeText) {
                // Fill in derived strings only when the node does not already provide them
                if (node.text == null)
                    node.text = node.wholeText;
                if (node.trimmedText == null)
                    node.trimmedText = (0, utilities_1.trimNewLines)(node.wholeText);
            }
            const keepWhitespace = metadata != null && metadata.preserveWhitespace;
            if (node.isWhitespace && !keepWhitespace) {
                // A whitespace-only node becomes one space, but never at the very start or after existing whitespace
                const spaceRedundant = !result.text.length || result.trailingNewlineStats.whitespace > 0;
                return spaceRedundant ? void 0 : this.appendResult(' ');
            }
            const rawText = keepWhitespace ? node.text : node.trimmedText;
            return this.appendResult(this.processText(rawText, metadata));
        }
        if (textOnly || !(0, nodes_1.isElementNode)(node))
            return;
        /* Element node */
        const { tagName } = node;
        // Translators supplied by an ancestor (via metadata) take precedence over the instance-wide set
        const translatorTable = (metadata != null && metadata.translators)
            ? metadata.translators
            : this.instance.translators;
        const translator = translatorTable[tagName];
        /* Derive metadata for list, pre and table elements */
        let meta = metadata;
        if (tagName === 'UL' || tagName === 'OL') {
            const parentIndent = (meta != null && meta.indentLevel != null) ? meta.indentLevel : -1;
            meta = {
                ...meta,
                listItemNumber: 0,
                listKind: tagName,
                indentLevel: parentIndent + 1
            };
        }
        else if (tagName === 'LI') {
            // Mutates the list's own metadata object on purpose so sibling items see the running count
            if (meta != null && meta.listKind === 'OL')
                meta.listItemNumber = (meta.listItemNumber != null ? meta.listItemNumber : 0) + 1;
        }
        else if (tagName === 'PRE') {
            meta = { ...meta, preserveWhitespace: true };
        }
        else if (tagName === 'TABLE') {
            meta = { ...meta, tableMeta: { node: node } };
        }
        if (meta)
            this.nodeMetadata.set(node, meta);
        /* No translator: the element is transparent, just walk its children */
        if (!translator) {
            for (const child of (0, utilities_1.getChildNodes)(node))
                this.visitNode(child, textOnly, meta);
            return;
        }
        /* Resolve the translator config (factories are invoked with a context) */
        let cfg;
        let ctx;
        if ((0, translator_1.isTranslatorConfig)(translator)) {
            cfg = translator;
        }
        else {
            ctx = (0, translator_1.createTranslatorContext)(this, node, meta, translator.base);
            cfg = { ...translator.base, ...translator(ctx) };
        }
        // Ignored elements are dropped together with their whole subtree
        if (cfg.ignore)
            return;
        /* Propagate translator-driven metadata changes */
        if (cfg.noEscape && !(meta != null && meta.noEscape)) {
            meta = { ...meta, noEscape: cfg.noEscape };
            this.nodeMetadata.set(node, meta);
        }
        const inheritedTranslators = meta != null ? meta.translators : void 0;
        if (cfg.childTranslators && cfg.childTranslators !== inheritedTranslators) {
            meta = { ...meta, translators: cfg.childTranslators };
            this.nodeMetadata.set(node, meta);
        }
        // Remembered so a RemoveNode result can roll back everything written for this element
        const startPosOuter = result.text.length;
        /* Opening */
        if (cfg.surroundingNewlines)
            this.appendNewlines(+cfg.surroundingNewlines);
        if (cfg.prefix)
            this.appendResult(cfg.prefix);
        /* Inner content */
        if (typeof cfg.content === 'string') {
            this.appendResult(cfg.content, void 0, cfg.spaceIfRepeatingChar);
        }
        else {
            const startPos = result.text.length;
            const childTextOnly = cfg.recurse === false;
            for (const child of (0, utilities_1.getChildNodes)(node))
                this.visitNode(child, childTextOnly, meta);
            /* Translator post-processing of the freshly written child content */
            if (cfg.postprocess) {
                const postCtx = ctx || (0, translator_1.createTranslatorContext)(this, node, meta);
                const postRes = cfg.postprocess({
                    ...postCtx,
                    content: result.text.slice(startPos)
                });
                // RemoveNode: omit everything for this node (prefix, newlines, content, postfix)
                if (postRes === translator_1.PostProcessResult.RemoveNode) {
                    // Give the list number back so the next item reuses it
                    if (tagName === 'LI' && meta != null && meta.listItemNumber)
                        --meta.listItemNumber;
                    return this.appendResult('', startPosOuter);
                }
                // A string result replaces the child content written since startPos
                if (typeof postRes === 'string')
                    this.appendResult(postRes, startPos, cfg.spaceIfRepeatingChar);
            }
        }
        /* Closing */
        if (cfg.postfix)
            this.appendResult(cfg.postfix);
        if (cfg.surroundingNewlines)
            this.appendNewlines(+cfg.surroundingNewlines);
    }
}
// Publish the class on the CommonJS exports object (it was initialised to `void 0` at the top of the file)
exports.Visitor = Visitor;
// endregion
/* ****************************************************************************************************************** */
// region: Utilities
/* ****************************************************************************************************************** */
/**
 * Converts an HTML node tree to markdown text.
 *
 * Runs a `Visitor` over `rootNode` (timed under the 'walk' label), then post-processes the text:
 * optionally appends the collected link reference definitions, optionally collapses overly long
 * newline runs, and finally passes the result through `trimNewLines`.
 *
 * @param instance - Owner object; `instance.options.useLinkReferenceDefinitions` and
 *   `instance.options.maxConsecutiveNewlines` control the post-processing
 * @param rootNode - Root node handed to the visitor
 * @param fileName - Passed through to the visitor
 * @returns The final markdown string
 */
function getMarkdownForHtmlNodes(instance, rootNode, fileName) {
    (0, utilities_1.perfStart)('walk');
    const visitor = new Visitor(instance, rootNode, fileName);
    let markdown = visitor.result.text;
    (0, utilities_1.perfStop)('walk');
    /* Post-processing */
    const { useLinkReferenceDefinitions, maxConsecutiveNewlines } = instance.options;
    // Link reference definitions, one per line, numbered from 1 in registration order
    if (useLinkReferenceDefinitions) {
        const lastChar = markdown.slice(-1);
        // Make sure the definitions start on their own line
        if (/[^\r\n]/.test(lastChar))
            markdown += '\n';
        markdown += visitor.urlDefinitions
            .map((url, idx) => `\n[${idx + 1}]: ${url}`)
            .join('');
    }
    // Cap runs of newlines (with interleaved whitespace) at the configured maximum
    if (maxConsecutiveNewlines) {
        const excessNewlines = new RegExp(String.raw `(?:\r?\n\s*)+((?:\r?\n\s*){${maxConsecutiveNewlines}})`, 'g');
        markdown = markdown.replace(excessNewlines, '$1');
    }
    return (0, utilities_1.trimNewLines)(markdown);
}
// endregion
//# sourceMappingURL=visitor.js.map