@diplodoc/transform
Version:
A simple transformer of text in YFM (Yandex Flavored Markdown) to HTML
297 lines • 11 kB
JavaScript
;
const utils_1 = require("../utils");
const termDefinitions_1 = require("./termDefinitions");
const constants_1 = require("./constants");
function setTermAttrs(token, termKey) {
token.attrSet('class', 'yfm yfm-term_title');
token.attrSet('term-key', ':' + termKey);
token.attrSet('role', 'button');
token.attrSet('aria-controls', ':' + termKey + '_element');
token.attrSet('tabindex', '0');
token.attrSet('id', (0, utils_1.generateID)());
}
/**
* Scans src for [text](*termId) starting at `start`.
*
* @param src - source string
* @param start - position of the opening `[`
* @param max - maximum position to scan
* @returns match info or null
*/
function matchTermPattern(src, start, max) {
// Scan for closing ] (no nested [ allowed, matching the original regex)
let pos = start + 1;
while (pos <= max &&
src.charCodeAt(pos) !== 0x5d /* ] */ &&
src.charCodeAt(pos) !== 0x5b /* [ */) {
if (src.charCodeAt(pos) === 0x5c /* \ */) {
pos++;
}
pos++;
}
if (pos > max || src.charCodeAt(pos) !== 0x5d /* ] */) {
return null;
}
const labelEnd = pos;
if (labelEnd === start + 1) {
return null;
}
// Expect (*
pos = labelEnd + 1;
if (pos + 1 > max ||
src.charCodeAt(pos) !== 0x28 /* ( */ ||
src.charCodeAt(pos + 1) !== 0x2a /* * */) {
return null;
}
pos += 2;
// Read term id until )
const termIdStart = pos;
while (pos <= max &&
src.charCodeAt(pos) !== 0x29 /* ) */ &&
src.charCodeAt(pos) !== 0x0a /* \n */) {
pos++;
}
if (pos > max || src.charCodeAt(pos) !== 0x29 /* ) */) {
return null;
}
const termId = src.slice(termIdStart, pos);
if (!termId) {
return null;
}
return { labelEnd, termId, endPos: pos + 1 };
}
/**
* Inline rule that matches [text](*termId) for defined terms.
* Runs before the link rule so that * characters inside the
* pattern are consumed and never trigger emphasis.
*
* @param state - inline parser state
* @param silent - if true, only validate without creating tokens
* @returns true if the pattern was matched
*/
function termInlineRule(state, silent) {
if (state.src.charCodeAt(state.pos) !== 0x5b /* [ */ || !state.env.terms) {
return false;
}
const match = matchTermPattern(state.src, state.pos, state.posMax);
if (!match || !state.env.terms[':' + match.termId]) {
return false;
}
if (!silent) {
const labelContent = state.src.slice(state.pos + 1, match.labelEnd).replace(/\\(.)/g, '$1');
const termOpen = state.push('term_open', 'i', 1);
setTermAttrs(termOpen, match.termId);
const textToken = state.push('text', '', 0);
textToken.content = labelContent;
state.push('term_close', 'i', -1);
}
state.pos = match.endPos;
return true;
}
const RAW_CODE_TOKEN_TYPES = new Set(['fence', 'code_block', 'yfm_page-constructor']);
function collectRawTermMatches(content, reg, out) {
reg.lastIndex = 0;
let match;
while ((match = reg.exec(content))) {
out.add(':' + match[3]);
}
}
function collectTermsFromRawContent(tokens, reg, out) {
for (const token of tokens) {
if (RAW_CODE_TOKEN_TYPES.has(token.type) && token.content) {
collectRawTermMatches(token.content, reg, out);
}
if (token.type === 'inline' && token.children) {
for (const child of token.children) {
if (child.type === 'code_inline' && child.content) {
collectRawTermMatches(child.content, reg, out);
}
}
}
}
}
function collectTermKeysFromInlineChildren(children, out) {
for (const child of children) {
if (child.type === 'term_open') {
const key = child.attrGet('term-key');
if (key) {
out.add(key);
}
}
}
}
function collectTermsFromContent(tokens, out) {
let inDfn = false;
for (const token of tokens) {
if (token.type === 'dfn_open') {
inDfn = true;
continue;
}
if (token.type === 'dfn_close') {
inDfn = false;
continue;
}
if (!inDfn && token.type === 'inline' && token.children) {
collectTermKeysFromInlineChildren(token.children, out);
}
}
}
function collectTermsFromDfns(tokens, out, referencedDfns) {
let inReferencedDfn = false;
for (const token of tokens) {
if (token.type === 'dfn_open') {
const key = (token.attrGet('id') || '').replace(/_element$/, '');
inReferencedDfn = Boolean(key) && referencedDfns.has(key);
continue;
}
if (token.type === 'dfn_close') {
inReferencedDfn = false;
continue;
}
if (inReferencedDfn && token.type === 'inline' && token.children) {
collectTermKeysFromInlineChildren(token.children, out);
}
}
}
function findDfnCloseIdx(tokens, from) {
let endIdx = from;
while (endIdx < tokens.length && tokens[endIdx].type !== 'dfn_close') {
endIdx++;
}
return endIdx;
}
function removeUnreferencedDefinitions(tokens, referencedTerms) {
let idx = 0;
while (idx < tokens.length) {
const tok = tokens[idx];
if (tok.type === 'dfn_open') {
const termKey = (tok.attrGet('id') || '').replace(/_element$/, '');
if (termKey && !referencedTerms.has(termKey)) {
const endIdx = findDfnCloseIdx(tokens, idx + 1);
if (endIdx < tokens.length) {
tokens.splice(idx, endIdx - idx + 1);
continue;
}
}
}
idx++;
}
}
const term = (md, options) => {
const escapeRE = md.utils.escapeRE;
const arrayReplaceAt = md.utils.arrayReplaceAt;
const { isLintRun } = options;
// Prevent * URLs from being parsed as regular links (backward compatibility)
const defaultLinkValidation = md.validateLink;
md.validateLink = function (url) {
if (url.startsWith('*')) {
return false;
}
return defaultLinkValidation(url);
};
// Inline rule: handles [text](*termId) before emphasis can interfere with *
md.inline.ruler.before('link', 'term_inline', termInlineRule);
function termReplace(state) {
let i, j, l, tokens, token, text, nodes, pos, termMatch, currentToken;
const blockTokens = state.tokens;
if (!state.env.terms) {
return;
}
const termKeys = Object.keys(state.env.terms);
if (!termKeys.length) {
return;
}
const referencedTerms = new Set();
const regTerms = termKeys
.map((el) => el.slice(1))
.map(escapeRE)
.join('|');
const regText = '\\[([^\\[]+)\\](\\(\\*(' + regTerms + ')\\))';
const reg = new RegExp(regText, 'g');
collectTermsFromRawContent(blockTokens, reg, referencedTerms);
collectTermsFromContent(blockTokens, referencedTerms);
// Collect terms transitively referenced inside definitions of referenced terms.
collectTermsFromDfns(blockTokens, referencedTerms, referencedTerms);
for (j = 0, l = blockTokens.length; j < l; j++) {
if (blockTokens[j].type === 'heading_open') {
while (blockTokens[j].type !== 'heading_close') {
j++;
}
continue;
}
if (blockTokens[j].type !== 'inline') {
continue;
}
tokens = blockTokens[j].children;
for (i = tokens.length - 1; i >= 0; i--) {
currentToken = tokens[i];
if (currentToken.type === 'link_close') {
while (tokens[i].type !== 'link_open') {
i--;
}
continue;
}
if (currentToken.type !== 'text') {
continue;
}
pos = 0;
text = currentToken.content;
reg.lastIndex = 0;
nodes = [];
// Find terms without definitions
const regexAllTerms = new RegExp(constants_1.BASIC_TERM_REGEXP, 'gm');
const uniqueTerms = [
...new Set([...text.matchAll(regexAllTerms)].map((el) => `:${el[3]}`)),
];
const notDefinedTerms = uniqueTerms.filter((el) => !Object.keys(state.env.terms).includes(el));
if (notDefinedTerms.length && isLintRun) {
token = new state.Token('__yfm_lint', '', 0);
token.hidden = true;
token.map = blockTokens[j].map;
token.attrSet('YFM007', 'true');
nodes.push(token);
}
while ((termMatch = reg.exec(text))) {
const termTitle = termMatch[1];
const termKey = termMatch[3];
referencedTerms.add(':' + termKey);
if (termMatch.index > 0 || termMatch[1].length > 0) {
token = new state.Token('text', '', 0);
token.content = text.slice(pos, termMatch.index);
nodes.push(token);
}
token = new state.Token('term_open', 'i', 1);
setTermAttrs(token, termKey);
nodes.push(token);
token = new state.Token('text', '', 0);
token.content = termTitle;
nodes.push(token);
token = new state.Token('term_close', 'i', -1);
nodes.push(token);
pos = reg.lastIndex;
}
if (!nodes.length) {
continue;
}
if (pos < text.length) {
token = new state.Token('text', '', 0);
token.content = text.slice(pos);
nodes.push(token);
}
// replace current node
blockTokens[j].children = tokens = arrayReplaceAt(tokens, i, nodes);
}
}
// Remove definitions without any reference on the page
// Skip during linting to allow lint rules to check term definitions
if (!isLintRun) {
removeUnreferencedDefinitions(state.tokens, referencedTerms);
}
}
md.block.ruler.before('reference', 'termDefinitions', (0, termDefinitions_1.termDefinitions)(md, options), {
alt: ['paragraph', 'reference'],
});
md.core.ruler.after('linkify', 'termReplace', termReplace);
};
module.exports = term;
//# sourceMappingURL=index.js.map