UNPKG

@eslint/markdown

Version:

The official ESLint language plugin for Markdown

394 lines (393 loc) 15.6 kB
/** * @fileoverview Processes Markdown files for consumption by ESLint. * @author Brandon Mills */ //----------------------------------------------------------------------------- // Imports //----------------------------------------------------------------------------- import { fromMarkdown } from "mdast-util-from-markdown"; //----------------------------------------------------------------------------- // Type Definitions //----------------------------------------------------------------------------- /** * @import { Node, Parent, Code, Html } from "mdast"; * @import { Linter, Rule, AST } from "eslint"; * @import { Block, RangeMap } from "./types.js"; * @typedef {Linter.LintMessage} Message * @typedef {Rule.Fix} Fix * @typedef {AST.Range} Range */ //----------------------------------------------------------------------------- // Helpers //----------------------------------------------------------------------------- const UNSATISFIABLE_RULES = new Set([ "eol-last", // The Markdown parser strips trailing newlines in code fences "unicode-bom", // Code blocks will begin in the middle of Markdown files ]); const SUPPORTS_AUTOFIX = true; const BOM = "\uFEFF"; /** * @type {Map<string, Block[]>} */ const blocksCache = new Map(); /** * Performs a depth-first traversal of the Markdown AST. * @param {Node} node A Markdown AST node. * @param {{[key: string]: (node?: Node) => void}} callbacks A map of node types to callbacks. * @returns {void} */ function traverse(node, callbacks) { if (callbacks[node.type]) { callbacks[node.type](node); } else { callbacks["*"](); } const parent = /** @type {Parent} */ (node); if (typeof parent.children !== "undefined") { for (let i = 0; i < parent.children.length; i++) { traverse(parent.children[i], callbacks); } } } /** * Extracts `eslint-*` or `global` comments from HTML comments if present. * @param {string} html The text content of an HTML AST node. * @returns {string} The comment's text without the opening and closing tags or * an empty string if the text is not an ESLint HTML comment. */ function getComment(html) { const commentStart = "<!--"; const commentEnd = "-->"; const regex = /^(eslint\b|global\s)/u; if (html.slice(0, commentStart.length) !== commentStart || html.slice(-commentEnd.length) !== commentEnd) { return ""; } const comment = html.slice(commentStart.length, -commentEnd.length); if (!regex.test(comment.trim())) { return ""; } return comment; } // Before a code block, blockquote characters (`>`) are also considered // "whitespace". const leadingWhitespaceRegex = /^[>\s]*/u; /** * Gets the offset for the first column of the node's first line in the * original source text. * @param {Node} node A Markdown code block AST node. * @returns {number} The offset for the first column of the node's first line. */ function getBeginningOfLineOffset(node) { return node.position.start.offset - node.position.start.column + 1; } /** * Gets the leading text, typically whitespace with possible blockquote chars, * used to indent a code block. * @param {string} text The text of the file. * @param {Node} node A Markdown code block AST node. * @returns {string} The text from the start of the first line to the opening * fence of the code block. */ function getIndentText(text, node) { return leadingWhitespaceRegex.exec(text.slice(getBeginningOfLineOffset(node)))[0]; } /** * When applying fixes, the postprocess step needs to know how to map fix ranges * from their location in the linted JS to the original offset in the Markdown. * Configuration comments and indentation trimming both complicate this process. * * Configuration comments appear in the linted JS but not in the Markdown code * block. Fixes to configuration comments would cause undefined behavior and * should be ignored during postprocessing. Fixes to actual code after * configuration comments need to be mapped back to the code block after * removing any offset due to configuration comments. * * Fenced code blocks can be indented by up to three spaces at the opening * fence. Inside of a list, for example, this indent can be in addition to the * indent already required for list item children. Leading whitespace inside * indented code blocks is trimmed up to the level of the opening fence and does * not appear in the linted code. Further, lines can have less leading * whitespace than the opening fence, so not all lines are guaranteed to have * the same column offset as the opening fence. * * The source code of a non-configuration-comment line in the linted JS is a * suffix of the corresponding line in the Markdown code block. There are no * differences within the line, so the mapping need only provide the offset * delta at the beginning of each line. * @param {string} text The text of the file. * @param {Node} node A Markdown code block AST node. * @param {string[]} comments List of configuration comment strings that will be * inserted at the beginning of the code block. * @returns {RangeMap[]} A list of offset-based adjustments, where lookups are * done based on the `js` key, which represents the range in the linted JS, * and the `md` key is the offset delta that, when added to the JS range, * returns the corresponding location in the original Markdown source. */ function getBlockRangeMap(text, node, comments) { /* * The parser sets the fenced code block's start offset to wherever content * should normally begin (typically the first column of the line, but more * inside a list item, for example). The code block's opening fence may be * further indented by up to three characters. If the code block has * additional indenting, the opening fence's first backtick may be up to * three whitespace characters after the start offset. */ const startOffset = getBeginningOfLineOffset(node); /* * Extract the Markdown source to determine the leading whitespace for each * line. */ const code = text.slice(startOffset, node.position.end.offset); const lines = code.split("\n"); /* * The parser trims leading whitespace from each line of code within the * fenced code block up to the opening fence's first backtick. The first * backtick's column is the AST node's starting column plus any additional * indentation. */ const baseIndent = getIndentText(text, node).length; /* * Track the length of any inserted configuration comments at the beginning * of the linted JS and start the JS offset lookup keys at this index. */ const commentLength = comments.reduce((len, comment) => len + comment.length + 1, 0); /* * In case there are configuration comments, initialize the map so that the * first lookup index is always 0. If there are no configuration comments, * the lookup index will also be 0, and the lookup should always go to the * last range that matches, skipping this initialization entry. */ const rangeMap = [ { indent: baseIndent, js: 0, md: 0, }, ]; // Start the JS offset after any configuration comments. let jsOffset = commentLength; /* * Start the Markdown offset at the beginning of the block's first line of * actual code. The first line of the block is always the opening fence, so * the code begins on the second line. */ let mdOffset = startOffset + lines[0].length + 1; /* * For each line, determine how much leading whitespace was trimmed due to * indentation. Increase the JS lookup offset by the length of the line * post-trimming and the Markdown offset by the total line length. */ for (let i = 0; i + 1 < lines.length; i++) { const line = lines[i + 1]; const leadingWhitespaceLength = leadingWhitespaceRegex.exec(line)[0].length; // The parser trims leading whitespace up to the level of the opening // fence, so keep any additional indentation beyond that. const trimLength = Math.min(baseIndent, leadingWhitespaceLength); rangeMap.push({ indent: trimLength, js: jsOffset, // Advance `trimLength` character from the beginning of the Markdown // line to the beginning of the equivalent JS line, then compute the // delta. md: mdOffset + trimLength - jsOffset, }); // Accumulate the current line in the offsets, and don't forget the // newline. mdOffset += line.length + 1; jsOffset += line.length - trimLength + 1; } return rangeMap; } const codeBlockFileNameRegex = /filename=(?<quote>["'])(?<filename>.*?)\1/u; /** * Parses the file name from a block meta, if available. * @param {Block} block A code block. * @returns {string | null | undefined} The filename, if parsed from block meta. */ function fileNameFromMeta(block) { return block.meta ?.match(codeBlockFileNameRegex) ?.groups.filename.replaceAll(/\s+/gu, "_"); } const languageToFileExtension = { javascript: "js", ecmascript: "js", typescript: "ts", markdown: "md", }; /** * Extracts lintable code blocks from Markdown text. * @param {string} sourceText The text of the file. * @param {string} filename The filename of the file * @returns {Array<{ filename: string, text: string }>} Source code blocks to lint. */ function preprocess(sourceText, filename) { const text = sourceText.startsWith(BOM) ? sourceText.slice(1) : sourceText; const ast = fromMarkdown(text); const blocks = []; blocksCache.set(filename, blocks); /** * During the depth-first traversal, keep track of any sequences of HTML * comment nodes containing `eslint-*` or `global` comments. If a code * block immediately follows such a sequence, insert the comments at the * top of the code block. Any non-ESLint comment or other node type breaks * and empties the sequence. * @type {string[]} */ let htmlComments = []; traverse(ast, { "*"() { htmlComments = []; }, /** * Visit a code node. * @param {Code} node The visited node. * @returns {void} */ code(node) { if (node.lang) { const comments = []; for (const comment of htmlComments) { if (comment.trim() === "eslint-skip") { htmlComments = []; return; } comments.push(`/*${comment}*/`); } htmlComments = []; blocks.push({ ...node, baseIndentText: getIndentText(text, node), comments, rangeMap: getBlockRangeMap(text, node, comments), }); } }, /** * Visit an HTML node. * @param {Html} node The visited node. * @returns {void} */ html(node) { const comment = getComment(node.value); if (comment) { htmlComments.push(comment); } else { htmlComments = []; } }, }); return blocks.map((block, index) => { const [language] = block.lang.trim().split(" "); const fileExtension = Object.hasOwn(languageToFileExtension, language) ? languageToFileExtension[language] : language; return { filename: fileNameFromMeta(block) ?? `${index}.${fileExtension}`, text: [...block.comments, block.value, ""].join("\n"), }; }); } /** * Adjusts a fix in a code block. * @param {Block} block A code block. * @param {Fix} fix A fix to adjust. * @returns {Fix} The fix with adjusted ranges. */ function adjustFix(block, fix) { return { range: /** @type {Range} */ (fix.range.map(range => { // Advance through the block's range map to find the last // matching range by finding the first range too far and // then going back one. let i = 1; while (i < block.rangeMap.length && block.rangeMap[i].js <= range) { i++; } // Apply the mapping delta for this range. return range + block.rangeMap[i - 1].md; })), text: fix.text.replace(/\n/gu, `\n${block.baseIndentText}`), }; } /** * Creates a map function that adjusts messages in a code block. * @param {Block} block A code block. * @returns {(message: Message) => Message} A function that adjusts messages in a code block. */ function adjustBlock(block) { const leadingCommentLines = block.comments.reduce((count, comment) => count + comment.split("\n").length, 0); const blockStart = block.position.start.line; /** * Adjusts ESLint messages to point to the correct location in the Markdown. * @param {Message} message A message from ESLint. * @returns {Message} The same message, but adjusted to the correct location. */ return function adjustMessage(message) { if (!Number.isInteger(message.line)) { return { ...message, line: blockStart, column: block.position.start.column, }; } const lineInCode = message.line - leadingCommentLines; if (lineInCode < 1 || lineInCode >= block.rangeMap.length) { return null; } const out = { line: lineInCode + blockStart, column: message.column + block.rangeMap[lineInCode].indent, }; if (Number.isInteger(message.endLine)) { out.endLine = message.endLine - leadingCommentLines + blockStart; } if (Array.isArray(message.suggestions)) { out.suggestions = message.suggestions.map(suggestion => ({ ...suggestion, fix: adjustFix(block, suggestion.fix), })); } const adjustedFix = {}; if (message.fix) { adjustedFix.fix = adjustFix(block, message.fix); } return { ...message, ...out, ...adjustedFix }; }; } /** * Excludes unsatisfiable rules from the list of messages. * @param {Message} message A message from the linter. * @returns {boolean} True if the message should be included in output. */ function excludeUnsatisfiableRules(message) { return message && !UNSATISFIABLE_RULES.has(message.ruleId); } /** * Transforms generated messages for output. * @param {Array<Message[]>} messages An array containing one array of messages * for each code block returned from `preprocess`. * @param {string} filename The filename of the file * @returns {Message[]} A flattened array of messages with mapped locations. */ function postprocess(messages, filename) { const blocks = blocksCache.get(filename); blocksCache.delete(filename); return messages.flatMap((group, i) => { const adjust = adjustBlock(blocks[i]); return group.map(adjust).filter(excludeUnsatisfiableRules); }); } export const processor = { meta: { name: "@eslint/markdown/markdown", version: "7.0.0", // x-release-please-version }, preprocess, postprocess, supportsAutofix: SUPPORTS_AUTOFIX, };