UNPKG

typedoc

Version:

Create api documentation for TypeScript projects.

364 lines (363 loc) 14.1 kB
import { HtmlAttributeParser, ParserState } from "#node-utils";
import { TokenSyntaxKind } from "./lexer.js";
import MarkdownIt from "markdown-it";

const MdHelpers = new MarkdownIt().helpers;

/**
 * This is incredibly unfortunate. The comment lexer owns the responsibility
 * for splitting up text into text/code, this is totally fine for HTML links
 * but for markdown links, ``[`code`](./link)`` is valid, so we need to keep
 * track of state across calls to {@link textContent}.
 */
export class TextParserReentryState {
    /** True while a markdown link label (`[...`) has been opened but not yet closed. */
    withinLinkLabel = false;
    /** True while `](` has been seen but the link destination has not been reached yet. */
    withinLinkDest = false;
    /** True if the token most recently passed to {@link checkState} was a NewLine token. */
    lastPartWasNewline = false;

    /**
     * Reset the in-progress link state when `token` shows that a blank line
     * was crossed: either a code token whose text contains a blank line, or a
     * second consecutive NewLine token.
     *
     * @param token lexer token; only `kind` and `text` are read.
     */
    checkState(token) {
        switch (token.kind) {
            case TokenSyntaxKind.Code:
                if (/\n\s*\n/.test(token.text)) {
                    this.withinLinkLabel = false;
                    this.withinLinkDest = false;
                }
                break;
            case TokenSyntaxKind.NewLine:
                if (this.lastPartWasNewline) {
                    this.withinLinkLabel = false;
                    this.withinLinkDest = false;
                }
                break;
        }
        this.lastPartWasNewline = token.kind === TokenSyntaxKind.NewLine;
    }
}

/**
 * Look for relative links within a piece of text and add them to the {@link FileRegistry}
 * so that they can be correctly resolved during rendering.
 *
 * @param sourcePath path handed to `files.register` together with each discovered link.
 * @param token token to scan; `token.text` is scanned and `token.pos` is used to
 *  position warnings.
 * @param i18n provides `relative_path_0_is_not_a_file_and_will_not_be_copied_to_output`.
 * @param warning callback invoked with a message and a token-like object whenever a
 *  relative link could not be registered.
 * @param outContent array which receives the resulting `text` / `relative-link` parts.
 * @param files registry; `files.register(sourcePath, path)` is expected to return
 *  `{ target, anchor }` or a falsy value.
 * @param atNewLine whether the start of `token.text` is at the start of a line.
 * @param reentry markdown link state carried across calls, see {@link TextParserReentryState}.
 */
export function textContent(sourcePath, token, i18n, warning, outContent, files, atNewLine, reentry) {
    let lastPartEnd = 0;
    let canEndMarkdownLink = true;
    const data = {
        sourcePath,
        token,
        pos: 0, // relative to the token
        warning,
        files: files,
        atNewLine,
    };

    // Emits the text before `ref` followed by the link itself, then resumes
    // scanning right after the link.
    function addRef(ref) {
        canEndMarkdownLink = true;
        outContent.push({
            kind: "text",
            text: token.text.slice(lastPartEnd, ref.pos),
        });
        const link = {
            kind: "relative-link",
            text: token.text.slice(ref.pos, ref.end),
            target: ref.target,
            targetAnchor: ref.targetAnchor,
        };
        outContent.push(link);
        lastPartEnd = ref.end;
        data.pos = ref.end;
        if (!ref.target) {
            warning(i18n.relative_path_0_is_not_a_file_and_will_not_be_copied_to_output(token.text.slice(ref.pos, ref.end)), {
                kind: TokenSyntaxKind.Text,
                // ref.pos is relative to the token, but this pos is relative to the file.
                pos: token.pos + ref.pos,
                text: token.text.slice(ref.pos, ref.end),
            });
        }
    }

    while (data.pos < token.text.length) {
        if (canEndMarkdownLink) {
            const link = checkMarkdownLink(data, reentry);
            if (link) {
                addRef(link);
                continue;
            }
            // If we're within a Markdown link, then `checkMarkdownLink`
            // already scanned `token` up to a line feed (if any).
            canEndMarkdownLink = !reentry.withinLinkLabel && !reentry.withinLinkDest;
        }

        const reference = checkReference(data);
        if (reference) {
            addRef(reference);
            continue;
        }

        const tagLinks = checkTagLink(data);
        if (tagLinks.length) {
            for (const tagLink of tagLinks) {
                addRef(tagLink);
            }
            continue;
        }

        // Named differently from the `atNewLine` parameter to avoid shadowing it.
        const isLineFeed = token.text[data.pos] === "\n";
        data.atNewLine = isLineFeed;
        if (isLineFeed && !reentry.withinLinkDest) {
            canEndMarkdownLink = true;
        }
        ++data.pos;
    }

    if (lastPartEnd !== token.text.length) {
        outContent.push({ kind: "text", text: token.text.slice(lastPartEnd) });
    }
}

/**
 * Links are inline text with the form `[ text ]( url title )`.
 *
 * Images are just links with a leading `!` and lack of support for `[ref]` referring to a path
 * defined elsewhere, we don't care about that distinction here as we'll only replace the path
 * piece of the image.
 *
 * Reference: https://github.com/markdown-it/markdown-it/blob/14.1.0/lib/rules_inline/link.mjs
 * Reference: https://github.com/markdown-it/markdown-it/blob/14.1.0/lib/rules_inline/image.mjs
 *
 * Returns `{ pos, end, target, targetAnchor }` for a relative link destination, otherwise
 * `undefined`. May update `reentry` and may advance `data.pos` past a non-relative link.
 */
function checkMarkdownLink(data, reentry) {
    const { token, sourcePath, files } = data;

    let searchStart;
    if (reentry.withinLinkLabel || reentry.withinLinkDest) {
        // Continue a link which was started earlier.
        searchStart = data.pos;
    }
    else if (token.text[data.pos] === "[") {
        searchStart = data.pos + 1;
    }
    else {
        return;
    }

    if (!reentry.withinLinkDest) {
        const labelEnd = findLabelEnd(token.text, searchStart);
        if (labelEnd === -1 || token.text[labelEnd] === "\n") {
            // This markdown link might be split across multiple lines or input tokens
            //     [prefix `code` suffix](target)
            //     ........^^^^^^................
            // Unless we encounter two consecutive line feeds, expect it to keep going.
            reentry.withinLinkLabel = labelEnd !== data.pos || !data.atNewLine;
            return;
        }
        reentry.withinLinkLabel = false;
        if (!token.text.startsWith("](", labelEnd)) {
            return;
        }
        searchStart = labelEnd + 2;
    }

    // Skip whitespace (including line breaks) between "](" and the link destination.
    // https://spec.commonmark.org/0.31.2/#links
    const end = token.text.length;
    let lookahead = searchStart;
    for (let newlines = 0;; ++lookahead) {
        if (lookahead === end) {
            // Ran out of text, the destination may be in a later token.
            reentry.withinLinkDest = true;
            return;
        }
        switch (token.text[lookahead]) {
            case "\n":
                if (++newlines === 2) {
                    reentry.withinLinkDest = false;
                    return;
                }
                continue;
            case " ":
            case "\t":
                continue;
        }
        break;
    }
    reentry.withinLinkDest = false;

    const link = MdHelpers.parseLinkDestination(token.text, lookahead, end);
    if (link.ok) {
        // Only make a relative-link display part if it's actually a relative link.
        // Discard protocol:// links, unix style absolute paths, and windows style absolute paths.
        if (isRelativePath(link.str)) {
            const { target, anchor } = files.register(sourcePath, link.str) || { target: undefined, anchor: undefined };
            return {
                pos: lookahead,
                end: link.pos,
                target,
                targetAnchor: anchor,
            };
        }
        // This was a link, skip ahead to ensure we don't happen to parse
        // something else as a link within the link.
        data.pos = link.pos - 1;
    }
}

/**
 * Reference definitions are blocks with the form `[label]: link title`
 * Reference: https://github.com/markdown-it/markdown-it/blob/14.1.0/lib/rules_block/reference.mjs
 *
 * Note: This may include false positives where TypeDoc recognizes a reference block that markdown
 * does not if users start lines with something that looks like a reference block without fully
 * separating it from an above paragraph. For a first cut, this is good enough.
 *
 * Returns `{ pos, end, target, targetAnchor }` for a relative destination, otherwise `undefined`.
 */
function checkReference(data) {
    const { atNewLine, pos, token, files, sourcePath } = data;
    if (!atNewLine) {
        return;
    }
    const text = token.text;

    let lookahead = pos;
    while (lookahead < text.length && /[ \t]/.test(text[lookahead])) {
        ++lookahead;
    }

    // #2991, we check that this reference also doesn't look like a footnote reference
    // as it is unlikely that someone uses that syntax without intending for footnote behavior.
    // This introduces a problem if someone has an [^ref] and doesn't intend for that to
    // be interpreted as a footnote, but as a reference, but we can't have it both ways,
    // and having people rename their reference to not be confused with a footnote isn't a
    // horrible workaround.
    if (text[lookahead] !== "[" || text[lookahead + 1] === "^") {
        return;
    }

    while (lookahead < text.length && /[^\n\]]/.test(text[lookahead])) {
        ++lookahead;
    }
    if (!text.startsWith("]:", lookahead)) {
        return;
    }
    lookahead += 2;
    while (lookahead < text.length && /[ \t]/.test(text[lookahead])) {
        ++lookahead;
    }

    const link = MdHelpers.parseLinkDestination(text, lookahead, text.length);
    if (link.ok) {
        if (isRelativePath(link.str)) {
            const { target, anchor } = files.register(sourcePath, link.str) || { target: undefined, anchor: undefined };
            return {
                pos: lookahead,
                end: link.pos,
                target,
                targetAnchor: anchor,
            };
        }
        // Not a relative path, skip past the destination.
        data.pos = link.pos - 1;
    }
}

// Tag prefixes recognized by `checkTagLink` and, for each, the attributes
// which may hold relative paths together with the function that extracts them.
const TAG_LINK_ATTRIBUTES = [
    { prefix: "<img ", attributes: { src: checkAttributeDirectPath, srcset: checkAttributeSrcSet } },
    { prefix: "<link ", attributes: { imagesrcset: checkAttributeSrcSet } },
    { prefix: "<a ", attributes: { href: checkAttributeDirectPath } },
    { prefix: "<source ", attributes: { src: checkAttributeDirectPath, srcset: checkAttributeSrcSet } },
];

/**
 * Looks for `<a href="./relative">`, `<img src="./relative">`, and `<source srcset="./relative">`
 *
 * Returns an array of `{ pos, end, target, targetAnchor }`, empty if nothing was found.
 */
function checkTagLink(data) {
    const { pos, token } = data;
    for (const { prefix, attributes } of TAG_LINK_ATTRIBUTES) {
        if (token.text.startsWith(prefix, pos)) {
            // Advance past the tag name only, so that attribute parsing starts on the
            // whitespace separating the name from the attributes. Previously only "<img "
            // did this: "<link " stopped inside the tag name, while "<a " and "<source "
            // went one character too far, which made the caller skip that character
            // without inspecting it when the tag contained no relative links.
            data.pos += prefix.length - 1;
            return checkAttributes(data, attributes);
        }
    }
    return [];
}

/**
 * Walks the attributes of the tag starting at `data.pos` and collects the links
 * reported by the handler in `attributes` for each attribute name it recognizes.
 */
function checkAttributes(data, attributes) {
    const links = [];
    const parser = new HtmlAttributeParser(data.token.text, data.pos);
    while (parser.state !== ParserState.END) {
        if (parser.state === ParserState.BeforeAttributeValue &&
            Object.prototype.hasOwnProperty.call(attributes, parser.currentAttributeName)) {
            // Step once more before reading the value and its start/end positions.
            parser.step();
            links.push(...attributes[parser.currentAttributeName](data, parser.currentAttributeValue, parser.currentAttributeValueStart, parser.currentAttributeValueEnd));
        }
        parser.step();
    }
    return links;
}

/**
 * Handles an attribute whose whole value is a path: registers the trimmed value
 * if it is a relative path and reports the untrimmed `pos`-`end` range.
 */
function checkAttributeDirectPath(data, text, pos, end) {
    const path = text.trim();
    if (!isRelativePath(path)) {
        return [];
    }
    const { target, anchor } = data.files.register(data.sourcePath, path) || { target: undefined, anchor: undefined };
    return [{
            pos,
            end,
            target,
            targetAnchor: anchor,
        }];
}

// See https://html.spec.whatwg.org/multipage/images.html#srcset-attribute
function checkAttributeSrcSet(data, text, pos, _end) {
    const result = [];
    let textPos = 0;

    parseImageCandidate();
    while (textPos < text.length && text[textPos] === ",") {
        ++textPos;
        parseImageCandidate();
    }
    return result;

    function parseImageCandidate() {
        // 1. Zero or more ASCII whitespace
        while (textPos < text.length && /[\t\r\f\n ]/.test(text[textPos])) {
            ++textPos;
        }

        // 2. A valid non-empty URL that does not start or end with a comma
        // TypeDoc: We don't exactly match this, PR welcome! For now, just permit anything
        // that's not whitespace or a comma
        const url = text.slice(textPos).match(/^[^\t\r\f\n ,]+/);
        if (url && isRelativePath(url[0])) {
            const { target, anchor } = data.files.register(data.sourcePath, url[0]) || { target: undefined, anchor: undefined };
            result.push({
                pos: pos + textPos,
                end: pos + textPos + url[0].length,
                target,
                targetAnchor: anchor,
            });
        }
        textPos += url ? url[0].length : 0;

        // 3. Zero or more ASCII whitespace
        while (textPos < text.length && /[\t\r\f\n ]/.test(text[textPos])) {
            ++textPos;
        }

        // 4. Zero or one of the following:
        {
            // A width descriptor, consisting of: ASCII whitespace, a valid non-negative integer giving
            // a number greater than zero representing the width descriptor value, and a U+0077 LATIN
            // SMALL LETTER W character.
            const w = text.slice(textPos).match(/^\+?\d+\s*w/);
            textPos += w ? w[0].length : 0;

            // A pixel density descriptor, consisting of: ASCII whitespace, a valid floating-point number
            // giving a number greater than zero representing the pixel density descriptor value, and a
            // U+0078 LATIN SMALL LETTER X character.
            if (!w) {
                // The sign of the exponent is optional and the integer part may be omitted
                // (`1e2x`, `.5x`). If such a descriptor is not consumed here, parsing stops
                // and all following image candidates are lost.
                const x = text.slice(textPos).match(/^\+?(?:\d+(?:\.\d+)?|\.\d+)(?:[eE][+-]?\d+)?\s*x/);
                textPos += x ? x[0].length : 0;
            }
        }

        // 5. Zero or more ASCII whitespace
        while (textPos < text.length && /[\t\r\f\n ]/.test(text[textPos])) {
            ++textPos;
        }
    }
}

function isRelativePath(link) {
    // Lots of edge cases encoded right here!
    // Originally, this attempted to match protocol://, but...
    // `mailto:example@example.com` is not a relative path
    // `C:\foo` is not a relative path
    // `/etc/passwd` is not a relative path
    // `#anchor` is not a relative path
    return !/^[a-z]+:|^\/|^#/i.test(link);
}

/**
 * Scans `text` from `pos` for the end of a markdown link label.
 *
 * Returns the index of the first unescaped `]`, `[` or line feed (a line feed
 * directly after a backslash also counts), or -1 if the text ends first.
 */
function findLabelEnd(text, pos) {
    while (pos < text.length) {
        switch (text[pos]) {
            case "\\":
                // Skip the escaped character, unless it is a line feed.
                ++pos;
                if (pos < text.length && text[pos] === "\n") {
                    return pos;
                }
                break;
            case "\n":
            case "]":
            case "[":
                return pos;
        }
        ++pos;
    }
    return -1;
}