@signalwire/docusaurus-plugin-llms-txt
Version:
Generate Markdown versions of Docusaurus HTML pages and an llms.txt index file
187 lines (186 loc) • 7.26 kB
JavaScript
import { parseLocalURLPath, createMatcher } from '@docusaurus/utils';
import { visit } from 'unist-util-visit';
import { HTML_OR_MD_EXTENSION_REGEX } from '../../constants';
import { formatUrl, ensureLeadingSlash } from '../../utils/url';
/**
* Check if a URL is an internal relative link using Docusaurus utilities
* Uses parseLocalURLPath which handles edge cases better than custom regex
*/
function isInternalLink(href) {
if (!href || href.startsWith('#')) {
return false;
}
// Use Docusaurus parseLocalURLPath which returns null for external URLs
const parsed = parseLocalURLPath(href);
return parsed !== null;
}
/**
* Resolve a link pathname to its actual route using the route lookup table
*/
function resolvePathname(pathname, routeLookup) {
if (!routeLookup) {
return pathname;
}
// Direct lookup
if (routeLookup.has(pathname)) {
return pathname;
}
// Try with trailing slash
const withTrailingSlash = pathname.endsWith('/') ? pathname : pathname + '/';
if (routeLookup.has(withTrailingSlash)) {
return withTrailingSlash;
}
// Try without trailing slash
const withoutTrailingSlash = pathname.endsWith('/') ? pathname.slice(0, -1) : pathname;
if (routeLookup.has(withoutTrailingSlash)) {
return withoutTrailingSlash;
}
// Try with index suffix
const withIndex = pathname.endsWith('/') ? pathname + 'index' : pathname + '/index';
if (routeLookup.has(withIndex)) {
return withIndex;
}
// Return original if not found
return pathname;
}
/**
* Check if a URL should be excluded from transformation based on all configuration options
*/
function isExcludedLink(href, options) {
// Parse the URL to get the pathname
const parsed = parseLocalURLPath(href);
if (!parsed) {
return false;
}
let { pathname } = parsed;
// Normalize pathname to match exclude patterns (ensure it starts with /)
pathname = ensureLeadingSlash(pathname);
// If we have a route lookup table, resolve the pathname first
const resolvedPathname = resolvePathname(pathname, options.routeLookup);
// First check explicit exclusion patterns for backward compatibility
if (options.excludeRoutes?.length) {
const isExcluded = createMatcher([...options.excludeRoutes]);
if (isExcluded(resolvedPathname)) {
return true;
}
}
// Use route lookup if available (preferred method)
if (options.routeLookup) {
const routeInfo = options.routeLookup.get(resolvedPathname);
// If route is not in lookup, it won't be processed
// If route is in lookup but has no HTML path, it won't be processed
return !routeInfo?.htmlPath;
}
// Fallback to false if no full config available
return false;
}
/**
* Check if link transformation should be skipped entirely
*/
function shouldSkipLinkTransformation(options) {
const { enableMarkdownFiles = true, relativePaths = true } = options;
// If relative paths are enabled and markdown files are disabled,
// skip processing as links don't need transformation
return relativePaths && !enableMarkdownFiles;
}
/**
* Determine transformation options for excluded links
*/
function getExcludedLinkOptions(options) {
const { relativePaths = true } = options;
// For excluded links, still apply base URL transformation if using absolute paths
// but don't add .md extension since we're not generating files for excluded paths
if (!relativePaths) {
return {
...options,
enableMarkdownFiles: false, // Force no .md extension for excluded links
};
}
// If using relative paths, leave excluded links unchanged
return null;
}
/**
* Transform a relative link based on configuration
*/
function transformInternalLink(href, options) {
const { enableMarkdownFiles = true, relativePaths = true, baseUrl = '', } = options;
// Parse the URL to handle query params and hash fragments properly
const parsed = parseLocalURLPath(href);
if (!parsed) {
return href; // Fallback if parsing fails
}
let { pathname } = parsed;
// Convert to standardized path (remove leading ./ if present)
if (pathname.startsWith('./')) {
pathname = pathname.slice(2);
}
// Ensure it starts with / for absolute path from site root
pathname = ensureLeadingSlash(pathname);
// Remove any existing file extensions and trailing slashes for consistent processing
pathname = pathname.replace(HTML_OR_MD_EXTENSION_REGEX, '');
// Remove trailing slashes (except for root path)
if (pathname !== '/' && pathname.endsWith('/')) {
pathname = pathname.slice(0, -1);
}
// If we have a route lookup table, resolve the pathname to its actual route
const resolvedPathname = resolvePathname(pathname, options.routeLookup);
// Use our URL formatting utility for the pathname
const transformedPathname = formatUrl(resolvedPathname, { relativePaths, enableMarkdownFiles }, baseUrl);
// Reconstruct the URL with query params and hash if they exist
const search = parsed.search ? `?${parsed.search}` : '';
const hash = parsed.hash ? `#${parsed.hash}` : '';
return `${transformedPathname}${search}${hash}`;
}
/**
* Process a single anchor element for link transformation
*/
function processAnchorElement(node, options) {
// Only process anchor elements with href attributes
if (node.tagName !== 'a' || !node.properties?.href) {
return;
}
const href = String(node.properties.href);
// Only transform internal links
if (!isInternalLink(href)) {
return;
}
// Check if the link is excluded
const isExcluded = isExcludedLink(href, options);
if (isExcluded) {
const excludedOptions = getExcludedLinkOptions(options);
if (excludedOptions) {
const transformedHref = transformInternalLink(href, excludedOptions);
node.properties.href = transformedHref;
}
return;
}
// Transform non-excluded links normally
const transformedHref = transformInternalLink(href, options);
node.properties.href = transformedHref;
}
/**
* Rehype plugin that transforms internal links based on plugin configuration.
*
* This plugin automatically determines when to run:
* - If relativePaths=true AND enableMarkdownFiles=false → plugin disabled
* - If relativePaths=false → prepend baseUrl to internal links
* - If enableMarkdownFiles=true → append .md to internal links
* - If relativePaths=false AND enableMarkdownFiles=true → do both
*
* Special handling for excluded links:
* - If relativePaths=false → excluded links get baseUrl but NO .md extension
* - If relativePaths=true → excluded links are left unchanged
*/
const rehypeLinks = function (options = {}) {
return function transformer(tree) {
// Check if we should skip transformation entirely
if (shouldSkipLinkTransformation(options)) {
return tree;
}
visit(tree, 'element', (node) => {
processAnchorElement(node, options);
});
return tree;
};
};
export default rehypeLinks;