linkinator
Version:
Find broken links, missing images, etc. in your HTML. Scurry around your site and find all those broken links.
535 lines (503 loc) • 21.8 kB
JavaScript
#!/usr/bin/env node
import process from 'node:process';
import chalk from 'chalk';
import meow from 'meow';
import packageJson from '../package.json' with { type: 'json' };
import { getConfig } from './config.js';
import { LinkChecker, LinkState, } from './index.js';
import { Format, Logger, LogLevel } from './logger.js';
// Command-line definition: the help text shown by `--help`, followed by the
// flag schema meow uses to parse argv. Flag keys are camelCase and correspond
// to the kebab-case names documented in the help text (e.g. `urlRewriteSearch`
// for `--url-rewrite-search`), so every key must match the name main() reads.
const cli = meow(`
Usage
$ linkinator LOCATION [ --arguments ]
Positional arguments
LOCATION
Required. Either the URLs or the paths on disk to check for broken links.
Flags
--concurrency
The number of connections to make simultaneously. Defaults to 100.
--config
Path to the config file to use. Looks for \`linkinator.config.json\` by default.
--directory-listing
Include an automatic directory index file when linking to a directory.
Defaults to 'false'.
--clean-urls
Enable clean URLs (extensionless links). When enabled, links like '/about'
will automatically resolve to '/about.html' if the file exists.
Mimics behavior of modern static hosting platforms like Vercel.
Defaults to 'false'.
--format, -f
Return the data in CSV or JSON format.
--header, -h
List of additional headers to be include in the request. use key:value notation.
--help
Show this command.
--version
Show the version number.
--markdown
Automatically parse and scan markdown if scanning from a location on disk.
--recurse, -r
Recursively follow links on the same root domain.
--check-css
Extract and check URLs found in CSS properties (inline styles, <style> tags, and external CSS files).
This includes url() functions, @import statements, and other CSS URL references.
Defaults to false.
--check-fragments
Validate fragment identifiers (URL anchors like #section-name) exist on the target HTML page.
Invalid fragments will be marked as broken. Only checks server-rendered HTML (not JavaScript-added fragments).
Defaults to false.
--redirects
Control how redirects are handled. Options are 'allow' (default, follows redirects),
'warn' (follows but emits warnings), or 'error' (treats redirects as broken).
--require-https
Enforce HTTPS links. Options are 'off' (default, accepts both HTTP and HTTPS),
'warn' (accepts both but emits warnings for HTTP), or 'error' (treats HTTP links as broken).
--allow-insecure-certs
Allow invalid or self-signed SSL certificates. Useful for local development with
untrusted certificates. Defaults to false.
--retry,
Automatically retry requests that return HTTP 429 responses and include
a 'retry-after' header. Defaults to false.
--retry-errors,
Automatically retry requests that return 5xx or unknown response.
--retry-errors-count,
How many times should an error be retried?
--retry-errors-jitter,
Random jitter applied to error retry.
--server-root
When scanning a locally directory, customize the location on disk
where the server is started. Defaults to the path passed in [LOCATION].
--skip, -s
List of urls in regexy form to not include in the check.
Can be specified multiple times.
--status-code
Control how specific HTTP status codes are handled. Format: "CODE:ACTION"
where CODE is a numeric status code (e.g., 403) or pattern (e.g., 4xx, 5xx)
and ACTION is one of: ok (success), warn (success with warning),
skip (ignore link), or error (force failure).
Can be specified multiple times. Example: --status-code "403:warn"
--timeout
Request timeout in ms. Defaults to 0 (no timeout).
--url-rewrite-search
Pattern to search for in urls. Must be used with --url-rewrite-replace.
--url-rewrite-replace
Expression used to replace search content. Must be used with --url-rewrite-search.
--user-agent
The user agent passed in all HTTP requests. Defaults to 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.117 Safari/537.36'
--verbosity
Override the default verbosity for this command. Available options are
'debug', 'info', 'warning', 'error', and 'none'. Defaults to 'warning'.
Examples
$ linkinator docs/
$ linkinator https://www.google.com
$ linkinator . --recurse
$ linkinator . --skip www.googleapis.com
$ linkinator . --skip example.com --skip github.com
$ linkinator . --format CSV
$ linkinator https://example.com --recurse --check-fragments --redirects error --require-https error --check-css
`, {
  importMeta: import.meta,
  version: packageJson.version,
  flags: {
    config: { type: 'string' },
    concurrency: { type: 'number' },
    recurse: { type: 'boolean', shortFlag: 'r' },
    skip: { type: 'string', shortFlag: 's', isMultiple: true },
    statusCode: { type: 'string', isMultiple: true },
    format: { type: 'string', shortFlag: 'f' },
    silent: { type: 'boolean' },
    timeout: { type: 'number' },
    markdown: { type: 'boolean' },
    checkCss: { type: 'boolean' },
    checkFragments: { type: 'boolean' },
    serverRoot: { type: 'string' },
    verbosity: { type: 'string' },
    directoryListing: { type: 'boolean' },
    cleanUrls: { type: 'boolean' },
    redirects: { type: 'string', choices: ['allow', 'warn', 'error'] },
    requireHttps: { type: 'string', choices: ['off', 'warn', 'error'] },
    allowInsecureCerts: { type: 'boolean' },
    retry: { type: 'boolean' },
    retryErrors: { type: 'boolean' },
    retryErrorsCount: { type: 'number', default: 5 },
    retryErrorsJitter: { type: 'number', default: 3000 },
    urlRewriteSearch: { type: 'string' },
    // Spelled to match `--url-rewrite-replace` and the `flags.urlRewriteReplace`
    // reads in main(); the previous key `urlReWriteReplace` declared a
    // different flag name, leaving the documented flag without a declared type.
    urlRewriteReplace: { type: 'string' },
    header: { type: 'string', shortFlag: 'h', isMultiple: true },
  },
  booleanDefault: undefined,
});
// Options for the current run. Left undefined until main() assigns the result
// of `getConfig(cli.flags)` to it.
let flags;
/**
 * Report whether the program appears to be running as a bun-compiled binary.
 *
 * The check is purely textual: it is true only when the first argv entry is
 * exactly the string "bun". According to the original author's notes, that is
 * what a `bun build --compile` executable typically reports, whereas running
 * through the `bun` CLI yields the full path to the bun binary (no match).
 * A compiled executable that is itself named "bun" cannot be told apart.
 *
 * @returns {boolean} True when `process.argv[0]` equals "bun".
 */
function isBunExecutable() {
  const [executable] = process.argv;
  return executable === 'bun';
}
/**
 * Turn the repeated `--header` values ("Name:value") into a headers object.
 * Only the first colon separates name from value, so values may contain colons.
 *
 * @param {string[]} items Raw header strings.
 * @returns {Record<string, string>} Header names mapped to their values.
 * @throws {Error} When an item has no colon, an empty name, or an empty value.
 */
function parseHeaders(items) {
  const pairs = items.map((item) => {
    const colonIndex = item.indexOf(':');
    if (colonIndex === -1) {
      throw new Error(`Invalid header format: "${item}". Use "Header-Name:value" format.`);
    }
    const key = item.slice(0, colonIndex).trim();
    const value = item.slice(colonIndex + 1).trim();
    if (!key) {
      throw new Error(`Invalid header format: "${item}". Header name cannot be empty.`);
    }
    // `slice().trim()` always yields a string, so only emptiness needs checking.
    if (value === '') {
      throw new Error(`Invalid header format: "${item}". Header value cannot be empty.`);
    }
    return [key, value];
  });
  return Object.fromEntries(pairs);
}

/**
 * Flatten the `--skip` value(s) into individual patterns.
 *
 * With `isMultiple` enabled in meow, a comma delimited list is still passed
 * as an array, but each element may itself hold several patterns separated by
 * commas or whitespace, so every element is split again.
 *
 * @param {string | string[]} skip A single string or a list of strings.
 * @returns {string[]} The non-empty patterns, in input order.
 */
function parseSkipPatterns(skip) {
  const values = typeof skip === 'string' ? [skip] : skip;
  return values.flatMap((value) => value.split(/[\s,]+/).filter(Boolean));
}

/**
 * Apply `--status-code "CODE:ACTION"` values on top of an existing mapping.
 *
 * @param {string | string[]} statusCode One or more "CODE:ACTION" strings.
 * @param {Record<string, string>} [initial] Mapping to start from; later
 *   entries override earlier ones with the same code.
 * @returns {Record<string, string>} A new mapping of code to action.
 * @throws {Error} When an entry has no colon, an empty code, or an action
 *   other than ok, warn, skip or error.
 */
function parseStatusCodeFlags(statusCode, initial = {}) {
  const statusCodes = { ...initial };
  const items = Array.isArray(statusCode) ? statusCode : [statusCode];
  for (const item of items) {
    const colonIndex = item.indexOf(':');
    if (colonIndex === -1) {
      throw new Error(`Invalid status-code format: "${item}". Use "CODE:ACTION" format (e.g., "403:warn").`);
    }
    const code = item.slice(0, colonIndex).trim();
    const action = item.slice(colonIndex + 1).trim();
    if (!code) {
      throw new Error(`Invalid status-code format: "${item}". Status code cannot be empty.`);
    }
    if (!['ok', 'warn', 'skip', 'error'].includes(action)) {
      throw new Error(`Invalid status-code action: "${action}". Must be one of: ok, warn, skip, error.`);
    }
    statusCodes[code] = action;
  }
  return statusCodes;
}

/**
 * Escape one CSV field, quoting only when needed.
 *
 * A field is wrapped in double quotes (with embedded quotes doubled) when it
 * contains a comma, a double quote, a line feed, or a carriage return.
 *
 * @param {string} field The raw field; falsy values become an empty field.
 * @returns {string} The field, ready to be joined with commas.
 */
function escapeCsvField(field) {
  if (!field) {
    return '';
  }
  if (/[",\r\n]/.test(field)) {
    return `"${field.replaceAll('"', '""')}"`;
  }
  return field;
}

/**
 * Write the one-line, colorized status of a link through the logger.
 *
 * Broken links go to `logger.error`, OK links to `logger.warn`, and skipped
 * links to `logger.info`. Links in any other state produce no output.
 *
 * @param {object} logger Logger exposing error/warn/info/debug methods.
 * @param {object} link The link result (reads url, state, status, failureDetails).
 * @param {object} [options]
 * @param {string} [options.indent] Prefix placed in front of the line.
 * @param {boolean} [options.isFragmentFailure] Render a broken link as a
 *   fragment failure, with the `#fragment` part of the URL highlighted in red.
 * @param {boolean} [options.logFailureDetails] Also send the JSON-encoded
 *   failure details of a broken link to `logger.debug`.
 */
function logLinkResult(logger, link, { indent = '', isFragmentFailure = false, logFailureDetails = false } = {}) {
  switch (link.state) {
    case LinkState.BROKEN: {
      if (isFragmentFailure) {
        const state = `[${chalk.red('#')}]`;
        const hashIndex = link.url.indexOf('#');
        if (hashIndex === -1) {
          logger.error(`${indent}${state} ${chalk.gray(link.url)}`);
        } else {
          // Highlight the fragment portion with red text
          const baseUrl = link.url.slice(0, hashIndex);
          const fragment = link.url.slice(hashIndex);
          logger.error(`${indent}${state} ${chalk.gray(baseUrl)}${chalk.red(fragment)}`);
        }
      } else {
        const state = `[${chalk.red(link.status?.toString())}]`;
        logger.error(`${indent}${state} ${chalk.gray(link.url)}`);
      }
      if (logFailureDetails) {
        logger.debug(JSON.stringify(link.failureDetails, null, 2));
      }
      break;
    }
    case LinkState.OK: {
      const state = `[${chalk.green(link.status?.toString())}]`;
      logger.warn(`${indent}${state} ${chalk.gray(link.url)}`);
      break;
    }
    case LinkState.SKIPPED: {
      if (link.status === 999 || link.status === 403) {
        const state = `[${chalk.grey(link.status.toString())}]`;
        logger.info(`${indent}${state} ${chalk.gray(link.url)} ${chalk.dim('(bot-protected)')}`);
      } else {
        const state = `[${chalk.grey('SKP')}]`;
        logger.info(`${indent}${state} ${chalk.gray(link.url)}`);
      }
      break;
    }
    default: {
      break;
    }
  }
}

/**
 * CLI entry point: resolve the configuration, run the link checker against
 * the locations given on the command line, report results in the selected
 * format, and request process exit with code 0 (passed) or 1 (failed).
 *
 * Shows the help text and returns when no location was given.
 *
 * @returns {Promise<void>}
 * @throws {Error} On invalid flag combinations or malformed header,
 *   status-code, verbosity or format values.
 */
async function main() {
  if (cli.input.length === 0) {
    cli.showHelp();
    return;
  }
  flags = await getConfig(cli.flags);
  if (
    (flags.urlRewriteReplace && !flags.urlRewriteSearch) ||
    (flags.urlRewriteSearch && !flags.urlRewriteReplace)
  ) {
    throw new Error('The url-rewrite-replace flag must be used with the url-rewrite-search flag.');
  }
  // This is a workaround for a bug in bun where the `dispatcher` option in
  // `fetch` is not respected. This causes the `allowInsecureCerts` option to
  // be ignored. By setting the `NODE_TLS_REJECT_UNAUTHORIZED` environment
  // variable to '0', we can bypass certificate validation for all requests.
  if (flags.allowInsecureCerts && isBunExecutable()) {
    console.warn('Info: Certificate validation is being bypassed for this run due to --allow-insecure-certs flag in a bun executable environment. This is a workaround for a known bun issue.');
    process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';
  }
  const start = Date.now();
  const verbosity = parseVerbosity(flags);
  const format = parseFormat(flags);
  const logger = new Logger(verbosity, format);
  const headers = parseHeaders(flags.header ?? []);
  logger.error(`→ crawling ${cli.input.join(' ')}`);
  const checker = new LinkChecker();
  // CSV rows are streamed as links are checked, so the header row goes first.
  if (format === Format.CSV) {
    console.log('url,status,state,parent,failureDetails');
  }
  checker.on('retry', (info) => {
    logger.warn(`Retrying: ${info.url} in ${info.secondsUntilRetry} seconds.`);
  });
  checker.on('redirect', (info) => {
    const nonStandardNote = info.isNonStandard ? ' (non-standard)' : '';
    const target = info.targetUrl ? ` → ${info.targetUrl}` : '';
    logger.warn(`${chalk.yellow('[REDIRECT]')} ${chalk.gray(info.url)}${target} ${chalk.dim(`(${info.status}${nonStandardNote})`)}`);
  });
  checker.on('statusCodeWarning', (info) => {
    logger.warn(`${chalk.yellow('[WARN]')} ${chalk.gray(info.url)} ${chalk.dim(`(${info.status})`)}`);
  });
  checker.on('link', (link) => {
    // Must be computed before shouldShowResult(), which may clear failureDetails.
    const isFragmentFailure = link.failureDetails?.some(
      (detail) => detail instanceof Error && detail.message.includes('Fragment identifier'),
    );
    logLinkResult(logger, link, { isFragmentFailure });
    if (format === Format.CSV && shouldShowResult(link, verbosity)) {
      const failureDetails = link.failureDetails
        ? JSON.stringify(link.failureDetails, null, 2)
        : '';
      console.log(`${escapeCsvField(link.url)},${link.status},${link.state},${escapeCsvField(link.parent || '')},${escapeCsvField(failureDetails)}`);
    }
  });
  const options = {
    path: cli.input,
    recurse: flags.recurse,
    timeout: Number(flags.timeout),
    markdown: flags.markdown,
    checkCss: flags.checkCss,
    checkFragments: flags.checkFragments,
    concurrency: Number(flags.concurrency),
    serverRoot: flags.serverRoot,
    directoryListing: flags.directoryListing,
    cleanUrls: flags.cleanUrls,
    redirects: flags.redirects,
    requireHttps: flags.requireHttps,
    allowInsecureCerts: flags.allowInsecureCerts,
    retry: flags.retry,
    retryErrors: flags.retryErrors,
    retryErrorsCount: Number(flags.retryErrorsCount),
    retryErrorsJitter: Number(flags.retryErrorsJitter),
    headers,
  };
  if (flags.skip && (typeof flags.skip === 'string' || Array.isArray(flags.skip))) {
    options.linksToSkip = parseSkipPatterns(flags.skip);
  }
  if (flags.urlRewriteSearch && flags.urlRewriteReplace) {
    options.urlRewriteExpressions = [
      {
        pattern: new RegExp(flags.urlRewriteSearch),
        replacement: flags.urlRewriteReplace,
      },
    ];
  }
  // Merge statusCodes from config file and CLI flags.
  // Start with config file statusCodes if present...
  if (flags.statusCodes) {
    options.statusCodes = { ...flags.statusCodes };
  }
  // ...then add the CLI statusCode flags (these override the config file).
  if (flags.statusCode) {
    options.statusCodes = parseStatusCodeFlags(flags.statusCode, options.statusCodes);
  }
  const result = await checker.check(options);
  const filteredResults = result.links.filter((link) => shouldShowResult(link, verbosity));
  if (format === Format.JSON) {
    result.links = filteredResults;
    console.log(JSON.stringify(result, null, 2));
    gracefulExit(result.passed ? 0 : 1);
    return;
  }
  if (format === Format.CSV) {
    // Rows were already written by the 'link' handler above.
    gracefulExit(result.passed ? 0 : 1);
    return;
  }
  // Build a collection scanned links, collated by the parent link used in
  // the scan. For example:
  // {
  //   "./README.md": [
  //     {
  //       url: "https://img.shields.io/npm/v/linkinator.svg",
  //       status: 200
  //       ....
  //     }
  //   ],
  // }
  const parents = result.links.reduce((accumulator, current) => {
    const parent = current.parent || '';
    accumulator[parent] ||= [];
    accumulator[parent].push(current);
    return accumulator;
  }, {});
  for (const parent of Object.keys(parents)) {
    // Prune links based on verbosity
    const links = parents[parent].filter((link) => {
      if (verbosity === LogLevel.NONE) {
        return false;
      }
      if (link.state === LinkState.BROKEN) {
        return true;
      }
      if (link.state === LinkState.OK && verbosity <= LogLevel.WARNING) {
        return true;
      }
      if (link.state === LinkState.SKIPPED && verbosity <= LogLevel.INFO) {
        return true;
      }
      return false;
    });
    if (links.length === 0) {
      continue;
    }
    logger.error(chalk.blue(parent));
    for (const link of links) {
      // Check if this is a fragment failure by looking at failureDetails OR
      // checking if URL has fragment with 2xx status
      const isFragmentFailure =
        link.failureDetails?.some(
          (detail) => detail instanceof Error && detail.message.includes('Fragment identifier'),
        ) ||
        (link.state === LinkState.BROKEN &&
          link.status &&
          link.status >= 200 &&
          link.status < 300 &&
          link.url.includes('#'));
      logLinkResult(logger, link, {
        indent: ' ',
        isFragmentFailure: Boolean(isFragmentFailure),
        logFailureDetails: true,
      });
    }
  }
  const total = (Date.now() - start) / 1000;
  const scannedLinks = result.links.filter((x) => x.state !== LinkState.SKIPPED);
  if (!result.passed) {
    const borked = result.links.filter((x) => x.state === LinkState.BROKEN);
    logger.error(chalk.bold(`${chalk.red('ERROR')}: Detected ${borked.length} broken links. Scanned ${chalk.yellow(scannedLinks.length.toString())} links in ${chalk.cyan(total.toString())} seconds.`));
    gracefulExit(1);
    return;
  }
  logger.error(chalk.bold(`✓ Successfully scanned ${chalk.green(scannedLinks.length.toString())} links in ${chalk.cyan(total.toString())} seconds.`));
  gracefulExit(0);
}
/**
 * Request process termination with the given exit code, without cutting off
 * pending cleanup abruptly.
 *
 * The code is recorded on `process.exitCode`, so a process that winds down on
 * its own exits with it. A 100ms fallback timer then calls `process.exit`
 * in case something is still running by then.
 *
 * @param {number} code Exit code to report.
 */
function gracefulExit(code) {
  process.exitCode = code;
  const forceExit = () => {
    process.exit(code);
  };
  // unref() stops this timer from keeping the event loop alive by itself: a
  // process with nothing else pending exits right away, and the timer only
  // fires when other work is still holding the process open after 100ms.
  setTimeout(forceExit, 100).unref();
}
/**
 * Resolve the log level for this run from the `silent` and `verbosity` flags.
 *
 * `silent` maps to `LogLevel.ERROR`; with neither flag set the level is
 * `LogLevel.WARNING`. Otherwise `verbosity` is matched case-insensitively
 * against the level names.
 *
 * @param {{ silent?: boolean, verbosity?: string }} flags Parsed options.
 * @returns {*} The matching `LogLevel` member.
 * @throws {Error} When both flags are set, or `verbosity` is not a level name.
 */
function parseVerbosity(flags) {
  if (flags.silent && flags.verbosity) {
    throw new Error('The SILENT and VERBOSITY flags cannot both be defined. Please consider using VERBOSITY only.');
  }
  if (flags.silent) {
    return LogLevel.ERROR;
  }
  if (!flags.verbosity) {
    return LogLevel.WARNING;
  }
  const verbosity = flags.verbosity.toUpperCase();
  // LogLevel is looked up both by name (below) and compared as an ordered
  // value elsewhere, so its values can include non-name entries. Keep only the
  // string names so the error message lists choices the user can actually type.
  const levelNames = Object.values(LogLevel).filter((value) => typeof value === 'string');
  if (!levelNames.includes(verbosity)) {
    throw new Error(`Invalid flag: VERBOSITY must be one of [${levelNames.join(',')}]`);
  }
  return LogLevel[verbosity];
}
/**
 * Resolve the output format from the `format` flag.
 *
 * The flag is matched case-insensitively against the `Format` names and
 * defaults to `Format.TEXT` when absent. The passed-in flags object is left
 * untouched.
 *
 * @param {{ format?: string }} flags Parsed options.
 * @returns {*} The matching `Format` member.
 * @throws {Error} When `format` is not a known format name.
 */
function parseFormat(flags) {
  if (!flags.format) {
    return Format.TEXT;
  }
  // Normalize into a local instead of overwriting the caller's flags object.
  const name = flags.format.toUpperCase();
  if (!Object.values(Format).includes(name)) {
    throw new Error("Invalid flag: FORMAT must be 'TEXT', 'JSON', or 'CSV'.");
  }
  return Format[name];
}
/**
 * Decide whether a link result should be reported at the given verbosity.
 *
 * OK links are shown up to `LogLevel.WARNING`, broken links up to
 * `LogLevel.ERROR`, and skipped links up to `LogLevel.INFO`.
 *
 * Side effect: for a broken link, `link.failureDetails` is reset to
 * `undefined` whenever the verbosity is above `LogLevel.DEBUG`, so detailed
 * failure data is only kept for debug output.
 *
 * @param {object} link Link result; `state` is read, `failureDetails` may be cleared.
 * @param {*} verbosity Active log level, compared against `LogLevel` members.
 * @returns {boolean | undefined} Whether to show the link; `undefined` for a
 *   state other than OK, BROKEN or SKIPPED.
 */
function shouldShowResult(link, verbosity) {
  const { state } = link;
  if (state === LinkState.OK) {
    return verbosity <= LogLevel.WARNING;
  }
  if (state === LinkState.BROKEN) {
    const keepDetails = !(verbosity > LogLevel.DEBUG);
    if (!keepDetails) {
      link.failureDetails = undefined;
    }
    return verbosity <= LogLevel.ERROR;
  }
  if (state === LinkState.SKIPPED) {
    return verbosity <= LogLevel.INFO;
  }
  return undefined;
}
// Script entry: run the CLI. Any failure is reported on stderr (just the
// message for Error instances, the raw value otherwise) and the process is
// asked to exit with code 1.
try {
  await main();
} catch (failure) {
  if (failure instanceof Error) {
    console.error(failure.message);
  } else {
    console.error(failure);
  }
  gracefulExit(1);
}