w3c-linkchecker-local
Version:
Run w3c link checker on local directory
444 lines (400 loc) • 13.6 kB
JavaScript
/**
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License v2.0 which accompanies this
* distribution, and is available at https://www.eclipse.org/legal/epl-v20.html
*
* SPDX-License-Identifier: EPL-2.0
*/
const url = require('url');
const getPort = require('get-port');
const express = require('express');
const colors = require('chalk');
const util = require('util');
const child_process = require('child_process');
const exec = util.promisify(child_process.exec);
const resolve = require('path').resolve;
// Default option values. Whatever is passed as `options` to the
// W3CLinkChecker constructor is spread on top of this object, so any key
// omitted by the caller falls back to the value listed here.
const defaultOptions = {
// object whose debug()/info() methods receive all log output
logger: console,
// when true, the [debug] messages are sent to logger.debug()
verbose: false,
// when true, the constructor sets process.env.FORCE_COLOR to 0
noColor: false,
// host name used to build the first URL handed to checklink; when it is not
// 'localhost' and docker is used, it is mapped to 127.0.0.1 via --add-host
baseDomain: 'localhost',
// URL path under which the local directory is served
baseUrl: '/',
// optional URL path used instead of baseUrl as the first page to check
startPoint: null,
// explicit checklink command; when set, no auto-detection is performed
checklinkCommand: null,
// when true, issues whose code mentions 'Forbidden by robots.txt' are
// reported as warnings instead of errors
ignoreRobotsForbidden: false,
// when true, issues whose "To do" text mentions 'broken URI fragments' are
// reported as warnings instead of errors
ignoreBrokenFragments: false,
// when true, issues whose code contains ' -> ' are reported as warnings
// instead of errors
ignoreRedirection: false,
};
const { CheckLinkOptions } = require('./cli-options');
// Executable name searched for with `which` (or `where.exe` on win32) when
// the `checklinkCommand` option is not set
const CHECKLINK_COMMAND = 'checklink';
// Docker image passed to `docker run` when the located command is 'docker'
const DOCKER_IMAGE = 'jackjiaibm/w3c-linkchecker';
class W3CLinkChecker {
  /**
   * Create a link checker for a local directory or a URL.
   *
   * @param {String} directory directory on disk, or URL, to test
   * @param {Object} [options] overrides that are merged on top of `defaultOptions`
   */
  constructor(directory, options) {
    this.directory = directory;
    this.options = { ...defaultOptions, ...options };
    this.logger = this.options.logger;
    // HTTP server created by startHttpServer(); null while nothing is served
    this.server = null;

    // disable chalk color
    // NOTE(review): chalk has already been required when this runs; confirm
    // the installed chalk version still honours FORCE_COLOR at this point.
    if (this.options.noColor) {
      // environment values are strings; assign one explicitly instead of
      // relying on implicit number-to-string coercion
      process.env.FORCE_COLOR = '0';
    }
  }

  /**
   * Check if a string is a URL, i.e. parses to something with a host name.
   *
   * @param {String} str string to test
   * @return {Boolean} true when `str` has a host name component
   */
  isUrl(str) {
    const { hostname } = url.parse(str);
    return Boolean(hostname);
  }

  /**
   * Serve a directory of static files over HTTP.
   *
   * The created server is stored in `this.server` so it can be closed later.
   *
   * @param {String} path    The OS path where the html files are located
   * @param {String} baseUrl Base url to serve the files under
   * @param {Number} port    HTTP server port
   * @return {Promise<Number>} resolves with `port` once the server is
   *                           listening, rejects on a server 'error' event
   */
  startHttpServer(path, baseUrl, port) {
    // executor parameter is not called `resolve` so that it does not shadow
    // the module-level `resolve` imported from 'path'
    return new Promise((fulfill, reject) => {
      if (this.options.verbose) {
        this.logger.debug('%s %s: %s', colors.yellow('[debug][W3CLinkChecker.startHttpServer]'), 'starting server on path', path);
        if (baseUrl !== '/') {
          this.logger.debug('%s %s: %s', colors.yellow('[debug][W3CLinkChecker.startHttpServer]'), 'using base url', baseUrl);
        }
      }

      const app = express();
      app.use(baseUrl, express.static(path));
      this.server = app.listen(port);
      this.server.on('listening', () => fulfill(port));
      this.server.on('error', (err) => reject(err));
    });
  }

  /**
   * Run a command line through child_process.exec and return its output.
   *
   * The call is treated as failed when the command cannot be executed, when
   * it writes anything to stderr, or when its trimmed stdout is empty.
   *
   * @param {String} cmd Full command line
   * @return {Promise<String>} trimmed stdout if the command is successful
   * @throws {Error} when the command fails as described above
   */
  async runCommand(cmd) {
    let result;
    try {
      result = await exec(cmd);
    } catch (err) {
      // keep the failure in the stderr slot so it is logged and rejected below
      result = { stderr: err };
    }

    if (result) {
      const stdout = result.stdout ? result.stdout.trim() : result.stdout;
      if (result.stderr) {
        if (this.options.verbose) {
          this.logger.debug('%s %s %s', colors.yellow('[debug][W3CLinkChecker.runCommand]'), colors.magenta('[warn]'), result.stderr);
        }
      } else if (!stdout) {
        if (this.options.verbose) {
          this.logger.debug('%s %s "%s" returns empty result', colors.yellow('[debug][W3CLinkChecker.runCommand]'), colors.magenta('[warn]'), cmd);
        }
      } else {
        if (this.options.verbose) {
          this.logger.debug('%s "%s" succeeded with output "%s"', colors.yellow('[debug][W3CLinkChecker.runCommand]'), colors.green(cmd), stdout);
        }
        return stdout;
      }
    }

    throw new Error(`Failed to run '${cmd}', enable --verbose mode to check error messages`);
  }

  /**
   * Run a command using child_process.spawn.
   *
   * Everything the child writes to stdout/stderr is echoed to this process'
   * stdout as it arrives (stderr in red) and also collected.
   *
   * Output on stderr alone does not make the call fail; only a spawn error
   * or a non-zero exit code does.
   *
   * @param {String} cmd  Command
   * @param {Array}  args Arguments for the command
   * @return {Promise<Object>} `{ stdout, stderr }` if the command exits with 0
   */
  spawnCommand(cmd, args) {
    return new Promise((fulfill, reject) => {
      const child = child_process.spawn(cmd, args);
      const stdoutChunks = [];
      const stderrChunks = [];

      child.stdout.on('data', (data) => {
        const text = data.toString();
        process.stdout.write(text);
        stdoutChunks.push(text);
      });
      child.stderr.on('data', (data) => {
        const text = data.toString();
        process.stdout.write(colors.red(text));
        stderrChunks.push(text);
      });
      child.on('error', (err) => {
        process.stdout.write(colors.red(err.toString()));
        reject(err);
      });
      child.on('close', (code) => {
        if (code === 0) {
          fulfill({ stdout: stdoutChunks.join(''), stderr: stderrChunks.join('') });
        } else {
          reject(new Error(`Failed to run '${cmd}', exit code ${code}`));
        }
      });
    });
  }

  /**
   * Locate the w3c link checker.
   *
   * Order of preference: the `checklinkCommand` option, a `checklink`
   * executable found by `which` / `where.exe`, then docker.
   *
   * @return {Promise<String>} checklink command location, or 'docker' if
   *                           only docker is available
   * @throws {Error} when neither checklink nor docker can be found
   */
  async locateW3cLinkChecker() {
    if (this.options.checklinkCommand) {
      return this.options.checklinkCommand;
    }

    // try to find checklink command in current OS
    const probe = process.platform === 'win32'
      ? `where.exe ${CHECKLINK_COMMAND}`
      : `which ${CHECKLINK_COMMAND}`;
    try {
      const located = await this.runCommand(probe);
      // the lookup may print several matches, one per line; a multi-line
      // string is not a usable command, so keep only the first match
      return located.split(/\r?\n/)[0].trim();
    } catch (e) {
      // not found: deliberately fall through to the docker probe
    }

    // try to use docker
    try {
      await this.runCommand('docker -v');
      return 'docker';
    } catch (e) {
      // docker is not usable either: fall through to the error below
    }

    throw new Error(`Failed to find w3c '${CHECKLINK_COMMAND}' command, try to specify '--checklink-command' option`);
  }

  /**
   * Parse checklink output and find errors.
   *
   * Output pattern:
   *
   *   Processing\t[url]
   *   ...
   *   Valid links.|List of broken links and other issues:
   *   ...
   *   Anchors
   *   Found [0-9]+ anchors.
   *
   * Broken links and other issues pattern:
   *
   *   [url]
   *   Line: [0-9]+
   *   Code: [http status code & text]
   *   To do: Some of the links to this resource point to broken URI fragments
   *   (such as index.html#fragment).
   *   The following fragments need to be fixed:
   *   [hash] Line: [0-9]+
   *   ...
   *
   * Each issue becomes `{ source, target, lines, code, todo, fragments }`
   * where only the fields present in the output are set. Issues matched by
   * one of the `ignore*` options are returned as warnings instead of errors.
   *
   * @param {String} stdout output of checklink command; a missing or
   *                        non-string value is treated as empty output
   * @return {Object} `{ errors: Array, warnings: Array }`
   */
  parseOutputErrors(stdout) {
    const processingPattern = /Processing\s+(.+)/;
    const fragmentPattern = /^(\S+)\s+Lines?:\s+([0-9, ]+)$/;

    const lines = typeof stdout === 'string' ? stdout.split('\n') : [];
    const issues = [];
    let current = null;
    let urlTested = null;
    let issuesSection = false;
    let fragmentSection = false;

    // store the issue being assembled, if any
    const flush = () => {
      if (current) {
        issues.push(current);
        current = null;
      }
    };

    for (const rawLine of lines) {
      const line = rawLine.trim();

      if (!issuesSection) {
        if (line.includes('List of broken links and other issues')) {
          issuesSection = true;
        } else {
          const processing = line.match(processingPattern);
          if (processing) {
            flush();
            urlTested = processing[1];
            fragmentSection = false;
          }
        }
        continue;
      }

      // the anchors/summary lines mark the end of the issue list
      if (line.includes('Anchors') ||
          /Found [0-9]+ anchor/.test(line) ||
          /Checked [0-9]+ document/.test(line)) {
        flush();
        issuesSection = false;
        fragmentSection = false;
        continue;
      }

      // a URL line starts a new issue
      if (line.startsWith('http://') || line.startsWith('https://')) {
        flush();
        fragmentSection = false;
        current = {
          source: urlTested,
          target: line,
        };
        continue;
      }

      // detail lines are only meaningful while an issue is being assembled
      if (!current) {
        continue;
      }

      if (line.startsWith('Line: ')) {
        current.lines = line.slice(6);
      } else if (line.startsWith('Lines: ')) {
        current.lines = line.slice(7);
      } else if (line.startsWith('Code: ')) {
        current.code = line.slice(6);
      } else if (line.startsWith('To do: ')) {
        current.todo = line.slice(7);
      } else if (line.includes('The following fragments need to be fixed')) {
        fragmentSection = true;
        current.fragments = [];
      } else {
        const fragment = fragmentSection ? line.match(fragmentPattern) : null;
        if (fragment) {
          current.fragments.push({
            hash: fragment[1],
            lines: fragment[2],
          });
        } else if (line) {
          // continuation of the "To do" text; start the text when no
          // "To do:" line was seen, rather than appending to undefined
          current.todo = current.todo ? `${current.todo} ${line}` : line;
        }
      }
    }
    flush();

    // move the issues the caller asked to ignore from errors to warnings
    const {
      ignoreRobotsForbidden,
      ignoreBrokenFragments,
      ignoreRedirection,
    } = this.options;
    const isIgnored = (issue) => Boolean(
      (ignoreRobotsForbidden && issue.code && issue.code.includes('Forbidden by robots.txt')) ||
      (ignoreBrokenFragments && issue.todo && issue.todo.includes('broken URI fragments')) ||
      (ignoreRedirection && issue.code && issue.code.includes(' -> '))
    );

    const errors = [];
    const warnings = [];
    for (const issue of issues) {
      (isIgnored(issue) ? warnings : errors).push(issue);
    }

    return { errors, warnings };
  }

  /**
   * Build the checklink arguments that are copied straight from the options.
   *
   * @return {Array} command line arguments, in option-list order
   */
  collectPassThroughArgs() {
    const booleanOptions = ['summary', 'broken', 'directory', 'recursive', 'no-referer', 'quiet', 'verbose', 'indicator', 'hide-same-realm', 'suppress-temp-redirects'];
    const valueOptions = ['depth', 'exclude', 'user', 'password', 'sleep', 'timeout', 'languages', 'cookies', 'connection-cache', 'domain'];
    const listOptions = ['location', 'exclude-docs', 'suppress-redirect', 'suppress-redirect-prefix', 'suppress-broken', 'suppress-fragment'];
    const extra = [];

    // copy these boolean options directly to checklink
    for (const opt of booleanOptions) {
      if (this.options[opt]) {
        extra.push(`--${opt}`);
      }
    }

    // copy these string/number options directly to checklink
    for (const opt of valueOptions) {
      if (this.options[opt]) {
        extra.push(`--${opt}`, this.options[opt]);
      }
    }

    // copy these array options directly to checklink, once per value
    for (const opt of listOptions) {
      const value = this.options[opt];
      if (value) {
        // a lone string must be passed whole, not iterated per character
        const values = Array.isArray(value) ? value : [value];
        for (const one of values) {
          extra.push(`--${opt}`, one);
        }
      }
    }

    return extra;
  }

  /**
   * Close the static file server if one was started, and forget it.
   */
  closeServer() {
    if (!this.server) {
      return;
    }
    if (this.options.verbose) {
      this.logger.debug('%s %s', colors.yellow('[debug][W3CLinkChecker.check]'), 'closing server');
    }
    this.server.close();
    this.server = null;
  }

  /**
   * Run broken link checker on target.
   *
   * A relative directory is resolved to an absolute path. When the target
   * is a directory it is either mounted into the docker container or served
   * by a temporary local HTTP server, depending on which checklink was found.
   * The temporary server is always closed before this method settles.
   *
   * @return {Promise<Object>} object with `stdout` and `stderr`, plus
   *                           `errors` / `warnings` arrays when any were found
   * @throws {Error} when checklink cannot be located or exits with non-zero
   */
  async check() {
    try {
      const isUrl = this.isUrl(this.directory);
      if (!isUrl) {
        this.directory = resolve(this.directory);
      }

      const checklink = await this.locateW3cLinkChecker();
      this.logger.info('checking links of "%s" ...', colors.blue(this.directory));

      // prepare command
      const { baseDomain, baseUrl, startPoint } = this.options;
      const startPath = startPoint ? startPoint : baseUrl;
      let command;
      let args;
      if (checklink === 'docker') {
        command = 'docker';
        args = ['run', '--rm'];
        if (baseDomain !== 'localhost') {
          args.push(`--add-host=${baseDomain}:127.0.0.1`);
        }
        if (isUrl) {
          args.push(DOCKER_IMAGE, this.directory, ...CheckLinkOptions);
        } else {
          args.push(
            '-v', `${this.directory}:/usr/share/nginx/html${baseUrl}`,
            DOCKER_IMAGE,
            `http://${baseDomain}${startPath}`,
            ...CheckLinkOptions
          );
        }
      } else {
        command = checklink;
        if (isUrl) {
          args = [this.directory, ...CheckLinkOptions];
        } else {
          const port = await getPort();
          await this.startHttpServer(this.directory, baseUrl, port);
          args = [`http://${baseDomain}:${port}${startPath}`, ...CheckLinkOptions];
        }
      }
      args.push(...this.collectPassThroughArgs());

      this.logger.info('with command "%s %s" ...', colors.blue(command), colors.green(args.join(' ')));

      // run test
      const result = await this.spawnCommand(command, args);
      if (result && result.stdout) {
        const { errors, warnings } = this.parseOutputErrors(result.stdout);
        if (errors && errors.length > 0) {
          result.errors = errors;
        }
        if (warnings && warnings.length > 0) {
          result.warnings = warnings;
        }
      }
      return result;
    } catch (err) {
      if (this.options.verbose) {
        this.logger.debug('%s throwing error %s', colors.yellow('[debug][W3CLinkChecker.check]'), err);
      }
      throw err;
    } finally {
      // single cleanup path for both success and failure
      this.closeServer();
    }
  }
}
// Public API of this module: the checker class is the only export.
module.exports = { W3CLinkChecker };