/*
 * anydownload
 * Version:
 * A powerful website downloader with GUI support
 * 295 lines (268 loc) • 12.8 kB
 * JavaScript
 */
const { Downloader, checkNeedDynamic } = require('../src/downloader');
const yargs = require('yargs');
const ora = require('ora').default;
const path = require('path');
const { exec } = require('child_process');
const os = require('os');
const fs = require('fs-extra');
const inquirerImport = require('inquirer');
const cosmiconfig = require('cosmiconfig').cosmiconfigSync;
const chalk = require('chalk');
const pkg = require('../package.json');
const { program } = require('commander');
const { version } = require('../package.json');
// Compatible with inquirer v8/v9
const inquirer = inquirerImport.prompt ? inquirerImport : inquirerImport.default;
// User-facing console strings, collected in one table so wording lives in a
// single place. Keys are looked up as MSG.<key> by the CLI code.
const MSG = {
  // Prompts and progress
  provideUrl: 'Please enter the website URL to download:',
  checking: 'Checking website type...',
  detectedDynamic: 'Dynamic site detected, using browser rendering...',
  detectedStatic: 'Static site detected, downloading directly...',
  downloading: 'Downloading: ',
  done: 'Website download complete!',
  saved: 'All content saved to',

  // End-of-run summary labels
  failedList: 'Failed resources:',
  summary: 'Summary:',
  total: 'Total',
  success: 'Success',
  fail: 'Fail',
  size: 'Total size',
  time: 'Elapsed',

  // Pre-flight checks
  robots: 'Checking robots.txt...',
  robotsBlocked: 'Blocked by robots.txt, skipped (use --ignore-robots to override)',
  disk: 'Checking disk space...',
  diskLow: 'Low disk space, aborting download.',

  // Post-download interaction
  openIndex: 'Open homepage in browser after download?',
  homepage: 'Homepage path:',
  pause: 'Press "p" to pause, "r" to resume, "c" to cancel.',
  updateAvailable: 'Update available! Run npm install -g anydownload to update.',

  // Status and setting labels
  proxyError: 'Proxy server error:',
  speedLimit: 'Speed limit:',
  resumeDownload: 'Resume download:',
  sitemapGenerated: 'Sitemap generated:',
  validationError: 'Resource validation error:',
  cleaningUrls: 'Cleaning URLs...',
  parallelLimit: 'Parallel download limit:',
  timeout: 'Timeout:',
  retryDelay: 'Retry delay:',
  maxFileSize: 'Maximum file size:',
  validateSSL: 'SSL validation:',
  followRedirects: 'Follow redirects:',
  maxRedirects: 'Maximum redirects:',
  keepOriginalUrls: 'Keep original URLs:',
  cleanUrls: 'Clean URLs:',
  ignoreErrors: 'Ignore errors:',
};
// Optional user configuration, looked up by cosmiconfig under the module name
// "websitedownloader". Loading is best-effort: when nothing is found, or the
// lookup throws, `config` stays an empty object and the built-in option
// defaults apply. A failure is reported on stderr instead of being swallowed,
// so a broken config file does not silently revert every setting.
let config = {};
try {
  const explorer = cosmiconfig('websitedownloader');
  const result = explorer.search();
  if (result && result.config) config = result.config;
} catch (error) {
  const reason = (error && error.message) || error;
  console.warn(`Warning: could not load configuration, using defaults: ${reason}`);
}
// Pick an option default from the config file. Unlike `config[key] || fallback`,
// an explicit falsy value such as 0 is kept; only a missing (undefined/null)
// entry falls back. This keeps e.g. `retry: 0` in the config file consistent
// with `--retry 0` on the command line, which is passed through unchanged.
const configDefault = (key, fallback) => {
  const value = config[key];
  return value === undefined || value === null ? fallback : value;
};
// Create CLI program.
// Every option's default comes from the config file when present, otherwise
// from the literal given here. Numeric values are not converted at this point;
// they reach the consumer as strings when given on the command line.
// NOTE(review): --headless defaults to true and no negating flag is declared,
// so headless mode can only be turned off through the config file.
program
  .version(version)
  .description('A powerful website downloader')
  .argument('[url]', 'URL to download')
  .option('--gui', 'Start the web graphical interface')
  .option('-o, --output <dir>', 'Custom output folder', config.output || 'downloaded_site')
  .option('-r, --recursive', 'Recursively download same-domain pages', config.recursive || false)
  .option('-m, --max-depth <number>', 'Set recursion depth', configDefault('max-depth', 1))
  .option('-t, --type <type>', 'Download specific resource types', config.type || 'all')
  .option('-d, --dynamic', 'Enable dynamic mode', config.dynamic || false)
  .option('-v, --verbose', 'Show detailed logs', config.verbose || false)
  .option('--ignore-robots', 'Ignore robots.txt', config['ignore-robots'] || false)
  .option('--retry <number>', 'Retry count for failed downloads', configDefault('retry', 3))
  .option('--concurrency <number>', 'Maximum concurrent downloads', config.concurrency || 5)
  .option('--delay <number>', 'Delay between downloads (ms)', configDefault('delay', 1000))
  .option('--filter <regex>', 'Regex to filter resource URLs', config.filter)
  .option('--headless', 'Use headless browser', config.headless !== false)
  .option('--browser <type>', 'Choose browser engine (puppeteer/playwright)', config.browser || 'puppeteer')
  .option('--proxy <url>', 'Use proxy server', config.proxy)
  .option('--speed-limit <number>', 'Download speed limit (KB/s)', config.speedLimit || 0)
  .option('--resume', 'Enable resume download', config.resumeDownload || false)
  .option('--sitemap', 'Generate sitemap', config.sitemapEnabled || false)
  .option('--timeout <number>', 'Request timeout (ms)', config.timeout || 30000)
  .option('--max-file-size <number>', 'Maximum file size (MB)', config.maxFileSize || 0)
  .option('--retry-delay <number>', 'Retry delay (ms)', configDefault('retryDelay', 1000))
  .option('--no-validate-ssl', 'Disable SSL validation', config.validateSSL !== false)
  .option('--no-follow-redirects', 'Disable redirect following', config.followRedirects !== false)
  .option('--max-redirects <number>', 'Maximum redirects', configDefault('maxRedirects', 5))
  .option('--keep-original-urls', 'Keep original URLs', config.keepOriginalUrls || false)
  .option('--clean-urls', 'Clean URLs', config.cleanUrls || false)
  .option('--ignore-errors', 'Ignore errors', config.ignoreErrors || false)
  .option('--parallel-limit <number>', 'Parallel download limit', config.parallelLimit || 5);
// The command line must be parsed before any option or positional value is read.
program.parse(process.argv);
// First positional argument is the optional URL; undefined when none was given.
const [urlArgument] = program.args;
// Parsed option values, keyed by commander's camel-cased option names.
const options = program.opts();
// If --gui is enabled, start the web GUI and open browser
if (options.gui) {
console.log('Starting web GUI...');
console.log('http://localhost:3000/');
// Run web-gui.js as a detached child process so the server can outlive this CLI.
const { spawn } = require('child_process');
const webGuiPath = path.join(__dirname, '..', 'web-gui.js');
console.log('Web GUI path:', webGuiPath);
// Port assumed when the child never prints its startup banner.
const DEFAULT_GUI_PORT = 3000;
// How long to wait for the startup banner before falling back to the default port.
const GUI_STARTUP_TIMEOUT_MS = 10000;
// Upper bound on how long to wait for the browser-opening command to return.
const BROWSER_OPEN_GRACE_MS = 3000;
// Resolves with the port the server reports on stdout (or the default port on
// timeout); rejects when the child cannot be spawned or exits before reporting.
const serverStarted = new Promise((resolve, reject) => {
  const guiProcess = spawn('node', [webGuiPath], {
    detached: true,
    stdio: ['ignore', 'pipe', 'pipe']
  });
  const fallbackTimer = setTimeout(() => resolve(DEFAULT_GUI_PORT), GUI_STARTUP_TIMEOUT_MS);
  guiProcess.on('error', (error) => {
    clearTimeout(fallbackTimer);
    reject(error);
  });
  // Has no effect once the promise is settled, so a later exit is ignored.
  guiProcess.on('exit', (code) => {
    clearTimeout(fallbackTimer);
    reject(new Error(`Web GUI process exited before reporting its port (code ${code})`));
  });
  guiProcess.stdout.on('data', (data) => {
    const output = data.toString();
    console.log(output);
    if (output.includes('Web GUI running at http://localhost:')) {
      const match = output.match(/http:\/\/localhost:(\d+)/);
      if (match) {
        clearTimeout(fallbackTimer);
        resolve(Number.parseInt(match[1], 10));
      }
    }
  });
  guiProcess.stderr.on('data', (data) => {
    console.error(data.toString());
  });
  // Do not let the child's process handle keep this CLI alive.
  guiProcess.unref();
});
// Wait for the server to start, open the browser, and only then exit. Exiting
// synchronously here (as before) ended the process before the promise could
// settle, so the browser was never opened.
// NOTE(review): once this process exits, the child's piped stdout/stderr lose
// their reader; confirm web-gui.js tolerates writing to a closed pipe.
serverStarted
  .then((port) => {
    const url = `http://localhost:${port}`;
    console.log(`Opening browser at ${url}...`);
    let command;
    if (process.platform === 'win32') {
      command = `start "" "${url}"`;
    } else if (process.platform === 'darwin') {
      command = `open "${url}"`;
    } else {
      command = `xdg-open "${url}"`;
    }
    exec(command, (error) => {
      if (error) {
        console.error(`Failed to open browser: ${error}`);
      }
      process.exit(0);
    });
    // The opener command may not return promptly on every system; do not wait
    // for it indefinitely.
    setTimeout(() => process.exit(0), BROWSER_OPEN_GRACE_MS);
  })
  .catch((error) => {
    console.error(`Failed to start web GUI: ${(error && error.message) || error}`);
    process.exit(1);
  });
} else {
// Original download logic
// Parse a CLI/config value as a base-10 integer. The explicit radix keeps
// inputs such as "0x10" from being read as hexadecimal.
const toInt = (value) => Number.parseInt(value, 10);

// Pick the file to present as the downloaded site's homepage inside siteDir:
// index.html when it exists, otherwise the first *.html entry. Returns null
// when the directory is missing or contains no HTML file, so a missing folder
// no longer throws from readdirSync and fails an otherwise successful run.
const findHomepageFile = (siteDir) => {
  const indexFile = path.join(siteDir, 'index.html');
  if (fs.existsSync(indexFile)) {
    return indexFile;
  }
  if (!fs.existsSync(siteDir)) {
    return null;
  }
  const firstHtml = fs.readdirSync(siteDir).find((name) => name.endsWith('.html'));
  return firstHtml ? path.join(siteDir, firstHtml) : null;
};

// Open a local file with the platform's default handler.
const openWithDefaultApp = (target) => {
  if (process.platform === 'win32') {
    exec(`start "" "${target}"`);
  } else if (process.platform === 'darwin') {
    exec(`open "${target}"`);
  } else {
    exec(`xdg-open "${target}"`);
  }
};

// Download one website and report the outcome on the console.
//
// url: the site to download; when empty the user is prompted for it. A value
//      without an http:// or https:// prefix gets "https://" prepended.
//
// Builds a Downloader from the parsed CLI options, runs downloadWebsite(),
// prints a summary from the downloader's counters, locates a homepage file,
// optionally opens it, and lists failed resources. Exits the process with
// code 1 when no URL is available or when the download throws.
async function runDownload(url) {
  let targetUrl = url;
  if (!targetUrl) {
    const answer = await inquirer.prompt([{ type: 'input', name: 'url', message: MSG.provideUrl }]);
    targetUrl = answer.url;
  }
  if (!targetUrl) {
    console.error(MSG.provideUrl);
    process.exit(1);
  }
  // Ensure the URL has a protocol; the scheme is matched case-insensitively so
  // "HTTP://host" is not turned into "https://HTTP://host".
  if (typeof targetUrl === 'string' && !/^https?:\/\//i.test(targetUrl)) {
    targetUrl = `https://${targetUrl}`;
  }
  const downloader = new Downloader({
    ...options,
    userAgent: options['user-agent'] || config['user-agent'] || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    cookie: options.cookie || config.cookie,
    outputDir: options.output,
    verbose: options.verbose,
    recursive: options.recursive,
    maxDepth: toInt(options.maxDepth),
    type: options.type,
    dynamic: options.dynamic,
    ignoreRobots: options.ignoreRobots,
    retry: toInt(options.retry),
    concurrency: toInt(options.concurrency),
    delay: toInt(options.delay),
    filterRegex: options.filter,
    headless: options.headless,
    browserType: options.browser,
    proxy: options.proxy,
    speedLimit: toInt(options.speedLimit),
    resumeDownload: options.resume,
    sitemapEnabled: options.sitemap,
    timeout: toInt(options.timeout),
    // The CLI value is in megabytes; it is converted to bytes here.
    maxFileSize: toInt(options.maxFileSize) * 1024 * 1024,
    retryDelay: toInt(options.retryDelay),
    // Commander stores --no-validate-ssl under the camel-cased key
    // "validateSsl", so reading options.validateSSL alone is always undefined.
    validateSSL: options.validateSsl !== undefined ? options.validateSsl : options.validateSSL,
    followRedirects: options.followRedirects,
    maxRedirects: toInt(options.maxRedirects),
    keepOriginalUrls: options.keepOriginalUrls,
    cleanUrls: options.cleanUrls,
    ignoreErrors: options.ignoreErrors,
    parallelLimit: toInt(options.parallelLimit)
  });
  const spinner = ora(MSG.downloading + targetUrl).start();
  const startTime = Date.now();
  try {
    await downloader.downloadWebsite(targetUrl);
    spinner.succeed(MSG.done);
    console.log(`${MSG.saved} ${options.output}`);
    // Summary
    console.log(`\n${MSG.summary}`);
    console.log(`${MSG.total}: ${downloader.successCount + downloader.failCount}`);
    console.log(`${MSG.success}: ${downloader.successCount}`);
    console.log(`${MSG.fail}: ${downloader.failCount}`);
    console.log(`${MSG.size}: ${(downloader.downloadedBytes / 1024).toFixed(1)} KB`);
    console.log(`${MSG.time}: ${((Date.now() - startTime) / 1000).toFixed(1)}s`);
    // The homepage is looked up in a per-host subfolder of the output folder,
    // named after the URL's host with ":", "/" and "\" replaced by "_".
    const urlObj = new URL(targetUrl);
    const hostDir = urlObj.host.replace(/[:\/\\]/g, '_');
    const outputDir = path.join(options.output, hostDir);
    const homepageFile = findHomepageFile(outputDir);
    console.log(`${MSG.homepage} ${homepageFile || '[Not found]'}`);
    // Ask whether to open the homepage unless options.open already decides it.
    let openHome = options.open;
    if (options.open === undefined) {
      const answer = await inquirer.prompt([{
        type: 'confirm',
        name: 'open',
        message: MSG.openIndex,
        default: false
      }]);
      openHome = answer.open;
    }
    if (openHome && homepageFile && fs.existsSync(homepageFile)) {
      openWithDefaultApp(homepageFile);
    } else if (openHome) {
      console.log('[DEBUG] No homepage HTML file found to open.');
    }
    // Failed list: entries are printed as "url (error)" in verbose mode when an
    // error is attached, otherwise as the url (or the raw entry).
    const failedResources = downloader.failedResources || [];
    if (failedResources.length) {
      console.log('\n' + MSG.failedList);
      failedResources.forEach((resource) => {
        if (options.verbose && resource.error) {
          console.log(`${resource.url} (${resource.error})`);
        } else {
          console.log(resource.url || resource);
        }
      });
    }
  } catch (error) {
    spinner.fail('Download failed: ' + (error.message || error));
    process.exit(1);
  }
}
// Run the download logic with the provided URL argument. Failures raised
// outside the function's own try block (the URL prompt, Downloader
// construction) are reported here instead of leaving the promise unhandled.
runDownload(urlArgument).catch((error) => {
  console.error(`Download failed: ${(error && error.message) || error}`);
  process.exit(1);
});
}
// Removed original program.parse(process.argv) call here