moshai-cli
Version:
A modern, fast Node.js CLI powered by arasadrahman
92 lines (81 loc) • 3.79 kB
JavaScript
const fs = require('fs');
const path = require('path');
const os = require('os');
const axios = require('axios');
const cheerio = require('cheerio');
const mkdirp = require('mkdirp');
const chalk = require('chalk').default;
const urlLib = require('url');
module.exports = class DownloadFileAssetsCommand {
static signature = 'download:assets {htmlFilePath} {outputDir?}';
static description = 'Download assets (img, css, js, fonts) from main site only and rewrite paths';
async handle({ htmlFilePath, outputDir }) {
if (!fs.existsSync(htmlFilePath)) {
console.error(chalk.red(`❌ File not found: ${htmlFilePath}`));
return;
}
const html = fs.readFileSync(htmlFilePath, 'utf8');
const $ = cheerio.load(html);
const baseUrl = this.getBaseUrl($, htmlFilePath);
const hostname = new URL(baseUrl).hostname;
const defaultOut = path.join(os.homedir(), 'Desktop', hostname);
outputDir = outputDir ? path.resolve(outputDir) : defaultOut;
mkdirp.sync(outputDir);
const assets = [];
$('img[src],script[src],link[href],style').each((_, el) => {
const tag = $(el);
let attr = tag.is('link') || tag.is('style') ? 'href' : 'src';
if (tag.is('style')) {
const cssText = tag.html();
const matches = [...cssText.matchAll(/url\(["']?(https?:\/\/[^"')]+)["']?\)/g)];
matches.forEach(match => assets.push(match[1]));
return;
}
const url = tag.attr(attr);
if (url && url.startsWith('http') && new URL(url).origin === new URL(baseUrl).origin) {
assets.push(url);
}
});
const uniqueAssets = [...new Set(assets)];
for (const assetUrl of uniqueAssets) {
try {
const parsed = urlLib.parse(assetUrl);
const relativePath = parsed.pathname.replace(/^\//, '');
const assetPath = path.join(outputDir, relativePath);
mkdirp.sync(path.dirname(assetPath));
const res = await axios.get(assetUrl, { responseType: 'arraybuffer' });
fs.writeFileSync(assetPath, res.data);
console.log(chalk.green(`✔ Downloaded: ${relativePath}`));
// Rewrite HTML
$('*').each((_, el) => {
const tag = $(el);
['src', 'href'].forEach(attr => {
if (tag.attr(attr) === assetUrl) {
tag.attr(attr, relativePath);
}
});
if (tag.is('style')) {
let newCss = tag.html().replace(
new RegExp(assetUrl, 'g'),
relativePath
);
tag.html(newCss);
}
});
} catch (e) {
console.error(chalk.red(`✖ Failed to download ${assetUrl}: ${e.message}`));
}
}
const newHtml = $.html();
fs.writeFileSync(path.join(outputDir, 'index.html'), newHtml);
console.log(chalk.green('✅ HTML updated and saved.'));
}
getBaseUrl($, filePath) {
const baseTag = $('base').attr('href');
if (baseTag && baseTag.startsWith('http')) return baseTag;
const html = fs.readFileSync(filePath, 'utf8');
const matches = html.match(/https?:\/\/[^\s"'<>]+/g);
if (matches && matches.length) return matches[0];
throw new Error('Could not determine base URL from HTML');
}
};