UNPKG

moshai-cli

Version:

A modern, fast Node.js CLI powered by arasadrahman

92 lines (81 loc) 3.79 kB
const fs = require('fs'); const path = require('path'); const os = require('os'); const axios = require('axios'); const cheerio = require('cheerio'); const mkdirp = require('mkdirp'); const chalk = require('chalk').default; const urlLib = require('url'); module.exports = class DownloadFileAssetsCommand { static signature = 'download:assets {htmlFilePath} {outputDir?}'; static description = 'Download assets (img, css, js, fonts) from main site only and rewrite paths'; async handle({ htmlFilePath, outputDir }) { if (!fs.existsSync(htmlFilePath)) { console.error(chalk.red(`❌ File not found: ${htmlFilePath}`)); return; } const html = fs.readFileSync(htmlFilePath, 'utf8'); const $ = cheerio.load(html); const baseUrl = this.getBaseUrl($, htmlFilePath); const hostname = new URL(baseUrl).hostname; const defaultOut = path.join(os.homedir(), 'Desktop', hostname); outputDir = outputDir ? path.resolve(outputDir) : defaultOut; mkdirp.sync(outputDir); const assets = []; $('img[src],script[src],link[href],style').each((_, el) => { const tag = $(el); let attr = tag.is('link') || tag.is('style') ? 'href' : 'src'; if (tag.is('style')) { const cssText = tag.html(); const matches = [...cssText.matchAll(/url\(["']?(https?:\/\/[^"')]+)["']?\)/g)]; matches.forEach(match => assets.push(match[1])); return; } const url = tag.attr(attr); if (url && url.startsWith('http') && new URL(url).origin === new URL(baseUrl).origin) { assets.push(url); } }); const uniqueAssets = [...new Set(assets)]; for (const assetUrl of uniqueAssets) { try { const parsed = urlLib.parse(assetUrl); const relativePath = parsed.pathname.replace(/^\//, ''); const assetPath = path.join(outputDir, relativePath); mkdirp.sync(path.dirname(assetPath)); const res = await axios.get(assetUrl, { responseType: 'arraybuffer' }); fs.writeFileSync(assetPath, res.data); console.log(chalk.green(`✔ Downloaded: ${relativePath}`)); // Rewrite HTML $('*').each((_, el) => { const tag = $(el); ['src', 'href'].forEach(attr => { if (tag.attr(attr) === assetUrl) { tag.attr(attr, relativePath); } }); if (tag.is('style')) { let newCss = tag.html().replace( new RegExp(assetUrl, 'g'), relativePath ); tag.html(newCss); } }); } catch (e) { console.error(chalk.red(`✖ Failed to download ${assetUrl}: ${e.message}`)); } } const newHtml = $.html(); fs.writeFileSync(path.join(outputDir, 'index.html'), newHtml); console.log(chalk.green('✅ HTML updated and saved.')); } getBaseUrl($, filePath) { const baseTag = $('base').attr('href'); if (baseTag && baseTag.startsWith('http')) return baseTag; const html = fs.readFileSync(filePath, 'utf8'); const matches = html.match(/https?:\/\/[^\s"'<>]+/g); if (matches && matches.length) return matches[0]; throw new Error('Could not determine base URL from HTML'); } };