// choco-bot
// Version:
// Whatsapp-bot
// 73 lines (55 loc) • 2.51 kB
// JavaScript
const puppeteer = require('puppeteer');
const fs = require('fs');
const path = require('path');
const archiver = require('archiver');
const { URL } = require('url'); // Added to parse the URL
const sanitizeFilename = require('sanitize-filename'); // Added to sanitize filenames
/**
 * Navigates the page to a resource URL and returns the response body.
 *
 * @param {object} page - Puppeteer page used to issue the request.
 * @param {string} resourceUrl - Absolute URL of the resource to fetch.
 * @returns {Promise<Buffer>} The raw response body.
 * @throws {Error} If the navigation yields no response or a non-2xx status.
 *   Errors raised by Puppeteer itself (for example when the browser aborts
 *   the navigation) propagate unchanged.
 */
async function fetchResourceBody(page, resourceUrl) {
  const response = await page.goto(resourceUrl);
  if (response === null) {
    throw new Error('navigation produced no response');
  }
  if (!response.ok()) {
    throw new Error(`unexpected HTTP status ${response.status()}`);
  }
  return response.buffer();
}

/**
 * Writes the saved files, plus an optional list of links that could not be
 * given a file name, into a zip archive and waits until it is fully on disk.
 *
 * @param {string} zipFilePath - Destination path of the zip file.
 * @param {Array<{filePath: string, name: string}>} savedFiles - Files on disk
 *   to add, with the entry name each should get inside the archive.
 * @param {string[]} unsavedLinks - URLs to list in a single
 *   `links_without_extension.txt` entry; the entry is omitted when empty.
 * @returns {Promise<void>} Resolves once the output stream has closed.
 */
async function writeZipArchive(zipFilePath, savedFiles, unsavedLinks) {
  const archive = archiver('zip', {
    zlib: { level: 9 }, // Maximum compression level
  });
  const output = fs.createWriteStream(zipFilePath);

  // The archive is only complete once the output stream has closed, so the
  // listeners are attached before any data is piped.
  const flushed = new Promise((resolve, reject) => {
    output.on('close', resolve);
    output.on('error', reject);
    archive.on('error', reject);
  });

  archive.pipe(output);
  for (const { filePath, name } of savedFiles) {
    archive.file(filePath, { name });
  }
  if (unsavedLinks.length > 0) {
    // One entry holding every link, instead of one same-named entry per link.
    archive.append(`${unsavedLinks.join('\n')}\n`, { name: 'links_without_extension.txt' });
  }

  // Awaiting both together ensures a failure on either side is observed.
  await Promise.all([flushed, archive.finalize()]);
}

/**
 * Entry point: loads the target page, collects the URLs of the resources it
 * requested, saves each resource into the download folder under a sanitized
 * file name, and bundles the saved files into
 * `<hostname>_downloaded_resources.zip` inside that folder.
 *
 * A resource that cannot be fetched is reported with a warning and skipped,
 * so one bad URL does not abort the whole run.
 */
(async () => {
  const url = 'https://preview.colorlib.com/theme/neutral/index.html'; // Replace with your desired URL
  const downloadFolder = './downloaded_resources'; // Change this to your desired folder path

  // `recursive: true` also makes this a no-op when the folder already exists.
  fs.mkdirSync(downloadFolder, { recursive: true });

  const savedFiles = [];
  const unsavedLinks = [];

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'networkidle2' });

    const resourceUrls = await page.evaluate(() =>
      window.performance.getEntriesByType('resource').map((entry) => entry.name));

    // A Set drops repeated URLs so each resource is fetched and archived once.
    for (const resourceUrl of new Set(resourceUrls)) {
      // Skip specific resources by URL pattern
      if (resourceUrl.includes('https://fonts.googleapis.com/css?family=Nunito+Sans')) {
        continue;
      }

      // Replace colons and slashes with underscores, then strip whatever else
      // is not valid in a file name.
      const sanitizedFileName = sanitizeFilename(resourceUrl.replace(/[:/]/g, '_'));
      if (!sanitizedFileName) {
        // No usable file name: remember the link for the text file in the zip.
        unsavedLinks.push(resourceUrl);
        continue;
      }

      const filePath = path.join(downloadFolder, sanitizedFileName);
      try {
        // Reading the body from the response avoids depending on the
        // browser's download handling and on a fixed wait.
        fs.writeFileSync(filePath, await fetchResourceBody(page, resourceUrl));
        savedFiles.push({ filePath, name: sanitizedFileName });
      } catch (err) {
        console.warn(`Skipping ${resourceUrl}: ${err.message}`);
      }
    }
  } finally {
    // Always release the browser process, even when a step above failed.
    await browser.close();
  }

  const { hostname } = new URL(url);
  const zipFileName = `${hostname}_downloaded_resources.zip`; // Adding hostname as a unique identifier
  await writeZipArchive(path.join(downloadFolder, zipFileName), savedFiles, unsavedLinks);
})().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});