anydownload
Version:
A powerful website downloader with GUI support
861 lines (762 loc) • 33.7 kB
JavaScript
const axios = require('axios');
const fs = require('fs-extra');
const path = require('path');
const cheerio = require('cheerio');
const puppeteer = require('puppeteer');
const cliProgress = require('cli-progress');
const { URL } = require('url');
const crypto = require('crypto');
const mime = require('mime-types');
const { pipeline } = require('stream');
const { promisify } = require('util');
const undici = require('undici');
const EventEmitter = require('events');
const { SitemapStream, streamToPromise } = require('sitemap');
const { createGzip, createGunzip, createInflate } = require('zlib');
const { createProxyMiddleware } = require('http-proxy-middleware');
const rateLimit = require('express-rate-limit');
const playwright = require('playwright');
// Add brotli decompression support
const brotli = require('brotli');
const streamPipeline = promisify(pipeline);
// Check if a string is a valid URL
function isValidUrl(url) {
if (typeof url !== 'string') {
return false;
}
try {
new URL(url);
return true;
} catch {
return false;
}
}
// Check if dynamic rendering is needed for a URL
async function checkNeedDynamic(url, userAgent) {
if (!isValidUrl(url)) {
throw new Error('Invalid URL');
}
try {
const res = await axios.get(url, {
headers: { 'User-Agent': userAgent }
});
const html = res.data;
if (html.length < 5000 || /<div id="app"|ng-app|window\.__INITIAL_STATE__|<script src="\/_next\//.test(html)) {
return true;
}
return false;
} catch {
return true;
}
}
// Normalize a URL relative to a base URL
function normalizeUrl(u, base) {
if (typeof u !== 'string' || typeof base !== 'string') {
return null;
}
try {
const absoluteUrl = new URL(u, base).href;
if (!absoluteUrl.startsWith('http://') && !absoluteUrl.startsWith('https://')) {
return null;
}
return absoluteUrl;
} catch (e) {
return null;
}
}
// Get filename from URL, add extension if missing
function getFilenameFromUrl(resourceUrl, contentType = '') {
const url = new URL(resourceUrl);
let filepath = url.pathname;
// Remove leading slash if exists
if (filepath.startsWith('/')) {
filepath = filepath.substring(1);
}
// If path is empty or just '/', use 'index'
if (!filepath || filepath === '/') {
filepath = 'index';
}
// Add extension if missing
if (!path.extname(filepath) && contentType) {
const ext = mime.extension(contentType);
if (ext) filepath += '.' + ext;
}
// Replace invalid characters
filepath = filepath.replace(/[\\?%*:|"<>]/g, '_');
return filepath;
}
// Hash a URL for deduplication
function hashUrl(url) {
return crypto.createHash('sha1').update(url).digest('hex');
}
class Downloader extends EventEmitter {
constructor(options = {}) {
super();
// Download options
this.delay = options.delay || 1000;
this.userAgent = options.userAgent;
this.dynamic = options.dynamic || false;
this.onResource = options.onResource || (() => { });
this.onError = options.onError || (() => { });
this.concurrency = options.concurrency || 5;
this.cookie = options.cookie || '';
this.failedResources = [];
this.successCount = 0;
this.failCount = 0;
this.downloadedBytes = 0;
this.visited = new Set();
this.recursive = options.recursive || false;
this.maxDepth = options.maxDepth || 1;
this.outputDir = options.outputDir || path.join(__dirname, '..', 'downloaded_site');
this.verbose = options.verbose || false;
this.retry = options.retry || 3;
this.type = options.type || 'all';
this.gzip = options.gzip !== false;
this.resourceHashSet = new Set();
this.filterRegex = options.filterRegex ? new RegExp(options.filterRegex) : null;
this.headless = options.headless !== false;
this.browserType = options.browserType || 'puppeteer';
this.paused = false;
this.cancelled = false;
this.resumeCallback = null;
this.proxy = options.proxy || null;
this.speedLimit = options.speedLimit || 0;
this.resumeDownload = options.resumeDownload || false;
this.sitemapEnabled = options.sitemapEnabled || false;
this.rateLimit = options.rateLimit || null;
this.timeout = options.timeout || 30000;
this.maxFileSize = options.maxFileSize || 0;
this.retryDelay = options.retryDelay || 1000;
this.validateSSL = options.validateSSL !== false;
this.followRedirects = options.followRedirects !== false;
this.maxRedirects = options.maxRedirects || 5;
this.keepOriginalUrls = options.keepOriginalUrls || false;
this.cleanUrls = options.cleanUrls || false;
this.ignoreErrors = options.ignoreErrors || false;
this.parallelLimit = options.parallelLimit || 5;
this.downloadQueue = [];
this.activeDownloads = 0;
this.totalSize = 0;
this.startTime = Date.now();
this.lastProgressUpdate = Date.now();
this.progressInterval = options.progressInterval || 1000;
this.loginUrl = options.loginUrl || null;
this.loginForm = options.loginForm || null;
this.loginCredentials = options.loginCredentials || null;
}
pause() {
this.paused = true;
}
resume() {
this.paused = false;
if (this.resumeCallback) this.resumeCallback();
}
cancel() {
this.cancelled = true;
}
// Generate sitemap.xml.gz if enabled
async generateSitemap() {
if (!this.sitemapEnabled) return;
const sitemap = new SitemapStream({ hostname: this.baseUrl });
const pipeline = sitemap.pipe(createGzip());
for (const url of this.visited) {
sitemap.write({ url, changefreq: 'daily', priority: 0.7 });
}
sitemap.end();
const data = await streamToPromise(pipeline);
await fs.writeFile(path.join(this.outputDir, 'sitemap.xml.gz'), data);
}
// Download with resume support
async downloadWithResume(url, filePath) {
if (!this.resumeDownload) {
return this.downloadResource(url, filePath);
}
const fileExists = await fs.pathExists(filePath);
if (!fileExists) {
return this.downloadResource(url, filePath);
}
const stat = await fs.stat(filePath);
const headers = { 'Range': `bytes=${stat.size}-` };
try {
const response = await axios.get(url, { headers, responseType: 'stream' });
const writer = fs.createWriteStream(filePath, { flags: 'a' });
await streamPipeline(response.data, writer);
} catch (error) {
if (error.response?.status === 416) {
// File already complete
return;
}
throw error;
}
}
// Download with speed limit
async downloadWithSpeedLimit(url, filePath) {
if (!this.speedLimit) {
return this.downloadResource(url, filePath);
}
const response = await axios.get(url, { responseType: 'stream' });
const writer = fs.createWriteStream(filePath);
const reader = response.data;
let downloaded = 0;
const startTime = Date.now();
reader.on('data', (chunk) => {
downloaded += chunk.length;
const elapsed = (Date.now() - startTime) / 1000;
const speed = downloaded / elapsed;
if (speed > this.speedLimit) {
const delay = (downloaded / this.speedLimit) - elapsed;
if (delay > 0) {
reader.pause();
setTimeout(() => reader.resume(), delay * 1000);
}
}
});
await streamPipeline(reader, writer);
}
// Download with proxy (not implemented)
async downloadWithProxy(url, filePath) {
if (!this.proxy) {
return this.downloadResource(url, filePath);
}
const proxyConfig = {
target: url,
changeOrigin: true,
...this.proxy
};
const proxyMiddleware = createProxyMiddleware(proxyConfig);
// Proxy download logic to be implemented
}
// Validate resource (SSL, file size)
async validateResource(url, filePath) {
if (!this.validateSSL) return true;
try {
const response = await axios.head(url);
const contentType = response.headers['content-type'];
const contentLength = response.headers['content-length'];
if (this.maxFileSize && contentLength > this.maxFileSize) {
throw new Error('File size exceeds limit');
}
return true;
} catch (error) {
if (this.ignoreErrors) return false;
throw error;
}
}
// Clean URL (remove query/hash)
async cleanUrl(url) {
if (!this.cleanUrls) return url;
const parsed = new URL(url);
parsed.search = '';
parsed.hash = '';
return parsed.toString();
}
// Main website download logic
async downloadWebsite(url, depth = 0, baseDir = null) {
console.log('[DEBUG] url received in downloadWebsite:', url);
if (this.visited.has(url) || this.cancelled) return;
this.visited.add(url);
const host = new URL(url).host.replace(/[:\/\\]/g, '_');
baseDir = baseDir || path.join(this.outputDir, host);
await fs.ensureDir(baseDir);
if (this.sitemapEnabled) {
await this.generateSitemap();
}
let html;
try {
if (this.dynamic) {
console.log('[DEBUG] Dynamic mode enabled, fetching dynamic HTML...');
html = await this.fetchDynamicHtml(url);
} else {
console.log('[DEBUG] Static mode enabled, fetching static HTML...');
html = await this.fetchStaticHtml(url);
}
} catch (error) {
console.error('[DEBUG] Error fetching HTML:', error);
this.onError && this.onError(error.message);
throw error;
}
const $ = cheerio.load(html);
let resources = [];
// Collect resources
$('img[src],link[rel="stylesheet"][href],script[src],link[rel="manifest"][href]').each((_, el) => {
const src = $(el).attr('src') || $(el).attr('href');
if (src && !src.startsWith('data:')) resources.push(src);
if ($(el).attr('srcset')) {
$(el).attr('srcset').split(',').forEach(item => {
const s = item.trim().split(' ')[0];
if (s && !s.startsWith('data:')) resources.push(s);
});
}
});
$('link[rel="icon"],link[rel="shortcut icon"],link[rel="apple-touch-icon"]').each((_, el) => {
const href = $(el).attr('href');
if (href && !href.startsWith('data:')) resources.push(href);
});
$('link[rel="preload"][as="font"],style').each((_, el) => {
if ($(el).attr('href')) resources.push($(el).attr('href'));
if (el.tagName === 'style') {
const css = $(el).html();
const fontUrls = [...css.matchAll(/url\(['"]?([^'")]+)['"]?\)/g)].map(m => m[1]);
fontUrls.forEach(fu => { if (!fu.startsWith('data:')) resources.push(fu); });
}
});
$('video[src],audio[src],source[src]').each((_, el) => {
const src = $(el).attr('src');
if (src && !src.startsWith('data:')) resources.push(src);
});
$('iframe[src],object[data],embed[src]').each((_, el) => {
const src = $(el).attr('src') || $(el).attr('data');
if (src && !src.startsWith('data:')) resources.push(src);
});
$('[style]').each((_, el) => {
const style = $(el).attr('style');
const matches = [...style.matchAll(/url\(['"]?([^'")]+)['"]?\)/g)];
matches.forEach(m => {
if (m[1] && !m[1].startsWith('data:')) resources.push(m[1]);
});
});
// Debug: print raw resources
console.log('[DEBUG] Raw resources found by Cheerio before initial processing:', resources.length, resources);
// Manual collection for debugging
let rawCollectedResources = [];
for (const resource of resources) {
rawCollectedResources.push(resource);
}
resources = rawCollectedResources;
// 新增:取得本地儲存路徑(支援外部資源)
const getLocalPathForResource = (absUrl) => {
try {
const urlObj = new URL(absUrl);
// 外部資源存 external/域名/路徑
if (urlObj.hostname !== baseUrl.hostname) {
return path.join('external', urlObj.hostname, urlObj.pathname.replace(/^\//, '')).replace(/\\/g, '/');
} else {
// 主站資源維持原本結構
return urlObj.pathname.replace(/^\//, '');
}
} catch {
return null;
}
};
// 取代 HTML 內所有資源連結為本地路徑
$('img[src],link[rel="stylesheet"][href],script[src],link[rel="manifest"][href]').each((_, el) => {
const attr = $(el).attr('src') ? 'src' : 'href';
const orig = $(el).attr(attr);
if (orig && !orig.startsWith('data:') && !orig.startsWith('#')) {
const abs = normalizeUrl(orig, url);
const localPath = getLocalPathForResource(abs);
if (localPath) $(el).attr(attr, localPath);
}
});
// 取代 srcset
$('[srcset]').each((_, el) => {
const srcset = $(el).attr('srcset');
if (srcset) {
const newSrcset = srcset.split(',').map(item => {
const [src, size] = item.trim().split(' ');
if (src && !src.startsWith('data:') && !src.startsWith('#')) {
const abs = normalizeUrl(src, url);
const localPath = getLocalPathForResource(abs);
if (localPath) {
return size ? `${localPath} ${size}` : localPath;
}
}
return item;
}).join(', ');
$(el).attr('srcset', newSrcset);
}
});
// 取代 style 內的背景圖
$('[style]').each((_, el) => {
const style = $(el).attr('style');
if (style) {
const newStyle = style.replace(/url\(['"]?([^'")]+)['"]?\)/g, (match, sUrl) => {
if (!sUrl.startsWith('data:') && !sUrl.startsWith('#')) {
const abs = normalizeUrl(sUrl, url);
const localPath = getLocalPathForResource(abs);
if (localPath) {
return `url(\"${localPath}\")`;
}
}
return match;
});
$(el).attr('style', newStyle);
}
});
// 收集所有資源(不論主域名或外部)
resources = resources
.map(r => normalizeUrl(r, url))
.filter(r => !!r)
.filter(r => {
const hash = hashUrl(r);
if (this.resourceHashSet.has(hash)) return false;
this.resourceHashSet.add(hash);
return true;
});
// Type and regex filters
resources = resources.filter(r => {
if (this.type !== 'all') {
const ext = path.extname(r).toLowerCase();
const isFiltered = (this.type === 'image' && !/\.(png|jpe?g|gif|svg|webp|bmp|ico|avif)$/i.test(ext)) ||
(this.type === 'css' && ext !== '.css') ||
(this.type === 'js' && ext !== '.js') ||
(this.type === 'html' && !/\.html?$/i.test(ext)) ||
(this.type === 'media' && !/\.(mp4|mp3|ogg|wav|webm|m4a|aac)$/i.test(ext));
return !isFiltered;
}
return true;
});
resources = resources.filter(r => {
if (this.filterRegex && !this.filterRegex.test(r)) {
return false;
}
return true;
});
// Save HTML file
const baseUrl = new URL(url);
const hostDir = baseUrl.host.replace(/[:\/\\]/g, '_');
// Function to convert absolute URL to relative path
const getRelativePath = (absUrl) => {
if (!absUrl) return null;
try {
const targetUrl = new URL(absUrl);
// Check if it's from the same domain or a subdomain
if (targetUrl.hostname === baseUrl.hostname ||
targetUrl.hostname.endsWith('.' + baseUrl.hostname) ||
baseUrl.hostname.endsWith('.' + targetUrl.hostname)) {
// Get the path relative to the base directory
const targetPath = targetUrl.pathname;
// Remove leading slash and ensure it's relative
return targetPath.startsWith('/') ? targetPath.substring(1) : targetPath;
}
} catch (e) {
return null;
}
return null;
};
// Convert all resource URLs to local paths using getLocalPathForResource
$('a[href],img[src],link[rel="stylesheet"][href],script[src],link[rel="manifest"][href]').each((_, el) => {
const attr = $(el).attr('src') ? 'src' : 'href';
const orig = $(el).attr(attr);
if (orig && !orig.startsWith('data:') && !orig.startsWith('#')) {
const abs = normalizeUrl(orig, url);
const localPath = getLocalPathForResource(abs);
if (localPath) {
$(el).attr(attr, localPath);
}
}
});
// Handle srcset attributes
$('[srcset]').each((_, el) => {
const srcset = $(el).attr('srcset');
if (srcset) {
const newSrcset = srcset.split(',').map(item => {
const [src, size] = item.trim().split(' ');
if (src && !src.startsWith('data:') && !src.startsWith('#')) {
const abs = normalizeUrl(src, url);
const localPath = getLocalPathForResource(abs);
if (localPath) {
return size ? `${localPath} ${size}` : localPath;
}
}
return item;
}).join(', ');
$(el).attr('srcset', newSrcset);
}
});
// Handle background images in style attributes
$('[style]').each((_, el) => {
const style = $(el).attr('style');
if (style) {
const newStyle = style.replace(/url\(['"]?([^'")]+)['"]?\)/g, (match, sUrl) => {
if (!sUrl.startsWith('data:') && !sUrl.startsWith('#')) {
const abs = normalizeUrl(sUrl, url);
const localPath = getLocalPathForResource(abs);
if (localPath) {
return `url("${localPath}")`;
}
}
return match;
});
$(el).attr('style', newStyle);
}
});
await fs.writeFile(path.join(baseDir, this._getPageFilename(url)), $.html());
// Download resources
let resIdx = 0;
const downloadResource = async (resource, idx) => {
let attempt = 0;
const abs = resource;
if (!abs) {
this.failCount++;
this.failedResources.push({ url: resource, error: 'Invalid URL' });
this.onError && this.onError(`Invalid URL: ${resource}`);
return;
}
// 使用 getLocalPathForResource 決定本地儲存路徑
let localPath = getLocalPathForResource(abs);
if (!localPath) {
this.failCount++;
this.failedResources.push({ url: resource, error: 'Invalid URL' });
this.onError && this.onError(`Invalid URL: ${resource}`);
return;
}
// 確保檔案有正確的副檔名
const contentType = '';
const ext = mime.extension(contentType);
if (ext && !localPath.endsWith('.' + ext)) {
localPath += '.' + ext;
}
const savePath = path.join(baseDir, localPath);
// Ensure the directory exists
await fs.ensureDir(path.dirname(savePath));
while (attempt < this.retry) {
if (this.cancelled) return;
if (this.paused) {
await new Promise(resolve => this.resumeCallback = resolve);
}
try {
// Update progress
const now = Date.now();
const elapsed = (now - this.startTime) / 1000;
const speed = elapsed > 0 ? (this.downloadedBytes / 1024 / elapsed).toFixed(1) : 0;
const eta = speed > 0 ? ((resources.length - (idx + 1)) * 1024 / speed).toFixed(1) : 0;
this.onResource(abs, idx + 1, resources.length, speed, eta);
if (fs.existsSync(savePath)) {
this.successCount++;
return;
}
const res = await undici.request(abs, {
method: 'GET',
headers: {
'User-Agent': this.userAgent,
...(this.cookie ? { Cookie: this.cookie } : {}),
'Accept-Encoding': this.gzip ? 'gzip, deflate, br' : undefined
},
maxRedirections: 5
});
const contentType = res.headers['content-type'] || '';
const contentEncoding = res.headers['content-encoding'] || '';
const fileStream = fs.createWriteStream(savePath);
// Handle compression based on content-encoding header
let streamToWrite;
if (contentEncoding === 'gzip') {
streamToWrite = res.body.pipe(createGunzip());
} else if (contentEncoding === 'deflate') {
streamToWrite = res.body.pipe(createInflate());
} else if (contentEncoding === 'br') {
// For brotli, we need to handle it differently since it's not a stream
try {
const chunks = [];
for await (const chunk of res.body) {
chunks.push(chunk);
}
const buffer = Buffer.concat(chunks);
const decompressed = brotli.decompress(buffer);
if (decompressed) {
await fs.writeFile(savePath, decompressed);
} else {
// If brotli decompression fails, write the original buffer
await fs.writeFile(savePath, buffer);
}
const stat = await fs.stat(savePath);
this.downloadedBytes += stat.size;
this.successCount++;
await new Promise(r => setTimeout(r, this.delay));
return;
} catch (brotliError) {
console.log(`[DEBUG] Brotli decompression failed for ${abs}, writing original buffer`);
// If brotli decompression fails, try to write the original buffer
const chunks = [];
for await (const chunk of res.body) {
chunks.push(chunk);
}
const buffer = Buffer.concat(chunks);
await fs.writeFile(savePath, buffer);
const stat = await fs.stat(savePath);
this.downloadedBytes += stat.size;
this.successCount++;
await new Promise(r => setTimeout(r, this.delay));
return;
}
} else {
streamToWrite = res.body;
}
await streamPipeline(streamToWrite, fileStream);
const stat = await fs.stat(savePath);
this.downloadedBytes += stat.size;
this.successCount++;
await new Promise(r => setTimeout(r, this.delay));
return;
} catch (err) {
attempt++;
if (attempt >= this.retry) {
this.failCount++;
let msg = err.message;
if (msg.includes('403')) msg += ' (Permission denied, maybe anti-bot)';
if (msg.includes('429')) msg += ' (Too many requests, try slower)';
if (msg.match(/cloudflare|captcha/i)) msg += ' (Cloudflare/captcha detected)';
this.failedResources.push({ url: abs, error: msg });
this.onError && this.onError(`Failed: ${abs} (${msg})`);
}
}
}
};
// Download resources with concurrency
const runBatch = async () => {
while (resIdx < resources.length && !this.cancelled) {
const batch = [];
for (let c = 0; c < this.concurrency && resIdx < resources.length; c++, resIdx++) {
batch.push(downloadResource(resources[resIdx], resIdx));
}
await Promise.all(batch);
}
};
await runBatch();
// Recursively download same-domain pages
if (this.recursive && depth < this.maxDepth) {
const pageLinks = [];
$('a[href]').each((_, el) => {
const href = $(el).attr('href');
const abs = normalizeUrl(href, url);
if (abs && abs.startsWith(new URL(url).origin) && !this.visited.has(abs)) {
if (this.filterRegex && !this.filterRegex.test(abs)) return;
pageLinks.push(abs);
}
});
for (const link of pageLinks) {
await this.downloadWebsite(link, depth + 1, baseDir);
}
}
}
// Get filename for a page
_getPageFilename(url) {
const u = new URL(url);
let filename = u.pathname.replace(/\/$/, '') || 'index';
filename = filename.replace(/[\/\\?%*:|"<>]/g, '_');
if (!filename.endsWith('.html')) filename += '.html';
return filename;
}
// Fetch static HTML
async fetchStaticHtml(url) {
const res = await axios.get(url, {
headers: {
'User-Agent': this.userAgent,
...(this.cookie ? { Cookie: this.cookie } : {}),
'Accept-Encoding': this.gzip ? 'gzip, deflate, br' : undefined
}
});
return res.data;
}
// Puppeteer login logic
async handleLogin(page) {
if (!this.loginUrl || !this.loginForm || !this.loginCredentials) return;
console.log('[DEBUG] Attempting to login...');
try {
await page.goto(this.loginUrl);
// Fill login form
for (const [field, value] of Object.entries(this.loginForm)) {
await page.type(field, this.loginCredentials[value]);
}
// Submit form
await Promise.all([
page.waitForNavigation(),
page.click('button[type="submit"]')
]);
// Check if login succeeded
const currentUrl = page.url();
const pageContent = await page.content();
if (currentUrl === this.loginUrl ||
pageContent.includes('error') ||
pageContent.includes('invalid') ||
pageContent.includes('incorrect')) {
throw new Error('invalid_credentials');
}
console.log('[DEBUG] Login completed successfully');
} catch (error) {
console.log('[DEBUG] Login failed:', error.message);
if (error.message === 'invalid_credentials') {
throw new Error('invalid_credentials');
}
throw new Error('login_failed');
}
}
// Fetch dynamic HTML using Puppeteer or Playwright (all features supported)
async fetchDynamicHtml(url) {
console.log('[DEBUG] Inside fetchDynamicHtml for URL:', url);
// Puppeteer branch
if (this.browserType === 'puppeteer') {
const browser = await puppeteer.launch({ headless: this.headless ? 'new' : false });
const page = await browser.newPage();
await page.setUserAgent(this.userAgent);
if (this.cookie) {
await page.setExtraHTTPHeaders({ Cookie: this.cookie });
}
try {
if (this.loginUrl) {
await this.handleLogin(page);
}
await page.goto(url, { waitUntil: 'networkidle2', timeout: 60000 });
// Wait for a fixed duration
console.log('[DEBUG] Waiting for 5 seconds...');
await new Promise(resolve => setTimeout(resolve, 5000));
console.log('[DEBUG] Finished waiting.');
const html = await page.content();
await browser.close();
console.log('[DEBUG] Successfully fetched dynamic HTML. Length:', html.length);
return html;
} catch (error) {
await browser.close();
console.log('[DEBUG] Error in fetchDynamicHtml:', error.message);
throw error;
}
}
// Playwright branch (all features supported)
if (this.browserType === 'playwright') {
const browser = await playwright.chromium.launch({ headless: this.headless !== false });
const context = await browser.newContext({
userAgent: this.userAgent,
...(this.cookie ? { extraHTTPHeaders: { Cookie: this.cookie } } : {})
});
const page = await context.newPage();
try {
// Login logic for Playwright (same as Puppeteer)
if (this.loginUrl && this.loginForm && this.loginCredentials) {
console.log('[DEBUG] Attempting to login with Playwright...');
await page.goto(this.loginUrl);
// Fill login form
for (const [field, value] of Object.entries(this.loginForm)) {
await page.fill(field, this.loginCredentials[value]);
}
// Submit form
await Promise.all([
page.waitForNavigation(),
page.click('button[type="submit"]')
]);
// Check if login succeeded
const currentUrl = page.url();
const pageContent = await page.content();
if (currentUrl === this.loginUrl ||
pageContent.includes('error') ||
pageContent.includes('invalid') ||
pageContent.includes('incorrect')) {
throw new Error('invalid_credentials');
}
console.log('[DEBUG] Playwright login completed successfully');
}
await page.goto(url, { waitUntil: 'networkidle', timeout: 60000 });
// Wait for a fixed duration
await page.waitForTimeout(5000);
const html = await page.content();
await browser.close();
return html;
} catch (error) {
await browser.close();
console.log('[DEBUG] Error in Playwright fetchDynamicHtml:', error.message);
throw error;
}
}
throw new Error('Only puppeteer and playwright supported now');
}
}
module.exports = { Downloader, checkNeedDynamic };