UNPKG

bookgrabs

Version:

Interactive CLI tool for LibGen ebook searches and downloads with batch processing support

432 lines (365 loc) 14 kB
import fs from 'fs';
import path from 'path';
import readline from 'readline';
import { pipeline } from 'stream/promises';
import axios from 'axios';
import * as cheerio from 'cheerio';
import { addToBlacklist } from './blacklist.js';
import { sanitizeFilename, displayResults, getBookGrabsDirectory } from './utils.js';
import { cleanupFilename } from './ai.js';

// Mirror used both for the download landing page and for resolving relative links.
const LIBGEN_BASE_URL = 'https://libgen.bz';

// How many alternative results are shown when a download fails and a retry is offered.
const MAX_ALTERNATIVES_SHOWN = 10;

// Selectors tried, in order, to locate the real download link on the landing page.
const DOWNLOAD_LINK_SELECTORS = [
  'a[href*="get"]',
  'a:contains("GET")',
  'a[href*="download"]',
  'a[href*=".epub"]',
  'a[href*=".pdf"]',
  'a[href*=".mobi"]',
  'a[href*=".azw3"]',
  'a[href*="cloudflare"]',
  'a[href*="library"]',
];

// Abort controllers created by this module for calls that did not bring their
// own signal. A set (rather than a single variable) lets cancelAllDownloads()
// reach every in-flight download, not just the most recently started one.
const activeControllers = new Set();

/**
 * Create an AbortController and register it so cancelAllDownloads() can abort it.
 * @returns {AbortController} The newly registered controller.
 */
function registerController() {
  const controller = new AbortController();
  activeControllers.add(controller);
  return controller;
}

/**
 * Build an axios instance whose requests are cancelled when `signal` aborts.
 * @param {AbortSignal} signal - Signal forwarded to every request of the instance.
 * @returns {import('axios').AxiosInstance} Instance with a 30 s timeout and a browser User-Agent.
 */
function createCancellableAxios(signal) {
  return axios.create({
    signal,
    timeout: 30000,
    headers: {
      'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    },
  });
}

/**
 * Tell whether an error represents a cancellation (abort signal or our own marker error).
 * @param {Error} error - Error caught while preparing a download.
 * @returns {boolean} True when the error means "cancelled", not "failed".
 */
function isCancellation(error) {
  return (
    error.name === 'AbortError' ||
    error.name === 'CanceledError' ||
    error.message === 'Operation cancelled'
  );
}

/**
 * Classify a request failure to decide whether the book itself is to blame.
 * @param {Error} error - Error thrown by axios or by the preparation steps.
 * @returns {'network'|'server'|'other'} 'network' and 'server' are transient and
 *   must not blacklist the book; everything else (including timeouts and
 *   redirect loops) is treated as a problem with this particular result.
 */
function classifyError(error) {
  if (error.code === 'ENOTFOUND' || error.code === 'ECONNREFUSED') {
    return 'network';
  }
  if (error.response && error.response.status >= 500) {
    return 'server';
  }
  return 'other';
}

/**
 * Locate the download link on a parsed landing page.
 * @param {import('cheerio').CheerioAPI} $ - Parsed landing page.
 * @param {boolean} [verbose=false] - Log which strategy produced the link.
 * @returns {string|null} The raw `href` value, or null when no candidate exists.
 */
function findDownloadLink($, verbose = false) {
  for (const selector of DOWNLOAD_LINK_SELECTORS) {
    const href = $(selector).attr('href');
    if (href) {
      if (verbose) {
        console.log(`Found download link using selector "${selector}": ${href}`);
      }
      return href;
    }
  }

  // Fallback: the first absolute or protocol-relative link on the page.
  for (const elem of $('a').toArray()) {
    const href = $(elem).attr('href');
    if (href && (href.startsWith('http') || href.startsWith('//'))) {
      if (verbose) {
        console.log(`Found external link: ${href}`);
      }
      return href;
    }
  }

  return null;
}

/**
 * Print every link found on the page, to help diagnose a missing download link.
 * @param {import('cheerio').CheerioAPI} $ - Parsed landing page.
 */
function logAvailableLinks($) {
  console.log('Available links on page:');
  $('a').each((i, elem) => {
    const href = $(elem).attr('href');
    const text = $(elem).text().trim();
    if (href) {
      console.log(` - ${href} (text: "${text}")`);
    }
  });
}

/**
 * Turn a scraped `href` into an absolute URL.
 * @param {string} href - Protocol-relative, root-relative, relative or absolute link.
 * @returns {string} Absolute URL; relative forms are resolved against the mirror root.
 */
function toAbsoluteUrl(href) {
  if (href.startsWith('//')) {
    return `https:${href}`;
  }
  if (href.startsWith('/')) {
    return `${LIBGEN_BASE_URL}${href}`;
  }
  if (!href.startsWith('http')) {
    return `${LIBGEN_BASE_URL}/${href}`;
  }
  return href;
}

/**
 * Compute the destination path for a book and make sure its author directory exists.
 * @param {{title: string, ext: string, author: string}} book - Search result being downloaded.
 * @returns {Promise<string>} `<BookGrabs dir>/<sanitized author>/<cleaned title>.<ext>`.
 */
async function prepareTargetPath(book) {
  const safeTitle = await cleanupFilename(book.title);
  const filename = `${safeTitle}.${book.ext}`;
  const authorDir = path.join(getBookGrabsDirectory(), sanitizeFilename(book.author));
  await fs.promises.mkdir(authorDir, { recursive: true });
  return path.join(authorDir, filename);
}

/**
 * Stream a response body to disk, removing the partial file on any failure.
 *
 * The write stream is only created here, after the request succeeded, so a
 * failed request can no longer leave an empty file behind that later runs
 * would mistake for a finished download. `pipeline` also surfaces errors from
 * the source stream, which a bare `.pipe()` would silently drop.
 *
 * @param {import('stream').Readable} source - Response body stream.
 * @param {string} filePath - Destination file.
 * @param {AbortSignal} signal - Aborting it stops the transfer.
 * @returns {Promise<void>} Resolves once the file is fully written.
 * @throws {Error} The stream or abort error; the partial file has been removed.
 */
async function streamToFile(source, filePath, signal) {
  try {
    await pipeline(source, fs.createWriteStream(filePath), { signal });
  } catch (error) {
    try {
      await fs.promises.rm(filePath, { force: true });
    } catch {
      // Best-effort cleanup: never mask the transfer error with a cleanup error.
    }
    throw error;
  }
}

/**
 * After a result has been blacklisted, let the user pick another one.
 * @param {string} failedMd5 - md5 of the result that just failed.
 * @param {Array<object>|null} allResults - Results the failed one came from.
 * @returns {Promise<{nextBook: object, remaining: Array<object>}|null>} The user's
 *   choice plus the remaining candidates, or null when there is nothing to offer
 *   or the user declined.
 */
async function promptForAlternative(failedMd5, allResults) {
  if (!allResults || allResults.length <= 1) {
    return null;
  }
  console.log('\nThis result has been blacklisted. Would you like to try another result?');
  const remaining = allResults.filter((r) => r.md5 !== failedMd5);
  if (remaining.length === 0) {
    return null;
  }
  const choices = remaining.slice(0, MAX_ALTERNATIVES_SHOWN);
  displayResults(choices);
  const nextBook = await promptUser(choices);
  return nextBook ? { nextBook, remaining } : null;
}

/**
 * Interactive user prompt for result selection.
 * @param {Array<object>} results - Results previously displayed, in display order.
 * @returns {Promise<object|null>} The chosen result, or null when the answer is
 *   0, out of range, not a number, or stdin closed before an answer was given.
 */
export async function promptUser(results) {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  return new Promise((resolve) => {
    let answered = false;

    // Without this, closing stdin (e.g. Ctrl+D) would leave the promise pending forever.
    rl.once('close', () => {
      if (!answered) {
        console.log('Cancelled.');
        resolve(null);
      }
    });

    rl.question('\nEnter the number of the book to download (or 0 to cancel): ', (answer) => {
      answered = true;
      rl.close();
      const num = Number.parseInt(answer, 10);
      if (num > 0 && num <= results.length) {
        resolve(results[num - 1]);
      } else {
        console.log('Cancelled.');
        resolve(null);
      }
    });
  });
}

/**
 * Body of downloadBook(), run with a signal that is guaranteed to exist.
 * @param {object} book - Result to download (reads md5, title, ext, author).
 * @param {Array<object>|null} allResults - Candidates offered on failure.
 * @param {AbortSignal} signal - Cancellation signal.
 * @returns {Promise<void>}
 */
async function runInteractiveDownload(book, allResults, signal) {
  const { md5 } = book;
  const downloadPageUrl = `${LIBGEN_BASE_URL}/ads.php?md5=${md5}`;
  let fileResponse;
  let filePath;

  // Phase 1: find the link and open the file request.
  try {
    if (signal.aborted) {
      throw new Error('Operation cancelled');
    }

    console.log(`Fetching download page: ${downloadPageUrl}`);
    const axiosInstance = createCancellableAxios(signal);
    const response = await axiosInstance.get(downloadPageUrl);
    const $ = cheerio.load(response.data);

    const href = findDownloadLink($, true);
    if (!href) {
      logAvailableLinks($);
      // Blacklisting and the retry offer happen once, in the catch below.
      throw new Error('Could not find download link on mirror page.');
    }

    const actualUrl = toAbsoluteUrl(href);
    console.log(`Final download URL: ${actualUrl}`);

    filePath = await prepareTargetPath(book);
    if (fs.existsSync(filePath)) {
      console.log(`File already exists: ${filePath}`);
      return;
    }

    if (signal.aborted) {
      throw new Error('Operation cancelled');
    }

    console.log(`Downloading to: ${filePath}`);
    fileResponse = await axiosInstance({
      url: actualUrl,
      method: 'GET',
      responseType: 'stream',
      maxRedirects: 5, // Limit redirects to prevent infinite loops
    });
  } catch (error) {
    if (isCancellation(error)) {
      console.log('\nDownload cancelled by user.');
      return;
    }

    console.error('Error downloading book:', error.message);
    if (error.response) {
      console.error('Response status:', error.response.status);
      console.error('Response headers:', error.response.headers);
    }

    // Network and server errors say nothing about this particular result.
    if (classifyError(error) === 'other') {
      await addToBlacklist(md5, error.message);
      const alternative = await promptForAlternative(md5, allResults);
      if (alternative) {
        return downloadBook(alternative.nextBook, alternative.remaining, signal);
      }
    }

    throw error;
  }

  // Phase 2: transfer the body to disk.
  try {
    await streamToFile(fileResponse.data, filePath, signal);
  } catch (error) {
    if (signal.aborted) {
      // Kept as a rejection: callers already see this error for mid-transfer cancels.
      throw new Error('Download cancelled');
    }

    console.error('Download error:', error.message);
    await addToBlacklist(md5, `Download error: ${error.message}`);
    const alternative = await promptForAlternative(md5, allResults);
    if (alternative) {
      return downloadBook(alternative.nextBook, alternative.remaining, signal);
    }
    throw error;
  }

  console.log(`Downloaded: ${filePath}`);
}

/**
 * Main download function for interactive mode.
 *
 * Fetches the mirror landing page for `book.md5`, extracts the download link
 * and streams the file into `<BookGrabs dir>/<author>/`. When the result turns
 * out to be unusable it is blacklisted and, if `allResults` holds other
 * candidates, the user is asked to pick another one.
 *
 * @param {object} book - Result to download (reads md5, title, ext, author); falsy is a no-op.
 * @param {Array<object>|null} [allResults=null] - Candidates offered after a failure.
 * @param {AbortSignal|null} [signal=null] - Cancellation signal; when omitted, one is
 *   created and can be triggered through cancelAllDownloads().
 * @returns {Promise<void>} Resolves when the file was written, already existed, or the
 *   operation was cancelled before the transfer started.
 * @throws {Error} The request/stream error when the download fails and no alternative
 *   was chosen, or `Download cancelled` when aborted during the transfer.
 */
export async function downloadBook(book, allResults = null, signal = null) {
  if (!book) return;

  const ownController = signal ? null : registerController();
  try {
    return await runInteractiveDownload(book, allResults, signal ?? ownController.signal);
  } finally {
    if (ownController) {
      activeControllers.delete(ownController);
    }
  }
}

/**
 * Body of downloadBookSilent(), run with a signal that is guaranteed to exist.
 * @param {object} book - Result to download (reads md5, title, ext, author).
 * @param {((percent: number) => void)|null} onProgress - Progress callback.
 * @param {AbortSignal} signal - Cancellation signal.
 * @returns {Promise<object>} Result object, see downloadBookSilent().
 */
async function runSilentDownload(book, onProgress, signal) {
  const { md5 } = book;
  const downloadPageUrl = `${LIBGEN_BASE_URL}/ads.php?md5=${md5}`;
  let fileResponse;
  let filePath;

  // Phase 1: find the link and open the file request.
  try {
    if (signal.aborted) {
      return { success: false, error: 'Operation cancelled' };
    }

    const axiosInstance = createCancellableAxios(signal);
    const response = await axiosInstance.get(downloadPageUrl);
    const $ = cheerio.load(response.data);

    const href = findDownloadLink($);
    if (!href) {
      return { success: false, error: 'Could not find download link', shouldBlacklist: true };
    }
    const actualUrl = toAbsoluteUrl(href);

    filePath = await prepareTargetPath(book);
    if (fs.existsSync(filePath)) {
      return { success: false, error: 'File already exists', alreadyExists: true };
    }

    if (signal.aborted) {
      return { success: false, error: 'Operation cancelled' };
    }

    fileResponse = await axiosInstance({
      url: actualUrl,
      method: 'GET',
      responseType: 'stream',
      maxRedirects: 5, // Limit redirects to prevent infinite loops
    });
  } catch (error) {
    if (isCancellation(error)) {
      return { success: false, error: 'Operation cancelled' };
    }

    switch (classifyError(error)) {
      case 'network':
        return { success: false, error: 'Network error - not blacklisting', shouldBlacklist: false };
      case 'server':
        return { success: false, error: 'Server error - not blacklisting', shouldBlacklist: false };
      default:
        // Includes timeouts and redirect loops, which are likely permanent for this result.
        return { success: false, error: error.message, shouldBlacklist: true };
    }
  }

  // Progress is only meaningful when the server announced a length.
  const totalLength = Number.parseInt(fileResponse.headers['content-length'] || '0', 10);
  if (onProgress && totalLength > 0) {
    let downloadedLength = 0;
    fileResponse.data.on('data', (chunk) => {
      if (signal.aborted) return; // Stop tracking if cancelled
      downloadedLength += chunk.length;
      // Clamp: a decompressed body can be larger than the announced content-length.
      const progress = Math.min(100, Math.round((downloadedLength / totalLength) * 100));
      onProgress(progress);
    });
  }

  // Phase 2: transfer the body to disk.
  try {
    await streamToFile(fileResponse.data, filePath, signal);
  } catch (error) {
    if (signal.aborted) {
      return { success: false, error: 'Download cancelled' };
    }
    return { success: false, error: error.message, shouldBlacklist: true };
  }

  return { success: true, filePath };
}

/**
 * Silent download function for batch processing.
 *
 * Same pipeline as downloadBook() but without console output or prompts; every
 * outcome is reported through the returned object instead of exceptions.
 *
 * @param {object} book - Result to download (reads md5, title, ext, author).
 * @param {((percent: number) => void)|null} [onProgress=null] - Called with 0-100 while
 *   downloading, only when the server sends a content-length.
 * @param {AbortSignal|null} [signal=null] - Cancellation signal; when omitted, one is
 *   created and can be triggered through cancelAllDownloads().
 * @returns {Promise<{success: boolean, filePath?: string, error?: string,
 *   shouldBlacklist?: boolean, alreadyExists?: boolean}>} `success: true` with
 *   `filePath`, or `success: false` with `error` and, where applicable,
 *   `shouldBlacklist` / `alreadyExists`.
 */
export async function downloadBookSilent(book, onProgress = null, signal = null) {
  if (!book) return { success: false, error: 'No book provided' };

  const ownController = signal ? null : registerController();
  try {
    return await runSilentDownload(book, onProgress, signal ?? ownController.signal);
  } finally {
    if (ownController) {
      activeControllers.delete(ownController);
    }
  }
}

/**
 * Cancel every active download that is using a signal created by this module.
 * Downloads started with a caller-provided signal must be aborted by the caller.
 */
export function cancelAllDownloads() {
  // Copy first so the set can be cleared regardless of what abort listeners do.
  const controllers = [...activeControllers];
  activeControllers.clear();
  for (const controller of controllers) {
    controller.abort();
  }
}