bookgrabs
Version:
Interactive CLI tool for LibGen ebook searches and downloads with batch processing support
432 lines (365 loc) • 14 kB
JavaScript
import fs from 'fs';
import path from 'path';
import axios from 'axios';
import * as cheerio from 'cheerio';
import readline from 'readline';
import { addToBlacklist } from './blacklist.js';
import { sanitizeFilename, displayResults, getBookGrabsDirectory } from './utils.js';
import { cleanupFilename } from './ai.js';
// Module-level AbortController used by cancelAllDownloads().
// downloadBook() and downloadBookSilent() replace it with a fresh controller every
// time they are called without a caller-supplied signal, so at any moment it refers
// only to the most recently created controller (or null before any such call and
// after cancelAllDownloads() has run).
let globalAbortController = null;
/**
 * Build an axios instance whose requests are bound to an abort signal.
 *
 * @param {AbortSignal} signal - Passed through unchanged as the `signal` request option.
 * @returns {object} Whatever `axios.create` returns for a config carrying the signal,
 *   a `timeout` of 30000 and a desktop-browser `User-Agent` header.
 */
function createCancellableAxios(signal) {
  const requestDefaults = {
    signal,
    timeout: 30000,
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    },
  };

  return axios.create(requestDefaults);
}
/**
 * Ask the user on stdin which of the displayed results to download.
 *
 * @param {Array} results - The results that were shown to the user; the answer is a
 *   1-based index into this array.
 * @returns {Promise<*>} The chosen entry, or `null` when the answer is 0, is not a
 *   number, is out of range, or when stdin is closed before any answer is given.
 */
export async function promptUser(results) {
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  return new Promise((resolve) => {
    let answered = false;

    // If the input stream ends (EOF, exhausted pipe) the question callback never
    // fires; without this handler the promise would stay pending forever.
    rl.once('close', () => {
      if (answered) {
        return;
      }
      console.log('Cancelled.');
      resolve(null);
    });

    rl.question('\nEnter the number of the book to download (or 0 to cancel): ', (answer) => {
      answered = true;
      rl.close();

      const num = Number.parseInt(answer, 10);
      if (num > 0 && num <= results.length) {
        resolve(results[num - 1]);
      } else {
        console.log('Cancelled.');
        resolve(null);
      }
    });
  });
}
/**
 * Remove a partially written download. Failures are logged rather than thrown so
 * they never mask the error that caused the cleanup.
 */
function discardPartialDownload(filePath) {
  try {
    fs.rmSync(filePath, { force: true });
  } catch (cleanupError) {
    console.error(`Could not remove partial file ${filePath}: ${cleanupError.message}`);
  }
}

/**
 * Pipe a response stream into `filePath`, settling exactly once.
 *
 * Resolves when the file stream emits 'finish'. Rejects on an error from either
 * stream, or with Error('Download cancelled') when `signal` aborts. On rejection the
 * partial file is removed, and only after the file stream has closed so that a
 * still-pending open cannot recreate it afterwards.
 */
function writeStreamToFile(source, filePath, signal) {
  return new Promise((resolve, reject) => {
    const writer = fs.createWriteStream(filePath);
    let settled = false;
    let writerClosed = false;
    writer.once('close', () => {
      writerClosed = true;
    });

    const onAbort = () => fail(new Error('Download cancelled'));

    function fail(error) {
      if (settled) {
        return;
      }
      settled = true;
      signal.removeEventListener('abort', onAbort);
      source.destroy();

      const finalize = () => {
        discardPartialDownload(filePath);
        reject(error);
      };
      if (writerClosed) {
        finalize();
        return;
      }
      writer.once('close', finalize);
      writer.destroy();
    }

    if (signal.aborted) {
      onAbort();
      return;
    }
    signal.addEventListener('abort', onAbort);

    // pipe() does not forward source errors to the destination, so listen on both ends.
    source.on('error', (error) => {
      fail(signal.aborted ? new Error('Download cancelled') : error);
    });
    writer.on('error', fail);
    writer.on('finish', () => {
      if (settled) {
        return;
      }
      settled = true;
      signal.removeEventListener('abort', onAbort);
      resolve();
    });

    source.pipe(writer);
  });
}

/**
 * After `md5` has been blacklisted, show up to ten of the remaining results and let
 * the user pick one. Returns `{ nextBook, remaining }`, or `null` when there is
 * nothing else to offer or the user declines.
 */
async function pickAlternativeResult(md5, allResults) {
  if (!allResults || allResults.length <= 1) {
    return null;
  }
  console.log('\nThis result has been blacklisted. Would you like to try another result?');
  const remaining = allResults.filter((result) => result.md5 !== md5);
  if (remaining.length === 0) {
    return null;
  }
  const shortlist = remaining.slice(0, 10);
  displayResults(shortlist);
  const nextBook = await promptUser(shortlist);
  return nextBook ? { nextBook, remaining } : null;
}

/**
 * Interactive download: fetch the mirror page for `book`, locate a download link,
 * and stream the file into `<BookGrabs dir>/<author>/<title>.<ext>`.
 *
 * Every failure is handled in one place (the catch below), so a bad result is
 * blacklisted once and the user is offered an alternative once.
 *
 * @param {object} book - Search result; `md5`, `title`, `author` and `ext` are read.
 * @param {Array|null} [allResults=null] - Full result list, used to offer another
 *   result after this one has been blacklisted.
 * @param {AbortSignal|null} [signal=null] - Cancellation signal. When omitted, a new
 *   controller is created and stored in the module-level `globalAbortController`.
 * @returns {Promise<void>} Resolves when the file is written, when it already
 *   exists, or when the operation was cancelled before the transfer started.
 * @throws {Error} The underlying error when the download fails and no alternative is
 *   chosen; Error('Download cancelled') when aborted during the transfer.
 */
export async function downloadBook(book, allResults = null, signal = null) {
  if (!book) return;

  // Use provided signal or create a new one
  let abortSignal = signal;
  if (!abortSignal) {
    globalAbortController = new AbortController();
    abortSignal = globalAbortController.signal;
  }

  const md5 = book.md5;
  const downloadPageUrl = `https://libgen.bz/ads.php?md5=${md5}`;
  // Reason recorded in the blacklist when it should differ from the thrown message.
  let blacklistReason = null;

  try {
    if (abortSignal.aborted) {
      throw new Error('Operation cancelled');
    }

    console.log(`Fetching download page: ${downloadPageUrl}`);
    const axiosInstance = createCancellableAxios(abortSignal);
    const response = await axiosInstance.get(downloadPageUrl);
    const $ = cheerio.load(response.data);

    // Try multiple selectors to find the download link
    let actualUrl = null;
    const selectors = [
      'a[href*="get"]',
      'a:contains("GET")',
      'a[href*="download"]',
      'a[href*=".epub"]',
      'a[href*=".pdf"]',
      'a[href*=".mobi"]',
      'a[href*=".azw3"]',
      'a[href*="cloudflare"]',
      'a[href*="library"]',
    ];
    for (const selector of selectors) {
      const link = $(selector).attr('href');
      if (link) {
        actualUrl = link;
        console.log(`Found download link using selector "${selector}": ${actualUrl}`);
        break;
      }
    }

    // If no specific selectors work, try to find any external link
    if (!actualUrl) {
      $('a').each((i, elem) => {
        const href = $(elem).attr('href');
        if (href && (href.startsWith('http') || href.startsWith('//'))) {
          actualUrl = href;
          console.log(`Found external link: ${actualUrl}`);
          return false; // break
        }
      });
    }

    if (!actualUrl) {
      console.log('Available links on page:');
      $('a').each((i, elem) => {
        const href = $(elem).attr('href');
        const text = $(elem).text().trim();
        if (href) {
          console.log(` - ${href} (text: "${text}")`);
        }
      });
      // Blacklisting and the retry prompt happen once, in the catch below.
      blacklistReason = 'Could not find download link';
      throw new Error('Could not find download link on mirror page.');
    }

    // Handle different URL formats
    if (actualUrl.startsWith('//')) {
      actualUrl = `https:${actualUrl}`;
    } else if (actualUrl.startsWith('/')) {
      actualUrl = `https://libgen.bz${actualUrl}`;
    } else if (!actualUrl.startsWith('http')) {
      actualUrl = `https://libgen.bz/${actualUrl}`;
    }
    console.log(`Final download URL: ${actualUrl}`);

    // Clean up filename using AI
    const safeTitle = await cleanupFilename(book.title);
    const filename = `${safeTitle}.${book.ext}`;

    // Create author directory in BookGrabs folder
    const bookGrabsDir = getBookGrabsDirectory();
    const authorDir = path.join(bookGrabsDir, sanitizeFilename(book.author));
    await fs.promises.mkdir(authorDir, { recursive: true });
    const filePath = path.join(authorDir, filename);

    if (fs.existsSync(filePath)) {
      console.log(`File already exists: ${filePath}`);
      return;
    }

    if (abortSignal.aborted) {
      throw new Error('Operation cancelled');
    }

    console.log(`Downloading to: ${filePath}`);
    // The file is only created once a response has arrived, so a failed request
    // cannot leave an empty file that later looks like a finished download.
    const fileResponse = await axiosInstance({
      url: actualUrl,
      method: 'GET',
      responseType: 'stream',
      maxRedirects: 5, // Limit redirects to prevent infinite loops
    });

    try {
      await writeStreamToFile(fileResponse.data, filePath, abortSignal);
    } catch (streamError) {
      if (streamError.message !== 'Download cancelled') {
        blacklistReason = `Download error: ${streamError.message}`;
      }
      throw streamError;
    }
    console.log(`Downloaded: ${filePath}`);
  } catch (error) {
    if (error.name === 'AbortError' || error.name === 'CanceledError' || error.message === 'Operation cancelled') {
      console.log('\nDownload cancelled by user.');
      return;
    }
    // Aborted mid-transfer: propagate to the caller and never blacklist.
    if (error.message === 'Download cancelled') {
      throw error;
    }

    console.error('Error downloading book:', error.message);
    if (error.response) {
      console.error('Response status:', error.response.status);
      console.error('Response headers:', error.response.headers);
    }

    // Network issues and server errors are not the result's fault: do not blacklist.
    const isNetworkIssue = error.code === 'ENOTFOUND' || error.code === 'ECONNREFUSED';
    const isServerError = Boolean(error.response) && error.response.status >= 500;
    if (isNetworkIssue || isServerError) {
      throw error;
    }

    await addToBlacklist(md5, blacklistReason ?? error.message);
    const alternative = await pickAlternativeResult(md5, allResults);
    if (alternative) {
      return await downloadBook(alternative.nextBook, alternative.remaining, abortSignal);
    }
    throw error;
  }
}
/**
 * Remove an incomplete download. Failures are logged rather than thrown so they
 * never replace the error that triggered the cleanup.
 */
function removeIncompleteFile(filePath) {
  try {
    fs.rmSync(filePath, { force: true });
  } catch (cleanupError) {
    console.error(`Could not remove partial file ${filePath}: ${cleanupError.message}`);
  }
}

/**
 * Pipe a response stream into `filePath`, settling exactly once.
 *
 * Resolves on the file stream's 'finish'. Rejects on an error from either stream,
 * or with Error('Download cancelled') when `signal` aborts. On rejection the partial
 * file is removed after the file stream has closed, so a still-pending open cannot
 * recreate it afterwards.
 */
function saveResponseStream(source, filePath, signal) {
  return new Promise((resolve, reject) => {
    const writer = fs.createWriteStream(filePath);
    let settled = false;
    let writerClosed = false;
    writer.once('close', () => {
      writerClosed = true;
    });

    const onAbort = () => fail(new Error('Download cancelled'));

    function fail(error) {
      if (settled) {
        return;
      }
      settled = true;
      signal.removeEventListener('abort', onAbort);
      source.destroy();

      const finalize = () => {
        removeIncompleteFile(filePath);
        reject(error);
      };
      if (writerClosed) {
        finalize();
        return;
      }
      writer.once('close', finalize);
      writer.destroy();
    }

    if (signal.aborted) {
      onAbort();
      return;
    }
    signal.addEventListener('abort', onAbort);

    // pipe() does not forward source errors to the destination, so listen on both ends.
    source.on('error', (error) => {
      fail(signal.aborted ? new Error('Download cancelled') : error);
    });
    writer.on('error', fail);
    writer.on('finish', () => {
      if (settled) {
        return;
      }
      settled = true;
      signal.removeEventListener('abort', onAbort);
      resolve();
    });

    source.pipe(writer);
  });
}

/**
 * Batch-mode download: same steps as the interactive flow but without console
 * output or prompts. Never rejects; every outcome is reported as a result object.
 *
 * @param {object} book - Search result; `md5`, `title`, `author` and `ext` are read.
 * @param {Function|null} [onProgress=null] - Called with an integer percentage as
 *   chunks arrive, only when the response carries a positive content-length.
 * @param {AbortSignal|null} [signal=null] - Cancellation signal. When omitted, a new
 *   controller is created and stored in the module-level `globalAbortController`.
 * @returns {Promise<object>} One of:
 *   `{ success: true, filePath }`,
 *   `{ success: false, error, shouldBlacklist }`,
 *   `{ success: false, error: 'File already exists', alreadyExists: true }`,
 *   `{ success: false, error: 'Operation cancelled' | 'Download cancelled' }`,
 *   `{ success: false, error: 'No book provided' }`.
 */
export async function downloadBookSilent(book, onProgress = null, signal = null) {
  if (!book) return { success: false, error: 'No book provided' };

  // Use provided signal or create a new one
  let abortSignal = signal;
  if (!abortSignal) {
    globalAbortController = new AbortController();
    abortSignal = globalAbortController.signal;
  }

  const md5 = book.md5;
  const downloadPageUrl = `https://libgen.bz/ads.php?md5=${md5}`;

  try {
    if (abortSignal.aborted) {
      return { success: false, error: 'Operation cancelled' };
    }

    const axiosInstance = createCancellableAxios(abortSignal);
    const response = await axiosInstance.get(downloadPageUrl);
    const $ = cheerio.load(response.data);

    // Try multiple selectors to find the download link
    let actualUrl = null;
    const selectors = [
      'a[href*="get"]',
      'a:contains("GET")',
      'a[href*="download"]',
      'a[href*=".epub"]',
      'a[href*=".pdf"]',
      'a[href*=".mobi"]',
      'a[href*=".azw3"]',
      'a[href*="cloudflare"]',
      'a[href*="library"]',
    ];
    for (const selector of selectors) {
      const link = $(selector).attr('href');
      if (link) {
        actualUrl = link;
        break;
      }
    }

    // If no specific selectors work, try to find any external link
    if (!actualUrl) {
      $('a').each((i, elem) => {
        const href = $(elem).attr('href');
        if (href && (href.startsWith('http') || href.startsWith('//'))) {
          actualUrl = href;
          return false; // break
        }
      });
    }

    if (!actualUrl) {
      return { success: false, error: 'Could not find download link', shouldBlacklist: true };
    }

    // Handle different URL formats
    if (actualUrl.startsWith('//')) {
      actualUrl = `https:${actualUrl}`;
    } else if (actualUrl.startsWith('/')) {
      actualUrl = `https://libgen.bz${actualUrl}`;
    } else if (!actualUrl.startsWith('http')) {
      actualUrl = `https://libgen.bz/${actualUrl}`;
    }

    // Clean up filename using AI
    const safeTitle = await cleanupFilename(book.title);
    const filename = `${safeTitle}.${book.ext}`;

    // Create author directory in BookGrabs folder
    const bookGrabsDir = getBookGrabsDirectory();
    const authorDir = path.join(bookGrabsDir, sanitizeFilename(book.author));
    await fs.promises.mkdir(authorDir, { recursive: true });
    const filePath = path.join(authorDir, filename);

    if (fs.existsSync(filePath)) {
      return { success: false, error: 'File already exists', alreadyExists: true };
    }

    if (abortSignal.aborted) {
      return { success: false, error: 'Operation cancelled' };
    }

    // The file is only created once a response has arrived, so a failed request
    // cannot leave an empty file that a later run reports as "already exists".
    const fileResponse = await axiosInstance({
      url: actualUrl,
      method: 'GET',
      responseType: 'stream',
      maxRedirects: 5, // Limit redirects to prevent infinite loops
    });

    // Get content length for progress tracking
    const totalLength = Number.parseInt(fileResponse.headers['content-length'] || '0', 10);
    let downloadedLength = 0;
    if (onProgress && totalLength > 0) {
      fileResponse.data.on('data', (chunk) => {
        if (abortSignal.aborted) return; // Stop tracking if cancelled
        downloadedLength += chunk.length;
        const progress = Math.round((downloadedLength / totalLength) * 100);
        onProgress(progress);
      });
    }

    await saveResponseStream(fileResponse.data, filePath, abortSignal);
    return { success: true, filePath };
  } catch (error) {
    // Aborted while the transfer was running.
    if (error.message === 'Download cancelled') {
      return { success: false, error: 'Download cancelled' };
    }
    if (error.name === 'AbortError' || error.name === 'CanceledError' || error.message === 'Operation cancelled') {
      return { success: false, error: 'Operation cancelled' };
    }

    // Network issues and server errors are not the result's fault; everything else
    // (redirect loops, timeouts, write failures, ...) is blacklisted.
    if (error.code === 'ENOTFOUND' || error.code === 'ECONNREFUSED') {
      return { success: false, error: 'Network error - not blacklisting', shouldBlacklist: false };
    }
    if (error.response && error.response.status >= 500) {
      return { success: false, error: 'Server error - not blacklisting', shouldBlacklist: false };
    }
    return { success: false, error: error.message, shouldBlacklist: true };
  }
}
/**
 * Abort the controller currently held in the module-level `globalAbortController`
 * and clear the reference. Does nothing when no controller is stored.
 */
export function cancelAllDownloads() {
  if (!globalAbortController) {
    return;
  }

  globalAbortController.abort();
  globalAbortController = null;
}