/*
 * bookgrabs
 * Interactive CLI tool for LibGen ebook searches and downloads, with batch processing support.
 */
import fs from 'fs';
import { searchLibGen } from './search.js';
import { downloadBookSilent } from './download.js';
import { standardizeAuthorTitle, selectBestResult, retryDownloadWithAI } from './ai.js';
import { addToBlacklist } from './blacklist.js';
import { sleep } from './utils.js';
/**
 * Append a single timestamped entry to a log file.
 *
 * Does nothing when no log file name is given. A failed write is reported on
 * stderr and otherwise ignored, so logging never interrupts the caller.
 *
 * @param {string} message - Text to record.
 * @param {string|null|undefined} logFileName - Path of the log file; falsy disables logging.
 * @returns {Promise<void>}
 */
async function logToFile(message, logFileName) {
  if (logFileName) {
    const isoNow = new Date().toISOString();
    const entry = `[${isoNow}] ${message}\n`;
    try {
      await fs.promises.appendFile(logFileName, entry);
    } catch (writeError) {
      // Best effort only: surface the problem but keep the batch running.
      console.error('Error writing to log file:', writeError);
    }
  }
}
/**
 * Split one CSV record into its raw fields, honoring double-quoted fields.
 *
 * A quote only opens a quoted field when it is the first non-blank character
 * of that field; inside a quoted field a doubled quote ("") is an escaped
 * quote and commas are literal. The returned fields still carry their quotes.
 * If the record ends inside an unterminated quoted field, the line is treated
 * as malformed and split on every comma instead.
 *
 * @param {string} line - One line of CSV text (no embedded newlines).
 * @returns {string[]} Raw, untrimmed field texts.
 */
function splitCsvLine(line) {
  const fields = [];
  let raw = '';
  let inQuotes = false;
  for (let i = 0; i < line.length; i++) {
    const ch = line[i];
    if (inQuotes) {
      raw += ch;
      if (ch === '"') {
        if (line[i + 1] === '"') {
          // Escaped quote: keep both characters and skip the second one.
          raw += '"';
          i++;
        } else {
          inQuotes = false;
        }
      }
    } else if (ch === ',') {
      fields.push(raw);
      raw = '';
    } else {
      if (ch === '"' && raw.trim() === '') {
        inQuotes = true;
      }
      raw += ch;
    }
  }
  if (inQuotes) {
    // Unbalanced quote: fall back to the plain comma split.
    return line.split(',');
  }
  fields.push(raw);
  return fields;
}

/**
 * Normalize one raw CSV field: trim it, remove enclosing quotes and
 * unescape doubled quotes inside a fully quoted field.
 *
 * @param {string} raw - Field text as produced by splitCsvLine.
 * @returns {string} Cleaned field value.
 */
function cleanCsvValue(raw) {
  const value = raw.trim();
  if (value.length >= 2 && value.startsWith('"') && value.endsWith('"')) {
    return value.slice(1, -1).replace(/""/g, '"');
  }
  // Not fully quoted: strip a stray leading/trailing quote, nothing else.
  return value.replace(/^"|"$/g, '');
}

/**
 * Parse CSV text into book records keyed by the lower-cased header names.
 *
 * The first line is the header row and must contain "author" and "title"
 * columns. Fields may be double-quoted, in which case they may contain commas
 * and doubled ("") quotes. Quoted fields spanning several lines are not
 * supported. Rows without an author or a title are skipped with a console
 * warning.
 *
 * @param {string} csvContent - Full CSV text.
 * @returns {Object<string, string>[]} One object per valid data row.
 * @throws {Error} When the "author" or "title" column is missing.
 */
export function parseCSV(csvContent) {
  const lines = csvContent.trim().split(/\r?\n/);
  const headers = splitCsvLine(lines[0]).map(h => cleanCsvValue(h).toLowerCase());
  const books = [];

  // Validate required columns
  if (!headers.includes('author') || !headers.includes('title')) {
    throw new Error('CSV must contain "author" and "title" columns. Optional: "year"');
  }

  for (let i = 1; i < lines.length; i++) {
    const values = splitCsvLine(lines[i]).map(cleanCsvValue);
    const book = {};
    headers.forEach((header, index) => {
      // Missing trailing columns become empty strings.
      book[header] = values[index] || '';
    });

    // Ensure required fields exist
    if (!book.author || !book.title) {
      console.warn(`Skipping row ${i + 1}: missing author or title`);
      continue;
    }
    books.push(book);
  }
  return books;
}
/**
 * Assemble one report row for a requested book.
 * The download columns default to 'N/A' unless overridden through `details`.
 *
 * @param {{requestedAuthor: string, requestedTitle: string, requestedYear: string}} request
 * @param {string} status - Human-readable outcome stored in the row.
 * @param {Object} [details] - Optional downloadedAuthor / downloadedTitle / filePath overrides.
 * @returns {Object} Report row with a fixed column order.
 */
function buildReportEntry(request, status, details = {}) {
  return {
    requestedAuthor: request.requestedAuthor,
    requestedTitle: request.requestedTitle,
    requestedYear: request.requestedYear,
    downloadedAuthor: 'N/A',
    downloadedTitle: 'N/A',
    filePath: 'N/A',
    ...details,
    status
  };
}

/**
 * Return a copy of a search result whose author and title are replaced by
 * the values produced by standardizeAuthorTitle.
 *
 * @param {Object} result - Search result carrying at least author and title.
 * @returns {Promise<Object>} Copy of `result` with standardized author/title.
 */
async function standardizeSearchResult(result) {
  const standardized = await standardizeAuthorTitle(result.author, result.title);
  return {
    ...result,
    author: standardized.author,
    title: standardized.title
  };
}

/**
 * Download a candidate and, while the failure is flagged `shouldBlacklist`,
 * ask retryDownloadWithAI for an alternative (at most three retries).
 *
 * @param {{requestedAuthor: string, requestedTitle: string}} request
 * @param {Object[]} searchResults - All results of the original search.
 * @param {Object} firstCandidate - Standardized result to try first.
 * @param {string} logFileName - Log file receiving the step-by-step trace.
 * @returns {Promise<{candidate: Object, downloadResult: Object, retryCount: number}>}
 *   The last candidate tried, its download result and the number of retries made.
 */
async function downloadWithRetries(request, searchResults, firstCandidate, logFileName) {
  const maxRetries = 3;
  let candidate = firstCandidate;
  let retryCount = 0;

  await logToFile(` Attempting initial download...`, logFileName);
  let downloadResult = await downloadBookSilent(candidate);
  await logToFile(` Initial download result: success=${downloadResult.success}, shouldBlacklist=${downloadResult.shouldBlacklist}, error="${downloadResult.error}"`, logFileName);

  while (!downloadResult.success && downloadResult.shouldBlacklist && retryCount < maxRetries) {
    console.log(` Download failed: ${downloadResult.error}`);
    await logToFile(` Download failed (attempt ${retryCount + 1}): ${downloadResult.error}`, logFileName);
    await logToFile(` Initiating retry ${retryCount + 1}/${maxRetries}...`, logFileName);

    // addToBlacklist is handed over as a callback; presumably retryDownloadWithAI
    // uses it to blacklist the failed md5 - confirm in ai.js.
    const nextResult = await retryDownloadWithAI(
      request.requestedAuthor,
      request.requestedTitle,
      searchResults,
      candidate.md5,
      downloadResult.error,
      addToBlacklist
    );
    if (!nextResult) {
      console.log(` No more alternatives available after ${retryCount + 1} attempts`);
      await logToFile(` No more alternatives available after ${retryCount + 1} attempts`, logFileName);
      break;
    }
    await logToFile(` AI selected alternative: ${nextResult.author} - ${nextResult.title} (${nextResult.year}) [${nextResult.md5}]`, logFileName);

    candidate = await standardizeSearchResult(nextResult);
    await logToFile(` Standardized alternative to: ${candidate.author} - ${candidate.title}`, logFileName);
    console.log(` Retry ${retryCount + 1}: Attempting download of ${candidate.author} - ${candidate.title}`);
    await logToFile(` Attempting download of alternative...`, logFileName);

    downloadResult = await downloadBookSilent(candidate);
    retryCount++;
    await logToFile(` Retry ${retryCount} result: success=${downloadResult.success}, shouldBlacklist=${downloadResult.shouldBlacklist}, error="${downloadResult.error}"`, logFileName);
  }

  return { candidate, downloadResult, retryCount };
}

/**
 * Search, select, standardize and download a single requested book.
 *
 * @param {{requestedAuthor: string, requestedTitle: string, requestedYear: string}} request
 * @param {Object} searchOptions - Options forwarded to searchLibGen (maxResults is forced to 20).
 * @param {string} logFileName - Log file receiving the step-by-step trace.
 * @returns {Promise<{entry: Object, downloadedBook: (Object|null), skipped: boolean}>}
 *   The report row, the downloaded result (null unless the download succeeded)
 *   and whether the book was skipped because its file already exists.
 */
async function processSingleBook(request, searchOptions, logFileName) {
  const { requestedAuthor, requestedTitle, requestedYear } = request;

  let searchQuery = `${requestedAuthor} ${requestedTitle}`.trim();
  if (requestedYear) {
    searchQuery += ` ${requestedYear}`;
  }
  await logToFile(` Searching with query: "${searchQuery}"`, logFileName);

  const batchSearchOptions = {
    ...searchOptions,
    maxResults: 20 // More results for better matching
  };
  const searchResults = await searchLibGen(searchQuery, batchSearchOptions);
  await logToFile(` Found ${searchResults.length} search results`, logFileName);

  if (searchResults.length === 0) {
    console.log(' No results found');
    await logToFile(` No search results found`, logFileName);
    return { entry: buildReportEntry(request, 'Not Found'), downloadedBook: null, skipped: false };
  }

  console.log(` Found ${searchResults.length} results, using AI to select best match...`);
  await logToFile(` Asking AI to select best result from ${searchResults.length} options`, logFileName);
  const selectedResult = await selectBestResult(requestedAuthor, requestedTitle, searchResults);
  if (!selectedResult) {
    console.log(' AI determined no good matches found');
    await logToFile(` AI determined no good matches from search results`, logFileName);
    return { entry: buildReportEntry(request, 'No Good Match Found'), downloadedBook: null, skipped: false };
  }

  console.log(` AI selected: ${selectedResult.author} - ${selectedResult.title} (${selectedResult.year})`);
  await logToFile(` AI selected: ${selectedResult.author} - ${selectedResult.title} (${selectedResult.year}) [${selectedResult.md5}]`, logFileName);

  console.log(` Standardizing author and title...`);
  const firstCandidate = await standardizeSearchResult(selectedResult);
  console.log(` Standardized: ${firstCandidate.author} - ${firstCandidate.title}`);
  await logToFile(` Standardized to: ${firstCandidate.author} - ${firstCandidate.title}`, logFileName);

  const { candidate, downloadResult, retryCount } =
    await downloadWithRetries(request, searchResults, firstCandidate, logFileName);
  const downloaded = {
    downloadedAuthor: candidate.author,
    downloadedTitle: candidate.title
  };

  if (downloadResult.success) {
    const status = retryCount > 0 ? `Downloaded (after ${retryCount} retries)` : 'Downloaded';
    console.log(` Downloaded: ${downloadResult.filePath}`);
    await logToFile(` FINAL SUCCESS: ${status} - ${downloadResult.filePath}`, logFileName);
    return {
      entry: buildReportEntry(request, status, { ...downloaded, filePath: downloadResult.filePath }),
      downloadedBook: candidate,
      skipped: false
    };
  }

  if (downloadResult.alreadyExists) {
    // This exact status string is what saveReportAndSummary counts as "skipped".
    const status = 'File Already Exists';
    console.log(` Skipped: ${status}`);
    await logToFile(` SKIPPED: ${status} - file already exists`, logFileName);
    return {
      entry: buildReportEntry(request, status, { ...downloaded, filePath: 'Already Exists' }),
      downloadedBook: null,
      skipped: true
    };
  }

  const status = `Download Failed: ${downloadResult.error}${retryCount > 0 ? ` (after ${retryCount} retries)` : ''}`;
  console.log(` Final download failed: ${downloadResult.error}`);
  await logToFile(` FINAL FAILURE: ${status}`, logFileName);
  return {
    entry: buildReportEntry(request, status, { ...downloaded, filePath: 'N/A' }),
    downloadedBook: null,
    skipped: false
  };
}

/**
 * Process a list of requested books one after another: search, let the AI
 * helper pick a match, download it (with retries) and collect a report.
 *
 * A one-second pause is inserted between consecutive books regardless of how
 * the previous book ended (downloaded, not found, no match, or exception), so
 * requests are never fired back-to-back. No pause follows the last book.
 *
 * @param {Object[]} books - Records with `author`, `title` and optional `year`.
 * @param {Object} searchOptions - Options forwarded to searchLibGen.
 * @param {string|null} [logFileName=null] - Log file path; a timestamped
 *   `batch_download_log_*.txt` name is generated when omitted.
 * @returns {Promise<{report: Object[], downloadedBooks: Object[], skippedCount: number, logFileName: string}>}
 *   Report rows (one per book), the successfully downloaded results, the number
 *   of books skipped because the file already existed, and the log file used.
 */
export async function processBatchBooks(books, searchOptions, logFileName = null) {
  const delayBetweenBooksMs = 1000;
  const report = [];
  const downloadedBooks = [];
  let skippedCount = 0;

  console.log(`Processing ${books.length} books from CSV...`);

  // Create log file if not provided
  if (!logFileName) {
    const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
    logFileName = `batch_download_log_${timestamp}.txt`;
  }
  await logToFile(`Starting batch processing of ${books.length} books`, logFileName);
  await logToFile(`Search options: ${JSON.stringify(searchOptions)}`, logFileName);

  for (let i = 0; i < books.length; i++) {
    const book = books[i];
    const request = {
      requestedAuthor: book.author || '',
      requestedTitle: book.title || '',
      requestedYear: book.year || ''
    };
    const yearText = request.requestedYear ? ` (${request.requestedYear})` : '';
    console.log(`\n[${i + 1}/${books.length}] Searching for: ${request.requestedAuthor} - ${request.requestedTitle}${yearText}`);
    await logToFile(`\n--- Processing book ${i + 1}/${books.length}: ${request.requestedAuthor} - ${request.requestedTitle}${yearText} ---`, logFileName);

    try {
      const outcome = await processSingleBook(request, searchOptions, logFileName);
      report.push(outcome.entry);
      if (outcome.downloadedBook) {
        downloadedBooks.push(outcome.downloadedBook);
      }
      if (outcome.skipped) {
        skippedCount++;
      }
    } catch (error) {
      // Anything can be thrown; only Error instances are guaranteed a message.
      const reason = error instanceof Error ? error.message : String(error);
      const errorMsg = `Error: ${reason}`;
      console.log(` Error processing book: ${reason}`);
      await logToFile(` EXCEPTION: ${errorMsg}`, logFileName);
      report.push(buildReportEntry(request, errorMsg));
    }

    // Add delay between requests to be respectful - after every outcome,
    // but not after the final book where nothing follows.
    if (i < books.length - 1) {
      await sleep(delayBetweenBooksMs);
    }
  }

  await logToFile(`\n=== BATCH PROCESSING COMPLETE ===`, logFileName);
  await logToFile(`Total books processed: ${books.length}`, logFileName);
  await logToFile(`Successfully downloaded: ${downloadedBooks.length}`, logFileName);
  await logToFile(`Skipped (already exist): ${skippedCount}`, logFileName);
  await logToFile(`Failed: ${books.length - downloadedBooks.length - skippedCount}`, logFileName);
  await logToFile(`Log file: ${logFileName}`, logFileName);

  return { report, downloadedBooks, skippedCount, logFileName };
}
/**
 * Render the batch report as CSV text.
 *
 * Every cell is wrapped in double quotes, and double quotes inside a value are
 * escaped by doubling them, so values containing quotes or commas stay within
 * their column. Null or undefined cells are written as empty strings.
 *
 * @param {Object[]} report - Rows with requestedAuthor, requestedTitle,
 *   requestedYear, downloadedAuthor, downloadedTitle, filePath and status.
 * @returns {string} Header line plus one line per row, joined with "\n".
 */
export function generateCSVReport(report) {
  const headers = ['Requested Author', 'Requested Title', 'Requested Year', 'Downloaded Author', 'Downloaded Title', 'File Path', 'Status'];

  // Quote a cell and escape embedded quotes as "".
  const quoteCell = (value) => `"${String(value ?? '').replace(/"/g, '""')}"`;

  const csvRows = [headers.join(',')];
  for (const row of report) {
    const cells = [
      row.requestedAuthor,
      row.requestedTitle,
      row.requestedYear || '',
      row.downloadedAuthor,
      row.downloadedTitle,
      row.filePath,
      row.status
    ];
    csvRows.push(cells.map(quoteCell).join(','));
  }
  return csvRows.join('\n');
}
/**
 * Write the batch report to a dated CSV file in the current working directory
 * and print a summary of the run to the console.
 *
 * The file is named `download_report_YYYY-MM-DD.csv`. Rows whose status is
 * exactly 'File Already Exists' count as skipped; whatever is neither
 * downloaded nor skipped counts as failed.
 *
 * @param {Object[]} report - Report rows, one per requested book.
 * @param {Object[]} downloadedBooks - Successfully downloaded results (author, title, year).
 * @param {string} [logFileName] - Debug log path, mentioned in the summary when set.
 * @returns {Promise<void>}
 */
export async function saveReportAndSummary(report, downloadedBooks, logFileName) {
  const today = new Date().toISOString().slice(0, 10);
  const reportFilename = `download_report_${today}.csv`;
  const csvText = generateCSVReport(report);
  await fs.promises.writeFile(reportFilename, csvText);

  console.log('\n=== Batch Processing Complete ===');
  console.log(`Report saved to: ${reportFilename}`);
  if (logFileName) {
    console.log(`Debug log saved to: ${logFileName}`);
  }

  const downloadedTotal = downloadedBooks.length;
  console.log(`Successfully downloaded: ${downloadedTotal}/${report.length} books`);

  // Breakdown lines appear only when their count is non-zero.
  const skippedCount = report.reduce(
    (count, row) => (row.status === 'File Already Exists' ? count + 1 : count),
    0
  );
  const failedCount = report.length - downloadedTotal - skippedCount;
  if (skippedCount > 0) {
    console.log(`Skipped (already exist): ${skippedCount}`);
  }
  if (failedCount > 0) {
    console.log(`Failed: ${failedCount}`);
  }

  if (downloadedTotal === 0) {
    return;
  }
  console.log('\nDownloaded Books:');
  for (const book of downloadedBooks) {
    console.log(` ${book.author} - ${book.title} (${book.year})`);
  }
}