UNPKG

bookgrabs

Version:

Interactive CLI tool for LibGen ebook searches and downloads with batch processing support

324 lines (273 loc) 12.5 kB
import fs from 'fs';
import { searchLibGen } from './search.js';
import { downloadBookSilent } from './download.js';
import { standardizeAuthorTitle, selectBestResult, retryDownloadWithAI } from './ai.js';
import { addToBlacklist } from './blacklist.js';
import { sleep } from './utils.js';

// Number of search results requested per book; overrides any caller-supplied
// maxResults so the selection step has more candidates to choose from.
const BATCH_MAX_RESULTS = 20;

// Maximum number of alternative results tried after a failed download.
const MAX_DOWNLOAD_RETRIES = 3;

// Pause between consecutive books, in milliseconds.
const REQUEST_DELAY_MS = 1000;

/**
 * Append a timestamped line to the log file.
 *
 * Write failures are reported on stderr and otherwise ignored, so logging can
 * never abort a batch run.
 *
 * @param {string} message - Text to log.
 * @param {?string} logFileName - Target file; when falsy nothing is written.
 * @returns {Promise<void>}
 */
async function logToFile(message, logFileName) {
  if (!logFileName) return;

  const timestamp = new Date().toISOString();
  const logMessage = `[${timestamp}] ${message}\n`;

  try {
    await fs.promises.appendFile(logFileName, logMessage);
  } catch (error) {
    console.error('Error writing to log file:', error);
  }
}

/**
 * Normalize one raw CSV field: trim it, drop enclosing double quotes and, for
 * a fully quoted field, turn doubled quotes ("") back into a single quote.
 *
 * @param {string} rawField - Field text exactly as found between separators.
 * @returns {string} The cleaned value.
 */
function cleanCSVField(rawField) {
  const trimmed = rawField.trim();
  if (trimmed.length >= 2 && trimmed.startsWith('"') && trimmed.endsWith('"')) {
    return trimmed.slice(1, -1).replace(/""/g, '"');
  }
  // Not a well-formed quoted field: strip a stray leading and/or trailing quote.
  return trimmed.replace(/^"|"$/g, '');
}

/**
 * Split a single CSV line into cleaned field values.
 *
 * A double quote only opens a quoted section when it is the first
 * non-whitespace character of a field, so a quote in the middle of an
 * unquoted value (e.g. O"Brien) does not swallow the following commas.
 *
 * @param {string} line - One line of CSV text without its line terminator.
 * @returns {string[]} Field values in column order.
 */
function splitCSVLine(line) {
  const rawFields = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const char = line[i];

    if (inQuotes) {
      if (char === '"' && line[i + 1] === '"') {
        // Escaped quote: keep both characters, cleanCSVField unescapes them.
        current += '""';
        i++;
      } else {
        if (char === '"') inQuotes = false;
        current += char;
      }
    } else if (char === '"' && current.trim() === '') {
      inQuotes = true;
      current += char;
    } else if (char === ',') {
      rawFields.push(current);
      current = '';
    } else {
      current += char;
    }
  }
  rawFields.push(current);

  return rawFields.map(cleanCSVField);
}

/**
 * Parse CSV text into a list of book requests.
 *
 * The first line is the header row; header names are lower-cased and become
 * the keys of each returned object. Rows lacking an author or title are
 * skipped with a console warning. Quoted fields may contain commas and
 * doubled quotes; fields spanning several lines are not supported.
 *
 * @param {string} csvContent - Raw CSV text.
 * @returns {Array<Object<string, string>>} One object per usable data row.
 * @throws {Error} If the header row lacks an "author" or "title" column.
 */
export function parseCSV(csvContent) {
  const lines = csvContent.trim().split(/\r?\n/);
  const headers = splitCSVLine(lines[0]).map((header) => header.toLowerCase());

  // Validate required columns
  if (!headers.includes('author') || !headers.includes('title')) {
    throw new Error('CSV must contain "author" and "title" columns. Optional: "year"');
  }

  const books = [];
  for (let i = 1; i < lines.length; i++) {
    const values = splitCSVLine(lines[i]);
    const book = {};
    headers.forEach((header, index) => {
      book[header] = values[index] ?? '';
    });

    // Ensure required fields exist
    if (!book.author || !book.title) {
      console.warn(`Skipping row ${i + 1}: missing author or title`);
      continue;
    }

    books.push(book);
  }

  return books;
}

/**
 * Build one report row. Download-related columns default to 'N/A' and can be
 * replaced through `overrides`.
 *
 * @param {{requestedAuthor: string, requestedTitle: string, requestedYear: string}} request
 * @param {Object} overrides - Column values to set, including `status`.
 * @returns {Object} The report row.
 */
function buildReportEntry(request, overrides) {
  return {
    requestedAuthor: request.requestedAuthor,
    requestedTitle: request.requestedTitle,
    requestedYear: request.requestedYear,
    downloadedAuthor: 'N/A',
    downloadedTitle: 'N/A',
    filePath: 'N/A',
    ...overrides,
  };
}

/**
 * Run search, selection, standardization and download (with retries) for a
 * single requested book.
 *
 * @param {{requestedAuthor: string, requestedTitle: string, requestedYear: string}} request
 * @param {Object} searchOptions - Options forwarded to searchLibGen.
 * @param {string} logFileName - Log file receiving progress messages.
 * @returns {Promise<{entry: Object, downloadedBook: ?Object, skipped: boolean}>}
 *   The report row, the downloaded result (null unless the download
 *   succeeded) and whether the book was skipped as already existing.
 */
async function processSingleBook(request, searchOptions, logFileName) {
  const { requestedAuthor, requestedTitle, requestedYear } = request;

  // Search for the book with enhanced search options
  let searchQuery = `${requestedAuthor} ${requestedTitle}`.trim();
  if (requestedYear) {
    searchQuery += ` ${requestedYear}`;
  }
  await logToFile(` Searching with query: "${searchQuery}"`, logFileName);

  const batchSearchOptions = {
    ...searchOptions,
    maxResults: BATCH_MAX_RESULTS, // More results for better matching
  };
  const searchResults = await searchLibGen(searchQuery, batchSearchOptions);
  await logToFile(` Found ${searchResults.length} search results`, logFileName);

  if (searchResults.length === 0) {
    console.log(' No results found');
    await logToFile(` No search results found`, logFileName);
    return {
      entry: buildReportEntry(request, { status: 'Not Found' }),
      downloadedBook: null,
      skipped: false,
    };
  }

  console.log(` Found ${searchResults.length} results, using AI to select best match...`);
  await logToFile(` Asking AI to select best result from ${searchResults.length} options`, logFileName);

  const selectedResult = await selectBestResult(requestedAuthor, requestedTitle, searchResults);
  if (!selectedResult) {
    console.log(' AI determined no good matches found');
    await logToFile(` AI determined no good matches from search results`, logFileName);
    return {
      entry: buildReportEntry(request, { status: 'No Good Match Found' }),
      downloadedBook: null,
      skipped: false,
    };
  }

  console.log(` AI selected: ${selectedResult.author} - ${selectedResult.title} (${selectedResult.year})`);
  await logToFile(` AI selected: ${selectedResult.author} - ${selectedResult.title} (${selectedResult.year}) [${selectedResult.md5}]`, logFileName);

  // Standardize author and title
  console.log(` Standardizing author and title...`);
  const standardized = await standardizeAuthorTitle(selectedResult.author, selectedResult.title);
  console.log(` Standardized: ${standardized.author} - ${standardized.title}`);

  let standardizedResult = {
    ...selectedResult,
    author: standardized.author,
    title: standardized.title,
  };
  await logToFile(` Standardized to: ${standardizedResult.author} - ${standardizedResult.title}`, logFileName);

  // Attempt download with retry logic
  await logToFile(` Attempting initial download...`, logFileName);
  let downloadResult = await downloadBookSilent(standardizedResult);
  let retryCount = 0;
  await logToFile(` Initial download result: success=${downloadResult.success}, shouldBlacklist=${downloadResult.shouldBlacklist}, error="${downloadResult.error}"`, logFileName);

  // Only failures flagged shouldBlacklist trigger a switch to another result.
  while (!downloadResult.success && downloadResult.shouldBlacklist && retryCount < MAX_DOWNLOAD_RETRIES) {
    console.log(` Download failed: ${downloadResult.error}`);
    await logToFile(` Download failed (attempt ${retryCount + 1}): ${downloadResult.error}`, logFileName);
    await logToFile(` Initiating retry ${retryCount + 1}/${MAX_DOWNLOAD_RETRIES}...`, logFileName);

    // Try to get next best result using AI
    const nextResult = await retryDownloadWithAI(
      requestedAuthor,
      requestedTitle,
      searchResults,
      standardizedResult.md5,
      downloadResult.error,
      addToBlacklist
    );

    if (!nextResult) {
      console.log(` No more alternatives available after ${retryCount + 1} attempts`);
      await logToFile(` No more alternatives available after ${retryCount + 1} attempts`, logFileName);
      break;
    }

    await logToFile(` AI selected alternative: ${nextResult.author} - ${nextResult.title} (${nextResult.year}) [${nextResult.md5}]`, logFileName);

    // Standardize the new result
    const newStandardized = await standardizeAuthorTitle(nextResult.author, nextResult.title);
    standardizedResult = {
      ...nextResult,
      author: newStandardized.author,
      title: newStandardized.title,
    };
    await logToFile(` Standardized alternative to: ${standardizedResult.author} - ${standardizedResult.title}`, logFileName);

    console.log(` Retry ${retryCount + 1}: Attempting download of ${standardizedResult.author} - ${standardizedResult.title}`);
    await logToFile(` Attempting download of alternative...`, logFileName);
    downloadResult = await downloadBookSilent(standardizedResult);
    retryCount++;

    await logToFile(` Retry ${retryCount} result: success=${downloadResult.success}, shouldBlacklist=${downloadResult.shouldBlacklist}, error="${downloadResult.error}"`, logFileName);
  }

  const downloadedColumns = {
    downloadedAuthor: standardizedResult.author,
    downloadedTitle: standardizedResult.title,
  };

  if (downloadResult.success) {
    const status = retryCount > 0 ? `Downloaded (after ${retryCount} retries)` : 'Downloaded';
    console.log(` Downloaded: ${downloadResult.filePath}`);
    await logToFile(` FINAL SUCCESS: ${status} - ${downloadResult.filePath}`, logFileName);
    return {
      entry: buildReportEntry(request, { ...downloadedColumns, filePath: downloadResult.filePath, status }),
      downloadedBook: standardizedResult,
      skipped: false,
    };
  }

  if (downloadResult.alreadyExists) {
    const status = 'File Already Exists';
    console.log(` Skipped: ${status}`);
    await logToFile(` SKIPPED: ${status} - file already exists`, logFileName);
    return {
      entry: buildReportEntry(request, { ...downloadedColumns, filePath: 'Already Exists', status }),
      downloadedBook: null,
      skipped: true,
    };
  }

  const status = `Download Failed: ${downloadResult.error}${retryCount > 0 ? ` (after ${retryCount} retries)` : ''}`;
  console.log(` Final download failed: ${downloadResult.error}`);
  await logToFile(` FINAL FAILURE: ${status}`, logFileName);
  return {
    entry: buildReportEntry(request, { ...downloadedColumns, filePath: 'N/A', status }),
    downloadedBook: null,
    skipped: false,
  };
}

/**
 * Search for and download every book in `books`, one after another.
 *
 * Each book yields exactly one report row. An exception while handling a book
 * is recorded in that row's status and processing continues with the next
 * book. A pause of REQUEST_DELAY_MS is inserted between consecutive books,
 * whatever the outcome of the previous one.
 *
 * @param {Array<{author: string, title: string, year?: string}>} books
 * @param {Object} searchOptions - Options forwarded to searchLibGen;
 *   `maxResults` is overridden with BATCH_MAX_RESULTS.
 * @param {?string} [logFileName=null] - Log file; when falsy a timestamped
 *   name is generated.
 * @returns {Promise<{report: Object[], downloadedBooks: Object[], skippedCount: number, logFileName: string}>}
 *   Report rows, the successfully downloaded results, the number of books
 *   skipped because the file already existed, and the log file actually used.
 */
export async function processBatchBooks(books, searchOptions, logFileName = null) {
  const report = [];
  const downloadedBooks = [];
  let skippedCount = 0;

  console.log(`Processing ${books.length} books from CSV...`);

  // Create log file name if not provided
  const activeLogFile =
    logFileName || `batch_download_log_${new Date().toISOString().replace(/[:.]/g, '-')}.txt`;

  await logToFile(`Starting batch processing of ${books.length} books`, activeLogFile);
  await logToFile(`Search options: ${JSON.stringify(searchOptions)}`, activeLogFile);

  for (let i = 0; i < books.length; i++) {
    // Delay between books to be respectful to the server; applied on every
    // path, including "not found" and error outcomes.
    if (i > 0) {
      await sleep(REQUEST_DELAY_MS);
    }

    const book = books[i];
    const request = {
      requestedAuthor: book.author || '',
      requestedTitle: book.title || '',
      requestedYear: book.year || '',
    };
    const yearText = request.requestedYear ? ` (${request.requestedYear})` : '';

    console.log(`\n[${i + 1}/${books.length}] Searching for: ${request.requestedAuthor} - ${request.requestedTitle}${yearText}`);
    await logToFile(`\n--- Processing book ${i + 1}/${books.length}: ${request.requestedAuthor} - ${request.requestedTitle}${yearText} ---`, activeLogFile);

    try {
      const outcome = await processSingleBook(request, searchOptions, activeLogFile);
      report.push(outcome.entry);
      if (outcome.downloadedBook) {
        downloadedBooks.push(outcome.downloadedBook);
      }
      if (outcome.skipped) {
        skippedCount++;
      }
    } catch (error) {
      // Tolerate non-Error throwables so the handler itself cannot throw.
      const reason = error instanceof Error ? error.message : String(error);
      const errorMsg = `Error: ${reason}`;
      console.log(` Error processing book: ${reason}`);
      await logToFile(` EXCEPTION: ${errorMsg}`, activeLogFile);
      report.push(buildReportEntry(request, { status: errorMsg }));
    }
  }

  await logToFile(`\n=== BATCH PROCESSING COMPLETE ===`, activeLogFile);
  await logToFile(`Total books processed: ${books.length}`, activeLogFile);
  await logToFile(`Successfully downloaded: ${downloadedBooks.length}`, activeLogFile);
  await logToFile(`Skipped (already exist): ${skippedCount}`, activeLogFile);
  await logToFile(`Failed: ${books.length - downloadedBooks.length - skippedCount}`, activeLogFile);
  await logToFile(`Log file: ${activeLogFile}`, activeLogFile);

  return { report, downloadedBooks, skippedCount, logFileName: activeLogFile };
}

/**
 * Quote a value for CSV output, doubling any embedded double quotes.
 * null and undefined become an empty cell.
 *
 * @param {*} value - Cell value.
 * @returns {string} The quoted cell text.
 */
function toCSVCell(value) {
  return `"${String(value ?? '').replace(/"/g, '""')}"`;
}

/**
 * Render report rows as CSV text with a header line.
 *
 * @param {Object[]} report - Rows as produced by processBatchBooks.
 * @returns {string} CSV text, lines joined with '\n', no trailing newline.
 */
export function generateCSVReport(report) {
  const headers = ['Requested Author', 'Requested Title', 'Requested Year', 'Downloaded Author', 'Downloaded Title', 'File Path', 'Status'];
  const csvRows = [headers.join(',')];

  for (const row of report) {
    const cells = [
      row.requestedAuthor,
      row.requestedTitle,
      row.requestedYear,
      row.downloadedAuthor,
      row.downloadedTitle,
      row.filePath,
      row.status,
    ];
    csvRows.push(cells.map(toCSVCell).join(','));
  }

  return csvRows.join('\n');
}

/**
 * Write the CSV report to `download_report_<YYYY-MM-DD>.csv` in the current
 * working directory and print a summary to the console.
 *
 * The file name contains only the date, so a second run on the same day
 * overwrites the earlier report.
 *
 * @param {Object[]} report - Report rows.
 * @param {Object[]} downloadedBooks - Successfully downloaded results.
 * @param {?string} logFileName - Log file name to mention, if any.
 * @returns {Promise<void>}
 */
export async function saveReportAndSummary(report, downloadedBooks, logFileName) {
  // Save report to CSV file
  const reportFilename = `download_report_${new Date().toISOString().slice(0, 10)}.csv`;
  await fs.promises.writeFile(reportFilename, generateCSVReport(report));

  console.log('\n=== Batch Processing Complete ===');
  console.log(`Report saved to: ${reportFilename}`);
  if (logFileName) {
    console.log(`Debug log saved to: ${logFileName}`);
  }
  console.log(`Successfully downloaded: ${downloadedBooks.length}/${report.length} books`);

  // Show breakdown if there are skipped or failed books
  const skippedCount = report.filter((row) => row.status === 'File Already Exists').length;
  const failedCount = report.length - downloadedBooks.length - skippedCount;

  if (skippedCount > 0) {
    console.log(`Skipped (already exist): ${skippedCount}`);
  }
  if (failedCount > 0) {
    console.log(`Failed: ${failedCount}`);
  }

  if (downloadedBooks.length > 0) {
    console.log('\nDownloaded Books:');
    for (const book of downloadedBooks) {
      console.log(` ${book.author} - ${book.title} (${book.year})`);
    }
  }
}