UNPKG

bookgrabs

Version:

Interactive CLI tool for LibGen ebook searches and downloads with batch processing support

228 lines (185 loc) 7.82 kB
import OpenAI from 'openai';
import 'dotenv/config';

// Chat model used for every request issued by this module.
const CHAT_MODEL = 'gpt-3.5-turbo';

// Exclusive upper bound on the length of a filename accepted from the model.
const MAX_FILENAME_LENGTH = 250;

// Lazily created OpenAI client and the API key it was created with.
let openaiClient = null;
let lastApiKey = null;

/**
 * Returns the shared OpenAI client, (re)creating it when needed.
 *
 * `process.env.OPEN_AI_KEY` is read on every call, so a key that is set,
 * changed or removed after module load is picked up by the next call.
 *
 * @returns {OpenAI|null} The client, or `null` when no API key is set.
 */
function getOpenAIClient() {
  if (typeof process === 'undefined' || !process.env) {
    return openaiClient;
  }

  const currentApiKey = process.env.OPEN_AI_KEY;

  if (!currentApiKey) {
    // The key was removed (or never set): drop any cached client.
    openaiClient = null;
    lastApiKey = null;
    return null;
  }

  if (currentApiKey !== lastApiKey || !openaiClient) {
    openaiClient = new OpenAI({ apiKey: currentApiKey });
    lastApiKey = currentApiKey;
  }

  return openaiClient;
}

/**
 * Sends a single-message chat completion request and returns the reply text.
 *
 * @param {OpenAI} openai - Client returned by `getOpenAIClient()`.
 * @param {string} prompt - Content of the single user message.
 * @param {number} maxTokens - Value passed as `max_tokens`.
 * @param {number} temperature - Value passed as `temperature`.
 * @returns {Promise<string>} The trimmed content of the first choice.
 * @throws {Error} When the response carries no text content, or when the
 *   request itself rejects.
 */
async function requestCompletion(openai, prompt, maxTokens, temperature) {
  const response = await openai.chat.completions.create({
    model: CHAT_MODEL,
    messages: [{ role: 'user', content: prompt }],
    max_tokens: maxTokens,
    temperature,
  });

  const firstChoice = response && response.choices ? response.choices[0] : null;
  const content = firstChoice && firstChoice.message ? firstChoice.message.content : null;

  if (typeof content !== 'string') {
    throw new Error('OpenAI response did not contain any message content');
  }

  return content.trim();
}

/**
 * Standardizes a book's author and title using GPT.
 *
 * Falls back to the values passed in when no API key is configured, when the
 * request fails, or when the reply does not contain a usable value.
 *
 * @param {string} author - Original author string.
 * @param {string} title - Original title string.
 * @returns {Promise<{author: string, title: string}>} Standardized values.
 */
export async function standardizeAuthorTitle(author, title) {
  const openai = getOpenAIClient();

  if (!openai) {
    console.warn('OpenAI API key not found. Skipping standardization.');
    return { author, title };
  }

  try {
    const prompt = `Please standardize the following book author and title into the exact format specified:

Author format: First, Last (e.g., "John, Smith" or "Mary Jane, Doe")
Title format: Book Title (Series name book 1) - only add series info if it's clearly a series book

Original Author: ${author}
Original Title: ${title}

Please respond with ONLY the standardized format, one line for author and one line for title:
Author:
Title: `;

    const content = await requestCompletion(openai, prompt, 150, 0.1);

    let standardizedAuthor = author;
    let standardizedTitle = title;

    for (const rawLine of content.split('\n')) {
      // Trim first so indented or CRLF-terminated lines are still recognized.
      const line = rawLine.trim();

      if (line.startsWith('Author:')) {
        // `||` on purpose: an empty answer must not wipe out the current value.
        standardizedAuthor = line.slice('Author:'.length).trim() || standardizedAuthor;
      } else if (line.startsWith('Title:')) {
        standardizedTitle = line.slice('Title:'.length).trim() || standardizedTitle;
      }
    }

    return { author: standardizedAuthor, title: standardizedTitle };
  } catch (error) {
    console.warn(`Error standardizing "${author} - ${title}":`, error.message);
    return { author, title };
  }
}

/**
 * Asks GPT to pick the search result that best matches the requested book.
 *
 * @param {string} requestedAuthor - Author the user asked for.
 * @param {string} requestedTitle - Title the user asked for.
 * @param {Array<object>} searchResults - Candidates; `author`, `title`,
 *   `year` and `ext` of each entry are included in the prompt.
 * @returns {Promise<object|null>} The chosen element of `searchResults`, or
 *   `null` when no API key is configured, there are no candidates, the model
 *   reports no match, the reply cannot be parsed, or the request fails.
 */
export async function selectBestResult(requestedAuthor, requestedTitle, searchResults) {
  const openai = getOpenAIClient();

  if (!openai) {
    console.warn('OpenAI API key not found. Using traditional scoring.');
    return null;
  }

  if (!Array.isArray(searchResults) || searchResults.length === 0) {
    return null;
  }

  try {
    // Number the candidates from 1 so the model can answer with a position.
    const formattedResults = searchResults
      .map(
        (result, index) =>
          `${index + 1}. Author: ${result.author} | Title: ${result.title} | Year: ${result.year} | Format: ${result.ext}`,
      )
      .join('\n');

    const prompt = `I'm looking for a specific book and need you to select the best match from these search results, or determine if none match.

REQUESTED BOOK:
Author: ${requestedAuthor}
Title: ${requestedTitle}

SEARCH RESULTS:
${formattedResults}

Please analyze these results and:
1. Look for the exact author and title match (ignoring minor formatting differences)
2. Prefer English language books over translations
3. Prefer common formats (epub, pdf, mobi) over academic papers or journals
4. Avoid results that are clearly different books, academic papers, or foreign language editions unless they're the only option

Respond with ONLY one of these formats:
- If you find a good match: "SELECTED: [number]" (e.g., "SELECTED: 3")
- If no good matches: "NO MATCH"

Do not include any explanation, just the selection.`;

    const content = await requestCompletion(openai, prompt, 50, 0.1);

    if (/^NO MATCH/i.test(content)) {
      return null;
    }

    // Accept both "SELECTED: 3" and the bracketed "SELECTED: [3]" form that
    // the prompt's own format description shows.
    const match = content.match(/SELECTED:\s*\[?\s*(\d+)/i);
    if (!match) {
      return null;
    }

    const selectedIndex = Number.parseInt(match[1], 10) - 1;
    if (selectedIndex < 0 || selectedIndex >= searchResults.length) {
      return null;
    }

    return searchResults[selectedIndex];
  } catch (error) {
    console.warn(`Error selecting best result for "${requestedAuthor} - ${requestedTitle}":`, error.message);
    return null;
  }
}

/**
 * Blacklists a failed download and asks the AI for the next best candidate.
 *
 * @param {string} requestedAuthor - Author the user asked for.
 * @param {string} requestedTitle - Title the user asked for.
 * @param {Array<object>} searchResults - All candidates; entries whose `md5`
 *   equals `failedMd5` are excluded from the retry.
 * @param {string} failedMd5 - `md5` of the result that failed.
 * @param {string} failureReason - Passed through to `addToBlacklist`.
 * @param {Function} addToBlacklist - Called (and awaited) with
 *   `(failedMd5, failureReason)` before a new candidate is selected.
 * @returns {Promise<object|null>} The next candidate, or `null` when none is
 *   left or `selectBestResult` returns nothing.
 */
export async function retryDownloadWithAI(
  requestedAuthor,
  requestedTitle,
  searchResults,
  failedMd5,
  failureReason,
  addToBlacklist,
) {
  await addToBlacklist(failedMd5, failureReason);

  const filteredResults = searchResults.filter((result) => result.md5 !== failedMd5);

  if (filteredResults.length === 0) {
    console.log(' No more results available to try');
    return null;
  }

  console.log(` Asking AI to select next best result from ${filteredResults.length} remaining options...`);

  const nextBestResult = await selectBestResult(requestedAuthor, requestedTitle, filteredResults);

  if (!nextBestResult) {
    console.log(' AI determined no more good matches available');
    return null;
  }

  console.log(` AI selected next option: ${nextBestResult.author} - ${nextBestResult.title} (${nextBestResult.year})`);
  return nextBestResult;
}

/**
 * Cleans up a book filename using AI while keeping it readable.
 *
 * The model's reply is untrusted text, so it is always passed through
 * `basicFilenameCleanup` before being returned; path separators, quotes and
 * line breaks in the reply therefore never reach the caller.
 *
 * @param {string} originalFilename - Filename to clean.
 * @returns {Promise<string>} The cleaned filename. Falls back to
 *   `basicFilenameCleanup(originalFilename)` when no API key is configured,
 *   the request fails, or the reply is empty or too long.
 */
export async function cleanupFilename(originalFilename) {
  const openai = getOpenAIClient();

  if (!openai) {
    console.warn('OpenAI API key not found. Using basic filename cleanup.');
    return basicFilenameCleanup(originalFilename);
  }

  try {
    const prompt = `Please clean up this book filename to make it safe for file systems while keeping it readable and meaningful:

Original filename: ${originalFilename}

Rules:
1. Remove or replace characters that are problematic for file systems: / \\ : * ? " < > |
2. Keep the filename readable - don't turn everything into underscores
3. Preserve proper capitalization and spacing where possible
4. Replace problematic characters with appropriate alternatives (e.g., & -> and, : -> -, etc.)
5. Keep it under 200 characters
6. Remove excessive punctuation but keep meaningful ones
7. Don't use all caps or all lowercase unless the original was that way

Please respond with ONLY the cleaned filename, no explanation:`;

    const rawFilename = await requestCompletion(openai, prompt, 100, 0.2);

    // Never trust the model to have followed the rules above.
    const cleanedFilename = basicFilenameCleanup(rawFilename);

    if (cleanedFilename.length > 0 && cleanedFilename.length < MAX_FILENAME_LENGTH) {
      return cleanedFilename;
    }

    console.warn(`AI returned invalid filename: "${rawFilename}". Using basic cleanup.`);
    return basicFilenameCleanup(originalFilename);
  } catch (error) {
    console.warn(`Error cleaning filename "${originalFilename}":`, error.message);
    return basicFilenameCleanup(originalFilename);
  }
}

/**
 * Fallback filename cleanup that does not need the AI.
 *
 * @param {string} filename - Filename to clean.
 * @returns {string} `filename` with the characters / \ : * ? " < > | and
 *   control characters removed, `&` replaced by `and`, whitespace runs
 *   collapsed to one space, and leading/trailing dots and whitespace stripped.
 */
function basicFilenameCleanup(filename) {
  return filename
    .replace(/[\/\\:*?"<>|]/g, '') // Remove problematic characters
    .replace(/&/g, 'and') // Replace ampersand
    .replace(/\s+/g, ' ') // Normalize whitespace (tabs/newlines become spaces)
    .replace(/[\u0000-\u001f\u007f]/g, '') // Drop remaining control characters
    .replace(/^[.\s]+|[.\s]+$/g, ''); // Strip leading/trailing dots and spaces together
}