bookgrabs
Version:
Interactive CLI tool for LibGen ebook searches and downloads with batch processing support
228 lines (185 loc) • 7.82 kB
JavaScript
import OpenAI from 'openai';
import 'dotenv/config';
// Cached OpenAI client and the API key it was built with (created on demand).
let openaiClient = null;
let lastApiKey = null;

/**
 * Return an OpenAI client matching the current `OPEN_AI_KEY` environment variable.
 *
 * The environment is read on every call: a client is created the first time a
 * key is seen, rebuilt when the key changes, and discarded when the key is
 * missing or empty.
 *
 * @returns {object|null} The cached client, or `null` when no API key is set.
 */
function getOpenAIClient() {
  const env = typeof process !== 'undefined' ? process.env : undefined;
  if (!env) {
    // No environment to consult: hand back whatever is cached.
    return openaiClient;
  }

  const apiKey = env.OPEN_AI_KEY;
  if (!apiKey) {
    // Key removed (or never set): drop any cached client.
    openaiClient = null;
    lastApiKey = null;
    return openaiClient;
  }

  const needsNewClient = !openaiClient || apiKey !== lastApiKey;
  if (needsNewClient) {
    openaiClient = new OpenAI({ apiKey });
    lastApiKey = apiKey;
  }
  return openaiClient;
}
/**
 * Standardize a book's author and title using GPT.
 *
 * The original values are returned unchanged when no OpenAI client is
 * available, when the request throws, or when the reply does not contain a
 * non-empty value for the corresponding field.
 *
 * @param {string} author - Author as originally supplied.
 * @param {string} title - Title as originally supplied.
 * @returns {Promise<{author: string, title: string}>} Standardized (or original) values.
 */
export async function standardizeAuthorTitle(author, title) {
  const openai = getOpenAIClient();
  if (!openai) {
    console.warn('OpenAI API key not found. Skipping standardization.');
    return { author, title };
  }

  try {
    const prompt = `Please standardize the following book author and title into the exact format specified:
Author format: First, Last (e.g., "John, Smith" or "Mary Jane, Doe")
Title format: Book Title (Series name book 1) - only add series info if it's clearly a series book
Original Author: ${author}
Original Title: ${title}
Please respond with ONLY the standardized format, one line for author and one line for title:
Author:
Title: `;

    const response = await openai.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 150,
      temperature: 0.1,
    });

    // The message content can be null; treat that as an empty reply.
    const content = response.choices[0]?.message?.content ?? '';

    let standardizedAuthor = author;
    let standardizedTitle = title;

    for (const rawLine of content.split(/\r?\n/)) {
      // Trim each line so indentation or a stray "\r" does not hide the label.
      const line = rawLine.trim();

      // `(.+)` requires a value, so an empty "Author:" / "Title:" line never
      // overwrites the caller's original input with an empty string.
      const authorMatch = /^Author:\s*(.+)$/i.exec(line);
      if (authorMatch) {
        standardizedAuthor = authorMatch[1].trim();
        continue;
      }

      const titleMatch = /^Title:\s*(.+)$/i.exec(line);
      if (titleMatch) {
        standardizedTitle = titleMatch[1].trim();
      }
    }

    return {
      author: standardizedAuthor,
      title: standardizedTitle,
    };
  } catch (error) {
    console.warn(`Error standardizing "${author} - ${title}":`, error.message);
    return { author, title };
  }
}
/**
 * Ask GPT to pick the search result that best matches the requested book.
 *
 * @param {string} requestedAuthor - Author being searched for.
 * @param {string} requestedTitle - Title being searched for.
 * @param {Array<object>} searchResults - Candidates; `author`, `title`, `year`
 *   and `ext` of each entry are included in the prompt.
 * @returns {Promise<object|null>} The chosen element of `searchResults`, or
 *   `null` when no client is available, the list is missing or empty, the
 *   model reports no match, the reply cannot be parsed, the selected number is
 *   out of range, or the request fails.
 */
export async function selectBestResult(requestedAuthor, requestedTitle, searchResults) {
  const openai = getOpenAIClient();
  if (!openai) {
    console.warn('OpenAI API key not found. Using traditional scoring.');
    return null;
  }

  // A missing list is treated like an empty one instead of throwing.
  if (!Array.isArray(searchResults) || searchResults.length === 0) {
    return null;
  }

  try {
    // Number the candidates from 1 so the model can answer with a position.
    const formattedResults = searchResults
      .map((result, index) => `${index + 1}. Author: ${result.author} | Title: ${result.title} | Year: ${result.year} | Format: ${result.ext}`)
      .join('\n');

    const prompt = `I'm looking for a specific book and need you to select the best match from these search results, or determine if none match.
REQUESTED BOOK:
Author: ${requestedAuthor}
Title: ${requestedTitle}
SEARCH RESULTS:
${formattedResults}
Please analyze these results and:
1. Look for the exact author and title match (ignoring minor formatting differences)
2. Prefer English language books over translations
3. Prefer common formats (epub, pdf, mobi) over academic papers or journals
4. Avoid results that are clearly different books, academic papers, or foreign language editions unless they're the only option
Respond with ONLY one of these formats:
- If you find a good match: "SELECTED: [number]" (e.g., "SELECTED: 3")
- If no good matches: "NO MATCH"
Do not include any explanation, just the selection.`;

    const response = await openai.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 50,
      temperature: 0.1,
    });

    // The message content can be null; treat that as an empty reply.
    const content = (response.choices[0]?.message?.content ?? '').trim();

    // Tolerate case and trailing punctuation differences in the reply.
    if (/^NO MATCH\b/i.test(content)) {
      return null;
    }

    const match = /SELECTED:\s*(\d+)/i.exec(content);
    if (!match) {
      return null;
    }

    // Convert the model's 1-based position back to an array index.
    const selectedIndex = Number.parseInt(match[1], 10) - 1;
    if (selectedIndex >= 0 && selectedIndex < searchResults.length) {
      return searchResults[selectedIndex];
    }
    return null;
  } catch (error) {
    console.warn(`Error selecting best result for "${requestedAuthor} - ${requestedTitle}":`, error.message);
    return null;
  }
}
/**
 * Blacklist a failed result, then ask the AI selector for the next candidate.
 *
 * @param {string} requestedAuthor - Author being searched for.
 * @param {string} requestedTitle - Title being searched for.
 * @param {Array<object>} searchResults - All candidates, including the failed one.
 * @param {string} failedMd5 - `md5` of the result that failed.
 * @param {string} failureReason - Reason passed through to `addToBlacklist`.
 * @param {Function} addToBlacklist - Awaited with `(failedMd5, failureReason)`.
 * @returns {Promise<object|null>} The next result to try, or `null` when none
 *   remain or the selector returns nothing.
 */
export async function retryDownloadWithAI(requestedAuthor, requestedTitle, searchResults, failedMd5, failureReason, addToBlacklist) {
  // The failure is recorded before anything else happens.
  await addToBlacklist(failedMd5, failureReason);

  // Everything except the entry that just failed is still a candidate.
  const remaining = searchResults.filter(({ md5 }) => md5 !== failedMd5);
  const remainingCount = remaining.length;
  if (remainingCount === 0) {
    console.log(' No more results available to try');
    return null;
  }

  console.log(` Asking AI to select next best result from ${remainingCount} remaining options...`);
  const pick = await selectBestResult(requestedAuthor, requestedTitle, remaining);

  if (pick) {
    const { author, title, year } = pick;
    console.log(` AI selected next option: ${author} - ${title} (${year})`);
    return pick;
  }

  console.log(' AI determined no more good matches available');
  return null;
}
/**
 * Clean up a book filename with GPT while keeping it readable.
 *
 * The model's reply is untrusted text, so it is always passed through
 * `basicFilenameCleanup` before being returned; the rules in the prompt are
 * only a request and the reply may still contain separators, quotes or line
 * breaks. `basicFilenameCleanup(originalFilename)` is used instead when no
 * client is available, the request fails, or the sanitized reply is empty or
 * 250+ characters long.
 *
 * @param {string} originalFilename - Filename to clean.
 * @returns {Promise<string>} A cleaned filename.
 */
export async function cleanupFilename(originalFilename) {
  const openai = getOpenAIClient();
  if (!openai) {
    console.warn('OpenAI API key not found. Using basic filename cleanup.');
    return basicFilenameCleanup(originalFilename);
  }

  try {
    const prompt = `Please clean up this book filename to make it safe for file systems while keeping it readable and meaningful:
Original filename: ${originalFilename}
Rules:
1. Remove or replace characters that are problematic for file systems: / \\ : * ? " < > |
2. Keep the filename readable - don't turn everything into underscores
3. Preserve proper capitalization and spacing where possible
4. Replace problematic characters with appropriate alternatives (e.g., & -> and, : -> -, etc.)
5. Keep it under 200 characters
6. Remove excessive punctuation but keep meaningful ones
7. Don't use all caps or all lowercase unless the original was that way
Please respond with ONLY the cleaned filename, no explanation:`;

    const response = await openai.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages: [{ role: 'user', content: prompt }],
      max_tokens: 100,
      temperature: 0.2,
    });

    // The message content can be null; treat that as an empty reply.
    const rawFilename = (response.choices[0]?.message?.content ?? '').trim();

    // Enforce file-system safety locally rather than trusting the model.
    const cleanedFilename = basicFilenameCleanup(rawFilename);

    // Validate that the cleaned filename is reasonable
    if (cleanedFilename.length > 0 && cleanedFilename.length < 250) {
      return cleanedFilename;
    }

    console.warn(`AI returned invalid filename: "${rawFilename}". Using basic cleanup.`);
    return basicFilenameCleanup(originalFilename);
  } catch (error) {
    console.warn(`Error cleaning filename "${originalFilename}":`, error.message);
    return basicFilenameCleanup(originalFilename);
  }
}
/**
 * Fallback filename cleanup used when AI cleanup is unavailable.
 *
 * Removes `/ \ : * ? " < > |`, replaces `&` with `and`, collapses runs of
 * whitespace into a single space, and strips dots and whitespace from both
 * ends of the name.
 *
 * @param {string} filename - Filename to clean.
 * @returns {string} The cleaned filename (may be empty).
 */
function basicFilenameCleanup(filename) {
  return filename
    .replace(/[\/\\:*?"<>|]/g, '') // Remove problematic characters
    .replace(/&/g, 'and') // Replace ampersand
    .replace(/\s+/g, ' ') // Normalize whitespace
    // Strip dots and whitespace together: trimming only after the dot pass
    // left edge dots in place whenever whitespace was the outermost character.
    .replace(/^[.\s]+|[.\s]+$/g, '');
}