UNPKG

git-ripper

Version:

CLI tool that lets you download specific folders from GitHub repositories without cloning the entire repo.

842 lines (752 loc) 27.8 kB
import axios from "axios";
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
import { dirname } from "path";
import cliProgress from "cli-progress";
import pLimit from "p-limit";
import chalk from "chalk";
import prettyBytes from "pretty-bytes";
import { ResumeManager } from "./resumeManager.js";

// Set concurrency limit (adjustable based on network performance).
// Reduced from 500 to 5 to prevent GitHub API rate limiting.
const limit = pLimit(5);

// Ensure __dirname and __filename are available in ESM
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Define spinner animation frames
const spinnerFrames = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"];

// Alternative progress bar characters for more visual appeal
const progressChars = {
  complete: "▰", // Alternative: '■', '●', '◆', '▣'
  incomplete: "▱", // Alternative: '□', '○', '◇', '▢'
};

// Track frame index for spinner animation
let spinnerFrameIndex = 0;

// Default branch per "owner/repo", so it is looked up at most once per run
// instead of once per downloaded file.
const defaultBranchCache = new Map();

/**
 * Returns the next spinner frame for animation
 * @returns {string} - The spinner character
 */
const getSpinnerFrame = () => {
  const frame = spinnerFrames[spinnerFrameIndex];
  spinnerFrameIndex = (spinnerFrameIndex + 1) % spinnerFrames.length;
  return frame;
};

/**
 * Looks up (and caches) the default branch of a repository.
 * @param {string} owner - Repository owner
 * @param {string} repo - Repository name
 * @returns {Promise<string|undefined>} - The default branch, or undefined if
 *   the API response does not contain one
 * @throws {Error} - Propagates the axios error when the request fails
 */
const resolveDefaultBranch = async (owner, repo) => {
  const cacheKey = `${owner}/${repo}`;
  if (defaultBranchCache.has(cacheKey)) {
    return defaultBranchCache.get(cacheKey);
  }

  const repoInfoUrl = `https://api.github.com/repos/${owner}/${repo}`;
  const repoInfoResponse = await axios.get(repoInfoUrl);
  const defaultBranch = repoInfoResponse.data.default_branch;
  if (defaultBranch) {
    defaultBranchCache.set(cacheKey, defaultBranch);
  }
  return defaultBranch;
};

/**
 * Percent-encodes every segment of a repository path while keeping the "/"
 * separators, so names containing "#", "?" or "%" survive in a URL.
 * @param {string} repoPath - Slash-separated path inside the repository
 * @returns {string} - The URL-safe path
 */
const encodeRepoPath = (repoPath) =>
  repoPath.split("/").map(encodeURIComponent).join("/");

/**
 * Converts a repository path into a path relative to the requested folder.
 * @param {string} itemPath - Full path of the item inside the repository
 * @param {string} folderPath - Requested folder ("" for the repository root)
 * @returns {string} - Path of the item relative to folderPath
 */
const toRelativePath = (itemPath, folderPath) => {
  if (folderPath && folderPath.trim() !== "") {
    return itemPath.substring(folderPath.length).replace(/^\//, "");
  }
  return itemPath;
};

/**
 * Fetches the contents of a folder from a GitHub repository
 * @param {string} owner - Repository owner
 * @param {string} repo - Repository name
 * @param {string} branch - Branch name (default branch is used when empty)
 * @param {string} folderPath - Path to the folder ("" for the repository root)
 * @returns {Promise<Array>} - Promise resolving to an array of tree entries
 * @throws {Error} - Throws on API failures instead of returning an empty
 *   array; the error carries `isRateLimit` and `statusCode` properties
 */
const fetchFolderContents = async (owner, repo, branch, folderPath) => {
  let effectiveBranch = branch;

  if (!effectiveBranch) {
    // If no branch is specified, fetch the default branch for the repository
    try {
      effectiveBranch = await resolveDefaultBranch(owner, repo);
    } catch (error) {
      throw new Error(
        `Failed to fetch default branch for ${owner}/${repo}: ${error.message}`
      );
    }

    if (!effectiveBranch) {
      throw new Error(
        `Could not determine default branch for ${owner}/${repo}. Please specify a branch in the URL.`
      );
    }

    console.log(
      chalk.blue(`No branch specified, using default branch: ${effectiveBranch}`)
    );
  }

  const apiUrl = `https://api.github.com/repos/${owner}/${repo}/git/trees/${effectiveBranch}?recursive=1`;

  try {
    const response = await axios.get(apiUrl);

    // Check if GitHub API returned truncated results
    if (response.data.truncated) {
      console.warn(
        chalk.yellow(
          `Warning: The repository is too large and some files may be missing. ` +
            `Consider using git clone for complete repositories.`
        )
      );
    }

    if (folderPath === "") {
      // For the root directory, all items from the recursive tree are relevant.
      return response.data.tree;
    }

    // For a specific folder, items must be *inside* that folder, so match on
    // the folder path with a trailing slash.
    const prefix = folderPath.endsWith("/") ? folderPath : folderPath + "/";
    return response.data.tree.filter((item) => item.path.startsWith(prefix));
  } catch (error) {
    let errorMessage = "";
    let isRateLimit = false;

    if (error.response) {
      // Handle specific HTTP error codes
      switch (error.response.status) {
        case 403:
          if (error.response.headers["x-ratelimit-remaining"] === "0") {
            isRateLimit = true;
            const resetSeconds = Number.parseInt(
              error.response.headers["x-ratelimit-reset"],
              10
            );
            errorMessage = `GitHub API rate limit exceeded. Please wait until ${new Date(
              resetSeconds * 1000
            ).toLocaleTimeString()} or add a GitHub token (feature coming soon).`;
          } else {
            errorMessage = `Access forbidden: ${
              error.response.data?.message ||
              "Repository may be private or you may not have access"
            }`;
          }
          break;
        case 404:
          // Report the branch that was actually queried, not the possibly
          // empty one supplied by the caller.
          errorMessage = `Repository, branch, or folder not found: ${owner}/${repo}/${effectiveBranch}/${folderPath}`;
          break;
        default:
          errorMessage = `API error (${error.response.status}): ${
            error.response.data?.message || error.message
          }`;
      }
    } else if (error.request) {
      errorMessage = `Network error: No response received from GitHub. Please check your internet connection.`;
    } else {
      errorMessage = `Error preparing request: ${error.message}`;
    }

    // Always throw the error instead of returning empty array
    const enrichedError = new Error(errorMessage);
    enrichedError.isRateLimit = isRateLimit;
    enrichedError.statusCode = error.response?.status;
    throw enrichedError;
  }
};

/**
 * Downloads a single file from a GitHub repository
 * @param {string} owner - Repository owner
 * @param {string} repo - Repository name
 * @param {string} branch - Branch name (default branch is used when empty)
 * @param {string} filePath - Path to the file inside the repository
 * @param {string} outputPath - Path where the file should be saved
 * @returns {Promise<Object>} - `{ filePath, success, size, error? }`; failures
 *   are reported through the result object, never thrown
 */
const downloadFile = async (owner, repo, branch, filePath, outputPath) => {
  let effectiveBranch = branch;

  if (!effectiveBranch) {
    // Best-effort fallback for callers that did not resolve the branch. The
    // lookup is cached, so after fetchFolderContents has run this costs no
    // additional API request per file.
    try {
      effectiveBranch = await resolveDefaultBranch(owner, repo);
    } catch {
      // Deliberately ignored: fall through to the branch-less raw URL below
      // and let that request report the failure.
    }
  }

  const baseUrl = `https://raw.githubusercontent.com/${owner}/${repo}`;
  const encodedFilePath = encodeRepoPath(filePath);
  const fileUrlPath = effectiveBranch
    ? `/${effectiveBranch}/${encodedFilePath}`
    : `/${encodedFilePath}`; // filePath might be at root
  const url = `${baseUrl}${fileUrlPath}`;

  try {
    const response = await axios.get(url, { responseType: "arraybuffer" });

    // Ensure the directory exists
    try {
      fs.mkdirSync(path.dirname(outputPath), { recursive: true });
    } catch (dirError) {
      return {
        filePath,
        success: false,
        error: `Failed to create directory: ${dirError.message}`,
        size: 0,
      };
    }

    // Write the file
    try {
      fs.writeFileSync(outputPath, Buffer.from(response.data));
    } catch (fileError) {
      return {
        filePath,
        success: false,
        error: `Failed to write file: ${fileError.message}`,
        size: 0,
      };
    }

    return {
      filePath,
      success: true,
      size: response.data.length,
    };
  } catch (error) {
    // More detailed error handling for network requests
    let errorMessage = error.message;

    if (error.response) {
      // The request was made and the server responded with an error status
      switch (error.response.status) {
        case 403:
          errorMessage = "Access forbidden (possibly rate limited)";
          break;
        case 404:
          errorMessage = "File not found";
          break;
        default:
          errorMessage = `HTTP error ${error.response.status}`;
      }
    } else if (error.request) {
      // The request was made but no response was received
      errorMessage = "No response from server";
    }

    return {
      filePath,
      success: false,
      error: errorMessage,
      size: 0,
    };
  }
};

/**
 * Creates a simplified progress bar renderer with animation
 * @param {string} owner - Repository owner (currently unused)
 * @param {string} repo - Repository name (currently unused)
 * @param {string} folderPath - Path to the folder (currently unused)
 * @returns {Function} - Formatter `(options, params, payload) => string`
 *   passed as the `format` option of the progress bar
 */
const createProgressRenderer = (owner, repo, folderPath) => {
  // Default terminal width
  const terminalWidth = process.stdout.columns || 80;

  return (options, params, payload) => {
    try {
      const { value, total, startTime } = params;
      const { downloadedSize = 0 } = payload || { downloadedSize: 0 };

      // Calculate progress percentage
      const progress = Math.min(1, Math.max(0, value / Math.max(1, total)));
      const percentage = Math.floor(progress * 100);

      // Calculate elapsed time
      const elapsedSecs = Math.max(0.1, (Date.now() - startTime) / 1000);

      // Create the progress bar
      const barLength = Math.max(
        20,
        Math.min(40, Math.floor(terminalWidth / 2))
      );
      const completedLength = Math.round(barLength * progress);
      const remainingLength = barLength - completedLength;

      // Build the bar with custom progress characters
      const completedBar = chalk.greenBright(
        progressChars.complete.repeat(completedLength)
      );
      const remainingBar = chalk.gray(
        progressChars.incomplete.repeat(remainingLength)
      );

      // Add spinner for animation
      const spinner = chalk.cyanBright(getSpinnerFrame());

      // Format the output
      const progressInfo = `${chalk.cyan(`${value}/${total}`)} files`;
      const sizeInfo = prettyBytes(downloadedSize || 0);

      return `${spinner} ${completedBar}${remainingBar} ${chalk.yellow(
        percentage + "%"
      )} | ${progressInfo} | ${chalk.magenta(sizeInfo)}`;
    } catch (error) {
      // Fallback to a very simple progress indicator; guard against a missing
      // params object and a zero total so this path never prints "NaN%".
      const { value = 0, total = 0 } = params || {};
      return `${Math.floor((value / Math.max(1, total)) * 100)}% complete`;
    }
  };
};

/**
 * Downloads all files from a folder in a GitHub repository
 * @param {Object} repoInfo - Object containing repository information
 * @param {string} repoInfo.owner - Repository owner
 * @param {string} repoInfo.repo - Repository name
 * @param {string} repoInfo.branch - Branch name
 * @param {string} repoInfo.folderPath - Path to the folder
 * @param {string} outputDir - Directory where files should be saved
 * @returns {Promise<Object>} - `{ success, filesDownloaded, failedFiles, isEmpty }`
 * @throws {Error} - Re-throws errors from listing the repository contents
 */
const downloadFolder = async (
  { owner, repo, branch, folderPath },
  outputDir
) => {
  console.log(
    chalk.cyan(`Analyzing repository structure for ${owner}/${repo}...`)
  );

  try {
    const contents = await fetchFolderContents(owner, repo, branch, folderPath);

    if (!contents || contents.length === 0) {
      const message = `No files found in ${folderPath || "repository root"}`;
      console.log(chalk.yellow(message));
      // Don't print success message when no files are found - this might indicate an error
      return {
        success: true,
        filesDownloaded: 0,
        failedFiles: 0,
        isEmpty: true,
      };
    }

    // Filter for blob type (files)
    const files = contents.filter((item) => item.type === "blob");
    const totalFiles = files.length;

    if (totalFiles === 0) {
      const message = `No files found in ${
        folderPath || "repository root"
      } (only directories)`;
      console.log(chalk.yellow(message));
      // This is a legitimate case - directory exists but contains only subdirectories
      console.log(chalk.green(`Directory structure downloaded successfully!`));
      return {
        success: true,
        filesDownloaded: 0,
        failedFiles: 0,
        isEmpty: true,
      };
    }

    console.log(
      chalk.cyan(
        `Downloading ${totalFiles} files from ${chalk.white(
          owner + "/" + repo
        )}...`
      )
    );

    // Simplified progress bar setup
    const progressBar = new cliProgress.SingleBar({
      format: createProgressRenderer(owner, repo, folderPath),
      hideCursor: true,
      clearOnComplete: false,
      stopOnComplete: true,
      forceRedraw: true,
    });

    // Track download metrics
    let downloadedSize = 0;
    const startTime = Date.now();
    const failedFiles = [];

    // Start progress bar
    progressBar.start(totalFiles, 0, {
      downloadedSize: 0,
      startTime,
    });

    // Create download promises with concurrency control
    const fileDownloadPromises = files.map((item) => {
      // Keep the original structure below the requested folder
      const relativePath = toRelativePath(item.path, folderPath);
      const outputFilePath = path.join(outputDir, relativePath);

      return limit(async () => {
        try {
          const result = await downloadFile(
            owner,
            repo,
            branch,
            item.path,
            outputFilePath
          );

          // Update progress metrics
          if (result.success) {
            downloadedSize += result.size || 0;
          } else {
            // Track failed files for reporting
            failedFiles.push({
              path: item.path,
              error: result.error,
            });
          }

          // Update progress bar with current metrics
          progressBar.increment(1, {
            downloadedSize,
          });

          return result;
        } catch (error) {
          failedFiles.push({
            path: item.path,
            error: error.message,
          });

          progressBar.increment(1, { downloadedSize });
          return {
            filePath: item.path,
            success: false,
            error: error.message,
            size: 0,
          };
        }
      });
    });

    // Execute downloads in parallel with controlled concurrency
    const results = await Promise.all(fileDownloadPromises);
    progressBar.stop();

    console.log(); // Add an empty line after progress bar

    // Count successful and failed downloads
    const succeeded = results.filter((r) => r.success).length;
    const failed = failedFiles.length;

    if (failed === 0) {
      console.log(
        chalk.green(`✅ All ${succeeded} files downloaded successfully!`)
      );
      console.log(chalk.green(`Folder cloned successfully!`));
      return {
        success: true,
        filesDownloaded: succeeded,
        failedFiles: failed,
        isEmpty: false,
      };
    }

    console.log(
      chalk.yellow(
        `Downloaded ${succeeded} files successfully, ${failed} files failed`
      )
    );

    // Show detailed errors if there aren't too many
    if (failed <= 5) {
      console.log(chalk.yellow("Failed files:"));
      failedFiles.forEach((file) => {
        console.log(chalk.yellow(` - ${file.path}: ${file.error}`));
      });
    } else {
      console.log(
        chalk.yellow(
          `${failed} files failed to download. Check your connection or repository access.`
        )
      );
    }

    // Don't claim success if files failed to download
    if (succeeded === 0) {
      console.log(
        chalk.red(`❌ Download failed: No files were downloaded successfully`)
      );
    } else {
      console.log(chalk.yellow(`⚠️ Download completed with errors`));
    }

    return {
      success: false,
      filesDownloaded: succeeded,
      failedFiles: failed,
      isEmpty: false,
    };
  } catch (error) {
    // Log the specific error details
    console.error(chalk.red(`❌ Error downloading folder: ${error.message}`));

    // Re-throw the error so the main CLI can exit with proper error code
    throw error;
  }
};

/**
 * Downloads all files from a folder in a GitHub repository with resume capability
 * @param {Object} repoInfo - Object containing repository information
 * @param {string} repoInfo.owner - Repository owner
 * @param {string} repoInfo.repo - Repository name
 * @param {string} repoInfo.branch - Branch name
 * @param {string} repoInfo.folderPath - Path to the folder
 * @param {string} outputDir - Directory where files should be saved
 * @param {Object} [options] - Resume behaviour
 * @param {boolean} [options.resume=true] - When false, delegates to downloadFolder
 * @param {boolean} [options.forceRestart=false] - Discard any existing checkpoint first
 * @returns {Promise<Object>} - `{ success, filesDownloaded, failedFiles, isEmpty }`
 * @throws {Error} - Re-throws errors from listing the repository contents
 */
const downloadFolderWithResume = async (
  { owner, repo, branch, folderPath },
  outputDir,
  options = { resume: true, forceRestart: false }
) => {
  const { resume = true, forceRestart = false } = options;

  if (!resume) {
    return downloadFolder({ owner, repo, branch, folderPath }, outputDir);
  }

  const resumeManager = new ResumeManager();
  // Identifier under which the checkpoint for this download is stored
  const url = `https://github.com/${owner}/${repo}/tree/${branch || "main"}/${
    folderPath || ""
  }`;

  // Clear checkpoint if force restart is requested
  if (forceRestart) {
    resumeManager.cleanupCheckpoint(url, outputDir);
  }

  // Check for existing checkpoint
  let checkpoint = resumeManager.loadCheckpoint(url, outputDir);

  if (checkpoint) {
    console.log(
      chalk.blue(
        `🔄 Found previous download from ${new Date(
          checkpoint.timestamp
        ).toLocaleString()}`
      )
    );
    console.log(
      chalk.blue(
        `📊 Progress: ${checkpoint.downloadedFiles.length}/${checkpoint.totalFiles} files completed`
      )
    );

    // Verify integrity of existing files
    const validFiles = [];
    let corruptedCount = 0;

    for (const filename of checkpoint.downloadedFiles) {
      const filepath = path.join(outputDir, filename);
      const expectedHash = checkpoint.fileHashes[filename];

      if (
        expectedHash &&
        resumeManager.verifyFileIntegrity(filepath, expectedHash)
      ) {
        validFiles.push(filename);
      } else {
        corruptedCount++;
      }
    }

    checkpoint.downloadedFiles = validFiles;

    if (corruptedCount > 0) {
      console.log(
        chalk.yellow(
          `🔧 Detected ${corruptedCount} corrupted files, will re-download`
        )
      );
    }

    console.log(chalk.green(`✅ Verified ${validFiles.length} existing files`));
  }

  console.log(
    chalk.cyan(`Analyzing repository structure for ${owner}/${repo}...`)
  );

  try {
    const contents = await fetchFolderContents(owner, repo, branch, folderPath);

    if (!contents || contents.length === 0) {
      const message = `No files found in ${folderPath || "repository root"}`;
      console.log(chalk.yellow(message));
      // Don't print success message when no files are found - this might indicate an error
      return {
        success: true,
        filesDownloaded: 0,
        failedFiles: 0,
        isEmpty: true,
      };
    }

    // Filter for blob type (files)
    const files = contents.filter((item) => item.type === "blob");
    const totalFiles = files.length;

    if (totalFiles === 0) {
      const message = `No files found in ${
        folderPath || "repository root"
      } (only directories)`;
      console.log(chalk.yellow(message));
      // This is a legitimate case - directory exists but contains only subdirectories
      console.log(chalk.green(`Directory structure downloaded successfully!`));
      return {
        success: true,
        filesDownloaded: 0,
        failedFiles: 0,
        isEmpty: true,
      };
    }

    // Create new checkpoint if none exists
    if (!checkpoint) {
      checkpoint = resumeManager.createNewCheckpoint(
        url,
        outputDir,
        totalFiles
      );
      console.log(
        chalk.cyan(
          `📥 Starting download of ${totalFiles} files from ${chalk.white(
            owner + "/" + repo
          )}...`
        )
      );
    } else {
      // Update total files in case repository changed
      checkpoint.totalFiles = totalFiles;
      console.log(chalk.cyan(`📥 Resuming download...`));
    }

    // Get remaining files to download (Set lookup keeps this linear)
    const alreadyDownloaded = new Set(checkpoint.downloadedFiles);
    const remainingFiles = files.filter(
      (item) => !alreadyDownloaded.has(toRelativePath(item.path, folderPath))
    );

    if (remainingFiles.length === 0) {
      console.log(chalk.green(`🎉 All files already downloaded!`));
      resumeManager.cleanupCheckpoint(url, outputDir);
      // Return the same result shape as every other path so callers can read
      // `result.success` unconditionally.
      return {
        success: true,
        filesDownloaded: totalFiles,
        failedFiles: 0,
        isEmpty: false,
      };
    }

    console.log(
      chalk.cyan(`📥 Downloading ${remainingFiles.length} remaining files...`)
    );

    // Setup progress bar
    const progressBar = new cliProgress.SingleBar({
      format: createProgressRenderer(owner, repo, folderPath),
      hideCursor: true,
      clearOnComplete: false,
      stopOnComplete: true,
      forceRedraw: true,
    });

    // Calculate already downloaded size
    let downloadedSize = 0;
    for (const filename of checkpoint.downloadedFiles) {
      const filepath = path.join(outputDir, filename);
      try {
        downloadedSize += fs.statSync(filepath).size;
      } catch {
        // File might be missing, will be re-downloaded
      }
    }

    const startTime = Date.now();

    // Every file that is not verified as downloaded is retried below, so the
    // failure list starts empty for this run. Carrying over failures from the
    // previous run would keep reporting files that have since succeeded.
    const failedFiles = [];
    checkpoint.failedFiles = failedFiles;

    // Start progress bar with current progress
    progressBar.start(totalFiles, checkpoint.downloadedFiles.length, {
      downloadedSize,
      startTime,
    });

    // Process remaining files
    let processedCount = 0;

    for (const item of remainingFiles) {
      const relativePath = toRelativePath(item.path, folderPath);

      try {
        const outputFilePath = path.join(outputDir, relativePath);

        const result = await downloadFile(
          owner,
          repo,
          branch,
          item.path,
          outputFilePath
        );

        if (result.success) {
          // Calculate file hash for integrity checking
          const fileContent = fs.readFileSync(outputFilePath);
          const fileHash = resumeManager.calculateHash(fileContent);

          // Update checkpoint
          checkpoint.downloadedFiles.push(relativePath);
          checkpoint.fileHashes[relativePath] = fileHash;
          downloadedSize += result.size || 0;
        } else {
          // Track failed files
          failedFiles.push({
            path: relativePath,
            error: result.error,
          });
        }

        processedCount++;

        // Save checkpoint every 10 files
        if (processedCount % 10 === 0) {
          resumeManager.saveCheckpoint(checkpoint);
        }

        // Update progress bar
        progressBar.increment(1, { downloadedSize });
      } catch (error) {
        // Handle interruption gracefully.
        // NOTE(review): process signals are not delivered as thrown errors,
        // so this branch only runs if something throws an error named
        // "SIGINT" — confirm whether a signal handler elsewhere does that.
        if (error.name === "SIGINT") {
          resumeManager.saveCheckpoint(checkpoint);
          progressBar.stop();
          console.log(
            chalk.blue(`\n⏸️ Download interrupted. Progress saved.`)
          );
          console.log(chalk.blue(`💡 Run the same command again to resume.`));
          return {
            success: false,
            filesDownloaded: checkpoint.downloadedFiles.length,
            failedFiles: failedFiles.length,
            isEmpty: false,
          };
        }

        failedFiles.push({
          path: relativePath,
          error: error.message,
        });

        progressBar.increment(1, { downloadedSize });
      }
    }

    progressBar.stop();
    console.log(); // Add an empty line after progress bar

    // Final checkpoint save
    resumeManager.saveCheckpoint(checkpoint);

    // Count results
    const succeeded = checkpoint.downloadedFiles.length;
    const failed = failedFiles.length;

    if (failed === 0) {
      console.log(
        chalk.green(`🎉 All ${succeeded} files downloaded successfully!`)
      );
      resumeManager.cleanupCheckpoint(url, outputDir);
      console.log(chalk.green(`Folder cloned successfully!`));
      return {
        success: true,
        filesDownloaded: succeeded,
        failedFiles: failed,
        isEmpty: false,
      };
    }

    console.log(
      chalk.yellow(
        `Downloaded ${succeeded} files successfully, ${failed} files failed`
      )
    );

    if (failed <= 5) {
      console.log(chalk.yellow("Failed files:"));
      failedFiles.forEach((file) => {
        console.log(chalk.yellow(` - ${file.path}: ${file.error}`));
      });
    }

    console.log(
      chalk.blue(`💡 Run the same command again to retry failed downloads`)
    );

    // Don't claim success if files failed to download
    if (succeeded === 0) {
      console.log(
        chalk.red(`❌ Download failed: No files were downloaded successfully`)
      );
    } else {
      console.log(chalk.yellow(`⚠️ Download completed with errors`));
    }

    return {
      success: false,
      filesDownloaded: succeeded,
      failedFiles: failed,
      isEmpty: false,
    };
  } catch (error) {
    // Save checkpoint on any error
    if (checkpoint) {
      resumeManager.saveCheckpoint(checkpoint);
    }

    console.error(chalk.red(`❌ Error downloading folder: ${error.message}`));
    throw error;
  }
};

// Export functions in ESM format
export { downloadFolder, downloadFolderWithResume };