UNPKG

gitarsenal-cli

Version:

CLI tool for creating Modal sandboxes with GitHub repositories

263 lines (229 loc) 8.85 kB
#!/usr/bin/env node const { spawn } = require('child_process'); const chalk = require('chalk'); // Function to check if GitIngest CLI is available and working async function checkGitIngestCLI() { try { // Try a simple help command first const checkProcess = spawn('gitingest', ['--help'], { stdio: 'pipe', timeout: 5000 // 5 second timeout }); let stderr = ''; checkProcess.stderr.on('data', (data) => { stderr += data.toString(); }); return new Promise((resolve) => { checkProcess.on('close', (code) => { // If there are Python errors in stderr, consider it failed even if exit code is 0 if (stderr.includes('TypeError') || stderr.includes('Traceback') || stderr.includes('Error')) { // console.log(chalk.yellow('⚠️ GitIngest CLI has Python compatibility issues')); resolve(false); } else if (code === 0) { resolve(true); } else { resolve(false); } }); checkProcess.on('error', () => { resolve(false); }); // Handle timeout setTimeout(() => { if (!checkProcess.killed) { checkProcess.kill(); resolve(false); } }, 5000); }); } catch (error) { return false; } } // Function to fetch GitIngest data using local GitIngest CLI async function fetchGitIngestData(repoUrl) { try { // First check if GitIngest CLI is available const gitingestAvailable = await checkGitIngestCLI(); if (!gitingestAvailable) { return null; } // console.log(chalk.gray('📥 Running Repo Analysis locally...')); // Run GitIngest CLI command with optimal settings for AI analysis const gitingestProcess = spawn('gitingest', [ repoUrl, '-o', '-', // Output to stdout ], { stdio: ['pipe', 'pipe', 'pipe'] }); let gitingestOutput = ''; let errorOutput = ''; gitingestProcess.stdout.on('data', (data) => { gitingestOutput += data.toString(); }); gitingestProcess.stderr.on('data', (data) => { errorOutput += data.toString(); }); return new Promise((resolve) => { gitingestProcess.on('close', (code) => { if (code === 0 && gitingestOutput.trim().length > 0) { console.log(chalk.green('✅ Repo analysis complete')); console.log(chalk.gray(`📊 Captured ${gitingestOutput.length} characters of repository content`)); resolve(parseGitIngestOutput(gitingestOutput, repoUrl)); } else { console.log(chalk.yellow(`⚠️ Repo Analysis failed (exit code: ${code})`)); if (errorOutput) { console.log(chalk.gray(`Error details: ${errorOutput.slice(0, 300)}`)); } resolve(null); } }); gitingestProcess.on('error', (error) => { console.log(chalk.yellow(`⚠️ GitIngest CLI error: ${error.message}`)); resolve(null); }); }); } catch (error) { console.log(chalk.yellow(`⚠️ GitIngest execution failed: ${error.message}`)); return null; } } // Function to parse GitIngest text output into structured data function parseGitIngestOutput(gitingestText, repoUrl) { try { // Extract repository info from URL const urlMatch = repoUrl.match(/github\.com\/([^\/]+)\/([^\/]+)/); const owner = urlMatch ? urlMatch[1] : 'unknown'; const repo = urlMatch ? urlMatch[2].replace('.git', '') : 'unknown'; // GitIngest output format: // Repository: owner/repo-name // Files analyzed: 42 // Estimated tokens: 15.2k // // Directory structure: // └── project-name/ // ├── src/ // │ ├── main.py // └── README.md // // ================================================ // FILE: src/main.py // ================================================ // [file content] const lines = gitingestText.split('\n'); let summary = ''; let tree = ''; let content_preview = ''; let detectedLanguage = 'Unknown'; let detectedTechnologies = []; let primaryPackageManager = 'Unknown'; // Find sections let summaryEnd = -1; let treeStart = -1; let treeEnd = -1; let contentStart = -1; for (let i = 0; i < lines.length; i++) { const line = lines[i]; if (line.startsWith('Repository:') && summaryEnd === -1) { // Find end of summary (first empty line after Repository line) for (let j = i; j < lines.length; j++) { if (lines[j].trim() === '' && j > i) { summaryEnd = j; break; } } } if (line.startsWith('Directory structure:')) { treeStart = i; } if (line.includes('===') && line.includes('FILE:')) { if (treeStart > -1 && treeEnd === -1) { treeEnd = i; } if (contentStart === -1) { contentStart = i; } } } // Extract sections if (summaryEnd > 0) { summary = lines.slice(0, summaryEnd).join('\n'); } else { // Fallback: take first 10 lines as summary summary = lines.slice(0, 10).join('\n'); } if (treeStart > -1) { const endIdx = treeEnd > -1 ? treeEnd : (contentStart > -1 ? contentStart : Math.min(treeStart + 50, lines.length)); tree = lines.slice(treeStart, endIdx).join('\n'); } if (contentStart > -1) { // Take first 300 lines of content to provide good context without overwhelming content_preview = lines.slice(contentStart, Math.min(contentStart + 300, lines.length)).join('\n'); } // Detect technologies from content const contentLower = gitingestText.toLowerCase(); // Language detection if (contentLower.includes('import torch') || contentLower.includes('pytorch') || contentLower.includes('def ') || contentLower.includes('import ')) { detectedLanguage = 'Python'; primaryPackageManager = 'pip'; } else if (contentLower.includes('package.json') || contentLower.includes('require(') || contentLower.includes('import ') || contentLower.includes('function ')) { detectedLanguage = 'JavaScript'; primaryPackageManager = 'npm'; } else if (contentLower.includes('cargo.toml') || contentLower.includes('fn ') || contentLower.includes('use ')) { detectedLanguage = 'Rust'; primaryPackageManager = 'cargo'; } else if (contentLower.includes('go.mod') || contentLower.includes('func ') || contentLower.includes('package ')) { detectedLanguage = 'Go'; primaryPackageManager = 'go mod'; } // AI/ML Technology detection if (contentLower.includes('torch') || contentLower.includes('pytorch')) { detectedTechnologies.push('PyTorch'); } if (contentLower.includes('tensorflow') || contentLower.includes('tf.')) { detectedTechnologies.push('TensorFlow'); } if (contentLower.includes('transformers') || contentLower.includes('huggingface')) { detectedTechnologies.push('Hugging Face'); } if (contentLower.includes('numpy') || contentLower.includes('np.')) { detectedTechnologies.push('NumPy'); } if (contentLower.includes('openai') && (contentLower.includes('import openai') || contentLower.includes('openai.')) && !contentLower.includes('# example') && !contentLower.includes('# TODO')) { detectedTechnologies.push('OpenAI API'); } if (contentLower.includes('anthropic') && contentLower.includes('import anthropic')) { detectedTechnologies.push('Anthropic API'); } // Count files from summary const filesMatch = summary.match(/Files analyzed: (\d+)/); const fileCount = filesMatch ? parseInt(filesMatch[1]) : 0; const structuredData = { system_info: { platform: process.platform, python_version: process.version, detected_language: detectedLanguage, detected_technologies: detectedTechnologies, file_count: fileCount, repo_stars: 0, // Would need GitHub API repo_forks: 0, // Would need GitHub API primary_package_manager: primaryPackageManager, complexity_level: fileCount > 50 ? 'high' : fileCount > 20 ? 'medium' : 'low' }, repository_analysis: { summary: summary || `Repository: ${owner}/${repo}\nAnalyzed with GitIngest`, tree: tree || 'Directory structure not available', content_preview: content_preview || 'Content preview not available' }, success: true }; return structuredData; } catch (error) { console.log(chalk.yellow(`⚠️ Failed to parse GitIngest output: ${error.message}`)); return null; } } module.exports = { fetchGitIngestData, checkGitIngestCLI };