gitarsenal-cli
Version:
CLI tool for creating Modal sandboxes with GitHub repositories
263 lines (229 loc) • 8.85 kB
JavaScript
const { spawn } = require('child_process');
const chalk = require('chalk');
/**
 * Check whether the `gitingest` CLI is installed and starts cleanly.
 *
 * Runs `gitingest --help` and treats the tool as unusable when the process
 * cannot be spawned, exits with a non-zero code, writes something that looks
 * like a Python error to stderr, or does not finish within five seconds.
 *
 * @returns {Promise<boolean>} `true` only when the help command exits with
 *   code 0 and stderr contains none of the error markers. Never rejects.
 */
async function checkGitIngestCLI() {
  const CHECK_TIMEOUT_MS = 5000;
  // Substrings on stderr that mark the CLI as broken even if it exits with 0.
  const ERROR_MARKERS = ['TypeError', 'Traceback', 'Error'];

  try {
    // Only stderr is inspected. stdin/stdout are ignored so that help text can
    // never accumulate in a pipe nobody reads.
    const checkProcess = spawn('gitingest', ['--help'], {
      stdio: ['ignore', 'ignore', 'pipe']
    });

    let stderr = '';
    checkProcess.stderr.setEncoding('utf8');
    checkProcess.stderr.on('data', (chunk) => {
      stderr += chunk;
    });

    return new Promise((resolve) => {
      let settled = false;
      let timer = null;

      // 'error' and 'close' can both fire for one process; only the first
      // outcome counts, and the deadline timer is always released so it does
      // not keep the event loop alive after the check is done.
      const settle = (result) => {
        if (settled) {
          return;
        }
        settled = true;
        clearTimeout(timer);
        resolve(result);
      };

      // Single deadline for the whole check: kill the child and report failure.
      timer = setTimeout(() => {
        checkProcess.kill();
        settle(false);
      }, CHECK_TIMEOUT_MS);

      checkProcess.on('close', (code) => {
        const hasErrorMarker = ERROR_MARKERS.some((marker) => stderr.includes(marker));
        settle(code === 0 && !hasErrorMarker);
      });

      checkProcess.on('error', () => {
        settle(false);
      });
    });
  } catch (error) {
    // Best-effort probe: any synchronous failure means "not available".
    return false;
  }
}
/**
 * Run the local `gitingest` CLI against a repository and parse its output.
 *
 * The CLI is first probed with `checkGitIngestCLI()`. When it is usable,
 * `gitingest <repoUrl> -o -` is executed and everything written to stdout is
 * handed to `parseGitIngestOutput`.
 *
 * @param {string} repoUrl - Repository URL passed to `gitingest` as its
 *   positional argument.
 * @returns {Promise<object|null>} The value returned by
 *   `parseGitIngestOutput`, or `null` when the CLI is unavailable, cannot be
 *   started, exits with a non-zero code, or prints nothing. Never rejects.
 */
async function fetchGitIngestData(repoUrl) {
  try {
    const gitingestAvailable = await checkGitIngestCLI();
    if (!gitingestAvailable) {
      return null;
    }

    // '-o -' sends the digest to stdout. stdin is ignored because nothing is
    // ever written to the child.
    const gitingestProcess = spawn('gitingest', [repoUrl, '-o', '-'], {
      stdio: ['ignore', 'pipe', 'pipe']
    });

    let gitingestOutput = '';
    let errorOutput = '';

    // Decode at the stream level so a multi-byte character that straddles two
    // chunks is not corrupted by per-chunk toString().
    gitingestProcess.stdout.setEncoding('utf8');
    gitingestProcess.stderr.setEncoding('utf8');
    gitingestProcess.stdout.on('data', (chunk) => {
      gitingestOutput += chunk;
    });
    gitingestProcess.stderr.on('data', (chunk) => {
      errorOutput += chunk;
    });

    return new Promise((resolve) => {
      // A failed spawn can emit 'error' followed by 'close'; report only the
      // first outcome so the user does not see two warnings for one failure.
      let settled = false;
      const settle = (result) => {
        settled = true;
        resolve(result);
      };

      gitingestProcess.on('close', (code) => {
        if (settled) {
          return;
        }
        if (code === 0 && gitingestOutput.trim().length > 0) {
          console.log(chalk.green('✅ Repo analysis complete'));
          console.log(chalk.gray(`📊 Captured ${gitingestOutput.length} characters of repository content`));
          settle(parseGitIngestOutput(gitingestOutput, repoUrl));
          return;
        }
        console.log(chalk.yellow(`⚠️ Repo Analysis failed (exit code: ${code})`));
        if (errorOutput) {
          console.log(chalk.gray(`Error details: ${errorOutput.slice(0, 300)}`));
        }
        settle(null);
      });

      gitingestProcess.on('error', (error) => {
        if (settled) {
          return;
        }
        console.log(chalk.yellow(`⚠️ GitIngest CLI error: ${error.message}`));
        settle(null);
      });
    });
  } catch (error) {
    console.log(chalk.yellow(`⚠️ GitIngest execution failed: ${error.message}`));
    return null;
  }
}
// A line made only of '=' characters frames each file header in GitIngest output.
const GITINGEST_SEPARATOR_LINE = /^={3,}\s*$/;
// Header line naming the file a section contains ("FILE: path", any letter case).
const GITINGEST_FILE_HEADER_LINE = /^file:/i;
// Number of lines of file content kept in the preview.
const GITINGEST_PREVIEW_MAX_LINES = 300;
// Number of lines kept for the tree when no file section follows it.
const GITINGEST_TREE_FALLBACK_LINES = 50;

// Extract owner and repository name from a GitHub URL (https or git@ form).
function parseGitHubRepo(repoUrl) {
  const urlMatch = typeof repoUrl === 'string'
    ? repoUrl.match(/github\.com[\/:]([^\/]+)\/([^\/?#]+)/)
    : null;
  if (!urlMatch) {
    return { owner: 'unknown', repo: 'unknown' };
  }
  // Strip only a trailing ".git"; names such as "my.github.io" stay intact.
  return { owner: urlMatch[1], repo: urlMatch[2].replace(/\.git$/, '') };
}

// Tell whether lines[index] opens the first file section of the digest.
function isFileSectionStart(lines, index) {
  const line = lines[index];
  // Single-line form: separator and header on the same line.
  if (line.includes('===') && line.includes('FILE:')) {
    return true;
  }
  // Multi-line form: a separator line immediately followed by the header line.
  return GITINGEST_SEPARATOR_LINE.test(line) &&
    index + 1 < lines.length &&
    GITINGEST_FILE_HEADER_LINE.test(lines[index + 1]);
}

// Locate the summary end, tree start and content start (each -1 when absent).
function locateGitIngestSections(lines) {
  let summaryEnd = -1;
  let treeStart = -1;
  let contentStart = -1;

  for (let i = 0; i < lines.length; i++) {
    // Everything from the first file section onward is file content, so stop
    // here: headings quoted inside files must not be mistaken for sections.
    if (isFileSectionStart(lines, i)) {
      contentStart = i;
      break;
    }
    const line = lines[i];
    if (summaryEnd === -1 && line.startsWith('Repository:')) {
      // The summary runs up to the first blank line after "Repository:".
      summaryEnd = lines.findIndex((candidate, j) => j > i && candidate.trim() === '');
    }
    if (treeStart === -1 && line.startsWith('Directory structure:')) {
      treeStart = i;
    }
  }

  return { summaryEnd, treeStart, contentStart };
}

// Guess the primary language and package manager from lowercased digest text.
function detectGitIngestLanguage(contentLower) {
  const has = (needle) => contentLower.includes(needle);
  const python = { language: 'Python', packageManager: 'pip' };
  const javascript = { language: 'JavaScript', packageManager: 'npm' };
  const rust = { language: 'Rust', packageManager: 'cargo' };
  const go = { language: 'Go', packageManager: 'go mod' };

  // 1. Explicit PyTorch usage.
  if (has('import torch') || has('pytorch')) {
    return python;
  }
  // 2. Manifest file names are stronger evidence than generic keywords such
  //    as "import ", which appear in several languages and in prose.
  if (has('requirements.txt') || has('pyproject.toml') || has('setup.py')) {
    return python;
  }
  if (has('package.json')) {
    return javascript;
  }
  if (has('cargo.toml')) {
    return rust;
  }
  if (has('go.mod')) {
    return go;
  }
  // 3. Keyword fallbacks, tried in Python > JavaScript > Rust > Go order.
  if (has('def ') || has('import ')) {
    return python;
  }
  if (has('require(') || has('function ')) {
    return javascript;
  }
  if (has('fn ') || has('use ')) {
    return rust;
  }
  if (has('func ') || has('package ')) {
    return go;
  }
  return { language: 'Unknown', packageManager: 'Unknown' };
}

// List AI/ML technologies whose marker strings occur in lowercased digest text.
function detectGitIngestTechnologies(contentLower) {
  const has = (needle) => contentLower.includes(needle);
  const technologies = [];

  if (has('torch')) {
    technologies.push('PyTorch');
  }
  if (has('tensorflow') || has('tf.')) {
    technologies.push('TensorFlow');
  }
  if (has('transformers') || has('huggingface')) {
    technologies.push('Hugging Face');
  }
  if (has('numpy') || has('np.')) {
    technologies.push('NumPy');
  }
  // "# example" anywhere in the text suppresses the OpenAI match. The text is
  // lowercased, so only lowercase markers can ever be compared against it.
  if ((has('import openai') || has('openai.')) && !has('# example')) {
    technologies.push('OpenAI API');
  }
  if (has('import anthropic')) {
    technologies.push('Anthropic API');
  }
  return technologies;
}

/**
 * Parse GitIngest text output into structured data.
 *
 * Expected layout of the text:
 *
 *   Repository: owner/repo-name
 *   Files analyzed: 42
 *   Estimated tokens: 15.2k
 *
 *   Directory structure:
 *   └── project-name/
 *       ├── src/
 *       │   ├── main.py
 *       └── README.md
 *
 *   ================================================
 *   FILE: src/main.py
 *   ================================================
 *   [file content]
 *
 * @param {string} gitingestText - Raw text produced by the GitIngest CLI.
 * @param {string} repoUrl - Repository URL; owner and name are used only for
 *   the fallback summary when the text has none.
 * @returns {object|null} An object with `system_info`, `repository_analysis`
 *   and `success: true`, or `null` when parsing throws.
 */
function parseGitIngestOutput(gitingestText, repoUrl) {
  try {
    const { owner, repo } = parseGitHubRepo(repoUrl);

    const lines = gitingestText.split(/\r?\n/);
    const { summaryEnd, treeStart, contentStart } = locateGitIngestSections(lines);

    // Without a recognizable summary block, fall back to the first 10 lines.
    const summary = lines.slice(0, summaryEnd > 0 ? summaryEnd : 10).join('\n');

    let tree = '';
    if (treeStart > -1) {
      // The tree ends where file content begins; otherwise cap its length.
      const treeEnd = contentStart > -1
        ? contentStart
        : Math.min(treeStart + GITINGEST_TREE_FALLBACK_LINES, lines.length);
      tree = lines.slice(treeStart, treeEnd).join('\n');
    }

    // A bounded preview gives context without carrying the whole digest.
    const contentPreview = contentStart > -1
      ? lines.slice(contentStart, contentStart + GITINGEST_PREVIEW_MAX_LINES).join('\n')
      : '';

    const contentLower = gitingestText.toLowerCase();
    const detected = detectGitIngestLanguage(contentLower);
    const detectedTechnologies = detectGitIngestTechnologies(contentLower);

    const filesMatch = summary.match(/Files analyzed: (\d+)/);
    const fileCount = filesMatch ? Number.parseInt(filesMatch[1], 10) : 0;

    return {
      system_info: {
        platform: process.platform,
        // NOTE: holds the Node.js runtime version (process.version); the key
        // name is kept unchanged for existing consumers.
        python_version: process.version,
        detected_language: detected.language,
        detected_technologies: detectedTechnologies,
        file_count: fileCount,
        repo_stars: 0, // Would need GitHub API
        repo_forks: 0, // Would need GitHub API
        primary_package_manager: detected.packageManager,
        complexity_level: fileCount > 50 ? 'high' : fileCount > 20 ? 'medium' : 'low'
      },
      repository_analysis: {
        summary: summary || `Repository: ${owner}/${repo}\nAnalyzed with GitIngest`,
        tree: tree || 'Directory structure not available',
        content_preview: contentPreview || 'Content preview not available'
      },
      success: true
    };
  } catch (error) {
    console.log(chalk.yellow(`⚠️ Failed to parse GitIngest output: ${error.message}`));
    return null;
  }
}
// Public API: the repository analysis entry point and the CLI availability probe.
module.exports = { fetchGitIngestData, checkGitIngestCLI };