UNPKG

@iflow-mcp/ejmockler-brutalist

Version:

Deploy Claude, Codex & Gemini CLI agents to demolish your work before users do. Real file analysis. Brutal honesty. Now with conversation continuation & intelligent pagination.

1,104 lines 53.8 kB
import { spawn, exec } from 'child_process';
import { promises as fs, realpathSync } from 'fs';
import { promisify } from 'util';
import { logger } from './logger.js';
import { ModelResolver } from './model-resolver.js';
import {
  resolveServers,
  writeClaudeMCPConfig,
  cleanupTempConfig,
  buildCodexMCPOverride,
  ensureGeminiMCPServers,
  ensurePlaywrightBrowsers,
} from './mcp-registry.js';

/**
 * Read a positive integer setting from an environment variable.
 *
 * A plain `parseInt` turns non-numeric text into NaN, and a NaN, zero or
 * negative limit silently disables whatever it guards: `setTimeout(fn, NaN)`
 * fires almost immediately, every `value > NaN` comparison is false, and a
 * concurrency limit of 0 can never be satisfied. Such values fall back to
 * the built-in default instead.
 *
 * @param {string} name - Environment variable to read.
 * @param {number} fallback - Value used when the variable is unset or unusable.
 * @returns {number} The parsed positive integer, or `fallback`.
 */
function readPositiveIntEnv(name, fallback) {
  const raw = process.env[name];
  if (raw === undefined || raw === '') {
    return fallback;
  }
  const parsed = Number.parseInt(raw, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Configurable timeouts and limits
const DEFAULT_TIMEOUT = readPositiveIntEnv('BRUTALIST_TIMEOUT', 1800000); // 30 minutes default
const CLI_CHECK_TIMEOUT = readPositiveIntEnv('BRUTALIST_CLI_CHECK_TIMEOUT', 5000); // 5 seconds for CLI checks
const MAX_BUFFER_SIZE = readPositiveIntEnv('BRUTALIST_MAX_BUFFER', 10 * 1024 * 1024); // 10MB default
const MAX_CONCURRENT_CLIS = readPositiveIntEnv('BRUTALIST_MAX_CONCURRENT', 3); // 3 concurrent CLIs

// Resource limits for security
const MAX_MEMORY_MB = readPositiveIntEnv('BRUTALIST_MAX_MEMORY', 2048); // 2GB memory limit per process
const MAX_CPU_TIME_SEC = readPositiveIntEnv('BRUTALIST_MAX_CPU_TIME', 3000); // 50 minutes (should exceed default timeout)
const MEMORY_CHECK_INTERVAL = 5000; // Check memory usage every 5 seconds

// Process tracking for resource management (keyed by child pid)
const activeProcesses = new Map();

// Claude CLI accepts aliases natively — no need to maintain full model IDs.
export const CLAUDE_ALIASES = ['opus', 'sonnet', 'haiku'];

// Security utilities for CLI execution
const MAX_PATH_DEPTH = 10; // Maximum directory depth for paths

// Validate and sanitize CLI arguments
// Note: We use spawn() with shell:false and array args, so we don't need to block
// punctuation characters. Only block truly dangerous patterns (null bytes).
// We use stdin for large content, so no arg length limit needed (OS limit is ~1MB anyway).
function validateArguments(args) { for (const arg of args) { // Check for null bytes (can terminate strings prematurely) if (arg.includes('\0')) { throw new Error('Argument contains null byte'); } } } // Validate and canonicalize paths to prevent traversal attacks function validatePath(path, name) { if (!path) { throw new Error(`${name} cannot be empty`); } // Check for null bytes if (path.includes('\0')) { throw new Error(`${name} contains null byte`); } // Check for dangerous path traversal patterns if (path.includes('../') || path.includes('..\\') || path.includes('/..') || path.includes('\\..')) { throw new Error(`${name} contains path traversal attempt: ${path}`); } // Check path depth to prevent deeply nested attacks const depth = path.split('/').length - 1; if (depth > MAX_PATH_DEPTH) { throw new Error(`${name} exceeds maximum depth: ${depth} > ${MAX_PATH_DEPTH}`); } // Canonicalize the path (this also validates it exists and resolves symlinks) try { return realpathSync(path); } catch (error) { throw new Error(`Invalid ${name}: ${error instanceof Error ? 
error.message : String(error)}`); } } // Async version of validatePath for use in async contexts async function asyncValidatePath(path, name) { if (!path) { throw new Error(`${name} cannot be empty`); } // Check for null bytes if (path.includes('\0')) { throw new Error(`${name} contains null byte`); } // Check for dangerous path traversal patterns if (path.includes('../') || path.includes('..\\') || path.includes('/..') || path.includes('\\..')) { throw new Error(`${name} contains path traversal attempt: ${path}`); } // Check path depth to prevent deeply nested attacks const depth = path.split('/').length - 1; if (depth > MAX_PATH_DEPTH) { throw new Error(`${name} exceeds maximum depth: ${depth} > ${MAX_PATH_DEPTH}`); } // Canonicalize the path (this also validates it exists and resolves symlinks) try { return await fs.realpath(path); } catch (error) { throw new Error(`Invalid ${name}: ${error instanceof Error ? error.message : String(error)}`); } } // Create secure environment for CLI processes function createSecureEnvironment() { // Minimal environment whitelist const SAFE_ENV_VARS = [ 'PATH', 'HOME', 'USER', 'SHELL', 'TERM', 'LANG', 'LC_ALL', 'TZ', 'NODE_ENV' ]; const secureEnv = {}; // Copy only safe environment variables for (const varName of SAFE_ENV_VARS) { if (process.env[varName]) { secureEnv[varName] = process.env[varName]; } } // Add security-focused environment variables secureEnv.TERM = 'dumb'; // Disable terminal features secureEnv.NO_COLOR = '1'; // Disable color output secureEnv.CI = 'true'; // Indicate non-interactive environment return secureEnv; } // Cross-platform memory usage monitoring async function getUnixMemoryUsage(pid) { try { const execAsync = promisify(exec); // Use ps command to get memory usage in KB const { stdout } = await execAsync(`ps -o rss= -p ${pid}`); const memoryKB = parseInt(stdout.trim(), 10); if (isNaN(memoryKB)) return null; return { memoryMB: Math.round(memoryKB / 1024) }; } catch { return null; } } async function 
getWindowsMemoryUsage(pid) { try { const execAsync = promisify(exec); // Use wmic command to get memory usage const { stdout } = await execAsync(`wmic process where "ProcessId=${pid}" get WorkingSetSize /value`); const match = stdout.match(/WorkingSetSize=(\d+)/); if (!match) return null; const memoryBytes = parseInt(match[1], 10); return { memoryMB: Math.round(memoryBytes / (1024 * 1024)) }; } catch { return null; } } // Safe command execution helper using spawn instead of exec to prevent command injection async function spawnAsync(command, args, options = {}) { return new Promise((resolve, reject) => { // Validate command name (basic validation) if (!command || command.length === 0) { reject(new Error('Command cannot be empty')); return; } // Validate arguments for injection attacks try { validateArguments(args); } catch (error) { reject(error); return; } // Validate and canonicalize working directory let cwd; try { if (options.cwd) { cwd = validatePath(options.cwd, 'working directory'); } else { cwd = process.cwd(); } } catch (error) { reject(error); return; } // Use secure environment const secureEnv = options.env || createSecureEnvironment(); const child = spawn(command, args, { cwd: cwd, stdio: ['pipe', 'pipe', 'pipe'], shell: false, // CRITICAL: disable shell to prevent injection detached: false, // Run all CLIs non-detached for consistent behavior env: secureEnv, // Additional security options uid: process.getuid ? process.getuid() : undefined, // Maintain current user ID gid: process.getgid ? 
process.getgid() : undefined // Maintain current group ID }); let stdout = ''; let stderr = ''; let timedOut = false; let killed = false; // Track process for resource monitoring if (child.pid) { activeProcesses.set(child.pid, { startTime: Date.now(), memoryChecks: 0 }); } // Memory monitoring timer let memoryTimer; if (child.pid) { memoryTimer = setInterval(async () => { try { const pid = child.pid; const processInfo = activeProcesses.get(pid); if (!processInfo || killed) { if (memoryTimer) clearInterval(memoryTimer); return; } processInfo.memoryChecks++; // Check memory usage (cross-platform) const usage = process.platform === 'win32' ? await getWindowsMemoryUsage(pid) : await getUnixMemoryUsage(pid); if (usage && usage.memoryMB > MAX_MEMORY_MB) { child.kill('SIGTERM'); reject(new Error(`Process exceeded memory limit: ${usage.memoryMB}MB > ${MAX_MEMORY_MB}MB`)); return; } // Check CPU time limit const runtimeMs = Date.now() - processInfo.startTime; if (runtimeMs > MAX_CPU_TIME_SEC * 1000) { child.kill('SIGTERM'); reject(new Error(`Process exceeded CPU time limit: ${runtimeMs}ms > ${MAX_CPU_TIME_SEC * 1000}ms`)); return; } } catch (error) { // Memory check failed, but don't kill process for this logger.warn('Memory check failed:', error); } }, MEMORY_CHECK_INTERVAL); } // Set up timeout with SIGKILL escalation const timeoutMs = options.timeout || DEFAULT_TIMEOUT; let killTimer; const timer = setTimeout(() => { timedOut = true; // First try SIGTERM child.kill('SIGTERM'); // If still running after 5 seconds, escalate to SIGKILL killTimer = setTimeout(() => { if (!killed) { try { // All CLIs run non-detached now, so just kill the process directly child.kill('SIGKILL'); } catch (e) { // Process may have already exited } } }, 5000); reject(new Error(`Command timed out after ${timeoutMs}ms: ${command} ${args.join(' ')}`)); }, timeoutMs); // Collect output // NOTE: maxBuffer (default 10MB) can lead to high memory usage if CLI agents produce large outputs. 
// Consider making this configurable or dynamically adjusting based on expected output size. child.stdout?.on('data', (data) => { const chunk = data.toString(); stdout += chunk; // Call progress callback if provided if (options.onProgress) { options.onProgress(chunk, 'stdout'); } if (options.maxBuffer && stdout.length > options.maxBuffer) { child.kill('SIGTERM'); reject(new Error(`stdout exceeded maxBuffer size: ${options.maxBuffer}`)); } }); child.stderr?.on('data', (data) => { const chunk = data.toString(); stderr += chunk; // Call progress callback if provided if (options.onProgress) { options.onProgress(chunk, 'stderr'); } // Apply same buffer limit to stderr to prevent DoS if (options.maxBuffer && stderr.length > options.maxBuffer) { child.kill('SIGTERM'); reject(new Error(`stderr exceeded maxBuffer size: ${options.maxBuffer}`)); } }); // Handle completion child.on('close', (code) => { killed = true; clearTimeout(timer); if (killTimer) clearTimeout(killTimer); if (memoryTimer) clearInterval(memoryTimer); // Clean up process tracking if (child.pid) { activeProcesses.delete(child.pid); } if (!timedOut) { if (code === 0) { resolve({ stdout, stderr }); } else { const error = new Error(`Command failed with exit code ${code}: ${command} ${args.join(' ')}`); error.code = code || undefined; error.stdout = stdout; error.stderr = stderr; reject(error); } } }); child.on('error', (error) => { clearTimeout(timer); if (killTimer) clearTimeout(killTimer); if (memoryTimer) clearInterval(memoryTimer); // Clean up process tracking if (child.pid) { activeProcesses.delete(child.pid); } reject(error); }); // Send input if provided, then close stdin if (options.input) { child.stdin?.write(options.input); child.stdin?.end(); } else { // CRITICAL: For Claude CLI specifically, close stdin immediately even without input // Claude --print waits for stdin EOF before processing the prompt argument if (command === 'claude') { child.stdin?.end(); } // Other CLIs (Codex, Gemini) work fine 
with stdin left open } }); } const CLI_BUILDER_CONFIGS = { claude: { command: 'claude', defaultArgs: ['--print'], modelArgName: '--model', mpcEnvCleanup: ['CLAUDE_MCP_CONFIG', 'MCP_ENABLED', 'CLAUDECODE', 'CLAUDE_CODE_ENTRYPOINT'], streamingArgs: () => ['--output-format', 'stream-json', '--verbose'], mcpSupport: { configMethod: 'flag-file', configFlag: '--mcp-config', strictFlag: '--strict-mcp-config', writeProtection: { method: 'disallowed-tools', flag: '--disallowedTools', value: 'Edit,Write,NotebookEdit', }, }, }, codex: { command: 'codex', defaultArgs: ['exec', '--sandbox', 'read-only'], modelArgName: '--model', jsonFlag: '--json', mpcEnvCleanup: ['CODEX_MCP_CONFIG', 'MCP_ENABLED'], promptWrapper: (sys, user) => `${sys}\n\n${user}\n\nUse your shell tools to read files (cat, ls, find, grep, head, etc.) and analyze the codebase. You ARE allowed to run read-only commands. Explore the directory structure, read relevant source files, and provide a comprehensive brutal analysis based on what you find.`, mcpSupport: { configMethod: 'config-override', configOverrideKey: 'mcp_servers', writeProtection: { method: 'sandbox', flag: '--sandbox', value: 'read-only', // already in defaultArgs }, }, }, gemini: { command: 'gemini', defaultArgs: [], modelArgName: '--model', envExtras: { TERM: 'dumb', NO_COLOR: '1', CI: 'true' }, mpcEnvCleanup: ['GEMINI_MCP_CONFIG', 'MCP_ENABLED'], mcpSupport: { configMethod: 'server-whitelist', whitelistFlag: '--allowed-mcp-server-names', writeProtection: { method: 'approval-mode', flag: '--approval-mode', value: 'plan', }, }, }, }; export class CLIAgentOrchestrator { defaultTimeout = 1800000; // 30 minutes - complex codebases need time defaultWorkingDir = process.cwd(); cliContext = { availableCLIs: [] }; cliContextCached = false; cliContextCacheTime = 0; CLI_CACHE_TTL = 300000; // 5 minutes cache runningCLIs = 0; // Track concurrent CLI executions MAX_CONCURRENT_CLIS = MAX_CONCURRENT_CLIS; // Configurable concurrency limit // Runtime model 
discovery modelResolver; // Streaming throttle properties streamingBuffers = new Map(); STREAMING_FLUSH_INTERVAL = 200; // 200ms MAX_CHUNK_SIZE = 2048; // 2KB per event HEARTBEAT_INTERVAL = 5000; // 5s between progress heartbeats lastHeartbeat = 0; constructor(modelResolver) { this.modelResolver = modelResolver || new ModelResolver(); // Log configuration at startup logger.info(`🔧 Brutalist MCP Configuration:`); logger.info(` - Default timeout: ${DEFAULT_TIMEOUT}ms`); logger.info(` - CLI check timeout: ${CLI_CHECK_TIMEOUT}ms`); logger.info(` - Max buffer size: ${MAX_BUFFER_SIZE} bytes`); logger.info(` - Max concurrent CLIs: ${MAX_CONCURRENT_CLIS}`); // Detect CLI context and discover models at startup Promise.all([ this.detectCLIContext(), this.modelResolver.initialize(), ]).catch(error => { logger.error("Failed startup detection:", error); }); } // Parse NDJSON with proper JSON boundary detection // Handles JSON objects that contain embedded newlines without data loss parseNDJSON(input) { if (!input || !input.trim()) { return []; } const results = []; let depth = 0; let inString = false; let escape = false; let start = 0; for (let i = 0; i < input.length; i++) { const char = input[i]; // Handle escape sequences if (escape) { escape = false; continue; } if (char === '\\') { escape = true; continue; } // Track string boundaries if (char === '"') { inString = !inString; continue; } // Only count braces/brackets outside of strings if (inString) continue; // Track depth if (char === '{' || char === '[') { depth++; } else if (char === '}' || char === ']') { depth--; // When depth returns to 0, we've found a complete JSON object if (depth === 0) { const jsonStr = input.slice(start, i + 1).trim(); if (jsonStr) { try { const parsed = JSON.parse(jsonStr); results.push(parsed); } catch (e) { // Log unparseable segments (not silent) logger.warn(`Failed to parse JSON segment at position ${start}-${i + 1}:`, { preview: jsonStr.substring(0, 100), error: e instanceof Error ? 
e.message : String(e) }); } } // Move start pointer past this object and any whitespace start = i + 1; while (start < input.length && /\s/.test(input[start])) { start++; } i = start - 1; // Will be incremented by loop } } } // Warn about incomplete JSON at end of input if (start < input.length) { const remaining = input.slice(start).trim(); if (remaining) { logger.warn(`Incomplete JSON at end of input:`, { preview: remaining.substring(0, 100) }); } } return results; } // Decode Claude's stream-json NDJSON output into plain text. // Extracts text content blocks from all 'assistant' events across all turns. // Skips system events, user events (tool results with raw file contents), and // tool_use content blocks within assistant events. // Falls back to 'result' event if no assistant text was captured. decodeClaudeStreamJson(ndjsonOutput) { if (!ndjsonOutput || !ndjsonOutput.trim()) { logger.warn('decodeClaudeStreamJson: empty input'); return ''; } const events = this.parseNDJSON(ndjsonOutput); if (events.length === 0) { logger.warn('decodeClaudeStreamJson: no valid JSON events found in output'); return ''; } const textParts = []; let resultText = ''; let resultError = ''; for (const event of events) { if (typeof event !== 'object' || event === null) continue; const typedEvent = event; if (typedEvent.type === 'assistant' && typedEvent.message?.content) { // Extract only text blocks from assistant messages (skip tool_use blocks) const content = typedEvent.message.content; if (Array.isArray(content)) { for (const item of content) { if (item.type === 'text' && item.text) { textParts.push(item.text); } } } } else if (typedEvent.type === 'result') { if (typedEvent.subtype === 'error' || typedEvent.is_error) { resultError = typedEvent.error || typedEvent.result || 'Unknown error'; } else if (typedEvent.result) { resultText = typedEvent.result; } } // Skip: system, user (tool_result with raw file contents), hooks } // Handle error if (resultError) { 
logger.error('decodeClaudeStreamJson: Claude returned error result', { error: resultError }); return `[Claude Error] ${resultError}`; } // Use accumulated assistant text if available, fall back to result event if (textParts.length > 0) { return textParts.join('\n\n'); } if (resultText) { return resultText; } logger.warn('decodeClaudeStreamJson: no text content found in stream-json output', { eventCount: events.length, eventTypes: events.map(e => e.type).filter(Boolean) }); return ''; } // Extract only the agent messages from Codex JSON output (no thinking, no file reads, no commands) extractCodexAgentMessage(jsonOutput) { if (!jsonOutput || !jsonOutput.trim()) { logger.debug('extractCodexAgentMessage: empty input'); return ''; } const agentMessages = []; const events = this.parseNDJSON(jsonOutput); logger.debug(`extractCodexAgentMessage: processing ${events.length} JSON events`); for (const event of events) { if (typeof event !== 'object' || event === null) continue; const typedEvent = event; logger.debug(`extractCodexAgentMessage: parsed event type=${typedEvent.type}, item.type=${typedEvent.item?.type}`); // Codex --json outputs events with structure: {"type":"item.completed","item":{...}} // Only extract agent_message type - this is the actual response if (typedEvent.type === 'item.completed' && typedEvent.item) { if (typedEvent.item.type === 'agent_message' && typedEvent.item.text) { // Agent's actual response text logger.info(`✅ extractCodexAgentMessage: found agent_message with ${typedEvent.item.text.length} chars`); agentMessages.push(typedEvent.item.text); } // Skip all other types: // - reasoning: internal thinking steps // - command_execution: file reads, bash commands // - error: will be in stderr } } const result = agentMessages.join('\n\n').trim(); logger.info(`extractCodexAgentMessage: extracted ${agentMessages.length} messages, total ${result.length} chars`); return result; } emitThrottledStreamingEvent(agent, type, content, onStreamingEvent, options) 
{ if (!onStreamingEvent) return; // Claude uses stream-json: intermediate stdout chunks are raw NDJSON events // (including huge tool_result payloads). Skip emitting them as streaming events; // the decoder extracts only assistant text post-completion. if (agent === 'claude') { return; } // Use requestId to prevent buffer sharing between overlapping requests const requestId = options?.requestId || 'default'; const key = `${agent}-${type}-${requestId}`; const now = Date.now(); // Truncate content to prevent huge events const truncatedContent = content.length > this.MAX_CHUNK_SIZE ? content.substring(0, this.MAX_CHUNK_SIZE) + '...[truncated]' : content; // Get or create buffer for this agent+type if (!this.streamingBuffers.has(key)) { this.streamingBuffers.set(key, { chunks: [], lastFlush: now }); } const buffer = this.streamingBuffers.get(key); buffer.chunks.push(truncatedContent); // Indeterminate heartbeat: signal "still working" without faking a percentage // Throttled to avoid spamming the client — streaming events still flow at full speed if (options?.progressToken && options?.onProgress && type === 'agent_progress' && now - this.lastHeartbeat >= this.HEARTBEAT_INTERVAL) { this.lastHeartbeat = now; options.onProgress(buffer.chunks.length, undefined, `${agent.toUpperCase()}: ${truncatedContent.substring(0, 80)}`); } // Flush if enough time has passed or buffer is getting large if (now - buffer.lastFlush > this.STREAMING_FLUSH_INTERVAL || buffer.chunks.length > 10) { const combinedContent = buffer.chunks.join('\n'); onStreamingEvent({ type, agent, content: combinedContent, timestamp: now, sessionId: options?.sessionId }); // Reset buffer buffer.chunks = []; buffer.lastFlush = now; } } async buildCLICommand(cli, userPrompt, systemPrompt, options) { const config = CLI_BUILDER_CONFIGS[cli]; const mcpEnabled = options.mcpServers && options.mcpServers.length > 0; // Build args const args = [...config.defaultArgs]; const resolvedModel = 
this.modelResolver.resolveModel(cli, options.models?.[cli]); if (resolvedModel) { args.push(config.modelArgName, resolvedModel); } if (config.jsonFlag && process.env.CODEX_USE_JSON !== 'false') { args.push(config.jsonFlag); } if (config.streamingArgs) { args.push(...config.streamingArgs(options)); } // ── MCP configuration ──────────────────────────────────────────────── let tempMcpConfigPath; if (mcpEnabled && config.mcpSupport) { const servers = resolveServers(options.mcpServers); const serverNames = Object.keys(servers); // Auto-install Playwright browsers if playwright is requested if (servers.playwright) { await ensurePlaywrightBrowsers(); } if (serverNames.length > 0) { const mcp = config.mcpSupport; switch (mcp.configMethod) { case 'flag-file': { // Claude: write temp JSON config, pass --mcp-config <path> --strict-mcp-config const sessionId = options.sessionId || 'default'; tempMcpConfigPath = await writeClaudeMCPConfig(servers, sessionId); args.push(mcp.configFlag, tempMcpConfigPath); args.push(mcp.strictFlag); // Hard deny on write tools args.push(mcp.writeProtection.flag, mcp.writeProtection.value); // Non-interactive MCP tool use requires permission bypass args.push('--permission-mode', 'bypassPermissions'); break; } case 'config-override': { // Codex: -c 'mcp_servers={...}' — replaces all configured servers (excludes brutalist) const tomlOverride = buildCodexMCPOverride(servers); args.push('-c', `${mcp.configOverrideKey}=${tomlOverride}`); // Write protection already in defaultArgs (--sandbox read-only) break; } case 'server-whitelist': { // Gemini: --allowed-mcp-server-names <names> --approval-mode plan await ensureGeminiMCPServers(servers); args.push(mcp.whitelistFlag, ...serverNames); args.push(mcp.writeProtection.flag, mcp.writeProtection.value); break; } } logger.info(`🔌 MCP enabled for ${cli}: [${serverNames.join(', ')}]`); } } // Build prompt — skip CLI-specific wrapper in debate mode (prevents Codex // from exploring the brutalist repo and 
reading its own control prompts) const combinedPrompt = (config.promptWrapper && !options.debateMode) ? config.promptWrapper(systemPrompt, userPrompt) : `${systemPrompt}\n\n${userPrompt}`; // Build secure env const secureEnv = createSecureEnvironment(); // Add CLI-specific env extras if (config.envExtras) { Object.assign(secureEnv, config.envExtras); } // Add required API key const apiKeyMap = { claude: ['ANTHROPIC_API_KEY'], codex: ['OPENAI_API_KEY'], gemini: ['GOOGLE_API_KEY', 'GEMINI_API_KEY'] }; for (const key of apiKeyMap[cli]) { if (process.env[key]) secureEnv[key] = process.env[key]; } // Clean up MPC env vars that could cause deadlock — SKIP when MCP is enabled // (the per-CLI config above already isolates to only the requested servers) if (!mcpEnabled && config.mpcEnvCleanup) { for (const envVar of config.mpcEnvCleanup) { delete secureEnv[envVar]; } } secureEnv.BRUTALIST_SUBPROCESS = '1'; return { command: config.command, args, input: combinedPrompt, env: secureEnv, tempMcpConfigPath }; } async detectCLIContext() { // Return cached context if still valid if (this.cliContextCached && Date.now() - this.cliContextCacheTime < this.CLI_CACHE_TTL) { logger.debug('Using cached CLI context'); return this.cliContext; } const availableCLIs = []; // Check for available CLIs const cliChecks = [ { name: 'claude', command: 'claude --version' }, { name: 'codex', command: 'codex --version' }, { name: 'gemini', command: 'gemini --version' } ]; const results = await Promise.allSettled(cliChecks.map(async (check) => { try { await spawnAsync(check.name, ['--version'], { timeout: CLI_CHECK_TIMEOUT }); logger.debug(`CLI available: ${check.name}`); return check.name; } catch (error) { logger.debug(`CLI not available: ${check.name}`); return null; } })); const detectedCLIs = results .filter(result => result.status === 'fulfilled' && result.value !== null) .map(result => result.value); availableCLIs.push(...detectedCLIs); this.cliContext = { availableCLIs }; this.cliContextCached 
= true; this.cliContextCacheTime = Date.now(); return this.cliContext; } selectSingleCLI(preferredCLI, analysisType) { // 1. Honor explicit preference if available if (preferredCLI && this.cliContext.availableCLIs.includes(preferredCLI)) { logger.info(`✅ Using preferred CLI: ${preferredCLI}`); return preferredCLI; } // 2. Smart selection based on analysis type const selectionRules = { 'code': ['claude', 'codex', 'gemini'], 'architecture': ['gemini', 'claude', 'codex'], 'research': ['claude', 'gemini', 'codex'], 'security': ['codex', 'claude', 'gemini'], 'data': ['gemini', 'claude', 'codex'], 'product': ['claude', 'gemini', 'codex'], 'infrastructure': ['gemini', 'codex', 'claude'], 'idea': ['claude', 'gemini', 'codex'], 'debate': ['claude', 'gemini', 'codex'], 'default': ['claude', 'gemini', 'codex'] }; const priority = selectionRules[analysisType || 'default'] || selectionRules.default; // 3. Select by priority from available CLIs for (const cli of priority) { if (this.cliContext.availableCLIs.includes(cli)) { logger.info(`🎯 Auto-selected ${cli} for ${analysisType || 'general'} analysis`); return cli; } } // Fallback to first available if (this.cliContext.availableCLIs.length === 0) { throw new Error('No CLI agents available'); } logger.warn(`⚠️ Using fallback CLI: ${this.cliContext.availableCLIs[0]}`); return this.cliContext.availableCLIs[0]; } async _executeCLI(cliName, userPrompt, systemPromptSpec, options = {}, commandBuilder) { const startTime = Date.now(); const workingDir = options.workingDirectory || this.defaultWorkingDir; const timeout = options.timeout || this.defaultTimeout; let tempMcpConfigPath; try { logger.info(`🤖 Executing ${cliName.toUpperCase()} CLI`); logger.debug(`${cliName.toUpperCase()} prompt`, { prompt: userPrompt.substring(0, 100) }); // Emit agent start event if (options.onStreamingEvent) { options.onStreamingEvent({ type: 'agent_start', agent: cliName, content: `Starting ${cliName.toUpperCase()} analysis...`, timestamp: Date.now(), 
sessionId: options.sessionId }); } const built = await commandBuilder(userPrompt, systemPromptSpec, options); const { command, args, env, input } = built; tempMcpConfigPath = built.tempMcpConfigPath; logger.info(`📋 Command: ${command} ${args.join(' ')}`); logger.info(`📁 Working directory: ${workingDir}`); logger.info(`⏱️ Timeout: ${timeout}ms`); if (input) { logger.info(`📝 Using stdin for prompt (${input.length} characters)`); } const { stdout, stderr } = await spawnAsync(command, args, { cwd: workingDir, timeout: timeout, maxBuffer: MAX_BUFFER_SIZE, // Configurable buffer for model outputs env: env, input: input, onProgress: (chunk, type) => { // Stream output in real-time with agent identification if (type === 'stdout' && chunk.trim()) { logger.info(`🤖 ${cliName.toUpperCase()}: ${chunk.trim()}`); // Emit throttled streaming event for real-time updates this.emitThrottledStreamingEvent(cliName, 'agent_progress', chunk.trim(), options.onStreamingEvent, options); } else if (type === 'stderr' && chunk.trim()) { logger.warn(`⚠️ ${cliName.toUpperCase()} stderr: ${chunk.trim()}`); // Emit throttled error streaming event this.emitThrottledStreamingEvent(cliName, 'agent_error', chunk.trim(), options.onStreamingEvent, options); } } }); logger.info(`✅ ${cliName.toUpperCase()} completed (${Date.now() - startTime}ms)`); // Emit completion event if (options.onStreamingEvent) { options.onStreamingEvent({ type: 'agent_complete', agent: cliName, content: `${cliName.toUpperCase()} analysis completed (${Date.now() - startTime}ms)`, timestamp: Date.now(), sessionId: options.sessionId }); } // Post-process CLI output if needed let finalOutput = stdout; // If Claude was run with stream-json format, decode the NDJSON to extract text if (cliName === 'claude' && args.includes('--output-format') && args.includes('stream-json')) { const decodedText = this.decodeClaudeStreamJson(stdout); if (decodedText) { finalOutput = decodedText; } } // If Codex was run with --json flag, extract only the 
agent messages if (cliName === 'codex' && args.includes('--json')) { const decodedText = this.extractCodexAgentMessage(stdout); if (decodedText) { finalOutput = decodedText; } } // Fallback: If stdout is empty but stderr has content and exit was successful, // Claude might have written to stderr (common in non-TTY environments) if (!finalOutput.trim() && stderr && stderr.trim()) { logger.info(`📝 Using stderr as output for ${cliName} (stdout was empty)`); finalOutput = stderr; } // Detect CLI errors that exit 0 but contain fatal error output // (e.g., Gemini CLI returns exit code 0 on quota exhaustion) const combinedOutput = `${finalOutput}\n${stderr}`; const quotaPatterns = [ /TerminalQuotaError/i, /exhausted your capacity/i, /quota will reset/i, /rateLimitExceeded/i, ]; const quotaMatch = quotaPatterns.find(p => p.test(combinedOutput)); if (quotaMatch) { // Extract reset time if present const resetMatch = combinedOutput.match(/reset(?:s)? (?:in|after) (\d+h\s*\d+m(?:\s*\d+s)?)/i); const resetInfo = resetMatch ? ` (resets in ${resetMatch[1]})` : ''; const errorMsg = `${cliName.toUpperCase()} quota exhausted${resetInfo}. 
The CLI exited 0 but returned a quota error instead of analysis output.`; logger.warn(`⏱️ ${errorMsg}`); if (options.onStreamingEvent) { options.onStreamingEvent({ type: 'agent_error', agent: cliName, content: errorMsg, timestamp: Date.now(), sessionId: options.sessionId }); } return { agent: cliName, success: false, output: '', error: errorMsg, executionTime: Date.now() - startTime, command: `${command} ${args.join(' ')}`, workingDirectory: workingDir, exitCode: 0 }; } return { agent: cliName, success: true, output: finalOutput, error: stderr || undefined, executionTime: Date.now() - startTime, command: `${command} ${args.join(' ')}`, workingDirectory: workingDir, exitCode: 0 }; } catch (error) { const execError = error; const exitCode = execError.code || -1; // Detect rate limiting errors for Gemini const isRateLimit = cliName === 'gemini' && (execError.stderr?.includes('429') || execError.message?.includes('rateLimitExceeded') || execError.stderr?.includes('rate limit')); if (isRateLimit) { logger.warn(`⏱️ ${cliName.toUpperCase()} CLI hit rate limit (${Date.now() - startTime}ms)`); } else { logger.error(`❌ ${cliName.toUpperCase()} execution failed (${Date.now() - startTime}ms)`, { error: "Redacted: See internal logs for full error details.", exitCode, stderr: "Redacted: See internal logs for full stderr output." }); } // Emit error event if (options.onStreamingEvent) { options.onStreamingEvent({ type: 'agent_error', agent: cliName, content: `${cliName.toUpperCase()} failed: ${error instanceof Error ? error.message : String(error)}`, timestamp: Date.now(), sessionId: options.sessionId }); } return { agent: cliName, success: false, output: '', error: error instanceof Error ? 
error.message : String(error), executionTime: Date.now() - startTime, command: `(redacted command for ${cliName})`, workingDirectory: workingDir, exitCode }; } finally { // Clean up temp MCP config file (Claude flag-file method) if (tempMcpConfigPath) { await cleanupTempConfig(tempMcpConfigPath); } } } async executeClaudeCode(userPrompt, systemPromptSpec, options = {}) { return this._executeCLI('claude', userPrompt, systemPromptSpec, options, (user, sys, opts) => this.buildCLICommand('claude', user, sys, opts)); } async executeCodex(userPrompt, systemPromptSpec, options = {}) { return this._executeCLI('codex', userPrompt, systemPromptSpec, options, (user, sys, opts) => this.buildCLICommand('codex', user, sys, opts)); } async executeGemini(userPrompt, systemPromptSpec, options = {}) { return this._executeCLI('gemini', userPrompt, systemPromptSpec, options, (user, sys, opts) => this.buildCLICommand('gemini', user, sys, opts)); } async executeSingleCLI(cli, userPrompt, systemPromptSpec, options = {}) { // Wait for available slot to prevent resource exhaustion await this.waitForAvailableSlot(); this.runningCLIs++; logger.info(`🎯 Executing ${cli} (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`); try { switch (cli) { case 'claude': return await this.executeClaudeCode(userPrompt, systemPromptSpec, options); case 'codex': return await this.executeCodex(userPrompt, systemPromptSpec, options); case 'gemini': return await this.executeGemini(userPrompt, systemPromptSpec, options); default: throw new Error(`Unknown CLI: ${cli}`); } } finally { this.runningCLIs--; logger.info(`✅ Released CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`); } } async waitForAvailableSlot() { let waitTime = 100; // Start with 100ms wait time while (this.runningCLIs >= this.MAX_CONCURRENT_CLIS) { logger.info(`⏳ Waiting for available CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} in use). 
Next check in ${waitTime}ms...`); await new Promise(resolve => setTimeout(resolve, waitTime)); waitTime = Math.min(waitTime * 2, 5000); // Exponential backoff, max 5 seconds } } async executeCLIAgents(cliAgents, systemPrompt, userPrompt, options = {}) { // Filter to valid CLI agents const validAgents = cliAgents.filter(agent => ['claude', 'codex', 'gemini'].includes(agent)); if (validAgents.length === 0) { return []; } // Execute all CLIs in parallel with Promise.allSettled const promises = validAgents.map(async (agent) => { try { return await this.executeCLIAgent(agent, systemPrompt, userPrompt, options); } catch (error) { return { agent, success: false, output: '', error: error instanceof Error ? error.message : String(error), executionTime: 0, command: `${agent} execution failed`, workingDirectory: options.workingDirectory || process.cwd(), exitCode: -1 }; } }); const results = await Promise.allSettled(promises); return results .filter((result) => result.status === 'fulfilled') .map(result => result.value); } async executeCLIAgent(agent, systemPrompt, userPrompt, options = {}) { if (!['claude', 'codex', 'gemini'].includes(agent)) { throw new Error(`Unsupported CLI agent: ${agent}`); } return await this.executeSingleCLI(agent, userPrompt, systemPrompt, options); } async executeBrutalistAnalysis(analysisType, primaryContent, systemPromptSpec, context, options = {}) { // Only validate filesystem paths for tools that actually operate on files/directories // NOTE: Must match BrutalistPromptType values (camelCase) const filesystemTools = ['codebase', 'fileStructure', 'dependencies', 'gitHistory', 'testCoverage']; logger.debug(`Validation check: analysisType="${analysisType}", isFilesystemTool=${filesystemTools.includes(analysisType)}`); try { if (filesystemTools.includes(analysisType) && primaryContent && primaryContent.trim() !== '') { logger.debug(`Validating path: "${primaryContent}"`); await asyncValidatePath(primaryContent, 'targetPath'); } } catch (error) { 
logger.error(`Path validation failed: ${error}`); throw new Error(`Security validation failed: ${error instanceof Error ? error.message : String(error)}`); } // Validate workingDirectory if provided try { if (options.workingDirectory) { await asyncValidatePath(options.workingDirectory, 'workingDirectory'); } } catch (error) { throw new Error(`Security validation failed: ${error instanceof Error ? error.message : String(error)}`); } const userPrompt = this.constructUserPrompt(analysisType, primaryContent, context); // Determine which CLIs to use let clisToUse; if (options.clis && options.clis.length > 0) { // User specified which CLIs to use - validate they're available const unavailable = options.clis.filter(cli => !this.cliContext.availableCLIs.includes(cli)); if (unavailable.length > 0) { throw new Error(`Requested CLIs not available: ${unavailable.join(', ')}. ` + `Available: ${this.cliContext.availableCLIs.join(', ')}`); } // Deduplicate clisToUse = [...new Set(options.clis)]; logger.info(`🎯 Using user-specified CLIs: ${clisToUse.join(', ')}`); } else { // Default: use all available CLIs clisToUse = [...this.cliContext.availableCLIs]; logger.info(`📋 Using all available CLIs: ${clisToUse.join(', ')}`); } if (clisToUse.length === 0) { throw new Error('No CLI agents available for analysis'); } const selectionMethod = options.clis ? 'user-specified' : 'all-available'; logger.info(`📊 Executing ${clisToUse.length} CLI(s): ${clisToUse.join(', ')} (${selectionMethod})`); // Execute selected CLIs in parallel with allSettled for better error handling const promises = clisToUse.map(async (cli) => { try { const response = await this.executeSingleCLI(cli, userPrompt, systemPromptSpec, options); return { ...response, selectionMethod, analysisType }; } catch (error) { logger.error(`❌ ${cli} execution failed:`, error); return { agent: cli, success: false, output: '', error: error instanceof Error ? 
error.message : String(error), executionTime: 0, selectionMethod, analysisType }; } }); // Use allSettled to handle partial failures gracefully const results = await Promise.allSettled(promises); const responses = results .filter(result => result.status === 'fulfilled') .map(result => result.