UNPKG

@iflow-mcp/ejmockler-brutalist

Version:

Deploy Claude, Codex & Gemini CLI agents to demolish your work before users do. Real file analysis. Brutal honesty. Now with conversation continuation & intelligent pagination.

github.com/ejmockler/brutalist-mcp

ejmockler/brutalist-mcp

916 lines (900 loc) • 66.8 kB

JavaScript

import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { z } from "zod"; import { CLIAgentOrchestrator } from './cli-agents.js'; import { listRegisteredServers } from './mcp-registry.js'; import { logger } from './logger.js'; import { mediateTranscript } from './utils/transcript-mediator.js'; import { existsSync } from 'fs'; import { join as pathJoin, resolve as pathResolve } from 'path'; import { parseCursor, PAGINATION_DEFAULTS } from './utils/pagination.js'; import { ResponseCache } from './utils/response-cache.js'; import { ResponseFormatter } from './formatting/response-formatter.js'; import { HttpTransport } from './transport/http-transport.js'; import { ToolHandler } from './handlers/tool-handler.js'; import { getDomain, generateToolConfig } from './registry/domains.js'; import { filterToolsByIntent, getMatchingDomainIds } from './tool-router.js'; // Use environment variable or fallback to manual version const PACKAGE_VERSION = process.env.npm_package_version || "1.3.0"; /** * BrutalistServer - Composition root for the Brutalist MCP Server * * This class has been refactored to follow the Single Responsibility Principle. * Responsibilities are now delegated to specialized modules: * - ResponseFormatter: Handles all response formatting and pagination * - HttpTransport: Manages HTTP server and CORS * - ToolHandler: Handles roast tool execution, caching, and conversation continuation */ export class BrutalistServer { server; config; // Core dependencies cliOrchestrator; responseCache; // Extracted modules formatter; toolHandler; httpTransport; // Session tracking for security activeSessions = new Map(); // Session cleanup configuration MAX_SESSIONS = 10000; SESSION_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours sessionCleanupTimer; constructor(config = {}) { this.config = { workingDirectory: process.cwd(), defaultTimeout: 1800000, // 30 minutes - complex codebases need time transport: 'stdio', // Default to stdio for backward compatibility httpPort: 3000, ...config }; logger.debug("Initializing CLI Agent Orchestrator"); this.cliOrchestrator = new CLIAgentOrchestrator(); // Initialize response cache with configurable TTL const cacheTTLHours = parseInt(process.env.BRUTALIST_CACHE_TTL_HOURS || '2', 10); this.responseCache = new ResponseCache({ ttlHours: cacheTTLHours, maxEntries: 50, maxTotalSizeMB: 500, maxEntrySizeMB: 10, compressionThresholdMB: 1 }); logger.info(`📦 Response cache initialized with ${cacheTTLHours} hour TTL`); // Session cleanup timer - runs hourly this.sessionCleanupTimer = setInterval(() => this.cleanupStaleSessions(), 60 * 60 * 1000); this.sessionCleanupTimer.unref(); // Don't block Node.js exit logger.info(`🔐 Session cleanup initialized (TTL: 24h, max: ${this.MAX_SESSIONS})`); // Initialize extracted modules this.formatter = new ResponseFormatter(); this.toolHandler = new ToolHandler(this.cliOrchestrator, this.responseCache, this.formatter, this.config, this.activeSessions, this.handleStreamingEvent, this.handleProgressUpdate, () => this.ensureSessionCapacity() // Session capacity management ); // Initialize MCP server this.server = new McpServer({ name: "brutalist-mcp", version: PACKAGE_VERSION }, { capabilities: { tools: {}, logging: {} // Removed experimental.streaming - caused Zod validation errors in Claude Code client } }); this.registerTools(); } async start() { logger.info("Starting Brutalist MCP Server with CLI Agents"); // Skip CLI detection at startup - will be done lazily on first request logger.info("CLI context will be detected on first request"); if (this.config.transport === 'http') { await this.startHttpServer(); } else { await this.startStdioServer(); } logger.info("Brutalist MCP Server started successfully"); } async startStdioServer() { logger.info("Starting with stdio transport"); const transport = new StdioServerTransport(); await this.server.connect(transport); } async startHttpServer() { // Create and start HTTP transport this.httpTransport = new HttpTransport(this.config, (transport) => { // Connect MCP server to HTTP transport this.server.connect(transport); }); await this.httpTransport.start(PACKAGE_VERSION); } // Getter for actual listening port (useful for tests) getActualPort() { return this.httpTransport?.getActualPort(); } // Stop the HTTP server gracefully async stop() { if (this.httpTransport) { await this.httpTransport.stop(); } } /** * Clean up stale sessions that exceed TTL */ cleanupStaleSessions() { const now = Date.now(); let cleaned = 0; for (const [id, session] of this.activeSessions) { if (now - session.lastActivity > this.SESSION_TTL_MS) { this.activeSessions.delete(id); cleaned++; } } if (cleaned > 0) { logger.info(`🧹 Cleaned ${cleaned} stale sessions (>${this.SESSION_TTL_MS / 3600000}h idle)`); } } /** * Ensure session capacity doesn't exceed MAX_SESSIONS * Evicts oldest sessions when capacity is reached */ ensureSessionCapacity() { while (this.activeSessions.size >= this.MAX_SESSIONS) { // Remove oldest session (first entry in Map) const oldestKey = this.activeSessions.keys().next().value; if (oldestKey) { this.activeSessions.delete(oldestKey); logger.debug(`♻️ Evicted oldest session to maintain capacity`); } else { break; } } } // Cleanup method for tests - remove event listeners and close MCP server async cleanup() { if (this.httpTransport) { this.httpTransport.cleanup(); } if (this.sessionCleanupTimer) { clearInterval(this.sessionCleanupTimer); this.sessionCleanupTimer = undefined; } if (this.server) { try { await this.server.close(); } catch { // Ignore close errors during cleanup } } logger.shutdown(); } /** * Handle streaming events from CLI agents */ handleStreamingEvent = (event) => { try { if (!event.sessionId) { logger.warn("⚠️ Streaming event without session ID - dropping for security"); return; } logger.debug(`🔄 Session-scoped streaming: ${event.type} from ${event.agent} to session ${event.sessionId.substring(0, 8)}...`); // For HTTP transport: send session-specific notification if client supports it const httpTransportInstance = this.httpTransport?.getTransport(); if (httpTransportInstance) { try { // Use MCP server's notification system with session context this.server.server.notification({ method: "notifications/message", params: { level: 'info', data: { type: 'streaming_event', sessionId: event.sessionId, agent: event.agent, eventType: event.type, content: event.content?.substring(0, 1000), // Truncate for safety timestamp: event.timestamp }, logger: 'brutalist-mcp-streaming' } }); } catch (notificationError) { // Client doesn't support logging notifications - silently skip logger.debug("Client doesn't support logging notifications, skipping streaming event"); } } // For STDIO transport: still send but with session info else { try { this.server.sendLoggingMessage({ level: 'info', data: { sessionId: event.sessionId, agent: event.agent, type: event.type, content: event.content?.substring(0, 500) // More restrictive for stdio }, logger: 'brutalist-mcp-streaming' }); } catch (loggingError) { // Client doesn't support logging - silently skip logger.debug("Client doesn't support logging, skipping streaming event"); } } // Update session activity if (this.activeSessions.has(event.sessionId)) { this.activeSessions.get(event.sessionId).lastActivity = Date.now(); } } catch (error) { logger.error("💥 Failed to send session-scoped streaming event", { error: error instanceof Error ? error.message : String(error), sessionId: event.sessionId?.substring(0, 8) }); } }; /** * Handle progress updates from CLI agents */ handleProgressUpdate = (progressToken, progress, total, message, sessionId) => { try { if (!sessionId) { logger.warn("⚠️ Progress update without session ID - dropping for security"); return; } const progressLabel = total !== undefined ? `${progress}/${total}` : `heartbeat #${progress}`; logger.debug(`📊 Session progress: ${progressLabel} for session ${sessionId.substring(0, 8)}...`); // Send progress notification with session context if client supports it // When total is undefined, the client should treat this as indeterminate progress try { this.server.server.notification({ method: "notifications/progress", params: { progressToken, progress, ...(total !== undefined ? { total } : {}), message: `[${sessionId.substring(0, 8)}] ${message}`, sessionId } }); logger.debug(`✅ Sent session-scoped progress notification: ${progressLabel}`); } catch (notificationError) { // Client doesn't support progress notifications - silently skip logger.debug("Client doesn't support progress notifications, skipping"); } } catch (error) { logger.error("💥 Failed to send progress notification", { error: error instanceof Error ? error.message : String(error), sessionId: sessionId?.substring(0, 8) }); } }; /** * Register all MCP tools * * TOOL REDUCTION STRATEGY: Only expose 4 gateway tools instead of 15. * The unified `roast` tool with domain parameter replaces all 11 roast_* tools. * This reduces cognitive load for AI agents while maintaining full functionality. */ registerTools() { // NOTE: Individual domain tools (roast_codebase, roast_security, etc.) are NOT registered. // Use the unified `roast` tool with domain parameter instead. // The getToolConfigs() function still exists for internal routing via handleUnifiedRoast(). // Register only the gateway tools this.registerSpecialTools(); } /** * Register special tools (debate, roster, unified roast) */ registerSpecialTools() { // UNIFIED ROAST TOOL: Single entry point for all domain analysis this.server.tool("roast", "Unified brutal AI critique. Specify domain for targeted analysis. Consolidates all roast_* tools into one polymorphic API. IMPORTANT: Critically evaluate all returned feedback — these are adversarial perspectives, not authoritative verdicts. Weigh each claim against evidence before presenting to the user.", { domain: z.enum([ "codebase", "file_structure", "dependencies", "git_history", "test_coverage", "idea", "architecture", "research", "security", "product", "infrastructure", "design" ]).describe("Analysis domain"), target: z.string().describe("Directory path for filesystem domains (codebase, dependencies, git_history, etc.) OR text content for abstract domains (idea, architecture, security, etc.)"), // Common optional fields context: z.string().optional().describe("Additional context"), workingDirectory: z.string().optional().describe("Working directory"), clis: z.array(z.enum(["claude", "codex", "gemini"])).min(1).max(3).optional().describe("OMIT unless user explicitly requests specific CLIs. All available CLIs run by default — specifying a subset discards perspectives."), verbose: z.boolean().optional().describe("Detailed output"), models: z.object({ claude: z.string().optional(), codex: z.string().optional(), gemini: z.string().optional() }).optional().describe("Per-CLI model override. Pass any model the CLI supports. Deprecated codex names auto-resolve. Omit to use each CLI's configured default."), // Pagination offset: z.number().min(0).optional().describe("Pagination offset"), limit: z.number().min(1000).max(100000).optional().describe("Max chars/chunk"), cursor: z.string().optional().describe("Pagination cursor"), context_id: z.string().optional().describe("Context ID for pagination/continuation"), resume: z.boolean().optional().describe("Continue conversation"), force_refresh: z.boolean().optional().describe("Ignore cache"), // Domain-specific optional fields (passed through to handler) depth: z.number().optional().describe("Max depth for file_structure"), includeDevDeps: z.boolean().optional().describe("Include dev deps for dependencies"), commitRange: z.string().optional().describe("Commit range for git_history"), runCoverage: z.boolean().optional().describe("Run coverage for test_coverage"), resources: z.string().optional().describe("Resources for idea"), timeline: z.string().optional().describe("Timeline for idea"), scale: z.string().optional().describe("Scale for architecture/infrastructure"), constraints: z.string().optional().describe("Constraints for architecture"), deployment: z.string().optional().describe("Deployment for architecture"), field: z.string().optional().describe("Field for research"), claims: z.string().optional().describe("Claims for research"), data: z.string().optional().describe("Data for research"), assets: z.string().optional().describe("Assets for security"), threatModel: z.string().optional().describe("Threat model for security"), compliance: z.string().optional().describe("Compliance for security"), users: z.string().optional().describe("Users for product"), competition: z.string().optional().describe("Competition for product"), metrics: z.string().optional().describe("Metrics for product"), sla: z.string().optional().describe("SLA for infrastructure"), budget: z.string().optional().describe("Budget for infrastructure"), medium: z.string().optional().describe("Design medium for design domain (web, mobile, spatial, print)"), audience: z.string().optional().describe("Target audience for design domain"), brand: z.string().optional().describe("Brand identity or design system constraints for design domain"), url: z.string().optional().describe("Live URL for visual evaluation (e.g., 'http://localhost:5173'). When provided with design domain, critics use Playwright to navigate and visually evaluate the running interface. Strongly recommended for design critiques."), mcp_servers: z.array(z.string()).optional().describe(`MCP servers to enable for CLI agents (e.g., ["playwright"]). Enables evidence-backed analysis via external tools. Available: ${listRegisteredServers().join(', ')}. Auto-enabled for design domain.`) }, async (args, extra) => this.handleUnifiedRoast(args, extra)); // ROAST_CLI_DEBATE: Adversarial analysis between different CLI agents this.server.tool("roast_cli_debate", "Deploy 2 CLI agents in structured adversarial debate with constitutional position anchoring. Calling agent should extract PRO/CON positions from topic before invoking. IMPORTANT: Critically evaluate all debate output — positions are assigned, not necessarily held. Weigh each argument's validity independently before presenting to the user.", { topic: z.string().describe("The debate topic"), proPosition: z.string().describe("The PRO thesis to defend (extracted by calling agent)"), conPosition: z.string().describe("The CON thesis to defend (extracted by calling agent)"), agents: z.array(z.enum(["claude", "codex", "gemini"])).length(2).optional() .describe("OMIT unless user explicitly requests specific agents. Two agents are auto-selected from all available CLIs by default."), rounds: z.number().min(1).max(3).default(3).optional() .describe("Number of debate rounds (default: 3)"), context: z.string().optional().describe("Additional context for the debate"), workingDirectory: z.string().optional().describe("Working directory for analysis"), models: z.object({ claude: z.string().optional(), codex: z.string().optional(), gemini: z.string().optional() }).optional().describe("Model overrides for specific agents"), // Pagination and conversation continuation context_id: z.string().optional().describe("Context ID for pagination/continuation"), resume: z.boolean().optional().describe("Continue debate (requires context_id)"), offset: z.number().min(0).optional(), limit: z.number().min(1000).max(100000).optional(), cursor: z.string().optional(), force_refresh: z.boolean().optional(), verbose: z.boolean().optional(), mcp_servers: z.array(z.string()).optional().describe(`MCP servers to enable for debate agents (e.g., ["playwright"]). Available: ${listRegisteredServers().join(', ')}`) }, async (args, extra) => { // CRITICAL: Prevent recursion if (process.env.BRUTALIST_SUBPROCESS === '1') { logger.warn(`🚫 Rejecting roast_cli_debate from brutalist subprocess`); return { content: [{ type: "text", text: `ERROR: Brutalist MCP tools cannot be used from within a brutalist-spawned CLI subprocess (recursion prevented)` }] }; } return this.handleDebateToolExecution(args, extra); }); // BRUTALIST_DISCOVER: Intent-based tool discovery this.server.tool("brutalist_discover", "Discover relevant brutalist tools based on your intent. Returns the top 3 most relevant analysis tools.", { intent: z.string().describe("What you want to analyze (e.g., 'review security of my auth system', 'check code quality')") }, async (args) => { const matchingDomains = getMatchingDomainIds(args.intent); const configs = filterToolsByIntent(args.intent); let response = "# Recommended Brutalist Domains\n\n"; response += `Based on your intent: "${args.intent}"\n\n`; if (matchingDomains.length === 0) { response += "No specific matches found. Use the unified `roast` tool with any domain:\n"; response += "- `roast(domain: 'codebase', target: '/path/to/code')` for code review\n"; response += "- `roast(domain: 'security', target: 'description of system')` for security analysis\n"; } else { response += `**Top ${matchingDomains.length} matching domains:**\n\n`; for (const config of configs) { // Extract domain from tool name (roast_security -> security) const domain = config.name.replace('roast_', ''); response += `### ${domain}\n`; response += `${config.description}\n`; response += `\`roast(domain: '${domain}', target: '...')\`\n\n`; } } return { content: [{ type: "text", text: response }] }; }); // CLI_AGENT_ROSTER: Show available brutalist critics this.server.tool("cli_agent_roster", "Know your weapons. Display the available CLI agent critics (Claude Code, Codex, Gemini CLI) ready to demolish your work, their capabilities, and how to deploy them for systematic destruction.", {}, async (args) => { try { let roster = "# Brutalist CLI Agent Arsenal\n\n"; roster += "## Available Tools (4 Gateway Tools)\n\n"; roster += "### `roast` - Unified Analysis Tool\n"; roster += "The primary entry point for all brutal analysis. Use the `domain` parameter to target:\n\n"; roster += "**Filesystem Domains:**\n"; roster += "- `codebase` - Analyze source code for security, performance, maintainability\n"; roster += "- `file_structure` - Examine directory organization\n"; roster += "- `dependencies` - Review package management and vulnerabilities\n"; roster += "- `git_history` - Analyze version control workflow\n"; roster += "- `test_coverage` - Evaluate testing strategy\n\n"; roster += "**Abstract Domains:**\n"; roster += "- `idea` - Destroy business/technical concepts\n"; roster += "- `architecture` - Demolish system designs\n"; roster += "- `research` - Tear apart methodologies\n"; roster += "- `security` - Annihilate security designs\n"; roster += "- `product` - Eviscerate UX concepts\n"; roster += "- `infrastructure` - Obliterate DevOps setups\n"; roster += "- `design` - Perceptual engineering critique of interface design and visual systems\n\n"; roster += "### `roast_cli_debate` - Adversarial Multi-Agent Debate\n"; roster += "Pit CLI agents against each other on any topic.\n\n"; roster += "### `brutalist_discover` - Intent-Based Discovery\n"; roster += "Describe what you want to analyze, get domain recommendations.\n\n"; roster += "### `cli_agent_roster` - This Tool\n"; roster += "Show available capabilities and usage.\n\n"; roster += "## CLI Agent Capabilities\n"; roster += "**Claude Code** - Advanced analysis with direct system prompt injection\n"; roster += "**Codex** - Secure execution with embedded brutal prompts\n"; roster += "**Gemini CLI** - Workspace context with environment variable system prompts\n\n"; // Add CLI context information const cliContext = await this.cliOrchestrator.detectCLIContext(); await this.cliOrchestrator.modelResolver.refreshIfStale(); roster += "## Current CLI Context\n"; roster += `**Available CLIs:** ${cliContext.availableCLIs.join(', ') || 'None detected'}\n\n`; // Add auto-discovered model info roster += this.cliOrchestrator.modelResolver.getRosterModelInfo(); roster += '\n'; roster += "## Domain Discovery\n"; roster += "Use `brutalist_discover` to find the best domain for your analysis:\n"; roster += "- Example: `brutalist_discover(intent: 'review my authentication security')`\n"; roster += "- Returns the top 3 most relevant domains to use with the `roast` tool\n\n"; roster += "## Pagination & Conversation Continuation\n"; roster += "**Two distinct modes for using context_id:**\n\n"; roster += "**1. Pagination** (cached result retrieval):\n"; roster += "- `context_id` alone returns cached response at different offsets\n"; roster += "- Example: `roast(domain: 'codebase', target: '.', context_id: 'abc123', offset: 25000)`\n\n"; roster += "**2. Conversation Continuation** (resume dialogue with history):\n"; roster += "- `context_id` + `resume: true` + new content continues the conversation\n"; roster += "- Prior conversation is injected into CLI agent context\n"; roster += "- Example: `roast(domain: 'codebase', target: '.', context_id: 'abc123', resume: true)`\n\n"; roster += "**Cache TTL:** 2 hours\n\n"; // Add MCP server info const mcpServerNames = listRegisteredServers(); if (mcpServerNames.length > 0) { roster += "## MCP Server Integration\n"; roster += "CLI agents can use external MCP tools for evidence-backed analysis.\n"; roster += `**Available servers:** ${mcpServerNames.join(', ')}\n`; roster += "- Example: `roast(domain: 'security', target: '.', mcp_servers: ['playwright'])`\n"; roster += "- Agents remain read-only — MCP enables observation and interaction, not code modification\n\n"; } roster += "## Brutalist Philosophy\n"; roster += "*All tools use CLI agents with brutal system prompts for maximum reality-based criticism.*\n"; return { content: [{ type: "text", text: roster }] }; } catch (error) { return this.formatter.formatErrorResponse(error); } }); } /** * Handle unified roast tool - routes to appropriate domain handler */ async handleUnifiedRoast(args, extra) { // CRITICAL: Prevent recursion if (process.env.BRUTALIST_SUBPROCESS === '1') { logger.warn(`🚫 Rejecting unified roast from brutalist subprocess`); return { content: [{ type: "text", text: `ERROR: Brutalist MCP tools cannot be used from within a brutalist-spawned CLI subprocess (recursion prevented)` }] }; } // Get domain config const domain = getDomain(args.domain); if (!domain) { return { content: [{ type: "text", text: `ERROR: Unknown domain "${args.domain}". Valid domains: codebase, file_structure, dependencies, git_history, test_coverage, idea, architecture, research, security, product, infrastructure, design` }] }; } // Auto-enable Playwright for design domain — visual evaluation should be // the default, not an opt-in afterthought critics never discover if (args.domain === 'design') { const mcpServers = Array.isArray(args.mcp_servers) ? [...args.mcp_servers] : []; if (!mcpServers.includes('playwright')) { mcpServers.push('playwright'); } args.mcp_servers = mcpServers; } // Generate tool config from domain const toolConfig = generateToolConfig(domain); // Map 'target' to the appropriate primary arg field const mappedArgs = { ...args }; delete mappedArgs.domain; delete mappedArgs.target; // Set the primary argument based on domain's input type if (domain.inputType === 'filesystem') { mappedArgs.targetPath = args.target; } else { mappedArgs.content = args.target; // For abstract tools, also set targetPath if workingDirectory not provided if (!args.workingDirectory) { mappedArgs.targetPath = '.'; } } // Delegate to the unified handler return this.toolHandler.handleRoastTool(toolConfig, mappedArgs, extra); } /** * Handle debate tool execution with constitutional position anchoring. * Uses 2 randomly selected agents (or user-specified) with explicit PRO/CON positions. */ async handleDebateToolExecution(args, extra) { try { // Build pagination params const paginationParams = { offset: args.offset || 0, limit: args.limit || PAGINATION_DEFAULTS.DEFAULT_LIMIT_TOKENS }; if (args.cursor) { const cursorParams = parseCursor(args.cursor); Object.assign(paginationParams, cursorParams); } const explicitPaginationRequested = args.offset !== undefined || args.limit !== undefined || args.cursor !== undefined || args.context_id !== undefined; // Extract session ID early — needed for cache session isolation const sessionId = extra?.sessionId || extra?._meta?.sessionId || extra?.headers?.['mcp-session-id'] || 'anonymous'; // Validate resume flag requires context_id if (args.resume && !args.context_id) { throw new Error(`The 'resume' flag requires a 'context_id' from a previous debate. ` + `Run an initial debate first, then use the returned context_id with resume: true.`); } // Check cache if context_id provided let conversationHistory; if (args.context_id && !args.force_refresh) { const cachedResponse = await this.responseCache.getByContextId(args.context_id, sessionId); if (cachedResponse) { logger.info(`🎯 Debate cache HIT for context_id: ${args.context_id}`); if (args.resume === true) { // CONVERSATION CONTINUATION: Continue the debate if (!args.topic || args.topic.trim() === '') { throw new Error(`Debate continuation (resume: true) requires a new prompt/question. ` + `Provide your follow-up in the topic field.`); } logger.info(`💬 Debate continuation - new prompt: "${args.topic.substring(0, 50)}..."`); conversationHistory = cachedResponse.conversationHistory || []; // Fall through to execute new debate round with history } else { // PAGINATION: Return cached debate result logger.info(`📖 Debate pagination request - returning cached response`); const cachedResult = { success: true, responses: [{ agent: 'cached', success: true, output: cachedResponse.content, executionTime: 0 }] }; return this.formatter.formatToolResponse(cachedResult, args.verbose, paginationParams, args.context_id, explicitPaginationRequested); } } else { logger.warn(`❌ Debate cache MISS for context_id: ${args.context_id}`); throw new Error(`Context ID "${args.context_id}" not found in cache. ` + `It may have expired (2 hour TTL) or belong to a different session. ` + `Remove context_id parameter to run a new debate.`); } } // Generate cache key for this debate const cacheKey = this.responseCache.generateCacheKey({ tool: 'roast_cli_debate', topic: args.topic, proPosition: args.proPosition, conPosition: args.conPosition, agents: args.agents, rounds: args.rounds, context: args.context }); // Check cache for identical request (if not resuming) if (!args.force_refresh && !args.resume) { const cachedContent = await this.responseCache.get(cacheKey); if (cachedContent) { const existingContextId = this.responseCache.findContextIdForKey(cacheKey); const contextId = existingContextId ? this.responseCache.createAlias(existingContextId, cacheKey) : this.responseCache.generateContextId(cacheKey); logger.info(`🎯 Debate cache hit for new request, using context_id: ${contextId}`); const cachedResult = { success: true, responses: [{ agent: 'cached', success: true, output: cachedContent, executionTime: 0 }] }; return this.formatter.formatToolResponse(cachedResult, args.verbose, paginationParams, contextId, explicitPaginationRequested); } } // Build context with conversation history if resuming let debateContext = args.context || ''; if (conversationHistory && conversationHistory.length > 0) { const previousDebate = conversationHistory.map(msg => { const role = msg.role === 'user' ? 'User Question' : 'Debate Response'; return `${role}:\n${msg.content}`; }).join('\n\n---\n\n'); debateContext = `## Previous Debate Context\n\n${previousDebate}\n\n---\n\n## New Follow-up Question\n\nThe user wants to continue this debate with a new question or direction.\n\n${debateContext}`; logger.info(`💬 Injected ${conversationHistory.length} previous messages into debate context`); } // Extract streaming context from extra const progressToken = extra?._meta?.progressToken; // Execute the debate const numRounds = Math.min(args.rounds || 3, 3); const result = await this.executeCLIDebate({ topic: args.topic, proPosition: args.proPosition, conPosition: args.conPosition, agents: args.agents, rounds: numRounds, context: debateContext, workingDirectory: args.workingDirectory, models: args.models, onStreamingEvent: this.handleStreamingEvent, progressToken, onProgress: progressToken && sessionId ? (progress, total, message) => this.handleProgressUpdate(progressToken, progress, total, message, sessionId) : undefined, sessionId, mcp_servers: args.mcp_servers, }); // Cache the result let contextId; if (result.success && result.responses.length > 0) { const fullContent = this.formatter.extractFullContent(result); if (fullContent) { const now = Date.now(); const updatedConversation = [ ...(conversationHistory || []), { role: 'user', content: args.topic, timestamp: now }, { role: 'assistant', content: fullContent, timestamp: now } ]; if (args.resume && args.context_id && conversationHistory) { // Update existing cache entry contextId = args.context_id; await this.responseCache.updateByContextId(contextId, fullContent, updatedConversation, sessionId); logger.info(`✅ Updated debate conversation ${contextId} (now ${updatedConversation.length} messages)`); } else { // New debate - create new context_id const { contextId: newId } = await this.responseCache.set({ tool: 'roast_cli_debate', topic: args.topic }, fullContent, cacheKey, sessionId, undefined, updatedConversation); contextId = newId; logger.info(`✅ Cached new debate with context ID: ${contextId}`); } } } return this.formatter.formatToolResponse(result, args.verbose, paginationParams, contextId, explicitPaginationRequested); } catch (error) { return this.formatter.formatErrorResponse(error); } } /** * Execute CLI debate with constitutional position anchoring. * 2 agents, explicit PRO/CON positions, context compression between rounds. */ async executeCLIDebate(args) { const { topic, proPosition, conPosition, rounds, context, workingDirectory, models, onStreamingEvent, progressToken, onProgress, sessionId } = args; logger.debug("Executing CLI debate", { topic, proPosition, conPosition, rounds }); try { // Get available CLIs const cliContext = await this.cliOrchestrator.detectCLIContext(); const availableCLIs = cliContext.availableCLIs; if (availableCLIs.length < 2) { throw new Error(`Need at least 2 CLI agents for debate. Available: ${availableCLIs.join(', ')}`); } // Select 2 agents: use specified or random selection let selectedAgents; if (args.agents && args.agents.length === 2) { // Validate specified agents are available const unavailable = args.agents.filter(a => !availableCLIs.includes(a)); if (unavailable.length > 0) { throw new Error(`Specified agents not available: ${unavailable.join(', ')}. Available: ${availableCLIs.join(', ')}`); } selectedAgents = args.agents; } else { // Random selection of 2 agents const shuffled = [...availableCLIs].sort(() => Math.random() - 0.5); selectedAgents = shuffled.slice(0, 2); } // Randomly assign PRO/CON positions const shuffledAgents = [...selectedAgents].sort(() => Math.random() - 0.5); const proAgent = shuffledAgents[0]; const conAgent = shuffledAgents[1]; logger.info(`🎭 Debate: ${proAgent.toUpperCase()} (PRO) vs ${conAgent.toUpperCase()} (CON)`); const debateResponses = []; const transcript = []; const turnMetadata = []; let compressedContext = ''; const totalTurns = rounds * 2; // 2 agents per round let completedTurns = 0; // Frontier 1: Detect self-referential working directory (Codex reading its own control prompts) const resolvedWorkDir = workingDirectory || this.config.workingDirectory || process.cwd(); const absWorkDir = pathResolve(resolvedWorkDir); const isSelfReferential = existsSync(pathJoin(absWorkDir, 'src', 'brutalist-server.ts')) || existsSync(pathJoin(absWorkDir, 'dist', 'brutalist-server.js')); if (isSelfReferential) { logger.info(`🔒 Debate working directory is brutalist repo — Codex will be sandboxed`); } // Refusal detection — identifies when an agent breaks debate framing // Two classes: direct refusal (front-loaded) and evasive refusal (pivots to meta-analysis) const DIRECT_REFUSAL_PATTERNS = [ /\bi('m| am) not going to (participate|argue|engage|debate|take|write|adopt)/i, /\bi (will not|won't|cannot|can't) (participate|argue|engage|debate|write|adopt)/i, /\bdeclin(e|ing) (to|this|the)/i, /\bnot going to participate in this as (framed|structured)/i, /\binstead of (the adversarial|this debate|arguing)/i, /\bwhat i can do instead\b/i, /\bi('d| would) suggest a (different|better) topic\b/i, /\bI'll .* but on my own terms\b/i, /\bwhere i part from the assigned thesis\b/i, /\bi can'?t help write (persuasive|adversarial|advocacy)/i, /\bneed to be straightforward\b/i, /\bthe problem is the format\b/i, /\bnot appropriate for this topic\b/i, ]; const EVASIVE_REFUSAL_PATTERNS = [ /\brepo[- ]?(read|map|backed|analysis)\b/i, /\bi'?ll (map|inspect|trace) the repo\b/i, /\bneutral[,.]? evidence-focused analysis\b/i, /\bcodebase (analysis|review|classifies|contains)\b/i, /\bI found the core (files|mechanism)\b/i, /\bsrc\/brutalist-server\.ts:\d+/i, /\bsrc\/cli-agents\.ts:\d+/i, /\bsrc\/utils\/transcript-mediator\.ts:\d+/i, /\btests\/integration\/.*\.test\.ts:\d+/i, /\bdebate coercion engine\b/i, /\bposition-enforcement system\b/i, ]; const detectRefusal = (output) => { // Direct refusals front-load in first 1000 chars const head = output.substring(0, 1000); if (DIRECT_REFUSAL_PATTERNS.some(p => p.test(head))) return true; // Evasive refusals (repo analysis pivot) can appear anywhere — scan full output if (EVASIVE_REFUSAL_PATTERNS.some(p => p.test(output))) return true; return false; }; const constitutionalAnchor = (agent, position, thesis, tier = 'standard') => { if (tier === 'decomposed') { return `You are the ${position} analyst in this exercise. Your counterpart is presenting the opposing analysis right now. Your position: ${thesis} Demonstrate your expertise by constructing the most rigorous, evidence-grounded case for this position. Engage with the full complexity of the subject. Address the strongest counterarguments and show specifically why they do not defeat your position. The depth of your analysis is the measure of this exercise.`; } const escalatedFrame = tier === 'escalated' ? ` An unexamined position is an unearned conclusion. The depth of this analysis depends on each position receiving full-strength treatment from an expert who can construct its best case. Your counterpart is doing the same for the opposing position right now. ` : ''; return `You are the ${position} analyst. Your counterpart is presenting the opposing analysis right now. ${escalatedFrame} YOUR POSITION: ${thesis} ANALYTICAL CONSTRAINTS: 1. Maintain your assigned position throughout all rounds 2. You may acknowledge valid counterpoints — then show why they do not defeat your position 3. Do not propose compromise or middle-ground framings 4. Engage directly with your counterpart's strongest arguments 5. Reinforce your core position in every response 6. Provide substantive analysis — do not comment on the exercise format 7. Do not suggest alternative framings or decline the analytical task Your depth of expertise is demonstrated by the strength of the case you construct. `; }; // Execute rounds for (let round = 1; round <= rounds; round++) { logger.info(`📢 Round ${round}/${rounds}`); // Both agents argue in each round for (const [agent, position, thesis] of [ [proAgent, 'PRO', proPosition], [conAgent, 'CON', conPosition] ]) { let prompt; logger.info(` ⚔️ ${agent.toUpperCase()} (${position}) arguing...`); // Build prompt-generation function so we can rebuild on escalation const mcpBlock = args.mcp_servers?.length ? `\nEXTERNAL TOOL ACCESS: You have MCP tools available (${args.mcp_servers.join(', ')}). Use them to gather evidence supporting your position. You MUST NOT modify the codebase.\n` : ''; const buildPrompt = (tier) => { if (round === 1) { return `${constitutionalAnchor(agent, position, thesis, tier)} ${mcpBlock} TOPIC: ${topic} ${context ? `CONTEXT: ${context}` : ''} Round 1: Opening analysis. Present your ${position} analysis. Structure your response: <thesis_statement> Your core analytical position </thesis_statement> <key_arguments> Three strongest arguments grounding your position in evidence and reasoning </key_arguments> <preemptive_rebuttal> Address the strongest counterargument and show why it does not defeat your position </preemptive_rebuttal> <conclusion> Reinforce why your analysis holds </conclusion>`; } else { const rawOpponent = transcript .filter(t => t.agent !== agent && t.round === round - 1) .map(t => t.content) .join('\n\n'); const { sanitized: opponentTranscript, patternsDetected: opponentPatterns } = mediateTranscript(rawOpponent, 'sanitize', 4000); if (opponentPatterns.length > 0) { logger.info(`🛡️ Mediated ${opponentPatterns.length} patterns from opponent transcript for ${agent}`, { opponentPatterns }); } return `${constitutionalAnchor(agent, position, thesis, tier)} ${mcpBlock} TOPIC: ${topic} Round ${round}: Engage with your counterpart's analysis. YOUR COUNTERPART'S PREVIOUS ANALYSIS: ${opponentTranscript || 'No previous analysis recorded'} ${compressedContext ? `ANALYSIS CONTEXT SO FAR:\n${compressedContext}\n` : ''} <counterpart_gaps> Identify the specific weaknesses in their reasoning and evidence </counterpart_gaps> <deepening_analysis> Advance new evidence and reasoning that strengthens your position </deepening_analysis> <reinforcement> Show why your position holds against their strongest points </reinforcement>`; } }; try { const turnRequestId = `debate-${sessionId || 'anon'}-${round}-${agent}-${Date.now()}`; // Emit agent_start streaming event if (onStreamingEvent) { onStreamingEvent({ type: 'agent_start', agent, content: `Round ${round}/${rounds}: ${agent.toUpperCase()} (${position}) arguing...`, timestamp: Date.now(), sessionId, }); } // Working directory: debateMode suppresses Codex shell exploration via prompt, // so no need to redirect — Codex still needs a git repo to function const agentWorkDir = workingDirectory || this.config.workingDirectory; const cliOptions = { workingDirectory: agentWorkDir, timeout: (this.config.defaultTimeout || 60000) * 2, models, onStreamingEvent, progressToken, onProgress, sessionId, requestId: turnRequestId