UNPKG

@probelabs/probe

Version:

Node.js wrapper for the probe code search tool

813 lines (738 loc) 29.1 kB
import { ProbeAgent } from './ProbeAgent.js';
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
import {
  CallToolRequestSchema,
  ErrorCode,
  ListToolsRequestSchema,
  McpError,
} from '@modelcontextprotocol/sdk/types.js';
import { readFileSync, existsSync } from 'fs';
import { resolve } from 'path';
import { initializeSimpleTelemetryFromOptions, SimpleAppTracer } from './simpleTelemetry.js';
import {
  cleanSchemaResponse,
  processSchemaResponse,
  isJsonSchema,
  validateJsonResponse,
  createJsonCorrectionPrompt,
  isMermaidSchema,
  validateMermaidResponse,
  createMermaidCorrectionPrompt,
  validateAndFixMermaidResponse
} from './schemaUtils.js';
import { ACPServer } from './acp/index.js';

// How long readFromStdin() waits for a first chunk before assuming that the
// process was started interactively (nothing piped in).
const STDIN_INTERACTIVE_TIMEOUT_MS = 100;

// Flags that consume the following argument verbatim -> config key.
const STRING_FLAGS = new Map([
  ['--path', 'path'],
  ['--prompt', 'prompt'],
  ['--system-prompt', 'systemPrompt'],
  ['--schema', 'schema'],
  ['--provider', 'provider'],
  ['--model', 'model'],
  ['--trace-file', 'traceFile'],
  ['--trace-remote', 'traceRemote'],
]);

// Flags that consume the following argument as a base-10 integer -> config key.
const INTEGER_FLAGS = new Map([
  ['--max-iterations', 'maxIterations'],
  ['--max-response-tokens', 'maxResponseTokens'],
]);

// Flags that take no value and simply switch a config key to true.
const BOOLEAN_FLAGS = new Map([
  ['--mcp', 'mcp'],
  ['--acp', 'acp'],
  ['--help', 'help'],
  ['-h', 'help'],
  ['--verbose', 'verbose'],
  ['--allow-edit', 'allowEdit'],
  ['--trace-console', 'traceConsole'],
  ['--outline', 'outline'],
  ['--no-mermaid-validation', 'noMermaidValidation'],
]);

/**
 * Interpret an input that may be either literal text or a path to a file.
 *
 * @param {string|null|undefined} input - Literal text or a file path.
 * @returns {string|null} `null` for empty input; the trimmed file contents when
 *   `input` resolves to an existing, readable file; otherwise `input` unchanged.
 */
function readInputContent(input) {
  if (!input) return null;

  try {
    const resolvedPath = resolve(input);
    if (existsSync(resolvedPath)) {
      return readFileSync(resolvedPath, 'utf-8').trim();
    }
  } catch (error) {
    // Deliberate best effort: anything that cannot be read as a file
    // (directory, permission error, ...) is treated as a literal string.
  }

  return input;
}

/**
 * Report whether stdin looks like piped (non-TTY) readable input.
 *
 * @returns {boolean} true when stdin is not a TTY and is readable.
 */
function isStdinAvailable() {
  return !process.stdin.isTTY && process.stdin.readable;
}

/**
 * Read all of stdin as UTF-8 text.
 *
 * @returns {Promise<string>} Resolves with the trimmed input once stdin ends.
 *   Rejects with `Error('INTERACTIVE_MODE')` when no data arrives within
 *   STDIN_INTERACTIVE_TIMEOUT_MS, with `Error('No input received from stdin')`
 *   when stdin ends without delivering any chunk, or with the stream error.
 */
function readFromStdin() {
  // The executor parameters are deliberately not called `resolve`, so that
  // they do not shadow the `resolve` imported from 'path'.
  return new Promise((resolveInput, rejectInput) => {
    const chunks = [];

    // Piped input should arrive almost immediately; silence means that we
    // were most likely started from an interactive terminal.
    const timeout = setTimeout(() => {
      if (chunks.length === 0) {
        rejectInput(new Error('INTERACTIVE_MODE'));
      }
    }, STDIN_INTERACTIVE_TIMEOUT_MS);

    // Pull everything currently buffered on stdin.
    const drain = () => {
      let chunk;
      while ((chunk = process.stdin.read()) !== null) {
        clearTimeout(timeout);
        chunks.push(chunk);
      }
    };

    process.stdin.setEncoding('utf8');
    process.stdin.on('readable', drain);

    process.stdin.on('end', () => {
      clearTimeout(timeout);
      if (chunks.length === 0) {
        rejectInput(new Error('No input received from stdin'));
      } else {
        resolveInput(chunks.join('').trim());
      }
    });

    process.stdin.on('error', (error) => {
      clearTimeout(timeout);
      rejectInput(error);
    });

    // Force a read attempt in case data is already buffered.
    process.nextTick(drain);
  });
}

/**
 * Parse the value of an integer-valued command line flag.
 *
 * @param {string} flag - Flag name, used only in the warning message.
 * @param {string} raw - Raw argument text.
 * @returns {number|null} The parsed integer, or `null` (after a warning on
 *   stderr) when `raw` does not start with an integer.
 */
function parseIntegerFlag(flag, raw) {
  const value = Number.parseInt(raw, 10);
  if (Number.isNaN(value)) {
    console.error(`Warning: ignoring ${flag}: expected an integer but got "${raw}"`);
    return null;
  }
  return value;
}

/**
 * Parse `process.argv` into the agent configuration object.
 *
 * The first argument that does not start with `--` (and is not a known flag)
 * becomes the question. When no question is given and neither server mode nor
 * help was requested, `useStdin` is set so that main() tries to read the
 * question from stdin.
 *
 * @returns {object} The parsed configuration.
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const config = {
    mcp: false,
    acp: false,
    question: null,
    path: null,
    prompt: null,
    systemPrompt: null,
    schema: null,
    provider: null,
    model: null,
    allowEdit: false,
    verbose: false,
    help: false,
    maxIterations: null,
    maxResponseTokens: null,
    traceFile: undefined,
    traceRemote: undefined,
    traceConsole: false,
    useStdin: false, // Set below when the question should come from stdin
    outline: false, // Enables outline format
    noMermaidValidation: false // Disables mermaid validation
  };

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    const hasValue = i + 1 < args.length;

    if (BOOLEAN_FLAGS.has(arg)) {
      config[BOOLEAN_FLAGS.get(arg)] = true;
    } else if (STRING_FLAGS.has(arg) && hasValue) {
      config[STRING_FLAGS.get(arg)] = args[++i];
    } else if (INTEGER_FLAGS.has(arg) && hasValue) {
      config[INTEGER_FLAGS.get(arg)] = parseIntegerFlag(arg, args[++i]);
    } else if (!arg.startsWith('--') && !config.question) {
      // First non-flag argument is the question
      config.question = arg;
    }
  }

  // Stdin detection itself happens in main(), using a short timeout; here we
  // only record that stdin is the only remaining source for the question.
  if (!config.question && !config.mcp && !config.acp && !config.help) {
    config.useStdin = true;
  }

  return config;
}

/**
 * Print the command line help text to stdout.
 */
function showHelp() {
  console.log(`
probe agent - AI-powered code exploration tool

Usage:
  probe agent <question>           Answer a question about the codebase
  probe agent <file>               Read question from file
  echo "question" | probe agent    Read question from stdin (pipe input)
  probe agent --mcp                Start as MCP server
  probe agent --acp                Start as ACP server

Options:
  --path <dir>                     Search directory (default: current)
  --prompt <type>                  Persona: code-explorer, engineer, code-review, support, architect
  --system-prompt <text|file>      Custom system prompt (text or file path)
  --schema <schema|file>           Output schema (JSON, XML, any format - text or file path)
  --provider <name>                Force AI provider: anthropic, openai, google
  --model <name>                   Override model name
  --allow-edit                     Enable code modification capabilities
  --verbose                        Enable verbose output
  --outline                        Use outline-xml format for code search results
  --mcp                            Run as MCP server
  --acp                            Run as ACP server (Agent Client Protocol)
  --max-iterations <number>        Max tool iterations (default: 30)
  --max-response-tokens <number>   Max tokens for AI response (overrides model defaults)
  --trace-file <path>              Enable tracing to file (JSONL format)
  --trace-remote <endpoint>        Enable tracing to remote OTLP endpoint
  --trace-console                  Enable tracing to console output
  --no-mermaid-validation          Disable automatic mermaid diagram validation and fixing
  --help, -h                       Show this help message

Environment Variables:
  ANTHROPIC_API_KEY                Anthropic Claude API key
  OPENAI_API_KEY                   OpenAI GPT API key
  GOOGLE_API_KEY                   Google Gemini API key
  FORCE_PROVIDER                   Force specific provider (anthropic, openai, google)
  MODEL_NAME                       Override model name
  MAX_RESPONSE_TOKENS              Maximum tokens for AI response
  DEBUG                            Enable verbose mode (set to '1')

Examples:
  probe agent "How does authentication work?"
  probe agent question.txt                                   # Read question from file
  echo "How does the search algorithm work?" | probe agent   # Read from stdin
  cat requirements.txt | probe agent --prompt architect      # Pipe file content
  probe agent "Find all database queries" --path ./src --prompt engineer
  probe agent "Review this code for bugs" --prompt code-review --system-prompt custom-prompt.txt
  probe agent "List all functions" --schema '{"functions": [{"name": "string", "file": "string"}]}'
  probe agent "Analyze codebase" --schema schema.json        # Schema from file
  probe agent "Debug issue" --trace-file ./debug.jsonl --verbose
  probe agent "Analyze code" --trace-remote http://localhost:4318/v1/traces
  probe agent --mcp                                          # Start MCP server mode
  probe agent --acp                                          # Start ACP server mode

Personas:
  code-explorer    Default. Explores and explains code structure and functionality
  engineer         Senior engineer focused on implementation and architecture
  code-review      Reviews code for bugs, performance, and best practices
  support          Helps troubleshoot issues and solve problems
  architect        Focuses on software architecture and high-level design
`);
}

/**
 * Build the follow-up prompt that asks the agent to reformat its previous
 * answer according to `schema`.
 *
 * @param {string} schema - Schema text supplied by the user.
 * @returns {string} The follow-up prompt.
 */
function buildSchemaPrompt(schema) {
  return `Now you need to respond according to this schema:\n\n${schema}\n\nPlease reformat your previous response to match this schema exactly. Only return the formatted response, no additional text.`;
}

/**
 * Call `agent.answer(prompt, [], { schema })`, wrapped in a tracing span when
 * a tracer is available.
 *
 * @param {object} agent - ProbeAgent instance.
 * @param {object|null} tracer - SimpleAppTracer instance, or null for no tracing.
 * @param {string} spanName - Span name used when tracing.
 * @param {string} prompt - Prompt sent to the agent.
 * @param {string|null} schema - Schema forwarded to the agent.
 * @param {object} attributes - Span attributes used when tracing.
 * @returns {Promise<*>} Whatever `agent.answer` resolves with.
 */
function askAgent(agent, tracer, spanName, prompt, schema, attributes) {
  const ask = () => agent.answer(prompt, [], { schema });
  return tracer ? tracer.withSpan(spanName, ask, attributes) : ask();
}

/**
 * Run validateAndFixMermaidResponse() on `result` and return the response to
 * keep. Failures are never fatal: they are logged in debug mode and the best
 * response known so far is returned.
 *
 * @param {string} result - Response text to validate.
 * @param {object} options - Passed through to validateAndFixMermaidResponse();
 *   `options.debug` also controls the logging done here.
 * @returns {Promise<string>} The fixed response when fixes were applied,
 *   otherwise `result` unchanged.
 */
async function applyMermaidValidation(result, options) {
  const { debug } = options;
  let output = result;

  try {
    const validation = await validateAndFixMermaidResponse(result, options);

    if (validation.wasFixed) {
      // Adopt the fix before logging so a logging failure cannot discard it.
      output = validation.fixedResponse;
      if (debug) {
        console.error(`[DEBUG] Mermaid diagrams fixed using specialized agent`);
        validation.fixingResults.forEach((fixResult, index) => {
          if (fixResult.wasFixed) {
            console.error(`[DEBUG] Fixed diagram ${index + 1}: ${fixResult.originalError}`);
          }
        });
      }
    } else if (!validation.isValid && validation.diagrams && validation.diagrams.length > 0 && debug) {
      console.error(`[DEBUG] Mermaid validation failed: ${validation.errors?.join(', ')}`);
    }
  } catch (error) {
    if (debug) {
      console.error(`[DEBUG] Enhanced mermaid validation failed: ${error.message}`);
    }
  }

  return output;
}

/**
 * Flush pending trace data, logging (never throwing) on failure.
 *
 * @param {object|null} tracer - SimpleAppTracer instance, or null.
 * @param {boolean} verbose - Whether to emit debug output.
 * @returns {Promise<void>}
 */
async function flushTracing(tracer, verbose) {
  if (!tracer) return;

  try {
    await tracer.flush();
    if (verbose) {
      console.error('[DEBUG] Tracing flushed');
    }
  } catch (error) {
    if (verbose) {
      console.error(`[DEBUG] Failed to flush tracing: ${error.message}`);
    }
  }
}

/**
 * MCP server that exposes the agent as a single `search_code` tool over stdio.
 */
class ProbeAgentMcpServer {
  /**
   * Create the MCP server, register the tool handlers, and install an error
   * logger plus a SIGINT handler that closes the server before exiting.
   */
  constructor() {
    this.server = new Server(
      {
        name: '@buger/probe-agent',
        version: '1.0.0',
      },
      {
        capabilities: {
          tools: {},
        },
      }
    );

    this.setupToolHandlers();

    this.server.onerror = (error) => console.error('[MCP Error]', error);
    process.on('SIGINT', async () => {
      await this.server.close();
      process.exit(0);
    });
  }

  /**
   * Register the tool listing and the `search_code` tool call handler.
   */
  setupToolHandlers() {
    this.server.setRequestHandler(ListToolsRequestSchema, async () => ({
      tools: [
        {
          name: 'search_code',
          description: "Search code and answer questions about the codebase using an AI agent. This tool provides intelligent responses based on code analysis.",
          inputSchema: {
            type: 'object',
            properties: {
              query: {
                type: 'string',
                description: 'The question or request about the codebase.',
              },
              path: {
                type: 'string',
                description: 'Optional path to the directory to search in. Defaults to current directory.',
              },
              prompt: {
                type: 'string',
                description: 'Optional persona type: code-explorer, engineer, code-review, support, architect.',
              },
              system_prompt: {
                type: 'string',
                description: 'Optional custom system prompt (text or file path).',
              },
              provider: {
                type: 'string',
                description: 'Optional AI provider to force: anthropic, openai, google.',
              },
              model: {
                type: 'string',
                description: 'Optional model name override.',
              },
              allow_edit: {
                type: 'boolean',
                description: 'Enable code modification capabilities.',
              },
              max_iterations: {
                type: 'number',
                description: 'Maximum number of tool iterations (default: 30).',
              },
              max_response_tokens: {
                type: 'number',
                description: 'Maximum tokens for AI response (overrides model defaults).',
              },
              schema: {
                type: 'string',
                description: 'Optional output schema (JSON, XML, or any format - text or file path).',
              },
              no_mermaid_validation: {
                type: 'boolean',
                description: 'Disable automatic mermaid diagram validation and fixing.',
              }
            },
            required: ['query']
          },
        },
      ],
    }));

    this.server.setRequestHandler(CallToolRequestSchema, async (request) => {
      if (request.params.name !== 'search_code') {
        throw new McpError(
          ErrorCode.MethodNotFound,
          `Unknown tool: ${request.params.name}`
        );
      }

      try {
        // A call without arguments must fail with "Query is required" rather
        // than with a TypeError on `undefined`.
        const args = request.params.arguments ?? {};

        // Validate required fields
        if (!args.query) {
          throw new Error("Query is required");
        }

        // Set MAX_TOOL_ITERATIONS if provided
        if (args.max_iterations) {
          process.env.MAX_TOOL_ITERATIONS = args.max_iterations.toString();
        }

        // Process system prompt if provided (could be file or literal string)
        let systemPrompt = null;
        if (args.system_prompt) {
          systemPrompt = readInputContent(args.system_prompt);
          if (!systemPrompt) {
            throw new Error('System prompt could not be read');
          }
        }

        // Process query input (could be file or literal string)
        const query = readInputContent(args.query);
        if (!query) {
          throw new Error('Query is required and could not be read');
        }

        // Process schema if provided (could be file or literal string)
        let schema = null;
        if (args.schema) {
          schema = readInputContent(args.schema);
          if (!schema) {
            throw new Error('Schema could not be read');
          }
        }

        // Create agent with configuration
        const agentConfig = {
          path: args.path || process.cwd(),
          promptType: args.prompt || 'code-explorer',
          customPrompt: systemPrompt,
          provider: args.provider,
          model: args.model,
          allowEdit: !!args.allow_edit,
          debug: process.env.DEBUG === '1',
          maxResponseTokens: args.max_response_tokens,
          disableMermaidValidation: !!args.no_mermaid_validation
        };

        // One debug switch for all logging in this handler: the DEBUG
        // environment variable, or a `debug` argument if the client sends one
        // (it is not part of the declared input schema).
        const debug = agentConfig.debug || Boolean(args.debug);

        const agent = new ProbeAgent(agentConfig);
        let result = await agent.answer(query, [], { schema });

        // If schema is provided, make a follow-up request to format the output
        if (schema) {
          try {
            result = await agent.answer(buildSchemaPrompt(schema), [], { schema });
            // Clean the schema response to remove code blocks and formatting
            result = cleanSchemaResponse(result);

            // Validate/fix mermaid diagrams found in the formatted response
            if (!args.no_mermaid_validation) {
              result = await applyMermaidValidation(result, {
                debug,
                path: agentConfig.path,
                provider: args.provider,
                model: args.model
              });
            } else if (debug) {
              console.error(`[DEBUG] Mermaid validation skipped due to --no-mermaid-validation flag`);
            }

            // Then, if schema expects JSON, validate and retry if invalid
            if (isJsonSchema(schema)) {
              const validation = validateJsonResponse(result);
              if (!validation.isValid) {
                // Retry once with correction prompt
                const correctionPrompt = createJsonCorrectionPrompt(result, schema, validation.error);
                try {
                  result = await agent.answer(correctionPrompt, [], { schema });
                  result = cleanSchemaResponse(result);

                  // Validate again after correction
                  const finalValidation = validateJsonResponse(result);
                  if (!finalValidation.isValid && debug) {
                    console.error(`[DEBUG] JSON validation failed after retry: ${finalValidation.error}`);
                  }
                } catch (retryError) {
                  // If retry fails, keep the previous result
                  if (debug) {
                    console.error(`[DEBUG] JSON correction retry failed: ${retryError.message}`);
                  }
                }
              }
            }
          } catch (error) {
            // Schema formatting is best effort: keep the last good result.
            if (debug) {
              console.error(`[DEBUG] Schema formatting failed, using original result: ${error.message}`);
            }
          }
        }

        // Get token usage for debugging
        const tokenUsage = agent.getTokenUsage();
        console.error(`Token usage: ${JSON.stringify(tokenUsage)}`);

        return {
          content: [
            {
              type: 'text',
              text: result,
            },
          ],
        };
      } catch (error) {
        console.error(`Error executing search_code:`, error);
        return {
          content: [
            {
              type: 'text',
              text: `Error: ${error.message}`,
            },
          ],
          isError: true,
        };
      }
    });
  }

  /**
   * Connect the server to a stdio transport.
   *
   * @returns {Promise<void>}
   */
  async run() {
    const transport = new StdioServerTransport();
    await this.server.connect(transport);
    console.error('Probe Agent MCP server running on stdio');
  }
}

/**
 * CLI entry point: dispatches to help, MCP server mode, ACP server mode, or a
 * one-shot question (from the command line, a file, or stdin) whose answer is
 * printed to stdout.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const config = parseArgs();

  if (config.help) {
    showHelp();
    return;
  }

  if (config.mcp) {
    // Start as MCP server
    const server = new ProbeAgentMcpServer();
    await server.run();
    return;
  }

  if (config.acp) {
    // Start as ACP server
    const server = new ACPServer({
      provider: config.provider,
      model: config.model,
      path: config.path,
      allowEdit: config.allowEdit,
      debug: config.verbose
    });
    await server.start();
    return;
  }

  // Handle stdin input if detected
  if (config.useStdin) {
    try {
      if (config.verbose) {
        console.error('[DEBUG] Reading question from stdin...');
      }
      config.question = await readFromStdin();
      if (!config.question) {
        console.error('Error: No input received from stdin');
        process.exit(1);
      }
    } catch (error) {
      // If this is interactive mode (no piped input), show help
      if (error.message === 'INTERACTIVE_MODE') {
        showHelp();
        process.exit(0);
      } else {
        console.error(`Error reading from stdin: ${error.message}`);
        process.exit(1);
      }
    }
  }

  if (!config.question) {
    showHelp();
    process.exit(1);
  }

  // Declared outside the try block so the error path can still flush it.
  let appTracer = null;

  try {
    // Initialize tracing if any tracing options are provided
    if (config.traceFile !== undefined || config.traceRemote !== undefined || config.traceConsole) {
      try {
        const telemetryConfig = initializeSimpleTelemetryFromOptions(config);
        appTracer = new SimpleAppTracer(telemetryConfig);
        if (config.verbose) {
          console.error('[DEBUG] Simple tracing initialized');
        }
      } catch (error) {
        if (config.verbose) {
          console.error(`[DEBUG] Failed to initialize tracing: ${error.message}`);
        }
      }
    }

    // Set environment variables if provided via flags
    if (config.verbose) {
      process.env.DEBUG = '1';
    }
    if (config.provider) {
      process.env.FORCE_PROVIDER = config.provider;
    }
    if (config.model) {
      process.env.MODEL_NAME = config.model;
    }
    if (config.maxIterations) {
      process.env.MAX_TOOL_ITERATIONS = config.maxIterations.toString();
    }

    // Process question input (could be file or literal string)
    const question = readInputContent(config.question);
    if (!question) {
      console.error('Error: Question is required and could not be read');
      process.exit(1);
    }

    // Process system prompt if provided (could be file or literal string)
    let systemPrompt = null;
    if (config.systemPrompt) {
      systemPrompt = readInputContent(config.systemPrompt);
      if (!systemPrompt) {
        console.error('Error: System prompt could not be read');
        process.exit(1);
      }
    }

    // Process schema if provided (could be file or literal string)
    let schema = null;
    if (config.schema) {
      schema = readInputContent(config.schema);
      if (!schema) {
        console.error('Error: Schema could not be read');
        process.exit(1);
      }
    }

    // Create and configure agent
    const agentConfig = {
      path: config.path,
      promptType: config.prompt,
      customPrompt: systemPrompt,
      allowEdit: config.allowEdit,
      debug: config.verbose,
      tracer: appTracer,
      outline: config.outline,
      maxResponseTokens: config.maxResponseTokens,
      disableMermaidValidation: config.noMermaidValidation
    };

    const agent = new ProbeAgent(agentConfig);

    // Execute with tracing if available
    let result;
    if (appTracer) {
      const sessionSpan = appTracer.createSessionSpan({
        'question': question.substring(0, 100) + (question.length > 100 ? '...' : ''),
        'path': config.path || process.cwd(),
        'prompt_type': config.prompt || 'code-explorer'
      });

      try {
        result = await askAgent(agent, appTracer, 'agent.answer', question, schema, {
          'question.length': question.length
        });
      } finally {
        if (sessionSpan) {
          sessionSpan.end();
        }
      }
    } else {
      result = await agent.answer(question, [], { schema });
    }

    // If schema is provided, make a follow-up request to format the output
    if (schema) {
      if (config.verbose) {
        console.error('[DEBUG] Schema provided, making follow-up request to format output...');
      }

      try {
        result = await askAgent(agent, appTracer, 'agent.schema_formatting', buildSchemaPrompt(schema), schema, {
          'schema.length': schema.length
        });

        // Clean the schema response to remove code blocks and formatting
        const cleaningResult = processSchemaResponse(result, schema, {
          debug: config.verbose
        });
        result = cleaningResult.cleaned;

        if (config.verbose && cleaningResult.debug && cleaningResult.debug.wasModified) {
          console.error('[DEBUG] Schema response was cleaned:');
          console.error(`  Original length: ${cleaningResult.debug.originalLength}`);
          console.error(`  Cleaned length: ${cleaningResult.debug.cleanedLength}`);
        }

        // Validate/fix mermaid diagrams found in the formatted response
        if (!config.noMermaidValidation) {
          result = await applyMermaidValidation(result, {
            debug: config.verbose,
            path: config.path,
            provider: config.provider,
            model: config.model,
            tracer: appTracer
          });
        } else if (config.verbose) {
          console.error(`[DEBUG] Mermaid validation skipped due to --no-mermaid-validation flag`);
        }

        // Then, if schema expects JSON, validate and retry if invalid
        if (isJsonSchema(schema)) {
          const validation = validateJsonResponse(result);
          if (!validation.isValid) {
            if (config.verbose) {
              console.error(`[DEBUG] JSON validation failed: ${validation.error}`);
              console.error('[DEBUG] Attempting to correct JSON...');
            }

            // Retry once with correction prompt
            const correctionPrompt = createJsonCorrectionPrompt(result, schema, validation.error);
            try {
              result = await askAgent(agent, appTracer, 'agent.json_correction', correctionPrompt, schema, {
                'original_error': validation.error
              });
              result = cleanSchemaResponse(result);

              // Validate again after correction
              const finalValidation = validateJsonResponse(result);
              if (config.verbose) {
                if (finalValidation.isValid) {
                  console.error('[DEBUG] JSON correction successful');
                } else {
                  console.error(`[DEBUG] JSON validation failed after retry: ${finalValidation.error}`);
                }
              }
            } catch (retryError) {
              // If retry fails, keep the previous result
              if (config.verbose) {
                console.error(`[DEBUG] JSON correction retry failed: ${retryError.message}`);
              }
            }
          } else if (config.verbose) {
            console.error('[DEBUG] JSON validation passed');
          }
        }
      } catch (error) {
        // If schema formatting fails, use the last good result
        if (config.verbose) {
          console.error('[DEBUG] Schema formatting failed, using original result');
        }
      }
    }

    // Output the result
    console.log(result);

    // Show token usage in verbose mode
    if (config.verbose) {
      const tokenUsage = agent.getTokenUsage();
      console.error(`\n[DEBUG] Token usage: ${JSON.stringify(tokenUsage, null, 2)}`);
    }

    // Flush and shutdown tracing
    await flushTracing(appTracer, config.verbose);
  } catch (error) {
    console.error(`Error: ${error.message}`);
    if (config.verbose) {
      console.error(error.stack);
    }
    // Flush before exiting so spans recorded up to the failure are not lost.
    await flushTracing(appTracer, config.verbose);
    process.exit(1);
  }
}

// Handle uncaught exceptions
process.on('uncaughtException', (error) => {
  console.error('Uncaught Exception:', error);
  process.exit(1);
});

process.on('unhandledRejection', (reason, promise) => {
  console.error('Unhandled Rejection at:', promise, 'reason:', reason);
  process.exit(1);
});

// Run main function
main().catch((error) => {
  console.error('Fatal error:', error);
  process.exit(1);
});