UNPKG

@probelabs/probe

Version:

Node.js wrapper for the probe code search tool

1,210 lines (1,050 loc) 106 kB
// Core ProbeAgent class adapted from examples/chat/probeChat.js // Load .env file if present (silent fail if not found) import dotenv from 'dotenv'; dotenv.config(); import { createAnthropic } from '@ai-sdk/anthropic'; import { createOpenAI } from '@ai-sdk/openai'; import { createGoogleGenerativeAI } from '@ai-sdk/google'; import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock'; import { streamText } from 'ai'; import { randomUUID } from 'crypto'; import { EventEmitter } from 'events'; import { existsSync } from 'fs'; import { readFile, stat } from 'fs/promises'; import { resolve, isAbsolute, dirname } from 'path'; import { TokenCounter } from './tokenCounter.js'; import { InMemoryStorageAdapter } from './storage/InMemoryStorageAdapter.js'; import { HookManager, HOOK_TYPES } from './hooks/HookManager.js'; import { SUPPORTED_IMAGE_EXTENSIONS, IMAGE_MIME_TYPES } from './imageConfig.js'; import { createTools, searchToolDefinition, queryToolDefinition, extractToolDefinition, delegateToolDefinition, listFilesToolDefinition, searchFilesToolDefinition, attemptCompletionToolDefinition, implementToolDefinition, attemptCompletionSchema, parseXmlToolCallWithThinking } from './tools.js'; import { createMessagePreview } from '../tools/common.js'; import { createWrappedTools, listFilesToolInstance, searchFilesToolInstance, clearToolExecutionData } from './probeTool.js'; import { createMockProvider } from './mockProvider.js'; import { listFilesByLevel } from '../index.js'; import { cleanSchemaResponse, isJsonSchema, validateJsonResponse, createJsonCorrectionPrompt, isJsonSchemaDefinition, createSchemaDefinitionCorrectionPrompt, validateAndFixMermaidResponse } from './schemaUtils.js'; import { removeThinkingTags } from './xmlParsingUtils.js'; import { MCPXmlBridge, parseHybridXmlToolCall, loadMCPConfigurationFromPath } from './mcp/index.js'; // Maximum tool iterations to prevent infinite loops - configurable via MAX_TOOL_ITERATIONS env var const MAX_TOOL_ITERATIONS = parseInt(process.env.MAX_TOOL_ITERATIONS || '30', 10); const MAX_HISTORY_MESSAGES = 100; // Supported image file extensions (imported from shared config) // Maximum image file size (20MB) to prevent OOM attacks const MAX_IMAGE_FILE_SIZE = 20 * 1024 * 1024; /** * ProbeAgent class to handle AI interactions with code search capabilities */ export class ProbeAgent { /** * Create a new ProbeAgent instance * @param {Object} options - Configuration options * @param {string} [options.sessionId] - Optional session ID * @param {string} [options.customPrompt] - Custom prompt to replace the default system message * @param {string} [options.promptType] - Predefined prompt type (architect, code-review, support) * @param {boolean} [options.allowEdit=false] - Allow the use of the 'implement' tool * @param {boolean} [options.enableDelegate=false] - Enable the delegate tool for task distribution to subagents * @param {string} [options.path] - Search directory path * @param {string} [options.provider] - Force specific AI provider * @param {string} [options.model] - Override model name * @param {boolean} [options.debug] - Enable debug mode * @param {boolean} [options.outline] - Enable outline-xml format for search results * @param {number} [options.maxResponseTokens] - Maximum tokens for AI responses * @param {number} [options.maxIterations] - Maximum tool iterations (overrides MAX_TOOL_ITERATIONS env var) * @param {boolean} [options.disableMermaidValidation=false] - Disable automatic mermaid diagram validation and fixing * @param {boolean} [options.disableJsonValidation=false] - Disable automatic JSON validation and fixing (prevents infinite recursion in JsonFixingAgent) * @param {boolean} [options.enableMcp=false] - Enable MCP tool integration * @param {string} [options.mcpConfigPath] - Path to MCP configuration file * @param {Object} [options.mcpConfig] - MCP configuration object (overrides mcpConfigPath) * @param {Array} [options.mcpServers] - Deprecated, use mcpConfig instead * @param {Object} [options.storageAdapter] - Custom storage adapter for history management * @param {Object} [options.hooks] - Hook callbacks for events (e.g., {'tool:start': callback}) */ constructor(options = {}) { // Basic configuration this.sessionId = options.sessionId || randomUUID(); this.customPrompt = options.customPrompt || null; this.promptType = options.promptType || 'code-explorer'; this.allowEdit = !!options.allowEdit; this.enableDelegate = !!options.enableDelegate; this.debug = options.debug || process.env.DEBUG === '1'; this.cancelled = false; this.tracer = options.tracer || null; this.outline = !!options.outline; this.maxResponseTokens = options.maxResponseTokens || parseInt(process.env.MAX_RESPONSE_TOKENS || '0', 10) || null; this.maxIterations = options.maxIterations || null; this.disableMermaidValidation = !!options.disableMermaidValidation; this.disableJsonValidation = !!options.disableJsonValidation; // Storage adapter (defaults to in-memory) this.storageAdapter = options.storageAdapter || new InMemoryStorageAdapter(); // Hook manager this.hooks = new HookManager(); // Register hooks from options if (options.hooks) { for (const [hookName, callback] of Object.entries(options.hooks)) { this.hooks.on(hookName, callback); } } // Bash configuration this.enableBash = !!options.enableBash; this.bashConfig = options.bashConfig || {}; // Search configuration - support both path (single) and allowedFolders (array) if (options.allowedFolders && Array.isArray(options.allowedFolders)) { this.allowedFolders = options.allowedFolders; } else if (options.path) { this.allowedFolders = [options.path]; } else { this.allowedFolders = [process.cwd()]; } // API configuration this.clientApiProvider = options.provider || null; this.clientApiModel = options.model || null; this.clientApiKey = null; // Will be set from environment this.clientApiUrl = null; // Initialize token counter this.tokenCounter = new TokenCounter(); if (this.debug) { console.log(`[DEBUG] Generated session ID for agent: ${this.sessionId}`); console.log(`[DEBUG] Maximum tool iterations configured: ${MAX_TOOL_ITERATIONS}`); console.log(`[DEBUG] Allow Edit (implement tool): ${this.allowEdit}`); } // Initialize tools this.initializeTools(); // Initialize chat history this.history = []; // Initialize image tracking for agentic loop this.pendingImages = new Map(); // Map<imagePath, base64Data> to avoid reloading this.currentImages = []; // Currently active images for AI calls // Initialize event emitter for tool execution updates this.events = new EventEmitter(); // MCP configuration this.enableMcp = !!options.enableMcp || process.env.ENABLE_MCP === '1'; this.mcpConfigPath = options.mcpConfigPath || null; this.mcpConfig = options.mcpConfig || null; this.mcpServers = options.mcpServers || null; // Deprecated, keep for backward compatibility this.mcpBridge = null; this._mcpInitialized = false; // Track if MCP initialization has been attempted // Initialize the AI model this.initializeModel(); // Note: MCP initialization is now done in initialize() method // Constructor must remain synchronous for backward compatibility } /** * Initialize the agent asynchronously (must be called after constructor) * This method initializes MCP and merges MCP tools into the tool list, and loads history from storage */ async initialize() { // Load history from storage adapter try { const history = await this.storageAdapter.loadHistory(this.sessionId); this.history = history; if (this.debug && history.length > 0) { console.log(`[DEBUG] Loaded ${history.length} messages from storage for session ${this.sessionId}`); } // Emit storage load hook await this.hooks.emit(HOOK_TYPES.STORAGE_LOAD, { sessionId: this.sessionId, messages: history }); } catch (error) { console.error(`[ERROR] Failed to load history from storage:`, error); // Continue with empty history if storage fails this.history = []; } // Initialize MCP if enabled and not already initialized if (this.enableMcp && !this._mcpInitialized) { this._mcpInitialized = true; // Prevent multiple initialization attempts try { await this.initializeMCP(); // Merge MCP tools into toolImplementations for unified access if (this.mcpBridge) { const mcpTools = this.mcpBridge.mcpTools || {}; for (const [toolName, toolImpl] of Object.entries(mcpTools)) { this.toolImplementations[toolName] = toolImpl; } } // Log all available tools after MCP initialization if (this.debug) { const allToolNames = Object.keys(this.toolImplementations); const nativeToolCount = allToolNames.filter(name => !this.mcpBridge?.mcpTools?.[name]).length; const mcpToolCount = allToolNames.length - nativeToolCount; console.error('\n[DEBUG] ========================================'); console.error('[DEBUG] All Tools Initialized'); console.error(`[DEBUG] Native tools: ${nativeToolCount}, MCP tools: ${mcpToolCount}`); console.error('[DEBUG] Available tools:'); for (const toolName of allToolNames) { const isMCP = this.mcpBridge?.mcpTools?.[toolName] ? ' (MCP)' : ''; console.error(`[DEBUG] - ${toolName}${isMCP}`); } console.error('[DEBUG] ========================================\n'); } } catch (error) { console.error('[MCP ERROR] Failed to initialize MCP:', error.message); if (this.debug) { console.error('[MCP DEBUG] Full error details:', error); } this.mcpBridge = null; } } // Emit agent initialized hook await this.hooks.emit(HOOK_TYPES.AGENT_INITIALIZED, { sessionId: this.sessionId, agent: this }); } /** * Initialize tools with configuration */ initializeTools() { const configOptions = { sessionId: this.sessionId, debug: this.debug, defaultPath: this.allowedFolders.length > 0 ? this.allowedFolders[0] : process.cwd(), allowedFolders: this.allowedFolders, outline: this.outline, enableBash: this.enableBash, bashConfig: this.bashConfig }; // Create base tools const baseTools = createTools(configOptions); // Create wrapped tools with event emission const wrappedTools = createWrappedTools(baseTools); // Store tool instances for execution this.toolImplementations = { search: wrappedTools.searchToolInstance, query: wrappedTools.queryToolInstance, extract: wrappedTools.extractToolInstance, delegate: wrappedTools.delegateToolInstance, listFiles: listFilesToolInstance, searchFiles: searchFilesToolInstance, }; // Add bash tool if enabled if (this.enableBash && wrappedTools.bashToolInstance) { this.toolImplementations.bash = wrappedTools.bashToolInstance; } // Store wrapped tools for ACP system this.wrappedTools = wrappedTools; // Log available tools in debug mode if (this.debug) { console.error('\n[DEBUG] ========================================'); console.error('[DEBUG] ProbeAgent Tools Initialized'); console.error('[DEBUG] Session ID:', this.sessionId); console.error('[DEBUG] Available tools:'); for (const toolName of Object.keys(this.toolImplementations)) { console.error(`[DEBUG] - ${toolName}`); } console.error('[DEBUG] Allowed folders:', this.allowedFolders); console.error('[DEBUG] Outline mode:', this.outline); console.error('[DEBUG] ========================================\n'); } } /** * Initialize the AI model based on available API keys and forced provider setting */ initializeModel() { // Get model override if provided (options.model takes precedence over environment variable) const modelName = this.clientApiModel || process.env.MODEL_NAME; // Check if we're in test mode and should use mock provider if (process.env.NODE_ENV === 'test' || process.env.USE_MOCK_AI === 'true') { this.initializeMockModel(modelName); return; } // Get API keys from environment variables // Support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN (used by Z.AI) const anthropicApiKey = process.env.ANTHROPIC_API_KEY || process.env.ANTHROPIC_AUTH_TOKEN; const openaiApiKey = process.env.OPENAI_API_KEY; // Support both GOOGLE_GENERATIVE_AI_API_KEY (official) and GOOGLE_API_KEY (legacy) const googleApiKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GOOGLE_API_KEY; const awsAccessKeyId = process.env.AWS_ACCESS_KEY_ID; const awsSecretAccessKey = process.env.AWS_SECRET_ACCESS_KEY; const awsRegion = process.env.AWS_REGION; const awsSessionToken = process.env.AWS_SESSION_TOKEN; const awsApiKey = process.env.AWS_BEDROCK_API_KEY; // Get custom API URLs if provided const llmBaseUrl = process.env.LLM_BASE_URL; const anthropicApiUrl = process.env.ANTHROPIC_API_URL || process.env.ANTHROPIC_BASE_URL || llmBaseUrl; const openaiApiUrl = process.env.OPENAI_API_URL || llmBaseUrl; const googleApiUrl = process.env.GOOGLE_API_URL || llmBaseUrl; const awsBedrockBaseUrl = process.env.AWS_BEDROCK_BASE_URL || llmBaseUrl; // Use client-forced provider or environment variable const forceProvider = this.clientApiProvider || (process.env.FORCE_PROVIDER ? process.env.FORCE_PROVIDER.toLowerCase() : null); if (this.debug) { const hasAwsCredentials = !!(awsAccessKeyId && awsSecretAccessKey && awsRegion); const hasAwsApiKey = !!awsApiKey; console.log(`[DEBUG] Available API keys: Anthropic=${!!anthropicApiKey}, OpenAI=${!!openaiApiKey}, Google=${!!googleApiKey}, AWS Bedrock=${hasAwsCredentials || hasAwsApiKey}`); if (hasAwsCredentials) console.log(`[DEBUG] AWS credentials: AccessKey=${!!awsAccessKeyId}, SecretKey=${!!awsSecretAccessKey}, Region=${awsRegion}, SessionToken=${!!awsSessionToken}`); if (hasAwsApiKey) console.log(`[DEBUG] AWS API Key provided`); if (awsBedrockBaseUrl) console.log(`[DEBUG] AWS Bedrock base URL: ${awsBedrockBaseUrl}`); console.log(`[DEBUG] Force provider: ${forceProvider || '(not set)'}`); if (modelName) console.log(`[DEBUG] Model override: ${modelName}`); } // Check if a specific provider is forced if (forceProvider) { if (forceProvider === 'anthropic' && anthropicApiKey) { this.initializeAnthropicModel(anthropicApiKey, anthropicApiUrl, modelName); return; } else if (forceProvider === 'openai' && openaiApiKey) { this.initializeOpenAIModel(openaiApiKey, openaiApiUrl, modelName); return; } else if (forceProvider === 'google' && googleApiKey) { this.initializeGoogleModel(googleApiKey, googleApiUrl, modelName); return; } else if (forceProvider === 'bedrock' && ((awsAccessKeyId && awsSecretAccessKey && awsRegion) || awsApiKey)) { this.initializeBedrockModel(awsAccessKeyId, awsSecretAccessKey, awsRegion, awsSessionToken, awsApiKey, awsBedrockBaseUrl, modelName); return; } console.warn(`WARNING: Forced provider "${forceProvider}" selected but required API key is missing or invalid! Falling back to auto-detection.`); } // If no provider is forced or forced provider failed, use the first available API key if (anthropicApiKey) { this.initializeAnthropicModel(anthropicApiKey, anthropicApiUrl, modelName); } else if (openaiApiKey) { this.initializeOpenAIModel(openaiApiKey, openaiApiUrl, modelName); } else if (googleApiKey) { this.initializeGoogleModel(googleApiKey, googleApiUrl, modelName); } else if ((awsAccessKeyId && awsSecretAccessKey && awsRegion) || awsApiKey) { this.initializeBedrockModel(awsAccessKeyId, awsSecretAccessKey, awsRegion, awsSessionToken, awsApiKey, awsBedrockBaseUrl, modelName); } else { throw new Error('No API key provided. Please set ANTHROPIC_API_KEY (or ANTHROPIC_AUTH_TOKEN), OPENAI_API_KEY, GOOGLE_GENERATIVE_AI_API_KEY (or GOOGLE_API_KEY), AWS credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION), or AWS_BEDROCK_API_KEY environment variables.'); } } /** * Initialize Anthropic model */ initializeAnthropicModel(apiKey, apiUrl, modelName) { this.provider = createAnthropic({ apiKey: apiKey, ...(apiUrl && { baseURL: apiUrl }), }); this.model = modelName || 'claude-sonnet-4-5-20250929'; this.apiType = 'anthropic'; if (this.debug) { console.log(`Using Anthropic API with model: ${this.model}${apiUrl ? ` (URL: ${apiUrl})` : ''}`); } } /** * Initialize OpenAI model */ initializeOpenAIModel(apiKey, apiUrl, modelName) { this.provider = createOpenAI({ compatibility: 'strict', apiKey: apiKey, ...(apiUrl && { baseURL: apiUrl }), }); this.model = modelName || 'gpt-5-thinking'; this.apiType = 'openai'; if (this.debug) { console.log(`Using OpenAI API with model: ${this.model}${apiUrl ? ` (URL: ${apiUrl})` : ''}`); } } /** * Initialize Google model */ initializeGoogleModel(apiKey, apiUrl, modelName) { this.provider = createGoogleGenerativeAI({ apiKey: apiKey, ...(apiUrl && { baseURL: apiUrl }), }); this.model = modelName || 'gemini-2.5-pro'; this.apiType = 'google'; if (this.debug) { console.log(`Using Google API with model: ${this.model}${apiUrl ? ` (URL: ${apiUrl})` : ''}`); } } /** * Initialize AWS Bedrock model */ initializeBedrockModel(accessKeyId, secretAccessKey, region, sessionToken, apiKey, baseURL, modelName) { // Build configuration object, only including defined values const config = {}; // Authentication - prefer API key if provided, otherwise use AWS credentials if (apiKey) { config.apiKey = apiKey; } else if (accessKeyId && secretAccessKey) { config.accessKeyId = accessKeyId; config.secretAccessKey = secretAccessKey; if (sessionToken) { config.sessionToken = sessionToken; } } // Region is required for AWS credentials but optional for API key if (region) { config.region = region; } // Optional base URL if (baseURL) { config.baseURL = baseURL; } this.provider = createAmazonBedrock(config); this.model = modelName || 'anthropic.claude-sonnet-4-20250514-v1:0'; this.apiType = 'bedrock'; if (this.debug) { const authMethod = apiKey ? 'API Key' : 'AWS Credentials'; const regionInfo = region ? ` (Region: ${region})` : ''; const baseUrlInfo = baseURL ? ` (Base URL: ${baseURL})` : ''; console.log(`Using AWS Bedrock API with model: ${this.model}${regionInfo} [Auth: ${authMethod}]${baseUrlInfo}`); } } /** * Process assistant response content and detect/load image references * @param {string} content - The assistant's response content * @returns {Promise<void>} */ async processImageReferences(content) { if (!content) return; // First, try to parse listFiles output format to extract directory context const listFilesDirectories = this.extractListFilesDirectories(content); // Enhanced pattern to detect image file mentions in various contexts // Looks for: "image", "file", "screenshot", etc. followed by path-like strings with image extensions const extensionsPattern = `(?:${SUPPORTED_IMAGE_EXTENSIONS.join('|')})`; const imagePatterns = [ // Direct file path mentions: "./screenshot.png", "/path/to/image.jpg", etc. new RegExp(`(?:\\.?\\.\\/)?[^\\s"'<>\\[\\]]+\\\.${extensionsPattern}(?!\\w)`, 'gi'), // Contextual mentions: "look at image.png", "the file screenshot.jpg shows" new RegExp(`(?:image|file|screenshot|diagram|photo|picture|graphic)\\s*:?\\s*([^\\s"'<>\\[\\]]+\\.${extensionsPattern})(?!\\w)`, 'gi'), // Tool result mentions: often contain file paths new RegExp(`(?:found|saved|created|generated).*?([^\\s"'<>\\[\\]]+\\.${extensionsPattern})(?!\\w)`, 'gi') ]; const foundPaths = new Set(); // Extract potential image paths using all patterns for (const pattern of imagePatterns) { let match; while ((match = pattern.exec(content)) !== null) { // For patterns with capture groups, use the captured path; otherwise use the full match const imagePath = match[1] || match[0]; if (imagePath && imagePath.length > 0) { foundPaths.add(imagePath.trim()); } } } if (foundPaths.size === 0) return; if (this.debug) { console.log(`[DEBUG] Found ${foundPaths.size} potential image references:`, Array.from(foundPaths)); } // Process each found path for (const imagePath of foundPaths) { // Try to resolve the path with directory context from listFiles output let resolvedPath = imagePath; // If the path is just a filename (no directory separator), try to find it in listFiles directories if (!imagePath.includes('/') && !imagePath.includes('\\')) { for (const dir of listFilesDirectories) { const potentialPath = resolve(dir, imagePath); // Check if this file exists by attempting to load it const loaded = await this.loadImageIfValid(potentialPath); if (loaded) { // Successfully loaded with this directory context if (this.debug) { console.log(`[DEBUG] Resolved ${imagePath} to ${potentialPath} using listFiles context`); } break; // Found it, no need to try other directories } } } else { // Path already has directory info, load as-is await this.loadImageIfValid(resolvedPath); } } } /** * Extract directory paths from tool output (both listFiles and extract tool) * @param {string} content - Tool output content * @returns {string[]} - Array of directory paths */ extractListFilesDirectories(content) { const directories = []; // Pattern 1: Extract directory from extract tool "File:" header // Format: "File: /path/to/file.md" or "File: ./relative/path/file.md" const fileHeaderPattern = /^File:\s+(.+)$/gm; let match; while ((match = fileHeaderPattern.exec(content)) !== null) { const filePath = match[1].trim(); // Get directory from file path const dir = dirname(filePath); if (dir && dir !== '.') { directories.push(dir); if (this.debug) { console.log(`[DEBUG] Extracted directory context from File header: ${dir}`); } } } // Pattern 2: Extract directory from listFiles output format: "/path/to/directory:" // Matches absolute paths (/path/to/dir:) or current directory markers (.:) or Windows paths (C:\path:) at start of line // Very strict to avoid matching random text like ".Something:" or "./Some text:" const dirPattern = /^(\/[^\n:]+|[A-Z]:\\[^\n:]+|\.\.?(?:\/[^\n:]+)?):\s*$/gm; while ((match = dirPattern.exec(content)) !== null) { const dirPath = match[1].trim(); // Strict validation: must look like an actual filesystem path // Reject if contains spaces or other characters that wouldn't be in listFiles output const hasInvalidChars = /\s/.test(dirPath); // Contains whitespace // Validate this looks like an actual path, not random text // Must be either: absolute path (Unix or Windows), or ./ or ../ followed by valid path chars const isValidPath = ( !hasInvalidChars && ( dirPath.startsWith('/') || // Unix absolute path /^[A-Z]:\\/.test(dirPath) || // Windows absolute path (C:\) dirPath === '.' || // Current directory dirPath === '..' || // Parent directory (dirPath.startsWith('./') && dirPath.length > 2 && !dirPath.includes(' ')) || // ./something (no spaces) (dirPath.startsWith('../') && dirPath.length > 3 && !dirPath.includes(' ')) // ../something (no spaces) ) ); if (isValidPath) { // Avoid duplicates if (!directories.includes(dirPath)) { directories.push(dirPath); if (this.debug) { console.log(`[DEBUG] Extracted directory context from listFiles: ${dirPath}`); } } } } return directories; } /** * Load and cache an image if it's valid and accessible * @param {string} imagePath - Path to the image file * @returns {Promise<boolean>} - True if image was loaded successfully */ async loadImageIfValid(imagePath) { try { // Skip if already loaded if (this.pendingImages.has(imagePath)) { if (this.debug) { console.log(`[DEBUG] Image already loaded: ${imagePath}`); } return true; } // Security validation: check if path is within any allowed directory const allowedDirs = this.allowedFolders && this.allowedFolders.length > 0 ? this.allowedFolders : [process.cwd()]; let absolutePath; let isPathAllowed = false; // If absolute path, check if it's within any allowed directory if (isAbsolute(imagePath)) { absolutePath = imagePath; isPathAllowed = allowedDirs.some(dir => absolutePath.startsWith(resolve(dir))); } else { // For relative paths, try resolving against each allowed directory for (const dir of allowedDirs) { const resolvedPath = resolve(dir, imagePath); if (resolvedPath.startsWith(resolve(dir))) { absolutePath = resolvedPath; isPathAllowed = true; break; } } } // Security check: ensure path is within at least one allowed directory if (!isPathAllowed) { if (this.debug) { console.log(`[DEBUG] Image path outside allowed directories: ${imagePath}`); } return false; } // Check if file exists and get file stats let fileStats; try { fileStats = await stat(absolutePath); } catch (error) { if (this.debug) { console.log(`[DEBUG] Image file not found: ${absolutePath}`); } return false; } // Validate file size to prevent OOM attacks if (fileStats.size > MAX_IMAGE_FILE_SIZE) { if (this.debug) { console.log(`[DEBUG] Image file too large: ${absolutePath} (${fileStats.size} bytes, max: ${MAX_IMAGE_FILE_SIZE})`); } return false; } // Validate file extension const extension = absolutePath.toLowerCase().split('.').pop(); if (!SUPPORTED_IMAGE_EXTENSIONS.includes(extension)) { if (this.debug) { console.log(`[DEBUG] Unsupported image format: ${extension}`); } return false; } // Determine MIME type (from shared config) const mimeType = IMAGE_MIME_TYPES[extension]; // Read and encode file asynchronously const fileBuffer = await readFile(absolutePath); const base64Data = fileBuffer.toString('base64'); const dataUrl = `data:${mimeType};base64,${base64Data}`; // Cache the loaded image this.pendingImages.set(imagePath, dataUrl); if (this.debug) { console.log(`[DEBUG] Successfully loaded image: ${imagePath} (${fileBuffer.length} bytes)`); } return true; } catch (error) { if (this.debug) { console.log(`[DEBUG] Failed to load image ${imagePath}: ${error.message}`); } return false; } } /** * Get all currently loaded images as an array for AI model consumption * @returns {Array<string>} - Array of base64 data URLs */ getCurrentImages() { return Array.from(this.pendingImages.values()); } /** * Clear loaded images (useful for new conversations) */ clearLoadedImages() { this.pendingImages.clear(); this.currentImages = []; if (this.debug) { console.log('[DEBUG] Cleared all loaded images'); } } /** * Prepare messages for AI consumption, adding images to the latest user message if available * @param {Array} messages - Current conversation messages * @returns {Array} - Messages formatted for AI SDK with potential image content */ prepareMessagesWithImages(messages) { const loadedImages = this.getCurrentImages(); // If no images loaded, return messages as-is if (loadedImages.length === 0) { return messages; } // Clone messages to avoid mutating the original const messagesWithImages = [...messages]; // Find the last user message to attach images to const lastUserMessageIndex = messagesWithImages.map(m => m.role).lastIndexOf('user'); if (lastUserMessageIndex === -1) { if (this.debug) { console.log('[DEBUG] No user messages found to attach images to'); } return messages; } const lastUserMessage = messagesWithImages[lastUserMessageIndex]; // Convert to multimodal format if we have images if (typeof lastUserMessage.content === 'string') { messagesWithImages[lastUserMessageIndex] = { ...lastUserMessage, content: [ { type: 'text', text: lastUserMessage.content }, ...loadedImages.map(imageData => ({ type: 'image', image: imageData })) ] }; if (this.debug) { console.log(`[DEBUG] Added ${loadedImages.length} images to the latest user message`); } } return messagesWithImages; } /** * Initialize mock model for testing */ initializeMockModel(modelName) { this.provider = createMockProvider(); this.model = modelName || 'mock-model'; this.apiType = 'mock'; if (this.debug) { console.log(`Using Mock API with model: ${this.model}`); } } /** * Initialize MCP bridge and load tools */ async initializeMCP() { if (!this.enableMcp) return; try { let mcpConfig = null; // Priority order: mcpConfig > mcpConfigPath > mcpServers (deprecated) > auto-discovery if (this.mcpConfig) { // Direct config object provided (SDK usage) mcpConfig = this.mcpConfig; if (this.debug) { console.error('[MCP DEBUG] Using provided MCP config object'); } } else if (this.mcpConfigPath) { // Explicit config path provided try { mcpConfig = loadMCPConfigurationFromPath(this.mcpConfigPath); if (this.debug) { console.error(`[MCP DEBUG] Loaded MCP config from: ${this.mcpConfigPath}`); } } catch (error) { throw new Error(`Failed to load MCP config from ${this.mcpConfigPath}: ${error.message}`); } } else if (this.mcpServers) { // Backward compatibility: convert old mcpServers format mcpConfig = { mcpServers: this.mcpServers }; if (this.debug) { console.error('[MCP DEBUG] Using deprecated mcpServers option. Consider using mcpConfig instead.'); } } else { // No explicit config provided - will attempt auto-discovery // This is important for CLI usage where config files may exist if (this.debug) { console.error('[MCP DEBUG] No explicit MCP config provided, will attempt auto-discovery'); } // Pass null to trigger auto-discovery in MCPXmlBridge mcpConfig = null; } // Initialize the MCP XML bridge this.mcpBridge = new MCPXmlBridge({ debug: this.debug }); await this.mcpBridge.initialize(mcpConfig); const mcpToolNames = this.mcpBridge.getToolNames(); const mcpToolCount = mcpToolNames.length; if (mcpToolCount > 0) { if (this.debug) { console.error('\n[MCP DEBUG] ========================================'); console.error(`[MCP DEBUG] MCP Tools Initialized (${mcpToolCount} tools)`); console.error('[MCP DEBUG] Available MCP tools:'); for (const toolName of mcpToolNames) { console.error(`[MCP DEBUG] - ${toolName}`); } console.error('[MCP DEBUG] ========================================\n'); } } else { // For backward compatibility: if no tools were loaded, set bridge to null // This maintains the behavior expected by existing tests if (this.debug) { console.error('[MCP DEBUG] No MCP tools loaded, setting bridge to null'); } this.mcpBridge = null; } } catch (error) { console.error('[MCP ERROR] Error initializing MCP:', error.message); if (this.debug) { console.error('[MCP DEBUG] Full error details:', error); } this.mcpBridge = null; } } /** * Get the system message with instructions for the AI (XML Tool Format) */ async getSystemMessage() { // Lazy initialize MCP if enabled but not yet initialized if (this.enableMcp && !this.mcpBridge && !this._mcpInitialized) { this._mcpInitialized = true; // Prevent multiple initialization attempts try { await this.initializeMCP(); // Merge MCP tools into toolImplementations for unified access if (this.mcpBridge) { const mcpTools = this.mcpBridge.mcpTools || {}; for (const [toolName, toolImpl] of Object.entries(mcpTools)) { this.toolImplementations[toolName] = toolImpl; } } } catch (error) { console.error('[MCP ERROR] Failed to lazy-initialize MCP:', error.message); if (this.debug) { console.error('[MCP DEBUG] Full error details:', error); } } } // Build tool definitions let toolDefinitions = ` ${searchToolDefinition} ${queryToolDefinition} ${extractToolDefinition} ${listFilesToolDefinition} ${searchFilesToolDefinition} ${attemptCompletionToolDefinition} `; if (this.allowEdit) { toolDefinitions += `${implementToolDefinition}\n`; } if (this.enableDelegate) { toolDefinitions += `${delegateToolDefinition}\n`; } // Build XML tool guidelines let xmlToolGuidelines = ` # Tool Use Formatting Tool use MUST be formatted using XML-style tags. Each tool call requires BOTH opening and closing tags with the exact tool name. Each parameter is similarly enclosed within its own set of opening and closing tags. You MUST use exactly ONE tool call per message until you are ready to complete the task. **CRITICAL: Every XML tag MUST have both opening <tag> and closing </tag> parts.** Structure (note the closing tags): <tool_name> <parameter1_name>value1</parameter1_name> <parameter2_name>value2</parameter2_name> ... </tool_name> Examples: <search> <query>error handling</query> <path>src/search</path> </search> <extract> <targets>src/config.js:15-25</targets> </extract> <attempt_completion> The configuration is loaded from src/config.js lines 15-25 which contains the database settings. </attempt_completion> # Special Case: Quick Completion If your previous response was already correct and complete, you may respond with just: <attempt_complete> This signals to use your previous response as the final answer without repeating content. # Thinking Process Before using a tool, analyze the situation within <thinking></thinking> tags. This helps you organize your thoughts and make better decisions. Example: <thinking> I need to find code related to error handling in the search module. The most appropriate tool for this is the search tool, which requires a query parameter and a path parameter. I have both the query ("error handling") and the path ("src/search"), so I can proceed with the search. </thinking> # Tool Use Guidelines 1. Think step-by-step about how to achieve the user's goal. 2. Use <thinking></thinking> tags to analyze the situation and determine the appropriate tool. 3. Choose **one** tool that helps achieve the current step. 4. Format the tool call using the specified XML format with BOTH opening and closing tags. Ensure all required parameters are included. 5. **You MUST respond with exactly one tool call in the specified XML format in each turn.** 6. Wait for the tool execution result, which will be provided in the next message (within a <tool_result> block). 7. Analyze the tool result and decide the next step. If more tool calls are needed, repeat steps 2-6. 8. If the task is fully complete and all previous steps were successful, use the \`<attempt_completion>\` tool to provide the final answer. This is the ONLY way to finish the task. 9. If you cannot proceed (e.g., missing information, invalid request), use \`<attempt_completion>\` to explain the issue clearly with an appropriate message directly inside the tags. 10. If your previous response was already correct and complete, you may use \`<attempt_complete>\` as a shorthand. Available Tools: - search: Search code using keyword queries. - query: Search code using structural AST patterns. - extract: Extract specific code blocks or lines from files. - listFiles: List files and directories in a specified location. - searchFiles: Find files matching a glob pattern with recursive search capability. ${this.allowEdit ? '- implement: Implement a feature or fix a bug using aider.\n' : ''}${this.enableDelegate ? '- delegate: Delegate big distinct tasks to specialized probe subagents.\n' : ''} - attempt_completion: Finalize the task and provide the result to the user. - attempt_complete: Quick completion using previous response (shorthand). `; // Common instructions const commonInstructions = `<instructions> Follow these instructions carefully: 1. Analyze the user's request. 2. Use <thinking></thinking> tags to analyze the situation and determine the appropriate tool for each step. 3. Use the available tools step-by-step to fulfill the request. 4. You should always prefer the \`search\` tool for code-related questions. Read full files only if really necessary. 5. Ensure to get really deep and understand the full picture before answering. 6. You MUST respond with exactly ONE tool call per message, using the specified XML format, until the task is complete. 7. Wait for the tool execution result (provided in the next user message in a <tool_result> block) before proceeding to the next step. 8. Once the task is fully completed, use the '<attempt_completion>' tool to provide the final result. This is the ONLY way to signal completion. 9. Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results. </instructions> `; // Define predefined prompts (without the common instructions) const predefinedPrompts = { 'code-explorer': `You are ProbeChat Code Explorer, a specialized AI assistant focused on helping developers, product managers, and QAs understand and navigate codebases. Your primary function is to answer questions based on code, explain how systems work, and provide insights into code functionality using the provided code analysis tools. When exploring code: - Provide clear, concise explanations based on user request - Find and highlight the most relevant code snippets, if required - Trace function calls and data flow through the system - Try to understand the user's intent and provide relevant information - Understand high level picture - Balance detail with clarity in your explanations`, 'architect': `You are ProbeChat Architect, a specialized AI assistant focused on software architecture and design. Your primary function is to help users understand, analyze, and design software systems using the provided code analysis tools. When analyzing code: - Focus on high-level design patterns and system organization - Identify architectural patterns and component relationships - Evaluate system structure and suggest architectural improvements - Consider scalability, maintainability, and extensibility in your analysis`, 'code-review': `You are ProbeChat Code Reviewer, a specialized AI assistant focused on code quality and best practices. Your primary function is to help users identify issues, suggest improvements, and ensure code follows best practices using the provided code analysis tools. When reviewing code: - Look for bugs, edge cases, and potential issues - Identify performance bottlenecks and optimization opportunities - Check for security vulnerabilities and best practices - Evaluate code style and consistency - Provide specific, actionable suggestions with code examples where appropriate`, 'code-review-template': `You are going to perform code review according to provided user rules. Ensure to review only code provided in diff and latest commit, if provided. However you still need to fully understand how modified code works, and read dependencies if something is not clear.`, 'engineer': `You are senior engineer focused on software architecture and design. Before jumping on the task you first, in details analyse user request, and try to provide elegant and concise solution. If solution is clear, you can jump to implementation right away, if not, you can ask user a clarification question, by calling attempt_completion tool, with required details. Before jumping to implementation: - Focus on high-level design patterns and system organization - Identify architectural patterns and component relationships - Evaluate system structure and suggest architectural improvements - Focus on backward compatibility. - Consider scalability, maintainability, and extensibility in your analysis During the implementation: - Avoid implementing special cases - Do not forget to add the tests`, 'support': `You are ProbeChat Support, a specialized AI assistant focused on helping developers troubleshoot issues and solve problems. Your primary function is to help users diagnose errors, understand unexpected behaviors, and find solutions using the provided code analysis tools. When troubleshooting: - Focus on finding root causes, not just symptoms - Explain concepts clearly with appropriate context - Provide step-by-step guidance to solve problems - Suggest diagnostic steps to verify solutions - Consider edge cases and potential complications - Be empathetic and patient in your explanations` }; let systemMessage = ''; // Use custom prompt if provided if (this.customPrompt) { systemMessage = "<role>" + this.customPrompt + "</role>"; if (this.debug) { console.log(`[DEBUG] Using custom prompt`); } } // Use predefined prompt if specified else if (this.promptType && predefinedPrompts[this.promptType]) { systemMessage = "<role>" + predefinedPrompts[this.promptType] + "</role>"; if (this.debug) { console.log(`[DEBUG] Using predefined prompt: ${this.promptType}`); } // Add common instructions to predefined prompts systemMessage += commonInstructions; } else { // Use the default prompt (code explorer) if no prompt type is specified systemMessage = "<role>" + predefinedPrompts['code-explorer'] + "</role>"; if (this.debug) { console.log(`[DEBUG] Using default prompt: code explorer`); } // Add common instructions to the default prompt systemMessage += commonInstructions; } // Add XML Tool Guidelines systemMessage += `\n${xmlToolGuidelines}\n`; // Add Tool Definitions systemMessage += `\n# Tools Available\n${toolDefinitions}\n`; // Add MCP tools if available if (this.mcpBridge && this.mcpBridge.getToolNames().length > 0) { systemMessage += `\n## MCP Tools (JSON parameters in <params> tag)\n`; systemMessage += this.mcpBridge.getXmlToolDefinitions(); systemMessage += `\n\nFor MCP tools, use JSON format within the params tag, e.g.:\n<mcp_tool>\n<params>\n{"key": "value"}\n</params>\n</mcp_tool>\n`; } // Add folder information const searchDirectory = this.allowedFolders.length > 0 ? this.allowedFolders[0] : process.cwd(); if (this.debug) { console.log(`[DEBUG] Generating file list for base directory: ${searchDirectory}...`); } try { const files = await listFilesByLevel({ directory: searchDirectory, maxFiles: 100, respectGitignore: !process.env.PROBE_NO_GITIGNORE || process.env.PROBE_NO_GITIGNORE === '', cwd: process.cwd() }); systemMessage += `\n# Repository Structure\n\nYou are working with a repository located at: ${searchDirectory}\n\nHere's an overview of the repository structure (showing up to 100 most relevant files):\n\n\`\`\`\n${files}\n\`\`\`\n\n`; } catch (error) { if (this.debug) { console.log(`[DEBUG] Could not generate file list: ${error.message}`); } systemMessage += `\n# Repository Structure\n\nYou are working with a repository located at: ${searchDirectory}\n\n`; } if (this.allowedFolders.length > 0) { systemMessage += `\n**Important**: For security reasons, you can only search within these allowed folders: ${this.allowedFolders.join(', ')}\n\n`; } return systemMessage; } /** * Answer a question using the agentic flow * @param {string} message - The user's question * @param {Array} [images] - Optional array of image data (base64 strings or URLs) * @param {Object|string} [schemaOrOptions] - Can be either: * - A string: JSON schema for structured output (backwards compatible) * - An object: Options object with schema and other options * @param {string} [schemaOrOptions.schema] - JSON schema string for structured output * @returns {Promise<string>} - The final answer */ async answer(message, images = [], schemaOrOptions = {}) { if (!message || typeof message !== 'string' || message.trim().length === 0) { throw new Error('Message is required and must be a non-empty string'); } // Handle backwards compatibility - if third argument is a string, treat it as schema let options = {}; if (typeof schemaOrOptions === 'string') { options = { schema: schemaOrOptions }; } else { options = schemaOrOptions || {}; } try { // Track initial history length for storage const oldHistoryLength = this.history.length; // Emit user message hook await this.hooks.emit(HOOK_TYPES.MESSAGE_USER, { sessionId: this.sessionId, message, images }); // Generate system message const systemMessage = await this.getSystemMessage(); // Create user message with optional image support let userMessage = { role: 'user', content: message.trim() }; // If images are provided, use multi-modal message format if (images && images.length > 0) { userMessage.content = [ { type: 'text', text: message.trim() }, ...images.map(image => ({ type: 'image', image: image })) ]; } // Initialize conversation with existing history + new user message // If history already contains a system message (from session cloning), reuse it for cache efficiency // Otherwise add a fresh system message const hasSystemMessage = this.history.length > 0 && this.history[0].role === 'system'; let currentMessages; if (hasSystemMessage) { // Reuse existing system message from history for cache efficiency currentMessages = [ ...this.history, userMessage ]; if (this.debug) { console.log('[DEBUG] Reusing existing system message from history for cache efficiency'); } } else { // Add fresh system message (first call or empty history) currentMessages = [ { role: 'system', content: systemMessage }, ...this.history, // Include previous conversation history userMessage ]; } let currentIteration = 0; let completionAttempted = false; let finalResult = 'I was unable to complete your request due to reaching the maximum number of tool iterations.'; // Adjust max iterations if schema is provided // +1 for schema formatting // +2 for potential Mermaid validation retries (can be multiple diagrams) // +1 for potential JSON correction const baseMaxIterations = this.maxIterations || MAX_TOOL_ITERATIONS; const maxIterations = options.schema ? baseMaxIterations + 4 : baseMaxIterations; if (this.debug) { console.log(`[DEBUG] Starting agentic flow for question: ${message.substring(0, 100)}...`); if (options.schema) { console.log(`[DEBUG] Schema provided, using extended iteration limit: ${maxIterations} (base: ${baseMaxIterations})`); } } // Tool iteration loop while (currentIteration < maxIterations && !completionAttempted) { currentIteration++; if (this.cancelled) throw new Error('Request was cancelled by the user'); if (this.debug) { console.log(`\n[DEBUG] --- Tool Loop Iteration ${currentIteration}/${maxIterations} ---`); console.log(`[DEBUG] Current messages count for AI call: ${currentMessages.length}`); // Log preview of the latest user message (helpful for debugging loops) const lastUserMessage = [...curren