@probelabs/probe
Version:
Node.js wrapper for the probe code search tool
1,210 lines (1,050 loc) • 106 kB
JavaScript
// Core ProbeAgent class adapted from examples/chat/probeChat.js
// Load .env file if present (silent fail if not found)
import dotenv from 'dotenv';
dotenv.config();
import { createAnthropic } from '@ai-sdk/anthropic';
import { createOpenAI } from '@ai-sdk/openai';
import { createGoogleGenerativeAI } from '@ai-sdk/google';
import { createAmazonBedrock } from '@ai-sdk/amazon-bedrock';
import { streamText } from 'ai';
import { randomUUID } from 'crypto';
import { EventEmitter } from 'events';
import { existsSync } from 'fs';
import { readFile, stat } from 'fs/promises';
import { resolve, isAbsolute, dirname } from 'path';
import { TokenCounter } from './tokenCounter.js';
import { InMemoryStorageAdapter } from './storage/InMemoryStorageAdapter.js';
import { HookManager, HOOK_TYPES } from './hooks/HookManager.js';
import { SUPPORTED_IMAGE_EXTENSIONS, IMAGE_MIME_TYPES } from './imageConfig.js';
import {
createTools,
searchToolDefinition,
queryToolDefinition,
extractToolDefinition,
delegateToolDefinition,
listFilesToolDefinition,
searchFilesToolDefinition,
attemptCompletionToolDefinition,
implementToolDefinition,
attemptCompletionSchema,
parseXmlToolCallWithThinking
} from './tools.js';
import { createMessagePreview } from '../tools/common.js';
import {
createWrappedTools,
listFilesToolInstance,
searchFilesToolInstance,
clearToolExecutionData
} from './probeTool.js';
import { createMockProvider } from './mockProvider.js';
import { listFilesByLevel } from '../index.js';
import {
cleanSchemaResponse,
isJsonSchema,
validateJsonResponse,
createJsonCorrectionPrompt,
isJsonSchemaDefinition,
createSchemaDefinitionCorrectionPrompt,
validateAndFixMermaidResponse
} from './schemaUtils.js';
import { removeThinkingTags } from './xmlParsingUtils.js';
import {
MCPXmlBridge,
parseHybridXmlToolCall,
loadMCPConfigurationFromPath
} from './mcp/index.js';
// Iteration cap used when MAX_TOOL_ITERATIONS is unset or not a usable number
const DEFAULT_MAX_TOOL_ITERATIONS = 30;
/**
 * Parse a tool-iteration limit from its raw string form.
 * @param {string|undefined} rawValue - Raw value, typically process.env.MAX_TOOL_ITERATIONS
 * @param {number} [fallback=DEFAULT_MAX_TOOL_ITERATIONS] - Returned when rawValue is not a positive integer
 * @returns {number} A positive integer iteration limit
 */
function parseMaxToolIterations(rawValue, fallback = DEFAULT_MAX_TOOL_ITERATIONS) {
  const parsed = Number.parseInt(rawValue ?? '', 10);
  // parseInt yields NaN for input such as "abc"; any numeric comparison against NaN is false,
  // so an unvalidated value would silently produce an unusable limit
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}
// Maximum tool iterations to prevent infinite loops - configurable via MAX_TOOL_ITERATIONS env var
const MAX_TOOL_ITERATIONS = parseMaxToolIterations(process.env.MAX_TOOL_ITERATIONS);
const MAX_HISTORY_MESSAGES = 100;
// Supported image file extensions (imported from shared config)
// Maximum image file size (20MB) to prevent OOM attacks
const MAX_IMAGE_FILE_SIZE = 20 * 1024 * 1024;
/**
* ProbeAgent class to handle AI interactions with code search capabilities
*/
export class ProbeAgent {
/**
* Create a new ProbeAgent instance
* @param {Object} options - Configuration options
* @param {string} [options.sessionId] - Optional session ID
* @param {string} [options.customPrompt] - Custom prompt to replace the default system message
* @param {string} [options.promptType] - Predefined prompt type (architect, code-review, support)
* @param {boolean} [options.allowEdit=false] - Allow the use of the 'implement' tool
* @param {boolean} [options.enableDelegate=false] - Enable the delegate tool for task distribution to subagents
* @param {string} [options.path] - Search directory path
* @param {string} [options.provider] - Force specific AI provider
* @param {string} [options.model] - Override model name
* @param {boolean} [options.debug] - Enable debug mode
* @param {boolean} [options.outline] - Enable outline-xml format for search results
* @param {number} [options.maxResponseTokens] - Maximum tokens for AI responses
* @param {number} [options.maxIterations] - Maximum tool iterations (overrides MAX_TOOL_ITERATIONS env var)
* @param {boolean} [options.disableMermaidValidation=false] - Disable automatic mermaid diagram validation and fixing
* @param {boolean} [options.disableJsonValidation=false] - Disable automatic JSON validation and fixing (prevents infinite recursion in JsonFixingAgent)
* @param {boolean} [options.enableMcp=false] - Enable MCP tool integration
* @param {string} [options.mcpConfigPath] - Path to MCP configuration file
* @param {Object} [options.mcpConfig] - MCP configuration object (overrides mcpConfigPath)
* @param {Array} [options.mcpServers] - Deprecated, use mcpConfig instead
* @param {Object} [options.storageAdapter] - Custom storage adapter for history management
* @param {Object} [options.hooks] - Hook callbacks for events (e.g., {'tool:start': callback})
*/
constructor(options = {}) {
// Basic configuration
this.sessionId = options.sessionId || randomUUID();
this.customPrompt = options.customPrompt || null;
this.promptType = options.promptType || 'code-explorer';
this.allowEdit = !!options.allowEdit;
this.enableDelegate = !!options.enableDelegate;
this.debug = options.debug || process.env.DEBUG === '1';
this.cancelled = false;
this.tracer = options.tracer || null;
this.outline = !!options.outline;
this.maxResponseTokens = options.maxResponseTokens || parseInt(process.env.MAX_RESPONSE_TOKENS || '0', 10) || null;
this.maxIterations = options.maxIterations || null;
this.disableMermaidValidation = !!options.disableMermaidValidation;
this.disableJsonValidation = !!options.disableJsonValidation;
// Storage adapter (defaults to in-memory)
this.storageAdapter = options.storageAdapter || new InMemoryStorageAdapter();
// Hook manager
this.hooks = new HookManager();
// Register hooks from options
if (options.hooks) {
for (const [hookName, callback] of Object.entries(options.hooks)) {
this.hooks.on(hookName, callback);
}
}
// Bash configuration
this.enableBash = !!options.enableBash;
this.bashConfig = options.bashConfig || {};
// Search configuration - support both path (single) and allowedFolders (array)
if (options.allowedFolders && Array.isArray(options.allowedFolders)) {
this.allowedFolders = options.allowedFolders;
} else if (options.path) {
this.allowedFolders = [options.path];
} else {
this.allowedFolders = [process.cwd()];
}
// API configuration
this.clientApiProvider = options.provider || null;
this.clientApiModel = options.model || null;
this.clientApiKey = null; // Will be set from environment
this.clientApiUrl = null;
// Initialize token counter
this.tokenCounter = new TokenCounter();
if (this.debug) {
console.log(`[DEBUG] Generated session ID for agent: ${this.sessionId}`);
console.log(`[DEBUG] Maximum tool iterations configured: ${MAX_TOOL_ITERATIONS}`);
console.log(`[DEBUG] Allow Edit (implement tool): ${this.allowEdit}`);
}
// Initialize tools
this.initializeTools();
// Initialize chat history
this.history = [];
// Initialize image tracking for agentic loop
this.pendingImages = new Map(); // Map<imagePath, base64Data> to avoid reloading
this.currentImages = []; // Currently active images for AI calls
// Initialize event emitter for tool execution updates
this.events = new EventEmitter();
// MCP configuration
this.enableMcp = !!options.enableMcp || process.env.ENABLE_MCP === '1';
this.mcpConfigPath = options.mcpConfigPath || null;
this.mcpConfig = options.mcpConfig || null;
this.mcpServers = options.mcpServers || null; // Deprecated, keep for backward compatibility
this.mcpBridge = null;
this._mcpInitialized = false; // Track if MCP initialization has been attempted
// Initialize the AI model
this.initializeModel();
// Note: MCP initialization is now done in initialize() method
// Constructor must remain synchronous for backward compatibility
}
/**
* Initialize the agent asynchronously (must be called after constructor)
* This method initializes MCP and merges MCP tools into the tool list, and loads history from storage
*/
async initialize() {
// Load history from storage adapter
try {
const history = await this.storageAdapter.loadHistory(this.sessionId);
this.history = history;
if (this.debug && history.length > 0) {
console.log(`[DEBUG] Loaded ${history.length} messages from storage for session ${this.sessionId}`);
}
// Emit storage load hook
await this.hooks.emit(HOOK_TYPES.STORAGE_LOAD, {
sessionId: this.sessionId,
messages: history
});
} catch (error) {
console.error(`[ERROR] Failed to load history from storage:`, error);
// Continue with empty history if storage fails
this.history = [];
}
// Initialize MCP if enabled and not already initialized
if (this.enableMcp && !this._mcpInitialized) {
this._mcpInitialized = true; // Prevent multiple initialization attempts
try {
await this.initializeMCP();
// Merge MCP tools into toolImplementations for unified access
if (this.mcpBridge) {
const mcpTools = this.mcpBridge.mcpTools || {};
for (const [toolName, toolImpl] of Object.entries(mcpTools)) {
this.toolImplementations[toolName] = toolImpl;
}
}
// Log all available tools after MCP initialization
if (this.debug) {
const allToolNames = Object.keys(this.toolImplementations);
const nativeToolCount = allToolNames.filter(name => !this.mcpBridge?.mcpTools?.[name]).length;
const mcpToolCount = allToolNames.length - nativeToolCount;
console.error('\n[DEBUG] ========================================');
console.error('[DEBUG] All Tools Initialized');
console.error(`[DEBUG] Native tools: ${nativeToolCount}, MCP tools: ${mcpToolCount}`);
console.error('[DEBUG] Available tools:');
for (const toolName of allToolNames) {
const isMCP = this.mcpBridge?.mcpTools?.[toolName] ? ' (MCP)' : '';
console.error(`[DEBUG] - ${toolName}${isMCP}`);
}
console.error('[DEBUG] ========================================\n');
}
} catch (error) {
console.error('[MCP ERROR] Failed to initialize MCP:', error.message);
if (this.debug) {
console.error('[MCP DEBUG] Full error details:', error);
}
this.mcpBridge = null;
}
}
// Emit agent initialized hook
await this.hooks.emit(HOOK_TYPES.AGENT_INITIALIZED, {
sessionId: this.sessionId,
agent: this
});
}
/**
* Initialize tools with configuration
*/
initializeTools() {
const configOptions = {
sessionId: this.sessionId,
debug: this.debug,
defaultPath: this.allowedFolders.length > 0 ? this.allowedFolders[0] : process.cwd(),
allowedFolders: this.allowedFolders,
outline: this.outline,
enableBash: this.enableBash,
bashConfig: this.bashConfig
};
// Create base tools
const baseTools = createTools(configOptions);
// Create wrapped tools with event emission
const wrappedTools = createWrappedTools(baseTools);
// Store tool instances for execution
this.toolImplementations = {
search: wrappedTools.searchToolInstance,
query: wrappedTools.queryToolInstance,
extract: wrappedTools.extractToolInstance,
delegate: wrappedTools.delegateToolInstance,
listFiles: listFilesToolInstance,
searchFiles: searchFilesToolInstance,
};
// Add bash tool if enabled
if (this.enableBash && wrappedTools.bashToolInstance) {
this.toolImplementations.bash = wrappedTools.bashToolInstance;
}
// Store wrapped tools for ACP system
this.wrappedTools = wrappedTools;
// Log available tools in debug mode
if (this.debug) {
console.error('\n[DEBUG] ========================================');
console.error('[DEBUG] ProbeAgent Tools Initialized');
console.error('[DEBUG] Session ID:', this.sessionId);
console.error('[DEBUG] Available tools:');
for (const toolName of Object.keys(this.toolImplementations)) {
console.error(`[DEBUG] - ${toolName}`);
}
console.error('[DEBUG] Allowed folders:', this.allowedFolders);
console.error('[DEBUG] Outline mode:', this.outline);
console.error('[DEBUG] ========================================\n');
}
}
/**
* Initialize the AI model based on available API keys and forced provider setting
*/
initializeModel() {
// Get model override if provided (options.model takes precedence over environment variable)
const modelName = this.clientApiModel || process.env.MODEL_NAME;
// Check if we're in test mode and should use mock provider
if (process.env.NODE_ENV === 'test' || process.env.USE_MOCK_AI === 'true') {
this.initializeMockModel(modelName);
return;
}
// Get API keys from environment variables
// Support both ANTHROPIC_API_KEY and ANTHROPIC_AUTH_TOKEN (used by Z.AI)
const anthropicApiKey = process.env.ANTHROPIC_API_KEY || process.env.ANTHROPIC_AUTH_TOKEN;
const openaiApiKey = process.env.OPENAI_API_KEY;
// Support both GOOGLE_GENERATIVE_AI_API_KEY (official) and GOOGLE_API_KEY (legacy)
const googleApiKey = process.env.GOOGLE_GENERATIVE_AI_API_KEY || process.env.GOOGLE_API_KEY;
const awsAccessKeyId = process.env.AWS_ACCESS_KEY_ID;
const awsSecretAccessKey = process.env.AWS_SECRET_ACCESS_KEY;
const awsRegion = process.env.AWS_REGION;
const awsSessionToken = process.env.AWS_SESSION_TOKEN;
const awsApiKey = process.env.AWS_BEDROCK_API_KEY;
// Get custom API URLs if provided
const llmBaseUrl = process.env.LLM_BASE_URL;
const anthropicApiUrl = process.env.ANTHROPIC_API_URL || process.env.ANTHROPIC_BASE_URL || llmBaseUrl;
const openaiApiUrl = process.env.OPENAI_API_URL || llmBaseUrl;
const googleApiUrl = process.env.GOOGLE_API_URL || llmBaseUrl;
const awsBedrockBaseUrl = process.env.AWS_BEDROCK_BASE_URL || llmBaseUrl;
// Use client-forced provider or environment variable
const forceProvider = this.clientApiProvider || (process.env.FORCE_PROVIDER ? process.env.FORCE_PROVIDER.toLowerCase() : null);
if (this.debug) {
const hasAwsCredentials = !!(awsAccessKeyId && awsSecretAccessKey && awsRegion);
const hasAwsApiKey = !!awsApiKey;
console.log(`[DEBUG] Available API keys: Anthropic=${!!anthropicApiKey}, OpenAI=${!!openaiApiKey}, Google=${!!googleApiKey}, AWS Bedrock=${hasAwsCredentials || hasAwsApiKey}`);
if (hasAwsCredentials) console.log(`[DEBUG] AWS credentials: AccessKey=${!!awsAccessKeyId}, SecretKey=${!!awsSecretAccessKey}, Region=${awsRegion}, SessionToken=${!!awsSessionToken}`);
if (hasAwsApiKey) console.log(`[DEBUG] AWS API Key provided`);
if (awsBedrockBaseUrl) console.log(`[DEBUG] AWS Bedrock base URL: ${awsBedrockBaseUrl}`);
console.log(`[DEBUG] Force provider: ${forceProvider || '(not set)'}`);
if (modelName) console.log(`[DEBUG] Model override: ${modelName}`);
}
// Check if a specific provider is forced
if (forceProvider) {
if (forceProvider === 'anthropic' && anthropicApiKey) {
this.initializeAnthropicModel(anthropicApiKey, anthropicApiUrl, modelName);
return;
} else if (forceProvider === 'openai' && openaiApiKey) {
this.initializeOpenAIModel(openaiApiKey, openaiApiUrl, modelName);
return;
} else if (forceProvider === 'google' && googleApiKey) {
this.initializeGoogleModel(googleApiKey, googleApiUrl, modelName);
return;
} else if (forceProvider === 'bedrock' && ((awsAccessKeyId && awsSecretAccessKey && awsRegion) || awsApiKey)) {
this.initializeBedrockModel(awsAccessKeyId, awsSecretAccessKey, awsRegion, awsSessionToken, awsApiKey, awsBedrockBaseUrl, modelName);
return;
}
console.warn(`WARNING: Forced provider "${forceProvider}" selected but required API key is missing or invalid! Falling back to auto-detection.`);
}
// If no provider is forced or forced provider failed, use the first available API key
if (anthropicApiKey) {
this.initializeAnthropicModel(anthropicApiKey, anthropicApiUrl, modelName);
} else if (openaiApiKey) {
this.initializeOpenAIModel(openaiApiKey, openaiApiUrl, modelName);
} else if (googleApiKey) {
this.initializeGoogleModel(googleApiKey, googleApiUrl, modelName);
} else if ((awsAccessKeyId && awsSecretAccessKey && awsRegion) || awsApiKey) {
this.initializeBedrockModel(awsAccessKeyId, awsSecretAccessKey, awsRegion, awsSessionToken, awsApiKey, awsBedrockBaseUrl, modelName);
} else {
throw new Error('No API key provided. Please set ANTHROPIC_API_KEY (or ANTHROPIC_AUTH_TOKEN), OPENAI_API_KEY, GOOGLE_GENERATIVE_AI_API_KEY (or GOOGLE_API_KEY), AWS credentials (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION), or AWS_BEDROCK_API_KEY environment variables.');
}
}
/**
* Initialize Anthropic model
*/
initializeAnthropicModel(apiKey, apiUrl, modelName) {
this.provider = createAnthropic({
apiKey: apiKey,
...(apiUrl && { baseURL: apiUrl }),
});
this.model = modelName || 'claude-sonnet-4-5-20250929';
this.apiType = 'anthropic';
if (this.debug) {
console.log(`Using Anthropic API with model: ${this.model}${apiUrl ? ` (URL: ${apiUrl})` : ''}`);
}
}
/**
* Initialize OpenAI model
*/
initializeOpenAIModel(apiKey, apiUrl, modelName) {
this.provider = createOpenAI({
compatibility: 'strict',
apiKey: apiKey,
...(apiUrl && { baseURL: apiUrl }),
});
this.model = modelName || 'gpt-5-thinking';
this.apiType = 'openai';
if (this.debug) {
console.log(`Using OpenAI API with model: ${this.model}${apiUrl ? ` (URL: ${apiUrl})` : ''}`);
}
}
/**
* Initialize Google model
*/
initializeGoogleModel(apiKey, apiUrl, modelName) {
this.provider = createGoogleGenerativeAI({
apiKey: apiKey,
...(apiUrl && { baseURL: apiUrl }),
});
this.model = modelName || 'gemini-2.5-pro';
this.apiType = 'google';
if (this.debug) {
console.log(`Using Google API with model: ${this.model}${apiUrl ? ` (URL: ${apiUrl})` : ''}`);
}
}
/**
* Initialize AWS Bedrock model
*/
initializeBedrockModel(accessKeyId, secretAccessKey, region, sessionToken, apiKey, baseURL, modelName) {
// Build configuration object, only including defined values
const config = {};
// Authentication - prefer API key if provided, otherwise use AWS credentials
if (apiKey) {
config.apiKey = apiKey;
} else if (accessKeyId && secretAccessKey) {
config.accessKeyId = accessKeyId;
config.secretAccessKey = secretAccessKey;
if (sessionToken) {
config.sessionToken = sessionToken;
}
}
// Region is required for AWS credentials but optional for API key
if (region) {
config.region = region;
}
// Optional base URL
if (baseURL) {
config.baseURL = baseURL;
}
this.provider = createAmazonBedrock(config);
this.model = modelName || 'anthropic.claude-sonnet-4-20250514-v1:0';
this.apiType = 'bedrock';
if (this.debug) {
const authMethod = apiKey ? 'API Key' : 'AWS Credentials';
const regionInfo = region ? ` (Region: ${region})` : '';
const baseUrlInfo = baseURL ? ` (Base URL: ${baseURL})` : '';
console.log(`Using AWS Bedrock API with model: ${this.model}${regionInfo} [Auth: ${authMethod}]${baseUrlInfo}`);
}
}
/**
* Process assistant response content and detect/load image references
* @param {string} content - The assistant's response content
* @returns {Promise<void>}
*/
async processImageReferences(content) {
if (!content) return;
// First, try to parse listFiles output format to extract directory context
const listFilesDirectories = this.extractListFilesDirectories(content);
// Enhanced pattern to detect image file mentions in various contexts
// Looks for: "image", "file", "screenshot", etc. followed by path-like strings with image extensions
const extensionsPattern = `(?:${SUPPORTED_IMAGE_EXTENSIONS.join('|')})`;
const imagePatterns = [
// Direct file path mentions: "./screenshot.png", "/path/to/image.jpg", etc.
new RegExp(`(?:\\.?\\.\\/)?[^\\s"'<>\\[\\]]+\\\.${extensionsPattern}(?!\\w)`, 'gi'),
// Contextual mentions: "look at image.png", "the file screenshot.jpg shows"
new RegExp(`(?:image|file|screenshot|diagram|photo|picture|graphic)\\s*:?\\s*([^\\s"'<>\\[\\]]+\\.${extensionsPattern})(?!\\w)`, 'gi'),
// Tool result mentions: often contain file paths
new RegExp(`(?:found|saved|created|generated).*?([^\\s"'<>\\[\\]]+\\.${extensionsPattern})(?!\\w)`, 'gi')
];
const foundPaths = new Set();
// Extract potential image paths using all patterns
for (const pattern of imagePatterns) {
let match;
while ((match = pattern.exec(content)) !== null) {
// For patterns with capture groups, use the captured path; otherwise use the full match
const imagePath = match[1] || match[0];
if (imagePath && imagePath.length > 0) {
foundPaths.add(imagePath.trim());
}
}
}
if (foundPaths.size === 0) return;
if (this.debug) {
console.log(`[DEBUG] Found ${foundPaths.size} potential image references:`, Array.from(foundPaths));
}
// Process each found path
for (const imagePath of foundPaths) {
// Try to resolve the path with directory context from listFiles output
let resolvedPath = imagePath;
// If the path is just a filename (no directory separator), try to find it in listFiles directories
if (!imagePath.includes('/') && !imagePath.includes('\\')) {
for (const dir of listFilesDirectories) {
const potentialPath = resolve(dir, imagePath);
// Check if this file exists by attempting to load it
const loaded = await this.loadImageIfValid(potentialPath);
if (loaded) {
// Successfully loaded with this directory context
if (this.debug) {
console.log(`[DEBUG] Resolved ${imagePath} to ${potentialPath} using listFiles context`);
}
break; // Found it, no need to try other directories
}
}
} else {
// Path already has directory info, load as-is
await this.loadImageIfValid(resolvedPath);
}
}
}
/**
* Extract directory paths from tool output (both listFiles and extract tool)
* @param {string} content - Tool output content
* @returns {string[]} - Array of directory paths
*/
extractListFilesDirectories(content) {
const directories = [];
// Pattern 1: Extract directory from extract tool "File:" header
// Format: "File: /path/to/file.md" or "File: ./relative/path/file.md"
const fileHeaderPattern = /^File:\s+(.+)$/gm;
let match;
while ((match = fileHeaderPattern.exec(content)) !== null) {
const filePath = match[1].trim();
// Get directory from file path
const dir = dirname(filePath);
if (dir && dir !== '.') {
directories.push(dir);
if (this.debug) {
console.log(`[DEBUG] Extracted directory context from File header: ${dir}`);
}
}
}
// Pattern 2: Extract directory from listFiles output format: "/path/to/directory:"
// Matches absolute paths (/path/to/dir:) or current directory markers (.:) or Windows paths (C:\path:) at start of line
// Very strict to avoid matching random text like ".Something:" or "./Some text:"
const dirPattern = /^(\/[^\n:]+|[A-Z]:\\[^\n:]+|\.\.?(?:\/[^\n:]+)?):\s*$/gm;
while ((match = dirPattern.exec(content)) !== null) {
const dirPath = match[1].trim();
// Strict validation: must look like an actual filesystem path
// Reject if contains spaces or other characters that wouldn't be in listFiles output
const hasInvalidChars = /\s/.test(dirPath); // Contains whitespace
// Validate this looks like an actual path, not random text
// Must be either: absolute path (Unix or Windows), or ./ or ../ followed by valid path chars
const isValidPath = (
!hasInvalidChars && (
dirPath.startsWith('/') || // Unix absolute path
/^[A-Z]:\\/.test(dirPath) || // Windows absolute path (C:\)
dirPath === '.' || // Current directory
dirPath === '..' || // Parent directory
(dirPath.startsWith('./') && dirPath.length > 2 && !dirPath.includes(' ')) || // ./something (no spaces)
(dirPath.startsWith('../') && dirPath.length > 3 && !dirPath.includes(' ')) // ../something (no spaces)
)
);
if (isValidPath) {
// Avoid duplicates
if (!directories.includes(dirPath)) {
directories.push(dirPath);
if (this.debug) {
console.log(`[DEBUG] Extracted directory context from listFiles: ${dirPath}`);
}
}
}
}
return directories;
}
/**
* Load and cache an image if it's valid and accessible
* @param {string} imagePath - Path to the image file
* @returns {Promise<boolean>} - True if image was loaded successfully
*/
async loadImageIfValid(imagePath) {
try {
// Skip if already loaded
if (this.pendingImages.has(imagePath)) {
if (this.debug) {
console.log(`[DEBUG] Image already loaded: ${imagePath}`);
}
return true;
}
// Security validation: check if path is within any allowed directory
const allowedDirs = this.allowedFolders && this.allowedFolders.length > 0 ? this.allowedFolders : [process.cwd()];
let absolutePath;
let isPathAllowed = false;
// If absolute path, check if it's within any allowed directory
if (isAbsolute(imagePath)) {
absolutePath = imagePath;
isPathAllowed = allowedDirs.some(dir => absolutePath.startsWith(resolve(dir)));
} else {
// For relative paths, try resolving against each allowed directory
for (const dir of allowedDirs) {
const resolvedPath = resolve(dir, imagePath);
if (resolvedPath.startsWith(resolve(dir))) {
absolutePath = resolvedPath;
isPathAllowed = true;
break;
}
}
}
// Security check: ensure path is within at least one allowed directory
if (!isPathAllowed) {
if (this.debug) {
console.log(`[DEBUG] Image path outside allowed directories: ${imagePath}`);
}
return false;
}
// Check if file exists and get file stats
let fileStats;
try {
fileStats = await stat(absolutePath);
} catch (error) {
if (this.debug) {
console.log(`[DEBUG] Image file not found: ${absolutePath}`);
}
return false;
}
// Validate file size to prevent OOM attacks
if (fileStats.size > MAX_IMAGE_FILE_SIZE) {
if (this.debug) {
console.log(`[DEBUG] Image file too large: ${absolutePath} (${fileStats.size} bytes, max: ${MAX_IMAGE_FILE_SIZE})`);
}
return false;
}
// Validate file extension
const extension = absolutePath.toLowerCase().split('.').pop();
if (!SUPPORTED_IMAGE_EXTENSIONS.includes(extension)) {
if (this.debug) {
console.log(`[DEBUG] Unsupported image format: ${extension}`);
}
return false;
}
// Determine MIME type (from shared config)
const mimeType = IMAGE_MIME_TYPES[extension];
// Read and encode file asynchronously
const fileBuffer = await readFile(absolutePath);
const base64Data = fileBuffer.toString('base64');
const dataUrl = `data:${mimeType};base64,${base64Data}`;
// Cache the loaded image
this.pendingImages.set(imagePath, dataUrl);
if (this.debug) {
console.log(`[DEBUG] Successfully loaded image: ${imagePath} (${fileBuffer.length} bytes)`);
}
return true;
} catch (error) {
if (this.debug) {
console.log(`[DEBUG] Failed to load image ${imagePath}: ${error.message}`);
}
return false;
}
}
/**
* Get all currently loaded images as an array for AI model consumption
* @returns {Array<string>} - Array of base64 data URLs
*/
getCurrentImages() {
return Array.from(this.pendingImages.values());
}
/**
* Clear loaded images (useful for new conversations)
*/
clearLoadedImages() {
this.pendingImages.clear();
this.currentImages = [];
if (this.debug) {
console.log('[DEBUG] Cleared all loaded images');
}
}
/**
* Prepare messages for AI consumption, adding images to the latest user message if available
* @param {Array} messages - Current conversation messages
* @returns {Array} - Messages formatted for AI SDK with potential image content
*/
prepareMessagesWithImages(messages) {
const loadedImages = this.getCurrentImages();
// If no images loaded, return messages as-is
if (loadedImages.length === 0) {
return messages;
}
// Clone messages to avoid mutating the original
const messagesWithImages = [...messages];
// Find the last user message to attach images to
const lastUserMessageIndex = messagesWithImages.map(m => m.role).lastIndexOf('user');
if (lastUserMessageIndex === -1) {
if (this.debug) {
console.log('[DEBUG] No user messages found to attach images to');
}
return messages;
}
const lastUserMessage = messagesWithImages[lastUserMessageIndex];
// Convert to multimodal format if we have images
if (typeof lastUserMessage.content === 'string') {
messagesWithImages[lastUserMessageIndex] = {
...lastUserMessage,
content: [
{ type: 'text', text: lastUserMessage.content },
...loadedImages.map(imageData => ({
type: 'image',
image: imageData
}))
]
};
if (this.debug) {
console.log(`[DEBUG] Added ${loadedImages.length} images to the latest user message`);
}
}
return messagesWithImages;
}
/**
* Initialize mock model for testing
*/
initializeMockModel(modelName) {
this.provider = createMockProvider();
this.model = modelName || 'mock-model';
this.apiType = 'mock';
if (this.debug) {
console.log(`Using Mock API with model: ${this.model}`);
}
}
/**
* Initialize MCP bridge and load tools
*/
async initializeMCP() {
if (!this.enableMcp) return;
try {
let mcpConfig = null;
// Priority order: mcpConfig > mcpConfigPath > mcpServers (deprecated) > auto-discovery
if (this.mcpConfig) {
// Direct config object provided (SDK usage)
mcpConfig = this.mcpConfig;
if (this.debug) {
console.error('[MCP DEBUG] Using provided MCP config object');
}
} else if (this.mcpConfigPath) {
// Explicit config path provided
try {
mcpConfig = loadMCPConfigurationFromPath(this.mcpConfigPath);
if (this.debug) {
console.error(`[MCP DEBUG] Loaded MCP config from: ${this.mcpConfigPath}`);
}
} catch (error) {
throw new Error(`Failed to load MCP config from ${this.mcpConfigPath}: ${error.message}`);
}
} else if (this.mcpServers) {
// Backward compatibility: convert old mcpServers format
mcpConfig = { mcpServers: this.mcpServers };
if (this.debug) {
console.error('[MCP DEBUG] Using deprecated mcpServers option. Consider using mcpConfig instead.');
}
} else {
// No explicit config provided - will attempt auto-discovery
// This is important for CLI usage where config files may exist
if (this.debug) {
console.error('[MCP DEBUG] No explicit MCP config provided, will attempt auto-discovery');
}
// Pass null to trigger auto-discovery in MCPXmlBridge
mcpConfig = null;
}
// Initialize the MCP XML bridge
this.mcpBridge = new MCPXmlBridge({ debug: this.debug });
await this.mcpBridge.initialize(mcpConfig);
const mcpToolNames = this.mcpBridge.getToolNames();
const mcpToolCount = mcpToolNames.length;
if (mcpToolCount > 0) {
if (this.debug) {
console.error('\n[MCP DEBUG] ========================================');
console.error(`[MCP DEBUG] MCP Tools Initialized (${mcpToolCount} tools)`);
console.error('[MCP DEBUG] Available MCP tools:');
for (const toolName of mcpToolNames) {
console.error(`[MCP DEBUG] - ${toolName}`);
}
console.error('[MCP DEBUG] ========================================\n');
}
} else {
// For backward compatibility: if no tools were loaded, set bridge to null
// This maintains the behavior expected by existing tests
if (this.debug) {
console.error('[MCP DEBUG] No MCP tools loaded, setting bridge to null');
}
this.mcpBridge = null;
}
} catch (error) {
console.error('[MCP ERROR] Error initializing MCP:', error.message);
if (this.debug) {
console.error('[MCP DEBUG] Full error details:', error);
}
this.mcpBridge = null;
}
}
/**
* Get the system message with instructions for the AI (XML Tool Format)
*/
async getSystemMessage() {
// Lazy initialize MCP if enabled but not yet initialized
if (this.enableMcp && !this.mcpBridge && !this._mcpInitialized) {
this._mcpInitialized = true; // Prevent multiple initialization attempts
try {
await this.initializeMCP();
// Merge MCP tools into toolImplementations for unified access
if (this.mcpBridge) {
const mcpTools = this.mcpBridge.mcpTools || {};
for (const [toolName, toolImpl] of Object.entries(mcpTools)) {
this.toolImplementations[toolName] = toolImpl;
}
}
} catch (error) {
console.error('[MCP ERROR] Failed to lazy-initialize MCP:', error.message);
if (this.debug) {
console.error('[MCP DEBUG] Full error details:', error);
}
}
}
// Build tool definitions
let toolDefinitions = `
${searchToolDefinition}
${queryToolDefinition}
${extractToolDefinition}
${listFilesToolDefinition}
${searchFilesToolDefinition}
${attemptCompletionToolDefinition}
`;
if (this.allowEdit) {
toolDefinitions += `${implementToolDefinition}\n`;
}
if (this.enableDelegate) {
toolDefinitions += `${delegateToolDefinition}\n`;
}
// Build XML tool guidelines
let xmlToolGuidelines = `
# Tool Use Formatting
Tool use MUST be formatted using XML-style tags. Each tool call requires BOTH opening and closing tags with the exact tool name. Each parameter is similarly enclosed within its own set of opening and closing tags. You MUST use exactly ONE tool call per message until you are ready to complete the task.
**CRITICAL: Every XML tag MUST have both opening <tag> and closing </tag> parts.**
Structure (note the closing tags):
<tool_name>
<parameter1_name>value1</parameter1_name>
<parameter2_name>value2</parameter2_name>
...
</tool_name>
Examples:
<search>
<query>error handling</query>
<path>src/search</path>
</search>
<extract>
<targets>src/config.js:15-25</targets>
</extract>
<attempt_completion>
The configuration is loaded from src/config.js lines 15-25 which contains the database settings.
</attempt_completion>
# Special Case: Quick Completion
If your previous response was already correct and complete, you may respond with just:
<attempt_complete>
This signals to use your previous response as the final answer without repeating content.
# Thinking Process
Before using a tool, analyze the situation within <thinking></thinking> tags. This helps you organize your thoughts and make better decisions.
Example:
<thinking>
I need to find code related to error handling in the search module. The most appropriate tool for this is the search tool, which requires a query parameter and a path parameter. I have both the query ("error handling") and the path ("src/search"), so I can proceed with the search.
</thinking>
# Tool Use Guidelines
1. Think step-by-step about how to achieve the user's goal.
2. Use <thinking></thinking> tags to analyze the situation and determine the appropriate tool.
3. Choose **one** tool that helps achieve the current step.
4. Format the tool call using the specified XML format with BOTH opening and closing tags. Ensure all required parameters are included.
5. **You MUST respond with exactly one tool call in the specified XML format in each turn.**
6. Wait for the tool execution result, which will be provided in the next message (within a <tool_result> block).
7. Analyze the tool result and decide the next step. If more tool calls are needed, repeat steps 2-6.
8. If the task is fully complete and all previous steps were successful, use the \`<attempt_completion>\` tool to provide the final answer. This is the ONLY way to finish the task.
9. If you cannot proceed (e.g., missing information, invalid request), use \`<attempt_completion>\` to explain the issue clearly with an appropriate message directly inside the tags.
10. If your previous response was already correct and complete, you may use \`<attempt_complete>\` as a shorthand.
Available Tools:
- search: Search code using keyword queries.
- query: Search code using structural AST patterns.
- extract: Extract specific code blocks or lines from files.
- listFiles: List files and directories in a specified location.
- searchFiles: Find files matching a glob pattern with recursive search capability.
${this.allowEdit ? '- implement: Implement a feature or fix a bug using aider.\n' : ''}${this.enableDelegate ? '- delegate: Delegate big distinct tasks to specialized probe subagents.\n' : ''}
- attempt_completion: Finalize the task and provide the result to the user.
- attempt_complete: Quick completion using previous response (shorthand).
`;
// Common instructions
const commonInstructions = `<instructions>
Follow these instructions carefully:
1. Analyze the user's request.
2. Use <thinking></thinking> tags to analyze the situation and determine the appropriate tool for each step.
3. Use the available tools step-by-step to fulfill the request.
4. You should always prefer the \`search\` tool for code-related questions. Read full files only if really necessary.
5. Ensure to get really deep and understand the full picture before answering.
6. You MUST respond with exactly ONE tool call per message, using the specified XML format, until the task is complete.
7. Wait for the tool execution result (provided in the next user message in a <tool_result> block) before proceeding to the next step.
8. Once the task is fully completed, use the '<attempt_completion>' tool to provide the final result. This is the ONLY way to signal completion.
9. Prefer concise and focused search queries. Use specific keywords and phrases to narrow down results.
</instructions>
`;
// Define predefined prompts (without the common instructions)
const predefinedPrompts = {
'code-explorer': `You are ProbeChat Code Explorer, a specialized AI assistant focused on helping developers, product managers, and QAs understand and navigate codebases. Your primary function is to answer questions based on code, explain how systems work, and provide insights into code functionality using the provided code analysis tools.
When exploring code:
- Provide clear, concise explanations based on user request
- Find and highlight the most relevant code snippets, if required
- Trace function calls and data flow through the system
- Try to understand the user's intent and provide relevant information
- Understand high level picture
- Balance detail with clarity in your explanations`,
'architect': `You are ProbeChat Architect, a specialized AI assistant focused on software architecture and design. Your primary function is to help users understand, analyze, and design software systems using the provided code analysis tools.
When analyzing code:
- Focus on high-level design patterns and system organization
- Identify architectural patterns and component relationships
- Evaluate system structure and suggest architectural improvements
- Consider scalability, maintainability, and extensibility in your analysis`,
'code-review': `You are ProbeChat Code Reviewer, a specialized AI assistant focused on code quality and best practices. Your primary function is to help users identify issues, suggest improvements, and ensure code follows best practices using the provided code analysis tools.
When reviewing code:
- Look for bugs, edge cases, and potential issues
- Identify performance bottlenecks and optimization opportunities
- Check for security vulnerabilities and best practices
- Evaluate code style and consistency
- Provide specific, actionable suggestions with code examples where appropriate`,
'code-review-template': `You are going to perform code review according to provided user rules. Ensure to review only code provided in diff and latest commit, if provided. However you still need to fully understand how modified code works, and read dependencies if something is not clear.`,
'engineer': `You are senior engineer focused on software architecture and design.
Before jumping on the task you first, in details analyse user request, and try to provide elegant and concise solution.
If solution is clear, you can jump to implementation right away, if not, you can ask user a clarification question, by calling attempt_completion tool, with required details.
Before jumping to implementation:
- Focus on high-level design patterns and system organization
- Identify architectural patterns and component relationships
- Evaluate system structure and suggest architectural improvements
- Focus on backward compatibility.
- Consider scalability, maintainability, and extensibility in your analysis
During the implementation:
- Avoid implementing special cases
- Do not forget to add the tests`,
'support': `You are ProbeChat Support, a specialized AI assistant focused on helping developers troubleshoot issues and solve problems. Your primary function is to help users diagnose errors, understand unexpected behaviors, and find solutions using the provided code analysis tools.
When troubleshooting:
- Focus on finding root causes, not just symptoms
- Explain concepts clearly with appropriate context
- Provide step-by-step guidance to solve problems
- Suggest diagnostic steps to verify solutions
- Consider edge cases and potential complications
- Be empathetic and patient in your explanations`
};
let systemMessage = '';
// Use custom prompt if provided
if (this.customPrompt) {
systemMessage = "<role>" + this.customPrompt + "</role>";
if (this.debug) {
console.log(`[DEBUG] Using custom prompt`);
}
}
// Use predefined prompt if specified
else if (this.promptType && predefinedPrompts[this.promptType]) {
systemMessage = "<role>" + predefinedPrompts[this.promptType] + "</role>";
if (this.debug) {
console.log(`[DEBUG] Using predefined prompt: ${this.promptType}`);
}
// Add common instructions to predefined prompts
systemMessage += commonInstructions;
} else {
// Use the default prompt (code explorer) if no prompt type is specified
systemMessage = "<role>" + predefinedPrompts['code-explorer'] + "</role>";
if (this.debug) {
console.log(`[DEBUG] Using default prompt: code explorer`);
}
// Add common instructions to the default prompt
systemMessage += commonInstructions;
}
// Add XML Tool Guidelines
systemMessage += `\n${xmlToolGuidelines}\n`;
// Add Tool Definitions
systemMessage += `\n# Tools Available\n${toolDefinitions}\n`;
// Add MCP tools if available
if (this.mcpBridge && this.mcpBridge.getToolNames().length > 0) {
systemMessage += `\n## MCP Tools (JSON parameters in <params> tag)\n`;
systemMessage += this.mcpBridge.getXmlToolDefinitions();
systemMessage += `\n\nFor MCP tools, use JSON format within the params tag, e.g.:\n<mcp_tool>\n<params>\n{"key": "value"}\n</params>\n</mcp_tool>\n`;
}
// Add folder information
const searchDirectory = this.allowedFolders.length > 0 ? this.allowedFolders[0] : process.cwd();
if (this.debug) {
console.log(`[DEBUG] Generating file list for base directory: ${searchDirectory}...`);
}
try {
const files = await listFilesByLevel({
directory: searchDirectory,
maxFiles: 100,
respectGitignore: !process.env.PROBE_NO_GITIGNORE || process.env.PROBE_NO_GITIGNORE === '',
cwd: process.cwd()
});
systemMessage += `\n# Repository Structure\n\nYou are working with a repository located at: ${searchDirectory}\n\nHere's an overview of the repository structure (showing up to 100 most relevant files):\n\n\`\`\`\n${files}\n\`\`\`\n\n`;
} catch (error) {
if (this.debug) {
console.log(`[DEBUG] Could not generate file list: ${error.message}`);
}
systemMessage += `\n# Repository Structure\n\nYou are working with a repository located at: ${searchDirectory}\n\n`;
}
if (this.allowedFolders.length > 0) {
systemMessage += `\n**Important**: For security reasons, you can only search within these allowed folders: ${this.allowedFolders.join(', ')}\n\n`;
}
return systemMessage;
}
/**
* Answer a question using the agentic flow
* @param {string} message - The user's question (must be a non-empty string)
* @param {Array} [images] - Optional array of image data (base64 strings or URLs)
* @param {Object|string} [schemaOrOptions] - Can be either:
* - A string: JSON schema for structured output (backwards compatible)
* - An object: Options object with schema and other options
* @param {string} [schemaOrOptions.schema] - JSON schema string for structured output
* @returns {Promise<string>} - The final answer
* @throws {Error} If message is missing, not a string, or empty after trimming
*/
async answer(message, images = [], schemaOrOptions = {}) {
if (!message || typeof message !== 'string' || message.trim().length === 0) {
throw new Error('Message is required and must be a non-empty string');
}
// Handle backwards compatibility - if third argument is a string, treat it as schema
let options = {};
if (typeof schemaOrOptions === 'string') {
options = { schema: schemaOrOptions };
} else {
options = schemaOrOptions || {};
}
try {
// Track initial history length for storage
const oldHistoryLength = this.history.length;
// Emit user message hook
await this.hooks.emit(HOOK_TYPES.MESSAGE_USER, {
sessionId: this.sessionId,
message,
images
});
// Generate system message
const systemMessage = await this.getSystemMessage();
// Create user message with optional image support
let userMessage = { role: 'user', content: message.trim() };
// If images are provided, use multi-modal message format
if (images && images.length > 0) {
userMessage.content = [
{ type: 'text', text: message.trim() },
...images.map(image => ({
type: 'image',
image: image
}))
];
}
// Initialize conversation with existing history + new user message
// If history already contains a system message (from session cloning), reuse it for cache efficiency
// Otherwise add a fresh system message
const hasSystemMessage = this.history.length > 0 && this.history[0].role === 'system';
let currentMessages;
if (hasSystemMessage) {
// Reuse existing system message from history for cache efficiency
currentMessages = [
...this.history,
userMessage
];
if (this.debug) {
console.log('[DEBUG] Reusing existing system message from history for cache efficiency');
}
} else {
// Add fresh system message (first call or empty history)
currentMessages = [
{ role: 'system', content: systemMessage },
...this.history, // Include previous conversation history
userMessage
];
}
let currentIteration = 0;
let completionAttempted = false;
let finalResult = 'I was unable to complete your request due to reaching the maximum number of tool iterations.';
// Adjust max iterations if schema is provided
// +1 for schema formatting
// +2 for potential Mermaid validation retries (can be multiple diagrams)
// +1 for potential JSON correction
const baseMaxIterations = this.maxIterations || MAX_TOOL_ITERATIONS;
const maxIterations = options.schema ? baseMaxIterations + 4 : baseMaxIterations;
if (this.debug) {
console.log(`[DEBUG] Starting agentic flow for question: ${message.substring(0, 100)}...`);
if (options.schema) {
console.log(`[DEBUG] Schema provided, using extended iteration limit: ${maxIterations} (base: ${baseMaxIterations})`);
}
}
// Tool iteration loop
while (currentIteration < maxIterations && !completionAttempted) {
currentIteration++;
if (this.cancelled) throw new Error('Request was cancelled by the user');
if (this.debug) {
console.log(`\n[DEBUG] --- Tool Loop Iteration ${currentIteration}/${maxIterations} ---`);
console.log(`[DEBUG] Current messages count for AI call: ${currentMessages.length}`);
// Log preview of the latest user message (helpful for debugging loops)
const lastUserMessage = [...curren