// @probelabs/probe
// Version:
// Node.js wrapper for the probe code search tool
// 238 lines (209 loc) • 8.45 kB
// JavaScript
/**
 * Search functionality for the probe package
 * @module search
 */
import { execFile } from 'child_process';
import { promisify } from 'util';
import { getBinaryPath, buildCliArgs } from './utils.js';
// Promise-returning form of child_process.execFile so it can be awaited.
const execFileAsync = promisify(execFile);
/**
 * Flag mapping for search options
 * Maps option keys to command-line flags.
 *
 * Frozen because this single table is shared by every search() call and
 * must not be altered at runtime. Key order is kept as declared.
 */
const SEARCH_FLAG_MAP = Object.freeze({
	filesOnly: '--files-only',
	ignore: '--ignore',
	excludeFilenames: '--exclude-filenames',
	reranker: '--reranker',
	frequencySearch: '--frequency',
	exact: '--exact',
	strictElasticSyntax: '--strict-elastic-syntax',
	maxResults: '--max-results',
	maxBytes: '--max-bytes',
	maxTokens: '--max-tokens',
	allowTests: '--allow-tests',
	noMerge: '--no-merge',
	mergeThreshold: '--merge-threshold',
	session: '--session',
	timeout: '--timeout',
	language: '--language',
	format: '--format',
});
/**
 * Search code in a specified directory by running the probe binary's
 * `search` subcommand.
 *
 * The caller's `options` object is never modified: defaults and the
 * environment-provided session ID are tracked in local variables.
 *
 * @param {Object} options - Search options
 * @param {string} options.path - Path to search in
 * @param {string|string[]} options.query - Search query. When an array is given, only its first element is passed to the binary
 * @param {boolean} [options.filesOnly] - Only output file paths
 * @param {string[]} [options.ignore] - Patterns to ignore
 * @param {boolean} [options.excludeFilenames] - Exclude filenames from search
 * @param {string} [options.reranker] - Reranking method ('hybrid', 'hybrid2', 'bm25', 'tfidf')
 * @param {boolean} [options.frequencySearch] - Use frequency-based search
 * @param {boolean} [options.exact] - Perform exact search without tokenization (case-insensitive)
 * @param {boolean} [options.strictElasticSyntax] - Enforce strict ElasticSearch query syntax (require explicit AND/OR operators and quotes)
 * @param {number} [options.maxResults] - Maximum number of results
 * @param {number} [options.maxBytes] - Maximum bytes to return
 * @param {number} [options.maxTokens] - Maximum tokens to return (default: 10000)
 * @param {boolean} [options.allowTests] - Include test files
 * @param {boolean} [options.noMerge] - Don't merge adjacent blocks
 * @param {number} [options.mergeThreshold] - Merge threshold
 * @param {string} [options.session] - Session ID for caching results; falls back to the PROBE_SESSION_ID environment variable
 * @param {number} [options.timeout] - Timeout in seconds (default: 30); also used as the child-process timeout
 * @param {string} [options.language] - Limit search to files of a specific programming language
 * @param {string} [options.format] - Output format ('json', 'outline-xml', etc.)
 * @param {Object} [options.binaryOptions] - Options for getting the binary
 * @param {boolean} [options.binaryOptions.forceDownload] - Force download even if binary exists
 * @param {string} [options.binaryOptions.version] - Specific version to download
 * @param {boolean} [options.json] - Return results as parsed JSON instead of string
 * @returns {Promise<string|Object>} - Raw stdout, or the parsed JSON value when `json` is set or `format` is 'json' (falls back to the raw string if parsing fails)
 * @throws {Error} If path or query is missing, the search times out, or the command fails; the underlying error is attached as `cause`
 */
export async function search(options) {
	if (!options || !options.path) {
		throw new Error('Path is required');
	}
	// Only the first query is sent to the binary, so an empty array (or an
	// empty first entry) is as unusable as a missing query. Without this check
	// the path would silently end up in the query position.
	const queries = Array.isArray(options.query) ? options.query : [options.query];
	if (!options.query || !queries[0]) {
		throw new Error('Query is required');
	}
	const query = queries[0];
	const debug = process.env.DEBUG === '1';

	// Get the binary path
	const binaryPath = await getBinaryPath(options.binaryOptions || {});
	// Build CLI arguments from options
	const cliArgs = buildCliArgs(options, SEARCH_FLAG_MAP);

	// `json: true` without an explicit format asks the binary for JSON output.
	// An explicit format is expected to be emitted by buildCliArgs already
	// (it is part of SEARCH_FLAG_MAP), so it is not pushed again here.
	if (options.json && !options.format) {
		cliArgs.push('--format', 'json');
	}
	const parseJson = Boolean(options.json) || options.format === 'json';

	// Apply defaults locally instead of writing them back onto `options`.
	let maxTokens = options.maxTokens;
	if (!maxTokens) {
		maxTokens = 10000;
		cliArgs.push('--max-tokens', String(maxTokens));
	}
	let timeout = options.timeout;
	if (!timeout) {
		timeout = 30;
		cliArgs.push('--timeout', String(timeout));
	}

	// Safety net: make sure these flags are present even if buildCliArgs
	// did not emit them.
	if (options.language && !cliArgs.includes('--language')) {
		cliArgs.push('--language', options.language);
	}
	if (options.exact && !cliArgs.includes('--exact')) {
		cliArgs.push('--exact');
	}

	// Fall back to the session ID from the environment. The flag has to be
	// pushed explicitly because buildCliArgs has already run at this point.
	let session = options.session;
	if (!session && process.env.PROBE_SESSION_ID) {
		session = process.env.PROBE_SESSION_ID;
		if (!cliArgs.includes('--session')) {
			cliArgs.push('--session', session);
		}
	}

	// Create a single log record with all search parameters (only in debug mode)
	if (debug) {
		let logMessage = `\nSearch: query="${query}" path="${options.path}"`;
		if (options.maxResults) logMessage += ` maxResults=${options.maxResults}`;
		logMessage += ` maxTokens=${maxTokens}`;
		logMessage += ` timeout=${timeout}`;
		if (options.allowTests) logMessage += " allowTests=true";
		if (options.language) logMessage += ` language=${options.language}`;
		if (options.exact) logMessage += " exact=true";
		if (session) logMessage += ` session=${session}`;
		console.error(logMessage);
	}

	// Argument array for execFile (no shell involved, so no shell injection);
	// query and path are the trailing positional arguments.
	const args = ['search', ...cliArgs, query, options.path];
	if (debug) {
		console.error(`Executing: ${binaryPath} ${args.join(' ')}`);
	}

	try {
		const { stdout, stderr } = await execFileAsync(binaryPath, args, {
			timeout: timeout * 1000, // Convert seconds to milliseconds
			maxBuffer: 50 * 1024 * 1024 // 50MB buffer for large outputs
		});

		if (debug) {
			if (stderr) {
				console.error(`stderr: ${stderr}`);
			}
			// The statistics are only ever logged, so they are only computed here.
			const { resultCount, tokenCount, bytesCount } = summarizeSearchOutput(stdout, maxTokens);
			let resultsMessage = `\nSearch results: ${resultCount} matches, ${tokenCount} tokens`;
			if (bytesCount > 0) {
				resultsMessage += `, ${bytesCount} bytes`;
			}
			console.error(resultsMessage);
		}

		if (!parseJson) {
			return stdout;
		}
		try {
			return JSON.parse(stdout);
		} catch (error) {
			console.error('Error parsing JSON output:', error);
			return stdout; // Fall back to string output
		}
	} catch (error) {
		// execFile marks the error as `killed` when it terminated the child
		// after the timeout elapsed.
		if (error.code === 'ETIMEDOUT' || error.killed) {
			const timeoutMessage = `Search operation timed out after ${timeout} seconds.\nBinary: ${binaryPath}\nArgs: ${args.join(' ')}`;
			console.error(timeoutMessage);
			throw new Error(timeoutMessage, { cause: error });
		}
		// Enhance error message with command details, keeping the original error
		const errorMessage = `Error executing search command: ${error.message}\nBinary: ${binaryPath}\nArgs: ${args.join(' ')}`;
		throw new Error(errorMessage, { cause: error });
	}
}

/**
 * Derive the statistics shown in the debug log from the binary's text output.
 *
 * @private
 * @param {string} stdout - Raw output of the search command
 * @param {number} fallbackTokenCount - Value reported when no token count can be found in the output
 * @returns {{resultCount: number, tokenCount: number, bytesCount: number}} Counts for the debug log
 */
function summarizeSearchOutput(stdout, fallbackTokenCount) {
	// Every line that opens with a code fence counts, except the
	// literal "```language" marker.
	const resultCount = stdout
		.split('\n')
		.filter((line) => line.startsWith('```') && !line.includes('```language'))
		.length;

	const bytesMatch = stdout.match(/Total bytes returned:\s*(\d+)/i);
	const bytesCount = bytesMatch ? Number.parseInt(bytesMatch[1], 10) : 0;

	// Prefer the explicit total line, then progressively looser patterns.
	const tokenMatch = stdout.match(/Total tokens returned:\s*(\d+)/i) ||
		stdout.match(/Tokens:?\s*(\d+)/i) ||
		stdout.match(/(\d+)\s*tokens/i) ||
		stdout.match(/token count:?\s*(\d+)/i);
	const tokenCount = tokenMatch ? Number.parseInt(tokenMatch[1], 10) : fallbackTokenCount;

	return { resultCount, tokenCount, bytesCount };
}