UNPKG

@probelabs/probe

Version:

Node.js wrapper for the probe code search tool

233 lines (198 loc) 7.58 kB
/**
 * Extract functionality for the probe package
 * @module extract
 */

import { exec, spawn } from 'child_process';
import { promisify } from 'util';
import { getBinaryPath, buildCliArgs, escapeString } from './utils.js';

const execAsync = promisify(exec);

/**
 * Flag mapping for extract options
 * Maps option keys to command-line flags
 */
const EXTRACT_FLAG_MAP = {
  allowTests: '--allow-tests',
  contextLines: '--context',
  format: '--format',
  inputFile: '--input-file'
};

/**
 * Length of stdin content: character count for strings, `byteLength` for
 * anything else (e.g. a Buffer).
 * @private
 * @param {string|Buffer} content - Content destined for the child's stdin
 * @returns {number} Length used for logging and token estimation
 */
function getContentLength(content) {
  return typeof content === 'string' ? content.length : content.byteLength;
}

/**
 * Extract code blocks from files
 *
 * Exactly how the binary is invoked depends on the input source: when
 * `content` is supplied the binary is spawned and the content is piped to its
 * stdin; otherwise a single shell command line is executed.
 *
 * @param {Object} options - Extract options
 * @param {string[]} [options.files] - Files to extract from (can include line numbers with colon, e.g., "/path/to/file.rs:10")
 * @param {string} [options.inputFile] - Path to a file containing unstructured text to extract file paths from
 * @param {string|Buffer} [options.content] - Content to pipe to stdin (e.g., git diff output). Alternative to inputFile.
 * @param {boolean} [options.allowTests] - Include test files
 * @param {number} [options.contextLines] - Number of context lines to include
 * @param {string} [options.format] - Output format ('markdown', 'plain', 'json', 'xml', 'color', 'outline-xml', 'outline-diff')
 * @param {Object} [options.binaryOptions] - Options for getting the binary
 * @param {boolean} [options.binaryOptions.forceDownload] - Force download even if binary exists
 * @param {string} [options.binaryOptions.version] - Specific version to download
 * @param {boolean} [options.json] - Return results as parsed JSON instead of string
 * @returns {Promise<string|Object>} - Extracted code as string or parsed JSON
 * @throws {Error} If no options/input source is given or the extraction fails
 */
export async function extract(options) {
  if (!options) {
    throw new Error('Options object is required');
  }

  // Either files, inputFile, or content must be provided
  const hasFiles = Array.isArray(options.files) && options.files.length > 0;
  const hasInputFile = !!options.inputFile;
  // An empty string is still valid stdin content, so only null/undefined count as "absent".
  const hasContent = options.content !== undefined && options.content !== null;

  if (!hasFiles && !hasInputFile && !hasContent) {
    throw new Error('Extract requires one of: "files" (array of file paths), "inputFile" (path to input file), or "content" (string/buffer for stdin)');
  }

  // Get the binary path
  const binaryPath = await getBinaryPath(options.binaryOptions || {});

  // Build CLI arguments from options (excluding content which goes via stdin)
  const { content: _stdinContent, ...filteredOptions } = options;
  const cliArgs = buildCliArgs(filteredOptions, EXTRACT_FLAG_MAP);

  // If json option is true and no explicit format was requested, ask the binary for JSON
  if (options.json && !options.format) {
    cliArgs.push('--format', 'json');
  }

  // Add files as positional arguments if provided
  // NOTE(review): the escaped form is also used on the spawn (no-shell) path
  // below — confirm escapeString's output is valid as a raw argv entry.
  if (hasFiles) {
    for (const file of options.files) {
      cliArgs.push(escapeString(file));
    }
  }

  // Create a single log record with all extract parameters (only in debug mode)
  if (process.env.DEBUG === '1') {
    let logMessage = `\nExtract:`;
    if (options.files && options.files.length > 0) {
      logMessage += ` files="${options.files.join(', ')}"`;
    }
    if (options.inputFile) logMessage += ` inputFile="${options.inputFile}"`;
    if (options.content) logMessage += ` content=(${getContentLength(options.content)} bytes)`;
    if (options.allowTests) logMessage += " allowTests=true";
    if (options.contextLines) logMessage += ` contextLines=${options.contextLines}`;
    if (options.format) logMessage += ` format=${options.format}`;
    if (options.json) logMessage += " json=true";
    console.error(logMessage);
  }

  // If content is provided, use spawn with stdin piping
  if (hasContent) {
    return extractWithStdin(binaryPath, cliArgs, options.content, options);
  }

  // Otherwise use exec for simple command execution
  const command = `${binaryPath} extract ${cliArgs.join(' ')}`;

  try {
    const { stdout, stderr } = await execAsync(command);

    if (stderr) {
      console.error(`stderr: ${stderr}`);
    }

    return processExtractOutput(stdout, options);
  } catch (error) {
    // Enhance error message with command details, keeping the original error
    // reachable via `cause` so its stack/exit code are not lost.
    const errorMessage = `Error executing extract command: ${error.message}\nCommand: ${command}`;
    throw new Error(errorMessage, { cause: error });
  }
}

/**
 * Extract with content piped to stdin
 *
 * Spawns `<binaryPath> extract <cliArgs...>` without a shell, writes `content`
 * to the child's stdin, and settles once the child closes.
 *
 * @private
 * @param {string} binaryPath - Path of the executable to spawn
 * @param {string[]} cliArgs - Arguments placed after the `extract` subcommand
 * @param {string|Buffer} content - Data written to the child's stdin
 * @param {Object} options - Original extract options, forwarded to processExtractOutput
 * @returns {Promise<string|Object>} Resolves with the processed output; rejects
 *   on spawn failure, stdin failure (other than EPIPE), or non-zero exit code
 */
function extractWithStdin(binaryPath, cliArgs, content, options) {
  return new Promise((resolve, reject) => {
    const childProcess = spawn(binaryPath, ['extract', ...cliArgs], {
      stdio: ['pipe', 'pipe', 'pipe']
    });

    let stdout = '';
    let stderr = '';

    // Collect stdout
    childProcess.stdout.on('data', (data) => {
      stdout += data.toString();
    });

    // Collect stderr
    childProcess.stderr.on('data', (data) => {
      stderr += data.toString();
    });

    // Handle process exit
    childProcess.on('close', (code) => {
      if (stderr && process.env.DEBUG === '1') {
        console.error(`stderr: ${stderr}`);
      }

      if (code !== 0) {
        reject(new Error(`Extract command failed with exit code ${code}: ${stderr}`));
        return;
      }

      try {
        resolve(processExtractOutput(stdout, options));
      } catch (error) {
        reject(error);
      }
    });

    // Handle errors
    childProcess.on('error', (error) => {
      reject(new Error(`Failed to spawn extract process: ${error.message}`));
    });

    // Without a listener, a write failure on stdin is an unhandled 'error'
    // event that takes down the whole process. EPIPE only means the child
    // stopped reading early; the 'close' handler above reports that outcome.
    childProcess.stdin.on('error', (error) => {
      if (error.code === 'EPIPE') {
        return;
      }
      reject(new Error(`Failed to write to extract process stdin: ${error.message}`, { cause: error }));
    });

    // Write content to stdin and close (strings and Buffers are handled alike)
    childProcess.stdin.end(content);
  });
}

/**
 * Process extract output and add token usage information
 *
 * Request tokens are estimated as one token per four characters/bytes of the
 * input description (file list, input file path, or stdin content), rounded
 * up. Response tokens are read from a "Total tokens returned: N" line in the
 * output when present.
 *
 * @private
 * @param {string} stdout - Raw standard output of the extract command
 * @param {Object} options - Extract options (reads files, inputFile, content, json, format)
 * @returns {string|Object} Parsed JSON (with `token_usage` added when missing)
 *   if JSON was requested and parsing succeeds; otherwise the output string
 *   with a "Token Usage:" footer appended when not already present
 */
function processExtractOutput(stdout, options) {
  const tokenUsage = {
    requestTokens: 0,
    responseTokens: 0,
    totalTokens: 0
  };

  // Calculate approximate request tokens. An empty files array must not
  // shadow inputFile/content, so it is treated the same as "no files".
  let requestLength = 0;
  if (Array.isArray(options.files) && options.files.length > 0) {
    requestLength = options.files.join(' ').length;
  } else if (options.inputFile) {
    requestLength = options.inputFile.length;
  } else if (options.content) {
    requestLength = getContentLength(options.content);
  }
  // Token counts are whole numbers; round the estimate up.
  tokenUsage.requestTokens = Math.ceil(requestLength / 4);

  // Try to extract token information from the output
  const tokenMatch = stdout.match(/Total tokens returned: (\d+)/);
  if (tokenMatch) {
    tokenUsage.responseTokens = parseInt(tokenMatch[1], 10);
  }

  // The total always reflects both parts, even when the output carries no
  // response-token line.
  tokenUsage.totalTokens = tokenUsage.requestTokens + tokenUsage.responseTokens;

  // Add token usage information at the end if not already present
  let output = stdout;
  if (!output.includes('Token Usage:')) {
    output += `\nToken Usage:\n Request tokens: ${tokenUsage.requestTokens}\n Response tokens: ${tokenUsage.responseTokens}\n Total tokens: ${tokenUsage.totalTokens}\n`;
  }

  // Parse JSON if requested or if format is json
  if (options.json || options.format === 'json') {
    try {
      const jsonOutput = JSON.parse(stdout);

      // Add token usage to JSON output
      if (!jsonOutput.token_usage) {
        jsonOutput.token_usage = {
          request_tokens: tokenUsage.requestTokens,
          response_tokens: tokenUsage.responseTokens,
          total_tokens: tokenUsage.totalTokens
        };
      }

      return jsonOutput;
    } catch (error) {
      console.error('Error parsing JSON output:', error);
      return output; // Fall back to string output with token usage
    }
  }

  return output;
}