UNPKG

glassbox-ai

Version:

Enterprise-grade AI testing framework with reliability, observability, and comprehensive validation

1,348 lines (1,175 loc) 48.7 kB
#!/usr/bin/env node import { Command } from 'commander'; import chalk from 'chalk'; import path from 'path'; import fs from 'fs'; import { parseTestFiles } from './parser.js'; import { runTests } from './runner.js'; import { runTests as runTestsCached } from './runner-cached.js'; import { OptimizedTestRunner } from './optimization/optimized-runner.js'; import { handleErrorWithGracefulDegradation, createUserFriendlyMessage, logger, getErrorStats, checkServiceHealth } from './error-handler.js'; import { safeCreateDirectory, validateFileSystem, getFileSystemInfo, checkDirectory, checkDiskSpace, logger as fsLogger } from './fs-error-handler.js'; import { createCacheCommands } from './commands/cache.js'; import { platformUtils } from './utils/platform-utils.js'; const program = new Command(); // CLI configuration with platform-specific color support const CLI_CONFIG = { colors: platformUtils.supportsColors() ? { pass: chalk.green, fail: chalk.red, warning: chalk.yellow, info: chalk.blue, success: chalk.green, error: chalk.red, highlight: chalk.cyan, muted: chalk.gray } : { pass: (text) => text, fail: (text) => text, warning: (text) => text, info: (text) => text, success: (text) => text, error: (text) => text, highlight: (text) => text, muted: (text) => text }, symbols: { pass: '✓', fail: '✗', warning: '⚠', info: 'ℹ', spinner: ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'] } }; // Error types for better categorization const ERROR_TYPES = { USER_ERROR: 'USER_ERROR', SYSTEM_ERROR: 'SYSTEM_ERROR', CONFIGURATION_ERROR: 'CONFIGURATION_ERROR', NETWORK_ERROR: 'NETWORK_ERROR', PERMISSION_ERROR: 'PERMISSION_ERROR' }; // Documentation URLs const DOCS = { gettingStarted: 'https://github.com/your-repo/glassbox#getting-started', testFormat: 'https://github.com/your-repo/glassbox#test-format', troubleshooting: 'https://github.com/your-repo/glassbox#troubleshooting', configuration: 'https://github.com/your-repo/glassbox#configuration' }; // Available models const AVAILABLE_MODELS = { local: ['mistral:7b', 'llama2:7b', 'codellama:7b'], openai: ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo'], anthropic: ['claude-3-haiku', 'claude-3-sonnet', 'claude-3-opus'] }; // Export formats const EXPORT_FORMATS = { json: 'JSON', xml: 'XML', html: 'HTML', csv: 'CSV' }; let isVerbose = false; let isQuiet = false; let spinnerInterval = null; let spinnerIndex = 0; /** * Export results to different formats * @param {object} results - Test results * @param {string} format - Export format * @param {string} outputPath - Output file path */ function exportResults(results, format, outputPath = null) { const { aggregated, raw } = results; switch (format.toLowerCase()) { case 'json': const jsonOutput = JSON.stringify(results.machineReadable, null, 2); if (outputPath) { fs.writeFileSync(outputPath, jsonOutput); console.log(CLI_CONFIG.colors.success(`✓ Results exported to: ${outputPath}`)); } else { console.log(jsonOutput); } break; case 'xml': const xmlOutput = generateXMLReport(results); if (outputPath) { fs.writeFileSync(outputPath, xmlOutput); console.log(CLI_CONFIG.colors.success(`✓ Results exported to: ${outputPath}`)); } else { console.log(xmlOutput); } break; case 'html': const htmlOutput = generateHTMLReport(results); if (outputPath) { fs.writeFileSync(outputPath, htmlOutput); console.log(CLI_CONFIG.colors.success(`✓ Results exported to: ${outputPath}`)); } else { console.log(htmlOutput); } break; case 'csv': const csvOutput = generateCSVReport(results); if (outputPath) { fs.writeFileSync(outputPath, csvOutput); console.log(CLI_CONFIG.colors.success(`✓ Results exported to: ${outputPath}`)); } else { console.log(csvOutput); } break; default: throw new Error(`Unsupported export format: ${format}`); } } /** * Generate XML report * @param {object} results - Test results * @returns {string} XML report */ function generateXMLReport(results) { const { aggregated, raw } = results; let xml = '<?xml version="1.0" encoding="UTF-8"?>\n'; xml += '<testResults>\n'; xml += ` <summary>\n`; xml += ` <total>${aggregated.summary.total}</total>\n`; xml += ` <passed>${aggregated.summary.passed}</passed>\n`; xml += ` <failed>${aggregated.summary.failed}</failed>\n`; xml += ` <successRate>${aggregated.summary.successRate.toFixed(1)}</successRate>\n`; xml += ` <totalDuration>${aggregated.summary.totalDuration}</totalDuration>\n`; xml += ` <totalCost>${aggregated.summary.totalCost}</totalCost>\n`; xml += ` </summary>\n`; xml += ` <tests>\n`; raw.forEach(test => { xml += ` <test>\n`; xml += ` <suite>${test.suite}</suite>\n`; xml += ` <name>${test.test}</name>\n`; xml += ` <pass>${test.pass}</pass>\n`; xml += ` <duration>${test.durationMs}</duration>\n`; xml += ` <cost>${test.cost || 0}</cost>\n`; if (test.error) { xml += ` <error>${test.error}</error>\n`; } xml += ` </test>\n`; }); xml += ` </tests>\n`; xml += '</testResults>'; return xml; } /** * Generate HTML report * @param {object} results - Test results * @returns {string} HTML report */ function generateHTMLReport(results) { const { aggregated, raw } = results; const html = `<!DOCTYPE html> <html> <head> <title>Glassbox Test Results</title> <style> body { font-family: Arial, sans-serif; margin: 20px; } .summary { background: #f5f5f5; padding: 20px; border-radius: 5px; margin-bottom: 20px; } .test { margin: 10px 0; padding: 10px; border-left: 4px solid #ccc; } .pass { border-left-color: #4caf50; background: #f1f8e9; } .fail { border-left-color: #f44336; background: #ffebee; } .metrics { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 10px; } .metric { background: white; padding: 15px; border-radius: 3px; text-align: center; } .metric-value { font-size: 24px; font-weight: bold; } .metric-label { color: #666; } </style> </head> <body> <h1>Glassbox Test Results</h1> <div class="summary"> <h2>Summary</h2> <div class="metrics"> <div class="metric"> <div class="metric-value">${aggregated.summary.total}</div> <div class="metric-label">Total Tests</div> </div> <div class="metric"> <div class="metric-value" style="color: #4caf50;">${aggregated.summary.passed}</div> <div class="metric-label">Passed</div> </div> <div class="metric"> <div class="metric-value" style="color: #f44336;">${aggregated.summary.failed}</div> <div class="metric-label">Failed</div> </div> <div class="metric"> <div class="metric-value">${aggregated.summary.successRate.toFixed(1)}%</div> <div class="metric-label">Success Rate</div> </div> </div> </div> <h2>Test Results</h2> ${raw.map(test => ` <div class="test ${test.pass ? 'pass' : 'fail'}"> <strong>${test.suite} - ${test.test}</strong><br> Duration: ${test.durationMs}ms | Cost: $${test.cost || 0}<br> ${test.error ? `<span style="color: #f44336;">Error: ${test.error}</span>` : ''} </div> `).join('')} </body> </html>`; return html; } /** * Generate CSV report * @param {object} results - Test results * @returns {string} CSV report */ function generateCSVReport(results) { const { aggregated, raw } = results; let csv = 'Suite,Test,Pass,Duration (ms),Cost,Error\n'; raw.forEach(test => { const error = test.error ? `"${test.error.replace(/"/g, '""')}"` : ''; csv += `${test.suite},${test.test},${test.pass},${test.durationMs},${test.cost || 0},${error}\n`; }); return csv; } /** * Validate model selection * @param {string} model - Model name * @returns {boolean} Is valid model */ function validateModel(model) { const allModels = [ ...AVAILABLE_MODELS.local, ...AVAILABLE_MODELS.openai, ...AVAILABLE_MODELS.anthropic ]; return allModels.includes(model); } /** * Get model provider type * @param {string} model - Model name * @returns {string} Provider type */ function getModelProvider(model) { if (AVAILABLE_MODELS.local.includes(model)) return 'local'; if (AVAILABLE_MODELS.openai.includes(model)) return 'openai'; if (AVAILABLE_MODELS.anthropic.includes(model)) return 'anthropic'; return 'unknown'; } /** * Create user-friendly error messages * @param {string} errorType - Type of error * @param {string} message - Error message * @param {object} context - Additional context * @returns {string} Formatted error message */ function createUserFriendlyError(errorType, message, context = {}) { let errorOutput = ''; // Error header errorOutput += CLI_CONFIG.colors.error(`\n❌ ${errorType.replace('_', ' ')}: ${message}\n`); // Context information if (context.details) { errorOutput += CLI_CONFIG.colors.muted(`\nDetails: ${context.details}\n`); } // Common solutions based on error type switch (errorType) { case ERROR_TYPES.USER_ERROR: errorOutput += CLI_CONFIG.colors.info('\n💡 How to fix this:\n'); if (context.suggestions) { context.suggestions.forEach(suggestion => { errorOutput += CLI_CONFIG.colors.muted(` • ${suggestion}\n`); }); } break; case ERROR_TYPES.CONFIGURATION_ERROR: errorOutput += CLI_CONFIG.colors.info('\n🔧 Configuration help:\n'); errorOutput += CLI_CONFIG.colors.muted(' • Check your test YAML files for syntax errors\n'); errorOutput += CLI_CONFIG.colors.muted(' • Ensure all required fields are present\n'); errorOutput += CLI_CONFIG.colors.muted(` • See test format guide: ${DOCS.testFormat}\n`); break; case ERROR_TYPES.NETWORK_ERROR: errorOutput += CLI_CONFIG.colors.info('\n🌐 Network troubleshooting:\n'); errorOutput += CLI_CONFIG.colors.muted(' • Check your internet connection\n'); errorOutput += CLI_CONFIG.colors.muted(' • Verify API keys are set correctly\n'); errorOutput += CLI_CONFIG.colors.muted(' • Try running with --verbose for more details\n'); break; case ERROR_TYPES.PERMISSION_ERROR: errorOutput += CLI_CONFIG.colors.info('\n🔐 Permission help:\n'); errorOutput += CLI_CONFIG.colors.muted(' • Check file and directory permissions\n'); errorOutput += CLI_CONFIG.colors.muted(' • Ensure you have write access to .glassbox/\n'); errorOutput += CLI_CONFIG.colors.muted(' • Try running with elevated permissions if needed\n'); break; case ERROR_TYPES.SYSTEM_ERROR: errorOutput += CLI_CONFIG.colors.info('\n🛠️ System troubleshooting:\n'); errorOutput += CLI_CONFIG.colors.muted(' • Check available disk space\n'); errorOutput += CLI_CONFIG.colors.muted(' • Verify Node.js version (requires 16+) \n'); errorOutput += CLI_CONFIG.colors.muted(' • Try reinstalling dependencies: npm install\n'); break; } // Examples if provided if (context.examples) { errorOutput += CLI_CONFIG.colors.info('\n📝 Examples:\n'); context.examples.forEach(example => { errorOutput += CLI_CONFIG.colors.muted(` $ ${example}\n`); }); } // Documentation links if (context.docs) { errorOutput += CLI_CONFIG.colors.info('\n📚 More help:\n'); context.docs.forEach(doc => { errorOutput += CLI_CONFIG.colors.muted(` • ${doc}\n`); }); } // General troubleshooting errorOutput += CLI_CONFIG.colors.info('\n🔍 Need more help?\n'); errorOutput += CLI_CONFIG.colors.muted(` • Run with --verbose for detailed output\n`); errorOutput += CLI_CONFIG.colors.muted(` • Check troubleshooting guide: ${DOCS.troubleshooting}\n`); errorOutput += CLI_CONFIG.colors.muted(` • Report issues: https://github.com/your-repo/glassbox/issues\n`); return errorOutput; } /** * Handle specific error scenarios with user-friendly messages * @param {Error} error - The error object * @param {object} context - Additional context * @returns {string} User-friendly error message */ function handleSpecificError(error, context = {}) { const errorMessage = error.message.toLowerCase(); // Test directory not found if (errorMessage.includes('no such file') && errorMessage.includes('.glassbox')) { return createUserFriendlyError(ERROR_TYPES.USER_ERROR, 'Test directory not found', { details: 'The .glassbox/tests/ directory does not exist.', suggestions: [ 'Run "glassbox init" to create the test directory and sample files', 'Check if you\'re in the correct project directory', 'Verify the .glassbox folder exists in your project root' ], examples: [ 'glassbox init', 'ls -la .glassbox/', 'mkdir -p .glassbox/tests/' ], docs: [DOCS.gettingStarted] } ); } // No test files found if (errorMessage.includes('no valid test files')) { return createUserFriendlyError(ERROR_TYPES.USER_ERROR, 'No test files found', { details: 'No valid YAML test files were found in .glassbox/tests/', suggestions: [ 'Create test files in .glassbox/tests/ directory', 'Use "glassbox init" to create sample test files', 'Check that your YAML files have the correct format' ], examples: [ 'glassbox init', 'ls .glassbox/tests/', 'cat .glassbox/tests/sample-test.yml' ], docs: [DOCS.testFormat, DOCS.gettingStarted] } ); } // YAML parsing errors if (errorMessage.includes('yaml') || errorMessage.includes('parsing')) { return createUserFriendlyError(ERROR_TYPES.CONFIGURATION_ERROR, 'Invalid YAML format in test file', { details: 'One or more test files contain invalid YAML syntax.', suggestions: [ 'Check YAML syntax in your test files', 'Use a YAML validator to find syntax errors', 'Ensure proper indentation and formatting' ], examples: [ 'yamllint .glassbox/tests/*.yml', 'cat .glassbox/tests/your-test.yml' ], docs: [DOCS.testFormat] } ); } // Network/API errors if (errorMessage.includes('network') || errorMessage.includes('connection') || errorMessage.includes('timeout') || errorMessage.includes('econnrefused')) { return createUserFriendlyError(ERROR_TYPES.NETWORK_ERROR, 'Network connection failed', { details: 'Unable to connect to AI model services.', suggestions: [ 'Check your internet connection', 'Verify API keys are set correctly (OPENAI_API_KEY)', 'Ensure Ollama is running (if using local models)', 'Try again in a few minutes' ], examples: [ 'export OPENAI_API_KEY="your-api-key"', 'ollama serve', 'ping api.openai.com' ], docs: [DOCS.configuration] } ); } // Permission errors if (errorMessage.includes('permission') || errorMessage.includes('eacces') || errorMessage.includes('access denied')) { return createUserFriendlyError(ERROR_TYPES.PERMISSION_ERROR, 'Permission denied', { details: 'Unable to read or write files due to permission restrictions.', suggestions: [ 'Check file and directory permissions', 'Ensure you have write access to the project directory', 'Try running with appropriate permissions' ], examples: [ 'chmod 755 .glassbox/', 'sudo glassbox test', 'ls -la .glassbox/' ], docs: [DOCS.troubleshooting] } ); } // Budget exceeded if (errorMessage.includes('budget') || errorMessage.includes('cost')) { return createUserFriendlyError(ERROR_TYPES.USER_ERROR, 'Budget limit exceeded', { details: 'Test execution exceeded the specified budget limit.', suggestions: [ 'Increase the budget limit with --budget option', 'Reduce the number of tests or test complexity', 'Use cheaper models for testing' ], examples: [ 'glassbox test --budget 1.00', 'glassbox test --concurrency 1' ], docs: [DOCS.configuration] } ); } // Model not found/available if (errorMessage.includes('model') && (errorMessage.includes('not found') || errorMessage.includes('unavailable'))) { return createUserFriendlyError(ERROR_TYPES.CONFIGURATION_ERROR, 'AI model not available', { details: 'The specified AI model is not available or not properly configured.', suggestions: [ 'Check if Ollama is running (for local models)', 'Verify API keys are set correctly', 'Try using a different model', 'Check model availability in your region' ], examples: [ 'ollama list', 'export OPENAI_API_KEY="your-key"', 'glassbox test --model gpt-3.5-turbo' ], docs: [DOCS.configuration] } ); } // Default system error return createUserFriendlyError(ERROR_TYPES.SYSTEM_ERROR, 'An unexpected error occurred', { details: error.message, suggestions: [ 'Try running with --verbose for more details', 'Check the error logs for more information', 'Restart the application and try again' ], examples: [ 'glassbox test --verbose', 'npm install', 'node --version' ], docs: [DOCS.troubleshooting] } ); } /** * Display spinner animation * @param {string} message - Message to display with spinner */ function showSpinner(message) { if (isQuiet) return; spinnerInterval = setInterval(() => { process.stdout.write(`\r${CLI_CONFIG.colors.info(CLI_CONFIG.symbols.spinner[spinnerIndex])} ${message}`); spinnerIndex = (spinnerIndex + 1) % CLI_CONFIG.symbols.spinner.length; }, 100); } /** * Stop spinner animation */ function stopSpinner() { if (spinnerInterval) { clearInterval(spinnerInterval); spinnerInterval = null; process.stdout.write('\r' + ' '.repeat(50) + '\r'); // Clear spinner line } } /** * Display progress bar * @param {number} current - Current progress * @param {number} total - Total items * @param {string} label - Label for progress bar */ function showProgressBar(current, total, label = 'Progress') { if (isQuiet) return; const percentage = Math.round((current / total) * 100); const barLength = 30; const filledLength = Math.round((barLength * current) / total); const bar = ''.repeat(filledLength) + ''.repeat(barLength - filledLength); process.stdout.write(`\r${CLI_CONFIG.colors.info(label)}: [${bar}] ${percentage}% (${current}/${total})`); } /** * Display formatted table * @param {Array<Array<string>>} rows - Table rows * @param {Array<string>} headers - Table headers */ function displayTable(headers, rows) { if (isQuiet) return; // Calculate column widths const widths = headers.map((header, i) => { const columnValues = [header, ...rows.map(row => row[i] || '')]; return Math.max(...columnValues.map(val => val.length)); }); // Print header const headerRow = headers.map((header, i) => header.padEnd(widths[i])).join(' | '); console.log(CLI_CONFIG.colors.highlight(headerRow)); console.log(CLI_CONFIG.colors.muted('-'.repeat(headerRow.length))); // Print rows rows.forEach(row => { const formattedRow = row.map((cell, i) => (cell || '').padEnd(widths[i])).join(' | '); console.log(formattedRow); }); console.log(); } /** * Display test results in a professional format * @param {object} results - Test results from runner */ function displayTestResults(results) { if (isQuiet) return; const { raw, aggregated } = results; // Summary section console.log(CLI_CONFIG.colors.highlight('\n📊 TEST RESULTS SUMMARY')); console.log(CLI_CONFIG.colors.muted('='.repeat(50))); const summaryTable = [ ['Metric', 'Value'], ['Total Tests', aggregated.summary.total.toString()], ['Passed', CLI_CONFIG.colors.pass(`${aggregated.summary.passed} (${aggregated.summary.successRate.toFixed(1)}%)`)], ['Failed', CLI_CONFIG.colors.fail(aggregated.summary.failed.toString())], ['Total Duration', `${(aggregated.summary.totalDuration / 1000).toFixed(1)}s`], ['Average Duration', `${(aggregated.performance.averageDuration / 1000).toFixed(1)}s`], ['Total Cost', `$${aggregated.summary.totalCost.toFixed(6)}`], ['Total Tokens', aggregated.summary.totalTokens.toLocaleString()], ['Model Fallback Rate', `${aggregated.models.fallbackRate.toFixed(1)}%`] ]; displayTable(['Metric', 'Value'], summaryTable.map(row => [row[0], row[1]])); // Failure breakdown if (aggregated.summary.failed > 0) { console.log(CLI_CONFIG.colors.warning('⚠️ FAILURE BREAKDOWN')); console.log(CLI_CONFIG.colors.muted('-'.repeat(30))); const failureRows = Object.entries(aggregated.failures.byCategory).map(([category, count]) => [ category, count.toString(), `${((count / aggregated.summary.failed) * 100).toFixed(1)}%` ]); displayTable(['Category', 'Count', 'Percentage'], failureRows); } // Model usage console.log(CLI_CONFIG.colors.info('🤖 MODEL USAGE')); console.log(CLI_CONFIG.colors.muted('-'.repeat(20))); const modelRows = Object.entries(aggregated.models.usage).map(([model, count]) => [ model, count.toString(), `${((count / aggregated.summary.total) * 100).toFixed(1)}%` ]); displayTable(['Model', 'Tests', 'Usage %'], modelRows); // Performance insights console.log(CLI_CONFIG.colors.info('⚡ PERFORMANCE INSIGHTS')); console.log(CLI_CONFIG.colors.muted('-'.repeat(25))); const performanceRows = [ ['Fast tests (<5s)', aggregated.performance.durationDistribution.fast.toString()], ['Medium tests (5-15s)', aggregated.performance.durationDistribution.medium.toString()], ['Slow tests (>15s)', aggregated.performance.durationDistribution.slow.toString()] ]; displayTable(['Category', 'Count'], performanceRows); if (aggregated.performance.slowestTest) { console.log(CLI_CONFIG.colors.warning(`🐌 Slowest test: ${aggregated.performance.slowestTest.suite} - ${aggregated.performance.slowestTest.test} (${aggregated.performance.slowestTest.durationMs}ms)`)); } if (aggregated.performance.fastestTest) { console.log(CLI_CONFIG.colors.success(`⚡ Fastest test: ${aggregated.performance.fastestTest.suite} - ${aggregated.performance.fastestTest.test} (${aggregated.performance.fastestTest.durationMs}ms)`)); } // Detailed results in verbose mode if (isVerbose) { console.log(CLI_CONFIG.colors.info('\n🔍 DETAILED RESULTS')); console.log(CLI_CONFIG.colors.muted('-'.repeat(20))); raw.forEach((result, index) => { const status = result.pass ? CLI_CONFIG.symbols.pass : CLI_CONFIG.symbols.fail; const statusColor = result.pass ? CLI_CONFIG.colors.pass : CLI_CONFIG.colors.fail; const modelInfo = result.fallbackUsed ? ` [${result.modelUsed} via fallback]` : ` [${result.modelUsed}]`; console.log(`${statusColor(status)} ${result.suite} - ${result.test} (${result.durationMs}ms)${modelInfo}`); if (!result.pass && result.details) { result.details.forEach(detail => { console.log(CLI_CONFIG.colors.muted(` └─ ${detail}`)); }); } if (result.error) { console.log(CLI_CONFIG.colors.error(` └─ Error: ${result.error}`)); } }); } } async function initCommand() { const dir = platformUtils.joinPaths(process.cwd(), '.glassbox', 'tests'); const sampleFile = platformUtils.joinPaths(dir, 'sample-test.yml'); console.log(CLI_CONFIG.colors.info('🚀 Initializing Glassbox test environment...')); try { // Validate file system before operations const fsValidation = await validateFileSystem(sampleFile, { operation: 'init' }); if (!fsValidation.valid) { console.error(CLI_CONFIG.colors.error('❌ File system validation failed:')); fsValidation.errors.forEach(error => { console.error(CLI_CONFIG.colors.error(` ${error.message}`)); }); process.exit(1); } // Create directory with safe operations const dirResult = await safeCreateDirectory(dir, { recursive: true }); if (!dirResult.success) { console.error(CLI_CONFIG.colors.error(`❌ Failed to create directory: ${dir}`)); console.error(CLI_CONFIG.colors.error(` ${dirResult.error.message}`)); process.exit(1); } else if (dirResult.existed) { console.log(CLI_CONFIG.colors.warning(`⚠ Directory already exists: ${dir}`)); } else { console.log(CLI_CONFIG.colors.success(`✓ Created directory: ${dir}`)); } // Create sample test file with safe writing if (!fs.existsSync(sampleFile)) { const sampleContent = `name: Sample Test Suite description: Example test suite for Glassbox CLI. settings: max_tokens: 100 max_cost_usd: 0.001 safety_checks: block_pii: true block_email: true block_phone: true block_ssn: true tests: - name: Example Test description: Checks if the model can say hello. prompt: | Say hello. expect: contains: ["hello"] not_contains: ["bye"] `; const { safeWriteFile } = await import('./fs-error-handler.js'); const writeResult = await safeWriteFile(sampleFile, sampleContent, { atomic: true, backup: false }); if (!writeResult.success) { console.error(CLI_CONFIG.colors.error(`❌ Failed to create sample test file: ${sampleFile}`)); console.error(CLI_CONFIG.colors.error(` ${writeResult.error.message}`)); process.exit(1); } else { console.log(CLI_CONFIG.colors.success(`✓ Created sample test file: ${sampleFile}`)); } } else { console.log(CLI_CONFIG.colors.warning(`⚠ Sample test file already exists: ${sampleFile}`)); } console.log(CLI_CONFIG.colors.success('\n🎉 Glassbox initialization complete!')); console.log(CLI_CONFIG.colors.muted('Next steps:')); console.log(CLI_CONFIG.colors.muted(' 1. Edit the sample test file in .glassbox/tests/')); console.log(CLI_CONFIG.colors.muted(' 2. Run "glassbox test" to execute your tests')); console.log(CLI_CONFIG.colors.muted(' 3. Use "glassbox test --help" for more options')); } catch (error) { const errorResult = handleErrorWithGracefulDegradation(error, { operation: 'init', context: { directory: dir, details: 'Failed to initialize Glassbox test environment' } }); console.error(errorResult.userMessage); process.exit(1); } } function versionCommand() { const pkgPath = path.resolve('package.json'); try { const pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8')); console.log(CLI_CONFIG.colors.highlight(`Glassbox CLI version: ${pkg.version}`)); console.log(CLI_CONFIG.colors.muted('A professional AI testing framework')); } catch (err) { const errorResult = handleErrorWithGracefulDegradation(err, { operation: 'version', context: { details: 'Could not read version information' } }); console.error(errorResult.userMessage); } } program .name('glassbox') .description('A professional CLI tool for testing AI systems via structured evaluations') .version('0.1.0') .option('-v, --verbose', 'Enable verbose output with detailed test results') .option('-q, --quiet', 'Suppress all output except errors and final results') .option('--json', 'Output results in JSON format for machine consumption') .option('--no-color', 'Disable colored output') .option('--timeout <ms>', 'Set test timeout in milliseconds (default: 30000)', '30000') .option('--concurrency <number>', 'Set maximum concurrent tests (default: 5)', '5') .option('--test-dir <path>', 'Custom test directory path (default: .glassbox/tests/)') .option('--model <name>', 'Specify AI model to use (e.g., gpt-3.5-turbo, mistral:7b)') .option('--retry <number>', 'Number of retries for failed tests (default: 2)', '2') .option('--budget <amount>', 'Set budget limit in USD') .option('--export <format>', `Export results to file (formats: ${Object.keys(EXPORT_FORMATS).join(', ')})`) .option('--output <path>', 'Output file path for exported results') .option('--cache', 'Enable response caching to improve performance and reduce costs') .option('--no-cache', 'Disable response caching') .option('--optimized', 'Use optimized runner with connection pooling, batching, and streaming') .option('--batch-size <number>', 'Set batch size for optimized processing', '10') .option('--max-concurrency <number>', 'Set maximum concurrent requests', '5') .option('--enable-streaming', 'Enable streaming responses for large outputs') .option('--enable-memory-profiling', 'Enable memory profiling and leak detection') .option('--enable-progress', 'Enable detailed progress indicators'); program .command('test') .description('Run AI prompt tests with professional reporting') .option('--suite <name>', 'Run only tests from specific suite') .option('--filter <pattern>', 'Run only tests matching pattern') .action(async (options, command) => { // Set global flags isVerbose = command.parent.opts().verbose; isQuiet = command.parent.opts().quiet; if (command.parent.opts().noColor) { // Disable colors Object.keys(CLI_CONFIG.colors).forEach(key => { CLI_CONFIG.colors[key] = (text) => text; }); } // Validate model selection const model = command.parent.opts().model; if (model && !validateModel(model)) { console.error(createUserFriendlyError(ERROR_TYPES.CONFIGURATION_ERROR, 'Invalid model specified', { details: `Model "${model}" is not supported.`, suggestions: [ 'Use one of the available models', 'Check model availability and spelling' ], examples: [ 'glassbox test --model gpt-3.5-turbo', 'glassbox test --model mistral:7b' ], docs: [DOCS.configuration] } )); process.exit(1); } // Set test directory const testDir = command.parent.opts().testDir || platformUtils.joinPaths(process.cwd(), '.glassbox', 'tests'); if (!isQuiet) { console.log(CLI_CONFIG.colors.info(`🔍 Loading tests from: ${testDir}`)); if (model) { console.log(CLI_CONFIG.colors.info(`🤖 Using model: ${model} (${getModelProvider(model)})`)); } } try { // Use the new validation system with comprehensive checks const validationOptions = { checkAPIConfig: true, checkNetwork: true, sanitize: true }; const testObjects = await parseTestFiles(testDir, validationOptions); if (!testObjects.length) { throw new Error('No valid test files found'); } if (!isQuiet) { console.log(CLI_CONFIG.colors.success(`✓ Found ${testObjects.length} test suite(s)`)); console.log(CLI_CONFIG.colors.info('🚀 Starting test execution...')); } // Choose runner based on optimization settings const useOptimized = command.parent.opts().optimized; const useCache = command.parent.opts().cache !== false; // Default to true unless explicitly disabled let results; if (useOptimized) { // Use optimized runner with all performance features const optimizedRunner = new OptimizedTestRunner({ maxConcurrency: parseInt(command.parent.opts().maxConcurrency || '5'), batchSize: parseInt(command.parent.opts().batchSize || '10'), enableStreaming: command.parent.opts().enableStreaming, enableCaching: useCache, enableProgress: command.parent.opts().enableProgress !== false, enableMemoryProfiling: command.parent.opts().enableMemoryProfiling }); if (!isQuiet) { console.log(CLI_CONFIG.colors.info('🚀 Optimized runner enabled with:')); console.log(CLI_CONFIG.colors.gray(` • Connection pooling`)); console.log(CLI_CONFIG.colors.gray(` • Batch processing (${optimizedRunner.options.batchSize} tests/batch)`)); console.log(CLI_CONFIG.colors.gray(` • Max concurrency: ${optimizedRunner.options.maxConcurrency}`)); if (optimizedRunner.options.enableStreaming) { console.log(CLI_CONFIG.colors.gray(` • Streaming responses`)); } if (optimizedRunner.options.enableCaching) { console.log(CLI_CONFIG.colors.gray(` • Response caching`)); } if (optimizedRunner.options.enableMemoryProfiling) { console.log(CLI_CONFIG.colors.gray(` • Memory profiling`)); } } results = await optimizedRunner.runTests(testObjects); // Cleanup await optimizedRunner.cleanup(); } else { // Use standard runner const runner = useCache ? runTestsCached : runTests; if (!isQuiet && useCache) { console.log(CLI_CONFIG.colors.info('💾 Caching enabled - responses will be cached for faster subsequent runs')); } results = await runner(testObjects); } // Handle export if specified const exportFormat = command.parent.opts().export; if (exportFormat) { const outputPath = command.parent.opts().output || `glassbox-results.${exportFormat}`; exportResults(results, exportFormat, outputPath); } if (command.parent.opts().json) { console.log(JSON.stringify(results.machineReadable, null, 2)); } else { displayTestResults(results); } // Exit with appropriate code process.exit(results.aggregated.summary.failed > 0 ? 1 : 0); } catch (error) { const errorResult = handleErrorWithGracefulDegradation(error, { operation: 'test', context: { testDir, model, options: command.parent.opts() } }); console.error(errorResult.userMessage); process.exit(1); } }); program .command('init') .description('Create sample .glassbox folder with example tests') .action(initCommand); program .command('validate') .description('Validate test files without running them') .option('--test-dir <path>', 'Custom test directory path (default: .glassbox/tests/)') .option('--check-api', 'Check API keys and network connectivity') .option('--no-sanitize', 'Skip input sanitization') .action(async (options, command) => { const testDir = options.testDir || path.resolve('.glassbox/tests'); if (!isQuiet) { console.log(CLI_CONFIG.colors.info(`🔍 Validating tests in: ${testDir}`)); } try { const { validateTestDirectory } = await import('./validators/input-validator.js'); const validationOptions = { checkAPIConfig: options.checkApi !== false, checkNetwork: options.checkApi !== false, sanitize: options.sanitize !== false }; const results = await validateTestDirectory(testDir, validationOptions); if (results.valid) { console.log(CLI_CONFIG.colors.success('✓ All test files are valid!')); console.log(CLI_CONFIG.colors.info(`📁 Validated ${results.files.length} file(s)`)); } else { console.log(CLI_CONFIG.colors.error('❌ Validation failed!')); console.log(CLI_CONFIG.colors.info(`📁 Checked ${results.files.length} file(s)`)); console.log(CLI_CONFIG.colors.error(`❌ Found ${results.totalErrors} error(s)`)); // Display detailed errors for each file results.files.forEach(fileResult => { if (!fileResult.valid) { console.log(CLI_CONFIG.colors.error(`\n📄 ${fileResult.file}:`)); fileResult.errors.forEach(error => { console.log(` ${error.message}`); }); } }); process.exit(1); } } catch (error) { const errorResult = handleErrorWithGracefulDegradation(error, { operation: 'validate', context: { testDir, options: command.parent.opts() } }); console.error(errorResult.userMessage); process.exit(1); } }); program .command('health') .description('Check health status of AI services and diagnose issues') .option('--provider <name>', 'Check specific provider (openai, ollama)') .option('--verbose', 'Show detailed health information') .action(async (options, command) => { const provider = options.provider; const isVerbose = options.verbose || command.parent.opts().verbose; if (!isQuiet) { console.log(CLI_CONFIG.colors.info('🏥 Checking AI service health...')); } try { const providers = provider ? [provider] : ['openai', 'ollama']; const results = {}; for (const prov of providers) { if (!isQuiet) { console.log(CLI_CONFIG.colors.info(`\n🔍 Checking ${prov}...`)); } const health = await checkServiceHealth(prov); results[prov] = health; if (health.healthy) { console.log(CLI_CONFIG.colors.success(`✓ ${prov} is healthy`)); } else { console.log(CLI_CONFIG.colors.error(`❌ ${prov} is unhealthy: ${health.reason}`)); } if (isVerbose && health.error) { console.log(CLI_CONFIG.colors.gray(` Error: ${health.error}`)); console.log(CLI_CONFIG.colors.gray(` Message: ${health.message}`)); } } // Show error statistics const errorStats = getErrorStats(); if (errorStats.totalErrors > 0) { console.log(CLI_CONFIG.colors.warning(`\n⚠️ Error Statistics:`)); console.log(CLI_CONFIG.colors.gray(` Total errors: ${errorStats.totalErrors}`)); if (Object.keys(errorStats.errorTypes).length > 0) { console.log(CLI_CONFIG.colors.gray(` Error types:`)); Object.entries(errorStats.errorTypes).forEach(([type, count]) => { console.log(CLI_CONFIG.colors.gray(` ${type}: ${count}`)); }); } } // Overall health assessment const healthyProviders = Object.values(results).filter(r => r.healthy).length; const totalProviders = Object.keys(results).length; if (healthyProviders === totalProviders) { console.log(CLI_CONFIG.colors.success('\n🎉 All services are healthy!')); } else if (healthyProviders > 0) { console.log(CLI_CONFIG.colors.warning(`\n⚠️ Partial availability: ${healthyProviders}/${totalProviders} services healthy`)); } else { console.log(CLI_CONFIG.colors.error('\n❌ All services are unhealthy')); process.exit(1); } } catch (error) { const errorResult = handleErrorWithGracefulDegradation(error, { operation: 'health_check', context: { provider } }); console.error(errorResult.userMessage); process.exit(1); } }); program .command('diagnose') .description('Diagnose and troubleshoot common issues') .option('--check-api', 'Check API configuration') .option('--check-network', 'Check network connectivity') .option('--check-models', 'Check model availability') .option('--check-fs', 'Check file system and permissions') .action(async (options, command) => { if (!isQuiet) { console.log(CLI_CONFIG.colors.info('🔧 Running diagnostics...')); } try { const checks = []; // API configuration check if (options.checkApi) { checks.push(async () => { console.log(CLI_CONFIG.colors.info('\n🔑 Checking API configuration...')); const openaiKey = process.env.OPENAI_API_KEY; const anthropicKey = process.env.ANTHROPIC_API_KEY; if (!openaiKey && !anthropicKey) { console.log(CLI_CONFIG.colors.error('❌ No API keys found')); console.log(CLI_CONFIG.colors.gray(' Set OPENAI_API_KEY or ANTHROPIC_API_KEY environment variables')); return false; } if (openaiKey) { console.log(CLI_CONFIG.colors.success('✓ OPENAI_API_KEY is set')); } if (anthropicKey) { console.log(CLI_CONFIG.colors.success('✓ ANTHROPIC_API_KEY is set')); } return true; }); } // Network connectivity check if (options.checkNetwork) { checks.push(async () => { console.log(CLI_CONFIG.colors.info('\n🌐 Checking network connectivity...')); const endpoints = [ { name: 'OpenAI API', url: 'https://api.openai.com' }, { name: 'Anthropic API', url: 'https://api.anthropic.com' }, { name: 'Ollama (local)', url: 'http://localhost:11434' } ]; for (const endpoint of endpoints) { try { const response = await fetch(endpoint.url, { method: 'HEAD', timeout: 5000 }); console.log(CLI_CONFIG.colors.success(`✓ ${endpoint.name} is reachable`)); } catch (error) { console.log(CLI_CONFIG.colors.error(`❌ ${endpoint.name} is not reachable`)); } } return true; }); } // Model availability check if (options.checkModels) { checks.push(async () => { console.log(CLI_CONFIG.colors.info('\n🤖 Checking model availability...')); // Check Ollama models try { const { getOllamaModels } = await import('./models/ollama-client.js'); const models = await getOllamaModels(); if (models.error) { console.log(CLI_CONFIG.colors.error('❌ Ollama service not available')); } else { console.log(CLI_CONFIG.colors.success(`✓ Ollama has ${models.count} models available`)); if (models.models.length > 0) { console.log(CLI_CONFIG.colors.gray(` Available: ${models.models.join(', ')}`)); } } } catch (error) { console.log(CLI_CONFIG.colors.error('❌ Ollama service check failed')); } return true; }); } // File system check if (options.checkFs) { checks.push(async () => { console.log(CLI_CONFIG.colors.info('\n💾 Checking file system...')); const testDir = path.resolve('.glassbox/tests'); const sampleFile = path.join(testDir, 'sample-test.yml'); // Check directory permissions const dirCheck = await checkDirectory(testDir, { operation: 'diagnose' }); if (dirCheck.valid) { console.log(CLI_CONFIG.colors.success('✓ Test directory accessible')); } else { console.log(CLI_CONFIG.colors.error('❌ Test directory issues:')); dirCheck.errors.forEach(error => { console.log(CLI_CONFIG.colors.error(` ${error.message}`)); }); } // Check disk space const spaceCheck = await checkDiskSpace(testDir); if (spaceCheck.sufficient) { console.log(CLI_CONFIG.colors.success(`✓ Sufficient disk space: ${Math.round(spaceCheck.freeSpace / 1024 / 1024)}MB available`)); } else { console.log(CLI_CONFIG.colors.error(`❌ Insufficient disk space: ${spaceCheck.error.message}`)); } // Check file system info const fsInfo = getFileSystemInfo(sampleFile); if (!fsInfo.error) { console.log(CLI_CONFIG.colors.success('✓ File system information:')); console.log(CLI_CONFIG.colors.gray(` Platform: ${fsInfo.system.platform}`)); console.log(CLI_CONFIG.colors.gray(` Architecture: ${fsInfo.system.arch}`)); console.log(CLI_CONFIG.colors.gray(` Home directory: ${fsInfo.system.homedir}`)); console.log(CLI_CONFIG.colors.gray(` Temp directory: ${fsInfo.system.tmpdir}`)); } else { console.log(CLI_CONFIG.colors.error('❌ Could not get file system information')); } return true; }); } // Run all checks for (const check of checks) { await check(); } console.log(CLI_CONFIG.colors.success('\n✅ Diagnostics completed')); } catch (error) { const errorResult = handleErrorWithGracefulDegradation(error, { operation: 'diagnose', context: { options } }); console.error(errorResult.userMessage); process.exit(1); } }); program .command('version') .description('Show version number and framework information') .action(versionCommand); // Add cache management commands program.addCommand(createCacheCommands()); // Enhanced help text program.addHelpText('after', ` Examples: $ glassbox init # Initialize test environment $ glassbox validate # Validate test files without running $ glassbox test # Run all tests $ glassbox test --verbose # Run with detailed output $ glassbox test --json # Output results in JSON format $ glassbox test --budget 0.10 # Set $0.10 budget limit $ glassbox test --concurrency 3 # Run max 3 tests concurrently $ glassbox test --model gpt-4 # Use specific AI model $ glassbox test --test-dir ./tests # Use custom test directory $ glassbox test --export html # Export results as HTML report $ glassbox test --export json --output results.json # Export to specific file $ glassbox test --cache # Enable response caching (default) $ glassbox test --no-cache # Disable response caching $ glassbox test --optimized # Use optimized runner with all performance features $ glassbox test --optimized --batch-size 20 --max-concurrency 10 # Custom optimization settings $ glassbox test --optimized --enable-streaming --enable-memory-profiling # Advanced optimization $ glassbox health # Check AI service health $ glassbox diagnose # Diagnose common issues $ glassbox cache stats # Show cache statistics $ glassbox cache list # List cache entries $ glassbox cache clear # Clear all cache $ glassbox cache cleanup # Clean up expired entries Configuration Options: --timeout <ms> Set test timeout (default: 30000ms) --retry <number> Number of retries for failed tests (default: 2) --model <name> Specify AI model (local: mistral:7b, openai: gpt-3.5-turbo) --test-dir <path> Custom test directory path --budget <amount> Set budget limit in USD --export <format> Export results (json, xml, html, csv) --output <path> Output file path for exports Test Configuration: Tests are defined in YAML files in .glassbox/tests/ Each test file can contain multiple test suites Tests support content validation, PII detection, and cost tracking For more information, visit: https://github.com/your-repo/glassbox `); program.parse(process.argv);