UNPKG

rag-cli-tester

Version:

A lightweight CLI tool for testing RAG (Retrieval-Augmented Generation) systems with different embedding combinations

1,138 lines 77.8 kB
#!/usr/bin/env node "use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const commander_1 = require("commander"); const chalk_1 = __importDefault(require("chalk")); const ora_1 = __importDefault(require("ora")); const inquirer_1 = __importDefault(require("inquirer")); const cli_table3_1 = __importDefault(require("cli-table3")); const fs = __importStar(require("fs")); const path = __importStar(require("path")); const config_1 = require("./config"); const database_1 = require("./database"); const embeddings_1 = require("./embeddings"); const tester_1 = require("./tester"); const enhanced_tester_1 = require("./enhanced-tester"); const production_tester_1 = require("./production-tester"); const program = new commander_1.Command(); // Read version from package.json const packageJson = JSON.parse(fs.readFileSync(path.join(__dirname, '../package.json'), 'utf-8')); program .name('rag-test') .description('CLI tool for testing RAG systems with different embedding combinations') .version(packageJson.version); // Production Test command (ML best practices) program .command('test-production') .description('Run production RAG testing experiment following ML best practices with proper train/validation/test splits') .option('-t, --table <tableName>', 'Table name to test') .option('-c, --columns <columns>', 'Comma-separated list of columns for embeddings') .option('-q, --query <column>', 'Column containing queries') .option('-a, --answer <column>', 'Column containing expected answers') .option('-m, --metric <type>', 'Metric type (brdr|sql|similarity)', 'sql') .option('--train-ratio <number>', 'Training ratio (0-1)', '0.7') .option('--val-ratio <number>', 'Validation ratio (0-1)', '0.15') .option('--test-ratio <number>', 'Testing ratio (0-1)', '0.15') .option('-n, --name <name>', 'Test name') .option('-l, --limit <number>', 'Max combinations to test', '20') .option('-b, --batch-size <number>', 'Batch size for processing', '100') .option('--max-train <number>', 'Maximum training samples', '50000') .option('--max-val <number>', 'Maximum validation samples', '10000') .option('--max-test <number>', 'Maximum testing samples', '10000') .option('--enable-caching', 'Enable embedding caching', false) .option('--sampling <strategy>', 'Data sampling strategy (random|stratified|time_based|query_complexity)', 'random') .option('--cv-folds <number>', 'Cross-validation folds', '5') .option('--min-query-len <number>', 'Minimum query length', '10') .option('--max-query-len <number>', 'Maximum query length', '500') .option('--min-answer-len <number>', 'Minimum answer length', '10') .option('--max-answer-len <number>', 'Maximum answer length', '1000') .option('--timestamp-col <column>', 'Timestamp column for time-based sampling') .option('--time-window <window>', 'Time window for sampling (daily|weekly|monthly)', 'weekly') .action(async (options) => { try { const configManager = new config_1.ConfigManager(); const config = await configManager.loadConfig(); // Interactive mode if no options provided let testConfig; if (!options.table) { testConfig = await interactiveProductionTestSetup(); } else { testConfig = { tableName: options.table, selectedColumns: options.columns?.split(',') || [], queryColumn: options.query || '', answerColumn: options.answer || '', embeddingConfig: config.embedding, metricType: options.metric || 'sql', trainingRatio: parseFloat(options.trainRatio || '0.7'), validationRatio: parseFloat(options.valRatio || '0.15'), testingRatio: parseFloat(options.testRatio || '0.15'), testName: options.name || `ProductionTest_${new Date().toISOString().replace(/[:.]/g, '-')}`, maxCombinations: parseInt(options.limit || '20'), maxTrainingSamples: parseInt(options.maxTrain || '50000'), maxValidationSamples: parseInt(options.maxVal || '10000'), maxTestingSamples: parseInt(options.maxTest || '10000'), batchSize: parseInt(options.batchSize || '100'), enableCaching: options.enableCaching || false, crossValidationFolds: parseInt(options.cvFolds || '5'), minQueryLength: parseInt(options.minQueryLen || '10'), maxQueryLength: parseInt(options.maxQueryLen || '500'), minAnswerLength: parseInt(options.minAnswerLen || '10'), maxAnswerLength: parseInt(options.maxAnswerLen || '1000'), samplingStrategy: options.sampling || 'random', timestampColumn: options.timestampCol, timeWindow: options.timeWindow || 'weekly' }; } await runProductionExperiment(testConfig, config); } catch (error) { console.error(chalk_1.default.red(`❌ Production test failed: ${error instanceof Error ? error.message : String(error)}`)); process.exit(1); } }); // Enhanced Test command for large datasets program .command('test-enhanced') .description('Run enhanced RAG testing experiment optimized for large datasets (1M+ rows)') .option('-t, --table <tableName>', 'Table name to test') .option('-c, --columns <columns>', 'Comma-separated list of columns for embeddings') .option('-q, --query <column>', 'Column containing queries') .option('-a, --answer <column>', 'Column containing expected answers') .option('-m, --metric <type>', 'Metric type (brdr|sql|similarity)', 'brdr') .option('-r, --ratio <number>', 'Training ratio (0-1)', '0.8') .option('-n, --name <name>', 'Test name') .option('-l, --limit <number>', 'Max combinations to test', '20') .option('-b, --batch-size <number>', 'Batch size for processing', '100') .option('--max-training <number>', 'Maximum training samples', '10000') .option('--max-testing <number>', 'Maximum testing samples', '2000') .option('--enable-caching', 'Enable embedding caching', false) .option('--sampling <strategy>', 'Data sampling strategy (random|stratified|sequential)', 'random') .action(async (options) => { try { const configManager = new config_1.ConfigManager(); const config = await configManager.loadConfig(); // Interactive mode if no options provided let testConfig; if (!options.table) { testConfig = await interactiveEnhancedTestSetup(); } else { testConfig = { tableName: options.table, selectedColumns: options.columns?.split(',') || [], queryColumn: options.query || '', answerColumn: options.answer || '', embeddingConfig: config.embedding, metricType: options.metric || 'brdr', trainingRatio: parseFloat(options.ratio || '0.8'), testName: options.name || `EnhancedTest_${new Date().toISOString().replace(/[:.]/g, '-')}`, maxCombinations: parseInt(options.limit || '20'), batchSize: parseInt(options.batchSize || '100'), maxTrainingSamples: parseInt(options.maxTraining || '10000'), maxTestingSamples: parseInt(options.maxTesting || '2000'), enableCaching: options.enableCaching || false, dataSamplingStrategy: options.sampling || 'random' }; } await runEnhancedExperiment(testConfig, config); } catch (error) { console.error(chalk_1.default.red(`❌ Enhanced test failed: ${error instanceof Error ? error.message : String(error)}`)); process.exit(1); } }); // Configure command program .command('configure') .description('Set up configuration for database and embedding model') .action(async () => { try { const configManager = new config_1.ConfigManager(); // Check if .env file already has the required variables const config = await configManager.loadConfig(); if (config.database.url && config.database.anonKey) { console.log(chalk_1.default.yellow('⚠️ Configuration already found in .env file:')); console.log(chalk_1.default.gray(` NEXT_PUBLIC_SUPABASE_URL: ${config.database.url.substring(0, 30)}...`)); console.log(chalk_1.default.gray(` NEXT_PUBLIC_SUPABASE_ANON_KEY: ${config.database.anonKey.substring(0, 20)}...`)); const inquirer = await Promise.resolve().then(() => __importStar(require('inquirer'))); const { proceed } = await inquirer.default.prompt({ type: 'confirm', name: 'proceed', message: 'Do you want to override the existing configuration?', default: false }); if (!proceed) { console.log(chalk_1.default.green('✅ Using existing configuration from .env file')); return; } } await configManager.initializeConfig(); console.log(chalk_1.default.green('\n✅ Configuration completed successfully!')); } catch (error) { console.error(chalk_1.default.red(`❌ Configuration failed: ${error instanceof Error ? error.message : String(error)}`)); process.exit(1); } }); // List available metrics program .command('metrics') .description('List available evaluation metrics') .action(async () => { try { // Import metrics using require to ensure registration happens require('./metrics/index'); // This registers the metrics const { MetricFactory } = require('./metrics/base-metric'); const availableMetrics = MetricFactory.getAvailableMetrics(); console.log(chalk_1.default.bold('📊 Available Evaluation Metrics:\n')); availableMetrics.forEach((metricName) => { try { const metricInfo = MetricFactory.getMetricInfo(metricName); console.log(chalk_1.default.cyan(`• ${metricInfo.name}`)); console.log(chalk_1.default.gray(` ${metricInfo.description}`)); console.log(''); } catch (metricError) { console.error(chalk_1.default.red(`Error getting info for ${metricName}: ${metricError}`)); } }); console.log(chalk_1.default.yellow('💡 Use --metric option with test commands to specify which metric to use')); } catch (error) { console.error(chalk_1.default.red(`❌ Failed to load metrics: ${error instanceof Error ? error.message : String(error)}`)); process.exit(1); } }); // List tables command program .command('tables') .description('List available tables in the database') .action(async () => { const spinner = (0, ora_1.default)('Connecting to database...').start(); try { const configManager = new config_1.ConfigManager(); const config = await configManager.loadConfig(); const validation = configManager.validateConfig(config); if (!validation.isValid) { spinner.fail('Invalid configuration'); console.error(chalk_1.default.red('Configuration errors:')); validation.errors.forEach(error => console.error(chalk_1.default.red(` • ${error}`))); console.log(chalk_1.default.yellow('\nRun "rag-test configure" to set up configuration.')); process.exit(1); } const db = new database_1.DatabaseConnection(config.database); const isConnected = await db.testConnection(); if (!isConnected) { spinner.fail('Failed to connect to database'); console.error('database config is: ', config.database); console.error(chalk_1.default.red('Please check your database configuration.')); process.exit(1); } spinner.text = 'Fetching tables...'; const tables = await db.getTables(); spinner.succeed('Tables retrieved'); if (tables.length === 0) { console.log(chalk_1.default.yellow('No tables found in the database.')); return; } console.log(chalk_1.default.bold('\n📊 Available Tables:')); const table = new cli_table3_1.default({ head: [chalk_1.default.cyan('Table Name')], style: { head: [], border: [] } }); tables.forEach(tableName => { table.push([tableName]); }); console.log(table.toString()); } catch (error) { spinner.fail('Operation failed'); console.error(chalk_1.default.red(`❌ Error: ${error instanceof Error ? error.message : String(error)}`)); process.exit(1); } }); // Inspect table command program .command('inspect <tableName>') .description('Inspect a table structure and sample data') .action(async (tableName) => { const spinner = (0, ora_1.default)('Loading table information...').start(); try { const configManager = new config_1.ConfigManager(); const config = await configManager.loadConfig(); const db = new database_1.DatabaseConnection(config.database); await db.testConnection(); const tableInfo = await db.getTableInfo(tableName); if (!tableInfo) { spinner.fail(`Table "${tableName}" not found`); process.exit(1); } spinner.text = 'Fetching sample data...'; const sampleData = await db.getTableData(tableName, ['*'], 3); spinner.succeed('Table inspection complete'); console.log(chalk_1.default.bold(`\n📋 Table: ${tableName}`)); console.log(chalk_1.default.gray(`Rows: ${tableInfo.rowCount}`)); // Show columns console.log(chalk_1.default.bold('\n🏛️ Columns:')); const columnsTable = new cli_table3_1.default({ head: [chalk_1.default.cyan('Column'), chalk_1.default.cyan('Type'), chalk_1.default.cyan('Nullable')], style: { head: [], border: [] } }); tableInfo.columns.forEach(col => { columnsTable.push([ col.column_name, col.data_type, col.is_nullable ? 'Yes' : 'No' ]); }); console.log(columnsTable.toString()); // Show sample data if (sampleData.length > 0) { console.log(chalk_1.default.bold('\n🔍 Sample Data:')); console.log(JSON.stringify(sampleData, null, 2)); } } catch (error) { spinner.fail('Operation failed'); console.error(chalk_1.default.red(`❌ Error: ${error instanceof Error ? error.message : String(error)}`)); process.exit(1); } }); // Test command program .command('test') .description('Run RAG testing experiment') .option('-t, --table <tableName>', 'Table name to test') .option('-c, --columns <columns>', 'Comma-separated list of columns for embeddings') .option('-q, --query <column>', 'Column containing queries') .option('-a, --answer <column>', 'Column containing expected answers') .option('-m, --metric <type>', 'Metric type (brdr|sql|similarity)', 'brdr') .option('-r, --ratio <number>', 'Training ratio (0-1)', '0.8') .option('-n, --name <name>', 'Test name') .option('-l, --limit <number>', 'Max combinations to test', '20') .action(async (options) => { try { const configManager = new config_1.ConfigManager(); const config = await configManager.loadConfig(); // Interactive mode if no options provided let testConfig; if (!options.table) { testConfig = await interactiveTestSetup(); } else { testConfig = { tableName: options.table, selectedColumns: options.columns?.split(',') || [], queryColumn: options.query || '', answerColumn: options.answer || '', embeddingConfig: config.embedding, metricType: options.metric, trainingRatio: parseFloat(options.ratio), testName: options.name || `Test_${new Date().toISOString().replace(/[:.]/g, '-')}`, maxCombinations: parseInt(options.limit) }; } await runExperiment(testConfig, config); } catch (error) { console.error(chalk_1.default.red(`❌ Test failed: ${error instanceof Error ? error.message : String(error)}`)); process.exit(1); } }); async function interactiveTestSetup() { console.log(chalk_1.default.bold('🧪 Interactive RAG Test Setup\n')); const configManager = new config_1.ConfigManager(); const config = await configManager.loadConfig(); const db = new database_1.DatabaseConnection(config.database); await db.testConnection(); const tables = await db.getTables(); const answers = await inquirer_1.default.prompt([ { type: 'list', name: 'tableName', message: 'Select table to test:', choices: tables } ]); // Get table info for column selection const tableInfo = await db.getTableInfo(answers.tableName); if (!tableInfo) { throw new Error(`Table ${answers.tableName} not found`); } const columnChoices = tableInfo.columns.map(col => ({ name: `${col.column_name} (${col.data_type})`, value: col.column_name })); const columnSelection = await inquirer_1.default.prompt({ type: 'checkbox', name: 'selectedColumns', message: 'Select columns for embeddings (max 5):', choices: columnChoices, validate: (input) => { if (input.length === 0) return 'At least one column must be selected'; if (input.length > 5) return 'Maximum 5 columns allowed'; return true; } }); const querySelection = await inquirer_1.default.prompt({ type: 'list', name: 'queryColumn', message: 'Select query column:', choices: columnChoices }); const answerSelection = await inquirer_1.default.prompt({ type: 'list', name: 'answerColumn', message: 'Select answer column:', choices: columnChoices }); const metricSelection = await inquirer_1.default.prompt({ type: 'list', name: 'metricType', message: 'Select evaluation metric:', choices: [ { name: 'Similarity (general purpose)', value: 'similarity' }, { name: 'BRDR (banking regulation specific)', value: 'brdr' } ] }); const ratioInput = await inquirer_1.default.prompt({ type: 'input', name: 'trainingRatio', message: 'Training ratio (0-1):', default: '0.8', validate: (input) => { const num = parseFloat(input); return (num > 0 && num < 1) || 'Must be between 0 and 1'; } }); const nameInput = await inquirer_1.default.prompt({ type: 'input', name: 'testName', message: 'Test name:', default: `Test_${new Date().toISOString().replace(/[:.]/g, '-')}` }); const limitInput = await inquirer_1.default.prompt({ type: 'input', name: 'maxCombinations', message: 'Maximum combinations to test:', default: '20', validate: (input) => { const num = parseInt(input); return (num > 0 && num <= 100) || 'Must be between 1 and 100'; } }); const moreAnswers = { ...columnSelection, ...querySelection, ...answerSelection, ...metricSelection, ...ratioInput, ...nameInput, ...limitInput }; return { tableName: answers.tableName, selectedColumns: columnSelection.selectedColumns, queryColumn: querySelection.queryColumn, answerColumn: answerSelection.answerColumn, embeddingConfig: config.embedding, metricType: metricSelection.metricType, trainingRatio: parseFloat(ratioInput.trainingRatio), testName: nameInput.testName, maxCombinations: parseInt(limitInput.maxCombinations) }; } async function interactiveEnhancedTestSetup() { console.log(chalk_1.default.bold('🚀 Interactive Enhanced RAG Test Setup (Large Datasets)\n')); const configManager = new config_1.ConfigManager(); const config = await configManager.loadConfig(); const db = new database_1.DatabaseConnection(config.database); await db.testConnection(); const tables = await db.getTables(); const answers = await inquirer_1.default.prompt([ { type: 'list', name: 'tableName', message: 'Select table to test:', choices: tables } ]); // Get table info for column selection const tableInfo = await db.getTableInfo(answers.tableName); if (!tableInfo) { throw new Error(`Table ${answers.tableName} not found`); } const columnChoices = tableInfo.columns.map(col => ({ name: `${col.column_name} (${col.data_type})`, value: col.column_name })); const columnSelection = await inquirer_1.default.prompt({ type: 'checkbox', name: 'selectedColumns', message: 'Select columns for embeddings (max 5):', choices: columnChoices, validate: (input) => { if (input.length === 0) return 'At least one column must be selected'; if (input.length > 5) return 'Maximum 5 columns allowed'; return true; } }); const querySelection = await inquirer_1.default.prompt({ type: 'list', name: 'queryColumn', message: 'Select query column:', choices: columnChoices }); const answerSelection = await inquirer_1.default.prompt({ type: 'list', name: 'answerColumn', message: 'Select answer column:', choices: columnChoices }); const metricSelection = await inquirer_1.default.prompt({ type: 'list', name: 'metricType', message: 'Select evaluation metric:', choices: [ { name: 'BRDR (Banking Regulation)', value: 'brdr' }, { name: 'SQL (Text-to-SQL)', value: 'sql' }, { name: 'Similarity (General Purpose)', value: 'similarity' } ] }); const ratioInput = await inquirer_1.default.prompt({ type: 'input', name: 'trainingRatio', message: 'Training ratio (0-1):', default: '0.8', validate: (input) => { const num = parseFloat(input); return (num > 0 && num < 1) || 'Must be between 0 and 1'; } }); const nameInput = await inquirer_1.default.prompt({ type: 'input', name: 'testName', message: 'Test name:', default: `EnhancedTest_${new Date().toISOString().replace(/[:.]/g, '-')}` }); const limitInput = await inquirer_1.default.prompt({ type: 'input', name: 'maxCombinations', message: 'Maximum combinations to test:', default: '20', validate: (input) => { const num = parseInt(input); return (num > 0 && num <= 100) || 'Must be between 1 and 100'; } }); const batchSizeInput = await inquirer_1.default.prompt({ type: 'input', name: 'batchSize', message: 'Batch size for processing:', default: '100', validate: (input) => { const num = parseInt(input); return (num > 0 && num <= 1000) || 'Must be between 1 and 1000'; } }); const maxTrainingInput = await inquirer_1.default.prompt({ type: 'input', name: 'maxTrainingSamples', message: 'Maximum training samples:', default: '10000', validate: (input) => { const num = parseInt(input); return (num >= 100 && num <= 100000) || 'Must be between 100 and 100000'; } }); const maxTestingInput = await inquirer_1.default.prompt({ type: 'input', name: 'maxTestingSamples', message: 'Maximum testing samples:', default: '2000', validate: (input) => { const num = parseInt(input); return (num >= 50 && num <= 20000) || 'Must be between 50 and 20000'; } }); const cachingInput = await inquirer_1.default.prompt({ type: 'confirm', name: 'enableCaching', message: 'Enable embedding caching?', default: true }); const samplingInput = await inquirer_1.default.prompt({ type: 'list', name: 'dataSamplingStrategy', message: 'Data sampling strategy:', choices: [ { name: 'Random (recommended)', value: 'random' }, { name: 'Stratified (maintains distribution)', value: 'stratified' }, { name: 'Sequential (first N rows)', value: 'sequential' } ] }); const moreAnswers = { ...columnSelection, ...querySelection, ...answerSelection, ...metricSelection, ...ratioInput, ...nameInput, ...limitInput, ...batchSizeInput, ...maxTrainingInput, ...maxTestingInput, ...cachingInput, ...samplingInput }; return { tableName: answers.tableName, selectedColumns: columnSelection.selectedColumns, queryColumn: querySelection.queryColumn, answerColumn: answerSelection.answerColumn, embeddingConfig: config.embedding, metricType: metricSelection.metricType, trainingRatio: parseFloat(ratioInput.trainingRatio), testName: nameInput.testName, maxCombinations: parseInt(limitInput.maxCombinations), batchSize: parseInt(batchSizeInput.batchSize), maxTrainingSamples: parseInt(maxTrainingInput.maxTrainingSamples), maxTestingSamples: parseInt(maxTestingInput.maxTestingSamples), enableCaching: cachingInput.enableCaching, dataSamplingStrategy: samplingInput.dataSamplingStrategy }; } async function interactiveProductionTestSetup() { console.log(chalk_1.default.bold('🚀 Interactive Production RAG Test Setup (ML Best Practices)\n')); const configManager = new config_1.ConfigManager(); const config = await configManager.loadConfig(); const db = new database_1.DatabaseConnection(config.database); await db.testConnection(); const tables = await db.getTables(); const answers = await inquirer_1.default.prompt([ { type: 'list', name: 'tableName', message: 'Select table to test:', choices: tables } ]); // Get table info for column selection const tableInfo = await db.getTableInfo(answers.tableName); if (!tableInfo) { throw new Error(`Table ${answers.tableName} not found`); } const columnChoices = tableInfo.columns.map(col => ({ name: `${col.column_name} (${col.data_type})`, value: col.column_name })); const columnSelection = await inquirer_1.default.prompt({ type: 'checkbox', name: 'selectedColumns', message: 'Select columns for embeddings (max 5):', choices: columnChoices, validate: (input) => { if (input.length === 0) return 'At least one column must be selected'; if (input.length > 5) return 'Maximum 5 columns allowed'; return true; } }); const querySelection = await inquirer_1.default.prompt({ type: 'list', name: 'queryColumn', message: 'Select query column:', choices: columnChoices }); const answerSelection = await inquirer_1.default.prompt({ type: 'list', name: 'answerColumn', message: 'Select answer column:', choices: columnChoices }); const metricSelection = await inquirer_1.default.prompt({ type: 'list', name: 'metricType', message: 'Select evaluation metric:', choices: [ { name: 'SQL (Text-to-SQL)', value: 'sql' }, { name: 'BRDR (Banking Regulation)', value: 'brdr' }, { name: 'Similarity (General Purpose)', value: 'similarity' } ] }); const trainRatioInput = await inquirer_1.default.prompt({ type: 'input', name: 'trainingRatio', message: 'Training ratio (0-1):', default: '0.7', validate: (input) => { const num = parseFloat(input); return (num > 0 && num < 1) || 'Must be between 0 and 1'; } }); const valRatioInput = await inquirer_1.default.prompt({ type: 'input', name: 'validationRatio', message: 'Validation ratio (0-1):', default: '0.15', validate: (input) => { const num = parseFloat(input); return (num > 0 && num < 1) || 'Must be between 0 and 1'; } }); const testRatioInput = await inquirer_1.default.prompt({ type: 'input', name: 'testingRatio', message: 'Testing ratio (0-1):', default: '0.15', validate: (input) => { const num = parseFloat(input.trainRatio || '0.7') + parseFloat(input.valRatio || '0.15') + parseFloat(input.testRatio || '0.15'); return Math.abs(num - 1) < 0.01 || 'Ratios must sum to 1'; } }); const nameInput = await inquirer_1.default.prompt({ type: 'input', name: 'testName', message: 'Test name:', default: `ProductionTest_${new Date().toISOString().replace(/[:.]/g, '-')}` }); const limitInput = await inquirer_1.default.prompt({ type: 'input', name: 'maxCombinations', message: 'Maximum combinations to test:', default: '20', validate: (input) => { const num = parseInt(input); return (num > 0 && num <= 100) || 'Must be between 1 and 100'; } }); const batchSizeInput = await inquirer_1.default.prompt({ type: 'input', name: 'batchSize', message: 'Batch size for processing:', default: '100', validate: (input) => { const num = parseInt(input); return (num > 0 && num <= 1000) || 'Must be between 1 and 1000'; } }); const maxTrainInput = await inquirer_1.default.prompt({ type: 'input', name: 'maxTrainingSamples', message: 'Maximum training samples:', default: '50000', validate: (input) => { const num = parseInt(input); return (num >= 100 && num <= 200000) || 'Must be between 100 and 200000'; } }); const maxValInput = await inquirer_1.default.prompt({ type: 'input', name: 'maxValidationSamples', message: 'Maximum validation samples:', default: '10000', validate: (input) => { const num = parseInt(input); return (num >= 50 && num <= 50000) || 'Must be between 50 and 50000'; } }); const maxTestInput = await inquirer_1.default.prompt({ type: 'input', name: 'maxTestingSamples', message: 'Maximum testing samples:', default: '10000', validate: (input) => { const num = parseInt(input); return (num >= 50 && num <= 50000) || 'Must be between 50 and 50000'; } }); const cachingInput = await inquirer_1.default.prompt({ type: 'confirm', name: 'enableCaching', message: 'Enable embedding caching?', default: true }); const cvFoldsInput = await inquirer_1.default.prompt({ type: 'input', name: 'crossValidationFolds', message: 'Cross-validation folds:', default: '5', validate: (input) => { const num = parseInt(input); return (num >= 2 && num <= 10) || 'Must be between 2 and 10'; } }); const minQueryLenInput = await inquirer_1.default.prompt({ type: 'input', name: 'minQueryLength', message: 'Minimum query length:', default: '10', validate: (input) => { const num = parseInt(input); return (num >= 1 && num <= 100) || 'Must be between 1 and 100'; } }); const maxQueryLenInput = await inquirer_1.default.prompt({ type: 'input', name: 'maxQueryLength', message: 'Maximum query length:', default: '500', validate: (input) => { const num = parseInt(input); return (num >= 50 && num <= 2000) || 'Must be between 50 and 2000'; } }); const minAnswerLenInput = await inquirer_1.default.prompt({ type: 'input', name: 'minAnswerLength', message: 'Minimum answer length:', default: '10', validate: (input) => { const num = parseInt(input); return (num >= 1 && num <= 100) || 'Must be between 1 and 100'; } }); const maxAnswerLenInput = await inquirer_1.default.prompt({ type: 'input', name: 'maxAnswerLength', message: 'Maximum answer length:', default: '1000', validate: (input) => { const num = parseInt(input); return (num >= 50 && num <= 5000) || 'Must be between 50 and 5000'; } }); const samplingInput = await inquirer_1.default.prompt({ type: 'list', name: 'samplingStrategy', message: 'Data sampling strategy:', choices: [ { name: 'Random (recommended)', value: 'random' }, { name: 'Stratified (maintains distribution)', value: 'stratified' }, { name: 'Time-based (if you have timestamps)', value: 'time_based' }, { name: 'Query complexity-based', value: 'query_complexity' } ] }); let timestampColumn; let timeWindow; if (samplingInput.samplingStrategy === 'time_based') { const timestampInput = await inquirer_1.default.prompt({ type: 'list', name: 'timestampColumn', message: 'Select timestamp column:', choices: columnChoices.filter(col => { const colName = col.value; const colInfo = tableInfo.columns.find(c => c.column_name === colName); return colInfo?.data_type?.includes('timestamp') || colInfo?.data_type?.includes('date'); }) }); timestampColumn = timestampInput.timestampColumn; const timeWindowInput = await inquirer_1.default.prompt({ type: 'list', name: 'timeWindow', message: 'Select time window:', choices: [ { name: 'Daily', value: 'daily' }, { name: 'Weekly', value: 'weekly' }, { name: 'Monthly', value: 'monthly' } ] }); timeWindow = timeWindowInput.timeWindow; } return { tableName: answers.tableName, selectedColumns: columnSelection.selectedColumns, queryColumn: querySelection.queryColumn, answerColumn: answerSelection.answerColumn, embeddingConfig: config.embedding, metricType: metricSelection.metricType, trainingRatio: parseFloat(trainRatioInput.trainingRatio), validationRatio: parseFloat(valRatioInput.validationRatio), testingRatio: parseFloat(testRatioInput.testingRatio), testName: nameInput.testName, maxCombinations: parseInt(limitInput.maxCombinations), maxTrainingSamples: parseInt(maxTrainInput.maxTrainingSamples), maxValidationSamples: parseInt(maxValInput.maxValidationSamples), maxTestingSamples: parseInt(maxTestInput.maxTestingSamples), batchSize: parseInt(batchSizeInput.batchSize), enableCaching: cachingInput.enableCaching, crossValidationFolds: parseInt(cvFoldsInput.crossValidationFolds), minQueryLength: parseInt(minQueryLenInput.minQueryLength), maxQueryLength: parseInt(maxQueryLenInput.maxQueryLength), minAnswerLength: parseInt(minAnswerLenInput.minAnswerLength), maxAnswerLength: parseInt(maxAnswerLenInput.maxAnswerLength), samplingStrategy: samplingInput.samplingStrategy, timestampColumn, timeWindow }; } async function runProductionExperiment(testConfig, config) { const spinner = (0, ora_1.default)('Initializing Production RAG Tester...').start(); try { const db = new database_1.DatabaseConnection(config.database); const embeddings = new embeddings_1.EmbeddingGenerator(config.embedding); const tester = new production_tester_1.ProductionRAGTester(db, embeddings); // Validate configuration spinner.text = 'Validating production configuration...'; const validation = await tester.validateProductionConfiguration(testConfig); if (!validation.isValid) { spinner.fail('Production configuration validation failed'); console.error(chalk_1.default.red('\nErrors:')); validation.errors.forEach(error => console.error(chalk_1.default.red(` • ${error}`))); if (validation.warnings.length > 0) { console.warn(chalk_1.default.yellow('\nWarnings:')); validation.warnings.forEach(warning => console.warn(chalk_1.default.yellow(` • ${warning}`))); } process.exit(1); } if (validation.warnings.length > 0) { spinner.warn('Production configuration has warnings'); console.warn(chalk_1.default.yellow('Warnings:')); validation.warnings.forEach(warning => console.warn(chalk_1.default.yellow(` • ${warning}`))); const { proceed } = await inquirer_1.default.prompt([{ type: 'confirm', name: 'proceed', message: 'Continue anyway?', default: true }]); if (!proceed) { console.log(chalk_1.default.gray('Production test cancelled.')); process.exit(0); } } // Initialize embeddings spinner.text = 'Initializing embedding model...'; await tester.initialize(); spinner.succeed('Production RAG Tester initialized'); // Run production experiment console.log(chalk_1.default.bold('\n🚀 Starting production experiment...\n')); const results = await tester.runProductionExperiment(testConfig); // Display production results displayProductionResults(results); // Save results const outputDir = config.outputPath || './rag-test-results'; if (!fs.existsSync(outputDir)) { fs.mkdirSync(outputDir, { recursive: true }); } const filename = `${testConfig.testName.replace(/[^a-zA-Z0-9]/g, '_')}.json`; const filepath = path.join(outputDir, filename); fs.writeFileSync(filepath, JSON.stringify(results, null, 2)); console.log(chalk_1.default.green(`\n💾 Production results saved to: ${filepath}`)); } catch (error) { spinner.fail('Production experiment failed'); throw error; } } async function runEnhancedExperiment(testConfig, config) { const spinner = (0, ora_1.default)('Initializing Enhanced RAG Tester...').start(); try { const db = new database_1.DatabaseConnection(config.database); const embeddings = new embeddings_1.EmbeddingGenerator(config.embedding); const tester = new enhanced_tester_1.EnhancedRAGTester(db, embeddings); // Validate configuration spinner.text = 'Validating enhanced configuration...'; const validation = await tester.validateEnhancedConfiguration(testConfig); if (!validation.isValid) { spinner.fail('Enhanced configuration validation failed'); console.error(chalk_1.default.red('\nErrors:')); validation.errors.forEach(error => console.error(chalk_1.default.red(` • ${error}`))); if (validation.warnings.length > 0) { console.warn(chalk_1.default.yellow('\nWarnings:')); validation.warnings.forEach(warning => console.warn(chalk_1.default.yellow(` • ${warning}`))); } process.exit(1); } if (validation.warnings.length > 0) { spinner.warn('Enhanced configuration has warnings'); console.warn(chalk_1.default.yellow('Warnings:')); validation.warnings.forEach(warning => console.warn(chalk_1.default.yellow(` • ${warning}`))); const { proceed } = await inquirer_1.default.prompt([{ type: 'confirm', name: 'proceed', message: 'Continue anyway?', default: true }]); if (!proceed) { console.log(chalk_1.default.gray('Enhanced test cancelled.')); process.exit(0); } } // Initialize embeddings spinner.text = 'Initializing embedding model...'; await tester.initialize(); spinner.succeed('Enhanced RAG Tester initialized'); // Run enhanced experiment console.log(chalk_1.default.bold('\n🚀 Starting enhanced experiment...\n')); const results = await tester.runEnhancedExperiment(testConfig); // Display enhanced results displayEnhancedResults(results); // Save results const outputDir = config.outputPath || './rag-test-results'; if (!fs.existsSync(outputDir)) { fs.mkdirSync(outputDir, { recursive: true }); } const filename = `${testConfig.testName.replace(/[^a-zA-Z0-9]/g, '_')}.json`; const filepath = path.join(outputDir, filename); fs.writeFileSync(filepath, JSON.stringify(results, null, 2)); console.log(chalk_1.default.green(`\n💾 Enhanced results saved to: ${filepath}`)); } catch (error) { spinner.fail('Enhanced experiment failed'); throw error; } } async function runExperiment(testConfig, config) { const spinner = (0, ora_1.default)('Initializing RAG Tester...').start(); try { const db = new database_1.DatabaseConnection(config.database); const embeddings = new embeddings_1.EmbeddingGenerator(config.embedding); const tester = new tester_1.RAGTester(db, embeddings); // Validate configuration spinner.text = 'Validating configuration...'; const validation = await tester.validateConfiguration(testConfig); if (!validation.isValid) { spinner.fail('Configuration validation failed'); console.error(chalk_1.default.red('\nErrors:')); validation.errors.forEach(error => console.error(chalk_1.default.red(` • ${error}`))); if (validation.warnings.length > 0) { console.warn(chalk_1.default.yellow('\nWarnings:')); validation.warnings.forEach(warning => console.warn(chalk_1.default.yellow(` • ${warning}`))); } process.exit(1); } if (validation.warnings.length > 0) { spinner.warn('Configuration has warnings'); console.warn(chalk_1.default.yellow('Warnings:')); validation.warnings.forEach(warning => console.warn(chalk_1.default.yellow(` • ${warning}`))); const { proceed } = await inquirer_1.default.prompt([{ type: 'confirm', name: 'proceed', message: 'Continue anyway?', default: true }]); if (!proceed) { console.log(chalk_1.default.gray('Test cancelled.')); process.exit(0); } } // Initialize embeddings spinner.text = 'Initializing embedding model...'; await tester.initialize(); spinner.succeed('RAG Tester initialized'); // Run experiment console.log(chalk_1.default.bold('\n🚀 Starting experiment...\n')); const results = await tester.runExperiment(testConfig); // Display results displayResults(results); // Save results const outputDir = config.outputPath || './rag-test-results'; if (!fs.existsSync(outputDir)) { fs.mkdirSync(outputDir, { recursive: true }); } const filename = `${testConfig.testName.replace(/[^a-zA-Z0-9]/g, '_')}.json`; const filepath = path.join(outputDir, filename); fs.writeFileSync(filepath, JSON.stringify(results, null, 2)); console.log(chalk_1.default.green(`\n💾 Results saved to: ${filepath}`)); } catch (error) { spinner.fail('Experiment failed'); throw error; } } function displayResults(results) { console.log(chalk_1.default.bold('\n🎉 Experiment Complete!\n')); // Summary table const summaryTable = new cli_table3_1.default({ head: [chalk_1.default.cyan('Metric'), chalk_1.default.cyan('Value')], style: { head: [], border: [] } }); summaryTable.push(['Test Name', results.testName], ['Total Combinations', results.summary.totalCombinations.toString()], ['Best Score', results.summary.bestScore.toFixed(4)], ['Worst Score', results.summary.worstScore.toFixed(4)], ['Average Score', results.summary.averageScore.toFixed(4)], ['Processing Time', `${(results.processingTime / 1000).toFixed(1)}s`]); console.log(summaryTable.toString()); // Best combination console.log(chalk_1.default.bold('\n🏆 Best Combination:')); console.log(chalk_1.default.green(` ${results.summary.bestCombination.name}`)); console.log(chalk_1.default.green(` Score: ${results.summary.bestScore.toFixed(4)}`)); // Top 5 results console.log(chalk_1.default.bold('\n📊 Top 5 Results:')); const topResults = results.allResults .sort((a, b) => b.averageScore - a.averageScore) .slice(0, 5); const resultsTable = new cli_table3_1.default({ head: [chalk_1.default.cyan('Rank'), chalk_1.default.cyan('Combination'), chalk_1.default.cyan('Score'), chalk_1.default.cyan('Tests')], style: { head: [], border: [] } }); topResults.forEach((result, index) => { resultsTable.push([ (index + 1).toString(), result.combination.name, result.averageScore.toFixed(4), result.totalTests.toString() ]); }); console.log(resultsTable.toString()); } function displayEnhancedResults(results) { console.log(chalk_1.default.bold('\n🚀 Enhanced Experiment Complete!\n')); // Enhanced summary table const summaryTable = new cli_table3_1.default({ head: [chalk_1.default.cyan('Metric'), chalk_1.default.cyan('Value')], style: { head: [], border: [] } }); summaryTable.push(['Test Name', results.testName], ['Total Combinations', results.summary.totalCombinations.toString()], ['Best Score', results.summary.bestScore.toFixed(4)], ['Worst Score', results.summary.worstScore.toFixed(4)], ['Average Score', results.summary.averageScore.toFixed(4)], ['Processing Time', `${(results.processingTime / 1000).toFixed(1)}s`]); // Add enhanced metrics if available if ('medianScore' in results.summary) { summaryTable.push(['Median Score', results.summary.medianScore.toFixed(4)], ['Q1 Score', results.summary.q1Score.toFixed(4)], ['Q3 Score', results.summary.q3Score.toFixed(4)], ['Total Tests', results.summary.totalTests.toString()], ['Average Confidence', results.summary.averageConfidence.toFixed(4)]); } console.log(summaryTable.toString()); // Best combination console.log(chalk_1.default.bold('\n🏆 Best Combination:')); console.log(chalk_1.default.green(` ${results.summary.bestCombination.name}`)); console.log(chalk_1.default.green(` Score: ${results.summary.bestScore.toFixed(4)}`)); // Top 5 results with enhanced details console.log(chalk_1.default.bold('\n📊 Top 5 Results:')); const topResults = results.allResults .sort((a,