rag-cli-tester
Version:
A lightweight CLI tool for testing RAG (Retrieval-Augmented Generation) systems with different embedding combinations
1,138 lines • 77.8 kB
JavaScript
#!/usr/bin/env node
"use strict";
/**
 * TypeScript interop helper: re-exports property `key` of `source` on `target`
 * under the name `alias` (or `key` when no alias is given).
 *
 * When `Object.create` is available the property is installed with
 * `Object.defineProperty`; a live getter is substituted unless the source
 * descriptor is already an accessor on an `__esModule` object, or is a data
 * property that is neither writable nor configurable. Otherwise the value is
 * simply copied.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        target[alias === undefined ? key : alias] = source[key];
    });
/**
 * TypeScript interop helper: installs `value` as the `default` property of
 * `target`. Uses an enumerable `Object.defineProperty` data property when
 * `Object.create` is available, and a plain assignment otherwise.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        var descriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", descriptor);
    }
    : function (target, value) {
        target["default"] = value;
    });
/**
 * TypeScript interop helper emulating `import * as ns from "mod"` for CommonJS.
 *
 * Modules flagged with `__esModule` are returned untouched. Anything else is
 * wrapped in a fresh namespace object: every own key except "default" is bound
 * through `__createBinding`, and the module itself becomes the `default` export.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved lazily: the first call swaps in Object.getOwnPropertyNames (or a
    // for-in fallback) so later calls skip the feature check.
    var listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (target) {
            var names = [];
            for (var name in target) {
                if (Object.prototype.hasOwnProperty.call(target, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                if (keys[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, keys[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
/**
 * TypeScript interop helper emulating `import x from "mod"` for CommonJS:
 * `__esModule` modules are returned as-is, anything else is wrapped as
 * `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const commander_1 = require("commander");
const chalk_1 = __importDefault(require("chalk"));
const ora_1 = __importDefault(require("ora"));
const inquirer_1 = __importDefault(require("inquirer"));
const cli_table3_1 = __importDefault(require("cli-table3"));
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const config_1 = require("./config");
const database_1 = require("./database");
const embeddings_1 = require("./embeddings");
const tester_1 = require("./tester");
const enhanced_tester_1 = require("./enhanced-tester");
const production_tester_1 = require("./production-tester");
// Root commander program; the sub-commands below are registered on it.
const program = new commander_1.Command();
// The reported CLI version is taken from the package.json located one
// directory above this file.
const packageJson = JSON.parse(fs.readFileSync(path.join(__dirname, '../package.json'), 'utf-8'));
program.name('rag-test');
program.description('CLI tool for testing RAG systems with different embedding combinations');
program.version(packageJson.version);
// Production Test command (ML best practices).
// Builds a production test configuration either interactively (when --table is
// omitted) or from the command-line flags, then hands it to
// runProductionExperiment. Any failure is printed and the process exits with 1.
program
    .command('test-production')
    .description('Run production RAG testing experiment following ML best practices with proper train/validation/test splits')
    .option('-t, --table <tableName>', 'Table name to test')
    .option('-c, --columns <columns>', 'Comma-separated list of columns for embeddings')
    .option('-q, --query <column>', 'Column containing queries')
    .option('-a, --answer <column>', 'Column containing expected answers')
    .option('-m, --metric <type>', 'Metric type (brdr|sql|similarity)', 'sql')
    .option('--train-ratio <number>', 'Training ratio (0-1)', '0.7')
    .option('--val-ratio <number>', 'Validation ratio (0-1)', '0.15')
    .option('--test-ratio <number>', 'Testing ratio (0-1)', '0.15')
    .option('-n, --name <name>', 'Test name')
    .option('-l, --limit <number>', 'Max combinations to test', '20')
    .option('-b, --batch-size <number>', 'Batch size for processing', '100')
    .option('--max-train <number>', 'Maximum training samples', '50000')
    .option('--max-val <number>', 'Maximum validation samples', '10000')
    .option('--max-test <number>', 'Maximum testing samples', '10000')
    .option('--enable-caching', 'Enable embedding caching', false)
    .option('--sampling <strategy>', 'Data sampling strategy (random|stratified|time_based|query_complexity)', 'random')
    .option('--cv-folds <number>', 'Cross-validation folds', '5')
    .option('--min-query-len <number>', 'Minimum query length', '10')
    .option('--max-query-len <number>', 'Maximum query length', '500')
    .option('--min-answer-len <number>', 'Minimum answer length', '10')
    .option('--max-answer-len <number>', 'Maximum answer length', '1000')
    .option('--timestamp-col <column>', 'Timestamp column for time-based sampling')
    .option('--time-window <window>', 'Time window for sampling (daily|weekly|monthly)', 'weekly')
    .action(async (options) => {
    try {
        const configManager = new config_1.ConfigManager();
        const config = await configManager.loadConfig();
        // Interactive mode if no table was provided on the command line
        let testConfig;
        if (!options.table) {
            testConfig = await interactiveProductionTestSetup();
        }
        else {
            // Option values arrive as strings. Always parse integers as base 10 so
            // inputs such as "0x10" are not silently read as hexadecimal.
            const toInt = (value, fallback) => Number.parseInt(value || fallback, 10);
            const toFloat = (value, fallback) => Number.parseFloat(value || fallback);
            // Tolerate "a, b" style lists: trim each name and drop empty entries.
            const selectedColumns = (options.columns?.split(',') || [])
                .map((column) => column.trim())
                .filter((column) => column.length > 0);
            testConfig = {
                tableName: options.table,
                selectedColumns,
                queryColumn: options.query || '',
                answerColumn: options.answer || '',
                embeddingConfig: config.embedding,
                metricType: options.metric || 'sql',
                trainingRatio: toFloat(options.trainRatio, '0.7'),
                validationRatio: toFloat(options.valRatio, '0.15'),
                testingRatio: toFloat(options.testRatio, '0.15'),
                testName: options.name || `ProductionTest_${new Date().toISOString().replace(/[:.]/g, '-')}`,
                maxCombinations: toInt(options.limit, '20'),
                maxTrainingSamples: toInt(options.maxTrain, '50000'),
                maxValidationSamples: toInt(options.maxVal, '10000'),
                maxTestingSamples: toInt(options.maxTest, '10000'),
                batchSize: toInt(options.batchSize, '100'),
                enableCaching: options.enableCaching || false,
                crossValidationFolds: toInt(options.cvFolds, '5'),
                minQueryLength: toInt(options.minQueryLen, '10'),
                maxQueryLength: toInt(options.maxQueryLen, '500'),
                minAnswerLength: toInt(options.minAnswerLen, '10'),
                maxAnswerLength: toInt(options.maxAnswerLen, '1000'),
                samplingStrategy: options.sampling || 'random',
                timestampColumn: options.timestampCol,
                timeWindow: options.timeWindow || 'weekly'
            };
        }
        await runProductionExperiment(testConfig, config);
    }
    catch (error) {
        console.error(chalk_1.default.red(`❌ Production test failed: ${error instanceof Error ? error.message : String(error)}`));
        process.exit(1);
    }
});
// Enhanced Test command for large datasets.
// Builds an enhanced test configuration either interactively (when --table is
// omitted) or from the command-line flags, then hands it to
// runEnhancedExperiment. Any failure is printed and the process exits with 1.
program
    .command('test-enhanced')
    .description('Run enhanced RAG testing experiment optimized for large datasets (1M+ rows)')
    .option('-t, --table <tableName>', 'Table name to test')
    .option('-c, --columns <columns>', 'Comma-separated list of columns for embeddings')
    .option('-q, --query <column>', 'Column containing queries')
    .option('-a, --answer <column>', 'Column containing expected answers')
    .option('-m, --metric <type>', 'Metric type (brdr|sql|similarity)', 'brdr')
    .option('-r, --ratio <number>', 'Training ratio (0-1)', '0.8')
    .option('-n, --name <name>', 'Test name')
    .option('-l, --limit <number>', 'Max combinations to test', '20')
    .option('-b, --batch-size <number>', 'Batch size for processing', '100')
    .option('--max-training <number>', 'Maximum training samples', '10000')
    .option('--max-testing <number>', 'Maximum testing samples', '2000')
    .option('--enable-caching', 'Enable embedding caching', false)
    .option('--sampling <strategy>', 'Data sampling strategy (random|stratified|sequential)', 'random')
    .action(async (options) => {
    try {
        const configManager = new config_1.ConfigManager();
        const config = await configManager.loadConfig();
        // Interactive mode if no table was provided on the command line
        let testConfig;
        if (!options.table) {
            testConfig = await interactiveEnhancedTestSetup();
        }
        else {
            // Option values arrive as strings. Always parse integers as base 10 so
            // inputs such as "0x10" are not silently read as hexadecimal.
            const toInt = (value, fallback) => Number.parseInt(value || fallback, 10);
            // Tolerate "a, b" style lists: trim each name and drop empty entries.
            const selectedColumns = (options.columns?.split(',') || [])
                .map((column) => column.trim())
                .filter((column) => column.length > 0);
            testConfig = {
                tableName: options.table,
                selectedColumns,
                queryColumn: options.query || '',
                answerColumn: options.answer || '',
                embeddingConfig: config.embedding,
                metricType: options.metric || 'brdr',
                trainingRatio: Number.parseFloat(options.ratio || '0.8'),
                testName: options.name || `EnhancedTest_${new Date().toISOString().replace(/[:.]/g, '-')}`,
                maxCombinations: toInt(options.limit, '20'),
                batchSize: toInt(options.batchSize, '100'),
                maxTrainingSamples: toInt(options.maxTraining, '10000'),
                maxTestingSamples: toInt(options.maxTesting, '2000'),
                enableCaching: options.enableCaching || false,
                dataSamplingStrategy: options.sampling || 'random'
            };
        }
        await runEnhancedExperiment(testConfig, config);
    }
    catch (error) {
        console.error(chalk_1.default.red(`❌ Enhanced test failed: ${error instanceof Error ? error.message : String(error)}`));
        process.exit(1);
    }
});
// Configure command.
// If the loaded configuration already has a database URL and anon key, shows
// truncated previews of both and asks before overriding; otherwise (or once
// confirmed) runs ConfigManager.initializeConfig().
program
    .command('configure')
    .description('Set up configuration for database and embedding model')
    .action(async () => {
    try {
        const configManager = new config_1.ConfigManager();
        // Check if .env file already has the required variables
        const config = await configManager.loadConfig();
        if (config.database.url && config.database.anonKey) {
            console.log(chalk_1.default.yellow('⚠️ Configuration already found in .env file:'));
            // Only a prefix of each value is echoed back to the terminal.
            console.log(chalk_1.default.gray(` NEXT_PUBLIC_SUPABASE_URL: ${config.database.url.substring(0, 30)}...`));
            console.log(chalk_1.default.gray(` NEXT_PUBLIC_SUPABASE_ANON_KEY: ${config.database.anonKey.substring(0, 20)}...`));
            // Use the module-level inquirer import, like every other prompt in
            // this file, instead of re-requiring it lazily.
            const { proceed } = await inquirer_1.default.prompt({
                type: 'confirm',
                name: 'proceed',
                message: 'Do you want to override the existing configuration?',
                default: false
            });
            if (!proceed) {
                console.log(chalk_1.default.green('✅ Using existing configuration from .env file'));
                return;
            }
        }
        await configManager.initializeConfig();
        console.log(chalk_1.default.green('\n✅ Configuration completed successfully!'));
    }
    catch (error) {
        console.error(chalk_1.default.red(`❌ Configuration failed: ${error instanceof Error ? error.message : String(error)}`));
        process.exit(1);
    }
});
// List available metrics.
// Prints the name and description of every metric known to MetricFactory.
// A failure for one metric is reported and the listing continues.
program
    .command('metrics')
    .description('List available evaluation metrics')
    .action(async () => {
    try {
        // Required for its side effect: loading the index registers the metrics.
        require('./metrics/index');
        const metricModule = require('./metrics/base-metric');
        const factory = metricModule.MetricFactory;
        const metricNames = factory.getAvailableMetrics();
        console.log(chalk_1.default.bold('📊 Available Evaluation Metrics:\n'));
        const printMetric = (metricName) => {
            try {
                const info = factory.getMetricInfo(metricName);
                console.log(chalk_1.default.cyan(`• ${info.name}`));
                console.log(chalk_1.default.gray(` ${info.description}`));
                console.log('');
            }
            catch (metricError) {
                console.error(chalk_1.default.red(`Error getting info for ${metricName}: ${metricError}`));
            }
        };
        metricNames.forEach((metricName) => {
            printMetric(metricName);
        });
        console.log(chalk_1.default.yellow('💡 Use --metric option with test commands to specify which metric to use'));
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        console.error(chalk_1.default.red(`❌ Failed to load metrics: ${reason}`));
        process.exit(1);
    }
});
// List tables command.
// Validates the loaded configuration, checks the database connection, then
// prints the table names returned by DatabaseConnection.getTables().
// Exits with code 1 on invalid configuration, connection failure, or any error.
program
    .command('tables')
    .description('List available tables in the database')
    .action(async () => {
    const spinner = (0, ora_1.default)('Connecting to database...').start();
    try {
        const configManager = new config_1.ConfigManager();
        const config = await configManager.loadConfig();
        const validation = configManager.validateConfig(config);
        if (!validation.isValid) {
            spinner.fail('Invalid configuration');
            console.error(chalk_1.default.red('Configuration errors:'));
            validation.errors.forEach(error => console.error(chalk_1.default.red(` • ${error}`)));
            console.log(chalk_1.default.yellow('\nRun "rag-test configure" to set up configuration.'));
            process.exit(1);
        }
        const db = new database_1.DatabaseConnection(config.database);
        const isConnected = await db.testConnection();
        if (!isConnected) {
            spinner.fail('Failed to connect to database');
            // Print only the URL: dumping the whole database config would write
            // the anon key (and any other credentials it holds) to stderr/logs.
            console.error('database url is: ', config.database.url);
            console.error(chalk_1.default.red('Please check your database configuration.'));
            process.exit(1);
        }
        spinner.text = 'Fetching tables...';
        const tables = await db.getTables();
        spinner.succeed('Tables retrieved');
        if (tables.length === 0) {
            console.log(chalk_1.default.yellow('No tables found in the database.'));
            return;
        }
        console.log(chalk_1.default.bold('\n📊 Available Tables:'));
        const table = new cli_table3_1.default({
            head: [chalk_1.default.cyan('Table Name')],
            style: { head: [], border: [] }
        });
        tables.forEach(tableName => {
            table.push([tableName]);
        });
        console.log(table.toString());
    }
    catch (error) {
        spinner.fail('Operation failed');
        console.error(chalk_1.default.red(`❌ Error: ${error instanceof Error ? error.message : String(error)}`));
        process.exit(1);
    }
});
// Inspect table command.
// Prints the row count, the column list (name, type, nullability) and up to
// three sample rows of the given table. Exits with code 1 when the database is
// unreachable, the table is not found, or any step throws.
program
    .command('inspect <tableName>')
    .description('Inspect a table structure and sample data')
    .action(async (tableName) => {
    const spinner = (0, ora_1.default)('Loading table information...').start();
    try {
        const configManager = new config_1.ConfigManager();
        const config = await configManager.loadConfig();
        const db = new database_1.DatabaseConnection(config.database);
        // Same connectivity check as the `tables` command, so an unreachable
        // database is not misreported as a missing table.
        const isConnected = await db.testConnection();
        if (!isConnected) {
            spinner.fail('Failed to connect to database');
            console.error(chalk_1.default.red('Please check your database configuration.'));
            process.exit(1);
        }
        const tableInfo = await db.getTableInfo(tableName);
        if (!tableInfo) {
            spinner.fail(`Table "${tableName}" not found`);
            process.exit(1);
        }
        spinner.text = 'Fetching sample data...';
        const sampleData = await db.getTableData(tableName, ['*'], 3);
        spinner.succeed('Table inspection complete');
        console.log(chalk_1.default.bold(`\n📋 Table: ${tableName}`));
        console.log(chalk_1.default.gray(`Rows: ${tableInfo.rowCount}`));
        // Show columns
        console.log(chalk_1.default.bold('\n🏛️ Columns:'));
        const columnsTable = new cli_table3_1.default({
            head: [chalk_1.default.cyan('Column'), chalk_1.default.cyan('Type'), chalk_1.default.cyan('Nullable')],
            style: { head: [], border: [] }
        });
        // A bare truthiness test would report the string 'NO' as nullable.
        // NOTE(review): assumes is_nullable may arrive as a 'YES'/'NO' string
        // (information_schema style) as well as a boolean; confirm against
        // DatabaseConnection.getTableInfo.
        const isNullable = (value) => typeof value === 'string'
            ? value.trim().toUpperCase() !== 'NO' && value.trim() !== ''
            : Boolean(value);
        tableInfo.columns.forEach(col => {
            columnsTable.push([
                col.column_name,
                col.data_type,
                isNullable(col.is_nullable) ? 'Yes' : 'No'
            ]);
        });
        console.log(columnsTable.toString());
        // Show sample data
        if (sampleData.length > 0) {
            console.log(chalk_1.default.bold('\n🔍 Sample Data:'));
            console.log(JSON.stringify(sampleData, null, 2));
        }
    }
    catch (error) {
        spinner.fail('Operation failed');
        console.error(chalk_1.default.red(`❌ Error: ${error instanceof Error ? error.message : String(error)}`));
        process.exit(1);
    }
});
// Test command.
// Builds a basic test configuration either interactively (when --table is
// omitted) or from the command-line flags, then hands it to runExperiment.
// Any failure is printed and the process exits with 1.
program
    .command('test')
    .description('Run RAG testing experiment')
    .option('-t, --table <tableName>', 'Table name to test')
    .option('-c, --columns <columns>', 'Comma-separated list of columns for embeddings')
    .option('-q, --query <column>', 'Column containing queries')
    .option('-a, --answer <column>', 'Column containing expected answers')
    .option('-m, --metric <type>', 'Metric type (brdr|sql|similarity)', 'brdr')
    .option('-r, --ratio <number>', 'Training ratio (0-1)', '0.8')
    .option('-n, --name <name>', 'Test name')
    .option('-l, --limit <number>', 'Max combinations to test', '20')
    .action(async (options) => {
    try {
        const configManager = new config_1.ConfigManager();
        const config = await configManager.loadConfig();
        // Interactive mode if no table was provided on the command line
        let testConfig;
        if (!options.table) {
            testConfig = await interactiveTestSetup();
        }
        else {
            // Tolerate "a, b" style lists: trim each name and drop empty entries.
            const selectedColumns = (options.columns?.split(',') || [])
                .map((column) => column.trim())
                .filter((column) => column.length > 0);
            testConfig = {
                tableName: options.table,
                selectedColumns,
                queryColumn: options.query || '',
                answerColumn: options.answer || '',
                embeddingConfig: config.embedding,
                metricType: options.metric,
                trainingRatio: Number.parseFloat(options.ratio),
                testName: options.name || `Test_${new Date().toISOString().replace(/[:.]/g, '-')}`,
                // Explicit radix so "0x10" is not silently read as hexadecimal.
                maxCombinations: Number.parseInt(options.limit, 10)
            };
        }
        await runExperiment(testConfig, config);
    }
    catch (error) {
        console.error(chalk_1.default.red(`❌ Test failed: ${error instanceof Error ? error.message : String(error)}`));
        process.exit(1);
    }
});
/**
 * Interactively collects the configuration for a basic RAG test run.
 *
 * Loads the stored configuration, lists the database tables, and prompts for
 * the table, embedding columns, query/answer columns, metric, training ratio,
 * test name and combination limit.
 *
 * @returns {Promise<object>} Test configuration with tableName, selectedColumns,
 *   queryColumn, answerColumn, embeddingConfig, metricType, trainingRatio,
 *   testName and maxCombinations.
 * @throws {Error} When the database has no tables or the selected table's
 *   metadata cannot be loaded.
 */
async function interactiveTestSetup() {
    console.log(chalk_1.default.bold('🧪 Interactive RAG Test Setup\n'));
    const configManager = new config_1.ConfigManager();
    const config = await configManager.loadConfig();
    const db = new database_1.DatabaseConnection(config.database);
    await db.testConnection();
    const tables = await db.getTables();
    // A list prompt with no choices cannot be answered; fail with a clear message.
    if (tables.length === 0) {
        throw new Error('No tables found in the database.');
    }
    const answers = await inquirer_1.default.prompt([
        {
            type: 'list',
            name: 'tableName',
            message: 'Select table to test:',
            choices: tables
        }
    ]);
    // Get table info for column selection
    const tableInfo = await db.getTableInfo(answers.tableName);
    if (!tableInfo) {
        throw new Error(`Table ${answers.tableName} not found`);
    }
    const columnChoices = tableInfo.columns.map(col => ({
        name: `${col.column_name} (${col.data_type})`,
        value: col.column_name
    }));
    const columnSelection = await inquirer_1.default.prompt({
        type: 'checkbox',
        name: 'selectedColumns',
        message: 'Select columns for embeddings (max 5):',
        choices: columnChoices,
        validate: (input) => {
            if (input.length === 0)
                return 'At least one column must be selected';
            if (input.length > 5)
                return 'Maximum 5 columns allowed';
            return true;
        }
    });
    const querySelection = await inquirer_1.default.prompt({
        type: 'list',
        name: 'queryColumn',
        message: 'Select query column:',
        choices: columnChoices
    });
    const answerSelection = await inquirer_1.default.prompt({
        type: 'list',
        name: 'answerColumn',
        message: 'Select answer column:',
        choices: columnChoices
    });
    const metricSelection = await inquirer_1.default.prompt({
        type: 'list',
        name: 'metricType',
        message: 'Select evaluation metric:',
        choices: [
            { name: 'Similarity (general purpose)', value: 'similarity' },
            { name: 'BRDR (banking regulation specific)', value: 'brdr' },
            // The `test` command advertises brdr|sql|similarity, so offer SQL here too.
            { name: 'SQL (Text-to-SQL)', value: 'sql' }
        ]
    });
    const ratioInput = await inquirer_1.default.prompt({
        type: 'input',
        name: 'trainingRatio',
        message: 'Training ratio (0-1):',
        default: '0.8',
        validate: (input) => {
            // NaN fails both comparisons, so non-numeric input is rejected too.
            const num = Number.parseFloat(input);
            return (num > 0 && num < 1) || 'Must be between 0 and 1';
        }
    });
    const nameInput = await inquirer_1.default.prompt({
        type: 'input',
        name: 'testName',
        message: 'Test name:',
        default: `Test_${new Date().toISOString().replace(/[:.]/g, '-')}`
    });
    const limitInput = await inquirer_1.default.prompt({
        type: 'input',
        name: 'maxCombinations',
        message: 'Maximum combinations to test:',
        default: '20',
        validate: (input) => {
            const num = Number.parseInt(input, 10);
            return (num > 0 && num <= 100) || 'Must be between 1 and 100';
        }
    });
    return {
        tableName: answers.tableName,
        selectedColumns: columnSelection.selectedColumns,
        queryColumn: querySelection.queryColumn,
        answerColumn: answerSelection.answerColumn,
        embeddingConfig: config.embedding,
        metricType: metricSelection.metricType,
        trainingRatio: Number.parseFloat(ratioInput.trainingRatio),
        testName: nameInput.testName,
        maxCombinations: Number.parseInt(limitInput.maxCombinations, 10)
    };
}
/**
 * Interactively collects the configuration for an enhanced (large-dataset)
 * RAG test run.
 *
 * Prompts, in order, for: table, embedding columns, query column, answer
 * column, metric, training ratio, test name, combination limit, batch size,
 * maximum training/testing samples, caching, and sampling strategy.
 *
 * @returns {Promise<object>} Test configuration with tableName, selectedColumns,
 *   queryColumn, answerColumn, embeddingConfig, metricType, trainingRatio,
 *   testName, maxCombinations, batchSize, maxTrainingSamples,
 *   maxTestingSamples, enableCaching and dataSamplingStrategy.
 * @throws {Error} When the database has no tables or the selected table's
 *   metadata cannot be loaded.
 */
async function interactiveEnhancedTestSetup() {
    console.log(chalk_1.default.bold('🚀 Interactive Enhanced RAG Test Setup (Large Datasets)\n'));
    const configManager = new config_1.ConfigManager();
    const config = await configManager.loadConfig();
    const db = new database_1.DatabaseConnection(config.database);
    await db.testConnection();
    const tables = await db.getTables();
    // A list prompt with no choices cannot be answered; fail with a clear message.
    if (tables.length === 0) {
        throw new Error('No tables found in the database.');
    }
    // Asks for an integer within [min, max] and returns it parsed as base 10.
    const askInt = async (name, message, defaultValue, min, max) => {
        const answer = await inquirer_1.default.prompt({
            type: 'input',
            name,
            message,
            default: defaultValue,
            validate: (input) => {
                // NaN fails both comparisons, so non-numeric input is rejected too.
                const num = Number.parseInt(input, 10);
                return (num >= min && num <= max) || `Must be between ${min} and ${max}`;
            }
        });
        return Number.parseInt(answer[name], 10);
    };
    const answers = await inquirer_1.default.prompt([
        {
            type: 'list',
            name: 'tableName',
            message: 'Select table to test:',
            choices: tables
        }
    ]);
    // Get table info for column selection
    const tableInfo = await db.getTableInfo(answers.tableName);
    if (!tableInfo) {
        throw new Error(`Table ${answers.tableName} not found`);
    }
    const columnChoices = tableInfo.columns.map(col => ({
        name: `${col.column_name} (${col.data_type})`,
        value: col.column_name
    }));
    const columnSelection = await inquirer_1.default.prompt({
        type: 'checkbox',
        name: 'selectedColumns',
        message: 'Select columns for embeddings (max 5):',
        choices: columnChoices,
        validate: (input) => {
            if (input.length === 0)
                return 'At least one column must be selected';
            if (input.length > 5)
                return 'Maximum 5 columns allowed';
            return true;
        }
    });
    const querySelection = await inquirer_1.default.prompt({
        type: 'list',
        name: 'queryColumn',
        message: 'Select query column:',
        choices: columnChoices
    });
    const answerSelection = await inquirer_1.default.prompt({
        type: 'list',
        name: 'answerColumn',
        message: 'Select answer column:',
        choices: columnChoices
    });
    const metricSelection = await inquirer_1.default.prompt({
        type: 'list',
        name: 'metricType',
        message: 'Select evaluation metric:',
        choices: [
            { name: 'BRDR (Banking Regulation)', value: 'brdr' },
            { name: 'SQL (Text-to-SQL)', value: 'sql' },
            { name: 'Similarity (General Purpose)', value: 'similarity' }
        ]
    });
    const ratioInput = await inquirer_1.default.prompt({
        type: 'input',
        name: 'trainingRatio',
        message: 'Training ratio (0-1):',
        default: '0.8',
        validate: (input) => {
            const num = Number.parseFloat(input);
            return (num > 0 && num < 1) || 'Must be between 0 and 1';
        }
    });
    const nameInput = await inquirer_1.default.prompt({
        type: 'input',
        name: 'testName',
        message: 'Test name:',
        default: `EnhancedTest_${new Date().toISOString().replace(/[:.]/g, '-')}`
    });
    const maxCombinations = await askInt('maxCombinations', 'Maximum combinations to test:', '20', 1, 100);
    const batchSize = await askInt('batchSize', 'Batch size for processing:', '100', 1, 1000);
    const maxTrainingSamples = await askInt('maxTrainingSamples', 'Maximum training samples:', '10000', 100, 100000);
    const maxTestingSamples = await askInt('maxTestingSamples', 'Maximum testing samples:', '2000', 50, 20000);
    const cachingInput = await inquirer_1.default.prompt({
        type: 'confirm',
        name: 'enableCaching',
        message: 'Enable embedding caching?',
        default: true
    });
    const samplingInput = await inquirer_1.default.prompt({
        type: 'list',
        name: 'dataSamplingStrategy',
        message: 'Data sampling strategy:',
        choices: [
            { name: 'Random (recommended)', value: 'random' },
            { name: 'Stratified (maintains distribution)', value: 'stratified' },
            { name: 'Sequential (first N rows)', value: 'sequential' }
        ]
    });
    return {
        tableName: answers.tableName,
        selectedColumns: columnSelection.selectedColumns,
        queryColumn: querySelection.queryColumn,
        answerColumn: answerSelection.answerColumn,
        embeddingConfig: config.embedding,
        metricType: metricSelection.metricType,
        trainingRatio: Number.parseFloat(ratioInput.trainingRatio),
        testName: nameInput.testName,
        maxCombinations,
        batchSize,
        maxTrainingSamples,
        maxTestingSamples,
        enableCaching: cachingInput.enableCaching,
        dataSamplingStrategy: samplingInput.dataSamplingStrategy
    };
}
/**
 * Interactively collects the configuration for a production RAG test run
 * (train/validation/test split).
 *
 * Prompts, in order, for: table, embedding columns, query column, answer
 * column, metric, the three split ratios, test name, combination limit, batch
 * size, maximum train/validation/test samples, caching, cross-validation
 * folds, query/answer length bounds and sampling strategy. When time-based
 * sampling is chosen it additionally asks for a timestamp column and a time
 * window.
 *
 * @returns {Promise<object>} Test configuration with tableName, selectedColumns,
 *   queryColumn, answerColumn, embeddingConfig, metricType, trainingRatio,
 *   validationRatio, testingRatio, testName, maxCombinations,
 *   maxTrainingSamples, maxValidationSamples, maxTestingSamples, batchSize,
 *   enableCaching, crossValidationFolds, minQueryLength, maxQueryLength,
 *   minAnswerLength, maxAnswerLength, samplingStrategy, timestampColumn and
 *   timeWindow (the last two are undefined unless time-based sampling is used).
 * @throws {Error} When the database has no tables, the selected table's
 *   metadata cannot be loaded, or time-based sampling is chosen for a table
 *   without timestamp/date columns.
 */
async function interactiveProductionTestSetup() {
    console.log(chalk_1.default.bold('🚀 Interactive Production RAG Test Setup (ML Best Practices)\n'));
    const configManager = new config_1.ConfigManager();
    const config = await configManager.loadConfig();
    const db = new database_1.DatabaseConnection(config.database);
    await db.testConnection();
    const tables = await db.getTables();
    // A list prompt with no choices cannot be answered; fail with a clear message.
    if (tables.length === 0) {
        throw new Error('No tables found in the database.');
    }
    // Asks for an integer within [min, max] and returns it parsed as base 10.
    const askInt = async (name, message, defaultValue, min, max) => {
        const answer = await inquirer_1.default.prompt({
            type: 'input',
            name,
            message,
            default: defaultValue,
            validate: (input) => {
                // NaN fails both comparisons, so non-numeric input is rejected too.
                const num = Number.parseInt(input, 10);
                return (num >= min && num <= max) || `Must be between ${min} and ${max}`;
            }
        });
        return Number.parseInt(answer[name], 10);
    };
    const answers = await inquirer_1.default.prompt([
        {
            type: 'list',
            name: 'tableName',
            message: 'Select table to test:',
            choices: tables
        }
    ]);
    // Get table info for column selection
    const tableInfo = await db.getTableInfo(answers.tableName);
    if (!tableInfo) {
        throw new Error(`Table ${answers.tableName} not found`);
    }
    const columnChoices = tableInfo.columns.map(col => ({
        name: `${col.column_name} (${col.data_type})`,
        value: col.column_name
    }));
    const columnSelection = await inquirer_1.default.prompt({
        type: 'checkbox',
        name: 'selectedColumns',
        message: 'Select columns for embeddings (max 5):',
        choices: columnChoices,
        validate: (input) => {
            if (input.length === 0)
                return 'At least one column must be selected';
            if (input.length > 5)
                return 'Maximum 5 columns allowed';
            return true;
        }
    });
    const querySelection = await inquirer_1.default.prompt({
        type: 'list',
        name: 'queryColumn',
        message: 'Select query column:',
        choices: columnChoices
    });
    const answerSelection = await inquirer_1.default.prompt({
        type: 'list',
        name: 'answerColumn',
        message: 'Select answer column:',
        choices: columnChoices
    });
    const metricSelection = await inquirer_1.default.prompt({
        type: 'list',
        name: 'metricType',
        message: 'Select evaluation metric:',
        choices: [
            { name: 'SQL (Text-to-SQL)', value: 'sql' },
            { name: 'BRDR (Banking Regulation)', value: 'brdr' },
            { name: 'Similarity (General Purpose)', value: 'similarity' }
        ]
    });
    const trainRatioInput = await inquirer_1.default.prompt({
        type: 'input',
        name: 'trainingRatio',
        message: 'Training ratio (0-1):',
        default: '0.7',
        validate: (input) => {
            const num = Number.parseFloat(input);
            return (num > 0 && num < 1) || 'Must be between 0 and 1';
        }
    });
    const trainingRatio = Number.parseFloat(trainRatioInput.trainingRatio);
    const valRatioInput = await inquirer_1.default.prompt({
        type: 'input',
        name: 'validationRatio',
        message: 'Validation ratio (0-1):',
        default: '0.15',
        validate: (input) => {
            const num = Number.parseFloat(input);
            if (!(num > 0 && num < 1)) {
                return 'Must be between 0 and 1';
            }
            // Leave room for the test split; otherwise the next prompt could
            // never be satisfied.
            return trainingRatio + num < 1 || 'Training and validation ratios must sum to less than 1';
        }
    });
    const validationRatio = Number.parseFloat(valRatioInput.validationRatio);
    // Suggest whatever is left over; rounding removes floating-point noise
    // (e.g. 0.15000000000000002 -> "0.15").
    const remainingRatio = Number((1 - trainingRatio - validationRatio).toFixed(4));
    const testRatioInput = await inquirer_1.default.prompt({
        type: 'input',
        name: 'testingRatio',
        message: 'Testing ratio (0-1):',
        default: String(remainingRatio),
        validate: (input) => {
            // `input` is the raw string typed for THIS prompt; the earlier ratios
            // come from the previous prompts' answers, not from properties on it.
            const testRatio = Number.parseFloat(input);
            if (!(testRatio > 0 && testRatio < 1)) {
                return 'Must be between 0 and 1';
            }
            const total = trainingRatio + validationRatio + testRatio;
            return Math.abs(total - 1) < 0.01 || 'Ratios must sum to 1';
        }
    });
    const nameInput = await inquirer_1.default.prompt({
        type: 'input',
        name: 'testName',
        message: 'Test name:',
        default: `ProductionTest_${new Date().toISOString().replace(/[:.]/g, '-')}`
    });
    const maxCombinations = await askInt('maxCombinations', 'Maximum combinations to test:', '20', 1, 100);
    const batchSize = await askInt('batchSize', 'Batch size for processing:', '100', 1, 1000);
    const maxTrainingSamples = await askInt('maxTrainingSamples', 'Maximum training samples:', '50000', 100, 200000);
    const maxValidationSamples = await askInt('maxValidationSamples', 'Maximum validation samples:', '10000', 50, 50000);
    const maxTestingSamples = await askInt('maxTestingSamples', 'Maximum testing samples:', '10000', 50, 50000);
    const cachingInput = await inquirer_1.default.prompt({
        type: 'confirm',
        name: 'enableCaching',
        message: 'Enable embedding caching?',
        default: true
    });
    const crossValidationFolds = await askInt('crossValidationFolds', 'Cross-validation folds:', '5', 2, 10);
    const minQueryLength = await askInt('minQueryLength', 'Minimum query length:', '10', 1, 100);
    const maxQueryLength = await askInt('maxQueryLength', 'Maximum query length:', '500', 50, 2000);
    const minAnswerLength = await askInt('minAnswerLength', 'Minimum answer length:', '10', 1, 100);
    const maxAnswerLength = await askInt('maxAnswerLength', 'Maximum answer length:', '1000', 50, 5000);
    const samplingInput = await inquirer_1.default.prompt({
        type: 'list',
        name: 'samplingStrategy',
        message: 'Data sampling strategy:',
        choices: [
            { name: 'Random (recommended)', value: 'random' },
            { name: 'Stratified (maintains distribution)', value: 'stratified' },
            { name: 'Time-based (if you have timestamps)', value: 'time_based' },
            { name: 'Query complexity-based', value: 'query_complexity' }
        ]
    });
    let timestampColumn;
    let timeWindow;
    if (samplingInput.samplingStrategy === 'time_based') {
        // Only columns whose reported type mentions "timestamp" or "date" qualify.
        const timestampChoices = columnChoices.filter(choice => {
            const colInfo = tableInfo.columns.find(c => c.column_name === choice.value);
            return colInfo?.data_type?.includes('timestamp') || colInfo?.data_type?.includes('date');
        });
        if (timestampChoices.length === 0) {
            throw new Error(`Table ${answers.tableName} has no timestamp or date columns for time-based sampling`);
        }
        const timestampInput = await inquirer_1.default.prompt({
            type: 'list',
            name: 'timestampColumn',
            message: 'Select timestamp column:',
            choices: timestampChoices
        });
        timestampColumn = timestampInput.timestampColumn;
        const timeWindowInput = await inquirer_1.default.prompt({
            type: 'list',
            name: 'timeWindow',
            message: 'Select time window:',
            choices: [
                { name: 'Daily', value: 'daily' },
                { name: 'Weekly', value: 'weekly' },
                { name: 'Monthly', value: 'monthly' }
            ]
        });
        timeWindow = timeWindowInput.timeWindow;
    }
    return {
        tableName: answers.tableName,
        selectedColumns: columnSelection.selectedColumns,
        queryColumn: querySelection.queryColumn,
        answerColumn: answerSelection.answerColumn,
        embeddingConfig: config.embedding,
        metricType: metricSelection.metricType,
        trainingRatio,
        validationRatio,
        testingRatio: Number.parseFloat(testRatioInput.testingRatio),
        testName: nameInput.testName,
        maxCombinations,
        maxTrainingSamples,
        maxValidationSamples,
        maxTestingSamples,
        batchSize,
        enableCaching: cachingInput.enableCaching,
        crossValidationFolds,
        minQueryLength,
        maxQueryLength,
        minAnswerLength,
        maxAnswerLength,
        samplingStrategy: samplingInput.samplingStrategy,
        timestampColumn,
        timeWindow
    };
}
/**
 * Runs a production RAG experiment end to end: validates the test
 * configuration, initializes the tester, runs the experiment, displays the
 * results and writes them as JSON into the output directory.
 *
 * Exits the process with code 1 when validation fails, and with code 0 when
 * the user declines to continue past validation warnings.
 *
 * @param {object} testConfig - Production test configuration (its `testName`
 *   is used to derive the output file name).
 * @param {object} config - Loaded application configuration; `database`,
 *   `embedding` and optional `outputPath` are read here.
 * @returns {Promise<void>}
 * @throws Re-throws any error raised while running, after marking the spinner failed.
 */
async function runProductionExperiment(testConfig, config) {
    const spinner = (0, ora_1.default)('Initializing Production RAG Tester...').start();
    // Prints a yellow heading followed by one bullet line per warning.
    const printWarnings = (heading, warnings) => {
        console.warn(chalk_1.default.yellow(heading));
        warnings.forEach(warning => console.warn(chalk_1.default.yellow(` • ${warning}`)));
    };
    try {
        const database = new database_1.DatabaseConnection(config.database);
        const embeddingGenerator = new embeddings_1.EmbeddingGenerator(config.embedding);
        const tester = new production_tester_1.ProductionRAGTester(database, embeddingGenerator);
        // Validate configuration
        spinner.text = 'Validating production configuration...';
        const validation = await tester.validateProductionConfiguration(testConfig);
        if (!validation.isValid) {
            spinner.fail('Production configuration validation failed');
            console.error(chalk_1.default.red('\nErrors:'));
            validation.errors.forEach(error => console.error(chalk_1.default.red(` • ${error}`)));
            if (validation.warnings.length > 0) {
                printWarnings('\nWarnings:', validation.warnings);
            }
            process.exit(1);
        }
        if (validation.warnings.length > 0) {
            spinner.warn('Production configuration has warnings');
            printWarnings('Warnings:', validation.warnings);
            const confirmation = await inquirer_1.default.prompt([{
                    type: 'confirm',
                    name: 'proceed',
                    message: 'Continue anyway?',
                    default: true
                }]);
            if (!confirmation.proceed) {
                console.log(chalk_1.default.gray('Production test cancelled.'));
                process.exit(0);
            }
        }
        // Initialize embeddings
        spinner.text = 'Initializing embedding model...';
        await tester.initialize();
        spinner.succeed('Production RAG Tester initialized');
        // Run production experiment
        console.log(chalk_1.default.bold('\n🚀 Starting production experiment...\n'));
        const results = await tester.runProductionExperiment(testConfig);
        // Display production results
        displayProductionResults(results);
        // Save results: the file name is the test name with every
        // non-alphanumeric character replaced by an underscore.
        const outputDir = config.outputPath || './rag-test-results';
        const outputDirExists = fs.existsSync(outputDir);
        if (!outputDirExists) {
            fs.mkdirSync(outputDir, { recursive: true });
        }
        const safeName = testConfig.testName.replace(/[^a-zA-Z0-9]/g, '_');
        const filepath = path.join(outputDir, `${safeName}.json`);
        const serialized = JSON.stringify(results, null, 2);
        fs.writeFileSync(filepath, serialized);
        console.log(chalk_1.default.green(`\n💾 Production results saved to: ${filepath}`));
    }
    catch (error) {
        spinner.fail('Production experiment failed');
        throw error;
    }
}
/**
 * Drives one enhanced experiment from start to finish.
 *
 * In order: builds the database connection, the embedding generator and an
 * EnhancedRAGTester; validates `testConfig`; initializes the tester; runs the
 * experiment; prints the results; and writes them as pretty-printed JSON into
 * `config.outputPath` (or './rag-test-results' when that is falsy).
 *
 * Validation errors terminate the process with exit code 1. Validation
 * warnings trigger a confirmation prompt; declining it terminates the process
 * with exit code 0.
 *
 * @param {object} testConfig - Test settings; `testName` is read here to build the output filename.
 * @param {object} config - CLI configuration; `database`, `embedding` and `outputPath` are read here.
 * @returns {Promise<void>}
 * @throws Re-throws any error raised by the workflow after marking the spinner as failed.
 */
async function runEnhancedExperiment(testConfig, config) {
    const progress = (0, ora_1.default)('Initializing Enhanced RAG Tester...').start();
    // Prints a yellow heading followed by one yellow bullet line per warning.
    const printWarnings = (heading, warnings) => {
        console.warn(chalk_1.default.yellow(heading));
        for (const warning of warnings) {
            console.warn(chalk_1.default.yellow(` • ${warning}`));
        }
    };
    try {
        const connection = new database_1.DatabaseConnection(config.database);
        const generator = new embeddings_1.EmbeddingGenerator(config.embedding);
        const tester = new enhanced_tester_1.EnhancedRAGTester(connection, generator);

        // Step 1: validate the configuration before doing any expensive work.
        progress.text = 'Validating enhanced configuration...';
        const report = await tester.validateEnhancedConfiguration(testConfig);
        if (!report.isValid) {
            progress.fail('Enhanced configuration validation failed');
            console.error(chalk_1.default.red('\nErrors:'));
            for (const problem of report.errors) {
                console.error(chalk_1.default.red(` • ${problem}`));
            }
            if (report.warnings.length > 0) {
                printWarnings('\nWarnings:', report.warnings);
            }
            process.exit(1);
        }
        if (report.warnings.length > 0) {
            progress.warn('Enhanced configuration has warnings');
            printWarnings('Warnings:', report.warnings);
            const confirmation = await inquirer_1.default.prompt([{
                    type: 'confirm',
                    name: 'proceed',
                    message: 'Continue anyway?',
                    default: true
                }]);
            if (!confirmation.proceed) {
                console.log(chalk_1.default.gray('Enhanced test cancelled.'));
                process.exit(0);
            }
        }

        // Step 2: bring up the embedding model.
        progress.text = 'Initializing embedding model...';
        await tester.initialize();
        progress.succeed('Enhanced RAG Tester initialized');

        // Step 3: run the experiment and show the outcome.
        console.log(chalk_1.default.bold('\n🚀 Starting enhanced experiment...\n'));
        const outcome = await tester.runEnhancedExperiment(testConfig);
        displayEnhancedResults(outcome);

        // Step 4: persist the outcome next to any earlier runs.
        const targetDir = config.outputPath || './rag-test-results';
        if (!fs.existsSync(targetDir)) {
            fs.mkdirSync(targetDir, { recursive: true });
        }
        // Every non-alphanumeric character of the test name becomes '_' in the filename.
        const safeName = testConfig.testName.replace(/[^a-zA-Z0-9]/g, '_');
        const targetFile = path.join(targetDir, `${safeName}.json`);
        fs.writeFileSync(targetFile, JSON.stringify(outcome, null, 2));
        console.log(chalk_1.default.green(`\n💾 Enhanced results saved to: ${targetFile}`));
    }
    catch (failure) {
        progress.fail('Enhanced experiment failed');
        throw failure;
    }
}
/**
 * Drives one standard experiment from start to finish.
 *
 * In order: builds the database connection, the embedding generator and a
 * RAGTester; validates `testConfig`; initializes the tester; runs the
 * experiment; prints the results; and writes them as pretty-printed JSON into
 * `config.outputPath` (or './rag-test-results' when that is falsy).
 *
 * Validation errors terminate the process with exit code 1. Validation
 * warnings trigger a confirmation prompt; declining it terminates the process
 * with exit code 0.
 *
 * @param {object} testConfig - Test settings; `testName` is read here to build the output filename.
 * @param {object} config - CLI configuration; `database`, `embedding` and `outputPath` are read here.
 * @returns {Promise<void>}
 * @throws Re-throws any error raised by the workflow after marking the spinner as failed.
 */
async function runExperiment(testConfig, config) {
    const progress = (0, ora_1.default)('Initializing RAG Tester...').start();
    // Prints a yellow heading followed by one yellow bullet line per warning.
    const printWarnings = (heading, warnings) => {
        console.warn(chalk_1.default.yellow(heading));
        for (const warning of warnings) {
            console.warn(chalk_1.default.yellow(` • ${warning}`));
        }
    };
    try {
        const connection = new database_1.DatabaseConnection(config.database);
        const generator = new embeddings_1.EmbeddingGenerator(config.embedding);
        const tester = new tester_1.RAGTester(connection, generator);

        // Step 1: validate the configuration before doing any expensive work.
        progress.text = 'Validating configuration...';
        const report = await tester.validateConfiguration(testConfig);
        if (!report.isValid) {
            progress.fail('Configuration validation failed');
            console.error(chalk_1.default.red('\nErrors:'));
            for (const problem of report.errors) {
                console.error(chalk_1.default.red(` • ${problem}`));
            }
            if (report.warnings.length > 0) {
                printWarnings('\nWarnings:', report.warnings);
            }
            process.exit(1);
        }
        if (report.warnings.length > 0) {
            progress.warn('Configuration has warnings');
            printWarnings('Warnings:', report.warnings);
            const confirmation = await inquirer_1.default.prompt([{
                    type: 'confirm',
                    name: 'proceed',
                    message: 'Continue anyway?',
                    default: true
                }]);
            if (!confirmation.proceed) {
                console.log(chalk_1.default.gray('Test cancelled.'));
                process.exit(0);
            }
        }

        // Step 2: bring up the embedding model.
        progress.text = 'Initializing embedding model...';
        await tester.initialize();
        progress.succeed('RAG Tester initialized');

        // Step 3: run the experiment and show the outcome.
        console.log(chalk_1.default.bold('\n🚀 Starting experiment...\n'));
        const outcome = await tester.runExperiment(testConfig);
        displayResults(outcome);

        // Step 4: persist the outcome next to any earlier runs.
        const targetDir = config.outputPath || './rag-test-results';
        if (!fs.existsSync(targetDir)) {
            fs.mkdirSync(targetDir, { recursive: true });
        }
        // Every non-alphanumeric character of the test name becomes '_' in the filename.
        const safeName = testConfig.testName.replace(/[^a-zA-Z0-9]/g, '_');
        const targetFile = path.join(targetDir, `${safeName}.json`);
        fs.writeFileSync(targetFile, JSON.stringify(outcome, null, 2));
        console.log(chalk_1.default.green(`\n💾 Results saved to: ${targetFile}`));
    }
    catch (failure) {
        progress.fail('Experiment failed');
        throw failure;
    }
}
/**
 * Prints a finished experiment to stdout: a summary table, the best
 * combination, and a ranked table of the five highest-scoring results.
 *
 * The ranking is computed on a shallow copy of `results.allResults`, so the
 * caller's array keeps its original order. This matters because
 * `runExperiment` serialises the same `results` object to JSON right after
 * calling this function; an in-place sort would silently reorder the saved data.
 *
 * @param {object} results - Experiment output. Reads `testName`,
 *   `processingTime` (divided by 1000 and shown with an "s" suffix),
 *   `summary` (`totalCombinations`, `bestScore`, `worstScore`, `averageScore`,
 *   `bestCombination.name`) and `allResults` (each entry's `combination.name`,
 *   `averageScore`, `totalTests`).
 * @returns {void}
 */
function displayResults(results) {
    console.log(chalk_1.default.bold('\n🎉 Experiment Complete!\n'));
    // Summary table
    const summaryTable = new cli_table3_1.default({
        head: [chalk_1.default.cyan('Metric'), chalk_1.default.cyan('Value')],
        style: { head: [], border: [] }
    });
    summaryTable.push(
        ['Test Name', results.testName],
        ['Total Combinations', results.summary.totalCombinations.toString()],
        ['Best Score', results.summary.bestScore.toFixed(4)],
        ['Worst Score', results.summary.worstScore.toFixed(4)],
        ['Average Score', results.summary.averageScore.toFixed(4)],
        ['Processing Time', `${(results.processingTime / 1000).toFixed(1)}s`]
    );
    console.log(summaryTable.toString());
    // Best combination
    console.log(chalk_1.default.bold('\n🏆 Best Combination:'));
    console.log(chalk_1.default.green(` ${results.summary.bestCombination.name}`));
    console.log(chalk_1.default.green(` Score: ${results.summary.bestScore.toFixed(4)}`));
    // Top 5 results
    console.log(chalk_1.default.bold('\n📊 Top 5 Results:'));
    // Array.prototype.sort works in place, so rank a copy and leave the
    // caller's array untouched.
    const topResults = [...results.allResults]
        .sort((a, b) => b.averageScore - a.averageScore)
        .slice(0, 5);
    const resultsTable = new cli_table3_1.default({
        head: [chalk_1.default.cyan('Rank'), chalk_1.default.cyan('Combination'), chalk_1.default.cyan('Score'), chalk_1.default.cyan('Tests')],
        style: { head: [], border: [] }
    });
    topResults.forEach((result, index) => {
        resultsTable.push([
            (index + 1).toString(),
            result.combination.name,
            result.averageScore.toFixed(4),
            result.totalTests.toString()
        ]);
    });
    console.log(resultsTable.toString());
}
function displayEnhancedResults(results) {
console.log(chalk_1.default.bold('\n🚀 Enhanced Experiment Complete!\n'));
// Enhanced summary table
const summaryTable = new cli_table3_1.default({
head: [chalk_1.default.cyan('Metric'), chalk_1.default.cyan('Value')],
style: { head: [], border: [] }
});
summaryTable.push(['Test Name', results.testName], ['Total Combinations', results.summary.totalCombinations.toString()], ['Best Score', results.summary.bestScore.toFixed(4)], ['Worst Score', results.summary.worstScore.toFixed(4)], ['Average Score', results.summary.averageScore.toFixed(4)], ['Processing Time', `${(results.processingTime / 1000).toFixed(1)}s`]);
// Add enhanced metrics if available
if ('medianScore' in results.summary) {
summaryTable.push(['Median Score', results.summary.medianScore.toFixed(4)], ['Q1 Score', results.summary.q1Score.toFixed(4)], ['Q3 Score', results.summary.q3Score.toFixed(4)], ['Total Tests', results.summary.totalTests.toString()], ['Average Confidence', results.summary.averageConfidence.toFixed(4)]);
}
console.log(summaryTable.toString());
// Best combination
console.log(chalk_1.default.bold('\n🏆 Best Combination:'));
console.log(chalk_1.default.green(` ${results.summary.bestCombination.name}`));
console.log(chalk_1.default.green(` Score: ${results.summary.bestScore.toFixed(4)}`));
// Top 5 results with enhanced details
console.log(chalk_1.default.bold('\n📊 Top 5 Results:'));
const topResults = results.allResults
.sort((a,