glassbox-ai
Version:
Enterprise-grade AI testing framework with reliability, observability, and comprehensive validation
551 lines (480 loc) • 15.3 kB
JavaScript
import { OptimizedAPIClient } from './optimized-api-client.js';
import { OptimizedParser } from './optimized-parser.js';
import { globalProgressManager } from './progress-manager.js';
import { globalMemoryProfiler } from './memory-profiler.js';
import { validateContent } from '../validators/content-validator.js';
import { detectPII } from '../validators/pii-detector.js';
import { calculateRequestCost } from '../validators/cost-calculator.js';
import { createCachedClient } from '../cache/cache-integration.js';
import { CacheConfig } from '../cache/cache-config.js';
/**
* Optimized Test Runner with all performance optimizations
*/
export class OptimizedTestRunner {
constructor(options = {}) {
this.options = {
maxConcurrency: options.maxConcurrency || 5,
batchSize: options.batchSize || 10,
enableStreaming: options.enableStreaming !== false,
enableCaching: options.enableCaching !== false,
enableProgress: options.enableProgress !== false,
enableMemoryProfiling: options.enableMemoryProfiling !== false,
...options
};
this.apiClients = new Map();
this.parser = new OptimizedParser({
cacheSize: 100,
batchSize: this.options.batchSize,
lazyLoad: true,
validateOnLoad: true
});
this.cacheConfig = null;
this.stats = {
totalTests: 0,
completedTests: 0,
failedTests: 0,
cachedResponses: 0,
averageResponseTime: 0,
memoryUsage: 0
};
this.setupEventListeners();
}
/**
* Setup event listeners for monitoring
*/
setupEventListeners() {
// Parser events
this.parser.on('batch-start', (data) => {
if (this.options.enableProgress) {
globalProgressManager.startOperation(
`parse-batch-${data.batchIndex}`,
`Parsing batch ${data.batchIndex + 1}/${data.totalBatches}`,
{ total: data.files.length, showProgressBar: true }
);
}
});
this.parser.on('batch-complete', (data) => {
if (this.options.enableProgress) {
globalProgressManager.completeOperation(`parse-batch-${data.batchIndex}`, data.results);
}
});
this.parser.on('file-parsed', (data) => {
if (this.options.enableProgress) {
globalProgressManager.updateProgress(`parse-batch-${data.batchIndex}`, data.index + 1);
}
});
// Memory profiling events
if (this.options.enableMemoryProfiling) {
globalMemoryProfiler.on('memory-leak-detected', (data) => {
console.warn('Memory leak detected:', data.recommendation);
});
globalMemoryProfiler.on('snapshot-taken', (snapshot) => {
this.stats.memoryUsage = snapshot.heapUsed;
});
}
}
/**
* Initialize the test runner
*/
async initialize() {
// Initialize cache if enabled
if (this.options.enableCaching) {
this.cacheConfig = new CacheConfig();
await this.cacheConfig.load();
}
// Start memory profiling if enabled
if (this.options.enableMemoryProfiling) {
globalMemoryProfiler.start();
}
// Initialize API clients
await this.initializeAPIClients();
}
/**
* Initialize API clients with connection pooling
*/
async initializeAPIClients() {
const providers = [
{
name: 'openai',
baseURL: 'https://api.openai.com',
apiKey: process.env.OPENAI_API_KEY,
defaultModel: 'gpt-3.5-turbo'
},
{
name: 'ollama',
baseURL: 'http://localhost:11434',
apiKey: null,
defaultModel: 'mistral:7b'
}
];
for (const provider of providers) {
if (provider.apiKey || provider.name === 'ollama') {
const client = new OptimizedAPIClient(provider.name, {
baseURL: provider.baseURL,
apiKey: provider.apiKey,
defaultModel: provider.defaultModel,
maxConcurrentRequests: this.options.maxConcurrency
});
// Wrap with caching if enabled
if (this.options.enableCaching) {
const cachedClient = createCachedClient(client, this.cacheConfig.getCacheOptions());
this.apiClients.set(provider.name, cachedClient);
} else {
this.apiClients.set(provider.name, client);
}
}
}
}
/**
* Run tests with all optimizations
*/
async runTests(testObjects) {
await this.initialize();
const allTests = this.extractAllTests(testObjects);
this.stats.totalTests = allTests.length;
if (this.options.enableProgress) {
globalProgressManager.startBatchOperation(
'test-execution',
'Running tests',
allTests.length
);
}
// Process tests in batches
const batches = this.createBatches(allTests, this.options.batchSize);
const results = [];
for (let i = 0; i < batches.length; i++) {
const batch = batches[i];
if (this.options.enableProgress) {
globalProgressManager.updateProgress('test-execution', results.length);
}
const batchResults = await this.executeBatch(batch);
results.push(...batchResults);
// Memory cleanup between batches
if (this.options.enableMemoryProfiling) {
globalMemoryProfiler.forceGC();
}
}
if (this.options.enableProgress) {
globalProgressManager.completeOperation('test-execution', results);
}
// Aggregate results
const aggregation = this.aggregateResults(results);
// Stop memory profiling
if (this.options.enableMemoryProfiling) {
globalMemoryProfiler.stop();
}
return {
raw: results,
aggregated: aggregation,
machineReadable: {
summary: aggregation.summary,
performance: aggregation.performance,
models: aggregation.models,
failures: aggregation.failures,
cache: aggregation.cache,
memory: this.options.enableMemoryProfiling ? globalMemoryProfiler.getMemoryReport() : null,
tests: results
}
};
}
/**
* Extract all tests from test objects
*/
extractAllTests(testObjects) {
const allTests = [];
for (const testObj of testObjects) {
const { name: suiteName, tests, settings = {} } = testObj;
for (const test of tests) {
allTests.push({
test,
settings,
suiteName
});
}
}
return allTests;
}
/**
* Create batches of tests
*/
createBatches(tests, batchSize) {
const batches = [];
for (let i = 0; i < tests.length; i += batchSize) {
batches.push(tests.slice(i, i + batchSize));
}
return batches;
}
/**
* Execute a batch of tests
*/
async executeBatch(batch) {
const promises = batch.map(({ test, settings, suiteName }) =>
this.executeTest(test, settings, suiteName)
);
const results = await Promise.allSettled(promises);
return results.map((result, index) => {
if (result.status === 'fulfilled') {
this.stats.completedTests++;
return result.value;
} else {
this.stats.failedTests++;
return {
suite: batch[index].suiteName,
test: batch[index].test.name,
pass: false,
error: result.reason.message,
durationMs: 0,
cached: false
};
}
});
}
/**
* Execute a single test with optimizations
*/
async executeTest(test, settings, suiteName) {
const { name, description, prompt, expect } = test;
const startTime = Date.now();
try {
// Try to get from cache first
let response = null;
let cached = false;
if (this.options.enableCaching) {
for (const [providerName, client] of this.apiClients.entries()) {
try {
const cachedResponse = await client.get(prompt, settings.model || client.defaultModel, settings);
if (cachedResponse) {
response = cachedResponse.response;
cached = true;
this.stats.cachedResponses++;
break;
}
} catch (error) {
// Cache miss, continue to API call
}
}
}
// If not cached, make API call
if (!response) {
response = await this.makeAPICall(prompt, settings);
}
// Validate response
const validation = this.validateResponse(response, expect, settings);
const endTime = Date.now();
const durationMs = endTime - startTime;
this.updateStats(durationMs);
return {
suite: suiteName,
test: name,
description,
prompt,
response,
pass: validation.pass,
details: validation.details,
durationMs,
cached,
modelUsed: settings.model || 'unknown',
fallbackUsed: false
};
} catch (error) {
const endTime = Date.now();
const durationMs = endTime - startTime;
return {
suite: suiteName,
test: name,
description,
prompt,
response: '',
pass: false,
details: [error.message],
durationMs,
error: error.message,
cached: false
};
}
}
/**
* Make API call with streaming support
*/
async makeAPICall(prompt, settings) {
const client = this.getBestClient(settings.model);
if (!client) {
throw new Error('No available API client');
}
const options = {
model: settings.model || client.defaultModel,
maxTokens: settings.max_tokens || 1000,
temperature: settings.temperature || 0.7,
stream: this.options.enableStreaming && prompt.length > 1000 // Stream for long prompts
};
const result = await client.makeRequest(prompt, options);
return result.response || result.content || result.text || '';
}
/**
* Get the best available client
*/
getBestClient(preferredModel = null) {
// Try to find client that supports the preferred model
if (preferredModel) {
for (const [name, client] of this.apiClients.entries()) {
if (client.defaultModel === preferredModel || name === 'openai') {
return client;
}
}
}
// Return first available client
return this.apiClients.values().next().value;
}
/**
* Validate response against expectations
*/
validateResponse(response, expect, settings = {}) {
const results = {
pass: true,
details: []
};
try {
// Content validation
if (expect.contains || expect.not_contains) {
const contentValidation = validateContent(response, expect);
if (!contentValidation.overall.pass) {
results.pass = false;
results.details.push(...contentValidation.overall.details);
}
}
// PII detection
if (settings.safety_checks?.block_pii) {
const piiResult = detectPII(response);
if (piiResult.detected) {
results.pass = false;
results.details.push(`PII detected: ${piiResult.types.join(', ')}`);
}
}
} catch (error) {
results.pass = false;
results.details.push(`Validation error: ${error.message}`);
}
return results;
}
/**
* Update statistics
*/
updateStats(duration) {
this.stats.averageResponseTime =
(this.stats.averageResponseTime * (this.stats.completedTests - 1) + duration) /
this.stats.completedTests;
}
/**
* Aggregate test results
*/
aggregateResults(results) {
const aggregation = {
summary: {
total: results.length,
passed: results.filter(r => r.pass).length,
failed: results.filter(r => !r.pass).length,
totalDuration: results.reduce((sum, r) => sum + r.durationMs, 0),
totalCost: results.reduce((sum, r) => sum + (parseFloat(r.cost) || 0), 0),
cachedResponses: results.filter(r => r.cached).length,
cacheHitRate: 0
},
performance: {
averageDuration: 0,
fastestTest: null,
slowestTest: null,
durationDistribution: {
fast: 0,
medium: 0,
slow: 0
}
},
models: {
usage: {},
fallbackRate: 0
},
failures: {
byCategory: {},
total: 0
},
cache: {
hits: this.stats.cachedResponses,
misses: results.length - this.stats.cachedResponses,
hitRate: 0
}
};
// Calculate cache statistics
aggregation.summary.cacheHitRate = results.length > 0
? (aggregation.summary.cachedResponses / results.length) * 100
: 0;
aggregation.cache.hitRate = aggregation.summary.cacheHitRate;
// Calculate performance metrics
if (results.length > 0) {
aggregation.performance.averageDuration = aggregation.summary.totalDuration / results.length;
const sortedByDuration = [...results].sort((a, b) => a.durationMs - b.durationMs);
aggregation.performance.fastestTest = sortedByDuration[0];
aggregation.performance.slowestTest = sortedByDuration[sortedByDuration.length - 1];
results.forEach(result => {
if (result.durationMs < 5000) aggregation.performance.durationDistribution.fast++;
else if (result.durationMs < 15000) aggregation.performance.durationDistribution.medium++;
else aggregation.performance.durationDistribution.slow++;
});
}
// Calculate model usage
results.forEach(result => {
const model = result.modelUsed || 'unknown';
aggregation.models.usage[model] = (aggregation.models.usage[model] || 0) + 1;
});
// Calculate failure categories
results.forEach(result => {
if (!result.pass) {
const category = this.categorizeFailure(result);
aggregation.failures.byCategory[category] = (aggregation.failures.byCategory[category] || 0) + 1;
aggregation.failures.total++;
}
});
// Calculate success rate
aggregation.summary.successRate = results.length > 0
? (aggregation.summary.passed / results.length) * 100
: 0;
return aggregation;
}
/**
* Categorize test failure
*/
categorizeFailure(result) {
if (result.error) {
if (result.error.includes('timeout')) return 'TIMEOUT_FAILURE';
if (result.error.includes('network')) return 'NETWORK_FAILURE';
if (result.error.includes('model')) return 'MODEL_FAILURE';
return 'UNKNOWN_FAILURE';
}
if (result.details && result.details.length > 0) {
const details = result.details.join(' ').toLowerCase();
if (details.includes('pii')) return 'PII_FAILURE';
if (details.includes('content')) return 'CONTENT_FAILURE';
return 'VALIDATION_FAILURE';
}
return 'UNKNOWN_FAILURE';
}
/**
* Get runner statistics
*/
getStats() {
return {
...this.stats,
parserStats: this.parser.getStats(),
memoryStats: this.options.enableMemoryProfiling ? globalMemoryProfiler.getMemoryReport() : null,
progressStats: globalProgressManager.getStats()
};
}
/**
* Cleanup resources
*/
async cleanup() {
if (this.options.enableMemoryProfiling) {
globalMemoryProfiler.stop();
}
globalProgressManager.clear();
this.parser.clearCache();
}
}