/*
 * quantum-cli-core — Quantum CLI Core - Multi-LLM Collaboration System
 * (extracted file header: 352 lines, 16.8 kB, JavaScript)
 */
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { DualModelEngine } from './dual-engine.js';
import { UncertaintyDetector } from './detection/uncertainty-detector.js';
import { BaseLLMProvider, } from './providers/base-provider.js';
// Mock provider for performance testing
/**
 * Test double for an LLM provider with a configurable, deterministic latency.
 *
 * Simulates a provider whose `generate()` resolves after `latency` ms and
 * echoes the prompt back, so benchmarks can reason about expected timings.
 */
class MockPerformanceProvider extends BaseLLMProvider {
// Per-instance provider id; assigned in the constructor (see note there).
id;
capabilities = ['test'];
// Simulated network latency in milliseconds.
latency;
/**
 * @param {string} id - Provider id, also used as its display name.
 * @param {number} [latency=100] - Simulated response delay in ms.
 */
constructor(id, latency = 100) {
super({ id, name: id, type: 'mock' });
// BUG FIX: the previous class-field initializer `id = 'mock-performance'`
// ran *after* super() and clobbered the per-instance id for every
// instance, so all mocks reported providerId 'mock-performance'.
// Assigning here preserves the id each test passes in ('fast', 'slow', …).
this.id = id;
this.latency = latency;
}
/**
 * Produce a deterministic mock response after the configured delay.
 * @param {string} prompt - The query text; echoed into the response content.
 * @param {object} [options] - Accepted for interface parity; unused.
 * @returns {Promise<object>} Response with providerId, content, confidence,
 *   latency, token count, and cost (via the base class's calculateCost).
 */
async generate(prompt, options) {
// Simulate network latency
await new Promise((resolve) => setTimeout(resolve, this.latency));
return {
providerId: this.id,
content: `Mock response for: ${prompt}`,
confidence: 0.8,
latency: this.latency,
tokens: prompt.length,
cost: this.calculateCost(prompt.length),
};
}
/** Mock credentials are always valid. */
async validateCredentials() {
return true;
}
}
// Performance benchmark suite for the dual-model collaboration engine and
// the uncertainty detector.
// NOTE(review): every wall-clock threshold below (<500ms, <10ms, …) is tuned
// for an idle machine; these assertions may be flaky on loaded CI runners.
describe('Performance Benchmarks', () => {
let fastProvider;
let slowProvider;
let uncertaintyDetector;
// Fresh fixtures per test: 50ms vs 200ms simulated provider latency.
beforeEach(() => {
fastProvider = new MockPerformanceProvider('fast', 50);
slowProvider = new MockPerformanceProvider('slow', 200);
uncertaintyDetector = new UncertaintyDetector();
// Mock feature flags to enable collaboration
// NOTE(review): vi.doMock only affects modules imported *after* this call;
// the static imports at the top of this file are already resolved, so this
// mock is likely a no-op — confirm, or hoist to a top-level vi.mock().
vi.doMock('./feature-flags.js', () => ({
isCollaborationEnabled: () => true,
}));
});
describe('DualModelEngine Performance', () => {
// End-to-end latency of one verified query: fast (50ms) + slow (200ms)
// providers plus engine overhead must stay under 500ms.
it('should complete single query within acceptable time', async () => {
const config = {
autoVerifyThreshold: 0.7,
maxCostPerQuery: 1.0,
enableSynthesis: true,
};
const collaborationConfig = {
enabled: true,
verificationMode: 'automatic',
autoVerifyThreshold: 0.1, // Always verify for testing
maxCostPerQuery: 1.0,
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
const startTime = performance.now();
await engine.generateWithVerification('Test query for performance');
const endTime = performance.now();
const duration = endTime - startTime;
// Should complete within 500ms (fast + slow + processing overhead)
expect(duration).toBeLessThan(500);
});
// Five queries issued concurrently should overlap their simulated latency
// rather than serialize (manual mode skips the slow verification pass).
it('should handle parallel execution efficiently', async () => {
const config = {
autoVerifyThreshold: 0.7,
maxCostPerQuery: 1.0,
enableSynthesis: true,
};
const collaborationConfig = {
enabled: true,
verificationMode: 'manual', // Only verify when explicitly requested
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
const queries = [
'What is React?',
'How to use TypeScript?',
'Explain async/await',
'Database design patterns',
'API security best practices',
];
const startTime = performance.now();
// Run queries in parallel
const promises = queries.map((query) => engine.generateWithVerification(query));
await Promise.all(promises);
const endTime = performance.now();
const duration = endTime - startTime;
// Parallel execution should be faster than sequential
// Expected: ~100ms (parallel) vs ~500ms (sequential)
expect(duration).toBeLessThan(200);
});
// A 1-second secondary provider against a 300ms maxLatency: the engine may
// either return quickly without the slow verification or reject — both are
// accepted, but either way it must settle well under the 1s provider delay.
it('should respect timeout constraints', async () => {
const timeoutProvider = new MockPerformanceProvider('timeout', 1000); // 1 second delay
const config = {
autoVerifyThreshold: 0.7,
maxCostPerQuery: 1.0,
};
const collaborationConfig = {
enabled: true,
verificationMode: 'automatic',
maxLatency: 300, // 300ms timeout
providers: [],
};
const engine = new DualModelEngine(fastProvider, timeoutProvider, config, collaborationConfig);
const startTime = performance.now();
// Should either complete quickly or handle timeout gracefully
try {
await engine.generateWithVerification('Test timeout handling');
const endTime = performance.now();
expect(endTime - startTime).toBeLessThan(400); // Should not exceed timeout significantly
}
catch (error) {
// Timeout handling is acceptable
const endTime = performance.now();
expect(endTime - startTime).toBeLessThan(400);
}
});
// NOTE(review): despite the name, each iteration still builds a *two*-model
// engine (providers[0] is always the primary, providers[i-1] the secondary),
// so this measures increasing secondary latency (50→100ms), not a growing
// provider count — confirm intent.
it('should scale with increasing provider count', async () => {
const providers = [
new MockPerformanceProvider('provider1', 50),
new MockPerformanceProvider('provider2', 75),
new MockPerformanceProvider('provider3', 100),
];
const measurements = [];
// Test with increasing number of providers
for (let i = 1; i <= providers.length; i++) {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'automatic',
providers: [],
};
const engine = new DualModelEngine(providers[0], providers[i - 1], config, collaborationConfig);
const startTime = performance.now();
await engine.generateWithVerification('Scaling test');
const endTime = performance.now();
measurements.push(endTime - startTime);
}
// Performance should not degrade significantly with more providers
// (assuming parallel execution)
const firstMeasurement = measurements[0];
const lastMeasurement = measurements[measurements.length - 1];
expect(lastMeasurement).toBeLessThan(firstMeasurement * 2);
});
});
describe('Uncertainty Detection Performance', () => {
// Synchronous text analysis: five short strings in under 10ms total.
it('should process text quickly', () => {
const testTexts = [
'This is a simple response.',
'I think this might work, but there are several approaches to consider.',
'This is definitely the correct solution for your problem.',
'Maybe we should explore different options, as this seems complex.',
'The implementation involves multiple steps and careful consideration.',
];
const startTime = performance.now();
testTexts.forEach((text) => {
uncertaintyDetector.detect(text);
});
const endTime = performance.now();
const duration = endTime - startTime;
// Should process 5 texts in under 10ms
expect(duration).toBeLessThan(10);
});
// ~40kB of hedging-heavy text (5 lines × 100 repeats) must be analyzed in
// under 50ms and still yield a defined uncertainty level.
it('should handle large text efficiently', () => {
const largeText = `
This is a comprehensive analysis of the problem domain. I think there are several
approaches we could consider, but maybe we should start with the simplest solution.
The implementation might involve multiple components, and we probably need to
consider various edge cases. There are different ways to handle error scenarios,
and we could potentially use several design patterns.
`.repeat(100); // Repeat to create large text
const startTime = performance.now();
const result = uncertaintyDetector.detect(largeText);
const endTime = performance.now();
const duration = endTime - startTime;
// Should process large text in under 50ms
expect(duration).toBeLessThan(50);
expect(result.level).toBeDefined();
});
// Detection latency should not vary wildly with input complexity.
// NOTE(review): the max/min ratio check divides sub-millisecond timings and
// can be noisy (minDuration near 0 inflates the ratio) — consider a floor.
it('should maintain consistent performance across patterns', () => {
const patterns = {
simple: 'This works correctly.',
uncertain: 'I think this might work.',
complex: 'There are several ways to implement this, and we probably need to consider multiple factors.',
mixed: 'This solution works, but maybe we should consider alternatives, as there might be edge cases.',
};
const measurements = {};
Object.entries(patterns).forEach(([key, text]) => {
const startTime = performance.now();
uncertaintyDetector.detect(text);
const endTime = performance.now();
measurements[key] = endTime - startTime;
});
// All measurements should be under 5ms
Object.values(measurements).forEach((duration) => {
expect(duration).toBeLessThan(5);
});
// Performance should be consistent regardless of text complexity
const maxDuration = Math.max(...Object.values(measurements));
const minDuration = Math.min(...Object.values(measurements));
expect(maxDuration / minDuration).toBeLessThan(5); // Less than 5x difference
});
});
describe('Memory Usage', () => {
// Heap growth across 100 engine calls should stay bounded (<50MB).
// NOTE(review): without --expose-gc the global.gc branch is skipped and the
// measurement includes uncollected garbage, making this an upper bound only.
it('should not leak memory during extended usage', async () => {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'manual',
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
// Get initial memory usage
const initialMemory = process.memoryUsage().heapUsed;
// Perform many operations
const operations = Array.from({ length: 100 }, (_, i) => engine.generateWithVerification(`Test query ${i}`));
await Promise.all(operations);
// Force garbage collection if available
if (global.gc) {
global.gc();
}
const finalMemory = process.memoryUsage().heapUsed;
const memoryIncrease = finalMemory - initialMemory;
// Memory increase should be reasonable (less than 50MB)
expect(memoryIncrease).toBeLessThan(50 * 1024 * 1024);
});
// Ten short-lived engines, then drop all references; absolute heap usage
// afterwards must stay under 100MB.
// NOTE(review): enginesCreated duplicates engines.length at the point of the
// assertion; asserting before engines.length = 0 would make one redundant.
it('should clean up resources properly', async () => {
let enginesCreated = 0;
const engines = [];
// Create and use multiple engines
for (let i = 0; i < 10; i++) {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'manual',
providers: [],
};
const engine = new DualModelEngine(new MockPerformanceProvider(`fast-${i}`, 10), new MockPerformanceProvider(`slow-${i}`, 20), config, collaborationConfig);
engines.push(engine);
enginesCreated++;
await engine.generateWithVerification(`Test ${i}`);
}
expect(enginesCreated).toBe(10);
// Clear references
engines.length = 0;
// Force garbage collection if available
if (global.gc) {
global.gc();
}
// Memory should be manageable
const memoryAfterCleanup = process.memoryUsage().heapUsed;
expect(memoryAfterCleanup).toBeLessThan(100 * 1024 * 1024); // Less than 100MB
});
});
describe('Concurrent Load Testing', () => {
// 50 simultaneous requests against one engine: all must resolve, with
// amortized latency (<20ms avg) showing they ran in parallel.
it('should handle high concurrent load', async () => {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'manual',
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
const concurrentRequests = 50;
const startTime = performance.now();
const promises = Array.from({ length: concurrentRequests }, (_, i) => engine.generateWithVerification(`Concurrent request ${i}`));
const results = await Promise.all(promises);
const endTime = performance.now();
const duration = endTime - startTime;
const avgResponseTime = duration / concurrentRequests;
// All requests should complete
expect(results).toHaveLength(concurrentRequests);
// Average response time should be reasonable
expect(avgResponseTime).toBeLessThan(20); // Less than 20ms average
// Total time should be reasonable (parallel processing)
expect(duration).toBeLessThan(1000); // Less than 1 second total
});
// 5 queries × 5 concurrent repeats: every result must be well-formed, and
// grouping relies on the mock provider echoing the prompt into the content.
it('should maintain quality under load', async () => {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'manual',
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
const queries = [
'What is the best way to handle errors?',
'How should I structure my database?',
'What are the security considerations?',
'How to optimize performance?',
'What testing strategy should I use?',
];
// Run same queries multiple times concurrently
const promises = queries.flatMap((query) => Array.from({ length: 5 }, () => engine.generateWithVerification(query)));
const results = await Promise.all(promises);
// All results should be valid
results.forEach((result) => {
expect(result.content).toBeDefined();
expect(result.confidence).toBeGreaterThan(0);
expect(result.primary).toBeDefined();
});
// Results should be consistent for same queries
const groupedResults = queries.map((query) => results.filter((result) => result.primary?.content.includes(query)));
groupedResults.forEach((group) => {
expect(group.length).toBe(5); // Should have 5 results per query
});
});
});
describe('Resource Monitoring', () => {
// The engine's result must carry duration and the list of models used.
it('should provide performance metrics', async () => {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'automatic',
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
const result = await engine.generateWithVerification('Performance monitoring test');
// Should include performance metadata
expect(result.metadata).toBeDefined();
expect(result.metadata?.duration).toBeGreaterThan(0);
expect(result.metadata?.modelsUsed).toBeDefined();
expect(result.metadata?.modelsUsed.length).toBeGreaterThan(0);
});
// Cost metadata must be present for the primary (and secondary, if any).
// NOTE(review): calculateCost is overridden on the *instance*, shadowing the
// base-class method for this provider only — intentional for the test.
it('should track cost accurately', async () => {
const expensiveProvider = new MockPerformanceProvider('expensive', 50);
// Override cost calculation
expensiveProvider.calculateCost = (tokens) => tokens * 0.001;
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'automatic',
providers: [],
};
const engine = new DualModelEngine(fastProvider, expensiveProvider, config, collaborationConfig);
const result = await engine.generateWithVerification('Cost tracking test');
// Should track costs for both providers
expect(result.primary?.metadata?.cost).toBeDefined();
if (result.secondary) {
expect(result.secondary.metadata?.cost).toBeDefined();
}
});
});
});
//# sourceMappingURL=performance.test.js.map