/*
 * quantum-cli-core — Quantum CLI Core - Multi-LLM Collaboration System
 * (extracted file header: 352 lines, 16.8 kB, JavaScript)
 */
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect, beforeEach, vi } from 'vitest';
import { DualModelEngine } from './dual-engine.js';
import { UncertaintyDetector } from './detection/uncertainty-detector.js';
import { BaseLLMProvider, } from './providers/base-provider.js';
// Mock provider for performance testing
/**
 * Test double for an LLM provider with a configurable, deterministic latency.
 *
 * Simulates a provider whose `generate()` resolves after `latency` ms and
 * echoes the prompt back, so benchmarks can reason about expected timings.
 */
class MockPerformanceProvider extends BaseLLMProvider {
// Per-instance provider id; assigned in the constructor (see note there).
id;
capabilities = ['test'];
// Simulated network latency in milliseconds.
latency;
/**
 * @param {string} id - Provider id, also used as its display name.
 * @param {number} [latency=100] - Simulated response delay in ms.
 */
constructor(id, latency = 100) {
super({ id, name: id, type: 'mock' });
// BUG FIX: the previous class-field initializer `id = 'mock-performance'`
// ran *after* super() and clobbered the per-instance id for every
// instance, so all mocks reported providerId 'mock-performance'.
// Assigning here preserves the id each test passes in ('fast', 'slow', …).
this.id = id;
this.latency = latency;
}
/**
 * Produce a deterministic mock response after the configured delay.
 * @param {string} prompt - The query text; echoed into the response content.
 * @param {object} [options] - Accepted for interface parity; unused.
 * @returns {Promise<object>} Response with providerId, content, confidence,
 *   latency, token count, and cost (via the base class's calculateCost).
 */
async generate(prompt, options) {
// Simulate network latency
await new Promise((resolve) => setTimeout(resolve, this.latency));
return {
providerId: this.id,
content: `Mock response for: ${prompt}`,
confidence: 0.8,
latency: this.latency,
tokens: prompt.length,
cost: this.calculateCost(prompt.length),
};
}
/** Mock credentials are always valid. */
async validateCredentials() {
return true;
}
}
// Performance benchmark suite for the dual-model collaboration engine and
// the uncertainty detector.
// NOTE(review): every wall-clock threshold below (<500ms, <10ms, …) is tuned
// for an idle machine; these assertions may be flaky on loaded CI runners.
describe('Performance Benchmarks', () => {
let fastProvider;
let slowProvider;
let uncertaintyDetector;
// Fresh fixtures per test: 50ms vs 200ms simulated provider latency.
beforeEach(() => {
fastProvider = new MockPerformanceProvider('fast', 50);
slowProvider = new MockPerformanceProvider('slow', 200);
uncertaintyDetector = new UncertaintyDetector();
// Mock feature flags to enable collaboration
// NOTE(review): vi.doMock only affects modules imported *after* this call;
// the static imports at the top of this file are already resolved, so this
// mock is likely a no-op — confirm, or hoist to a top-level vi.mock().
vi.doMock('./feature-flags.js', () => ({
isCollaborationEnabled: () => true,
}));
});
describe('DualModelEngine Performance', () => {
// End-to-end latency of one verified query: fast (50ms) + slow (200ms)
// providers plus engine overhead must stay under 500ms.
it('should complete single query within acceptable time', async () => {
const config = {
autoVerifyThreshold: 0.7,
maxCostPerQuery: 1.0,
enableSynthesis: true,
};
const collaborationConfig = {
enabled: true,
verificationMode: 'automatic',
autoVerifyThreshold: 0.1, // Always verify for testing
maxCostPerQuery: 1.0,
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
const startTime = performance.now();
await engine.generateWithVerification('Test query for performance');
const endTime = performance.now();
const duration = endTime - startTime;
// Should complete within 500ms (fast + slow + processing overhead)
expect(duration).toBeLessThan(500);
});
// Five queries issued concurrently should overlap their simulated latency
// rather than serialize (manual mode skips the slow verification pass).
it('should handle parallel execution efficiently', async () => {
const config = {
autoVerifyThreshold: 0.7,
maxCostPerQuery: 1.0,
enableSynthesis: true,
};
const collaborationConfig = {
enabled: true,
verificationMode: 'manual', // Only verify when explicitly requested
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
const queries = [
'What is React?',
'How to use TypeScript?',
'Explain async/await',
'Database design patterns',
'API security best practices',
];
const startTime = performance.now();
// Run queries in parallel
const promises = queries.map((query) => engine.generateWithVerification(query));
await Promise.all(promises);
const endTime = performance.now();
const duration = endTime - startTime;
// Parallel execution should be faster than sequential
// Expected: ~100ms (parallel) vs ~500ms (sequential)
expect(duration).toBeLessThan(200);
});
// A 1-second secondary provider against a 300ms maxLatency: the engine may
// either return quickly without the slow verification or reject — both are
// accepted, but either way it must settle well under the 1s provider delay.
it('should respect timeout constraints', async () => {
const timeoutProvider = new MockPerformanceProvider('timeout', 1000); // 1 second delay
const config = {
autoVerifyThreshold: 0.7,
maxCostPerQuery: 1.0,
};
const collaborationConfig = {
enabled: true,
verificationMode: 'automatic',
maxLatency: 300, // 300ms timeout
providers: [],
};
const engine = new DualModelEngine(fastProvider, timeoutProvider, config, collaborationConfig);
const startTime = performance.now();
// Should either complete quickly or handle timeout gracefully
try {
await engine.generateWithVerification('Test timeout handling');
const endTime = performance.now();
expect(endTime - startTime).toBeLessThan(400); // Should not exceed timeout significantly
}
catch (error) {
// Timeout handling is acceptable
const endTime = performance.now();
expect(endTime - startTime).toBeLessThan(400);
}
});
// NOTE(review): despite the name, each iteration still builds a *two*-model
// engine (providers[0] is always the primary, providers[i-1] the secondary),
// so this measures increasing secondary latency (50→100ms), not a growing
// provider count — confirm intent.
it('should scale with increasing provider count', async () => {
const providers = [
new MockPerformanceProvider('provider1', 50),
new MockPerformanceProvider('provider2', 75),
new MockPerformanceProvider('provider3', 100),
];
const measurements = [];
// Test with increasing number of providers
for (let i = 1; i <= providers.length; i++) {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'automatic',
providers: [],
};
const engine = new DualModelEngine(providers[0], providers[i - 1], config, collaborationConfig);
const startTime = performance.now();
await engine.generateWithVerification('Scaling test');
const endTime = performance.now();
measurements.push(endTime - startTime);
}
// Performance should not degrade significantly with more providers
// (assuming parallel execution)
const firstMeasurement = measurements[0];
const lastMeasurement = measurements[measurements.length - 1];
expect(lastMeasurement).toBeLessThan(firstMeasurement * 2);
});
});
describe('Uncertainty Detection Performance', () => {
// Synchronous text analysis: five short strings in under 10ms total.
it('should process text quickly', () => {
const testTexts = [
'This is a simple response.',
'I think this might work, but there are several approaches to consider.',
'This is definitely the correct solution for your problem.',
'Maybe we should explore different options, as this seems complex.',
'The implementation involves multiple steps and careful consideration.',
];
const startTime = performance.now();
testTexts.forEach((text) => {
uncertaintyDetector.detect(text);
});
const endTime = performance.now();
const duration = endTime - startTime;
// Should process 5 texts in under 10ms
expect(duration).toBeLessThan(10);
});
// ~40kB of hedging-heavy text (5 lines × 100 repeats) must be analyzed in
// under 50ms and still yield a defined uncertainty level.
it('should handle large text efficiently', () => {
const largeText = `
This is a comprehensive analysis of the problem domain. I think there are several
approaches we could consider, but maybe we should start with the simplest solution.
The implementation might involve multiple components, and we probably need to
consider various edge cases. There are different ways to handle error scenarios,
and we could potentially use several design patterns.
`.repeat(100); // Repeat to create large text
const startTime = performance.now();
const result = uncertaintyDetector.detect(largeText);
const endTime = performance.now();
const duration = endTime - startTime;
// Should process large text in under 50ms
expect(duration).toBeLessThan(50);
expect(result.level).toBeDefined();
});
// Detection latency should not vary wildly with input complexity.
// NOTE(review): the max/min ratio check divides sub-millisecond timings and
// can be noisy (minDuration near 0 inflates the ratio) — consider a floor.
it('should maintain consistent performance across patterns', () => {
const patterns = {
simple: 'This works correctly.',
uncertain: 'I think this might work.',
complex: 'There are several ways to implement this, and we probably need to consider multiple factors.',
mixed: 'This solution works, but maybe we should consider alternatives, as there might be edge cases.',
};
const measurements = {};
Object.entries(patterns).forEach(([key, text]) => {
const startTime = performance.now();
uncertaintyDetector.detect(text);
const endTime = performance.now();
measurements[key] = endTime - startTime;
});
// All measurements should be under 5ms
Object.values(measurements).forEach((duration) => {
expect(duration).toBeLessThan(5);
});
// Performance should be consistent regardless of text complexity
const maxDuration = Math.max(...Object.values(measurements));
const minDuration = Math.min(...Object.values(measurements));
expect(maxDuration / minDuration).toBeLessThan(5); // Less than 5x difference
});
});
describe('Memory Usage', () => {
// Heap growth across 100 engine calls should stay bounded (<50MB).
// NOTE(review): without --expose-gc the global.gc branch is skipped and the
// measurement includes uncollected garbage, making this an upper bound only.
it('should not leak memory during extended usage', async () => {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'manual',
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
// Get initial memory usage
const initialMemory = process.memoryUsage().heapUsed;
// Perform many operations
const operations = Array.from({ length: 100 }, (_, i) => engine.generateWithVerification(`Test query ${i}`));
await Promise.all(operations);
// Force garbage collection if available
if (global.gc) {
global.gc();
}
const finalMemory = process.memoryUsage().heapUsed;
const memoryIncrease = finalMemory - initialMemory;
// Memory increase should be reasonable (less than 50MB)
expect(memoryIncrease).toBeLessThan(50 * 1024 * 1024);
});
// Ten short-lived engines, then drop all references; absolute heap usage
// afterwards must stay under 100MB.
// NOTE(review): enginesCreated duplicates engines.length at the point of the
// assertion; asserting before engines.length = 0 would make one redundant.
it('should clean up resources properly', async () => {
let enginesCreated = 0;
const engines = [];
// Create and use multiple engines
for (let i = 0; i < 10; i++) {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'manual',
providers: [],
};
const engine = new DualModelEngine(new MockPerformanceProvider(`fast-${i}`, 10), new MockPerformanceProvider(`slow-${i}`, 20), config, collaborationConfig);
engines.push(engine);
enginesCreated++;
await engine.generateWithVerification(`Test ${i}`);
}
expect(enginesCreated).toBe(10);
// Clear references
engines.length = 0;
// Force garbage collection if available
if (global.gc) {
global.gc();
}
// Memory should be manageable
const memoryAfterCleanup = process.memoryUsage().heapUsed;
expect(memoryAfterCleanup).toBeLessThan(100 * 1024 * 1024); // Less than 100MB
});
});
describe('Concurrent Load Testing', () => {
// 50 simultaneous requests against one engine: all must resolve, with
// amortized latency (<20ms avg) showing they ran in parallel.
it('should handle high concurrent load', async () => {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'manual',
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
const concurrentRequests = 50;
const startTime = performance.now();
const promises = Array.from({ length: concurrentRequests }, (_, i) => engine.generateWithVerification(`Concurrent request ${i}`));
const results = await Promise.all(promises);
const endTime = performance.now();
const duration = endTime - startTime;
const avgResponseTime = duration / concurrentRequests;
// All requests should complete
expect(results).toHaveLength(concurrentRequests);
// Average response time should be reasonable
expect(avgResponseTime).toBeLessThan(20); // Less than 20ms average
// Total time should be reasonable (parallel processing)
expect(duration).toBeLessThan(1000); // Less than 1 second total
});
// 5 queries × 5 concurrent repeats: every result must be well-formed, and
// grouping relies on the mock provider echoing the prompt into the content.
it('should maintain quality under load', async () => {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'manual',
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
const queries = [
'What is the best way to handle errors?',
'How should I structure my database?',
'What are the security considerations?',
'How to optimize performance?',
'What testing strategy should I use?',
];
// Run same queries multiple times concurrently
const promises = queries.flatMap((query) => Array.from({ length: 5 }, () => engine.generateWithVerification(query)));
const results = await Promise.all(promises);
// All results should be valid
results.forEach((result) => {
expect(result.content).toBeDefined();
expect(result.confidence).toBeGreaterThan(0);
expect(result.primary).toBeDefined();
});
// Results should be consistent for same queries
const groupedResults = queries.map((query) => results.filter((result) => result.primary?.content.includes(query)));
groupedResults.forEach((group) => {
expect(group.length).toBe(5); // Should have 5 results per query
});
});
});
describe('Resource Monitoring', () => {
// The engine's result must carry duration and the list of models used.
it('should provide performance metrics', async () => {
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'automatic',
providers: [],
};
const engine = new DualModelEngine(fastProvider, slowProvider, config, collaborationConfig);
const result = await engine.generateWithVerification('Performance monitoring test');
// Should include performance metadata
expect(result.metadata).toBeDefined();
expect(result.metadata?.duration).toBeGreaterThan(0);
expect(result.metadata?.modelsUsed).toBeDefined();
expect(result.metadata?.modelsUsed.length).toBeGreaterThan(0);
});
// Cost metadata must be present for the primary (and secondary, if any).
// NOTE(review): calculateCost is overridden on the *instance*, shadowing the
// base-class method for this provider only — intentional for the test.
it('should track cost accurately', async () => {
const expensiveProvider = new MockPerformanceProvider('expensive', 50);
// Override cost calculation
expensiveProvider.calculateCost = (tokens) => tokens * 0.001;
const config = { maxCostPerQuery: 1.0 };
const collaborationConfig = {
enabled: true,
verificationMode: 'automatic',
providers: [],
};
const engine = new DualModelEngine(fastProvider, expensiveProvider, config, collaborationConfig);
const result = await engine.generateWithVerification('Cost tracking test');
// Should track costs for both providers
expect(result.primary?.metadata?.cost).toBeDefined();
if (result.secondary) {
expect(result.secondary.metadata?.cost).toBeDefined();
}
});
});
});
//# sourceMappingURL=performance.test.js.map