@quantumai/quantum-cli-core
Version:
Quantum CLI Core - Multi-LLM Collaboration System
250 lines • 14.6 kB
JavaScript
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, it, expect } from 'vitest';
import { ModelCharacteristicsService, MODEL_CHARACTERISTICS, ModelCapability, ModelStrength, QueryType, } from './model-characteristics.js';
describe('ModelCharacteristicsService', () => {
// Verifies lookup of a single model's characteristics by model id.
describe('getModelCharacteristics', () => {
  // Thin wrapper so each test reads as "look up this id".
  const lookup = (modelId) => ModelCharacteristicsService.getModelCharacteristics(modelId);

  it('should return characteristics for existing models', () => {
    // [model id, provider the test expects for that id]
    const expectedProviders = [
      ['gemini-2.5-pro', 'google'],
      ['gpt-4', 'openai'],
    ];
    for (const [modelId, provider] of expectedProviders) {
      const characteristics = lookup(modelId);
      expect(characteristics).toBeDefined();
      expect(characteristics?.id).toBe(modelId);
      expect(characteristics?.provider).toBe(provider);
    }
  });

  it('should return undefined for non-existent models', () => {
    const missing = lookup('non-existent-model');
    expect(missing).toBeUndefined();
  });
});
// Verifies that the full model list contains exactly the three known model ids.
describe('getAllModels', () => {
  it('should return all available models', () => {
    const everyModel = ModelCharacteristicsService.getAllModels();
    // Gemini, GPT-4, Claude
    expect(everyModel).toHaveLength(3);

    const ids = everyModel.map((entry) => entry.id);
    for (const expectedId of ['gemini-2.5-pro', 'gpt-4', 'claude-3-sonnet']) {
      expect(ids).toContain(expectedId);
    }
  });
});
// Exercises getBestModelForQueryType with only a query type, and with a second
// numeric argument that these tests treat as a cost ceiling.
describe('getBestModelForQueryType', () => {
  // Forwards exactly the arguments given, so the one-argument calls stay one-argument.
  const pickBest = (...args) => ModelCharacteristicsService.getBestModelForQueryType(...args);

  it('should recommend best model for code queries', () => {
    const winner = pickBest(QueryType.CODE);
    expect(winner).toBeDefined();
    // Based on high code suitability score
    expect(winner?.id).toBe('gemini-2.5-pro');
  });

  it('should recommend best model for creative queries', () => {
    const winner = pickBest(QueryType.CREATIVE);
    expect(winner).toBeDefined();
    // GPT-4 has highest creative score
    expect(winner?.id).toBe('gpt-4');
  });

  it('should respect cost constraints', () => {
    const affordable = pickBest(QueryType.GENERAL, 0.01);
    expect(affordable).toBeDefined();
    // Should exclude very expensive models
    expect(affordable?.id).not.toBe('gpt-4');
  });

  it('should return undefined if no models meet cost requirements', () => {
    const nothingAffordable = pickBest(QueryType.GENERAL, 0.001);
    expect(nothingAffordable).toBeUndefined();
  });
});
// Verifies that the first-ranked model is at least as cost-efficient as the last-ranked one.
describe('rankModelsByCostEfficiency', () => {
  // Efficiency as this test defines it: the ANALYSIS suitability score when the
  // model lists an ANALYSIS use case, otherwise its overall quality score,
  // divided by the model's cost per quality point.
  const analysisEfficiency = (model) => {
    const analysisUseCase = model.useCases.find((uc) => uc.queryType === QueryType.ANALYSIS);
    let quality;
    if (analysisUseCase) {
      quality = analysisUseCase.suitabilityScore;
    } else {
      quality = model.qualityScores.overall;
    }
    return quality / model.cost.costPerQualityPoint;
  };

  it('should rank models by cost efficiency for analysis tasks', () => {
    const ranking = ModelCharacteristicsService.rankModelsByCostEfficiency(QueryType.ANALYSIS);
    expect(ranking).toHaveLength(3);

    // Verify that ranking considers both quality and cost
    const topRanked = ranking[0];
    const bottomRanked = ranking[ranking.length - 1];
    const topEfficiency = analysisEfficiency(topRanked);
    const bottomEfficiency = analysisEfficiency(bottomRanked);
    expect(topEfficiency).toBeGreaterThanOrEqual(bottomEfficiency);
  });
});
// Verifies the per-tier thresholds and orderings these tests expect from
// getModelsByPerformanceTier for the 'fast', 'balanced' and 'quality' tiers.
describe('getModelsByPerformanceTier', () => {
  const modelsInTier = (tier) => ModelCharacteristicsService.getModelsByPerformanceTier(tier);

  // Pairs each element with its successor: [a, b, c] -> [[a, b], [b, c]].
  // Yields no pairs for lists with fewer than two elements.
  const consecutivePairs = (items) => items.slice(1).map((next, idx) => [items[idx], next]);

  it('should return fast models for fast tier', () => {
    const tierModels = modelsInTier('fast');
    for (const candidate of tierModels) {
      expect(candidate.performance.averageLatency).toBeLessThan(2000);
    }
    // Should be sorted by latency
    for (const [current, next] of consecutivePairs(tierModels)) {
      expect(current.performance.averageLatency).toBeLessThanOrEqual(next.performance.averageLatency);
    }
  });

  it('should return balanced models for balanced tier', () => {
    const tierModels = modelsInTier('balanced');
    for (const candidate of tierModels) {
      expect(candidate.performance.averageLatency).toBeLessThan(3500);
      expect(candidate.qualityScores.overall).toBeGreaterThanOrEqual(0.85);
    }
  });

  it('should return high-quality models for quality tier', () => {
    const tierModels = modelsInTier('quality');
    for (const candidate of tierModels) {
      expect(candidate.qualityScores.overall).toBeGreaterThanOrEqual(0.88);
    }
    // Should be sorted by quality score (descending)
    for (const [current, next] of consecutivePairs(tierModels)) {
      expect(current.qualityScores.overall).toBeGreaterThanOrEqual(next.qualityScores.overall);
    }
  });
});
// Verifies pairwise model comparison: result shape, handling of unknown ids,
// and that a non-empty textual recommendation is produced.
describe('compareModels', () => {
  it('should compare two existing models', () => {
    const comparison = ModelCharacteristicsService.compareModels('gemini-2.5-pro', 'gpt-4');
    expect(comparison).toBeDefined();
    expect(comparison?.models).toHaveLength(2);
    // Every compared dimension must name a winner.
    expect(comparison?.comparison.quality.winner).toBeDefined();
    expect(comparison?.comparison.speed.winner).toBeDefined();
    expect(comparison?.comparison.cost.winner).toBeDefined();
    expect(comparison?.recommendation).toBeDefined();
  });

  it('should return undefined for non-existent models', () => {
    const comparison = ModelCharacteristicsService.compareModels('non-existent', 'gpt-4');
    expect(comparison).toBeUndefined();
  });

  it('should provide meaningful recommendations', () => {
    const comparison = ModelCharacteristicsService.compareModels('gemini-2.5-pro', 'claude-3-sonnet');
    expect(comparison).toBeDefined();
    expect(typeof comparison?.recommendation).toBe('string');
    // Every string contains '', so a toContain('') check can never fail on a string;
    // require actual (non-whitespace) recommendation text instead.
    expect(comparison?.recommendation.trim().length).toBeGreaterThan(0);
  });
});
// Structural sanity checks over every entry of MODEL_CHARACTERISTICS.
describe('Model Characteristics Validation', () => {
  // Read the table at test time (not at collection time), as each test did originally.
  const everyModel = () => Object.values(MODEL_CHARACTERISTICS);

  // Asserts 0 < value <= upperBound.
  const expectPositiveUpTo = (value, upperBound) => {
    expect(value).toBeGreaterThan(0);
    expect(value).toBeLessThanOrEqual(upperBound);
  };

  const expectArray = (value) => {
    expect(Array.isArray(value)).toBe(true);
  };

  it('should have valid characteristics for all models', () => {
    for (const model of everyModel()) {
      // Basic structure validation
      expect(model.id).toBeDefined();
      expect(model.name).toBeDefined();
      expect(model.provider).toBeDefined();

      // Capabilities validation
      expectArray(model.capabilities);
      expect(model.capabilities.length).toBeGreaterThan(0);

      // Strengths and weaknesses validation
      expectArray(model.strengths);
      expectArray(model.weaknesses);

      // Performance metrics validation
      expect(model.performance.averageLatency).toBeGreaterThan(0);
      expectPositiveUpTo(model.performance.accuracy, 1);

      // Cost validation
      expect(model.cost.inputTokenCost).toBeGreaterThan(0);
      expect(model.cost.outputTokenCost).toBeGreaterThan(0);
      expectPositiveUpTo(model.cost.costEfficiencyRank, 10);

      // Use cases validation
      expectArray(model.useCases);
      for (const useCase of model.useCases) {
        expectPositiveUpTo(useCase.suitabilityScore, 1);
        expectPositiveUpTo(useCase.confidence, 1);
        expectArray(useCase.reasoning);
      }

      // Quality scores validation
      for (const score of Object.values(model.qualityScores)) {
        expectPositiveUpTo(score, 1);
      }

      // Constraints validation
      expect(model.constraints.maxTokens).toBeGreaterThan(0);
      expect(model.constraints.rateLimits.requestsPerMinute).toBeGreaterThan(0);
    }
  });

  it('should have all query types covered in use cases', () => {
    // Each model should have ratings for all query types
    const requiredQueryTypes = [
      QueryType.CODE,
      QueryType.CREATIVE,
      QueryType.ANALYSIS,
      QueryType.SECURITY,
      QueryType.GENERAL,
    ];
    for (const model of everyModel()) {
      const ratedQueryTypes = model.useCases.map((uc) => uc.queryType);
      for (const required of requiredQueryTypes) {
        expect(ratedQueryTypes).toContain(required);
      }
    }
  });

  it('should have logical relationships between strengths and use case scores', () => {
    // A model that lists a strength should score above 0.9 on the matching query type:
    // GPT-4 has CREATIVE_TASKS as strength, should score high on creative queries;
    // Gemini has CODE_UNDERSTANDING as strength, should score high on code queries.
    const expectations = [
      { modelId: 'gpt-4', strength: ModelStrength.CREATIVE_TASKS, queryType: QueryType.CREATIVE },
      { modelId: 'gemini-2.5-pro', strength: ModelStrength.CODE_UNDERSTANDING, queryType: QueryType.CODE },
    ];
    for (const { modelId, strength, queryType } of expectations) {
      const model = MODEL_CHARACTERISTICS[modelId];
      expect(model.strengths).toContain(strength);
      const matchingUseCase = model.useCases.find((uc) => uc.queryType === queryType);
      expect(matchingUseCase?.suitabilityScore).toBeGreaterThan(0.9);
    }
  });
});
// Cross-tier sanity check on the 'fast' performance tier.
describe('Performance Tier Logic', () => {
  it('should correctly categorize models by speed', () => {
    const fastModels = ModelCharacteristicsService.getModelsByPerformanceTier('fast');
    const balancedModels = ModelCharacteristicsService.getModelsByPerformanceTier('balanced');

    // The check only applies when both tiers are populated.
    if (fastModels.length === 0 || balancedModels.length === 0) {
      return;
    }

    // A strict "every fast model beats every balanced model" ordering might not
    // always hold, so only the fast tier's own latency ceiling is asserted.
    const slowestFast = Math.max(...fastModels.map((m) => m.performance.averageLatency));
    expect(slowestFast).toBeLessThan(2000); // By definition of fast tier
  });
});
});
// Plausibility checks on the numeric data stored in MODEL_CHARACTERISTICS.
describe('Model Characteristics Data Quality', () => {
  it('should have realistic performance metrics', () => {
    for (const model of Object.values(MODEL_CHARACTERISTICS)) {
      const metrics = model.performance;

      // Latency should be reasonable (not 0, not > 30 seconds)
      expect(metrics.averageLatency).toBeGreaterThan(100);
      expect(metrics.averageLatency).toBeLessThan(30000);

      // P95 should be higher than average
      expect(metrics.p95Latency).toBeGreaterThan(metrics.averageLatency);
      // P99 should be higher than P95
      expect(metrics.p99Latency).toBeGreaterThan(metrics.p95Latency);

      // Tokens per second should be reasonable
      expect(metrics.tokensPerSecond).toBeGreaterThan(1);
      expect(metrics.tokensPerSecond).toBeLessThan(1000);

      // Error rates should be low
      expect(metrics.errorRate).toBeGreaterThanOrEqual(0);
      expect(metrics.errorRate).toBeLessThan(0.1); // Less than 10%
    }
  });

  it('should have consistent cost-quality relationships', () => {
    // Generally, higher quality should correlate with higher cost per quality point
    // (though this isn't always true due to efficiency differences), so the only
    // hard requirement checked here is a positive quality-to-cost ratio.
    for (const model of Object.values(MODEL_CHARACTERISTICS)) {
      const qualityToCostRatio = model.qualityScores.overall / model.cost.costPerQualityPoint;
      expect(qualityToCostRatio).toBeGreaterThan(0);
    }
  });

  it('should have logical capability-strength mappings', () => {
    // Strengths that count as "code-related" for this check.
    const codeRelatedStrengths = [
      ModelStrength.CODE_UNDERSTANDING,
      ModelStrength.PROBLEM_SOLVING,
    ];
    for (const model of Object.values(MODEL_CHARACTERISTICS)) {
      // If a model has CODE_GENERATION capability, it should have some code-related strength
      if (model.capabilities.includes(ModelCapability.CODE_GENERATION)) {
        const hasCodeStrength = model.strengths.some((strength) => codeRelatedStrengths.includes(strength));
        expect(hasCodeStrength).toBe(true);
      }
      // CREATIVE_WRITING capability is intentionally not tied to the CREATIVE_TASKS
      // strength: not all models with creative capability excel at it, so nothing
      // is asserted for that pairing.
    }
  });
});
//# sourceMappingURL=model-characteristics.test.js.map