/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
* @fileoverview Quantum CLI Core - Multi-LLM Collaboration System:
* model characteristics and selection utilities.
*/
import { QueryType } from './types.js';
// Re-export QueryType for external use
export { QueryType };
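// Note: the enums below are TypeScript string enums compiled to plain JavaScript;
// each IIFE fills the exported object with string-valued members.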
export var ModelCapability;
(function (ModelCapability) {
ModelCapability["TEXT_GENERATION"] = "text_generation";
ModelCapability["CODE_GENERATION"] = "code_generation";
ModelCapability["ANALYSIS"] = "analysis";
ModelCapability["REASONING"] = "reasoning";
ModelCapability["CREATIVE_WRITING"] = "creative_writing";
ModelCapability["INSTRUCTION_FOLLOWING"] = "instruction_following";
ModelCapability["MULTI_LANGUAGE"] = "multi_language";
ModelCapability["LONG_CONTEXT"] = "long_context";
ModelCapability["FUNCTION_CALLING"] = "function_calling";
ModelCapability["JSON_MODE"] = "json_mode";
ModelCapability["STREAMING"] = "streaming";
})(ModelCapability || (ModelCapability = {}));
export var ModelStrength;
(function (ModelStrength) {
// Technical strengths
ModelStrength["FAST_RESPONSE"] = "fast_response";
ModelStrength["HIGH_ACCURACY"] = "high_accuracy";
ModelStrength["CONSISTENT_OUTPUT"] = "consistent_output";
ModelStrength["COMPLEX_REASONING"] = "complex_reasoning";
ModelStrength["CODE_UNDERSTANDING"] = "code_understanding";
ModelStrength["MATHEMATICAL_REASONING"] = "mathematical_reasoning";
// Domain strengths
ModelStrength["CREATIVE_TASKS"] = "creative_tasks";
ModelStrength["TECHNICAL_WRITING"] = "technical_writing";
ModelStrength["PROBLEM_SOLVING"] = "problem_solving";
ModelStrength["DETAILED_EXPLANATIONS"] = "detailed_explanations";
ModelStrength["CONCISE_RESPONSES"] = "concise_responses";
// Language and format
ModelStrength["MULTILINGUAL"] = "multilingual";
ModelStrength["STRUCTURED_OUTPUT"] = "structured_output";
ModelStrength["FOLLOW_INSTRUCTIONS"] = "follow_instructions";
})(ModelStrength || (ModelStrength = {}));
export var ModelWeakness;
(function (ModelWeakness) {
// Performance limitations
ModelWeakness["SLOW_RESPONSE"] = "slow_response";
ModelWeakness["INCONSISTENT_QUALITY"] = "inconsistent_quality";
ModelWeakness["LIMITED_CONTEXT"] = "limited_context";
ModelWeakness["HIGH_COST"] = "high_cost";
// Capability limitations
ModelWeakness["POOR_CODE_GEN"] = "poor_code_generation";
ModelWeakness["WEAK_REASONING"] = "weak_reasoning";
ModelWeakness["VERBOSE_OUTPUT"] = "verbose_output";
ModelWeakness["HALLUCINATION_PRONE"] = "hallucination_prone";
ModelWeakness["LIMITED_CREATIVITY"] = "limited_creativity";
// Domain limitations
ModelWeakness["TECHNICAL_ACCURACY"] = "technical_accuracy";
ModelWeakness["OUTDATED_KNOWLEDGE"] = "outdated_knowledge";
ModelWeakness["FACTUAL_ERRORS"] = "factual_errors";
})(ModelWeakness || (ModelWeakness = {}));
// Predefined model characteristics for major providers
export const MODEL_CHARACTERISTICS = {
'gemini-2.5-pro': {
id: 'gemini-2.5-pro',
name: 'Gemini 2.5 Pro',
provider: 'google',
capabilities: [
ModelCapability.TEXT_GENERATION,
ModelCapability.CODE_GENERATION,
ModelCapability.ANALYSIS,
ModelCapability.REASONING,
ModelCapability.LONG_CONTEXT,
ModelCapability.FUNCTION_CALLING,
ModelCapability.STREAMING,
],
strengths: [
ModelStrength.COMPLEX_REASONING,
ModelStrength.CODE_UNDERSTANDING,
ModelStrength.DETAILED_EXPLANATIONS,
ModelStrength.FOLLOW_INSTRUCTIONS,
ModelStrength.STRUCTURED_OUTPUT,
],
weaknesses: [ModelWeakness.HIGH_COST, ModelWeakness.SLOW_RESPONSE],
performance: {
averageLatency: 2500,
p95Latency: 4000,
p99Latency: 6000,
tokensPerSecond: 45,
accuracy: 0.92,
consistency: 0.88,
reliability: 0.94,
maxContextLength: 1000000,
contextUtilization: 0.85,
errorRate: 0.02,
timeoutRate: 0.01,
},
cost: {
pricingModel: 'per_token',
inputTokenCost: 0.00125, // $1.25 per 1M tokens
outputTokenCost: 0.005, // $5.00 per 1M tokens
costPerQualityPoint: 0.0054,
costEfficiencyRank: 6,
rateLimitRpm: 60,
rateLimitTpm: 32000,
},
useCases: [
{
queryType: QueryType.CODE,
suitabilityScore: 0.95,
confidence: 0.9,
reasoning: [
'Excellent code understanding',
'Strong debugging capabilities',
],
},
{
queryType: QueryType.ANALYSIS,
suitabilityScore: 0.93,
confidence: 0.88,
reasoning: ['Deep analytical reasoning', 'Comprehensive explanations'],
},
{
queryType: QueryType.SECURITY,
suitabilityScore: 0.9,
confidence: 0.85,
reasoning: [
'Good security knowledge',
'Thorough vulnerability analysis',
],
},
{
queryType: QueryType.CREATIVE,
suitabilityScore: 0.75,
confidence: 0.7,
reasoning: ['Decent creativity', 'Sometimes overly structured'],
},
{
queryType: QueryType.GENERAL,
suitabilityScore: 0.88,
confidence: 0.92,
reasoning: ['Well-rounded performance', 'Reliable responses'],
},
],
qualityScores: {
overall: 0.9,
accuracy: 0.92,
creativity: 0.75,
reasoning: 0.95,
codeGeneration: 0.93,
instruction_following: 0.94,
factualAccuracy: 0.91,
consistency: 0.88,
},
constraints: {
maxTokens: 8192,
maxRequests: 1000,
allowedRegions: ['us', 'eu', 'asia'],
requiresApiKey: true,
supportsBatch: false,
supportsStreaming: true,
rateLimits: {
requestsPerMinute: 60,
tokensPerMinute: 32000,
requestsPerDay: 1000,
},
},
},
'gpt-4': {
id: 'gpt-4',
name: 'GPT-4',
provider: 'openai',
capabilities: [
ModelCapability.TEXT_GENERATION,
ModelCapability.CODE_GENERATION,
ModelCapability.ANALYSIS,
ModelCapability.REASONING,
ModelCapability.CREATIVE_WRITING,
ModelCapability.FUNCTION_CALLING,
ModelCapability.JSON_MODE,
ModelCapability.STREAMING,
],
strengths: [
ModelStrength.HIGH_ACCURACY,
ModelStrength.COMPLEX_REASONING,
ModelStrength.CREATIVE_TASKS,
ModelStrength.PROBLEM_SOLVING,
ModelStrength.FOLLOW_INSTRUCTIONS,
],
weaknesses: [
ModelWeakness.HIGH_COST,
ModelWeakness.SLOW_RESPONSE,
ModelWeakness.LIMITED_CONTEXT,
],
performance: {
averageLatency: 3200,
p95Latency: 5500,
p99Latency: 8000,
tokensPerSecond: 35,
accuracy: 0.94,
consistency: 0.91,
reliability: 0.93,
maxContextLength: 128000,
contextUtilization: 0.82,
errorRate: 0.015,
timeoutRate: 0.008,
},
cost: {
pricingModel: 'per_token',
inputTokenCost: 0.03, // $30 per 1M tokens
outputTokenCost: 0.06, // $60 per 1M tokens
costPerQualityPoint: 0.032,
costEfficiencyRank: 4,
rateLimitRpm: 10000,
rateLimitTpm: 300000,
},
useCases: [
{
queryType: QueryType.CREATIVE,
suitabilityScore: 0.95,
confidence: 0.92,
reasoning: ['Exceptional creativity', 'Natural language flow'],
},
{
queryType: QueryType.ANALYSIS,
suitabilityScore: 0.92,
confidence: 0.9,
reasoning: ['Strong analytical skills', 'Nuanced understanding'],
},
{
queryType: QueryType.CODE,
suitabilityScore: 0.88,
confidence: 0.85,
reasoning: ['Good code generation', 'Sometimes verbose'],
},
{
queryType: QueryType.GENERAL,
suitabilityScore: 0.91,
confidence: 0.93,
reasoning: ['Versatile performance', 'Human-like responses'],
},
{
queryType: QueryType.SECURITY,
suitabilityScore: 0.85,
confidence: 0.8,
reasoning: ['Adequate security knowledge', 'May lack latest updates'],
},
],
qualityScores: {
overall: 0.91,
accuracy: 0.94,
creativity: 0.95,
reasoning: 0.93,
codeGeneration: 0.88,
instruction_following: 0.92,
factualAccuracy: 0.89,
consistency: 0.91,
},
constraints: {
maxTokens: 4096,
maxRequests: 500,
allowedRegions: ['global'],
requiresApiKey: true,
supportsBatch: true,
supportsStreaming: true,
rateLimits: {
requestsPerMinute: 3500,
tokensPerMinute: 350000,
requestsPerDay: 10000,
},
},
},
'claude-3-sonnet': {
id: 'claude-3-sonnet',
name: 'Claude 3 Sonnet',
provider: 'anthropic',
capabilities: [
ModelCapability.TEXT_GENERATION,
ModelCapability.CODE_GENERATION,
ModelCapability.ANALYSIS,
ModelCapability.REASONING,
ModelCapability.LONG_CONTEXT,
ModelCapability.CREATIVE_WRITING,
ModelCapability.STREAMING,
],
strengths: [
ModelStrength.CONSISTENT_OUTPUT,
ModelStrength.TECHNICAL_WRITING,
ModelStrength.DETAILED_EXPLANATIONS,
ModelStrength.CONCISE_RESPONSES,
ModelStrength.FOLLOW_INSTRUCTIONS,
],
weaknesses: [
ModelWeakness.LIMITED_CREATIVITY,
ModelWeakness.TECHNICAL_ACCURACY,
],
performance: {
averageLatency: 2800,
p95Latency: 4200,
p99Latency: 6500,
tokensPerSecond: 42,
accuracy: 0.89,
consistency: 0.94,
reliability: 0.92,
maxContextLength: 200000,
contextUtilization: 0.88,
errorRate: 0.018,
timeoutRate: 0.012,
},
cost: {
pricingModel: 'per_token',
inputTokenCost: 0.003, // $3 per 1M tokens
outputTokenCost: 0.015, // $15 per 1M tokens
costPerQualityPoint: 0.017,
costEfficiencyRank: 7,
rateLimitRpm: 1000,
rateLimitTpm: 100000,
},
useCases: [
{
queryType: QueryType.ANALYSIS,
suitabilityScore: 0.93,
confidence: 0.9,
reasoning: ['Excellent analytical depth', 'Methodical approach'],
},
{
queryType: QueryType.SECURITY,
suitabilityScore: 0.88,
confidence: 0.85,
reasoning: ['Good security practices', 'Thorough considerations'],
},
{
queryType: QueryType.CODE,
suitabilityScore: 0.85,
confidence: 0.82,
reasoning: ['Solid code understanding', 'Conservative approach'],
},
{
queryType: QueryType.GENERAL,
suitabilityScore: 0.87,
confidence: 0.88,
reasoning: ['Reliable general performance', 'Balanced responses'],
},
{
queryType: QueryType.CREATIVE,
suitabilityScore: 0.78,
confidence: 0.75,
reasoning: ['Decent creativity', 'Sometimes too cautious'],
},
],
qualityScores: {
overall: 0.87,
accuracy: 0.89,
creativity: 0.78,
reasoning: 0.91,
codeGeneration: 0.85,
instruction_following: 0.93,
factualAccuracy: 0.9,
consistency: 0.94,
},
constraints: {
maxTokens: 4096,
maxRequests: 1000,
allowedRegions: ['us', 'eu'],
requiresApiKey: true,
supportsBatch: false,
supportsStreaming: true,
rateLimits: {
requestsPerMinute: 1000,
tokensPerMinute: 100000,
requestsPerDay: 5000,
},
},
},
};
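// Example (illustrative): entries are keyed by model id, so a plain lookup
// returns the full static profile declared above. The values are the ones
// hard-coded in this table, not live provider data.
//
//   const gemini = MODEL_CHARACTERISTICS['gemini-2.5-pro'];
//   gemini.performance.maxContextLength; // 1000000
//   gemini.cost.inputTokenCost;          // 0.00125 (USD per 1K input tokens)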
// Model selection utilities
export class ModelCharacteristicsService {
/**
* Get characteristics for a specific model
*/
static getModelCharacteristics(modelId) {
return MODEL_CHARACTERISTICS[modelId];
}
/**
* Get all available models
*/
static getAllModels() {
return Object.values(MODEL_CHARACTERISTICS);
}
/**
* Find best model for a specific query type
*/
static getBestModelForQueryType(queryType, maxCost) {
const models = this.getAllModels();
return models
.filter((model) => {
if (maxCost) {
// Estimate cost for a typical query (500 input + 200 output tokens; token costs are USD per 1K tokens)
const estimatedCost = model.cost.inputTokenCost * 0.5 + model.cost.outputTokenCost * 0.2;
return estimatedCost <= maxCost;
}
return true;
})
.map((model) => {
const useCase = model.useCases.find((uc) => uc.queryType === queryType);
return {
model,
score: useCase ? useCase.suitabilityScore * useCase.confidence : 0,
};
})
.sort((a, b) => b.score - a.score)
.map((item) => item.model)[0];
}
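// Example (sketch, using only the static data above): pick the strongest model
// for code queries while capping the estimated per-query cost. With a cap of
// $0.01, gpt-4 (est. ~$0.027 per query) is filtered out and 'gemini-2.5-pro'
// wins on score (0.95 * 0.9 = 0.855 vs Claude's 0.85 * 0.82 = 0.697).
//
//   const best = ModelCharacteristicsService.getBestModelForQueryType(
//     QueryType.CODE,
//     0.01, // max estimated USD per query
//   );
//   best?.id; // 'gemini-2.5-pro'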
/**
* Rank models by cost efficiency for a query type
*/
static rankModelsByCostEfficiency(queryType) {
const models = this.getAllModels();
return models
.map((model) => {
const useCase = model.useCases.find((uc) => uc.queryType === queryType);
const qualityScore = useCase
? useCase.suitabilityScore
: model.qualityScores.overall;
const costEfficiency = qualityScore / model.cost.costPerQualityPoint;
return { model, costEfficiency };
})
.sort((a, b) => b.costEfficiency - a.costEfficiency)
.map((item) => item.model);
}
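// Example (sketch): ranking for analysis queries divides each model's ANALYSIS
// suitability by its costPerQualityPoint, so with the static data above the
// order is 'gemini-2.5-pro' (0.93 / 0.0054), 'claude-3-sonnet' (0.93 / 0.017),
// then 'gpt-4' (0.92 / 0.032).
//
//   const ranked = ModelCharacteristicsService.rankModelsByCostEfficiency(QueryType.ANALYSIS);
//   ranked.map((m) => m.id); // ['gemini-2.5-pro', 'claude-3-sonnet', 'gpt-4']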
/**
* Get models by performance tier
*/
static getModelsByPerformanceTier(tier) {
const models = this.getAllModels();
switch (tier) {
case 'fast':
return models
.filter((model) => model.performance.averageLatency < 2000)
.sort((a, b) => a.performance.averageLatency - b.performance.averageLatency);
case 'balanced':
return models
.filter((model) => model.performance.averageLatency < 3500 &&
model.qualityScores.overall >= 0.85)
.sort((a, b) => {
const aScore = (1 - a.performance.averageLatency / 10000) *
a.qualityScores.overall;
const bScore = (1 - b.performance.averageLatency / 10000) *
b.qualityScores.overall;
return bScore - aScore;
});
case 'quality':
return models
.filter((model) => model.qualityScores.overall >= 0.88)
.sort((a, b) => b.qualityScores.overall - a.qualityScores.overall);
default:
return models;
}
}
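// Example (sketch): the 'balanced' tier weights quality by normalized latency,
// score = (1 - averageLatency / 10000) * overall. With the static data above
// that yields gemini-2.5-pro (0.675), claude-3-sonnet (~0.626), gpt-4 (~0.619).
// Note that none of the bundled models has averageLatency < 2000, so the
// 'fast' tier currently returns an empty array.
//
//   const balanced = ModelCharacteristicsService.getModelsByPerformanceTier('balanced');
//   balanced.map((m) => m.id); // ['gemini-2.5-pro', 'claude-3-sonnet', 'gpt-4']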
/**
* Compare two models across multiple dimensions
*/
static compareModels(modelId1, modelId2) {
const model1 = this.getModelCharacteristics(modelId1);
const model2 = this.getModelCharacteristics(modelId2);
if (!model1 || !model2)
return undefined;
return {
models: [model1, model2],
comparison: {
quality: {
winner: model1.qualityScores.overall > model2.qualityScores.overall
? model1.id
: model2.id,
difference: Math.abs(model1.qualityScores.overall - model2.qualityScores.overall),
},
speed: {
winner: model1.performance.averageLatency <
model2.performance.averageLatency
? model1.id
: model2.id,
difference: Math.abs(model1.performance.averageLatency -
model2.performance.averageLatency),
},
cost: {
winner: model1.cost.costPerQualityPoint < model2.cost.costPerQualityPoint
? model1.id
: model2.id,
difference: Math.abs(model1.cost.costPerQualityPoint - model2.cost.costPerQualityPoint),
},
},
recommendation: this.getRecommendation(model1, model2),
};
}
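// Example (sketch): comparing two of the profiles above returns per-dimension
// winners plus a heuristic recommendation string. For 'gemini-2.5-pro' vs
// 'gpt-4', the quality winner is gpt-4 (diff ~0.01), the speed and cost winner
// is gemini-2.5-pro, and the cost gap (> 0.02 per quality point) drives the
// recommendation toward Gemini's cost efficiency.
//
//   const result = ModelCharacteristicsService.compareModels('gemini-2.5-pro', 'gpt-4');
//   result?.comparison.speed.winner; // 'gemini-2.5-pro'
//   result?.recommendation;          // notes Gemini 2.5 Pro's better cost efficiency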
static getRecommendation(model1, model2) {
const qualityDiff = model1.qualityScores.overall - model2.qualityScores.overall;
const speedDiff = model2.performance.averageLatency - model1.performance.averageLatency;
const costDiff = model2.cost.costPerQualityPoint - model1.cost.costPerQualityPoint;
if (Math.abs(qualityDiff) < 0.05 &&
Math.abs(speedDiff) < 500 &&
Math.abs(costDiff) < 0.01) {
return 'Both models are very similar. Choose based on specific use case requirements.';
}
if (qualityDiff > 0.1) {
return `${model1.name} offers significantly better quality and may be worth the trade-offs.`;
}
else if (qualityDiff < -0.1) {
return `${model2.name} offers significantly better quality and may be worth the trade-offs.`;
}
if (speedDiff > 1000) {
return `${model1.name} is significantly faster for time-sensitive applications.`;
}
else if (speedDiff < -1000) {
return `${model2.name} is significantly faster for time-sensitive applications.`;
}
if (costDiff > 0.02) {
return `${model1.name} offers better cost efficiency for budget-conscious usage.`;
}
else if (costDiff < -0.02) {
return `${model2.name} offers better cost efficiency for budget-conscious usage.`;
}
return 'Choose based on your specific priorities: quality, speed, or cost.';
}
}
//# sourceMappingURL=model-characteristics.js.map