/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
* @fileoverview Quantum CLI Core - Multi-LLM Collaboration System:
* model characteristics and selection utilities.
*/
import { QueryType } from './types.js';
// Re-export QueryType for external use
export { QueryType };
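// Note: the enums below are TypeScript string enums compiled to plain JavaScript;
// each IIFE fills the exported object with string-valued members.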
export var ModelCapability;
(function (ModelCapability) {
ModelCapability["TEXT_GENERATION"] = "text_generation";
ModelCapability["CODE_GENERATION"] = "code_generation";
ModelCapability["ANALYSIS"] = "analysis";
ModelCapability["REASONING"] = "reasoning";
ModelCapability["CREATIVE_WRITING"] = "creative_writing";
ModelCapability["INSTRUCTION_FOLLOWING"] = "instruction_following";
ModelCapability["MULTI_LANGUAGE"] = "multi_language";
ModelCapability["LONG_CONTEXT"] = "long_context";
ModelCapability["FUNCTION_CALLING"] = "function_calling";
ModelCapability["JSON_MODE"] = "json_mode";
ModelCapability["STREAMING"] = "streaming";
})(ModelCapability || (ModelCapability = {}));
export var ModelStrength;
(function (ModelStrength) {
// Technical strengths
ModelStrength["FAST_RESPONSE"] = "fast_response";
ModelStrength["HIGH_ACCURACY"] = "high_accuracy";
ModelStrength["CONSISTENT_OUTPUT"] = "consistent_output";
ModelStrength["COMPLEX_REASONING"] = "complex_reasoning";
ModelStrength["CODE_UNDERSTANDING"] = "code_understanding";
ModelStrength["MATHEMATICAL_REASONING"] = "mathematical_reasoning";
// Domain strengths
ModelStrength["CREATIVE_TASKS"] = "creative_tasks";
ModelStrength["TECHNICAL_WRITING"] = "technical_writing";
ModelStrength["PROBLEM_SOLVING"] = "problem_solving";
ModelStrength["DETAILED_EXPLANATIONS"] = "detailed_explanations";
ModelStrength["CONCISE_RESPONSES"] = "concise_responses";
// Language and format
ModelStrength["MULTILINGUAL"] = "multilingual";
ModelStrength["STRUCTURED_OUTPUT"] = "structured_output";
ModelStrength["FOLLOW_INSTRUCTIONS"] = "follow_instructions";
})(ModelStrength || (ModelStrength = {}));
export var ModelWeakness;
(function (ModelWeakness) {
// Performance limitations
ModelWeakness["SLOW_RESPONSE"] = "slow_response";
ModelWeakness["INCONSISTENT_QUALITY"] = "inconsistent_quality";
ModelWeakness["LIMITED_CONTEXT"] = "limited_context";
ModelWeakness["HIGH_COST"] = "high_cost";
// Capability limitations
ModelWeakness["POOR_CODE_GEN"] = "poor_code_generation";
ModelWeakness["WEAK_REASONING"] = "weak_reasoning";
ModelWeakness["VERBOSE_OUTPUT"] = "verbose_output";
ModelWeakness["HALLUCINATION_PRONE"] = "hallucination_prone";
ModelWeakness["LIMITED_CREATIVITY"] = "limited_creativity";
// Domain limitations
ModelWeakness["TECHNICAL_ACCURACY"] = "technical_accuracy";
ModelWeakness["OUTDATED_KNOWLEDGE"] = "outdated_knowledge";
ModelWeakness["FACTUAL_ERRORS"] = "factual_errors";
})(ModelWeakness || (ModelWeakness = {}));
// Predefined model characteristics for major providers
export const MODEL_CHARACTERISTICS = {
'gemini-2.5-pro': {
id: 'gemini-2.5-pro',
name: 'Gemini 2.5 Pro',
provider: 'google',
capabilities: [
ModelCapability.TEXT_GENERATION,
ModelCapability.CODE_GENERATION,
ModelCapability.ANALYSIS,
ModelCapability.REASONING,
ModelCapability.LONG_CONTEXT,
ModelCapability.FUNCTION_CALLING,
ModelCapability.STREAMING,
],
strengths: [
ModelStrength.COMPLEX_REASONING,
ModelStrength.CODE_UNDERSTANDING,
ModelStrength.DETAILED_EXPLANATIONS,
ModelStrength.FOLLOW_INSTRUCTIONS,
ModelStrength.STRUCTURED_OUTPUT,
],
weaknesses: [ModelWeakness.HIGH_COST, ModelWeakness.SLOW_RESPONSE],
performance: {
averageLatency: 2500,
p95Latency: 4000,
p99Latency: 6000,
tokensPerSecond: 45,
accuracy: 0.92,
consistency: 0.88,
reliability: 0.94,
maxContextLength: 1000000,
contextUtilization: 0.85,
errorRate: 0.02,
timeoutRate: 0.01,
},
cost: {
pricingModel: 'per_token',
inputTokenCost: 0.00125, // $1.25 per 1M tokens
outputTokenCost: 0.005, // $5.00 per 1M tokens
costPerQualityPoint: 0.0054,
costEfficiencyRank: 6,
rateLimitRpm: 60,
rateLimitTpm: 32000,
},
useCases: [
{
queryType: QueryType.CODE,
suitabilityScore: 0.95,
confidence: 0.9,
reasoning: [
'Excellent code understanding',
'Strong debugging capabilities',
],
},
{
queryType: QueryType.ANALYSIS,
suitabilityScore: 0.93,
confidence: 0.88,
reasoning: ['Deep analytical reasoning', 'Comprehensive explanations'],
},
{
queryType: QueryType.SECURITY,
suitabilityScore: 0.9,
confidence: 0.85,
reasoning: [
'Good security knowledge',
'Thorough vulnerability analysis',
],
},
{
queryType: QueryType.CREATIVE,
suitabilityScore: 0.75,
confidence: 0.7,
reasoning: ['Decent creativity', 'Sometimes overly structured'],
},
{
queryType: QueryType.GENERAL,
suitabilityScore: 0.88,
confidence: 0.92,
reasoning: ['Well-rounded performance', 'Reliable responses'],
},
],
qualityScores: {
overall: 0.9,
accuracy: 0.92,
creativity: 0.75,
reasoning: 0.95,
codeGeneration: 0.93,
instruction_following: 0.94,
factualAccuracy: 0.91,
consistency: 0.88,
},
constraints: {
maxTokens: 8192,
maxRequests: 1000,
allowedRegions: ['us', 'eu', 'asia'],
requiresApiKey: true,
supportsBatch: false,
supportsStreaming: true,
rateLimits: {
requestsPerMinute: 60,
tokensPerMinute: 32000,
requestsPerDay: 1000,
},
},
},
'gpt-4': {
id: 'gpt-4',
name: 'GPT-4',
provider: 'openai',
capabilities: [
ModelCapability.TEXT_GENERATION,
ModelCapability.CODE_GENERATION,
ModelCapability.ANALYSIS,
ModelCapability.REASONING,
ModelCapability.CREATIVE_WRITING,
ModelCapability.FUNCTION_CALLING,
ModelCapability.JSON_MODE,
ModelCapability.STREAMING,
],
strengths: [
ModelStrength.HIGH_ACCURACY,
ModelStrength.COMPLEX_REASONING,
ModelStrength.CREATIVE_TASKS,
ModelStrength.PROBLEM_SOLVING,
ModelStrength.FOLLOW_INSTRUCTIONS,
],
weaknesses: [
ModelWeakness.HIGH_COST,
ModelWeakness.SLOW_RESPONSE,
ModelWeakness.LIMITED_CONTEXT,
],
performance: {
averageLatency: 3200,
p95Latency: 5500,
p99Latency: 8000,
tokensPerSecond: 35,
accuracy: 0.94,
consistency: 0.91,
reliability: 0.93,
maxContextLength: 128000,
contextUtilization: 0.82,
errorRate: 0.015,
timeoutRate: 0.008,
},
cost: {
pricingModel: 'per_token',
inputTokenCost: 0.03, // $30 per 1M tokens
outputTokenCost: 0.06, // $60 per 1M tokens
costPerQualityPoint: 0.032,
costEfficiencyRank: 4,
rateLimitRpm: 10000,
rateLimitTpm: 300000,
},
useCases: [
{
queryType: QueryType.CREATIVE,
suitabilityScore: 0.95,
confidence: 0.92,
reasoning: ['Exceptional creativity', 'Natural language flow'],
},
{
queryType: QueryType.ANALYSIS,
suitabilityScore: 0.92,
confidence: 0.9,
reasoning: ['Strong analytical skills', 'Nuanced understanding'],
},
{
queryType: QueryType.CODE,
suitabilityScore: 0.88,
confidence: 0.85,
reasoning: ['Good code generation', 'Sometimes verbose'],
},
{
queryType: QueryType.GENERAL,
suitabilityScore: 0.91,
confidence: 0.93,
reasoning: ['Versatile performance', 'Human-like responses'],
},
{
queryType: QueryType.SECURITY,
suitabilityScore: 0.85,
confidence: 0.8,
reasoning: ['Adequate security knowledge', 'May lack latest updates'],
},
],
qualityScores: {
overall: 0.91,
accuracy: 0.94,
creativity: 0.95,
reasoning: 0.93,
codeGeneration: 0.88,
instruction_following: 0.92,
factualAccuracy: 0.89,
consistency: 0.91,
},
constraints: {
maxTokens: 4096,
maxRequests: 500,
allowedRegions: ['global'],
requiresApiKey: true,
supportsBatch: true,
supportsStreaming: true,
rateLimits: {
requestsPerMinute: 3500,
tokensPerMinute: 350000,
requestsPerDay: 10000,
},
},
},
'claude-3-sonnet': {
id: 'claude-3-sonnet',
name: 'Claude 3 Sonnet',
provider: 'anthropic',
capabilities: [
ModelCapability.TEXT_GENERATION,
ModelCapability.CODE_GENERATION,
ModelCapability.ANALYSIS,
ModelCapability.REASONING,
ModelCapability.LONG_CONTEXT,
ModelCapability.CREATIVE_WRITING,
ModelCapability.STREAMING,
],
strengths: [
ModelStrength.CONSISTENT_OUTPUT,
ModelStrength.TECHNICAL_WRITING,
ModelStrength.DETAILED_EXPLANATIONS,
ModelStrength.CONCISE_RESPONSES,
ModelStrength.FOLLOW_INSTRUCTIONS,
],
weaknesses: [
ModelWeakness.LIMITED_CREATIVITY,
ModelWeakness.TECHNICAL_ACCURACY,
],
performance: {
averageLatency: 2800,
p95Latency: 4200,
p99Latency: 6500,
tokensPerSecond: 42,
accuracy: 0.89,
consistency: 0.94,
reliability: 0.92,
maxContextLength: 200000,
contextUtilization: 0.88,
errorRate: 0.018,
timeoutRate: 0.012,
},
cost: {
pricingModel: 'per_token',
inputTokenCost: 0.003, // $3 per 1M tokens
outputTokenCost: 0.015, // $15 per 1M tokens
costPerQualityPoint: 0.017,
costEfficiencyRank: 7,
rateLimitRpm: 1000,
rateLimitTpm: 100000,
},
useCases: [
{
queryType: QueryType.ANALYSIS,
suitabilityScore: 0.93,
confidence: 0.9,
reasoning: ['Excellent analytical depth', 'Methodical approach'],
},
{
queryType: QueryType.SECURITY,
suitabilityScore: 0.88,
confidence: 0.85,
reasoning: ['Good security practices', 'Thorough considerations'],
},
{
queryType: QueryType.CODE,
suitabilityScore: 0.85,
confidence: 0.82,
reasoning: ['Solid code understanding', 'Conservative approach'],
},
{
queryType: QueryType.GENERAL,
suitabilityScore: 0.87,
confidence: 0.88,
reasoning: ['Reliable general performance', 'Balanced responses'],
},
{
queryType: QueryType.CREATIVE,
suitabilityScore: 0.78,
confidence: 0.75,
reasoning: ['Decent creativity', 'Sometimes too cautious'],
},
],
qualityScores: {
overall: 0.87,
accuracy: 0.89,
creativity: 0.78,
reasoning: 0.91,
codeGeneration: 0.85,
instruction_following: 0.93,
factualAccuracy: 0.9,
consistency: 0.94,
},
constraints: {
maxTokens: 4096,
maxRequests: 1000,
allowedRegions: ['us', 'eu'],
requiresApiKey: true,
supportsBatch: false,
supportsStreaming: true,
rateLimits: {
requestsPerMinute: 1000,
tokensPerMinute: 100000,
requestsPerDay: 5000,
},
},
},
};
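// Example (illustrative): entries are keyed by model id, so a plain lookup
// returns the full static profile declared above. The values are the ones
// hard-coded in this table, not live provider data.
//
//   const gemini = MODEL_CHARACTERISTICS['gemini-2.5-pro'];
//   gemini.performance.maxContextLength; // 1000000
//   gemini.cost.inputTokenCost;          // 0.00125 (USD per 1K input tokens)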
// Model selection utilities
export class ModelCharacteristicsService {
/**
* Get characteristics for a specific model
*/
static getModelCharacteristics(modelId) {
return MODEL_CHARACTERISTICS[modelId];
}
/**
* Get all available models
*/
static getAllModels() {
return Object.values(MODEL_CHARACTERISTICS);
}
/**
* Find best model for a specific query type
*/
static getBestModelForQueryType(queryType, maxCost) {
const models = this.getAllModels();
return models
.filter((model) => {
if (maxCost) {
// Estimate cost for a typical query (500 input + 200 output tokens; token costs are USD per 1K tokens)
const estimatedCost = model.cost.inputTokenCost * 0.5 + model.cost.outputTokenCost * 0.2;
return estimatedCost <= maxCost;
}
return true;
})
.map((model) => {
const useCase = model.useCases.find((uc) => uc.queryType === queryType);
return {
model,
score: useCase ? useCase.suitabilityScore * useCase.confidence : 0,
};
})
.sort((a, b) => b.score - a.score)
.map((item) => item.model)[0];
}
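// Example (sketch, using only the static data above): pick the strongest model
// for code queries while capping the estimated per-query cost. With a cap of
// $0.01, gpt-4 (est. ~$0.027 per query) is filtered out and 'gemini-2.5-pro'
// wins on score (0.95 * 0.9 = 0.855 vs Claude's 0.85 * 0.82 = 0.697).
//
//   const best = ModelCharacteristicsService.getBestModelForQueryType(
//     QueryType.CODE,
//     0.01, // max estimated USD per query
//   );
//   best?.id; // 'gemini-2.5-pro'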
/**
* Rank models by cost efficiency for a query type
*/
static rankModelsByCostEfficiency(queryType) {
const models = this.getAllModels();
return models
.map((model) => {
const useCase = model.useCases.find((uc) => uc.queryType === queryType);
const qualityScore = useCase
? useCase.suitabilityScore
: model.qualityScores.overall;
const costEfficiency = qualityScore / model.cost.costPerQualityPoint;
return { model, costEfficiency };
})
.sort((a, b) => b.costEfficiency - a.costEfficiency)
.map((item) => item.model);
}
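// Example (sketch): ranking for analysis queries divides each model's ANALYSIS
// suitability by its costPerQualityPoint, so with the static data above the
// order is 'gemini-2.5-pro' (0.93 / 0.0054), 'claude-3-sonnet' (0.93 / 0.017),
// then 'gpt-4' (0.92 / 0.032).
//
//   const ranked = ModelCharacteristicsService.rankModelsByCostEfficiency(QueryType.ANALYSIS);
//   ranked.map((m) => m.id); // ['gemini-2.5-pro', 'claude-3-sonnet', 'gpt-4']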
/**
* Get models by performance tier
*/
static getModelsByPerformanceTier(tier) {
const models = this.getAllModels();
switch (tier) {
case 'fast':
return models
.filter((model) => model.performance.averageLatency < 2000)
.sort((a, b) => a.performance.averageLatency - b.performance.averageLatency);
case 'balanced':
return models
.filter((model) => model.performance.averageLatency < 3500 &&
model.qualityScores.overall >= 0.85)
.sort((a, b) => {
const aScore = (1 - a.performance.averageLatency / 10000) *
a.qualityScores.overall;
const bScore = (1 - b.performance.averageLatency / 10000) *
b.qualityScores.overall;
return bScore - aScore;
});
case 'quality':
return models
.filter((model) => model.qualityScores.overall >= 0.88)
.sort((a, b) => b.qualityScores.overall - a.qualityScores.overall);
default:
return models;
}
}
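// Example (sketch): the 'balanced' tier weights quality by normalized latency,
// score = (1 - averageLatency / 10000) * overall. With the static data above
// that yields gemini-2.5-pro (0.675), claude-3-sonnet (~0.626), gpt-4 (~0.619).
// Note that none of the bundled models has averageLatency < 2000, so the
// 'fast' tier currently returns an empty array.
//
//   const balanced = ModelCharacteristicsService.getModelsByPerformanceTier('balanced');
//   balanced.map((m) => m.id); // ['gemini-2.5-pro', 'claude-3-sonnet', 'gpt-4']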
/**
* Compare two models across multiple dimensions
*/
static compareModels(modelId1, modelId2) {
const model1 = this.getModelCharacteristics(modelId1);
const model2 = this.getModelCharacteristics(modelId2);
if (!model1 || !model2)
return undefined;
return {
models: [model1, model2],
comparison: {
quality: {
winner: model1.qualityScores.overall > model2.qualityScores.overall
? model1.id
: model2.id,
difference: Math.abs(model1.qualityScores.overall - model2.qualityScores.overall),
},
speed: {
winner: model1.performance.averageLatency <
model2.performance.averageLatency
? model1.id
: model2.id,
difference: Math.abs(model1.performance.averageLatency -
model2.performance.averageLatency),
},
cost: {
winner: model1.cost.costPerQualityPoint < model2.cost.costPerQualityPoint
? model1.id
: model2.id,
difference: Math.abs(model1.cost.costPerQualityPoint - model2.cost.costPerQualityPoint),
},
},
recommendation: this.getRecommendation(model1, model2),
};
}
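// Example (sketch): comparing two of the profiles above returns per-dimension
// winners plus a heuristic recommendation string. For 'gemini-2.5-pro' vs
// 'gpt-4', the quality winner is gpt-4 (diff ~0.01), the speed and cost winner
// is gemini-2.5-pro, and the cost gap (> 0.02 per quality point) drives the
// recommendation toward Gemini's cost efficiency.
//
//   const result = ModelCharacteristicsService.compareModels('gemini-2.5-pro', 'gpt-4');
//   result?.comparison.speed.winner; // 'gemini-2.5-pro'
//   result?.recommendation;          // notes Gemini 2.5 Pro's better cost efficiency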
static getRecommendation(model1, model2) {
const qualityDiff = model1.qualityScores.overall - model2.qualityScores.overall;
const speedDiff = model2.performance.averageLatency - model1.performance.averageLatency;
const costDiff = model2.cost.costPerQualityPoint - model1.cost.costPerQualityPoint;
if (Math.abs(qualityDiff) < 0.05 &&
Math.abs(speedDiff) < 500 &&
Math.abs(costDiff) < 0.01) {
return 'Both models are very similar. Choose based on specific use case requirements.';
}
if (qualityDiff > 0.1) {
return `${model1.name} offers significantly better quality and may be worth the trade-offs.`;
}
else if (qualityDiff < -0.1) {
return `${model2.name} offers significantly better quality and may be worth the trade-offs.`;
}
if (speedDiff > 1000) {
return `${model1.name} is significantly faster for time-sensitive applications.`;
}
else if (speedDiff < -1000) {
return `${model2.name} is significantly faster for time-sensitive applications.`;
}
if (costDiff > 0.02) {
return `${model1.name} offers better cost efficiency for budget-conscious usage.`;
}
else if (costDiff < -0.02) {
return `${model2.name} offers better cost efficiency for budget-conscious usage.`;
}
return 'Choose based on your specific priorities: quality, speed, or cost.';
}
}
//# sourceMappingURL=model-characteristics.js.map