UNPKG

quantum-cli-core

Version:

Quantum CLI Core - Multi-LLM Collaboration System

github.com/kanghunlee/quantum-cli

kanghunlee/quantum-cli

403 lines • 17.1 kB

JavaScript

/**
 * @license
 * Copyright 2025 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
import { EventEmitter } from 'events';
import { ModelCharacteristicsService } from './model-characteristics.js';
import { QueryType } from './types.js';

/**
 * In-memory tracker for per-request model metrics.
 *
 * Keeps a bounded history of metrics, maintains running daily/monthly cost
 * totals per model, and emits the following events:
 *  - `metric_recorded`    (metric)  after every {@link recordMetric} call
 *  - `cost_alert`         (alert)   when a cost limit is nearly/over reached or
 *                                   a cost spike is detected
 *  - `performance_update` (update)  when {@link updateModelCharacteristics}
 *                                   detects a significant change
 *  - `aggregated_metrics` (metrics) once per aggregation interval, per model
 *
 * Fields read from a metric by this class: `modelId`, `timestamp`, `success`,
 * `latency`, `tokens.input`, `tokens.output`, `cost.total`, and the optional
 * `errorType`, `queryType`, `userRating`.
 */
export class PerformanceTracker extends EventEmitter {
    metrics = [];
    maxMetricsHistory = 10000; // Keep last 10k metrics
    aggregationInterval = null;
    aggregationIntervalMs = 60000; // 1 minute
    // Cost tracking
    dailyCostLimits = new Map();
    monthlyCostLimits = new Map();
    currentDailyCosts = new Map();
    currentMonthlyCosts = new Map();
    // Calendar day / month (local time) the running cost totals belong to
    costDayKey = null;
    costMonthKey = null;
    // Performance baselines
    performanceBaselines = new Map();

    constructor() {
        super();
        this.startAggregation();
        this.loadBaselines();
    }

    /**
     * Record a performance metric.
     *
     * Appends the metric to the history (trimmed to `maxMetricsHistory`),
     * updates the running cost totals, evaluates cost alerts and finally emits
     * `metric_recorded`.
     *
     * @param {object} metric - The metric to record.
     */
    recordMetric(metric) {
        this.metrics.push(metric);
        // Maintain history limit
        if (this.metrics.length > this.maxMetricsHistory) {
            this.metrics = this.metrics.slice(-this.maxMetricsHistory);
        }
        // Update cost tracking
        this.updateCostTracking(metric);
        // Check for alerts
        this.checkForAlerts(metric);
        // Emit event for real-time listeners
        this.emit('metric_recorded', metric);
    }

    /**
     * Get aggregated metrics for a model within a time window.
     *
     * Latency statistics and token throughput are computed over successful
     * requests only; cost and request counts cover every request in the window.
     *
     * @param {string} modelId - Model whose metrics are aggregated.
     * @param {Date} startTime - Inclusive start of the window.
     * @param {Date} [endTime] - Inclusive end of the window (defaults to now).
     * @returns {object|null} The aggregate, or `null` when the model has no
     *   metrics inside the window.
     */
    getAggregatedMetrics(modelId, startTime, endTime = new Date()) {
        const modelMetrics = this.metrics.filter(
            (m) =>
                m.modelId === modelId &&
                m.timestamp >= startTime &&
                m.timestamp <= endTime,
        );
        if (modelMetrics.length === 0) {
            return null;
        }
        const successfulMetrics = modelMetrics.filter((m) => m.success);
        const latencies = successfulMetrics
            .map((m) => m.latency)
            .sort((a, b) => a - b);
        const errorBreakdown = {};
        const queryTypeBreakdown = {
            [QueryType.CODE]: 0,
            [QueryType.CREATIVE]: 0,
            [QueryType.ANALYSIS]: 0,
            [QueryType.SECURITY]: 0,
            [QueryType.GENERAL]: 0,
        };
        for (const metric of modelMetrics) {
            if (!metric.success && metric.errorType) {
                errorBreakdown[metric.errorType] =
                    (errorBreakdown[metric.errorType] || 0) + 1;
            }
            if (metric.queryType) {
                queryTypeBreakdown[metric.queryType] =
                    (queryTypeBreakdown[metric.queryType] || 0) + 1;
            }
        }
        const totalTokens = successfulMetrics.reduce(
            (sum, m) => sum + m.tokens.input + m.tokens.output,
            0,
        );
        const totalTime = successfulMetrics.reduce((sum, m) => sum + m.latency, 0);
        // Falsy ratings (missing, 0) are treated as "not rated".
        const userRatings = modelMetrics
            .filter((m) => m.userRating)
            .map((m) => m.userRating);
        const totalCost = modelMetrics.reduce((sum, m) => sum + m.cost.total, 0);
        const hasLatencies = latencies.length > 0;
        return {
            modelId,
            timeWindow: { start: startTime, end: endTime },
            requestCount: modelMetrics.length,
            successRate: successfulMetrics.length / modelMetrics.length,
            averageLatency: hasLatencies
                ? latencies.reduce((a, b) => a + b, 0) / latencies.length
                : 0,
            p95Latency: hasLatencies
                ? latencies[Math.floor(latencies.length * 0.95)]
                : 0,
            p99Latency: hasLatencies
                ? latencies[Math.floor(latencies.length * 0.99)]
                : 0,
            totalCost,
            averageCost: totalCost / modelMetrics.length,
            // The * 1000 converts tokens-per-latency-unit to tokens per second,
            // i.e. latency is treated as milliseconds here.
            tokensPerSecond: totalTime > 0 ? (totalTokens / totalTime) * 1000 : 0,
            userSatisfaction:
                userRatings.length > 0
                    ? userRatings.reduce((a, b) => a + b, 0) / userRatings.length
                    : 0,
            errorBreakdown,
            queryTypeBreakdown,
        };
    }

    /**
     * Get performance comparison between models.
     *
     * @param {string[]} modelIds - Models to compare.
     * @param {{start: Date, end: Date}} timeWindow - Window to aggregate over.
     * @returns {Object<string, object|null>} Aggregated metrics keyed by model
     *   id; `null` for models without data in the window.
     */
    compareModelPerformance(modelIds, timeWindow) {
        const comparison = {};
        for (const modelId of modelIds) {
            comparison[modelId] = this.getAggregatedMetrics(
                modelId,
                timeWindow.start,
                timeWindow.end,
            );
        }
        return comparison;
    }

    /**
     * Set cost limits for a model. A limit left `undefined` is not changed.
     *
     * @param {string} modelId - Model the limits apply to.
     * @param {number} [dailyLimit] - Daily cost limit.
     * @param {number} [monthlyLimit] - Monthly cost limit.
     */
    setCostLimits(modelId, dailyLimit, monthlyLimit) {
        if (dailyLimit !== undefined) {
            this.dailyCostLimits.set(modelId, dailyLimit);
        }
        if (monthlyLimit !== undefined) {
            this.monthlyCostLimits.set(modelId, monthlyLimit);
        }
    }

    /**
     * Get current cost usage for a model.
     *
     * Totals that belong to a previous calendar day/month are discarded first,
     * so the result never reports a stale period.
     *
     * @param {string} modelId - Model to query.
     * @returns {{daily: number, monthly: number}} Running totals (0 if none).
     */
    getCurrentCostUsage(modelId) {
        this.rollCostPeriods();
        return {
            daily: this.currentDailyCosts.get(modelId) || 0,
            monthly: this.currentMonthlyCosts.get(modelId) || 0,
        };
    }

    /**
     * Get cost efficiency ranking of models, cheapest per quality point first.
     *
     * Covers every model that has metrics inside the window. The quality score
     * comes from the model characteristics and falls back to 0.5 when missing.
     *
     * @param {{start: Date, end: Date}} timeWindow - Window to aggregate over.
     * @returns {Array<{modelId: string, costPerQualityPoint: number,
     *   userSatisfactionPerDollar: number}>} Ranking sorted ascending by
     *   `costPerQualityPoint`.
     */
    getCostEfficiencyRanking(timeWindow) {
        const allModelIds = [...new Set(this.metrics.map((m) => m.modelId))];
        const rankings = allModelIds
            .map((modelId) => {
                const metrics = this.getAggregatedMetrics(
                    modelId,
                    timeWindow.start,
                    timeWindow.end,
                );
                if (!metrics || metrics.requestCount === 0) {
                    return null;
                }
                const characteristics =
                    ModelCharacteristicsService.getModelCharacteristics(modelId);
                const qualityScore = characteristics?.qualityScores.overall || 0.5;
                // A zero average cost would make the ratio Infinity; report 0
                // ("not measurable") instead.
                const hasRatio =
                    metrics.userSatisfaction > 0 && metrics.averageCost > 0;
                return {
                    modelId,
                    costPerQualityPoint: metrics.averageCost / qualityScore,
                    userSatisfactionPerDollar: hasRatio
                        ? metrics.userSatisfaction / metrics.averageCost
                        : 0,
                };
            })
            .filter(Boolean);
        return rankings.sort(
            (a, b) => a.costPerQualityPoint - b.costPerQualityPoint,
        );
    }

    /**
     * Update model characteristics based on real performance data.
     *
     * Uses the last 30 days of metrics and requires at least 50 requests.
     * When a significant change against the stored baseline is detected, the
     * baseline is replaced and `performance_update` is emitted.
     *
     * @param {string} modelId - Model to evaluate.
     * @returns {object|null} The update that was emitted, or `null` when there
     *   is too little data, the model is unknown, or nothing changed
     *   significantly.
     */
    updateModelCharacteristics(modelId) {
        const last30Days = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000);
        const recentMetrics = this.getAggregatedMetrics(modelId, last30Days);
        if (!recentMetrics || recentMetrics.requestCount < 50) {
            return null; // Need sufficient data
        }
        const characteristics =
            ModelCharacteristicsService.getModelCharacteristics(modelId);
        if (!characteristics) {
            return null;
        }
        const baseline =
            this.performanceBaselines.get(modelId) || characteristics.performance;
        const newMetrics = {
            averageLatency: recentMetrics.averageLatency,
            p95Latency: recentMetrics.p95Latency,
            p99Latency: recentMetrics.p99Latency,
            tokensPerSecond: recentMetrics.tokensPerSecond,
            accuracy: recentMetrics.successRate,
            reliability: recentMetrics.successRate,
            errorRate: 1 - recentMetrics.successRate,
        };
        // Check for significant changes (>15% latency or >10% accuracy)
        const significantChange = this.detectSignificantChange(baseline, newMetrics);
        if (!significantChange.hasChange) {
            return null;
        }
        const update = {
            modelId,
            oldMetrics: baseline,
            newMetrics,
            significantChange: true,
            changeReason: significantChange.reason,
        };
        // Update baseline
        this.performanceBaselines.set(modelId, { ...baseline, ...newMetrics });
        // Emit update event
        this.emit('performance_update', update);
        return update;
    }

    /**
     * Get performance insights and recommendations.
     *
     * Compares the last 7 days against the window from 30 days ago up to
     * 7 days ago. Relative trends (latency, cost) are reported as `stable`
     * when the historical value is zero, because no percentage can be derived.
     *
     * @param {string} modelId - Model to analyse.
     * @returns {{insights: string[], recommendations: string[], trends: object}}
     *   Human-readable findings plus a trend label (`improving`, `declining`,
     *   `stable`) per dimension.
     */
    getPerformanceInsights(modelId) {
        const last7Days = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);
        const last30Days = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000);
        const recentMetrics = this.getAggregatedMetrics(modelId, last7Days);
        const historicalMetrics = this.getAggregatedMetrics(
            modelId,
            last30Days,
            last7Days,
        );
        const insights = [];
        const recommendations = [];
        const trends = {};
        if (!recentMetrics || !historicalMetrics) {
            return {
                insights: ['Insufficient data for analysis'],
                recommendations: [],
                trends: {},
            };
        }
        // Latency trends
        const latencyChange = this.relativeChange(
            recentMetrics.averageLatency,
            historicalMetrics.averageLatency,
        );
        if (Math.abs(latencyChange) > 0.1) {
            trends.latency = latencyChange < 0 ? 'improving' : 'declining';
            insights.push(`Latency has ${latencyChange < 0 ? 'improved' : 'degraded'} by ${Math.abs(latencyChange * 100).toFixed(1)}%`);
            if (latencyChange > 0.2) {
                recommendations.push('Consider switching to a faster model for time-sensitive queries');
            }
        } else {
            trends.latency = 'stable';
        }
        // Cost trends
        const costChange = this.relativeChange(
            recentMetrics.averageCost,
            historicalMetrics.averageCost,
        );
        if (Math.abs(costChange) > 0.15) {
            trends.cost = costChange < 0 ? 'improving' : 'declining';
            insights.push(`Average cost has ${costChange < 0 ? 'decreased' : 'increased'} by ${Math.abs(costChange * 100).toFixed(1)}%`);
            if (costChange > 0.3) {
                recommendations.push('Review usage patterns and consider cost optimization strategies');
            }
        } else {
            trends.cost = 'stable';
        }
        // Success rate trends (absolute difference, not relative)
        const successChange =
            recentMetrics.successRate - historicalMetrics.successRate;
        if (Math.abs(successChange) > 0.05) {
            trends.reliability = successChange > 0 ? 'improving' : 'declining';
            insights.push(`Success rate has ${successChange > 0 ? 'improved' : 'declined'} by ${Math.abs(successChange * 100).toFixed(1)}%`);
            if (successChange < -0.1) {
                recommendations.push('Investigate error patterns and consider using a more reliable model');
            }
        } else {
            trends.reliability = 'stable';
        }
        // User satisfaction trends (only when both windows have ratings)
        if (
            recentMetrics.userSatisfaction > 0 &&
            historicalMetrics.userSatisfaction > 0
        ) {
            const satisfactionChange =
                recentMetrics.userSatisfaction - historicalMetrics.userSatisfaction;
            if (Math.abs(satisfactionChange) > 0.3) {
                trends.satisfaction = satisfactionChange > 0 ? 'improving' : 'declining';
                insights.push(`User satisfaction has ${satisfactionChange > 0 ? 'improved' : 'declined'} by ${Math.abs(satisfactionChange).toFixed(1)} points`);
                if (satisfactionChange < -0.5) {
                    recommendations.push('Review output quality and consider model tuning or switching');
                }
            } else {
                trends.satisfaction = 'stable';
            }
        }
        return { insights, recommendations, trends };
    }

    /**
     * Clean up resources: stops the aggregation timer and removes all
     * listeners registered on this emitter.
     */
    destroy() {
        if (this.aggregationInterval) {
            clearInterval(this.aggregationInterval);
            this.aggregationInterval = null;
        }
        this.removeAllListeners();
    }

    /**
     * Relative change of `current` against `previous`.
     *
     * @param {number} current - Recent value.
     * @param {number} previous - Value to compare against.
     * @returns {number} `(current - previous) / previous`, or 0 when
     *   `previous` is zero/absent so callers never see NaN or Infinity.
     */
    relativeChange(current, previous) {
        if (!previous) {
            return 0;
        }
        return (current - previous) / previous;
    }

    /**
     * Drop running cost totals that belong to an earlier calendar day or
     * month (local time). The first call only remembers the current period.
     *
     * @param {Date} [now] - Reference time; defaults to the current time.
     */
    rollCostPeriods(now = new Date()) {
        const monthKey = `${now.getFullYear()}-${now.getMonth()}`;
        const dayKey = `${monthKey}-${now.getDate()}`;
        if (this.costDayKey !== null && this.costDayKey !== dayKey) {
            this.currentDailyCosts.clear();
        }
        if (this.costMonthKey !== null && this.costMonthKey !== monthKey) {
            this.currentMonthlyCosts.clear();
        }
        this.costDayKey = dayKey;
        this.costMonthKey = monthKey;
    }

    /**
     * Add a metric's total cost to the model's running daily and monthly
     * totals, after rolling over any expired period.
     *
     * @param {object} metric - Metric providing `modelId` and `cost.total`.
     */
    updateCostTracking(metric) {
        const modelId = metric.modelId;
        const cost = metric.cost.total;
        // Reset totals first so the current cost is counted in the new period
        // rather than being wiped together with the old one.
        this.rollCostPeriods();
        // Update daily costs
        const currentDaily = this.currentDailyCosts.get(modelId) || 0;
        this.currentDailyCosts.set(modelId, currentDaily + cost);
        // Update monthly costs
        const currentMonthly = this.currentMonthlyCosts.get(modelId) || 0;
        this.currentMonthlyCosts.set(modelId, currentMonthly + cost);
    }

    /**
     * Emit `cost_alert` events for the metric's model when its running total
     * exceeds 90% of a configured daily/monthly limit, or when the average
     * cost over the last minute is more than twice the baseline cost.
     *
     * @param {object} metric - The metric that was just recorded.
     */
    checkForAlerts(metric) {
        const modelId = metric.modelId;
        const currentDaily = this.currentDailyCosts.get(modelId) || 0;
        const currentMonthly = this.currentMonthlyCosts.get(modelId) || 0;
        const dailyLimit = this.dailyCostLimits.get(modelId);
        const monthlyLimit = this.monthlyCostLimits.get(modelId);
        // Daily limit alert
        if (dailyLimit && currentDaily > dailyLimit * 0.9) {
            const alert = {
                type: 'daily_limit',
                modelId,
                threshold: dailyLimit,
                current: currentDaily,
                message: `Daily cost approaching limit: $${currentDaily.toFixed(2)} / $${dailyLimit.toFixed(2)}`,
                severity: currentDaily > dailyLimit ? 'critical' : 'high',
            };
            this.emit('cost_alert', alert);
        }
        // Monthly limit alert
        if (monthlyLimit && currentMonthly > monthlyLimit * 0.9) {
            const alert = {
                type: 'monthly_limit',
                modelId,
                threshold: monthlyLimit,
                current: currentMonthly,
                message: `Monthly cost approaching limit: $${currentMonthly.toFixed(2)} / $${monthlyLimit.toFixed(2)}`,
                severity: currentMonthly > monthlyLimit ? 'critical' : 'high',
            };
            this.emit('cost_alert', alert);
        }
        // Cost spike detection (needs more than 5 samples in the last minute)
        const oneMinuteAgo = new Date(Date.now() - 60000);
        const recentCosts = this.metrics
            .filter((m) => m.modelId === modelId && m.timestamp > oneMinuteAgo)
            .map((m) => m.cost.total);
        if (recentCosts.length > 5) {
            const averageRecent =
                recentCosts.reduce((a, b) => a + b, 0) / recentCosts.length;
            const baseline = this.getBaselineCost(modelId);
            // A zero baseline cannot express a percentage; skip instead of
            // alerting with "Infinity% above baseline".
            if (baseline > 0 && averageRecent > baseline * 2) {
                const alert = {
                    type: 'cost_spike',
                    modelId,
                    threshold: baseline,
                    current: averageRecent,
                    message: `Cost spike detected: ${((averageRecent / baseline - 1) * 100).toFixed(0)}% above baseline`,
                    severity: 'medium',
                };
                this.emit('cost_alert', alert);
            }
        }
    }

    /**
     * Estimate the expected cost of a typical query for a model.
     *
     * @param {string} modelId - Model to estimate for.
     * @returns {number} Estimated cost; 0.01 when the model is unknown.
     */
    getBaselineCost(modelId) {
        const characteristics =
            ModelCharacteristicsService.getModelCharacteristics(modelId);
        if (!characteristics) {
            return 0.01;
        }
        // Estimate baseline cost for typical query (500 input + 200 output tokens)
        // NOTE(review): the 0.5 / 0.2 factors presume per-1K-token prices — confirm.
        return (
            characteristics.cost.inputTokenCost * 0.5 +
            characteristics.cost.outputTokenCost * 0.2
        );
    }

    /**
     * Compare current metrics with a baseline and describe significant
     * changes: more than 15% in average latency or more than 10% in accuracy.
     * A dimension is skipped when either side is missing or zero.
     *
     * @param {object} baseline - Baseline with `averageLatency` / `accuracy`.
     * @param {object} current - Current values for the same fields.
     * @returns {{hasChange: boolean, reason: string}} Whether anything changed
     *   significantly and a comma-separated description.
     */
    detectSignificantChange(baseline, current) {
        const changes = [];
        if (baseline.averageLatency && current.averageLatency) {
            // Keep the sign for the direction; compare the magnitude only.
            const latencyDelta =
                (current.averageLatency - baseline.averageLatency) /
                baseline.averageLatency;
            const latencyChange = Math.abs(latencyDelta);
            if (latencyChange > 0.15) {
                changes.push(`latency ${latencyDelta > 0 ? 'increased' : 'decreased'} by ${(latencyChange * 100).toFixed(1)}%`);
            }
        }
        if (baseline.accuracy && current.accuracy) {
            const accuracyChange =
                Math.abs(current.accuracy - baseline.accuracy) / baseline.accuracy;
            if (accuracyChange > 0.1) {
                changes.push(`accuracy ${current.accuracy > baseline.accuracy ? 'improved' : 'declined'} by ${(accuracyChange * 100).toFixed(1)}%`);
            }
        }
        return {
            hasChange: changes.length > 0,
            reason: changes.join(', '),
        };
    }

    /**
     * Seed the performance baselines from the known model characteristics.
     */
    loadBaselines() {
        // Load baseline performance metrics from model characteristics
        const models = Object.values(ModelCharacteristicsService.getAllModels());
        for (const model of models) {
            this.performanceBaselines.set(model.id, model.performance);
        }
    }

    /**
     * Start the periodic aggregation timer. Every `aggregationIntervalMs` it
     * emits `aggregated_metrics` for each model that had requests during the
     * preceding interval.
     */
    startAggregation() {
        this.aggregationInterval = setInterval(() => {
            // Emit aggregated metrics every minute
            const modelIds = [...new Set(this.metrics.map((m) => m.modelId))];
            const lastMinute = new Date(Date.now() - this.aggregationIntervalMs);
            for (const modelId of modelIds) {
                const metrics = this.getAggregatedMetrics(modelId, lastMinute);
                if (metrics && metrics.requestCount > 0) {
                    this.emit('aggregated_metrics', metrics);
                }
            }
        }, this.aggregationIntervalMs);
        // Do not let this background timer alone keep the process running;
        // `unref` is absent where setInterval returns a plain number.
        this.aggregationInterval.unref?.();
    }
}
//# sourceMappingURL=performance-tracker.js.map