quantum-cli-core
Version:
Quantum CLI Core - Multi-LLM Collaboration System
403 lines • 17.1 kB
JavaScript
/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { EventEmitter } from 'events';
import { ModelCharacteristicsService, } from './model-characteristics.js';
import { QueryType } from './types.js';
/**
 * Collects per-request performance metrics for models, keeps running
 * daily/monthly cost totals per model, and notifies listeners via events.
 *
 * Events emitted:
 * - `metric_recorded` (metric): after every `recordMetric` call.
 * - `cost_alert` (alert): when a cost limit is approached/exceeded or a
 *   short-term cost spike is detected.
 * - `performance_update` (update): when `updateModelCharacteristics` detects
 *   a significant drift from the stored baseline.
 * - `aggregated_metrics` (metrics): once per aggregation interval for every
 *   model that had requests in that interval.
 *
 * Metric objects are expected to carry the fields this class reads:
 * `modelId`, `timestamp`, `success`, `latency`, `tokens.input`,
 * `tokens.output`, `cost.total`, and optionally `errorType`, `queryType`,
 * `userRating`.
 */
export class PerformanceTracker extends EventEmitter {
  /** Recorded metrics in insertion order, capped at `maxMetricsHistory`. */
  metrics = [];
  maxMetricsHistory = 10000; // Keep last 10k metrics
  aggregationInterval = null;
  aggregationIntervalMs = 60000; // 1 minute
  // Cost tracking
  dailyCostLimits = new Map();
  monthlyCostLimits = new Map();
  currentDailyCosts = new Map();
  currentMonthlyCosts = new Map();
  // Local-time calendar day / month that the running cost totals belong to.
  currentCostDayKey = null;
  currentCostMonthKey = null;
  // Performance baselines
  performanceBaselines = new Map();

  constructor() {
    super();
    this.startAggregation();
    this.loadBaselines();
  }

  /**
   * Record a performance metric.
   *
   * Appends the metric to the history (dropping the oldest entries beyond
   * `maxMetricsHistory`), updates the running cost totals, evaluates cost
   * alerts and finally emits `metric_recorded`.
   *
   * @param {object} metric - The metric to record.
   */
  recordMetric(metric) {
    this.metrics.push(metric);
    // Maintain history limit
    if (this.metrics.length > this.maxMetricsHistory) {
      this.metrics = this.metrics.slice(-this.maxMetricsHistory);
    }
    // Update cost tracking
    this.updateCostTracking(metric);
    // Check for alerts
    this.checkForAlerts(metric);
    // Emit event for real-time listeners
    this.emit('metric_recorded', metric);
  }

  /**
   * Get aggregated metrics for a model within a time window (inclusive on
   * both ends).
   *
   * Latency and token statistics are computed over successful requests only;
   * cost and request counts cover every request in the window.
   *
   * @param {string} modelId - Model whose metrics are aggregated.
   * @param {Date} startTime - Start of the window.
   * @param {Date} [endTime] - End of the window; defaults to now.
   * @returns {object|null} The aggregate, or `null` when the model has no
   *   metrics in the window.
   */
  getAggregatedMetrics(modelId, startTime, endTime = new Date()) {
    const modelMetrics = this.metrics.filter(
      (m) =>
        m.modelId === modelId &&
        m.timestamp >= startTime &&
        m.timestamp <= endTime,
    );
    if (modelMetrics.length === 0) {
      return null;
    }
    const successfulMetrics = modelMetrics.filter((m) => m.success);
    const latencies = successfulMetrics
      .map((m) => m.latency)
      .sort((a, b) => a - b);
    const errorBreakdown = {};
    const queryTypeBreakdown = {
      [QueryType.CODE]: 0,
      [QueryType.CREATIVE]: 0,
      [QueryType.ANALYSIS]: 0,
      [QueryType.SECURITY]: 0,
      [QueryType.GENERAL]: 0,
    };
    for (const metric of modelMetrics) {
      if (!metric.success && metric.errorType) {
        errorBreakdown[metric.errorType] =
          (errorBreakdown[metric.errorType] || 0) + 1;
      }
      if (metric.queryType) {
        queryTypeBreakdown[metric.queryType] =
          (queryTypeBreakdown[metric.queryType] || 0) + 1;
      }
    }
    const totalTokens = successfulMetrics.reduce(
      (sum, m) => sum + m.tokens.input + m.tokens.output,
      0,
    );
    const totalTime = successfulMetrics.reduce((sum, m) => sum + m.latency, 0);
    // Computed once and reused for both the total and the average.
    const totalCost = modelMetrics.reduce((sum, m) => sum + m.cost.total, 0);
    // Only truthy ratings count, so a rating of 0 is treated as "not rated".
    const userRatings = modelMetrics
      .filter((m) => m.userRating)
      .map((m) => m.userRating);
    const hasLatencies = latencies.length > 0;
    return {
      modelId,
      timeWindow: { start: startTime, end: endTime },
      requestCount: modelMetrics.length,
      successRate: successfulMetrics.length / modelMetrics.length,
      averageLatency: hasLatencies ? totalTime / latencies.length : 0,
      p95Latency: hasLatencies
        ? latencies[Math.floor(latencies.length * 0.95)]
        : 0,
      p99Latency: hasLatencies
        ? latencies[Math.floor(latencies.length * 0.99)]
        : 0,
      totalCost,
      averageCost: totalCost / modelMetrics.length,
      // The * 1000 converts a per-latency-unit rate to per-second; this
      // presumes latency is recorded in milliseconds (confirm with callers).
      tokensPerSecond: totalTime > 0 ? (totalTokens / totalTime) * 1000 : 0,
      userSatisfaction:
        userRatings.length > 0
          ? userRatings.reduce((a, b) => a + b, 0) / userRatings.length
          : 0,
      errorBreakdown,
      queryTypeBreakdown,
    };
  }

  /**
   * Get performance comparison between models.
   *
   * @param {string[]} modelIds - Models to compare.
   * @param {{start: Date, end: Date}} timeWindow - Window to aggregate over.
   * @returns {Object<string, object|null>} Aggregated metrics keyed by model
   *   id; `null` for models without data in the window.
   */
  compareModelPerformance(modelIds, timeWindow) {
    const comparison = {};
    for (const modelId of modelIds) {
      comparison[modelId] = this.getAggregatedMetrics(
        modelId,
        timeWindow.start,
        timeWindow.end,
      );
    }
    return comparison;
  }

  /**
   * Set cost limits for a model. A limit left `undefined` is not changed.
   *
   * @param {string} modelId - Model the limits apply to.
   * @param {number} [dailyLimit] - Daily cost limit.
   * @param {number} [monthlyLimit] - Monthly cost limit.
   */
  setCostLimits(modelId, dailyLimit, monthlyLimit) {
    if (dailyLimit !== undefined) {
      this.dailyCostLimits.set(modelId, dailyLimit);
    }
    if (monthlyLimit !== undefined) {
      this.monthlyCostLimits.set(modelId, monthlyLimit);
    }
  }

  /**
   * Get current cost usage for a model.
   *
   * Totals that belong to a previous calendar day/month are discarded first,
   * so stale values are never reported after a period boundary.
   *
   * @param {string} modelId - Model to look up.
   * @returns {{daily: number, monthly: number}} Running totals (0 if none).
   */
  getCurrentCostUsage(modelId) {
    this.rollCostPeriods();
    return {
      daily: this.currentDailyCosts.get(modelId) || 0,
      monthly: this.currentMonthlyCosts.get(modelId) || 0,
    };
  }

  /**
   * Get cost efficiency ranking of models, cheapest cost-per-quality first.
   *
   * Considers every model id present in the metric history. Models without
   * known characteristics fall back to a quality score of 0.5.
   *
   * @param {{start: Date, end: Date}} timeWindow - Window to aggregate over.
   * @returns {Array<{modelId: string, costPerQualityPoint: number,
   *   userSatisfactionPerDollar: number}>} Ranking sorted ascending by
   *   `costPerQualityPoint`.
   */
  getCostEfficiencyRanking(timeWindow) {
    const allModelIds = [...new Set(this.metrics.map((m) => m.modelId))];
    const rankings = allModelIds
      .map((modelId) => {
        const metrics = this.getAggregatedMetrics(
          modelId,
          timeWindow.start,
          timeWindow.end,
        );
        if (!metrics || metrics.requestCount === 0) {
          return null;
        }
        const characteristics =
          ModelCharacteristicsService.getModelCharacteristics(modelId);
        const qualityScore = characteristics?.qualityScores.overall || 0.5;
        // Guard the division: a zero average cost would otherwise yield
        // Infinity, which is not a meaningful (or JSON-serializable) value.
        const hasRatedSpend =
          metrics.userSatisfaction > 0 && metrics.averageCost > 0;
        return {
          modelId,
          costPerQualityPoint: metrics.averageCost / qualityScore,
          userSatisfactionPerDollar: hasRatedSpend
            ? metrics.userSatisfaction / metrics.averageCost
            : 0,
        };
      })
      .filter(Boolean);
    return rankings.sort(
      (a, b) => a.costPerQualityPoint - b.costPerQualityPoint,
    );
  }

  /**
   * Compare the last 30 days of real performance data against the stored
   * baseline and, on significant drift, replace the baseline.
   *
   * Requires at least 50 requests in the window and known model
   * characteristics. Emits `performance_update` when the baseline changes.
   *
   * @param {string} modelId - Model to evaluate.
   * @returns {object|null} The update record, or `null` when there is not
   *   enough data, the model is unknown, or nothing changed significantly.
   */
  updateModelCharacteristics(modelId) {
    const last30Days = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000);
    const recentMetrics = this.getAggregatedMetrics(modelId, last30Days);
    if (!recentMetrics || recentMetrics.requestCount < 50) {
      return null; // Need sufficient data
    }
    const characteristics =
      ModelCharacteristicsService.getModelCharacteristics(modelId);
    if (!characteristics) {
      return null;
    }
    const baseline =
      this.performanceBaselines.get(modelId) || characteristics.performance;
    const newMetrics = {
      averageLatency: recentMetrics.averageLatency,
      p95Latency: recentMetrics.p95Latency,
      p99Latency: recentMetrics.p99Latency,
      tokensPerSecond: recentMetrics.tokensPerSecond,
      // The success rate is used as the proxy for both accuracy and
      // reliability; no separate accuracy signal is recorded.
      accuracy: recentMetrics.successRate,
      reliability: recentMetrics.successRate,
      errorRate: 1 - recentMetrics.successRate,
    };
    // Check for significant changes (>15% latency or >10% accuracy drift)
    const significantChange = this.detectSignificantChange(
      baseline,
      newMetrics,
    );
    if (!significantChange.hasChange) {
      return null;
    }
    const update = {
      modelId,
      oldMetrics: baseline,
      newMetrics,
      significantChange: true,
      changeReason: significantChange.reason,
    };
    // Update baseline
    this.performanceBaselines.set(modelId, { ...baseline, ...newMetrics });
    // Emit update event
    this.emit('performance_update', update);
    return update;
  }

  /**
   * Get performance insights and recommendations by comparing the last
   * 7 days against the preceding period (30 days ago up to 7 days ago).
   *
   * Relative latency/cost trends need a positive historical value; when the
   * historical average is 0 no percentage can be computed, so the trend is
   * reported as `'stable'` and no insight is produced for it.
   *
   * @param {string} modelId - Model to analyze.
   * @returns {{insights: string[], recommendations: string[],
   *   trends: object}} Human-readable findings and per-dimension trends.
   */
  getPerformanceInsights(modelId) {
    const last7Days = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);
    const last30Days = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000);
    const recentMetrics = this.getAggregatedMetrics(modelId, last7Days);
    const historicalMetrics = this.getAggregatedMetrics(
      modelId,
      last30Days,
      last7Days,
    );
    const insights = [];
    const recommendations = [];
    const trends = {};
    if (!recentMetrics || !historicalMetrics) {
      return {
        insights: ['Insufficient data for analysis'],
        recommendations: [],
        trends: {},
      };
    }
    // Latency trends
    const latencyChange = this.relativeChange(
      recentMetrics.averageLatency,
      historicalMetrics.averageLatency,
    );
    if (latencyChange !== null && Math.abs(latencyChange) > 0.1) {
      trends.latency = latencyChange < 0 ? 'improving' : 'declining';
      insights.push(`Latency has ${latencyChange < 0 ? 'improved' : 'degraded'} by ${Math.abs(latencyChange * 100).toFixed(1)}%`);
      if (latencyChange > 0.2) {
        recommendations.push('Consider switching to a faster model for time-sensitive queries');
      }
    } else {
      trends.latency = 'stable';
    }
    // Cost trends
    const costChange = this.relativeChange(
      recentMetrics.averageCost,
      historicalMetrics.averageCost,
    );
    if (costChange !== null && Math.abs(costChange) > 0.15) {
      trends.cost = costChange < 0 ? 'improving' : 'declining';
      insights.push(`Average cost has ${costChange < 0 ? 'decreased' : 'increased'} by ${Math.abs(costChange * 100).toFixed(1)}%`);
      if (costChange > 0.3) {
        recommendations.push('Review usage patterns and consider cost optimization strategies');
      }
    } else {
      trends.cost = 'stable';
    }
    // Success rate trends (absolute difference, not relative)
    const successChange =
      recentMetrics.successRate - historicalMetrics.successRate;
    if (Math.abs(successChange) > 0.05) {
      trends.reliability = successChange > 0 ? 'improving' : 'declining';
      insights.push(`Success rate has ${successChange > 0 ? 'improved' : 'declined'} by ${Math.abs(successChange * 100).toFixed(1)}%`);
      if (successChange < -0.1) {
        recommendations.push('Investigate error patterns and consider using a more reliable model');
      }
    } else {
      trends.reliability = 'stable';
    }
    // User satisfaction trends (only when both periods have ratings)
    if (
      recentMetrics.userSatisfaction > 0 &&
      historicalMetrics.userSatisfaction > 0
    ) {
      const satisfactionChange =
        recentMetrics.userSatisfaction - historicalMetrics.userSatisfaction;
      if (Math.abs(satisfactionChange) > 0.3) {
        trends.satisfaction =
          satisfactionChange > 0 ? 'improving' : 'declining';
        insights.push(`User satisfaction has ${satisfactionChange > 0 ? 'improved' : 'declined'} by ${Math.abs(satisfactionChange).toFixed(1)} points`);
        if (satisfactionChange < -0.5) {
          recommendations.push('Review output quality and consider model tuning or switching');
        }
      } else {
        trends.satisfaction = 'stable';
      }
    }
    return { insights, recommendations, trends };
  }

  /**
   * Clean up resources: stops the aggregation timer and removes every
   * registered event listener.
   */
  destroy() {
    if (this.aggregationInterval) {
      clearInterval(this.aggregationInterval);
      this.aggregationInterval = null;
    }
    this.removeAllListeners();
  }

  /**
   * Relative change of `current` versus `baseline`.
   *
   * @param {number} current - The newer value.
   * @param {number} baseline - The reference value.
   * @returns {number|null} `(current - baseline) / baseline`, or `null` when
   *   the baseline is not a positive number (the ratio would be NaN or
   *   Infinity).
   */
  relativeChange(current, baseline) {
    if (!(baseline > 0)) {
      return null;
    }
    return (current - baseline) / baseline;
  }

  /**
   * Discard running cost totals that belong to an earlier calendar period.
   *
   * The daily totals are cleared when the local calendar day differs from
   * the one they were accumulated in, and likewise for the monthly totals.
   * This works regardless of whether any metric arrives exactly at midnight.
   *
   * @param {Date} [now] - Reference time; defaults to the current time.
   */
  rollCostPeriods(now = new Date()) {
    const monthKey = `${now.getFullYear()}-${now.getMonth() + 1}`;
    const dayKey = `${monthKey}-${now.getDate()}`;
    if (dayKey !== this.currentCostDayKey) {
      this.currentDailyCosts.clear();
      this.currentCostDayKey = dayKey;
    }
    if (monthKey !== this.currentCostMonthKey) {
      this.currentMonthlyCosts.clear();
      this.currentCostMonthKey = monthKey;
    }
  }

  /**
   * Add a metric's cost to the model's running daily and monthly totals.
   *
   * Period rollover happens before the cost is added, so the first request
   * of a new day/month is counted towards the new period.
   *
   * @param {object} metric - Metric providing `modelId` and `cost.total`.
   */
  updateCostTracking(metric) {
    this.rollCostPeriods();
    const { modelId } = metric;
    const cost = metric.cost.total;
    // Update daily costs
    const currentDaily = this.currentDailyCosts.get(modelId) || 0;
    this.currentDailyCosts.set(modelId, currentDaily + cost);
    // Update monthly costs
    const currentMonthly = this.currentMonthlyCosts.get(modelId) || 0;
    this.currentMonthlyCosts.set(modelId, currentMonthly + cost);
  }

  /**
   * Emit `cost_alert` events for the metric's model when:
   * - the running daily/monthly total exceeds 90% of a configured limit
   *   (`critical` once past the limit, otherwise `high`), or
   * - more than 5 requests in the last minute average above twice the
   *   model's baseline cost (`medium`).
   *
   * @param {object} metric - The metric that was just recorded.
   */
  checkForAlerts(metric) {
    const modelId = metric.modelId;
    const currentDaily = this.currentDailyCosts.get(modelId) || 0;
    const currentMonthly = this.currentMonthlyCosts.get(modelId) || 0;
    const dailyLimit = this.dailyCostLimits.get(modelId);
    const monthlyLimit = this.monthlyCostLimits.get(modelId);
    // Daily limit alert
    if (dailyLimit && currentDaily > dailyLimit * 0.9) {
      const alert = {
        type: 'daily_limit',
        modelId,
        threshold: dailyLimit,
        current: currentDaily,
        message: `Daily cost approaching limit: $${currentDaily.toFixed(2)} / $${dailyLimit.toFixed(2)}`,
        severity: currentDaily > dailyLimit ? 'critical' : 'high',
      };
      this.emit('cost_alert', alert);
    }
    // Monthly limit alert
    if (monthlyLimit && currentMonthly > monthlyLimit * 0.9) {
      const alert = {
        type: 'monthly_limit',
        modelId,
        threshold: monthlyLimit,
        current: currentMonthly,
        message: `Monthly cost approaching limit: $${currentMonthly.toFixed(2)} / $${monthlyLimit.toFixed(2)}`,
        severity: currentMonthly > monthlyLimit ? 'critical' : 'high',
      };
      this.emit('cost_alert', alert);
    }
    // Cost spike detection. The cutoff is computed once rather than once
    // per history entry, since this runs on every recorded metric.
    const spikeWindowStart = new Date(Date.now() - 60000);
    const recentCosts = this.metrics
      .filter((m) => m.modelId === modelId && m.timestamp > spikeWindowStart)
      .map((m) => m.cost.total);
    if (recentCosts.length > 5) {
      const averageRecent =
        recentCosts.reduce((a, b) => a + b, 0) / recentCosts.length;
      const baseline = this.getBaselineCost(modelId);
      // A zero baseline makes the percentage meaningless (Infinity), so a
      // spike is only reported against a positive baseline.
      if (baseline > 0 && averageRecent > baseline * 2) {
        const alert = {
          type: 'cost_spike',
          modelId,
          threshold: baseline,
          current: averageRecent,
          message: `Cost spike detected: ${((averageRecent / baseline - 1) * 100).toFixed(0)}% above baseline`,
          severity: 'medium',
        };
        this.emit('cost_alert', alert);
      }
    }
  }

  /**
   * Estimate the baseline cost of a typical query for a model.
   *
   * @param {string} modelId - Model to estimate for.
   * @returns {number} Estimated cost; 0.01 when the model is unknown.
   */
  getBaselineCost(modelId) {
    const characteristics =
      ModelCharacteristicsService.getModelCharacteristics(modelId);
    if (!characteristics) {
      return 0.01;
    }
    // Estimate baseline cost for typical query (500 input + 200 output
    // tokens). The 0.5 / 0.2 factors presume token costs are quoted per
    // 1,000 tokens (confirm against the model characteristics data).
    return (
      characteristics.cost.inputTokenCost * 0.5 +
      characteristics.cost.outputTokenCost * 0.2
    );
  }

  /**
   * Detect significant drift between a baseline and current measurements.
   *
   * Latency counts as changed beyond 15% relative difference, accuracy
   * beyond 10%. A dimension is skipped when either side is missing or 0.
   *
   * @param {object} baseline - Reference metrics.
   * @param {object} current - Newly measured metrics.
   * @returns {{hasChange: boolean, reason: string}} Whether anything drifted
   *   and a comma-separated description of what did.
   */
  detectSignificantChange(baseline, current) {
    const changes = [];
    if (baseline.averageLatency && current.averageLatency) {
      // Keep the sign so the direction in the message is correct.
      const latencyDelta =
        (current.averageLatency - baseline.averageLatency) /
        baseline.averageLatency;
      if (Math.abs(latencyDelta) > 0.15) {
        changes.push(`latency ${latencyDelta > 0 ? 'increased' : 'decreased'} by ${(Math.abs(latencyDelta) * 100).toFixed(1)}%`);
      }
    }
    if (baseline.accuracy && current.accuracy) {
      const accuracyChange =
        Math.abs(current.accuracy - baseline.accuracy) / baseline.accuracy;
      if (accuracyChange > 0.1) {
        changes.push(`accuracy ${current.accuracy > baseline.accuracy ? 'improved' : 'declined'} by ${(accuracyChange * 100).toFixed(1)}%`);
      }
    }
    return {
      hasChange: changes.length > 0,
      reason: changes.join(', '),
    };
  }

  /**
   * Load baseline performance metrics from model characteristics, keyed by
   * each model's `id`.
   */
  loadBaselines() {
    const models = Object.values(ModelCharacteristicsService.getAllModels());
    for (const model of models) {
      this.performanceBaselines.set(model.id, model.performance);
    }
  }

  /**
   * Start the periodic aggregation timer. Every `aggregationIntervalMs` it
   * emits `aggregated_metrics` for each model that had requests during the
   * preceding interval. Calling it while a timer is already running is a
   * no-op.
   */
  startAggregation() {
    if (this.aggregationInterval) {
      return; // Avoid stacking a second timer on top of a running one.
    }
    this.aggregationInterval = setInterval(() => {
      // Emit aggregated metrics every minute
      const modelIds = [...new Set(this.metrics.map((m) => m.modelId))];
      const lastMinute = new Date(Date.now() - this.aggregationIntervalMs);
      for (const modelId of modelIds) {
        const metrics = this.getAggregatedMetrics(modelId, lastMinute);
        if (metrics && metrics.requestCount > 0) {
          this.emit('aggregated_metrics', metrics);
        }
      }
    }, this.aggregationIntervalMs);
    // A background reporting timer must not keep the process alive on its
    // own; `unref` only exists on Node timers, hence the optional call.
    this.aggregationInterval.unref?.();
  }
}
//# sourceMappingURL=performance-tracker.js.map