recoder-analytics — comprehensive analytics and monitoring for the Recoder.xyz ecosystem
Version: (unspecified)
JavaScript • 399 lines • 16.1 kB
"use strict";
/**
* Individual Model Health Tracking
*
* Tracks detailed health metrics for individual AI models including quality scoring,
* performance trends, and reliability indicators.
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.modelTracker = exports.ModelTracker = void 0;
const shared_1 = require("@recoder/shared");
const events_1 = require("events");
/**
 * Individual model health tracker.
 *
 * Maintains per-model health metrics (availability, latency, error rate, cost,
 * quality, throughput), quality-score history, reliability statistics, and
 * benchmark results for the known AI model catalog. Extends EventEmitter and
 * emits the following events when metrics cross alerting thresholds:
 *   - 'availabilityDegraded' (modelName, availability)  — availability < 0.9
 *   - 'errorRateIncreased'   (modelName, errorRate)     — errorRate > 0.05
 *   - 'latencyIncreased'     (modelName, avgLatency)    — avgLatency > 10000 ms
 *   - 'qualityDegraded'      (modelName, qualityScore)  — qualityScore < 0.6
 *
 * NOTE(review): several private helpers (getRecentErrorCount, uptime, MTBF,
 * MTTR, SLA compliance, trend direction) currently return simulated values;
 * wire them to real data sources before relying on the derived statistics.
 */
class ModelTracker extends events_1.EventEmitter {
    constructor() {
        super();
        // modelName -> current health-metrics snapshot
        this.modelMetrics = new Map();
        // modelName -> chronological quality measurements (capped at 100)
        this.qualityHistory = new Map();
        // modelName -> reliability statistics (uptime, MTBF, MTTR, SLA)
        this.reliabilityData = new Map();
        // modelName -> benchmark-result history (capped at 10 per model)
        this.benchmarks = new Map();
        // Analysis window used for trend/uptime calculations.
        this.performanceWindow = 24 * 60 * 60 * 1000; // 24 hours
        this.initializeTracking();
    }
    /**
     * Seed metric entries for the statically-known model catalog so lookups
     * succeed before any live traffic has been observed.
     */
    initializeTracking() {
        // Initialize tracking for known models
        const knownModels = [
            { name: 'claude-sonnet-4', provider: 'anthropic' },
            { name: 'claude-haiku-3', provider: 'anthropic' },
            { name: 'gpt-4-turbo', provider: 'openai' },
            { name: 'gpt-4o', provider: 'openai' },
            { name: 'gemini-2.5-pro', provider: 'google' },
            { name: 'mistral-large-2407', provider: 'mistral' },
            { name: 'deepseek-v3', provider: 'deepseek' },
            { name: 'llama-3.3-70b', provider: 'ollama' }
        ];
        knownModels.forEach(model => {
            this.initializeModelMetrics(model.name, model.provider);
        });
        shared_1.Logger.info(`Initialized tracking for ${knownModels.length} models`);
    }
    /**
     * Create the default metrics entry for one model.
     *
     * @param {string} modelName - Canonical model identifier.
     * @param {string} provider - Hosting provider (e.g. 'anthropic').
     */
    initializeModelMetrics(modelName, provider) {
        const initialMetrics = {
            modelName,
            provider,
            availability: 1.0,
            avgLatency: 0, // 0 means "no sample yet"; first update seeds the EMA
            errorRate: 0,
            costPerToken: 0,
            qualityScore: 0.8, // Default starting score
            throughput: 0,
            lastUpdated: new Date(),
            trends: {
                latency: 'stable',
                cost: 'stable',
                quality: 'stable'
            }
        };
        this.modelMetrics.set(modelName, initialMetrics);
        this.qualityHistory.set(modelName, []);
        // Keep the benchmark map consistent with the other per-model maps.
        this.benchmarks.set(modelName, []);
    }
    /**
     * Update model metrics based on recent performance data.
     *
     * @param {string} modelName - Model to update; unknown names are logged and ignored.
     * @param {number} latency - Observed request latency in milliseconds.
     * @param {boolean} success - Whether the request succeeded (currently unused;
     *   error rate is derived from getRecentErrorCount instead).
     * @param {number} cost - Latest cost per token.
     * @param {object} [qualityMetrics] - Optional quality measurement with an `overall` score.
     */
    async updateModelMetrics(modelName, latency, success, cost, qualityMetrics) {
        const existing = this.modelMetrics.get(modelName);
        if (!existing) {
            shared_1.Logger.warn(`Unknown model: ${modelName}`);
            return;
        }
        const now = new Date();
        // Exponential moving average. Seed with the first observation so the
        // initial 0 does not bias the average toward zero (cold-start fix).
        const alpha = 0.1; // Smoothing factor
        existing.avgLatency = existing.avgLatency === 0
            ? latency
            : existing.avgLatency * (1 - alpha) + latency * alpha;
        existing.costPerToken = cost;
        existing.lastUpdated = now;
        // Update error rate (over last 100 requests)
        const recentErrors = await this.getRecentErrorCount(modelName);
        existing.errorRate = recentErrors / 100;
        // Update quality score if provided
        if (qualityMetrics) {
            existing.qualityScore = qualityMetrics.overall;
            await this.updateQualityHistory(modelName, qualityMetrics);
        }
        // Update throughput (tokens per second estimate)
        existing.throughput = this.estimateThroughput(existing.avgLatency);
        // Analyze trends
        existing.trends = await this.analyzeTrends(modelName);
        // `existing` is the object already stored in the map, so no re-set is needed.
        // Emit events for significant changes
        await this.checkForSignificantChanges(modelName, existing);
        shared_1.Logger.debug(`Updated metrics for ${modelName}`);
    }
    /**
     * Get current health metrics for a model.
     *
     * @param {string} modelName
     * @returns {object|null} The live metrics object, or null when untracked.
     */
    getModelMetrics(modelName) {
        return this.modelMetrics.get(modelName) || null;
    }
    /**
     * Get the quality score for a model. Uses a linearly-weighted average of
     * the last 10 quality measurements when history exists; otherwise falls
     * back to the stored score (0 for unknown models).
     *
     * @param {string} modelName
     * @returns {Promise<number>} Score in [0, 1].
     */
    async getQualityScore(modelName) {
        const metrics = this.modelMetrics.get(modelName);
        if (!metrics)
            return 0;
        // If we have recent quality history, use more sophisticated calculation
        const history = this.qualityHistory.get(modelName) || [];
        if (history.length === 0)
            return metrics.qualityScore;
        // Calculate weighted average of recent quality scores
        const recent = history.slice(-10); // Last 10 measurements
        const weights = recent.map((_, i) => i + 1); // Linear weighting favors newest
        const totalWeight = weights.reduce((sum, w) => sum + w, 0);
        const weightedScore = recent.reduce((sum, quality, i) => sum + quality.overall * weights[i], 0) / totalWeight;
        return weightedScore;
    }
    /**
     * Analyze performance trends (latency, quality, cost, error rate) for a model.
     *
     * @param {string} modelName
     * @returns {Promise<object[]>} One trend descriptor per metric.
     */
    async getPerformanceTrends(modelName) {
        const trends = [];
        // Analyze latency trend
        trends.push(await this.analyzeMetricTrend(modelName, 'latency'));
        // Analyze quality trend
        trends.push(await this.analyzeMetricTrend(modelName, 'quality'));
        // Analyze cost trend
        trends.push(await this.analyzeMetricTrend(modelName, 'cost'));
        // Analyze error rate trend
        trends.push(await this.analyzeMetricTrend(modelName, 'errorRate'));
        return trends;
    }
    /**
     * Get model reliability statistics, or null when none have been recorded.
     *
     * @param {string} modelName
     */
    async getModelReliability(modelName) {
        return this.reliabilityData.get(modelName) || null;
    }
    /**
     * Update model reliability data from an availability observation, creating
     * a default entry on first sight and recomputing the derived statistics.
     *
     * @param {string} modelName
     * @param {boolean} isAvailable - Whether the model responded at `timestamp`.
     * @param {Date} [timestamp=new Date()]
     */
    async updateReliability(modelName, isAvailable, timestamp = new Date()) {
        let reliability = this.reliabilityData.get(modelName);
        if (!reliability) {
            reliability = {
                modelName,
                uptimePercentage: 100,
                mtbf: 0,
                mttr: 0,
                failurePattern: {
                    timeOfDay: {},
                    dayOfWeek: {},
                    loadBased: false
                },
                slaCompliance: 100
            };
            this.reliabilityData.set(modelName, reliability);
        }
        // Update availability tracking
        await this.updateAvailabilityTracking(modelName, isAvailable, timestamp);
        // Recalculate reliability metrics
        reliability.uptimePercentage = await this.calculateUptimePercentage(modelName);
        reliability.mtbf = await this.calculateMTBF(modelName);
        reliability.mttr = await this.calculateMTTR(modelName);
        reliability.slaCompliance = await this.calculateSLACompliance(modelName);
        this.reliabilityData.set(modelName, reliability);
    }
    /**
     * Run benchmarks for all tracked models, rank them by score, and append
     * the results to each model's benchmark history.
     *
     * @param {string} [taskType='general'] - Benchmark task category.
     * @returns {Promise<object[]>} Results sorted by score, best first.
     */
    async runBenchmarks(taskType = 'general') {
        const modelNames = Array.from(this.modelMetrics.keys());
        const benchmarks = [];
        for (const modelName of modelNames) {
            try {
                const benchmark = await this.runModelBenchmark(modelName, taskType);
                benchmarks.push(benchmark);
            }
            catch (error) {
                // A single failing model must not abort the whole benchmark run.
                shared_1.Logger.error(`Benchmark failed for ${modelName}:`, error);
            }
        }
        // Sort by score and assign ranks
        benchmarks.sort((a, b) => b.score - a.score);
        benchmarks.forEach((benchmark, index) => {
            benchmark.comparisonRank = index + 1;
        });
        // Store benchmarks
        benchmarks.forEach(benchmark => {
            const existing = this.benchmarks.get(benchmark.modelName) || [];
            existing.push(benchmark);
            // Keep only last 10 benchmarks per model
            if (existing.length > 10) {
                existing.splice(0, existing.length - 10);
            }
            this.benchmarks.set(benchmark.modelName, existing);
        });
        shared_1.Logger.info(`Completed benchmarks for ${benchmarks.length} models`);
        return benchmarks;
    }
    /**
     * Get benchmark history for a model (empty array when none recorded).
     *
     * @param {string} modelName
     */
    getBenchmarkHistory(modelName) {
        return this.benchmarks.get(modelName) || [];
    }
    /**
     * Compare all tracked models for a specific task using a weighted score
     * over the requested metrics (quality 0.4, latency 0.3, cost 0.2,
     * reliability 0.1; weights renormalized over the metrics actually used).
     *
     * @param {string} taskType - Task category (currently informational only).
     * @param {string[]} [metrics=['quality','latency','cost']] - Metrics to include.
     * @returns {Promise<object[]>} Comparisons sorted by overallScore, best first.
     */
    async compareModels(taskType, metrics = ['quality', 'latency', 'cost']) {
        const modelNames = Array.from(this.modelMetrics.keys());
        const comparisons = [];
        for (const modelName of modelNames) {
            const modelMetrics = this.modelMetrics.get(modelName);
            if (!modelMetrics)
                continue;
            const comparison = {
                modelName,
                provider: modelMetrics.provider,
                overallScore: 0,
                details: {}
            };
            // Calculate scores for each metric
            let totalScore = 0;
            let weightSum = 0;
            if (metrics.includes('quality')) {
                const weight = 0.4;
                comparison.details.quality = modelMetrics.qualityScore * 100;
                totalScore += comparison.details.quality * weight;
                weightSum += weight;
            }
            if (metrics.includes('latency')) {
                const weight = 0.3;
                // 100 points at 0 ms, losing 1 point per 100 ms, floored at 0.
                const latencyScore = Math.max(0, 100 - (modelMetrics.avgLatency / 100));
                comparison.details.latency = {
                    ms: modelMetrics.avgLatency,
                    score: latencyScore
                };
                totalScore += latencyScore * weight;
                weightSum += weight;
            }
            if (metrics.includes('cost')) {
                const weight = 0.2;
                // 100 points at $0/token, losing 1 point per $0.0001, floored at 0.
                const costScore = Math.max(0, 100 - (modelMetrics.costPerToken * 10000));
                comparison.details.cost = {
                    perToken: modelMetrics.costPerToken,
                    score: costScore
                };
                totalScore += costScore * weight;
                weightSum += weight;
            }
            if (metrics.includes('reliability')) {
                const weight = 0.1;
                const availabilityScore = modelMetrics.availability * 100;
                comparison.details.reliability = {
                    availability: modelMetrics.availability,
                    score: availabilityScore
                };
                totalScore += availabilityScore * weight;
                weightSum += weight;
            }
            comparison.overallScore = weightSum > 0 ? totalScore / weightSum : 0;
            comparisons.push(comparison);
        }
        return comparisons.sort((a, b) => b.overallScore - a.overallScore);
    }
    // Private helper methods
    /**
     * Append a timestamped quality measurement, keeping the last 100 entries.
     */
    async updateQualityHistory(modelName, qualityMetrics) {
        const history = this.qualityHistory.get(modelName) || [];
        history.push({
            ...qualityMetrics,
            timestamp: new Date()
        });
        // Keep only last 100 quality measurements
        if (history.length > 100) {
            history.splice(0, history.length - 100);
        }
        this.qualityHistory.set(modelName, history);
    }
    /**
     * Compute trend directions for latency, cost, and quality.
     */
    async analyzeTrends(modelName) {
        const metrics = this.modelMetrics.get(modelName);
        if (!metrics) {
            return { latency: 'stable', cost: 'stable', quality: 'stable' };
        }
        // Simplified trend analysis - in production, this would use more sophisticated algorithms
        const latencyTrend = await this.calculateTrendDirection(modelName, 'latency');
        const costTrendRaw = await this.calculateTrendDirection(modelName, 'cost');
        const qualityTrend = await this.calculateTrendDirection(modelName, 'quality');
        return {
            latency: latencyTrend,
            cost: costTrendRaw,
            quality: qualityTrend
        };
    }
    /**
     * Determine the trend direction for one metric.
     * Placeholder: always returns 'stable' until historical analysis exists.
     */
    async calculateTrendDirection(modelName, metric) {
        // This would analyze historical data points to determine trend
        // For now, return stable as default
        return 'stable';
    }
    /**
     * Rough tokens-per-second estimate from average latency, assuming ~100
     * tokens per request. Returns 0 when no latency sample exists yet.
     *
     * @param {number} avgLatency - Average latency in milliseconds.
     */
    estimateThroughput(avgLatency) {
        if (avgLatency === 0)
            return 0;
        // Rough estimate based on average latency
        // Assumes ~100 tokens per request
        const requestsPerSecond = 1000 / avgLatency;
        return Math.round(requestsPerSecond * 100); // tokens per second
    }
    /**
     * Emit alert events when metrics cross the degradation thresholds
     * documented on the class.
     */
    async checkForSignificantChanges(modelName, metrics) {
        // Check for significant degradation
        if (metrics.availability < 0.9) {
            this.emit('availabilityDegraded', modelName, metrics.availability);
        }
        if (metrics.errorRate > 0.05) {
            this.emit('errorRateIncreased', modelName, metrics.errorRate);
        }
        if (metrics.avgLatency > 10000) {
            this.emit('latencyIncreased', modelName, metrics.avgLatency);
        }
        if (metrics.qualityScore < 0.6) {
            this.emit('qualityDegraded', modelName, metrics.qualityScore);
        }
    }
    /**
     * Count errors in the recent request window.
     * Placeholder: returns a random 0-4 until real error logs are queried.
     */
    async getRecentErrorCount(modelName) {
        // This would query actual error logs
        // For now, return a simulated value
        return Math.floor(Math.random() * 5);
    }
    /**
     * Build a trend descriptor for one metric.
     * Placeholder: returns a fixed 'stable' descriptor.
     */
    async analyzeMetricTrend(modelName, metric) {
        // Simplified trend analysis
        return {
            metric,
            direction: 'stable',
            magnitude: 0,
            confidence: 0.8,
            timeframe: '24h',
            dataPoints: 100
        };
    }
    /**
     * Record one availability observation.
     * Placeholder: logs only; a time-series store would persist this.
     */
    async updateAvailabilityTracking(modelName, isAvailable, timestamp) {
        // Store availability data points for reliability calculations
        // This would typically use a time-series database
        shared_1.Logger.debug(`Availability update for ${modelName}: ${isAvailable}`);
    }
    /** Uptime percentage over the last 24 hours. Placeholder: fixed 99.5. */
    async calculateUptimePercentage(modelName) {
        // Calculate uptime percentage over the last 24 hours
        // This would query actual availability data
        return 99.5; // Simulated value
    }
    /** Mean Time Between Failures in minutes. Placeholder: fixed 720. */
    async calculateMTBF(modelName) {
        // Mean Time Between Failures calculation
        return 720; // 12 hours (simulated)
    }
    /** Mean Time To Recovery in minutes. Placeholder: fixed 15. */
    async calculateMTTR(modelName) {
        // Mean Time To Recovery calculation
        return 15; // 15 minutes (simulated)
    }
    /** SLA compliance percentage. Placeholder: fixed 99.9. */
    async calculateSLACompliance(modelName) {
        // SLA compliance percentage
        return 99.9; // Simulated value
    }
    /**
     * Run one benchmark for a model and score it from latency (0.3),
     * quality (0.4), reliability (0.2), and cost (0.1).
     *
     * @param {string} modelName
     * @param {string} taskType
     * @throws {Error} When the model is not tracked.
     */
    async runModelBenchmark(modelName, taskType) {
        const metrics = this.modelMetrics.get(modelName);
        if (!metrics) {
            throw new Error(`Model ${modelName} not found`);
        }
        // Simulate benchmark run
        const startTime = Date.now();
        // This would run actual benchmark tests
        await new Promise(resolve => setTimeout(resolve, 100));
        const endTime = Date.now();
        const benchmarkLatency = endTime - startTime;
        // Calculate benchmark score based on multiple factors
        const latencyScore = Math.max(0, 100 - (benchmarkLatency / 10));
        const qualityScore = metrics.qualityScore * 100;
        const reliabilityScore = metrics.availability * 100;
        const costScore = Math.max(0, 100 - (metrics.costPerToken * 10000));
        const overallScore = (latencyScore * 0.3 + qualityScore * 0.4 + reliabilityScore * 0.2 + costScore * 0.1);
        return {
            modelName,
            taskType,
            score: Math.round(overallScore),
            timestamp: new Date(),
            details: {
                latency: benchmarkLatency,
                accuracy: metrics.qualityScore,
                cost: metrics.costPerToken,
                throughput: metrics.throughput
            },
            comparisonRank: 0 // Will be set when comparing with others
        };
    }
}
// CommonJS exports (compiled from TypeScript): the class for custom
// instantiation, plus a module-level singleton shared across the package.
exports.ModelTracker = ModelTracker;
// Export singleton instance
exports.modelTracker = new ModelTracker();
//# sourceMappingURL=model-tracker.js.map