@promethean-os/prompt-optimization
Version:
Prompt Optimization v2.0 - qwen3:4b-instruct 100k context optimization system
551 lines • 20.4 kB
JavaScript
/**
* Monitoring Dashboard for Prompt Optimization v2.0
* qwen3:4b-instruct 100k context optimization
*/
import { adaptiveRouting } from './adaptive-routing';
import { abTesting } from './ab-testing';
/**
 * Monitoring Dashboard Manager
 *
 * Keeps an in-memory history of metric snapshots and alerts. Three background
 * timers drive it: metric collection (every minute), threshold checks (every
 * 30 seconds) and pruning of old data (every hour). Several helper methods
 * still return fixed placeholder values; they are marked "Simplified" below.
 */
export class MonitoringDashboard {
  /** Placeholder per-template average processing times (seconds). */
  static #TEMPLATE_PROCESSING_TIMES = new Map([
    ['T1-BASE', 1.8],
    ['T2-FOCUSED', 2.5],
    ['T2-CONTEXT', 2.8],
    ['T3-CONSTRAINTS', 3.2],
    ['T3-EXAMPLES', 3.5],
    ['T4-EDGE', 3.8],
    ['T4-VALIDATION', 4.0],
    ['T5-COMPLEX', 4.0],
    ['T6-SIMPLE', 1.5],
    ['T7-TECHNICAL', 3.3],
    ['T8-CREATIVE', 3.0],
    ['T9-DATA', 3.5],
    ['T10-DEBUG', 3.3],
    ['T11-REVIEW', 2.8],
    ['T12-FALLBACK', 1.2],
  ]);

  /** Placeholder per-template average token usage. */
  static #TEMPLATE_TOKEN_USAGE = new Map([
    ['T1-BASE', 450],
    ['T2-FOCUSED', 750],
    ['T2-CONTEXT', 800],
    ['T3-CONSTRAINTS', 850],
    ['T3-EXAMPLES', 900],
    ['T4-EDGE', 950],
    ['T4-VALIDATION', 1000],
    ['T5-COMPLEX', 1200],
    ['T6-SIMPLE', 500],
    ['T7-TECHNICAL', 950],
    ['T8-CREATIVE', 900],
    ['T9-DATA', 1000],
    ['T10-DEBUG', 950],
    ['T11-REVIEW', 800],
    ['T12-FALLBACK', 400],
  ]);

  /** Sums the `usage` counters of a `templatePerformance` record. */
  static #totalUsage(templatePerformance) {
    return Object.values(templatePerformance).reduce((sum, perf) => sum + perf.usage, 0);
  }

  /** Metric snapshots, oldest first. */
  metrics = [];
  /** Every alert raised so far (resolved and unresolved). */
  alerts = [];
  maxMetricsHistory = 1000; // Keep last 1000 data points
  alertThresholds = {
    successRate: 0.9,
    processingTime: 5.0,
    tokenUsage: 1500,
    errorRate: 0.1,
    fallbackRate: 0.15,
  };
  isInitialized = false;
  /** Handle of the metric-collection timer, or null while stopped. */
  monitoringInterval = null;
  /** Handle of the threshold-check timer, or null while stopped. */
  alertInterval = null;
  /** Handle of the pruning timer, or null while stopped. */
  cleanupInterval = null;

  /** Seeds the initial snapshot and starts the background timers. */
  constructor() {
    this.initializeMetrics();
    this.startMonitoring();
  }

  /**
   * Initialize metrics collection by pushing an all-zero snapshot.
   */
  initializeMetrics() {
    const initialMetrics = {
      timestamp: new Date(),
      overall: {
        totalRequests: 0,
        successRate: 0,
        averageProcessingTime: 0,
        averageTokenUsage: 0,
        systemHealth: 'excellent',
        uptime: 100,
      },
      templates: {},
      routing: {
        accuracy: 0,
        confidence: 0,
        fallbackRate: 0,
        domainDistribution: {},
        complexityDistribution: {},
      },
      performance: {
        throughput: 0,
        latency: { p50: 0, p95: 0, p99: 0 },
        errorRate: 0,
        tokenEfficiency: 0,
        costPerRequest: 0,
      },
      alerts: [],
      trends: {
        successRate: [],
        processingTime: [],
        tokenUsage: [],
        templateUsage: {},
        period: 'hour',
      },
    };
    this.metrics.push(initialMetrics);
  }

  /**
   * Start monitoring process.
   *
   * Idempotent: a timer that is already running is left alone, so calling
   * this from both the constructor and initialize() does not stack timers.
   * The handles are kept so cleanup() can stop them.
   */
  startMonitoring() {
    // Collect metrics every minute
    this.monitoringInterval ??= setInterval(() => {
      this.collectMetrics();
    }, 60000);
    // Check for alerts every 30 seconds
    this.alertInterval ??= setInterval(() => {
      this.checkAlerts();
    }, 30000);
    // Clean old metrics every hour
    this.cleanupInterval ??= setInterval(() => {
      this.cleanupOldMetrics();
    }, 3600000);
  }

  /**
   * Record a request for monitoring.
   *
   * Forwards the outcome to adaptive routing and raises per-request alerts
   * for errors and for processing time / token usage above the thresholds.
   *
   * @param {string} template - Template identifier used for the request.
   * @param {*} input - Request input (not inspected here).
   * @param {*} result - Request result (not inspected here).
   * @param {boolean} success - Whether the request succeeded.
   * @param {number} processingTime - Processing time, compared against `alertThresholds.processingTime`.
   * @param {number} tokenUsage - Tokens used, compared against `alertThresholds.tokenUsage`.
   * @param {*} [error] - Error to report; any truthy value raises an alert.
   */
  recordRequest(template, input, result, success, processingTime, tokenUsage, error) {
    // Update adaptive routing performance
    adaptiveRouting.recordPerformance(template, success);
    // Create alert for errors
    if (error) {
      this.createAlert({
        type: 'error',
        severity: 'medium',
        title: `Template Error: ${template}`,
        message: `Error processing request: ${error}`,
        template,
        metric: 'error_rate',
      });
    }
    // Check performance thresholds
    if (processingTime > this.alertThresholds.processingTime) {
      this.createAlert({
        type: 'warning',
        severity: 'medium',
        title: `High Processing Time: ${template}`,
        message: `Processing time ${processingTime}s exceeds threshold ${this.alertThresholds.processingTime}s`,
        template,
        metric: 'processing_time',
      });
    }
    if (tokenUsage > this.alertThresholds.tokenUsage) {
      this.createAlert({
        type: 'warning',
        severity: 'low',
        title: `High Token Usage: ${template}`,
        message: `Token usage ${tokenUsage} exceeds threshold ${this.alertThresholds.tokenUsage}`,
        template,
        metric: 'token_usage',
      });
    }
  }

  /**
   * Collect current metrics: build a snapshot from the adaptive-routing
   * statistics and append it to the history.
   */
  collectMetrics() {
    const routingStats = adaptiveRouting.getStatistics();
    // Calculate overall metrics
    const totalRequests = MonitoringDashboard.#totalUsage(routingStats.templatePerformance);
    const weightedSuccesses = Object.values(routingStats.templatePerformance).reduce(
      (sum, perf) => sum + perf.successRate * perf.usage,
      0,
    );
    // Usage-weighted mean; 0 when nothing has been recorded yet.
    const avgSuccessRate = totalRequests > 0 ? weightedSuccesses / totalRequests : 0;
    const avgProcessingTime = this.calculateAverageProcessingTime();
    const avgTokenUsage = this.calculateAverageTokenUsage();
    // Determine system health
    const systemHealth = this.calculateSystemHealth(avgSuccessRate, avgProcessingTime, avgTokenUsage);
    // Calculate template metrics
    const templateMetrics = {};
    for (const [template, perf] of Object.entries(routingStats.templatePerformance)) {
      templateMetrics[template] = {
        usage: perf.usage,
        successRate: perf.successRate,
        averageProcessingTime: this.getTemplateAverageProcessingTime(template),
        averageTokenUsage: this.getTemplateAverageTokenUsage(template),
        confidence: 0.9, // Would be calculated from actual data
        fallbackUsage: routingStats.fallbackUsage[template] || 0,
        lastUsed: new Date(), // Would be tracked per template
      };
    }
    // Calculate routing metrics
    const routingMetrics = {
      accuracy: avgSuccessRate,
      confidence: 0.9, // Would be calculated from routing confidence scores
      fallbackRate: this.calculateFallbackRate(),
      domainDistribution: this.getDomainDistribution(),
      complexityDistribution: this.getComplexityDistribution(),
    };
    // Calculate performance metrics
    const performanceMetrics = {
      throughput: this.calculateThroughput(),
      latency: this.calculateLatencyPercentiles(),
      errorRate: this.calculateErrorRate(),
      tokenEfficiency: this.calculateTokenEfficiency(),
      costPerRequest: this.calculateCostPerRequest(),
    };
    // Update trends. The history is empty after cleanup(), so start from
    // empty series instead of dereferencing a missing snapshot.
    const previousTrends = this.getCurrentMetrics()?.trends ?? {
      successRate: [],
      processingTime: [],
      tokenUsage: [],
      templateUsage: {},
      period: 'hour',
    };
    const trends = this.updateTrends(previousTrends, avgSuccessRate);
    this.metrics.push({
      timestamp: new Date(),
      overall: {
        totalRequests,
        successRate: avgSuccessRate,
        averageProcessingTime: avgProcessingTime,
        averageTokenUsage: avgTokenUsage,
        systemHealth,
        uptime: this.calculateUptime(),
      },
      templates: templateMetrics,
      routing: routingMetrics,
      performance: performanceMetrics,
      alerts: this.getActiveAlerts(),
      trends,
    });
    // Keep only recent metrics (drops as many as needed, in place)
    const overflow = this.metrics.length - this.maxMetricsHistory;
    if (overflow > 0) {
      this.metrics.splice(0, overflow);
    }
  }

  /**
   * Check for alerts based on current metrics.
   * Does nothing when there is no snapshot (e.g. after cleanup()).
   */
  checkAlerts() {
    const currentMetrics = this.getCurrentMetrics();
    if (!currentMetrics) {
      return;
    }
    const { overall, performance, routing } = currentMetrics;
    // Check overall success rate. With zero recorded requests the rate is a
    // placeholder 0, not a measurement, so it must not raise an alert.
    if (overall.totalRequests > 0 && overall.successRate < this.alertThresholds.successRate) {
      this.createAlert({
        type: 'error',
        severity: 'high',
        title: 'Low Success Rate',
        message: `Overall success rate ${Math.round(overall.successRate * 100)}% below threshold ${Math.round(this.alertThresholds.successRate * 100)}%`,
        metric: 'success_rate',
      });
    }
    // Check processing time
    if (overall.averageProcessingTime > this.alertThresholds.processingTime) {
      this.createAlert({
        type: 'warning',
        severity: 'medium',
        title: 'High Processing Time',
        message: `Average processing time ${overall.averageProcessingTime}s exceeds threshold ${this.alertThresholds.processingTime}s`,
        metric: 'processing_time',
      });
    }
    // Check error rate
    if (performance.errorRate > this.alertThresholds.errorRate) {
      this.createAlert({
        type: 'error',
        severity: 'critical',
        title: 'High Error Rate',
        message: `Error rate ${Math.round(performance.errorRate * 100)}% exceeds threshold ${Math.round(this.alertThresholds.errorRate * 100)}%`,
        metric: 'error_rate',
      });
    }
    // Check fallback rate
    if (routing.fallbackRate > this.alertThresholds.fallbackRate) {
      this.createAlert({
        type: 'warning',
        severity: 'medium',
        title: 'High Fallback Rate',
        message: `Fallback rate ${Math.round(routing.fallbackRate * 100)}% exceeds threshold ${Math.round(this.alertThresholds.fallbackRate * 100)}%`,
        metric: 'fallback_rate',
      });
    }
  }

  /**
   * Create a new alert and append it to the alert list.
   *
   * @param {object} alertData - Alert fields (type, severity, title, message, ...).
   *   Spread last, so it may override the generated id/timestamp/resolved.
   * @returns {object} The stored alert, including its generated `id`.
   */
  createAlert(alertData) {
    const alert = {
      id: `alert_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`,
      timestamp: new Date(),
      resolved: false,
      ...alertData,
    };
    this.alerts.push(alert);
    // Keep only recent alerts (last 1000)
    if (this.alerts.length > 1000) {
      this.alerts = this.alerts.slice(-1000);
    }
    return alert;
  }

  /**
   * Get current dashboard metrics.
   *
   * @returns {object|undefined} The newest snapshot, or undefined when the history is empty.
   */
  getCurrentMetrics() {
    return this.metrics[this.metrics.length - 1] || this.metrics[0];
  }

  /**
   * Get metrics history.
   *
   * @param {number} [hours=24] - Size of the look-back window in hours.
   * @returns {object[]} Snapshots whose timestamp falls inside the window.
   */
  getMetricsHistory(hours = 24) {
    const cutoff = new Date(Date.now() - hours * 60 * 60 * 1000);
    return this.metrics.filter((m) => m.timestamp >= cutoff);
  }

  /**
   * Get active alerts.
   *
   * @returns {object[]} Alerts that have not been resolved.
   */
  getActiveAlerts() {
    return this.alerts.filter((alert) => !alert.resolved);
  }

  /**
   * Resolve an alert.
   *
   * @param {string} alertId - Id of the alert to mark as resolved.
   * @returns {boolean} True when the alert was found, false otherwise.
   */
  resolveAlert(alertId) {
    const alert = this.alerts.find((a) => a.id === alertId);
    if (alert) {
      alert.resolved = true;
      return true;
    }
    return false;
  }

  /**
   * Get A/B test status.
   *
   * @returns {Array<{id: *, name: *, status: *, progress: *}>} One summary per test.
   */
  getABTestStatus() {
    const tests = abTesting.getAllTests();
    return tests.map((test) => ({
      id: test.id,
      name: test.config.name,
      status: test.status,
      progress: abTesting.getTestStatus(test.id)?.progress || 0,
    }));
  }

  /**
   * Export dashboard data.
   *
   * @returns {{metrics: object[], alerts: object[], summary: object}} Full
   *   history plus a summary; averages are 0 when the history is empty.
   */
  exportData() {
    const snapshotCount = this.metrics.length;
    // Mean of one `overall` field across all snapshots; 0 instead of NaN
    // when there are no snapshots.
    const meanOf = (pick) =>
      snapshotCount > 0 ? this.metrics.reduce((sum, m) => sum + pick(m), 0) / snapshotCount : 0;
    const summary = {
      // NOTE(review): this adds up totalRequests of every snapshot; if the
      // routing usage counters are cumulative this over-counts - confirm.
      totalRequests: this.metrics.reduce((sum, m) => sum + m.overall.totalRequests, 0),
      averageSuccessRate: meanOf((m) => m.overall.successRate),
      averageProcessingTime: meanOf((m) => m.overall.averageProcessingTime),
      totalAlerts: this.alerts.length,
      activeAlerts: this.getActiveAlerts().length,
    };
    return {
      metrics: this.metrics,
      alerts: this.alerts,
      summary,
    };
  }

  // Helper methods (simplified implementations)

  /** @returns {number} Fixed placeholder average processing time. */
  calculateAverageProcessingTime() {
    // Simplified - would calculate from actual request data
    return 2.3;
  }

  /** @returns {number} Fixed placeholder average token usage. */
  calculateAverageTokenUsage() {
    // Simplified - would calculate from actual request data
    return 691;
  }

  /**
   * Classify system health; each tier requires all three limits to hold.
   *
   * @param {number} successRate - Success rate as a fraction.
   * @param {number} processingTime - Average processing time.
   * @param {number} tokenUsage - Average token usage.
   * @returns {'excellent'|'good'|'fair'|'poor'} Best tier whose limits are met.
   */
  calculateSystemHealth(successRate, processingTime, tokenUsage) {
    if (successRate >= 0.95 && processingTime <= 2.0 && tokenUsage <= 800) {
      return 'excellent';
    }
    if (successRate >= 0.9 && processingTime <= 3.0 && tokenUsage <= 1000) {
      return 'good';
    }
    if (successRate >= 0.8 && processingTime <= 5.0 && tokenUsage <= 1500) {
      return 'fair';
    }
    return 'poor';
  }

  /**
   * @param {string} template - Template identifier.
   * @returns {number} Table value for the template, or 2.0 for unknown ones.
   */
  getTemplateAverageProcessingTime(template) {
    // A Map lookup cannot hit inherited members the way a plain-object
    // lookup does for keys such as 'constructor'.
    return MonitoringDashboard.#TEMPLATE_PROCESSING_TIMES.get(template) ?? 2.0;
  }

  /**
   * @param {string} template - Template identifier.
   * @returns {number} Table value for the template, or 600 for unknown ones.
   */
  getTemplateAverageTokenUsage(template) {
    return MonitoringDashboard.#TEMPLATE_TOKEN_USAGE.get(template) ?? 600;
  }

  /** @returns {number} Total fallbacks divided by total usage; 0 without usage. */
  calculateFallbackRate() {
    const stats = adaptiveRouting.getStatistics();
    const totalFallbacks = Object.values(stats.fallbackUsage).reduce((sum, count) => sum + count, 0);
    const totalRequests = MonitoringDashboard.#totalUsage(stats.templatePerformance);
    return totalRequests > 0 ? totalFallbacks / totalRequests : 0;
  }

  /** @returns {Object<string, number>} Fixed placeholder domain shares. */
  getDomainDistribution() {
    // Simplified - would track actual domain usage
    return {
      technical: 0.35,
      creative: 0.25,
      data: 0.2,
      debug: 0.1,
      general: 0.1,
    };
  }

  /** @returns {Object<string, number>} Fixed placeholder complexity shares. */
  getComplexityDistribution() {
    // Simplified - would track actual complexity distribution
    return {
      SIMPLE: 0.35,
      FOCUSED: 0.25,
      COMPLEX: 0.15,
      TECHNICAL: 0.1,
      CREATIVE: 0.08,
      DATA: 0.05,
      DEBUG: 0.02,
    };
  }

  /** @returns {number} Fixed placeholder throughput. */
  calculateThroughput() {
    // Simplified - would calculate from actual request timestamps
    return 45; // requests per minute
  }

  /** @returns {{p50: number, p95: number, p99: number}} Fixed placeholder percentiles. */
  calculateLatencyPercentiles() {
    // Simplified - would calculate from actual latency data
    return {
      p50: 1.8,
      p95: 4.2,
      p99: 7.5,
    };
  }

  /** @returns {number} Usage-weighted failure fraction; 0 without usage. */
  calculateErrorRate() {
    const stats = adaptiveRouting.getStatistics();
    const totalRequests = MonitoringDashboard.#totalUsage(stats.templatePerformance);
    const totalErrors = Object.values(stats.templatePerformance).reduce(
      (sum, perf) => sum + perf.usage * (1 - perf.successRate),
      0,
    );
    return totalRequests > 0 ? totalErrors / totalRequests : 0;
  }

  /** @returns {number} Fixed placeholder token efficiency. */
  calculateTokenEfficiency() {
    // Simplified - would compare actual vs expected token usage
    return 0.75; // 75% efficiency
  }

  /** @returns {number} Fixed placeholder cost per request. */
  calculateCostPerRequest() {
    // Simplified - would calculate based on actual token costs
    return 0.002; // $0.002 per request
  }

  /** @returns {number} Fixed placeholder uptime. */
  calculateUptime() {
    // Simplified - would track actual uptime
    return 99.9;
  }

  /**
   * Build the next trend series: previous points from the last 24 hours
   * plus one new point per series.
   *
   * @param {object} previousTrends - Trends object of the previous snapshot.
   * @param {number} [successRate] - Measured success rate for the new point.
   *   When omitted, the earlier placeholder value is recorded instead.
   * @returns {object} New trends object; `previousTrends` is not modified.
   */
  updateTrends(previousTrends, successRate) {
    const now = new Date();
    const successRateValue = Number.isFinite(successRate)
      ? successRate
      : this.calculateAverageProcessingTime() > 0
        ? 0.93
        : Math.random() * 0.1 + 0.85;
    // Keep only last 24 hours of trend data
    const cutoff = new Date(Date.now() - 24 * 60 * 60 * 1000);
    const extend = (series, value) => [
      ...series.filter((p) => p.timestamp >= cutoff),
      { timestamp: now, value },
    ];
    return {
      successRate: extend(previousTrends.successRate, successRateValue),
      processingTime: extend(previousTrends.processingTime, this.calculateAverageProcessingTime()),
      tokenUsage: extend(previousTrends.tokenUsage, this.calculateAverageTokenUsage()),
      templateUsage: previousTrends.templateUsage, // Would update per template
      period: previousTrends.period,
    };
  }

  /**
   * Drop snapshots older than 7 days, and alerts older than 7 days that
   * are already resolved (unresolved alerts are always kept).
   */
  cleanupOldMetrics() {
    const cutoff = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000); // Keep 7 days
    this.metrics = this.metrics.filter((m) => m.timestamp >= cutoff);
    this.alerts = this.alerts.filter((a) => a.timestamp >= cutoff || !a.resolved);
  }

  /**
   * Initialize monitoring dashboard. No-op when already initialized.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.isInitialized) {
      return;
    }
    console.log('🔧 Initializing Monitoring Dashboard...');
    // cleanup() empties the history; re-seed it so the health check and the
    // first collection have a snapshot to read.
    if (this.metrics.length === 0) {
      this.initializeMetrics();
    }
    // Start metrics collection
    this.startMonitoring();
    // Perform initial health check
    await this.performHealthCheck();
    this.isInitialized = true;
    console.log('✅ Monitoring Dashboard initialized successfully');
  }

  /**
   * Cleanup monitoring resources: stop all timers and clear stored data.
   *
   * @returns {Promise<void>}
   */
  async cleanup() {
    console.log('🧹 Cleaning up Monitoring Dashboard...');
    // Stop monitoring intervals
    for (const key of ['monitoringInterval', 'alertInterval', 'cleanupInterval']) {
      if (this[key] !== null) {
        clearInterval(this[key]);
        this[key] = null;
      }
    }
    // Clear metrics
    this.metrics = [];
    this.alerts = [];
    this.isInitialized = false;
    console.log('✅ Monitoring Dashboard cleaned up');
  }

  /**
   * Perform health check on the newest snapshot: success rate, token usage
   * and fallback rate. Does nothing when there is no snapshot.
   *
   * @returns {Promise<void>}
   */
  async performHealthCheck() {
    const currentMetrics = this.getCurrentMetrics();
    if (!currentMetrics) {
      return;
    }
    const { overall, routing } = currentMetrics;
    // A success rate of 0 with zero requests is a placeholder, not a failure.
    if (overall.totalRequests > 0 && overall.successRate < this.alertThresholds.successRate) {
      this.createAlert({
        type: 'error',
        severity: 'high',
        title: 'Low Success Rate',
        message: `Success rate ${(overall.successRate * 100).toFixed(1)}% below threshold ${this.alertThresholds.successRate * 100}%`,
        metric: 'success_rate',
      });
    }
    if (overall.averageTokenUsage > this.alertThresholds.tokenUsage) {
      this.createAlert({
        type: 'warning',
        severity: 'medium',
        title: 'High Token Usage',
        message: `Average token usage ${overall.averageTokenUsage.toFixed(0)} above threshold ${this.alertThresholds.tokenUsage}`,
        metric: 'token_usage',
      });
    }
    if (routing.fallbackRate > this.alertThresholds.fallbackRate) {
      this.createAlert({
        type: 'warning',
        severity: 'high',
        title: 'High Fallback Rate',
        message: `Fallback rate ${(routing.fallbackRate * 100).toFixed(1)}% above threshold ${(this.alertThresholds.fallbackRate * 100).toFixed(1)}%`,
        metric: 'fallback_rate',
      });
    }
  }

  /**
   * Raise latency and error-rate alerts for the given snapshot.
   *
   * @param {object} currentMetrics - Snapshot with `overall` and `performance` sections.
   */
  checkPerformanceAlerts(currentMetrics) {
    if (currentMetrics.overall.averageProcessingTime > this.alertThresholds.processingTime) {
      this.createAlert({
        type: 'warning',
        severity: 'medium',
        title: 'High Latency',
        message: `Average processing time ${currentMetrics.overall.averageProcessingTime.toFixed(2)}s above threshold ${this.alertThresholds.processingTime}s`,
        metric: 'processing_time',
      });
    }
    if (currentMetrics.performance.errorRate > this.alertThresholds.errorRate) {
      this.createAlert({
        type: 'error',
        severity: 'critical',
        title: 'High Error Rate',
        message: `Error rate ${(currentMetrics.performance.errorRate * 100).toFixed(1)}% above threshold ${(this.alertThresholds.errorRate * 100).toFixed(1)}%`,
        metric: 'error_rate',
      });
    }
  }
}
/**
 * Singleton instance for global use.
 *
 * Created when this module is first evaluated. The MonitoringDashboard
 * constructor seeds an initial metrics snapshot and starts the background
 * monitoring timers, so importing this module has those side effects.
 */
export const monitoringDashboard = new MonitoringDashboard();
//# sourceMappingURL=monitoring-dashboard.js.map