UNPKG

aiwf

Version:

AI Workflow Framework for Claude Code with multi-language support (Korean/English)

885 lines (733 loc) 23.9 kB
/**
 * Metrics Collector
 * Collects and analyzes metrics for AI persona performance
 */

import fs from 'fs/promises';
import path from 'path';
import { EventEmitter } from 'events';

/**
 * Records per-session metrics for AI personas, aggregates them per persona
 * and per task type, and persists everything as JSON under `metricsPath`.
 *
 * Events emitted: `initialized`, `sessionStarted`, `sessionEnded`,
 * `eventTracked`, `metricsPersisted`, `reset`, and `error` (the latter only
 * when at least one `error` listener is registered).
 */
class MetricsCollector extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {string} [options.metricsPath] - Directory holding `metrics.json`
   *   (default: `<cwd>/.aiwf/metrics`).
   * @param {number} [options.maxHistorySize] - Maximum sessions kept (default 1000).
   * @param {number} [options.aggregationInterval] - Stored on `options`; not read
   *   by this class (default 1 hour).
   * @param {number} [options.persistInterval] - Milliseconds between automatic
   *   persists (default 5 minutes).
   */
  constructor(options = {}) {
    super();

    // Caller-supplied keys are spread first and the known keys are resolved
    // with `??`, so an explicit `undefined`/`null` falls back to the default
    // instead of overwriting it.
    this.options = {
      ...options,
      metricsPath: options.metricsPath ?? path.join(process.cwd(), '.aiwf', 'metrics'),
      maxHistorySize: options.maxHistorySize ?? 1000,
      aggregationInterval: options.aggregationInterval ?? 3600000, // 1 hour
      persistInterval: options.persistInterval ?? 300000, // 5 minutes
    };

    this.metrics = {
      sessions: [],
      personaPerformance: {},
      taskOutcomes: [],
      aggregated: {},
    };

    this.currentSession = null;
    this.startTime = Date.now();
    this.persistTimer = null;
  }

  /**
   * Emit an `error` event only when somebody is listening.
   *
   * EventEmitter throws when `error` is emitted without a listener, which
   * would mask the original failure (in `init`) or surface as an unhandled
   * rejection (in the timer-driven `persistMetrics`).
   *
   * @param {unknown} error - The underlying failure.
   * @param {string} phase - Where it happened (`initialization`, `load`, `persist`).
   */
  emitErrorSafely(error, phase) {
    if (this.listenerCount('error') > 0) {
      this.emit('error', { error, phase });
    }
  }

  /**
   * Initialize metrics collector: create the metrics directory, load any
   * previously persisted metrics and start the periodic persist timer.
   *
   * @returns {Promise<boolean>} `true` on success.
   * @throws Rethrows the original error after reporting it via `error`.
   */
  async init() {
    try {
      // Ensure metrics directory exists
      await fs.mkdir(this.options.metricsPath, { recursive: true });

      // Load existing metrics
      await this.loadMetrics();

      // Start persist timer
      this.startPersistTimer();

      this.emit('initialized', {
        sessionsLoaded: this.metrics.sessions.length,
        startTime: this.startTime,
      });

      return true;
    } catch (error) {
      this.emitErrorSafely(error, 'initialization');
      throw error;
    }
  }

  /**
   * Start a new metrics session. An already active session is ended first.
   *
   * @param {string|{name: string}} persona - Persona name or object with `name`.
   * @param {{type?: string, description?: string, command?: string}} [task]
   * @returns {string} The new session id.
   */
  startSession(persona, task = {}) {
    // End current session if exists
    if (this.currentSession) {
      this.endSession();
    }

    this.currentSession = {
      id: this.generateSessionId(),
      persona: persona.name || persona,
      task: {
        type: task.type || 'general',
        description: task.description || task.command || '',
        complexity: this.estimateTaskComplexity(task),
      },
      startTime: Date.now(),
      events: [],
      metrics: {
        tokenUsage: {
          start: this.getCurrentTokenCount(),
          end: null,
          contextSwitches: 0,
        },
        interactions: 0,
        errors: 0,
        completionStatus: null,
      },
    };

    this.emit('sessionStarted', {
      sessionId: this.currentSession.id,
      persona: this.currentSession.persona,
    });

    return this.currentSession.id;
  }

  /**
   * End current session.
   *
   * @param {string} [status] - Final status. When omitted, a status already
   *   recorded by a `task_completed` / `task_failed` event is kept; otherwise
   *   the session is marked `'completed'`.
   * @returns {object|undefined} The finished session, or `undefined` when no
   *   session was active.
   */
  endSession(status) {
    if (!this.currentSession) return undefined;

    const session = this.currentSession;

    // Do not clobber a 'failed' outcome tracked during the session just
    // because the caller (or startSession's auto-close) passed no status.
    const finalStatus = status ?? session.metrics.completionStatus ?? 'completed';

    session.endTime = Date.now();
    session.duration = session.endTime - session.startTime;
    session.metrics.completionStatus = finalStatus;
    session.metrics.tokenUsage.end = this.getCurrentTokenCount();

    // Calculate quality metrics
    session.qualityMetrics = this.calculateQualityMetrics(session);

    // Store session
    this.metrics.sessions.push(session);
    this.updatePersonaPerformance(session);

    // Limit history size
    if (this.metrics.sessions.length > this.options.maxHistorySize) {
      this.metrics.sessions = this.metrics.sessions.slice(-this.options.maxHistorySize);
    }

    this.emit('sessionEnded', {
      sessionId: session.id,
      duration: session.duration,
      status: finalStatus,
    });

    this.currentSession = null;

    return session;
  }

  /**
   * Track an event. The event is appended to the active session (if any),
   * counted globally, and re-emitted as `eventTracked`.
   *
   * @param {string} eventType
   * @param {object} [data]
   */
  trackEvent(eventType, data = {}) {
    const event = {
      type: eventType,
      timestamp: Date.now(),
      data,
    };

    // Add to current session if active
    if (this.currentSession) {
      this.currentSession.events.push(event);

      // Update session metrics based on event
      this.updateSessionMetrics(eventType, data);
    }

    // Track global events
    this.trackGlobalEvent(eventType, data);

    this.emit('eventTracked', event);
  }

  /**
   * Update session metrics based on event. Unknown event types are ignored.
   *
   * @param {string} eventType
   * @param {object} data - Currently unused.
   */
  updateSessionMetrics(eventType, data) {
    if (!this.currentSession) return;

    const sessionMetrics = this.currentSession.metrics;

    switch (eventType) {
      case 'interaction':
        sessionMetrics.interactions++;
        break;
      case 'error':
        sessionMetrics.errors++;
        break;
      case 'context_switch':
        sessionMetrics.tokenUsage.contextSwitches++;
        break;
      case 'task_completed':
        sessionMetrics.completionStatus = 'completed';
        break;
      case 'task_failed':
        sessionMetrics.completionStatus = 'failed';
        break;
      default:
        break;
    }
  }

  /**
   * Track global events (not session-specific): per-type counters plus a log
   * of `persona_switch` events.
   *
   * @param {string} eventType
   * @param {object} data - For `persona_switch`: `{ from, to, trigger }`.
   */
  trackGlobalEvent(eventType, data) {
    const aggregated = this.metrics.aggregated;

    // Aggregate event counts
    if (!aggregated.eventCounts) {
      aggregated.eventCounts = {};
    }
    aggregated.eventCounts[eventType] = (aggregated.eventCounts[eventType] || 0) + 1;

    // Track persona switches
    if (eventType === 'persona_switch') {
      if (!aggregated.personaSwitches) {
        aggregated.personaSwitches = [];
      }

      aggregated.personaSwitches.push({
        from: data.from,
        to: data.to,
        trigger: data.trigger,
        timestamp: Date.now(),
      });
    }
  }

  /**
   * Calculate quality metrics for a session.
   *
   * The overall score is a weighted sum: completion 0.3, token efficiency
   * 0.3, inverse error rate 0.2, interaction efficiency 0.2.
   *
   * @param {object} session - A session whose `duration` is already set.
   * @returns {{completionTime: number, tokenEfficiency: number, errorRate: number,
   *   interactionEfficiency: number, overallScore: number}}
   */
  calculateQualityMetrics(session) {
    const metrics = {
      completionTime: session.duration,
      tokenEfficiency: this.calculateTokenEfficiency(session),
      errorRate: this.calculateErrorRate(session),
      interactionEfficiency: this.calculateInteractionEfficiency(session),
      overallScore: 0,
    };

    // Calculate overall score (0-1)
    const weights = {
      completion: 0.3,
      tokenEfficiency: 0.3,
      errorRate: 0.2,
      interactionEfficiency: 0.2,
    };

    let score = 0;

    // Completion score
    if (session.metrics.completionStatus === 'completed') {
      score += weights.completion;
    }

    // Token efficiency score
    score += metrics.tokenEfficiency * weights.tokenEfficiency;

    // Error rate score (inverse - lower is better)
    score += (1 - metrics.errorRate) * weights.errorRate;

    // Interaction efficiency score
    score += metrics.interactionEfficiency * weights.interactionEfficiency;

    metrics.overallScore = score;

    return metrics;
  }

  /**
   * Calculate token efficiency: expected tokens (complexity * 1000) divided
   * by tokens actually used, capped at 1. Zero usage counts as fully efficient.
   *
   * @param {object} session
   * @returns {number} Value in (0, 1].
   */
  calculateTokenEfficiency(session) {
    const tokensUsed =
      (session.metrics.tokenUsage.end || 0) - (session.metrics.tokenUsage.start || 0);

    if (tokensUsed === 0) return 1;

    // Estimate expected tokens based on task complexity
    const expectedTokens = session.task.complexity * 1000;

    // Efficiency is inverse of usage ratio (lower usage = higher efficiency)
    return Math.min(expectedTokens / tokensUsed, 1);
  }

  /**
   * Calculate error rate: errors per interaction, capped at 1.
   *
   * @param {object} session
   * @returns {number} 0 when the session had no interactions.
   */
  calculateErrorRate(session) {
    if (session.metrics.interactions === 0) return 0;

    return Math.min(session.metrics.errors / session.metrics.interactions, 1);
  }

  /**
   * Calculate interaction efficiency: expected interactions (complexity * 2,
   * at least 1) divided by actual interactions, capped at 1.
   *
   * @param {object} session
   * @returns {number} 0 when the session had no interactions.
   */
  calculateInteractionEfficiency(session) {
    if (session.metrics.interactions === 0) return 0;

    // Estimate expected interactions based on task complexity
    const expectedInteractions = Math.max(session.task.complexity * 2, 1);

    // Efficiency is inverse of interaction ratio
    return Math.min(expectedInteractions / session.metrics.interactions, 1);
  }

  /**
   * Estimate task complexity from the task type and description keywords.
   *
   * @param {{type?: string, description?: string}} task
   * @returns {number} Value between 1 and 5.
   */
  estimateTaskComplexity(task) {
    let complexity = 1; // Base complexity

    // Increase based on task type
    const complexTypes = ['debug', 'optimize', 'architect', 'refactor'];
    if (complexTypes.includes(task.type)) {
      complexity += 1;
    }

    // Increase based on keywords
    const complexKeywords = ['complex', 'difficult', 'large', 'entire', 'all'];
    const description = (task.description || '').toLowerCase();

    for (const keyword of complexKeywords) {
      if (description.includes(keyword)) {
        complexity += 0.5;
      }
    }

    // Cap at 5
    return Math.min(complexity, 5);
  }

  /**
   * Update persona performance metrics with a finished session.
   *
   * @param {object} session
   */
  updatePersonaPerformance(session) {
    const personaName = session.persona;

    if (!this.metrics.personaPerformance[personaName]) {
      this.metrics.personaPerformance[personaName] = {
        totalSessions: 0,
        completedSessions: 0,
        totalDuration: 0,
        totalTokens: 0,
        totalErrors: 0,
        qualityScores: [],
        taskTypes: {},
      };
    }

    const perf = this.metrics.personaPerformance[personaName];

    perf.totalSessions++;
    if (session.metrics.completionStatus === 'completed') {
      perf.completedSessions++;
    }

    perf.totalDuration += session.duration;

    const tokensUsed =
      (session.metrics.tokenUsage.end || 0) - (session.metrics.tokenUsage.start || 0);
    perf.totalTokens += tokensUsed;

    perf.totalErrors += session.metrics.errors;

    if (session.qualityMetrics) {
      perf.qualityScores.push(session.qualityMetrics.overallScore);
    }

    // Track task types
    const taskType = session.task.type;
    perf.taskTypes[taskType] = (perf.taskTypes[taskType] || 0) + 1;
  }

  /**
   * Get current token count (simulated).
   *
   * @returns {number} One "token" per 100 ms elapsed since construction.
   */
  getCurrentTokenCount() {
    // In a real implementation, this would interface with the token tracking system
    // For now, return a simulated increasing value
    return Math.floor((Date.now() - this.startTime) / 100);
  }

  /**
   * Collect current metrics of the active session.
   *
   * @returns {Promise<object|null>} `null` when no session is active.
   */
  async collectCurrentMetrics() {
    if (!this.currentSession) return null;

    const { startTime, metrics } = this.currentSession;

    return {
      duration: Date.now() - startTime,
      interactions: metrics.interactions,
      errors: metrics.errors,
      tokenUsage: {
        current: this.getCurrentTokenCount() - metrics.tokenUsage.start,
        contextSwitches: metrics.tokenUsage.contextSwitches,
      },
    };
  }

  /**
   * Get persona history.
   *
   * @param {string} personaName
   * @returns {Promise<object[]>} Stored sessions recorded for that persona.
   */
  async getPersonaHistory(personaName) {
    return this.metrics.sessions.filter((session) => session.persona === personaName);
  }

  /**
   * Get persona statistics.
   *
   * @param {string} personaName
   * @returns {Promise<object>} Aggregate stats, or `{ message }` when the
   *   persona has no recorded sessions.
   */
  async getPersonaStats(personaName) {
    const perf = this.metrics.personaPerformance[personaName];

    if (!perf || perf.totalSessions === 0) {
      return {
        message: `No data available for persona: ${personaName}`,
      };
    }

    const avgQualityScore =
      perf.qualityScores.length > 0
        ? perf.qualityScores.reduce((a, b) => a + b, 0) / perf.qualityScores.length
        : 0;

    return {
      totalSessions: perf.totalSessions,
      completionRate: perf.completedSessions / perf.totalSessions,
      averageDuration: perf.totalDuration / perf.totalSessions,
      averageTokens: perf.totalTokens / perf.totalSessions,
      errorRate: perf.totalErrors / perf.totalSessions,
      averageQualityScore: avgQualityScore,
      taskDistribution: perf.taskTypes,
    };
  }

  /**
   * Generate comprehensive report.
   *
   * @param {object} [options]
   * @param {'hour'|'day'|'week'|'month'|'all'} [options.timeRange='all']
   * @returns {Promise<object>} Summary, persona analysis, task analysis and
   *   recommendations for sessions started within the time range.
   */
  async generateReport(options = {}) {
    const timeRange = options.timeRange || 'all';
    const startTime = this.getStartTime(timeRange);

    // Filter sessions by time range
    const sessions = this.metrics.sessions.filter(
      (session) => session.startTime >= startTime
    );

    return {
      generatedAt: new Date().toISOString(),
      timeRange,
      summary: {
        totalSessions: sessions.length,
        averageCompletionTime: this.calculateAverageTime(sessions),
        personaUsage: this.getPersonaUsageStats(sessions),
        qualityTrend: this.calculateQualityTrend(sessions),
      },
      personaAnalysis: this.analyzePersonaEffectiveness(sessions),
      taskAnalysis: this.analyzeTaskTypes(sessions),
      recommendations: this.generateRecommendations(sessions),
    };
  }

  /**
   * Get start time for time range.
   *
   * @param {string} timeRange
   * @returns {number} Epoch milliseconds; 0 for unknown ranges (i.e. "all").
   */
  getStartTime(timeRange) {
    const now = Date.now();

    switch (timeRange) {
      case 'hour':
        return now - 3600000;
      case 'day':
        return now - 86400000;
      case 'week':
        return now - 604800000;
      case 'month':
        return now - 2592000000; // 30 days
      default:
        return 0;
    }
  }

  /**
   * Calculate average completion time.
   *
   * @param {object[]} sessions
   * @returns {number} Mean `duration`, or 0 for an empty list.
   */
  calculateAverageTime(sessions) {
    if (sessions.length === 0) return 0;

    const totalTime = sessions.reduce((sum, session) => sum + session.duration, 0);
    return totalTime / sessions.length;
  }

  /**
   * Get persona usage statistics.
   *
   * @param {object[]} sessions
   * @returns {Object<string, {count: number, percentage: number}>}
   */
  getPersonaUsageStats(sessions) {
    const usage = {};

    for (const session of sessions) {
      usage[session.persona] = (usage[session.persona] || 0) + 1;
    }

    // Convert to percentages
    const total = sessions.length;
    const stats = {};

    for (const [persona, count] of Object.entries(usage)) {
      stats[persona] = {
        count,
        percentage: (count / total) * 100,
      };
    }

    return stats;
  }

  /**
   * Calculate quality trend via a least-squares slope over the sessions'
   * overall scores ordered by start time (x = ordinal position).
   *
   * @param {object[]} sessions
   * @returns {object} `{ trend: 'insufficient_data' }` when fewer than two
   *   scored sessions exist, otherwise `{ trend, slope, latestScore, averageScore }`.
   */
  calculateQualityTrend(sessions) {
    if (sessions.length < 2) {
      return { trend: 'insufficient_data' };
    }

    // Get quality scores over time
    const scores = sessions
      .filter((s) => s.qualityMetrics)
      .map((s) => ({
        time: s.startTime,
        score: s.qualityMetrics.overallScore,
      }))
      .sort((a, b) => a.time - b.time);

    if (scores.length < 2) {
      return { trend: 'insufficient_data' };
    }

    // Calculate trend (simple linear regression)
    const n = scores.length;
    const sumX = scores.reduce((sum, s, i) => sum + i, 0);
    const sumY = scores.reduce((sum, s) => sum + s.score, 0);
    const sumXY = scores.reduce((sum, s, i) => sum + i * s.score, 0);
    const sumX2 = scores.reduce((sum, s, i) => sum + i * i, 0);

    // The denominator is non-zero because n >= 2 and x values are distinct.
    const slope = (n * sumXY - sumX * sumY) / (n * sumX2 - sumX * sumX);

    let trend = 'stable';
    if (slope > 0.01) {
      trend = 'improving';
    } else if (slope < -0.01) {
      trend = 'declining';
    }

    return {
      trend,
      slope,
      latestScore: scores[scores.length - 1].score,
      averageScore: sumY / n,
    };
  }

  /**
   * Analyze persona effectiveness.
   *
   * @param {object[]} sessions
   * @returns {Object<string, object>} Per-persona session count, completion
   *   rate, averages, best use cases and improvement areas.
   */
  analyzePersonaEffectiveness(sessions) {
    const analysis = {};

    // Group sessions by persona
    const sessionsByPersona = {};
    for (const session of sessions) {
      if (!sessionsByPersona[session.persona]) {
        sessionsByPersona[session.persona] = [];
      }
      sessionsByPersona[session.persona].push(session);
    }

    // Analyze each persona. The loop variable must not reuse the name of the
    // grouping object: a shadowing `const` puts the iterable expression in the
    // temporal dead zone and throws a ReferenceError.
    for (const [persona, personaSessions] of Object.entries(sessionsByPersona)) {
      const completed = personaSessions.filter(
        (s) => s.metrics.completionStatus === 'completed'
      );
      const qualityScores = personaSessions
        .filter((s) => s.qualityMetrics)
        .map((s) => s.qualityMetrics.overallScore);

      analysis[persona] = {
        sessionCount: personaSessions.length,
        completionRate: completed.length / personaSessions.length,
        averageCompletionTime: this.calculateAverageTime(personaSessions),
        averageQualityScore:
          qualityScores.length > 0
            ? qualityScores.reduce((a, b) => a + b, 0) / qualityScores.length
            : 0,
        tokenEfficiency: this.calculateAverageTokenEfficiency(personaSessions),
        bestUseCases: this.identifyBestUseCases(persona, personaSessions),
        improvementAreas: this.identifyImprovementAreas(persona, personaSessions),
      };
    }

    return analysis;
  }

  /**
   * Calculate average token efficiency.
   *
   * @param {object[]} sessions
   * @returns {number} Mean over sessions with quality metrics; 0 when none.
   */
  calculateAverageTokenEfficiency(sessions) {
    const efficiencies = sessions
      .filter((s) => s.qualityMetrics)
      .map((s) => s.qualityMetrics.tokenEfficiency);

    if (efficiencies.length === 0) return 0;

    return efficiencies.reduce((a, b) => a + b, 0) / efficiencies.length;
  }

  /**
   * Identify best use cases for persona.
   *
   * @param {string} persona - Currently unused.
   * @param {object[]} sessions - Sessions of that persona.
   * @returns {string[]} Up to three task types (each with at least two
   *   sessions) ranked by success rate plus average quality.
   */
  identifyBestUseCases(persona, sessions) {
    // Group by task type and calculate success rate
    const taskStats = {};

    for (const session of sessions) {
      const taskType = session.task.type;
      if (!taskStats[taskType]) {
        taskStats[taskType] = {
          total: 0,
          successful: 0,
          avgQuality: 0,
          qualityCount: 0,
        };
      }

      const stats = taskStats[taskType];
      stats.total++;
      if (session.metrics.completionStatus === 'completed') {
        stats.successful++;
      }

      if (session.qualityMetrics) {
        // Accumulates the sum here; divided by qualityCount below.
        stats.avgQuality += session.qualityMetrics.overallScore;
        stats.qualityCount++;
      }
    }

    // Calculate success rates and find best
    const taskScores = Object.entries(taskStats).map(([type, stats]) => ({
      type,
      successRate: stats.successful / stats.total,
      avgQuality: stats.qualityCount > 0 ? stats.avgQuality / stats.qualityCount : 0,
      count: stats.total,
    }));

    // Return top 3 task types
    return taskScores
      .filter((t) => t.count >= 2) // Minimum sample size
      .sort((a, b) => b.successRate + b.avgQuality - (a.successRate + a.avgQuality))
      .slice(0, 3)
      .map((t) => t.type);
  }

  /**
   * Identify improvement areas.
   *
   * @param {string} persona - Currently unused.
   * @param {object[]} sessions - Non-empty list of that persona's sessions.
   * @returns {string[]} Any of `completion_rate` (< 0.8), `error_rate`
   *   (> 1 error per session on average), `token_efficiency` (< 0.5).
   */
  identifyImprovementAreas(persona, sessions) {
    const areas = [];

    // Check completion rate
    const completionRate =
      sessions.filter((s) => s.metrics.completionStatus === 'completed').length /
      sessions.length;

    if (completionRate < 0.8) {
      areas.push('completion_rate');
    }

    // Check error rate
    const avgErrors =
      sessions.reduce((sum, s) => sum + s.metrics.errors, 0) / sessions.length;

    if (avgErrors > 1) {
      areas.push('error_rate');
    }

    // Check token efficiency
    const avgTokenEfficiency = this.calculateAverageTokenEfficiency(sessions);
    if (avgTokenEfficiency < 0.5) {
      areas.push('token_efficiency');
    }

    return areas;
  }

  /**
   * Analyze task types.
   *
   * @param {object[]} sessions
   * @returns {Object<string, object>} Per-task-type counts, averages and
   *   `bestPersona` (the persona with the most sessions of that type).
   */
  analyzeTaskTypes(sessions) {
    const taskAnalysis = {};

    for (const session of sessions) {
      const taskType = session.task.type;

      if (!taskAnalysis[taskType]) {
        taskAnalysis[taskType] = {
          count: 0,
          totalDuration: 0,
          completedCount: 0,
          totalQuality: 0,
          qualityCount: 0,
          personas: {},
        };
      }

      const analysis = taskAnalysis[taskType];
      analysis.count++;
      analysis.totalDuration += session.duration;

      if (session.metrics.completionStatus === 'completed') {
        analysis.completedCount++;
      }

      if (session.qualityMetrics) {
        analysis.totalQuality += session.qualityMetrics.overallScore;
        analysis.qualityCount++;
      }

      // Track which personas handle this task type
      analysis.personas[session.persona] = (analysis.personas[session.persona] || 0) + 1;
    }

    // Calculate averages
    for (const analysis of Object.values(taskAnalysis)) {
      analysis.averageDuration = analysis.totalDuration / analysis.count;
      analysis.completionRate = analysis.completedCount / analysis.count;
      analysis.averageQuality =
        analysis.qualityCount > 0 ? analysis.totalQuality / analysis.qualityCount : 0;

      // Find best persona for this task type
      analysis.bestPersona = Object.entries(analysis.personas).sort(
        ([, a], [, b]) => b - a
      )[0]?.[0];
    }

    return taskAnalysis;
  }

  /**
   * Generate recommendations.
   *
   * @param {object[]} sessions
   * @returns {Array<{type: string, priority: string, message: string, data: object}>}
   */
  generateRecommendations(sessions) {
    const recommendations = [];

    // Analyze overall patterns
    const personaAnalysis = this.analyzePersonaEffectiveness(sessions);
    const taskAnalysis = this.analyzeTaskTypes(sessions);

    // Recommendation 1: Underutilized personas
    for (const [persona, stats] of Object.entries(personaAnalysis)) {
      if (stats.sessionCount < sessions.length * 0.1) {
        recommendations.push({
          type: 'persona_usage',
          priority: 'medium',
          message: `Consider using ${persona} persona more often for ${stats.bestUseCases.join(', ')} tasks`,
          data: { persona, usage: stats.sessionCount },
        });
      }
    }

    // Recommendation 2: Task-persona mismatches
    for (const [taskType, analysis] of Object.entries(taskAnalysis)) {
      if (analysis.averageQuality < 0.6 && analysis.count > 3) {
        recommendations.push({
          type: 'task_persona_match',
          priority: 'high',
          message: `${taskType} tasks have low quality scores. Try using ${analysis.bestPersona} persona`,
          data: { taskType, currentQuality: analysis.averageQuality },
        });
      }
    }

    // Recommendation 3: High error rates
    for (const [persona, stats] of Object.entries(personaAnalysis)) {
      if (stats.improvementAreas.includes('error_rate')) {
        recommendations.push({
          type: 'error_rate',
          priority: 'high',
          message: `${persona} persona has high error rate. Review context rules and tool recommendations`,
          data: { persona, areas: stats.improvementAreas },
        });
      }
    }

    return recommendations;
  }

  /**
   * Load metrics from disk and merge them with the in-memory state. Loaded
   * sessions are placed before in-memory ones and the list is trimmed to
   * `maxHistorySize`.
   *
   * Loading is best-effort: on any failure the collector keeps its current
   * state. A missing file is expected on first run; other failures (corrupt
   * JSON, permissions) are reported through the `error` event.
   */
  async loadMetrics() {
    try {
      const metricsFile = path.join(this.options.metricsPath, 'metrics.json');
      const data = await fs.readFile(metricsFile, 'utf-8');
      const loaded = JSON.parse(data);

      // Merge with current metrics
      this.metrics = {
        ...this.metrics,
        ...loaded,
        sessions: [...(loaded.sessions || []), ...this.metrics.sessions],
      };

      // Limit to max history size
      if (this.metrics.sessions.length > this.options.maxHistorySize) {
        this.metrics.sessions = this.metrics.sessions.slice(-this.options.maxHistorySize);
      }
    } catch (error) {
      // No existing metrics - start fresh. Anything else is worth surfacing.
      if (error?.code !== 'ENOENT') {
        this.emitErrorSafely(error, 'load');
      }
    }
  }

  /**
   * Persist metrics to disk as pretty-printed JSON. Failures are reported via
   * the `error` event and never reject.
   */
  async persistMetrics() {
    try {
      const metricsFile = path.join(this.options.metricsPath, 'metrics.json');
      await fs.writeFile(metricsFile, JSON.stringify(this.metrics, null, 2), 'utf-8');

      this.emit('metricsPersisted', {
        sessionCount: this.metrics.sessions.length,
      });
    } catch (error) {
      this.emitErrorSafely(error, 'persist');
    }
  }

  /**
   * Start persist timer. Any previously running timer is cleared first so
   * repeated `init()` calls do not stack intervals.
   */
  startPersistTimer() {
    if (this.persistTimer) {
      clearInterval(this.persistTimer);
    }

    this.persistTimer = setInterval(
      () => this.persistMetrics(),
      this.options.persistInterval
    );
  }

  /**
   * Generate session ID.
   *
   * @returns {string} `session_<epoch ms>_<up to 9 base-36 chars>`.
   */
  generateSessionId() {
    // slice(2, 11) is the non-deprecated equivalent of substr(2, 9).
    return `session_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /**
   * Get current session metrics.
   *
   * @returns {Promise<object>} Live metrics, or `{ message }` when idle.
   */
  async getCurrentSessionMetrics() {
    if (!this.currentSession) {
      return { message: 'No active session' };
    }

    return await this.collectCurrentMetrics();
  }

  /**
   * Reset metrics: drop all in-memory data (including the active session),
   * persist the empty state and emit `reset`.
   */
  async reset() {
    this.metrics = {
      sessions: [],
      personaPerformance: {},
      taskOutcomes: [],
      aggregated: {},
    };

    this.currentSession = null;

    await this.persistMetrics();

    this.emit('reset');
  }

  /**
   * Cleanup resources: stop the persist timer, write a final snapshot and
   * remove all listeners.
   */
  async cleanup() {
    if (this.persistTimer) {
      clearInterval(this.persistTimer);
      this.persistTimer = null;
    }

    // Persist final metrics
    await this.persistMetrics();

    this.removeAllListeners();
  }
}

export default MetricsCollector;
export { MetricsCollector };