UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.

312 lines (311 loc) 11.4 kB
/**
 * workflow/utils/workflowMetrics.ts
 * Metrics tracking and collection for workflow execution
 */
import { logger } from "../../utils/logger.js";

const functionTag = "WorkflowMetrics";

/**
 * In-memory metrics storage (can be replaced with persistent storage).
 * Keyed by workflow id. It lives at module scope, so every WorkflowMetrics
 * instance created from this module reads and writes the same entries.
 */
const metricsStore = new Map();

// ============================================================================
// METRICS COLLECTION
// ============================================================================

/**
 * Workflow metrics tracker.
 *
 * Each stored entry has the shape:
 * `{ workflowId, executionCount, successCount, failureCount,
 *    averageExecutionTime, averageScore, averageConfidence,
 *    totalCost, lastExecutionTime }`.
 */
export class WorkflowMetrics {
  /**
   * Record a successful workflow execution.
   *
   * The running averages (time, score, confidence) are maintained over
   * successful executions only: `recordFailure` has no time/score/confidence
   * sample to contribute, so weighting by the total execution count would
   * skew the averages as soon as a failure is recorded.
   *
   * @param {string} workflowId - Key under which the metrics are stored.
   * @param {object} result - Execution result; reads `totalTime`, `score`,
   *   `confidence`, `cost` (optional, falsy values count as 0) and `timestamp`.
   * @returns {void}
   */
  recordExecution(workflowId, result) {
    const existing = metricsStore.get(workflowId);
    if (!existing) {
      // First sighting of this workflow: the sample is the average.
      metricsStore.set(workflowId, {
        workflowId,
        executionCount: 1,
        successCount: 1,
        failureCount: 0,
        averageExecutionTime: result.totalTime,
        averageScore: result.score,
        averageConfidence: result.confidence,
        totalCost: result.cost || 0,
        lastExecutionTime: result.timestamp,
      });
    } else {
      const previousSuccesses = existing.successCount;
      const successCount = previousSuccesses + 1;
      // Incremental mean over successful runs. When only failures were
      // recorded so far the weight is 0 and the new sample becomes the average.
      const rollAverage = (previousAverage, sample) =>
        (previousAverage * previousSuccesses + sample) / successCount;
      metricsStore.set(workflowId, {
        ...existing,
        executionCount: existing.executionCount + 1,
        successCount,
        averageExecutionTime: rollAverage(existing.averageExecutionTime, result.totalTime),
        averageScore: rollAverage(existing.averageScore, result.score),
        averageConfidence: rollAverage(existing.averageConfidence, result.confidence),
        totalCost: existing.totalCost + (result.cost || 0),
        lastExecutionTime: result.timestamp,
      });
    }
    logger.debug(`[${functionTag}] Recorded workflow execution`, {
      workflowId,
      totalExecutions: metricsStore.get(workflowId)?.executionCount,
    });
  }

  /**
   * Record a workflow failure.
   *
   * Increments the execution and failure counters and stamps
   * `lastExecutionTime` with the current time (ISO-8601 string). The running
   * averages and total cost are left untouched.
   *
   * @param {string} workflowId - Key under which the metrics are stored.
   * @param {unknown} error - The failure cause. Its `message` is logged when
   *   present; otherwise the value is stringified, so a missing or non-Error
   *   value cannot make the bookkeeping itself throw.
   * @returns {void}
   */
  recordFailure(workflowId, error) {
    const existing = metricsStore.get(workflowId);
    const lastExecutionTime = new Date().toISOString();
    if (!existing) {
      // Initialize with failure
      metricsStore.set(workflowId, {
        workflowId,
        executionCount: 1,
        successCount: 0,
        failureCount: 1,
        averageExecutionTime: 0,
        averageScore: 0,
        averageConfidence: 0,
        totalCost: 0,
        lastExecutionTime,
      });
    } else {
      // Update with failure
      metricsStore.set(workflowId, {
        ...existing,
        executionCount: existing.executionCount + 1,
        failureCount: existing.failureCount + 1,
        lastExecutionTime,
      });
    }
    logger.warn(`[${functionTag}] Recorded workflow failure`, {
      workflowId,
      error: error?.message ?? String(error),
      totalFailures: metricsStore.get(workflowId)?.failureCount,
    });
  }

  /**
   * Get metrics for a specific workflow.
   *
   * @param {string} workflowId - Workflow to look up.
   * @returns {object|undefined} The stored entry, or `undefined` when nothing
   *   has been recorded for this id.
   */
  getMetrics(workflowId) {
    return metricsStore.get(workflowId);
  }

  /**
   * Get all workflow metrics.
   *
   * @returns {object[]} A new array holding every stored entry.
   */
  getAllMetrics() {
    return Array.from(metricsStore.values());
  }

  /**
   * Clear metrics for a workflow.
   *
   * @param {string} workflowId - Workflow whose entry is removed.
   * @returns {void}
   */
  clearMetrics(workflowId) {
    metricsStore.delete(workflowId);
    logger.debug(`[${functionTag}] Cleared metrics`, { workflowId });
  }

  /**
   * Clear all metrics.
   *
   * @returns {void}
   */
  clearAllMetrics() {
    metricsStore.clear();
    logger.debug(`[${functionTag}] Cleared all metrics`);
  }

  /**
   * Export metrics as JSON.
   *
   * @returns {string} All stored entries as a JSON array, indented by 2 spaces.
   */
  exportMetrics() {
    const metrics = this.getAllMetrics();
    return JSON.stringify(metrics, null, 2);
  }
}

// ============================================================================
// ANALYTICS HELPERS
// ============================================================================

/**
 * Calculate model-specific metrics from ensemble responses.
 *
 * Responses are grouped by `"<provider>/<model>"`.
 *
 * @param {object[]} responses - Ensemble responses; reads `provider`, `model`,
 *   `status` and `responseTime` from each.
 * @returns {Object<string, {successRate: number, avgResponseTime: number}>}
 *   Per-model share of responses whose status is `"success"` and mean
 *   `responseTime` over all of that model's responses.
 */
export function calculateModelMetrics(responses) {
  const modelStats = new Map();
  for (const response of responses) {
    const key = `${response.provider}/${response.model}`;
    const tally = modelStats.get(key) ?? { total: 0, successful: 0, totalTime: 0 };
    modelStats.set(key, {
      total: tally.total + 1,
      successful: tally.successful + (response.status === "success" ? 1 : 0),
      totalTime: tally.totalTime + response.responseTime,
    });
  }
  const result = {};
  for (const [key, stats] of modelStats) {
    result[key] = {
      successRate: stats.successful / stats.total,
      avgResponseTime: stats.totalTime / stats.total,
    };
  }
  return result;
}

/**
 * Calculate consensus level between responses.
 * NOTE: Placeholder implementation - uses response length similarity
 * TODO: Implement semantic similarity in Phase 2
 *
 * @param {object[]} responses - Ensemble responses; only those whose `status`
 *   is `"success"` are considered, and only the length of `content` is used.
 *   A successful response without `content` is treated as zero-length.
 * @returns {number} A value in [0, 1]: `1 - stdDev / mean` of the content
 *   lengths, floored at 0. Returns 1 when fewer than two responses succeeded
 *   and 0 when every successful response is empty.
 */
export function calculateConsensus(responses) {
  const successful = responses.filter((r) => r.status === "success");
  if (successful.length < 2) {
    return 1.0; // Perfect consensus with single response
  }
  // Simple length-based similarity (placeholder)
  const lengths = successful.map((r) => r.content?.length ?? 0);
  const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
  if (avgLength === 0) {
    logger.warn(`[${functionTag}] All responses have zero length - semantic similarity needed for accurate consensus`);
    return 0;
  }
  const variance = lengths.reduce((sum, len) => sum + (len - avgLength) ** 2, 0) / lengths.length;
  const stdDev = Math.sqrt(variance);
  // Lower relative spread = higher consensus. stdDev >= 0 and avgLength > 0,
  // so the value can never exceed 1; only the lower bound needs clamping.
  return Math.max(0, 1 - stdDev / avgLength);
}

/**
 * Calculate confidence score from judge results and ensemble data.
 *
 * Sources are tried in order:
 *  1. `judgeConfidence`, when it is an actual number (clamped to [0, 1]);
 *     `undefined`, `null` and `NaN` fall through to the next source.
 *  2. `scores`, when non-empty: 60% of the best score plus 40% of the mean
 *     score, each divided by 100, clamped to [0, 1].
 *  3. The share of ensemble responses whose status is `"success"`
 *     (0 when there are no responses).
 *
 * @param {object[]} ensembleResponses - Ensemble responses; reads `status`.
 * @param {number} [judgeConfidence] - Confidence reported by the judge.
 * @param {Object<string, number>} [scores] - Judge scores, treated as being on
 *   a 0-100 scale.
 * @returns {number} Confidence in [0, 1].
 */
export function calculateConfidence(ensembleResponses, judgeConfidence, scores) {
  const clampToUnit = (value) => Math.min(1, Math.max(0, value));
  // If judge provided confidence, use it
  if (typeof judgeConfidence === "number" && !Number.isNaN(judgeConfidence)) {
    return clampToUnit(judgeConfidence);
  }
  // Calculate from judge scores
  const scoreValues = scores ? Object.values(scores) : [];
  if (scoreValues.length > 0) {
    const maxScore = Math.max(...scoreValues);
    const avgScore = scoreValues.reduce((a, b) => a + b, 0) / scoreValues.length;
    // Normalize 0-100 scores to 0-1
    const maxNormalized = maxScore / 100;
    const avgNormalized = avgScore / 100;
    // Combine max and average (weighted 60/40); clamp so out-of-scale scores
    // cannot push the confidence outside [0, 1].
    return clampToUnit(maxNormalized * 0.6 + avgNormalized * 0.4);
  }
  // Fallback: based on success rate
  if (ensembleResponses.length === 0) {
    return 0;
  }
  const successCount = ensembleResponses.filter((r) => r.status === "success").length;
  return successCount / ensembleResponses.length;
}

/**
 * Format metrics for logging
 * @param result - Workflow result to format
 * @returns Formatted metrics as JSON-compatible record; optional fields that
 *   are missing on the result are reported as `null`
 */
export function formatMetricsForLogging(result) {
  const { ensembleResponses, selectedResponse } = result;
  return {
    workflowId: result.workflow,
    workflowType: result.analytics?.workflowType ?? null,
    totalTime: result.totalTime,
    ensembleTime: result.ensembleTime,
    judgeTime: result.judgeTime ?? null,
    score: result.score,
    reasoning: result.reasoning,
    confidence: result.confidence,
    consensus: result.consensus ?? null,
    modelsExecuted: ensembleResponses.length,
    modelsSuccessful: ensembleResponses.filter((r) => r.status === "success").length,
    selectedModel: selectedResponse
      ? `${selectedResponse.provider}/${selectedResponse.model}`
      : null,
    totalTokens: result.usage?.totalTokens ?? null,
    estimatedCost: result.cost ?? null,
    timestamp: result.timestamp,
  };
}

/**
 * Generate summary statistics for multiple executions
 * @param results - Array of workflow results to analyze; reads `score`,
 *   `confidence`, `totalTime` and `cost` (falsy cost counts as 0)
 * @returns Summary statistics including averages and success rate. A result
 *   counts as successful when its score is greater than 0. All fields are 0
 *   for an empty input.
 */
export function generateSummaryStats(results) {
  const totalExecutions = results.length;
  if (totalExecutions === 0) {
    return {
      totalExecutions: 0,
      averageScore: 0,
      averageConfidence: 0,
      averageExecutionTime: 0,
      successRate: 0,
      totalCost: 0,
    };
  }
  const totalScore = results.reduce((sum, r) => sum + r.score, 0);
  const totalConfidence = results.reduce((sum, r) => sum + r.confidence, 0);
  const totalTime = results.reduce((sum, r) => sum + r.totalTime, 0);
  const totalCost = results.reduce((sum, r) => sum + (r.cost || 0), 0);
  const successCount = results.filter((r) => r.score > 0).length;
  return {
    totalExecutions,
    averageScore: totalScore / totalExecutions,
    averageConfidence: totalConfidence / totalExecutions,
    averageExecutionTime: totalTime / totalExecutions,
    successRate: successCount / totalExecutions,
    totalCost,
  };
}

/**
 * Compare two workflows based on metrics
 * @param workflow1Results - Results from first workflow
 * @param workflow2Results - Results from second workflow
 * @returns Comparison with stats for both workflows and winner determination.
 *   `winner` is `"tie"` when the composite scores differ by less than 5,
 *   otherwise `"workflow1"` or `"workflow2"`.
 */
export function compareWorkflows(workflow1Results, workflow2Results) {
  // Simple scoring: 40% quality (score), 30% confidence, 20% speed, 10% cost
  const compositeScore = (stats) => {
    // A zero average time carries no speed information; score it as 0
    // instead of dividing by zero.
    const speedScore = stats.averageExecutionTime > 0
      ? (1 / stats.averageExecutionTime) * 10000 * 0.2
      : 0;
    return (stats.averageScore * 0.4 +
      stats.averageConfidence * 100 * 0.3 +
      speedScore +
      (1 / (stats.totalCost + 1)) * 100 * 0.1);
  };
  const stats1 = generateSummaryStats(workflow1Results);
  const stats2 = generateSummaryStats(workflow2Results);
  const score1 = compositeScore(stats1);
  const score2 = compositeScore(stats2);
  let winner;
  let reasoning;
  if (Math.abs(score1 - score2) < 5) {
    winner = "tie";
    reasoning = "Workflows perform similarly overall";
  } else if (score1 > score2) {
    winner = "workflow1";
    reasoning = `Workflow 1 scores higher (${score1.toFixed(2)} vs ${score2.toFixed(2)})`;
  } else {
    winner = "workflow2";
    reasoning = `Workflow 2 scores higher (${score2.toFixed(2)} vs ${score1.toFixed(2)})`;
  }
  return {
    workflow1: stats1,
    workflow2: stats2,
    winner,
    reasoning,
  };
}