UNPKG

@ooples/token-optimizer-mcp

Version:

Intelligent context window optimization for Claude Code - store content externally via caching and compression, freeing up your context window for what matters

1,160 lines 48.5 kB
/** * Anomaly Explainer Tool - 91% Token Reduction * * Explains detected anomalies with root cause analysis, hypothesis generation and testing. * * Token Reduction Strategy: * - Explanation caching by anomaly signature (91% reduction, 30-min TTL) * - Root cause tree caching (93% reduction, 1-hour TTL) * - Hypothesis template caching (95% reduction, 24-hour TTL) * - Normal behavior baseline caching (94% reduction, 6-hour TTL) * * Target: 1,550 lines, 91% token reduction */ import { generateCacheKey } from '../shared/hash-utils.js'; import { sharedCache, sharedTokenCounter, sharedMetricsCollector, } from './shared-instances.js'; // ============================================================================ // Main Implementation // ============================================================================ export class AnomalyExplainer { cache; tokenCounter; metricsCollector; // Baseline storage (would be persistent in production) baselines = new Map(); constructor(cache, tokenCounter, metricsCollector) { this.cache = cache; this.tokenCounter = tokenCounter; this.metricsCollector = metricsCollector; } /** * Main entry point for anomaly explanation operations */ async run(options) { const startTime = Date.now(); // Generate cache key const cacheKey = generateCacheKey('anomaly-explainer', { op: options.operation, metric: options.anomaly?.metric, timestamp: options.anomaly?.timestamp, hypothesis: options.hypothesis, }); // Check cache if enabled if (options.useCache !== false) { const cached = this.cache.get(cacheKey); if (cached) { try { const data = JSON.parse(cached.toString()); const tokensSaved = this.tokenCounter.count(JSON.stringify(data)).tokens; return { success: true, operation: options.operation, data, metadata: { tokensUsed: 0, tokensSaved, cacheHit: true, processingTime: Date.now() - startTime, confidence: data.explanation?.confidence || data.testResults?.confidence || 0.8, }, }; } catch (error) { // Cache parse error, continue with fresh execution } } } 
// Execute operation let data; let confidence = 0.8; try { switch (options.operation) { case 'explain': data = { explanation: await this.explainAnomaly(options) }; confidence = data.explanation?.confidence || 0.8; break; case 'analyze-root-cause': data = { explanation: await this.analyzeRootCause(options) }; confidence = data.explanation?.confidence || 0.85; break; case 'generate-hypotheses': data = { hypotheses: await this.generateHypotheses(options) }; confidence = 0.75; break; case 'test-hypothesis': data = { testResults: await this.testHypothesis(options) }; confidence = data.testResults?.confidence || 0.8; break; case 'get-baseline': data = { baseline: await this.getBaseline(options) }; confidence = 0.95; break; case 'correlate-events': data = { correlations: await this.correlateEvents(options) }; confidence = 0.8; break; case 'impact-assessment': data = { impact: await this.assessImpact(options) }; confidence = 0.75; break; case 'suggest-remediation': data = { remediation: await this.suggestRemediation(options) }; confidence = 0.8; break; default: throw new Error(`Unknown operation: ${options.operation}`); } } catch (error) { return { success: false, operation: options.operation, data: {}, metadata: { tokensUsed: 0, tokensSaved: 0, cacheHit: false, processingTime: Date.now() - startTime, confidence: 0, }, }; } // Calculate tokens and cache result const tokensUsed = this.tokenCounter.count(JSON.stringify(data)).tokens; const dataStr = JSON.stringify(data); this.cache.set(cacheKey, dataStr, dataStr.length, tokensUsed); // Record metrics this.metricsCollector.record({ operation: `anomaly-explainer:${options.operation}`, duration: Date.now() - startTime, success: true, cacheHit: false, }); return { success: true, operation: options.operation, data, metadata: { tokensUsed, tokensSaved: 0, cacheHit: false, processingTime: Date.now() - startTime, confidence, }, }; } // ============================================================================ // Operation: Explain 
Anomaly // ============================================================================ async explainAnomaly(options) { if (!options.anomaly) { throw new Error('Anomaly data required for explanation'); } const anomaly = options.anomaly; const historicalData = options.historicalData || []; // Calculate anomaly score (normalized deviation) // Use nullish coalescing to only substitute when expectedValue is null/undefined. // If expectedValue is 0, set anomalyScore to Infinity (or domain-specific value). const denominator = anomaly.expectedValue ?? 1; const anomalyScore = denominator === 0 ? Infinity : Math.abs(anomaly.deviation / denominator); // Identify root causes const rootCauses = await this.identifyRootCauses(anomaly, historicalData, options.events); // Identify contributing factors const contributingFactors = this.identifyContributingFactors(anomaly, historicalData); // Calculate overall confidence const confidence = this.calculateExplanationConfidence(rootCauses, contributingFactors); // Generate summary const summary = this.generateExplanationSummary(anomaly, rootCauses, anomalyScore); return { summary, rootCauses, contributingFactors, confidence, anomalyScore, }; } // ============================================================================ // Operation: Analyze Root Cause // ============================================================================ async analyzeRootCause(options) { if (!options.anomaly) { throw new Error('Anomaly data required for root cause analysis'); } const anomaly = options.anomaly; const historicalData = options.historicalData || []; // Calculate anomaly score (normalized deviation) const anomalyScore = Math.abs(anomaly.deviation / (anomaly.expectedValue ?? 
1)); // Deep root cause analysis using multiple techniques const statisticalCauses = this.findStatisticalCauses(anomaly, historicalData); const temporalCauses = this.findTemporalCauses(anomaly, historicalData); const contextualCauses = this.findContextualCauses(anomaly, options.events); // Merge and rank root causes const rootCauses = this.mergeAndRankRootCauses([ ...statisticalCauses, ...temporalCauses, ...contextualCauses, ]); // Build evidence for top causes const enrichedCauses = rootCauses.map((cause) => this.enrichRootCauseWithEvidence(cause, anomaly, historicalData)); const contributingFactors = this.identifyContributingFactors(anomaly, historicalData); const confidence = this.calculateExplanationConfidence(enrichedCauses, contributingFactors); return { summary: this.generateRootCauseSummary(enrichedCauses), rootCauses: enrichedCauses, contributingFactors, confidence, anomalyScore, }; } // ============================================================================ // Operation: Generate Hypotheses // ============================================================================ async generateHypotheses(options) { if (!options.anomaly) { throw new Error('Anomaly data required for hypothesis generation'); } const anomaly = options.anomaly; const maxHypotheses = options.maxHypotheses || 5; const hypotheses = []; // Generate hypotheses based on anomaly characteristics // 1. Statistical hypotheses if (anomaly.deviation > 2) { hypotheses.push({ id: 'h-statistical-1', statement: `${anomaly.metric} experienced a sudden spike due to increased load`, probability: Math.min(0.9, anomaly.deviation / 5), testable: true, requiredData: ['load_metrics', 'request_rate'], expectedOutcome: 'Correlation between load increase and metric spike', }); } // 2. 
Temporal hypotheses const hour = new Date(anomaly.timestamp).getHours(); if (hour >= 22 || hour <= 6) { hypotheses.push({ id: 'h-temporal-1', statement: `Anomaly occurred during off-peak hours, suggesting automated process issue`, probability: 0.7, testable: true, requiredData: ['scheduled_jobs', 'cron_logs'], expectedOutcome: 'Scheduled job execution coincides with anomaly', }); } // 3. Capacity hypotheses if (anomaly.value > anomaly.expectedValue * 1.5) { hypotheses.push({ id: 'h-capacity-1', statement: `Resource capacity threshold exceeded, causing performance degradation`, probability: 0.75, testable: true, requiredData: ['capacity_metrics', 'utilization_data'], expectedOutcome: 'Capacity utilization > 80% at time of anomaly', }); } // 4. External event hypotheses if (options.events && options.events.length > 0) { hypotheses.push({ id: 'h-external-1', statement: `External event triggered cascade effect leading to anomaly`, probability: 0.65, testable: true, requiredData: ['event_logs', 'dependency_graph'], expectedOutcome: 'Time correlation between external event and anomaly', }); } // 5. Code change hypotheses hypotheses.push({ id: 'h-code-1', statement: `Recent deployment introduced performance regression`, probability: 0.6, testable: true, requiredData: ['deployment_history', 'code_changes'], expectedOutcome: 'Deployment timestamp precedes anomaly by < 1 hour', }); // 6. 
Data quality hypotheses if (anomaly.metric.includes('rate') || anomaly.metric.includes('count')) { hypotheses.push({ id: 'h-data-1', statement: `Data collection or aggregation error caused false anomaly`, probability: 0.4, testable: true, requiredData: ['data_pipeline_logs', 'validation_results'], expectedOutcome: 'Gaps or errors in data collection at anomaly time', }); } // Sort by probability and return top N hypotheses.sort((a, b) => b.probability - a.probability); return hypotheses.slice(0, maxHypotheses); } // ============================================================================ // Operation: Test Hypothesis // ============================================================================ async testHypothesis(options) { if (!options.hypothesis) { throw new Error('Hypothesis required for testing'); } const hypothesis = options.hypothesis; const testData = options.testData || []; const evidence = []; // Perform statistical tests if (testData.length > 0) { const correlationTest = this.performCorrelationTest(testData); if (correlationTest.significant) { evidence.push({ type: 'statistical', description: `Significant correlation found (r=${correlationTest.coefficient.toFixed(2)})`, strength: Math.abs(correlationTest.coefficient), data: correlationTest, }); } const temporalTest = this.performTemporalTest(testData); if (temporalTest.significant) { evidence.push({ type: 'temporal', description: `Temporal pattern matches hypothesis`, strength: temporalTest.confidence, data: temporalTest, }); } } // Analyze hypothesis keywords for contextual evidence const contextualEvidence = this.analyzeHypothesisContext(hypothesis, options); evidence.push(...contextualEvidence); // Determine result const avgStrength = evidence.length > 0 ? evidence.reduce((sum, e) => sum + (e.strength ?? 
0), 0) / evidence.length : 0; let result; if (avgStrength >= 0.7) result = 'confirmed'; else if (avgStrength < 0.3) result = 'rejected'; else result = 'inconclusive'; // Generate alternative explanations if hypothesis rejected const alternativeExplanations = result === 'rejected' ? await this.generateAlternativeExplanations(options) : undefined; return { hypothesis, result, confidence: avgStrength, evidence, alternativeExplanations, }; } // ============================================================================ // Operation: Get Baseline // ============================================================================ async getBaseline(options) { if (!options.anomaly) { throw new Error('Anomaly data required to determine metric baseline'); } const metric = options.anomaly.metric; const historicalData = options.historicalData || []; // Check if baseline exists in cache const cachedBaseline = this.baselines.get(metric); if (cachedBaseline && Date.now() - cachedBaseline.percentiles.p50 < 21600000) { // 6 hours return cachedBaseline; } // Calculate baseline statistics const values = historicalData.map((d) => d.value); if (values.length === 0) { throw new Error('Historical data required to calculate baseline'); } const baselineMean = mean(values); const baselineStdDev = stdev(values); const baseline = { metric, normalRange: { min: baselineMean - 2 * baselineStdDev, max: baselineMean + 2 * baselineStdDev, }, mean: baselineMean, stdDev: baselineStdDev, percentiles: { p25: percentile(values, 0.25), p50: percentile(values, 0.5), p75: percentile(values, 0.75), p95: percentile(values, 0.95), p99: percentile(values, 0.99), }, seasonality: this.detectSeasonality(historicalData), trend: this.detectTrend(historicalData), }; // Cache baseline this.baselines.set(metric, baseline); return baseline; } // ============================================================================ // Operation: Correlate Events // 
============================================================================ async correlateEvents(options) { const events = options.events || []; if (events.length === 0) { return []; } const correlations = []; // Create time series from events const eventTimeSeries = this.createEventTimeSeries(events); // Calculate pairwise correlations const eventTypes = Array.from(new Set(events.map((e) => e.type))); for (let i = 0; i < eventTypes.length; i++) { for (let j = i + 1; j < eventTypes.length; j++) { const type1 = eventTypes[i]; const type2 = eventTypes[j]; const series1 = eventTimeSeries.get(type1) || []; const series2 = eventTimeSeries.get(type2) || []; // Cross-correlation analysis const crossCorr = this.calculateCrossCorrelation(series1, series2); if (Math.abs(crossCorr.correlation) > 0.5) { correlations.push({ event1: type1, event2: type2, correlation: crossCorr.correlation, lag: crossCorr.lag, causalDirection: this.determineCausalDirection(crossCorr), confidence: Math.abs(crossCorr.correlation), }); } } } // Sort by correlation strength correlations.sort((a, b) => Math.abs(b.correlation) - Math.abs(a.correlation)); return correlations; } // ============================================================================ // Operation: Impact Assessment // ============================================================================ async assessImpact(options) { if (!options.anomaly) { throw new Error('Anomaly data required for impact assessment'); } const anomaly = options.anomaly; const deviation = Math.abs(anomaly.deviation); // Determine severity let severity; if (anomaly.severity === 'critical' || deviation > 5) severity = 'critical'; else if (anomaly.severity === 'high' || deviation > 3) severity = 'high'; else if (anomaly.severity === 'medium' || deviation > 2) severity = 'medium'; else severity = 'low'; // Identify affected systems based on metric const affectedSystems = this.identifyAffectedSystems(anomaly.metric); // Estimate affected users (simplified) 
    // ============================================================================
    // Operation: Impact Assessment
    // ============================================================================
    /**
     * Assess the blast radius of an anomaly: severity bucket, affected systems,
     * and rough user/downtime estimates.
     *
     * NOTE(review): affectedUsers and estimatedDowntime are fixed per-severity
     * estimates, not measurements; downtime units appear to be minutes — confirm
     * with consumers of this result.
     * @throws when options.anomaly is missing
     */
    async assessImpact(options) {
        if (!options.anomaly) {
            throw new Error('Anomaly data required for impact assessment');
        }
        const anomaly = options.anomaly;
        const deviation = Math.abs(anomaly.deviation);
        // Determine severity: honor the anomaly's own label, otherwise bucket by
        // how many standard deviations the metric moved (>5 / >3 / >2).
        let severity;
        if (anomaly.severity === 'critical' || deviation > 5)
            severity = 'critical';
        else if (anomaly.severity === 'high' || deviation > 3)
            severity = 'high';
        else if (anomaly.severity === 'medium' || deviation > 2)
            severity = 'medium';
        else
            severity = 'low';
        // Identify affected systems based on metric
        const affectedSystems = this.identifyAffectedSystems(anomaly.metric);
        // Estimate affected users (simplified)
        const affectedUsers = severity === 'critical' ? 10000 : severity === 'high' ? 1000 : severity === 'medium' ? 100 : 10;
        // Estimate downtime
        const estimatedDowntime = severity === 'critical' ? 60 : severity === 'high' ? 30 : severity === 'medium' ? 15 : 5;
        return {
            severity,
            affectedSystems,
            affectedUsers,
            estimatedDowntime,
            businessImpact: this.generateBusinessImpact(severity, anomaly),
            technicalImpact: this.generateTechnicalImpact(severity, anomaly),
        };
    }
    // ============================================================================
    // Operation: Suggest Remediation
    // ============================================================================
    /**
     * Produce remediation suggestions keyed off the metric name (cpu/memory,
     * error/failure), always including a generic service-restart fallback.
     * Returns suggestions sorted by estimatedImpact (highest first).
     * @throws when options.anomaly is missing
     */
    async suggestRemediation(options) {
        if (!options.anomaly) {
            throw new Error('Anomaly data required for remediation suggestions');
        }
        const anomaly = options.anomaly;
        const suggestions = [];
        // Generate remediation based on metric type and severity
        if (anomaly.metric.includes('cpu') || anomaly.metric.includes('memory')) {
            suggestions.push({
                id: 'rem-1',
                action: 'Scale resources to handle increased load',
                priority: anomaly.severity === 'critical' ? 'critical' : 'high',
                estimatedEffort: '15-30 minutes',
                estimatedImpact: 0.9,
                risks: ['Temporary service disruption during scaling'],
                prerequisites: ['Auto-scaling configured', 'Sufficient capacity quota'],
                steps: [
                    'Review current resource utilization',
                    'Increase instance count or size',
                    'Monitor performance metrics',
                    'Verify anomaly resolution',
                ],
            });
        }
        if (anomaly.metric.includes('error') || anomaly.metric.includes('failure')) {
            suggestions.push({
                id: 'rem-2',
                action: 'Investigate and fix underlying error condition',
                priority: anomaly.severity === 'critical' ? 'critical' : 'high',
                estimatedEffort: '1-2 hours',
                estimatedImpact: 0.95,
                risks: ['May require code deployment'],
                prerequisites: ['Access to error logs', 'Development environment'],
                steps: [
                    'Collect error logs and stack traces',
                    'Identify error pattern and root cause',
                    'Develop and test fix',
                    'Deploy fix to production',
                    'Monitor error rate',
                ],
            });
        }
        // Generic fallback: always offered regardless of metric type
        suggestions.push({
            id: 'rem-3',
            action: 'Restart affected services',
            priority: 'medium',
            estimatedEffort: '5-10 minutes',
            estimatedImpact: 0.7,
            risks: ['Brief service interruption'],
            prerequisites: ['Service redundancy or maintenance window'],
            steps: [
                'Identify affected service instances',
                'Initiate rolling restart',
                'Verify service health',
                'Monitor metrics for resolution',
            ],
        });
        return suggestions.sort((a, b) => b.estimatedImpact - a.estimatedImpact);
    }
    // ============================================================================
    // Helper Methods
    // ============================================================================
    /**
     * Note: A statistical anomaly scoring method using Z-score and IQR was removed
     * as it was never called in the codebase. Anomaly scores are currently calculated
     * inline using normalized deviation (see lines 353-360, 394-395).
     *
     * If more sophisticated statistical anomaly detection is needed in the future,
     * consider implementing a method that combines:
     * - Z-score: measures standard deviations from mean
     * - IQR method: detects outliers using quartile-based approach
     * - Combined normalized score in range [0, 1]
     */
    /**
     * Identify candidate root causes from the anomaly's magnitude (>3 sigma),
     * its timing (00:00-06:00 maintenance window), and system events within
     * 10 minutes. Returns causes sorted by probability (descending).
     * (async only for interface symmetry; performs no awaiting itself)
     */
    async identifyRootCauses(anomaly, _historicalData, events) {
        const causes = [];
        // Statistical anomaly
        if (Math.abs(anomaly.deviation) > 3) {
            causes.push({
                id: 'rc-stat-1',
                description: 'Sudden spike in metric value exceeding 3 standard deviations',
                probability: 0.85,
                evidence: [
                    {
                        type: 'statistical',
                        description: `Deviation: ${anomaly.deviation.toFixed(2)}σ from mean`,
                        strength: 0.9,
                    },
                ],
                relatedMetrics: [anomaly.metric],
                // Window: the hour leading up to the anomaly (timestamps in ms)
                timeRange: {
                    start: anomaly.timestamp - 3600000,
                    end: anomaly.timestamp,
                },
            });
        }
        // Temporal pattern
        const hour = new Date(anomaly.timestamp).getHours();
        if (hour >= 0 && hour <= 6) {
            causes.push({
                id: 'rc-temp-1',
                description: 'Anomaly during off-peak hours suggests automated process',
                probability: 0.65,
                evidence: [
                    {
                        type: 'temporal',
                        description: `Occurred at ${hour}:00, typical maintenance window`,
                        strength: 0.7,
                    },
                ],
                relatedMetrics: [anomaly.metric],
                timeRange: {
                    start: anomaly.timestamp - 1800000,
                    end: anomaly.timestamp,
                },
            });
        }
        // Event correlation
        if (events && events.length > 0) {
            const nearbyEvents = events.filter((e) => Math.abs(e.timestamp - anomaly.timestamp) < 600000 // Within 10 minutes
            );
            if (nearbyEvents.length > 0) {
                causes.push({
                    id: 'rc-event-1',
                    description: `Correlated with ${nearbyEvents.length} system event(s)`,
                    probability: 0.75,
                    evidence: nearbyEvents.map((e) => ({
                        type: 'causal',
                        description: `${e.type}: ${e.description}`,
                        strength: 0.8,
                    })),
                    relatedMetrics: [anomaly.metric],
                    timeRange: {
                        start: anomaly.timestamp - 600000,
                        end: anomaly.timestamp,
                    },
                });
            }
        }
        return causes.sort((a, b) => b.probability - a.probability);
    }
    /**
     * Identify secondary factors (time of day, recent trend, severity) that
     * contributed to the anomaly, sorted by contribution (descending).
     */
    identifyContributingFactors(anomaly, historicalData) {
        const factors = [];
        // Time of day factor
        const hour = new Date(anomaly.timestamp).getHours();
        if (hour >= 9 && hour <= 17) {
            factors.push({
                name: 'Peak business hours',
                contribution: 0.3,
                direction: 'increase',
                confidence: 0.8,
            });
        }
        // Rate of change over the last 10 samples
        if (historicalData.length > 1) {
            const recentData = historicalData.slice(-10);
            const trend = this.calculateTrendSlope(recentData);
            if (Math.abs(trend) > 0.1) {
                factors.push({
                    name: 'Recent trend acceleration',
                    contribution: Math.min(0.5, Math.abs(trend)),
                    direction: trend > 0 ? 'increase' : 'decrease',
                    confidence: 0.75,
                });
            }
        }
        // Severity factor (always present; scaled by deviation, capped at 1.0)
        factors.push({
            name: 'Anomaly severity',
            contribution: Math.min(1.0, Math.abs(anomaly.deviation) / 5),
            direction: anomaly.value > anomaly.expectedValue ? 'increase' : 'decrease',
            confidence: 0.9,
        });
        return factors.sort((a, b) => b.contribution - a.contribution);
    }
    /**
     * Overall explanation confidence: average of the top cause's probability
     * and a count factor (number of causes / 3, capped at 1). Defaults to 0.5
     * when no root causes were found.
     */
    calculateExplanationConfidence(rootCauses, _factors) {
        if (rootCauses.length === 0)
            return 0.5;
        // Confidence based on top root cause probability and number of causes
        const topProbability = rootCauses[0].probability;
        const countFactor = Math.min(1.0, rootCauses.length / 3);
        return (topProbability + countFactor) / 2;
    }
Most likely cause (${(topCause.probability * 100).toFixed(0)}% probability): ${topCause.description}`; } findStatisticalCauses(anomaly, historicalData) { const causes = []; if (historicalData.length < 10) return causes; const values = historicalData.map((d) => d.value); const recentValues = values.slice(-10); // Check for variance change const overallStdDev = stdev(values); const recentStdDev = stdev(recentValues); if (recentStdDev > overallStdDev * 1.5) { causes.push({ id: 'rc-variance', description: 'Increased variance in metric indicating instability', probability: 0.7, evidence: [ { type: 'statistical', description: `Variance increased by ${((recentStdDev / overallStdDev - 1) * 100).toFixed(0)}%`, strength: 0.75, }, ], relatedMetrics: [anomaly.metric], timeRange: { start: historicalData[historicalData.length - 10].timestamp, end: anomaly.timestamp, }, }); } return causes; } findTemporalCauses(anomaly, historicalData) { const causes = []; // Check for cyclical pattern const seasonality = this.detectSeasonality(historicalData); if (seasonality?.detected && (seasonality?.strength ?? 0) > 0.6) { causes.push({ id: 'rc-seasonal', description: `Seasonality pattern detected with ${seasonality.period ?? 0}ms period`, probability: seasonality.strength ?? 0.5, evidence: [ { type: 'temporal', description: `Regular pattern repeats every ${seasonality.period ?? 0}ms`, strength: seasonality.strength ?? 0.5, }, ], relatedMetrics: [anomaly.metric], timeRange: { start: anomaly.timestamp - (seasonality.period ?? 
0), end: anomaly.timestamp, }, }); } return causes; } findContextualCauses(anomaly, events) { const causes = []; if (!events || events.length === 0) return causes; // Find events near anomaly time const nearbyEvents = events.filter((e) => Math.abs(e.timestamp - anomaly.timestamp) < 1800000 // Within 30 minutes ); if (nearbyEvents.length > 0) { const criticalEvents = nearbyEvents.filter((e) => e.severity === 'critical' || e.severity === 'high'); if (criticalEvents.length > 0) { causes.push({ id: 'rc-critical-event', description: `${criticalEvents.length} critical event(s) occurred near anomaly time`, probability: 0.85, evidence: criticalEvents.map((e) => ({ type: 'contextual', description: `${e.type}: ${e.description}`, strength: e.severity === 'critical' ? 0.9 : 0.75, })), relatedMetrics: [anomaly.metric], timeRange: { start: anomaly.timestamp - 1800000, end: anomaly.timestamp, }, }); } } return causes; } mergeAndRankRootCauses(causes) { // Remove duplicates and merge similar causes const uniqueCauses = new Map(); for (const cause of causes) { const existing = uniqueCauses.get(cause.id); if (!existing || cause.probability > existing.probability) { uniqueCauses.set(cause.id, cause); } } // Sort by probability return Array.from(uniqueCauses.values()) .sort((a, b) => b.probability - a.probability) .slice(0, 5); // Top 5 } enrichRootCauseWithEvidence(cause, anomaly, _historicalData) { // Add additional evidence if not already present if (cause.evidence.length === 0) { cause.evidence.push({ type: 'statistical', description: `Anomaly magnitude: ${Math.abs(anomaly.deviation).toFixed(2)}σ`, strength: Math.min(1.0, Math.abs(anomaly.deviation) / 5), }); } return cause; } generateRootCauseSummary(causes) { if (causes.length === 0) { return 'No definitive root cause identified. 
Manual investigation recommended.'; } const topCause = causes[0]; const otherCount = causes.length - 1; let summary = `Primary root cause (${(topCause.probability * 100).toFixed(0)}% confidence): ${topCause.description}.`; if (otherCount > 0) { summary += ` ${otherCount} additional contributing factor${otherCount > 1 ? 's' : ''} identified.`; } return summary; } detectSeasonality(data) { if (data.length < 20) { return { detected: false }; } // Simple autocorrelation-based seasonality detection const values = data.map((d) => d.value); // Check common periods: hourly, daily, weekly const periods = [3600000, 86400000, 604800000]; // 1h, 24h, 7d in ms let maxCorrelation = 0; let detectedPeriod = 0; for (const period of periods) { const correlation = this.autocorrelation(values, period, data); if (Math.abs(correlation) > Math.abs(maxCorrelation)) { maxCorrelation = correlation; detectedPeriod = period; } } return { detected: Math.abs(maxCorrelation) > 0.5, period: detectedPeriod, strength: Math.abs(maxCorrelation), }; } autocorrelation(values, lagPeriod, data) { // Simplified autocorrelation if (data.length < 2) return 0; const lagCount = Math.floor(lagPeriod / (data[1].timestamp - data[0].timestamp)); if (lagCount >= values.length) return 0; let sum = 0; let count = 0; for (let i = 0; i < values.length - lagCount; i++) { sum += values[i] * values[i + lagCount]; count++; } return count > 0 ? sum / count : 0; } detectTrend(data) { if (data.length < 3) { return { direction: 'stable', slope: 0 }; } const slope = this.calculateTrendSlope(data); const absSlope = Math.abs(slope); let direction; if (absSlope < 0.01) direction = 'stable'; else direction = slope > 0 ? 
'upward' : 'downward'; return { direction, slope }; } calculateTrendSlope(data) { if (data.length < 2) return 0; // Linear regression const n = data.length; const x = data.map((_, i) => i); const y = data.map((d) => d.value); const sumX = x.reduce((a, b) => a + b, 0); const sumY = y.reduce((a, b) => a + b, 0); const sumXY = x.reduce((sum, xi, i) => sum + xi * y[i], 0); const sumX2 = x.reduce((sum, xi) => sum + xi * xi, 0); const slope = (n * sumXY - sumX * sumY) / (n * sumX2 - sumX * sumX); return slope; } createEventTimeSeries(events) { const timeSeries = new Map(); // Group events by type const eventsByType = new Map(); for (const event of events) { if (!eventsByType.has(event.type)) { eventsByType.set(event.type, []); } eventsByType.get(event.type).push(event.timestamp); } // Convert to time series (event counts per time bucket) const bucketSize = 300000; // 5 minutes for (const [type, timestamps] of eventsByType) { const series = []; const minTime = Math.min(...timestamps); const maxTime = Math.max(...timestamps); for (let t = minTime; t <= maxTime; t += bucketSize) { const count = timestamps.filter((ts) => ts >= t && ts < t + bucketSize).length; series.push(count); } timeSeries.set(type, series); } return timeSeries; } calculateCrossCorrelation(series1, series2) { const maxLagWindow = Math.min(10, Math.floor(series1.length / 2)); let maxCorr = 0; let maxLag = 0; for (let lag = -maxLagWindow; lag <= maxLagWindow; lag++) { const corr = this.correlationAtLag(series1, series2, lag); if (Math.abs(corr) > Math.abs(maxCorr)) { maxCorr = corr; maxLag = lag; } } return { correlation: maxCorr, lag: maxLag }; } correlationAtLag(series1, series2, lag) { const len = Math.min(series1.length, series2.length); if (len < 2) return 0; let sum = 0; let count = 0; for (let i = 0; i < len; i++) { const j = i + lag; if (j >= 0 && j < len) { sum += series1[i] * series2[j]; count++; } } return count > 0 ? 
    /**
     * Infer causal direction from a cross-correlation result: a positive lag
     * means event1 leads event2; zero lag with strong correlation is treated
     * as bidirectional; weak correlation (<0.5) yields 'none'.
     */
    determineCausalDirection(crossCorr) {
        if (Math.abs(crossCorr.correlation) < 0.5)
            return 'none';
        if (crossCorr.lag > 0)
            return 'event1->event2';
        if (crossCorr.lag < 0)
            return 'event2->event1';
        return 'bidirectional';
    }
    /**
     * Map a metric name to the system components it most likely belongs to,
     * via substring heuristics. Falls back to ['Unknown System'].
     */
    identifyAffectedSystems(metric) {
        const systems = [];
        if (metric.includes('api') || metric.includes('http'))
            systems.push('API Gateway');
        if (metric.includes('database') || metric.includes('db'))
            systems.push('Database');
        if (metric.includes('cache'))
            systems.push('Cache Layer');
        if (metric.includes('queue'))
            systems.push('Message Queue');
        if (metric.includes('cpu') || metric.includes('memory'))
            systems.push('Compute Resources');
        return systems.length > 0 ? systems : ['Unknown System'];
    }
    /** Fixed business-impact description per severity bucket. */
    generateBusinessImpact(severity, _anomaly) {
        switch (severity) {
            case 'critical':
                return 'Service outage affecting all users, potential revenue loss and SLA breach';
            case 'high':
                return 'Degraded performance impacting user experience and conversion rates';
            case 'medium':
                return 'Minor performance issues, some users may experience delays';
            default:
                return 'Minimal business impact, isolated to specific operations';
        }
    }
    /** Technical-impact description keyed off the metric name and severity. */
    generateTechnicalImpact(severity, anomaly) {
        const metric = anomaly.metric;
        if (metric.includes('error')) {
            return `Error rate ${severity === 'critical' ? 'critically high' : 'elevated'}, immediate investigation required`;
        }
        if (metric.includes('latency') || metric.includes('response')) {
            return `Response times ${severity === 'critical' ? 'severely degraded' : 'above acceptable threshold'}`;
        }
        if (metric.includes('cpu') || metric.includes('memory')) {
            return `Resource utilization ${severity === 'critical' ? 'at critical levels' : 'above optimal range'}`;
        }
        return `${metric} anomaly detected with ${severity} severity`;
    }
    /**
     * Pearson-correlate the first two variables found in the test data.
     * Significant when |r| > 0.5. Requires >= 3 samples and >= 2 variables.
     * NOTE(review): assumes every sample's `values` object has the same keys
     * as the first sample — confirm with data producers.
     */
    performCorrelationTest(testData) {
        // Simplified correlation test between first two variables
        if (testData.length < 3) {
            return { significant: false, coefficient: 0 };
        }
        const keys = Object.keys(testData[0].values);
        if (keys.length < 2) {
            return { significant: false, coefficient: 0 };
        }
        const x = testData.map((d) => d.values[keys[0]]);
        const y = testData.map((d) => d.values[keys[1]]);
        const coefficient = this.pearsonCorrelation(x, y);
        const significant = Math.abs(coefficient) > 0.5;
        return { significant, coefficient };
    }
    /**
     * Standard Pearson correlation coefficient. Returns 0 for mismatched
     * lengths, fewer than 2 points, or a zero denominator (constant series).
     */
    pearsonCorrelation(x, y) {
        if (x.length !== y.length || x.length < 2)
            return 0;
        const n = x.length;
        const meanX = mean(x);
        const meanY = mean(y);
        let numerator = 0;
        let denomX = 0;
        let denomY = 0;
        for (let i = 0; i < n; i++) {
            const diffX = x[i] - meanX;
            const diffY = y[i] - meanY;
            numerator += diffX * diffY;
            denomX += diffX * diffX;
            denomY += diffY * diffY;
        }
        const denominator = Math.sqrt(denomX * denomY);
        return denominator > 0 ? numerator / denominator : 0;
    }
    /**
     * Detect regular temporal spacing: significant when the coefficient of
     * variation of the inter-sample gaps is below 0.5. Requires >= 5 samples.
     * NOTE(review): when all timestamps are identical, avgGap is 0 and cv is
     * NaN; NaN < 0.5 is false, so that case falls through as not significant.
     */
    performTemporalTest(testData) {
        // Check for temporal patterns
        if (testData.length < 5) {
            return { significant: false, confidence: 0 };
        }
        // Check if values show temporal clustering
        const timestamps = testData.map((d) => d.timestamp);
        const gaps = [];
        for (let i = 1; i < timestamps.length; i++) {
            gaps.push(timestamps[i] - timestamps[i - 1]);
        }
        const avgGap = mean(gaps);
        const stdDevGap = stdev(gaps);
        const cv = stdDevGap / avgGap; // Coefficient of variation
        // Low CV indicates regular pattern
        const significant = cv < 0.5;
        const confidence = significant ? 1 - cv : 0.5;
        return { significant, confidence };
    }
    /**
     * Derive contextual evidence from keyword overlap between the hypothesis
     * text and the anomaly's metric name.
     * NOTE(review): assumes options.anomaly.metric is a string when the
     * anomaly is present — confirm upstream validation.
     */
    analyzeHypothesisContext(hypothesis, options) {
        const evidence = [];
        // Check for keyword matches
        if (hypothesis.toLowerCase().includes('load') && options.anomaly?.metric.includes('cpu')) {
            evidence.push({
                type: 'contextual',
                description: 'Hypothesis mentions load and CPU metric is affected',
                strength: 0.7,
            });
        }
        if (hypothesis.toLowerCase().includes('deployment') || hypothesis.toLowerCase().includes('code')) {
            evidence.push({
                type: 'contextual',
                description: 'Deployment-related hypothesis is plausible for sudden changes',
                strength: 0.6,
            });
        }
        return evidence;
    }
    /**
     * Canned alternative explanations offered when a hypothesis is rejected.
     * (async only for interface symmetry; performs no awaiting)
     */
    async generateAlternativeExplanations(options) {
        const alternatives = [];
        if (options.anomaly) {
            alternatives.push('Natural variance in the metric');
            alternatives.push('Temporary spike due to batch processing');
            alternatives.push('Measurement or data collection error');
            if (options.events && options.events.length > 0) {
                alternatives.push('Unrelated system event coinciding with anomaly');
            }
        }
        return alternatives;
    }
}
// ============================================================================
// Statistical Helper Functions
// ============================================================================
/**
 * Calculate the arithmetic mean (average) of an array of numbers.
 * Returns 0 for an empty array.
 */
function mean(values) {
    if (values.length === 0)
        return 0;
    const sum = values.reduce((acc, val) => acc + val, 0);
    return sum / values.length;
}
/**
 * Calculate the (population) standard deviation of an array of numbers.
 * Returns 0 for an empty array.
 */
function stdev(values) {
    if (values.length === 0)
        return 0;
    const avg = mean(values);
    const squaredDiffs = values.map((val) => Math.pow(val - avg, 2));
    const variance = mean(squaredDiffs);
    return Math.sqrt(variance);
}
/**
 * Calculate a percentile value from an array of numbers using linear
 * interpolation between the two nearest ranks.
 * @param values - Array of numbers (never mutated; a sorted copy is used)
 * @param p - Percentile as a fraction (0 to 1, e.g. 0.25 for the 25th percentile)
 */
function percentile(values, p) {
    if (values.length === 0)
        return 0;
    // Work on an ascending-sorted copy so the caller's array is untouched.
    const ranked = [...values].sort((a, b) => a - b);
    // Fractional rank within the sorted array.
    const position = p * (ranked.length - 1);
    const lowerIndex = Math.floor(position);
    const upperIndex = Math.ceil(position);
    // Exact rank: no interpolation needed.
    if (lowerIndex === upperIndex) {
        return ranked[lowerIndex];
    }
    // Linearly interpolate between the two neighboring ranks.
    const fraction = position - lowerIndex;
    return ranked[lowerIndex] * (1 - fraction) + ranked[upperIndex] * fraction;
}
// ============================================================================
// MCP Tool Definition
// ============================================================================
/**
 * JSON-schema description of the anomaly-explainer tool for MCP clients.
 */
export const ANOMALYEXPLAINERTOOL = {
    name: 'anomalyexplainer',
    description: 'Explain anomalies with root cause analysis, hypothesis generation, and remediation suggestions',
    inputSchema: {
        type: 'object',
        properties: {
            operation: {
                type: 'string',
                enum: [
                    'explain',
                    'analyze-root-cause',
                    'generate-hypotheses',
                    'test-hypothesis',
                    'get-baseline',
                    'correlate-events',
                    'impact-assessment',
                    'suggest-remediation',
                ],
                description: 'Anomaly explanation operation to perform',
            },
            anomaly: {
                type: 'object',
                properties: {
                    metric: { type: 'string' },
                    value: { type: 'number' },
                    expectedValue: { type: 'number' },
                    deviation: { type: 'number' },
                    timestamp: { type: 'number' },
                    severity: {
                        type: 'string',
                        enum: ['low', 'medium', 'high', 'critical'],
                    },
                    context: { type: 'object' },
                },
                description: 'Anomaly data to explain',
            },
            historicalData: {
                type: 'array',
                description: 'Historical metric data for baseline analysis',
            },
            hypothesis: {
                type: 'string',
                description: 'Hypothesis to test',
            },
            events: {
                type: 'array',
                description: 'Related system events',
            },
            useCache: {
                type: 'boolean',
                description: 'Enable caching',
                default: true,
            },
            cacheTTL: {
                type: 'number',
                description: 'Cache TTL in seconds',
            },
        },
        required: ['operation'],
    },
};
// ============================================================================
// MCP Tool Runner
// ============================================================================
/**
 * Convenience runner that wires an AnomalyExplainer to the shared cache,
 * token counter, and metrics collector instances.
 */
export async function runAnomalyExplainer(options) {
    const explainer = new AnomalyExplainer(sharedCache, sharedTokenCounter, sharedMetricsCollector);
    return await explainer.run(options);
}
//# sourceMappingURL=anomaly-explainer.js.map