UNPKG

lynkr

Version:

Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.

424 lines (365 loc) 10.8 kB
/** * Provider Health Tracking * * Tracks real-time health metrics per provider for intelligent routing decisions: * - Latency percentiles (p50, p95, p99) * - Success/failure rates * - In-flight request counting * - Circuit breaker state integration * * @module observability/health-tracker */ const logger = require("../logger"); /** * Sliding window statistics for latency and rate tracking */ class SlidingWindowStats { constructor(windowMs = 300000) { // 5 minutes default this.windowMs = windowMs; this.samples = []; } /** * Record a sample value */ record(value, timestamp = Date.now()) { this.samples.push({ value, timestamp }); this.prune(); } /** * Remove samples outside the window */ prune() { const cutoff = Date.now() - this.windowMs; this.samples = this.samples.filter(s => s.timestamp > cutoff); } /** * Get count of samples in window */ count() { this.prune(); return this.samples.length; } /** * Calculate percentile */ percentile(p) { this.prune(); if (this.samples.length === 0) return 0; const sorted = [...this.samples] .map(s => s.value) .sort((a, b) => a - b); const idx = Math.ceil((p / 100) * sorted.length) - 1; return sorted[Math.max(0, idx)]; } /** * Calculate mean */ mean() { this.prune(); if (this.samples.length === 0) return 0; const sum = this.samples.reduce((acc, s) => acc + s.value, 0); return sum / this.samples.length; } /** * Get all values in window */ values() { this.prune(); return this.samples.map(s => s.value); } } /** * Health metrics for a single provider */ class ProviderHealth { constructor(name, windowMs = 300000) { this.name = name; this.windowMs = windowMs; // Latency tracking this.latencyStats = new SlidingWindowStats(windowMs); // Success/failure tracking this.successCount = 0; this.failureCount = 0; this.recentResults = []; // {success: boolean, timestamp: number} // In-flight tracking this.inFlight = 0; // Recent errors this.recentErrors = []; this.maxRecentErrors = 10; // Timestamps this.lastSuccess = null; this.lastFailure = null; this.lastRequest = null; } /** * Record start of a request */ recordRequestStart() { this.inFlight++; this.lastRequest = Date.now(); } /** * Record successful completion */ recordSuccess(latencyMs) { this.inFlight = Math.max(0, this.inFlight - 1); this.successCount++; this.lastSuccess = Date.now(); this.latencyStats.record(latencyMs); this.recentResults.push({ success: true, timestamp: Date.now() }); this._pruneResults(); } /** * Record failure */ recordFailure(error, statusCode = null) { this.inFlight = Math.max(0, this.inFlight - 1); this.failureCount++; this.lastFailure = Date.now(); this.recentResults.push({ success: false, timestamp: Date.now() }); this._pruneResults(); // Track recent errors this.recentErrors.unshift({ code: statusCode, message: error?.message || String(error), timestamp: new Date().toISOString(), }); if (this.recentErrors.length > this.maxRecentErrors) { this.recentErrors = this.recentErrors.slice(0, this.maxRecentErrors); } } /** * Prune old results outside window */ _pruneResults() { const cutoff = Date.now() - this.windowMs; this.recentResults = this.recentResults.filter(r => r.timestamp > cutoff); } /** * Calculate success rate in window */ getSuccessRate() { this._pruneResults(); if (this.recentResults.length === 0) return 1.0; // No data = healthy const successes = this.recentResults.filter(r => r.success).length; return successes / this.recentResults.length; } /** * Get health metrics summary */ getMetrics() { const successRate = this.getSuccessRate(); return { name: this.name, latency: { p50: Math.round(this.latencyStats.percentile(50)), p95: Math.round(this.latencyStats.percentile(95)), p99: Math.round(this.latencyStats.percentile(99)), mean: Math.round(this.latencyStats.mean()), samples: this.latencyStats.count(), }, requests: { total: this.successCount + this.failureCount, success: this.successCount, failed: this.failureCount, success_rate: Math.round(successRate * 1000) / 1000, // 3 decimal places in_window: this.recentResults.length, }, current: { in_flight: this.inFlight, last_success: this.lastSuccess ? new Date(this.lastSuccess).toISOString() : null, last_failure: this.lastFailure ? new Date(this.lastFailure).toISOString() : null, last_request: this.lastRequest ? new Date(this.lastRequest).toISOString() : null, }, recent_errors: this.recentErrors, }; } } /** * Provider Health Tracker - tracks health for all providers */ class ProviderHealthTracker { constructor(options = {}) { this.windowMs = options.windowMs || 300000; // 5 minutes this.providers = new Map(); // Health thresholds this.thresholds = { unhealthy: { successRate: 0.5, // < 50% success = unhealthy circuitOpen: true, // Circuit open = unhealthy }, degraded: { successRate: 0.9, // < 90% success = degraded latencyP95: 5000, // > 5s p95 latency = degraded }, }; } /** * Get or create provider health tracker */ _getProvider(name) { if (!this.providers.has(name)) { this.providers.set(name, new ProviderHealth(name, this.windowMs)); } return this.providers.get(name); } /** * Record start of a request */ recordRequestStart(providerName) { const provider = this._getProvider(providerName); provider.recordRequestStart(); } /** * Record successful request completion */ recordSuccess(providerName, latencyMs) { const provider = this._getProvider(providerName); provider.recordSuccess(latencyMs); logger.debug({ provider: providerName, latencyMs, inFlight: provider.inFlight, }, "Provider health: success recorded"); } /** * Record failed request */ recordFailure(providerName, error, statusCode = null) { const provider = this._getProvider(providerName); provider.recordFailure(error, statusCode); logger.debug({ provider: providerName, error: error?.message || String(error), statusCode, inFlight: provider.inFlight, }, "Provider health: failure recorded"); } /** * Calculate health status for a provider */ getStatus(providerName, circuitState = null) { const provider = this._getProvider(providerName); const metrics = provider.getMetrics(); const successRate = metrics.requests.success_rate; const latencyP95 = metrics.latency.p95; // Unhealthy conditions if (circuitState === "OPEN" || successRate < this.thresholds.unhealthy.successRate) { return "unhealthy"; } // Degraded conditions if (successRate < this.thresholds.degraded.successRate || latencyP95 > this.thresholds.degraded.latencyP95) { return "degraded"; } return "healthy"; } /** * Get latency stats for a provider */ getLatency(providerName) { const provider = this._getProvider(providerName); return provider.getMetrics().latency; } /** * Get in-flight count for a provider */ getInFlight(providerName) { const provider = this._getProvider(providerName); return provider.inFlight; } /** * Get success rate for a provider */ getSuccessRate(providerName) { const provider = this._getProvider(providerName); return provider.getSuccessRate(); } /** * Get all providers health summary */ getAllHealth(circuitBreakerStates = {}) { const health = []; for (const [name, provider] of this.providers) { const circuitState = circuitBreakerStates[name] || "CLOSED"; const metrics = provider.getMetrics(); const status = this.getStatus(name, circuitState); health.push({ name, status, latency_p95_ms: metrics.latency.p95, success_rate: metrics.requests.success_rate, in_flight: metrics.current.in_flight, circuit_state: circuitState, total_requests: metrics.requests.total, last_success: metrics.current.last_success, last_failure: metrics.current.last_failure, }); } // Sort by status (unhealthy last) then by success rate return health.sort((a, b) => { const statusOrder = { healthy: 0, degraded: 1, unhealthy: 2 }; const statusDiff = statusOrder[a.status] - statusOrder[b.status]; if (statusDiff !== 0) return statusDiff; return b.success_rate - a.success_rate; }); } /** * Get detailed metrics for a specific provider */ getProviderMetrics(providerName) { const provider = this._getProvider(providerName); return provider.getMetrics(); } /** * Select best provider based on health (for routing) */ selectBestProvider(providerNames, circuitBreakerStates = {}, strategy = "latency") { const healthyProviders = providerNames.filter(name => { const circuitState = circuitBreakerStates[name] || "CLOSED"; const status = this.getStatus(name, circuitState); return status !== "unhealthy"; }); if (healthyProviders.length === 0) { // All unhealthy, return first as fallback return providerNames[0]; } if (strategy === "latency") { // Sort by p95 latency, return fastest return healthyProviders.sort((a, b) => { return this.getLatency(a).p95 - this.getLatency(b).p95; })[0]; } if (strategy === "least-busy") { // Sort by in-flight count, return least busy return healthyProviders.sort((a, b) => { return this.getInFlight(a) - this.getInFlight(b); })[0]; } if (strategy === "success-rate") { // Sort by success rate, return highest return healthyProviders.sort((a, b) => { return this.getSuccessRate(b) - this.getSuccessRate(a); })[0]; } // Default: return first healthy return healthyProviders[0]; } } // Singleton instance let trackerInstance = null; /** * Get or create the health tracker singleton */ function getHealthTracker() { if (!trackerInstance) { trackerInstance = new ProviderHealthTracker(); } return trackerInstance; } module.exports = { SlidingWindowStats, ProviderHealth, ProviderHealthTracker, getHealthTracker, };