UNPKG

codecrucible-synth

Version:

Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability

390 lines 10.8 kB
/**
 * Comprehensive Observability System for CodeCrucible Synth
 * Production-ready monitoring, metrics collection, logging, and telemetry system
 * with OpenTelemetry integration and performance analytics
 */
import { EventEmitter } from 'events';

/** Health states used by both the system-wide and the per-component health reports. */
type HealthStatus = 'healthy' | 'degraded' | 'critical' | 'unknown';

/** State of an individual telemetry subsystem as reported on `SystemHealth`. */
type SubsystemStatus = 'active' | 'disabled' | 'error';

/** Log levels accepted by span logs and by the logging configuration. */
type LogLevel = 'debug' | 'info' | 'warn' | 'error';

/** Severity scale shared by alert rules and the alerts they produce. */
type AlertSeverity = 'low' | 'medium' | 'high' | 'critical';

/** A start/end pair of dates delimiting a query or export window. */
interface TimeRange {
  start: Date;
  end: Date;
}

/** Fields common to the metric and trace exporter configurations. */
interface ExporterSettings {
  endpoint?: string;
  authentication?: Record<string, string>;
  batchSize: number;
  flushInterval: number;
}

/** A named metric value with its timestamp, tags, unit and metric type. */
export interface MetricPoint {
  name: string;
  value: number;
  timestamp: Date;
  tags: Record<string, string>;
  unit: string;
  type: 'counter' | 'gauge' | 'histogram' | 'timer';
}

/**
 * A span within a trace. `endTime` and `duration` are optional, as are the
 * parent span id and the baggage map.
 */
export interface TraceSpan {
  traceId: string;
  spanId: string;
  parentSpanId?: string;
  operationName: string;
  startTime: Date;
  endTime?: Date;
  duration?: number;
  tags: Record<string, string>;
  logs: SpanLog[];
  status: 'ok' | 'error' | 'timeout';
  baggage?: Record<string, string>;
}

/** `codecrucible.*` span attributes describing a model request. */
export interface ModelRequestSpanAttributes {
  'codecrucible.model': string;
  'codecrucible.provider': string;
  'codecrucible.request.type': string;
  'codecrucible.request.complexity': string;
  'codecrucible.request.tokens.input'?: number;
  'codecrucible.request.tokens.output'?: number;
  'codecrucible.request.temperature'?: number;
  'codecrucible.streaming.enabled'?: boolean;
  'codecrucible.tools.count'?: number;
  'codecrucible.voice.archetype'?: string;
  'codecrucible.hybrid.routing.decision'?: string;
}

/** `codecrucible.streaming.*` span attributes describing a streaming session. */
export interface StreamingSpanAttributes {
  'codecrucible.streaming.session_id': string;
  'codecrucible.streaming.chunk_type': string;
  'codecrucible.streaming.block_id'?: string;
  'codecrucible.streaming.total_chunks': number;
  'codecrucible.streaming.bytes_streamed': number;
}

/** `codecrucible.tool.*` span attributes describing a tool execution. */
export interface ToolExecutionSpanAttributes {
  'codecrucible.tool.name': string;
  'codecrucible.tool.execution_time': number;
  'codecrucible.tool.success': boolean;
  'codecrucible.tool.error_type'?: string;
}

/** A timestamped, levelled log entry attached to a span. */
export interface SpanLog {
  timestamp: Date;
  level: LogLevel;
  message: string;
  fields?: Record<string, any>;
}

/** System-wide health report aggregating the per-component reports. */
export interface SystemHealth {
  status: HealthStatus;
  components: ComponentHealth[];
  overallScore: number;
  lastChecked: Date;
  uptime: number;
  version: string;
  telemetryEnabled?: boolean;
  tracingStatus?: SubsystemStatus;
  metricsStatus?: SubsystemStatus;
}

/** Health report for one named component, including its metrics and dependencies. */
export interface ComponentHealth {
  name: string;
  status: HealthStatus;
  metrics: ComponentMetrics;
  dependencies: string[];
  lastChecked: Date;
  errorRate: number;
  responseTime: number;
}

/** Numeric resource and traffic figures for a component, plus free-form custom metrics. */
export interface ComponentMetrics {
  cpu: number;
  memory: number;
  diskUsage: number;
  networkLatency: number;
  errorCount: number;
  requestCount: number;
  customMetrics: Record<string, number>;
}

/** Measurements, derived statistics and trends for one named operation. */
export interface PerformanceProfile {
  operation: string;
  measurements: PerformanceMeasurement[];
  statistics: PerformanceStatistics;
  trends: PerformanceTrend[];
}

/** A single timed measurement of an operation. */
export interface PerformanceMeasurement {
  timestamp: Date;
  duration: number;
  memoryUsage: number;
  cpuUsage: number;
  success: boolean;
  metadata: Record<string, any>;
}

/** Summary statistics over a set of measurements. */
export interface PerformanceStatistics {
  count: number;
  mean: number;
  median: number;
  p95: number;
  p99: number;
  min: number;
  max: number;
  stdDev: number;
}

/** Direction and size of a performance change over a period. */
export interface PerformanceTrend {
  period: string;
  direction: 'improving' | 'degrading' | 'stable';
  changePercent: number;
  significance: number;
}

/** Definition of an alert: condition, thresholds, severity, cooldown and actions. */
export interface AlertRule {
  id: string;
  name: string;
  description: string;
  condition: AlertCondition;
  threshold: AlertThreshold;
  severity: AlertSeverity;
  enabled: boolean;
  cooldown: number;
  actions: AlertAction[];
}

/** Metric, comparison operator, time window and aggregation an alert rule evaluates. */
export interface AlertCondition {
  metric: string;
  operator: 'gt' | 'lt' | 'eq' | 'gte' | 'lte' | 'change';
  timeWindow: number;
  aggregation: 'avg' | 'sum' | 'max' | 'min' | 'count';
}

/** Warning and critical threshold values with their unit. */
export interface AlertThreshold {
  warning: number;
  critical: number;
  unit: string;
}

/** An action attached to an alert rule, with action-specific configuration. */
export interface AlertAction {
  type: 'log' | 'email' | 'webhook' | 'slack';
  configuration: Record<string, any>;
  enabled: boolean;
}

/** An alert instance produced for a rule, with optional resolution and acknowledgement data. */
export interface Alert {
  id: string;
  ruleId: string;
  severity: AlertSeverity;
  status: 'active' | 'resolved' | 'silenced';
  triggeredAt: Date;
  resolvedAt?: Date;
  message: string;
  details: Record<string, any>;
  acknowledgedBy?: string;
  acknowledgedAt?: Date;
}

/** Configuration accepted by the `ObservabilitySystem` constructor, grouped by subsystem. */
export interface ObservabilityConfig {
  metrics: {
    enabled: boolean;
    retentionDays: number;
    exportInterval: number;
    exporters: MetricExporter[];
  };
  tracing: {
    enabled: boolean;
    samplingRate: number;
    maxSpansPerTrace: number;
    exporters: TraceExporter[];
  };
  logging: {
    level: LogLevel;
    outputs: LogOutput[];
    structured: boolean;
    includeStackTrace: boolean;
  };
  health: {
    checkInterval: number;
    timeoutMs: number;
    retryAttempts: number;
  };
  alerting: {
    enabled: boolean;
    rules: AlertRule[];
    defaultCooldown: number;
  };
  storage: {
    dataPath: string;
    maxFileSize: number;
    compressionEnabled: boolean;
    encryptionEnabled: boolean;
  };
}

/** Metric exporter configuration: exporter type plus the shared exporter settings. */
export interface MetricExporter extends ExporterSettings {
  type: 'prometheus' | 'statsd' | 'opentelemetry' | 'file';
}

/** Trace exporter configuration: exporter type plus the shared exporter settings. */
export interface TraceExporter extends ExporterSettings {
  type: 'jaeger' | 'zipkin' | 'opentelemetry' | 'file';
}

/** A log output target with target-specific configuration. */
export interface LogOutput {
  type: 'console' | 'file' | 'syslog' | 'elasticsearch';
  configuration: Record<string, any>;
  level?: string;
  format?: string;
}

/**
 * Declared public surface of the observability system.
 *
 * This is a declaration only; the implementation lives in the compiled
 * module that accompanies this file.
 */
export declare class ObservabilitySystem extends EventEmitter {
  private logger;
  private config;
  private metricsCollector;
  private tracingSystem;
  private healthMonitor;
  private alertManager;
  private performanceProfiler;
  private dataStorage;
  private isRunning;
  private systemStartTime;

  constructor(config: ObservabilityConfig);

  /**
   * Initialize and start the observability system
   */
  initialize(): Promise<void>;

  /**
   * Record a metric
   */
  recordMetric(name: string, value: number, tags?: Record<string, string>, unit?: string): void;

  /**
   * Increment a counter
   */
  incrementCounter(name: string, tags?: Record<string, string>, value?: number): void;

  /**
   * Record a timer
   */
  recordTimer(name: string, duration: number, tags?: Record<string, string>): void;

  /**
   * Start a trace span
   */
  startSpan(operationName: string, parentSpan?: TraceSpan): TraceSpan;

  /**
   * Finish a trace span
   */
  finishSpan(span: TraceSpan, tags?: Record<string, string>): void;

  /**
   * Profile an operation
   */
  profileOperation<T>(
    operationName: string,
    operation: () => Promise<T>,
    metadata?: Record<string, any>,
  ): Promise<T>;

  /**
   * Check system health
   */
  checkHealth(): Promise<SystemHealth>;

  /**
   * Get metrics summary
   */
  getMetricsSummary(timeRange?: TimeRange): MetricsSummary;

  /**
   * Get performance profiles
   */
  getPerformanceProfiles(): PerformanceProfile[];

  /**
   * Get active alerts
   */
  getActiveAlerts(): Alert[];

  /**
   * Create custom alert rule
   */
  createAlertRule(rule: AlertRule): void;

  /**
   * Enhanced: OpenTelemetry integration - Trace model requests
   */
  traceModelRequest<T>(
    operation: string,
    attributes: Partial<ModelRequestSpanAttributes>,
    fn: () => Promise<T>,
  ): Promise<T>;

  /**
   * Enhanced: OpenTelemetry integration - Trace agent communication
   */
  traceAgentCommunication<T>(
    attributes: Record<string, string | number | boolean>,
    fn: () => Promise<T>,
  ): Promise<T>;

  /**
   * Enhanced: Record tool execution metrics
   */
  recordToolExecution(
    toolName: string,
    executionTime: number,
    success: boolean,
    errorType?: string,
  ): void;

  /**
   * Get system statistics
   */
  getSystemStats(): ObservabilityStats;

  /**
   * Export observability data
   */
  exportData(format: 'json' | 'csv' | 'prometheus', timeRange?: TimeRange): Promise<string>;

  /**
   * Shutdown the observability system
   */
  shutdown(): Promise<void>;

  /**
   * Private Methods
   */
  private startSystemMonitoring;
  private collectSystemMetrics;
  private measureEventLoopLag;
  private convertToCSV;
  private convertToPrometheus;
}

/** Return shape of `ObservabilitySystem.getMetricsSummary`. */
interface MetricsSummary {
  totalMetrics: number;
  uniqueMetrics: number;
  timeRange: TimeRange;
  topMetrics: { name: string; count: number; avgValue: number }[];
  aggregations: Record<string, any>;
}
/** Metrics figures reported inside `ObservabilityStats`, including per-exporter health. */
interface MetricsStats {
  totalCollected: number;
  uniqueNames: number;
  aggregatedMetrics: number;
  memoryUsage: number;
  exporterStatus: { type: string; healthy: boolean }[];
}

/** Trace and span counts and averages reported inside `ObservabilityStats`. */
interface TracingStats {
  totalTraces: number;
  totalSpans: number;
  activeSpans: number;
  averageSpansPerTrace: number;
  averageDuration: number;
}

/** Component counts per health state plus the time of the last health check. */
interface HealthStats {
  totalComponents: number;
  healthyComponents: number;
  degradedComponents: number;
  criticalComponents: number;
  lastHealthCheck: Date;
}

/** Alert rule and alert counts reported inside `ObservabilityStats`. */
interface AlertStats {
  totalRules: number;
  activeAlerts: number;
  alertsLast24h: number;
  criticalAlerts: number;
  resolvedAlertsLast24h: number;
}

/** Profiling totals and averages reported inside `ObservabilityStats`. */
interface PerformanceStats {
  totalOperations: number;
  totalMeasurements: number;
  averageDuration: number;
  memoryEfficiency: number;
}

/** Storage location, size and feature flags reported inside `ObservabilityStats`. */
interface StorageStats {
  dataPath: string;
  totalSize: number;
  compressionEnabled: boolean;
  encryptionEnabled: boolean;
}

/** Return shape of `ObservabilitySystem.getSystemStats`: runtime info plus one section per subsystem. */
interface ObservabilityStats {
  systemInfo: {
    uptime: number;
    version: string;
    nodeVersion: string;
    platform: string;
    arch: string;
  };
  metrics: MetricsStats;
  tracing: TracingStats;
  health: HealthStats;
  alerts: AlertStats;
  performance: PerformanceStats;
  storage: StorageStats;
}

/**
 * Enhanced: Factory function for creating observability system with OpenTelemetry support
 */
export declare function getTelemetryProvider(): ObservabilitySystem;

// Keeps the non-exported interfaces above private to this module.
export {};
//# sourceMappingURL=observability-system.d.ts.map