// codecrucible-synth
// Version:
// Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability
// 390 lines • 10.8 kB
// TypeScript
/**
* Comprehensive Observability System for CodeCrucible Synth
* Production-ready monitoring, metrics collection, logging, and telemetry system
* with OpenTelemetry integration and performance analytics
*/
import { EventEmitter } from 'events';
/**
 * A single recorded metric sample.
 */
export interface MetricPoint {
  /** Metric identifier. */
  name: string;
  /** Numeric reading carried by this sample. */
  value: number;
  /** Timestamp attached to the sample. */
  timestamp: Date;
  /** String key/value labels attached to the sample. */
  tags: Record<string, string>;
  /** Unit label for `value`; a free-form string as far as this declaration shows. */
  unit: string;
  /** Which kind of metric the sample belongs to. */
  type:
    | 'counter'
    | 'gauge'
    | 'histogram'
    | 'timer';
}
/**
 * One span inside a trace: an operation with a start, an optional end,
 * string tags, and attached log entries.
 */
export interface TraceSpan {
  /** Identifier of the trace this span belongs to. */
  traceId: string;
  /** Identifier of this span. */
  spanId: string;
  /** Identifier of the parent span; absent when none is recorded. */
  parentSpanId?: string;
  /** Name of the operation the span covers. */
  operationName: string;
  /** Start timestamp. */
  startTime: Date;
  /** End timestamp; optional in this shape. */
  endTime?: Date;
  /** Elapsed time. NOTE(review): unit is not stated here (presumably milliseconds) — confirm. */
  duration?: number;
  /** String key/value labels attached to the span. */
  tags: Record<string, string>;
  /** Log entries attached to the span. */
  logs: SpanLog[];
  /** Outcome recorded for the span. */
  status:
    | 'ok'
    | 'error'
    | 'timeout';
  /** Optional string key/value baggage carried with the span. */
  baggage?: Record<string, string>;
}
/**
 * Span attribute keys (all under the `codecrucible.` prefix) describing a
 * model request. The first four keys are required; the rest are optional.
 */
export interface ModelRequestSpanAttributes {
  // Required attributes.
  'codecrucible.model': string;
  'codecrucible.provider': string;
  'codecrucible.request.type': string;
  'codecrucible.request.complexity': string;

  // Optional request details.
  'codecrucible.request.tokens.input'?: number;
  'codecrucible.request.tokens.output'?: number;
  'codecrucible.request.temperature'?: number;
  'codecrucible.streaming.enabled'?: boolean;
  'codecrucible.tools.count'?: number;
  'codecrucible.voice.archetype'?: string;
  'codecrucible.hybrid.routing.decision'?: string;
}
/**
 * Span attribute keys under `codecrucible.streaming.` describing a streaming
 * session. Only `block_id` is optional.
 */
export interface StreamingSpanAttributes {
  'codecrucible.streaming.session_id': string;
  'codecrucible.streaming.chunk_type': string;
  'codecrucible.streaming.block_id'?: string;
  /** Numeric chunk total. */
  'codecrucible.streaming.total_chunks': number;
  /** Numeric byte total. */
  'codecrucible.streaming.bytes_streamed': number;
}
/**
 * Span attribute keys under `codecrucible.tool.` describing one tool execution.
 */
export interface ToolExecutionSpanAttributes {
  'codecrucible.tool.name': string;
  /** NOTE(review): time unit is not stated in this declaration — confirm. */
  'codecrucible.tool.execution_time': number;
  'codecrucible.tool.success': boolean;
  /** Optional error classification. */
  'codecrucible.tool.error_type'?: string;
}
/**
 * A timestamped log entry with a severity level, a message, and optional
 * free-form fields.
 */
export interface SpanLog {
  /** Timestamp of the entry. */
  timestamp: Date;
  /** Severity level of the entry. */
  level:
    | 'debug'
    | 'info'
    | 'warn'
    | 'error';
  /** Log message text. */
  message: string;
  /** Optional extra fields; values are untyped (`any`). */
  fields?: Record<string, any>;
}
/**
 * System-wide health report: an overall status and score plus one entry per
 * component.
 */
export interface SystemHealth {
  /** Overall status. */
  status:
    | 'healthy'
    | 'degraded'
    | 'critical'
    | 'unknown';
  /** Health entries for the individual components. */
  components: ComponentHealth[];
  /** Overall numeric score. NOTE(review): scale is not stated here — confirm. */
  overallScore: number;
  /** Timestamp of the check. */
  lastChecked: Date;
  /** Uptime value. NOTE(review): unit is not stated here — confirm. */
  uptime: number;
  /** Version string. */
  version: string;
  /** Optional flag for whether telemetry is enabled. */
  telemetryEnabled?: boolean;
  /** Optional tracing subsystem status. */
  tracingStatus?: 'active' | 'disabled' | 'error';
  /** Optional metrics subsystem status. */
  metricsStatus?: 'active' | 'disabled' | 'error';
}
/**
 * Health entry for a single named component.
 */
export interface ComponentHealth {
  /** Component name. */
  name: string;
  /** Status of the component. */
  status:
    | 'healthy'
    | 'degraded'
    | 'critical'
    | 'unknown';
  /** Resource and request metrics for the component. */
  metrics: ComponentMetrics;
  /** Names of dependencies, as strings. */
  dependencies: string[];
  /** Timestamp of the check. */
  lastChecked: Date;
  /** Error rate. NOTE(review): fraction vs. percentage is not stated here — confirm. */
  errorRate: number;
  /** Response time. NOTE(review): unit is not stated here — confirm. */
  responseTime: number;
}
/**
 * Numeric readings for one component, plus an open-ended map of custom metrics.
 * NOTE(review): units and scales of the individual readings are not stated in
 * this declaration — confirm against the implementation.
 */
export interface ComponentMetrics {
  cpu: number;
  memory: number;
  diskUsage: number;
  networkLatency: number;
  errorCount: number;
  requestCount: number;
  /** Additional metrics keyed by name. */
  customMetrics: Record<string, number>;
}
/**
 * Performance data for one named operation: its measurements, statistics,
 * and trends.
 */
export interface PerformanceProfile {
  /** Name of the profiled operation. */
  operation: string;
  /** Individual measurements. */
  measurements: PerformanceMeasurement[];
  /** Statistics for the profile. */
  statistics: PerformanceStatistics;
  /** Trend entries for the profile. */
  trends: PerformanceTrend[];
}
/**
 * One timed sample of an operation.
 * NOTE(review): units for `duration`, `memoryUsage` and `cpuUsage` are not
 * stated in this declaration — confirm.
 */
export interface PerformanceMeasurement {
  /** Timestamp of the sample. */
  timestamp: Date;
  duration: number;
  memoryUsage: number;
  cpuUsage: number;
  /** Success flag for the sample. */
  success: boolean;
  /** Free-form metadata; values are untyped (`any`). */
  metadata: Record<string, any>;
}
/**
 * Summary statistics, all numeric.
 * NOTE(review): which quantity they summarise (presumably measurement
 * durations) is not stated in this declaration — confirm.
 */
export interface PerformanceStatistics {
  count: number;
  mean: number;
  median: number;
  p95: number;
  p99: number;
  min: number;
  max: number;
  stdDev: number;
}
/**
 * A performance trend entry for one period.
 */
export interface PerformanceTrend {
  /** Period label; a free-form string as far as this declaration shows. */
  period: string;
  /** Direction of the trend. */
  direction:
    | 'improving'
    | 'degrading'
    | 'stable';
  /** Numeric change percentage. */
  changePercent: number;
  /** Numeric significance. NOTE(review): scale is not stated here — confirm. */
  significance: number;
}
/**
 * Definition of an alert rule: a condition, thresholds, a severity, and the
 * actions attached to it.
 */
export interface AlertRule {
  /** Rule identifier. */
  id: string;
  /** Rule name. */
  name: string;
  /** Rule description. */
  description: string;
  /** Condition attached to the rule. */
  condition: AlertCondition;
  /** Threshold values for the rule. */
  threshold: AlertThreshold;
  /** Severity assigned to the rule. */
  severity:
    | 'low'
    | 'medium'
    | 'high'
    | 'critical';
  /** Enabled flag for the rule. */
  enabled: boolean;
  /** Cooldown value. NOTE(review): unit is not stated here — confirm. */
  cooldown: number;
  /** Actions attached to the rule. */
  actions: AlertAction[];
}
/**
 * Condition part of an alert: a metric name, a comparison operator, a time
 * window, and an aggregation.
 */
export interface AlertCondition {
  /** Metric name, as a string. */
  metric: string;
  /** Comparison operator. */
  operator:
    | 'gt'
    | 'lt'
    | 'eq'
    | 'gte'
    | 'lte'
    | 'change';
  /** Time window value. NOTE(review): unit is not stated here — confirm. */
  timeWindow: number;
  /** Aggregation kind. */
  aggregation:
    | 'avg'
    | 'sum'
    | 'max'
    | 'min'
    | 'count';
}
/**
 * Warning and critical threshold values with a unit label.
 */
export interface AlertThreshold {
  /** Warning threshold value. */
  warning: number;
  /** Critical threshold value. */
  critical: number;
  /** Unit label; a free-form string as far as this declaration shows. */
  unit: string;
}
/**
 * An action attached to an alert rule.
 */
export interface AlertAction {
  /** Kind of action. */
  type:
    | 'log'
    | 'email'
    | 'webhook'
    | 'slack';
  /** Free-form settings for the action; values are untyped (`any`). */
  configuration: Record<string, any>;
  /** Enabled flag for the action. */
  enabled: boolean;
}
/**
 * An alert instance tied to a rule by `ruleId`, with optional resolution and
 * acknowledgement details.
 */
export interface Alert {
  /** Alert identifier. */
  id: string;
  /** Identifier of the associated rule. */
  ruleId: string;
  /** Severity of the alert. */
  severity:
    | 'low'
    | 'medium'
    | 'high'
    | 'critical';
  /** Lifecycle state of the alert. */
  status:
    | 'active'
    | 'resolved'
    | 'silenced';
  /** Timestamp at which the alert was triggered. */
  triggeredAt: Date;
  /** Resolution timestamp; optional. */
  resolvedAt?: Date;
  /** Alert message text. */
  message: string;
  /** Free-form details; values are untyped (`any`). */
  details: Record<string, any>;
  /** Optional acknowledger, as a string. */
  acknowledgedBy?: string;
  /** Optional acknowledgement timestamp. */
  acknowledgedAt?: Date;
}
/**
 * Configuration object accepted by the `ObservabilitySystem` constructor,
 * grouped into one section per subsystem.
 * NOTE(review): units for intervals, timeouts, cooldowns and sizes are not
 * stated in this declaration — confirm against the implementation.
 */
export interface ObservabilityConfig {
  /** Metrics section. */
  metrics: {
    enabled: boolean;
    retentionDays: number;
    exportInterval: number;
    exporters: MetricExporter[];
  };

  /** Tracing section. */
  tracing: {
    enabled: boolean;
    samplingRate: number;
    maxSpansPerTrace: number;
    exporters: TraceExporter[];
  };

  /** Logging section. */
  logging: {
    level:
      | 'debug'
      | 'info'
      | 'warn'
      | 'error';
    outputs: LogOutput[];
    structured: boolean;
    includeStackTrace: boolean;
  };

  /** Health-check section. */
  health: {
    checkInterval: number;
    timeoutMs: number;
    retryAttempts: number;
  };

  /** Alerting section. */
  alerting: {
    enabled: boolean;
    rules: AlertRule[];
    defaultCooldown: number;
  };

  /** Storage section. */
  storage: {
    dataPath: string;
    maxFileSize: number;
    compressionEnabled: boolean;
    encryptionEnabled: boolean;
  };
}
/**
 * Settings for one metric exporter.
 */
export interface MetricExporter {
  /** Exporter backend kind. */
  type:
    | 'prometheus'
    | 'statsd'
    | 'opentelemetry'
    | 'file';
  /** Optional endpoint string. */
  endpoint?: string;
  /** Optional string key/value authentication settings. */
  authentication?: Record<string, string>;
  /** Batch size. */
  batchSize: number;
  /** Flush interval. NOTE(review): unit is not stated here — confirm. */
  flushInterval: number;
}
/**
 * Settings for one trace exporter.
 */
export interface TraceExporter {
  /** Exporter backend kind. */
  type:
    | 'jaeger'
    | 'zipkin'
    | 'opentelemetry'
    | 'file';
  /** Optional endpoint string. */
  endpoint?: string;
  /** Optional string key/value authentication settings. */
  authentication?: Record<string, string>;
  /** Batch size. */
  batchSize: number;
  /** Flush interval. NOTE(review): unit is not stated here — confirm. */
  flushInterval: number;
}
/**
 * Settings for one log output.
 */
export interface LogOutput {
  /** Output kind. */
  type:
    | 'console'
    | 'file'
    | 'syslog'
    | 'elasticsearch';
  /** Free-form settings for the output; values are untyped (`any`). */
  configuration: Record<string, any>;
  /** Optional level, typed as a plain string here. */
  level?: string;
  /** Optional format, typed as a plain string here. */
  format?: string;
}
/**
 * Declared public surface of the observability system. It extends
 * `EventEmitter` and exposes metric recording, span tracing, operation
 * profiling, health checks, alert access, and data export.
 *
 * This is a declaration only; the behaviour behind each member lives in the
 * implementation file.
 */
export declare class ObservabilitySystem extends EventEmitter {
  // Private state. Types are omitted in this declaration.
  private logger;
  private config;
  private metricsCollector;
  private tracingSystem;
  private healthMonitor;
  private alertManager;
  private performanceProfiler;
  private dataStorage;
  private isRunning;
  private systemStartTime;

  /**
   * @param config Configuration for the system.
   */
  constructor(config: ObservabilityConfig);

  /**
   * Initializes and starts the observability system.
   */
  initialize(): Promise<void>;

  /**
   * Records a metric value.
   *
   * @param name Metric name.
   * @param value Numeric value to record.
   * @param tags Optional string labels.
   * @param unit Optional unit label.
   */
  recordMetric(name: string, value: number, tags?: Record<string, string>, unit?: string): void;

  /**
   * Increments a counter.
   *
   * @param name Counter name.
   * @param tags Optional string labels.
   * @param value Optional amount. NOTE(review): the default used when omitted
   *   is not visible in this declaration — confirm.
   */
  incrementCounter(name: string, tags?: Record<string, string>, value?: number): void;

  /**
   * Records a timer value.
   *
   * @param name Timer name.
   * @param duration Duration to record. NOTE(review): unit is not stated
   *   here — confirm.
   * @param tags Optional string labels.
   */
  recordTimer(name: string, duration: number, tags?: Record<string, string>): void;

  /**
   * Starts a trace span.
   *
   * @param operationName Name of the operation the span covers.
   * @param parentSpan Optional parent span.
   * @returns The started span.
   */
  startSpan(operationName: string, parentSpan?: TraceSpan): TraceSpan;

  /**
   * Finishes a trace span.
   *
   * @param span The span to finish.
   * @param tags Optional string labels supplied when finishing.
   */
  finishSpan(span: TraceSpan, tags?: Record<string, string>): void;

  /**
   * Profiles an asynchronous operation.
   *
   * @param operationName Name under which the operation is profiled.
   * @param operation Callback producing a promise of `T`.
   * @param metadata Optional free-form metadata.
   * @returns A promise of `T`; presumably the callback's own result — confirm
   *   against the implementation.
   */
  profileOperation<T>(operationName: string, operation: () => Promise<T>, metadata?: Record<string, any>): Promise<T>;

  /**
   * Checks system health.
   *
   * @returns A promise of the system health report.
   */
  checkHealth(): Promise<SystemHealth>;

  /**
   * Returns a metrics summary.
   *
   * @param timeRange Optional start/end pair.
   */
  getMetricsSummary(timeRange?: { start: Date; end: Date }): MetricsSummary;

  /**
   * Returns the performance profiles.
   */
  getPerformanceProfiles(): PerformanceProfile[];

  /**
   * Returns the active alerts.
   */
  getActiveAlerts(): Alert[];

  /**
   * Creates a custom alert rule.
   *
   * @param rule The rule to create.
   */
  createAlertRule(rule: AlertRule): void;

  /**
   * Enhanced: OpenTelemetry integration - traces a model request.
   *
   * @param operation Operation name.
   * @param attributes Any subset of the model-request span attributes.
   * @param fn Callback producing a promise of `T`.
   * @returns A promise of `T`.
   */
  traceModelRequest<T>(operation: string, attributes: Partial<ModelRequestSpanAttributes>, fn: () => Promise<T>): Promise<T>;

  /**
   * Enhanced: OpenTelemetry integration - traces agent communication.
   *
   * @param attributes Attributes whose values are strings, numbers, or booleans.
   * @param fn Callback producing a promise of `T`.
   * @returns A promise of `T`.
   */
  traceAgentCommunication<T>(attributes: Record<string, string | number | boolean>, fn: () => Promise<T>): Promise<T>;

  /**
   * Enhanced: records tool execution metrics.
   *
   * @param toolName Name of the tool.
   * @param executionTime Execution time. NOTE(review): unit is not stated
   *   here — confirm.
   * @param success Success flag for the execution.
   * @param errorType Optional error classification.
   */
  recordToolExecution(toolName: string, executionTime: number, success: boolean, errorType?: string): void;

  /**
   * Returns system statistics.
   */
  getSystemStats(): ObservabilityStats;

  /**
   * Exports observability data.
   *
   * @param format Output format.
   * @param timeRange Optional start/end pair.
   * @returns A promise of a string.
   */
  exportData(format: 'json' | 'csv' | 'prometheus', timeRange?: { start: Date; end: Date }): Promise<string>;

  /**
   * Shuts down the observability system.
   */
  shutdown(): Promise<void>;

  // Private methods. Signatures are omitted in this declaration.
  private startSystemMonitoring;
  private collectSystemMetrics;
  private measureEventLoopLag;
  private convertToCSV;
  private convertToPrometheus;
}
/**
 * Return shape of `ObservabilitySystem.getMetricsSummary`.
 * Not exported from this module.
 */
interface MetricsSummary {
  /** Numeric total of metrics. */
  totalMetrics: number;
  /** Numeric count of unique metrics. */
  uniqueMetrics: number;
  /** Start/end pair attached to the summary. */
  timeRange: { start: Date; end: Date };
  /** Per-metric entries with a name, a count, and an average value. */
  topMetrics: {
    name: string;
    count: number;
    avgValue: number;
  }[];
  /** Free-form aggregation results; values are untyped (`any`). */
  aggregations: Record<string, any>;
}
/**
 * Return shape of `ObservabilitySystem.getSystemStats`: runtime information
 * plus one statistics section per subsystem. Not exported from this module.
 */
interface ObservabilityStats {
  /** Runtime information. */
  systemInfo: {
    /** NOTE(review): unit is not stated here — confirm. */
    uptime: number;
    version: string;
    nodeVersion: string;
    platform: string;
    arch: string;
  };

  // One section per subsystem.
  metrics: MetricsStats;
  tracing: TracingStats;
  health: HealthStats;
  alerts: AlertStats;
  performance: PerformanceStats;
  storage: StorageStats;
}
/**
 * Metrics section of `ObservabilityStats`. Not exported from this module.
 */
interface MetricsStats {
  totalCollected: number;
  uniqueNames: number;
  aggregatedMetrics: number;
  /** NOTE(review): unit is not stated here — confirm. */
  memoryUsage: number;
  /** One entry per exporter with its type and a health flag. */
  exporterStatus: {
    type: string;
    healthy: boolean;
  }[];
}
/**
 * Tracing section of `ObservabilityStats`; all fields are numeric.
 * Not exported from this module.
 */
interface TracingStats {
  totalTraces: number;
  totalSpans: number;
  activeSpans: number;
  averageSpansPerTrace: number;
  /** NOTE(review): unit is not stated here — confirm. */
  averageDuration: number;
}
/**
 * Health section of `ObservabilityStats`: component counts and a
 * last-check timestamp. Not exported from this module.
 */
interface HealthStats {
  totalComponents: number;
  healthyComponents: number;
  degradedComponents: number;
  criticalComponents: number;
  /** Timestamp of the last health check. */
  lastHealthCheck: Date;
}
/**
 * Alerts section of `ObservabilityStats`; all fields are numeric.
 * Not exported from this module.
 */
interface AlertStats {
  totalRules: number;
  activeAlerts: number;
  alertsLast24h: number;
  criticalAlerts: number;
  resolvedAlertsLast24h: number;
}
/**
 * Performance section of `ObservabilityStats`; all fields are numeric.
 * Not exported from this module.
 */
interface PerformanceStats {
  totalOperations: number;
  totalMeasurements: number;
  /** NOTE(review): unit is not stated here — confirm. */
  averageDuration: number;
  /** NOTE(review): scale is not stated here — confirm. */
  memoryEfficiency: number;
}
/**
 * Storage section of `ObservabilityStats`. Not exported from this module.
 */
interface StorageStats {
  /** Data path, as a string. */
  dataPath: string;
  /** Total size. NOTE(review): unit is not stated here — confirm. */
  totalSize: number;
  /** Compression flag. */
  compressionEnabled: boolean;
  /** Encryption flag. */
  encryptionEnabled: boolean;
}
/**
 * Enhanced: factory function for obtaining an observability system with
 * OpenTelemetry support.
 *
 * Takes no arguments, so any configuration must come from elsewhere.
 * NOTE(review): this declaration does not show whether each call builds a
 * new instance or returns a shared one — confirm against the implementation.
 *
 * @returns An `ObservabilitySystem` instance.
 */
export declare function getTelemetryProvider(): ObservabilitySystem;
export {};
//# sourceMappingURL=observability-system.d.ts.map