llmverify
Version:
AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.
24 lines • 9.07 kB
JavaScript
;
/**
* Runtime Monitoring Types
*
* Types for LLM runtime health monitoring, performance tracking,
* and behavioral fingerprinting.
*
* WHAT THIS PROVIDES:
* ✅ Structured call records for LLM interactions
* ✅ Engine result standardization
* ✅ Baseline state for drift detection
* ✅ Health reporting with actionable status
*
* WHAT THIS DOES NOT DO:
* ❌ Store sensitive data (prompts/responses are transient)
* ❌ Make predictions about LLM behavior
* ❌ Guarantee detection of all anomalies
*
* @module types/runtime
* @author Haiec
* @license MIT
*/
// Defines the property `__esModule` with value `true` on this module's
// `exports` object. Only `value` is given in the descriptor, so the property
// takes Object.defineProperty's defaults (non-writable, non-enumerable,
// non-configurable).
// NOTE(review): this looks like the standard CommonJS/ES-module interop marker
// emitted when an ES module is compiled to CommonJS — confirm against the
// build configuration.
// This is the only runtime statement visible in this file: the original
// source embedded in the source map below (src/types/runtime.ts) contains
// only `export interface` / `export type` declarations, which produce no
// runtime values.
Object.defineProperty(exports, "__esModule", { value: true });
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"runtime.js","sourceRoot":"","sources":["../../src/types/runtime.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;GAoBG","sourcesContent":["/**\n * Runtime Monitoring Types\n * \n * Types for LLM runtime health monitoring, performance tracking,\n * and behavioral fingerprinting.\n * \n * WHAT THIS PROVIDES:\n * ✅ Structured call records for LLM interactions\n * ✅ Engine result standardization\n * ✅ Baseline state for drift detection\n * ✅ Health reporting with actionable status\n * \n * WHAT THIS DOES NOT DO:\n * ❌ Store sensitive data (prompts/responses are transient)\n * ❌ Make predictions about LLM behavior\n * ❌ Guarantee detection of all anomalies\n * \n * @module types/runtime\n * @author Haiec\n * @license MIT\n */\n\n/**\n * Record of a single LLM API call.\n * All fields are captured at call time and are ephemeral.\n */\nexport interface CallRecord {\n  /** Unique identifier for this call (UUID v4) */\n  id: string;\n  /** Unix timestamp when call was initiated */\n  timestamp: number;\n  /** The prompt sent to the LLM (not stored, only used for analysis) */\n  prompt: string;\n  /** Model identifier (e.g., \"gpt-4\", \"claude-3\") */\n  model: string;\n  /** Response text from the LLM */\n  responseText: string;\n  /** Token count of the response */\n  responseTokens: number;\n  /** End-to-end latency in milliseconds */\n  latencyMs: number;\n  /** Optional: Provider-specific metadata */\n  metadata?: Record<string, unknown>;\n}\n\n/**\n * Status indicator for engine results.\n * - ok: Within normal parameters\n * - warn: Deviation detected, may warrant attention\n * - error: Significant anomaly detected\n */\nexport type EngineStatus = 'ok' | 'warn' | 'error';\n\n/**\n * Standardized result from any runtime engine.\n * All engines return this format for consistent aggregation.\n */\nexport interface EngineResult {\n  /** Metric name (e.g., \"latency\", \"token_rate\", 
\"fingerprint\") */\n  metric: string;\n  /** Normalized value 0-1 (0 = healthy, 1 = anomalous) */\n  value: number;\n  /** Status indicator */\n  status: EngineStatus;\n  /** Engine-specific details for debugging */\n  details: Record<string, unknown>;\n  /** Optional: Limitations of this specific check */\n  limitations?: string[];\n}\n\n/**\n * Response fingerprint for behavioral drift detection.\n * Captures structural characteristics without storing content.\n */\nexport interface ResponseFingerprint {\n  /** Total token count */\n  tokens: number;\n  /** Sentence count */\n  sentences: number;\n  /** Average sentence length in tokens */\n  avgSentLength: number;\n  /** Shannon entropy of character distribution */\n  entropy: number;\n}\n\n/**\n * Baseline state for drift detection.\n * Uses exponential moving average for stability.\n */\nexport interface BaselineState {\n  /** Average latency in milliseconds */\n  avgLatencyMs: number;\n  /** Average tokens per second */\n  avgTokensPerSecond: number;\n  /** Average similarity score (0-1) */\n  avgSimilarity: number;\n  /** Baseline fingerprint for comparison */\n  fingerprint: ResponseFingerprint | Record<string, never>;\n  /** Number of samples used to build baseline */\n  sampleCount: number;\n}\n\n/**\n * Overall health status of the LLM.\n * - stable: All metrics within normal range\n * - minor_variation: Small deviations, likely normal\n * - degraded: Notable issues, may affect quality\n * - unstable: Significant problems detected\n */\nexport type HealthStatus = 'stable' | 'minor_variation' | 'degraded' | 'unstable';\n\n/**\n * Comprehensive health report from the monitoring system.\n */\nexport interface HealthReport {\n  /** Overall health status */\n  health: HealthStatus;\n  /** Composite health score (0 = healthy, 1 = critical) */\n  score: number;\n  /** Individual engine results */\n  engineResults: EngineResult[];\n  /** Timestamp of this report */\n  timestamp?: number;\n  /** Recommendations 
based on current state */\n  recommendations?: string[];\n}\n\n/**\n * Configuration for the monitorLLM wrapper.\n */\nexport interface MonitorConfig {\n  /** Enable/disable specific engines */\n  engines?: {\n    latency?: boolean;\n    tokenRate?: boolean;\n    fingerprint?: boolean;\n    structure?: boolean;\n    consistency?: boolean;\n  };\n  /** Lifecycle hooks for health state changes */\n  hooks?: {\n    /** Called when health transitions to unstable */\n    onUnstable?: (report: HealthReport) => void;\n    /** Called when health transitions to degraded */\n    onDegraded?: (report: HealthReport) => void;\n    /** Called when health recovers to stable */\n    onRecovery?: (report: HealthReport) => void;\n    /** Called on every health check */\n    onHealthCheck?: (report: HealthReport) => void;\n  };\n  /** Thresholds for status determination */\n  thresholds?: {\n    /** Latency ratio threshold for warning (default: 1.2) */\n    latencyWarnRatio?: number;\n    /** Latency ratio threshold for error (default: 3.0) */\n    latencyErrorRatio?: number;\n    /** Token rate ratio threshold for warning (default: 0.8) */\n    tokenRateWarnRatio?: number;\n    /** Token rate ratio threshold for error (default: 0.2) */\n    tokenRateErrorRatio?: number;\n  };\n  /** Baseline learning rate (0-1, default: 0.1) */\n  learningRate?: number;\n  /** Minimum samples before baseline is considered stable */\n  minSamplesForBaseline?: number;\n}\n\n/**\n * Sentinel test result for proactive LLM verification.\n */\nexport interface SentinelTestResult {\n  /** Test name */\n  test: string;\n  /** Whether the test passed */\n  passed: boolean;\n  /** Detailed message */\n  message: string;\n  /** Test-specific details */\n  details: Record<string, unknown>;\n  /** Confidence in the result */\n  confidence: number;\n  /** Limitations of this test */\n  limitations: string[];\n}\n\n/**\n * Configuration for sentinel tests.\n */\nexport interface SentinelConfig {\n  /** LLM client 
to test */\n  client: {\n    generate: (opts: { prompt: string; model?: string }) => Promise<{ text: string; tokens?: number }>;\n  };\n  /** Model to use for tests */\n  model?: string;\n  /** Timeout for each test in milliseconds */\n  timeout?: number;\n  /** Number of retries on failure */\n  retries?: number;\n}\n"]}