llmverify
AI Output Verification Toolkit — Local-first LLM safety, hallucination detection, PII redaction, prompt injection defense, and runtime monitoring. Zero telemetry. OWASP LLM Top 10 aligned.
/**
* Token Rate Engine
*
* Monitors tokens-per-second throughput and detects degradation.
* Lower token rates may indicate provider throttling or service degradation.
*
* WHAT THIS DOES:
* ✅ Calculates tokens per second
* ✅ Compares to baseline throughput
* ✅ Detects throughput degradation
*
* WHAT THIS DOES NOT DO:
* ❌ Predict throughput changes
* ❌ Identify cause of slowdowns
* ❌ Account for response complexity
*
* @module engines/runtime/token-rate
* @author Haiec
* @license MIT
*/
import { CallRecord, EngineResult, BaselineState } from '../../types/runtime';
/**
* Analyzes token generation rate against baseline.
*
* @param call - The call record to analyze
* @param baseline - Current baseline state
* @param thresholds - Optional custom thresholds
* @returns Engine result with token rate analysis
*
* @example
* const result = TokenRateEngine(callRecord, baseline);
* if (result.status === 'warn') {
* console.log('Token rate below normal');
* }
*/
export declare function TokenRateEngine(call: CallRecord, baseline: Pick<BaselineState, 'avgTokensPerSecond'>, thresholds?: {
warnRatio?: number;
errorRatio?: number;
}): EngineResult;
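The declaration above only exposes the signature. A minimal sketch of the ratio-based check it describes might look like the following; the `CallRecord` field names (`completionTokens`, `durationMs`), the `EngineResult` shape, and the default warn/error ratios are assumptions for illustration, not the package's actual internals.

```typescript
// Hypothetical stand-ins for the types imported from '../../types/runtime'.
// Field names are assumptions, not the package's real definitions.
interface CallRecord {
  completionTokens: number; // tokens generated in the response
  durationMs: number;       // wall-clock duration of the call
}

interface EngineResult {
  engine: string;
  status: 'ok' | 'warn' | 'error';
  message: string;
}

// Sketch: compute observed tokens/sec, compare to the baseline average,
// and flag degradation when the ratio falls below the warn/error thresholds.
function tokenRateSketch(
  call: CallRecord,
  baseline: { avgTokensPerSecond: number },
  thresholds: { warnRatio?: number; errorRatio?: number } = {}
): EngineResult {
  const { warnRatio = 0.5, errorRatio = 0.25 } = thresholds; // assumed defaults
  const observed = call.completionTokens / (call.durationMs / 1000);
  const ratio =
    baseline.avgTokensPerSecond > 0
      ? observed / baseline.avgTokensPerSecond
      : 1; // no baseline yet: treat as normal

  if (ratio < errorRatio) {
    return {
      engine: 'token-rate',
      status: 'error',
      message: `Throughput ${observed.toFixed(1)} tok/s is below ${errorRatio * 100}% of baseline`,
    };
  }
  if (ratio < warnRatio) {
    return {
      engine: 'token-rate',
      status: 'warn',
      message: `Throughput ${observed.toFixed(1)} tok/s is below ${warnRatio * 100}% of baseline`,
    };
  }
  return {
    engine: 'token-rate',
    status: 'ok',
    message: 'Token rate within baseline range',
  };
}
```

As in the declared API, the sketch returns a status rather than throwing, so callers can decide whether a degraded token rate is worth logging, alerting on, or ignoring.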