@ooples/token-optimizer-mcp

Intelligent context window optimization for Claude Code - store content externally via caching and compression, freeing up your context window for what matters

cache-benchmark.d.ts (378 lines, 13.4 kB)
/**
 * Cache Benchmark - 89% token reduction through comprehensive cache performance testing
 *
 * Features:
 * - Strategy comparison (LRU vs LFU vs FIFO vs TTL vs size vs hybrid)
 * - Load testing with configurable concurrency
 * - Latency profiling with percentiles (p50, p90, p95, p99)
 * - Throughput testing (operations per second)
 * - Comprehensive reports in markdown, HTML, JSON, PDF
 * - Workload simulation (read-heavy, write-heavy, mixed, custom, realistic)
 *
 * Operations:
 * 1. run-benchmark: Execute complete benchmark suite
 * 2. compare: Compare multiple cache configurations
 * 3. load-test: Stress test cache under load
 * 4. latency-test: Measure latency distribution
 * 5. throughput-test: Measure throughput limits
 * 6. report: Generate comprehensive benchmark report
 */
import { CacheEngine } from '../../core/cache-engine.js';
import { TokenCounter } from '../../core/token-counter.js';
import { MetricsCollector } from '../../core/metrics.js';

export type CacheStrategy = 'LRU' | 'LFU' | 'FIFO' | 'TTL' | 'size' | 'hybrid';
export type WorkloadType = 'read-heavy' | 'write-heavy' | 'mixed' | 'custom' | 'realistic';
export type ReportFormat = 'markdown' | 'html' | 'json' | 'pdf';

export interface CacheConfig {
  name: string;
  strategy: CacheStrategy;
  maxSize?: number;
  maxEntries?: number;
  ttl?: number;
  evictionPolicy?: 'strict' | 'lazy';
  compressionEnabled?: boolean;
  params?: Record<string, any>;
}

export interface WorkloadConfig {
  type: WorkloadType;
  ratio?: { read: number; write: number; };
  duration: number;
  concurrency: number;
  keyCount: number;
  valueSize: number;
  keyDistribution?: 'uniform' | 'zipf' | 'gaussian';
  accessPattern?: 'sequential' | 'random' | 'temporal';
}

export interface LatencyMetrics {
  min: number;
  max: number;
  mean: number;
  median: number;
  p50: number;
  p90: number;
  p95: number;
  p99: number;
  p99_9: number;
  stddev: number;
}

export interface ThroughputMetrics {
  operationsPerSecond: number;
  readOps: number;
  writeOps: number;
  peakThroughput: number;
  sustainedThroughput: number;
  averageLatency: number;
}

export interface BenchmarkResults {
  config: CacheConfig;
  workload: WorkloadConfig;
  duration: number;
  operations: {
    total: number;
    reads: number;
    writes: number;
    hits: number;
    misses: number;
  };
  performance: {
    latency: LatencyMetrics;
    throughput: ThroughputMetrics;
  };
  cache: {
    hitRate: number;
    missRate: number;
    evictions: number;
    memoryUsage: number;
    entryCount: number;
  };
  tokenMetrics: {
    totalTokens: number;
    savedTokens: number;
    compressionRatio: number;
  };
  timestamp: number;
}

export interface ComparisonResult {
  configs: CacheConfig[];
  results: BenchmarkResults[];
  winner: {
    config: string;
    metric: string;
    value: number;
  };
  rankings: {
    byLatency: string[];
    byThroughput: string[];
    byHitRate: string[];
    byMemoryEfficiency: string[];
  };
  recommendations: string[];
}

export interface LoadTestResults {
  phases: Array<{
    concurrency: number;
    duration: number;
    throughput: number;
    errorRate: number;
    p99Latency: number;
  }>;
  maxConcurrency: number;
  breakingPoint?: {
    concurrency: number;
    reason: string;
  };
  summary: {
    totalRequests: number;
    successfulRequests: number;
    failedRequests: number;
    averageThroughput: number;
    peakThroughput: number;
  };
}

export interface CacheBenchmarkOptions {
  operation: 'run-benchmark' | 'compare' | 'load-test' | 'latency-test' | 'throughput-test' | 'report';
  config?: CacheConfig;
  configs?: CacheConfig[];
  workload?: Partial<WorkloadConfig>;
  duration?: number;
  warmupDuration?: number;
  workloadType?: WorkloadType;
  workloadRatio?: { read: number; write: number; };
  concurrency?: number;
  rampUp?: number;
  targetTPS?: number;
  maxConcurrency?: number;
  stepSize?: number;
  percentiles?: number[];
  format?: ReportFormat;
  includeCharts?: boolean;
  outputPath?: string;
  benchmarkId?: string;
  resultsPath?: string;
  useCache?: boolean;
  cacheTTL?: number;
}

export interface CacheBenchmarkResult {
  success: boolean;
  operation: string;
  benchmarkResults?: BenchmarkResults;
  comparison?: ComparisonResult;
  loadTestResults?: LoadTestResults;
  latencyDistribution?: LatencyMetrics;
  throughputResults?: ThroughputMetrics;
  reportPath?: string;
  reportFormat?: ReportFormat;
  metadata: {
    tokensUsed: number;
    tokensSaved: number;
    cacheHit: boolean;
    executionTime: number;
    compressionRatio?: number;
  };
  error?: string;
}

export declare class CacheBenchmark {
  private tokenCounter;
  private metrics;
  private executor;
  private reportGenerator;
  private benchmarkCache;
  constructor(cache: CacheEngine, tokenCounter: TokenCounter, metrics: MetricsCollector);
  /** Main entry point for benchmark operations */
  run(options: CacheBenchmarkOptions): Promise<CacheBenchmarkResult>;
  /** Run a single benchmark */
  private runBenchmark;
  /** Compare multiple configurations */
  private compareConfigurations;
  /** Run load test with increasing concurrency */
  private runLoadTest;
  /** Run latency test with specific percentiles */
  private runLatencyTest;
  /** Run throughput test */
  private runThroughputTest;
  /** Generate comprehensive report */
  private generateReport;
  /** Build workload configuration from options */
  private buildWorkloadConfig;
  /** Generate cache key for benchmark results */
  private generateBenchmarkCacheKey;
  /** Generate summary of benchmark results (89% token reduction) */
  private generateResultSummary;
  /** Generate summary of comparison results (89% token reduction) */
  private generateComparisonSummary;
  /** Generate summary of load test results (89% token reduction) */
  private generateLoadTestSummary;
  /** Analyze comparison results */
  private analyzeComparison;
}

/**
 * Runner function for MCP tool integration
 */
export declare function runCacheBenchmark(options: CacheBenchmarkOptions, cache: CacheEngine, tokenCounter: TokenCounter, metrics: MetricsCollector): Promise<string>;

/**
 * MCP Tool Definition
 */
export declare const CACHE_BENCHMARK_TOOL_DEFINITION: {
  readonly name: "cache-benchmark";
  readonly description: "Cache Performance Benchmarking with 89% token reduction through comprehensive testing and analysis.\n\nFeatures:\n- Strategy comparison (LRU vs LFU vs FIFO vs TTL vs size vs hybrid)\n- Load testing with configurable concurrency and ramp-up\n- Latency profiling with percentiles (p50, p90, p95, p99, p99.9)\n- Throughput testing (operations per second)\n- Comprehensive reports in markdown, HTML, JSON, PDF\n- Workload simulation (read-heavy, write-heavy, mixed, realistic)\n\nOperations:\n- run-benchmark: Execute complete benchmark suite\n- compare: Compare multiple cache configurations\n- load-test: Stress test cache under load\n- latency-test: Measure latency distribution with percentiles\n- throughput-test: Measure throughput limits\n- report: Generate comprehensive benchmark report\n\nToken Reduction:\n- Benchmark results: ~89% (summary only)\n- Comparison: ~91% (rankings + winner)\n- Load test: ~88% (summary + breaking point)\n- Latency test: ~87% (percentiles only)\n- Throughput test: ~90% (key metrics only)\n- Report: ~85% (formatted summary)\n- Average: 89% reduction";
  readonly inputSchema: {
    readonly type: "object";
    readonly properties: {
      readonly operation: {
        readonly type: "string";
        readonly enum: readonly ["run-benchmark", "compare", "load-test", "latency-test", "throughput-test", "report"];
        readonly description: "Benchmark operation to perform";
      };
      readonly config: {
        readonly type: "object";
        readonly description: "Cache configuration for single benchmark";
        readonly properties: {
          readonly name: { readonly type: "string"; };
          readonly strategy: {
            readonly type: "string";
            readonly enum: readonly ["LRU", "LFU", "FIFO", "TTL", "size", "hybrid"];
          };
          readonly maxSize: { readonly type: "number"; };
          readonly maxEntries: { readonly type: "number"; };
          readonly ttl: { readonly type: "number"; };
        };
      };
      readonly configs: {
        readonly type: "array";
        readonly description: "Multiple cache configurations for comparison";
        readonly items: {
          readonly type: "object";
          readonly properties: {
            readonly name: { readonly type: "string"; };
            readonly strategy: {
              readonly type: "string";
              readonly enum: readonly ["LRU", "LFU", "FIFO", "TTL", "size", "hybrid"];
            };
          };
        };
      };
      readonly duration: {
        readonly type: "number";
        readonly description: "Benchmark duration in seconds (default: 60)";
      };
      readonly warmupDuration: {
        readonly type: "number";
        readonly description: "Warmup duration in seconds (default: 10)";
      };
      readonly workloadType: {
        readonly type: "string";
        readonly enum: readonly ["read-heavy", "write-heavy", "mixed", "custom", "realistic"];
        readonly description: "Type of workload to simulate";
      };
      readonly workloadRatio: {
        readonly type: "object";
        readonly description: "Custom read/write ratio";
        readonly properties: {
          readonly read: { readonly type: "number"; };
          readonly write: { readonly type: "number"; };
        };
      };
      readonly concurrency: {
        readonly type: "number";
        readonly description: "Number of concurrent workers (default: 10)";
      };
      readonly rampUp: {
        readonly type: "number";
        readonly description: "Ramp-up time in seconds (for load-test)";
      };
      readonly targetTPS: {
        readonly type: "number";
        readonly description: "Target transactions per second";
      };
      readonly maxConcurrency: {
        readonly type: "number";
        readonly description: "Maximum concurrency for load test (default: 100)";
      };
      readonly stepSize: {
        readonly type: "number";
        readonly description: "Concurrency step size for load test (default: 10)";
      };
      readonly percentiles: {
        readonly type: "array";
        readonly items: { readonly type: "number"; };
        readonly description: "Percentiles to measure (default: [50, 90, 95, 99])";
      };
      readonly format: {
        readonly type: "string";
        readonly enum: readonly ["markdown", "html", "json", "pdf"];
        readonly description: "Report format (default: markdown)";
      };
      readonly includeCharts: {
        readonly type: "boolean";
        readonly description: "Include charts in report";
      };
      readonly outputPath: {
        readonly type: "string";
        readonly description: "Path to save report";
      };
      readonly benchmarkId: {
        readonly type: "string";
        readonly description: "ID of benchmark results to generate report for";
      };
      readonly useCache: {
        readonly type: "boolean";
        readonly description: "Cache benchmark results (default: true)";
      };
      readonly cacheTTL: {
        readonly type: "number";
        readonly description: "Cache TTL in seconds (default: 604800 - 7 days)";
      };
    };
    readonly required: readonly ["operation"];
  };
};

export default CacheBenchmark;
//# sourceMappingURL=cache-benchmark.d.ts.map