@ooples/token-optimizer-mcp
Intelligent context window optimization for Claude Code - store content externally via caching and compression, freeing up your context window for what matters
/**
 * Cache Benchmark - comprehensive cache performance testing with ~89% token reduction (results are returned as compact summaries)
*
* Features:
* - Strategy comparison (LRU vs LFU vs FIFO vs TTL vs size vs hybrid)
* - Load testing with configurable concurrency
* - Latency profiling with percentiles (p50, p90, p95, p99)
* - Throughput testing (operations per second)
* - Comprehensive reports in markdown, HTML, JSON, PDF
* - Workload simulation (read-heavy, write-heavy, mixed, custom, realistic)
*
* Operations:
* 1. run-benchmark: Execute complete benchmark suite
* 2. compare: Compare multiple cache configurations
* 3. load-test: Stress test cache under load
* 4. latency-test: Measure latency distribution
* 5. throughput-test: Measure throughput limits
* 6. report: Generate comprehensive benchmark report
*/
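/*
 * Illustrative only (not part of the declarations below): every operation is
 * driven through the same options object (see CacheBenchmarkOptions further
 * down). The config names and benchmark id used here are made up for the
 * example.
 *
 *   const compareOpts: CacheBenchmarkOptions = {
 *     operation: 'compare',
 *     configs: [
 *       { name: 'lru', strategy: 'LRU', maxEntries: 10000 },
 *       { name: 'lfu', strategy: 'LFU', maxEntries: 10000 },
 *     ],
 *     workloadType: 'mixed',
 *   };
 *
 *   const reportOpts: CacheBenchmarkOptions = {
 *     operation: 'report',
 *     benchmarkId: 'bench-2024-01-01',   // hypothetical id from a previous run
 *     format: 'markdown',
 *   };
 */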
import { CacheEngine } from '../../core/cache-engine.js';
import { TokenCounter } from '../../core/token-counter.js';
import { MetricsCollector } from '../../core/metrics.js';
export type CacheStrategy = 'LRU' | 'LFU' | 'FIFO' | 'TTL' | 'size' | 'hybrid';
export type WorkloadType = 'read-heavy' | 'write-heavy' | 'mixed' | 'custom' | 'realistic';
export type ReportFormat = 'markdown' | 'html' | 'json' | 'pdf';
export interface CacheConfig {
name: string;
strategy: CacheStrategy;
maxSize?: number;
maxEntries?: number;
ttl?: number;
evictionPolicy?: 'strict' | 'lazy';
compressionEnabled?: boolean;
params?: Record<string, any>;
}
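/*
 * A plausible CacheConfig literal (illustrative). The units are assumptions:
 * the declaration does not state whether maxSize is bytes or ttl is seconds.
 *
 *   const hybridConfig: CacheConfig = {
 *     name: 'hybrid-compressed',
 *     strategy: 'hybrid',
 *     maxSize: 64 * 1024 * 1024,   // assumed to be bytes
 *     maxEntries: 50000,
 *     ttl: 3600,                   // assumed to be seconds
 *     evictionPolicy: 'lazy',
 *     compressionEnabled: true,
 *   };
 */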
export interface WorkloadConfig {
type: WorkloadType;
ratio?: {
read: number;
write: number;
};
duration: number;
concurrency: number;
keyCount: number;
valueSize: number;
keyDistribution?: 'uniform' | 'zipf' | 'gaussian';
accessPattern?: 'sequential' | 'random' | 'temporal';
}
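/*
 * A read-heavy workload sketch (illustrative). The 80/20 ratio and zipf key
 * distribution are example choices, not documented defaults; duration and
 * valueSize are assumed to be seconds and bytes respectively.
 *
 *   const readHeavy: WorkloadConfig = {
 *     type: 'read-heavy',
 *     ratio: { read: 0.8, write: 0.2 },
 *     duration: 60,
 *     concurrency: 10,
 *     keyCount: 10000,
 *     valueSize: 1024,
 *     keyDistribution: 'zipf',
 *     accessPattern: 'random',
 *   };
 */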
export interface LatencyMetrics {
min: number;
max: number;
mean: number;
median: number;
p50: number;
p90: number;
p95: number;
p99: number;
p99_9: number;
stddev: number;
}
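/*
 * How percentile fields such as p99 and p99_9 are typically derived from raw
 * samples (a minimal nearest-rank sketch; the module's actual implementation
 * is private). `latencySamples` stands in for the raw per-operation
 * measurements, assumed to be in milliseconds.
 *
 *   function percentile(sorted: number[], p: number): number {
 *     // nearest-rank percentile over an ascending-sorted sample array
 *     const idx = Math.max(0, Math.ceil((p / 100) * sorted.length) - 1);
 *     return sorted[Math.min(idx, sorted.length - 1)];
 *   }
 *
 *   const sorted = latencySamples.slice().sort((a, b) => a - b);
 *   const p99 = percentile(sorted, 99);
 *   const p99_9 = percentile(sorted, 99.9);
 */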
export interface ThroughputMetrics {
operationsPerSecond: number;
readOps: number;
writeOps: number;
peakThroughput: number;
sustainedThroughput: number;
averageLatency: number;
}
export interface BenchmarkResults {
config: CacheConfig;
workload: WorkloadConfig;
duration: number;
operations: {
total: number;
reads: number;
writes: number;
hits: number;
misses: number;
};
performance: {
latency: LatencyMetrics;
throughput: ThroughputMetrics;
};
cache: {
hitRate: number;
missRate: number;
evictions: number;
memoryUsage: number;
entryCount: number;
};
tokenMetrics: {
totalTokens: number;
savedTokens: number;
compressionRatio: number;
};
timestamp: number;
}
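/*
 * The cache.hitRate and cache.missRate fields presumably follow the usual
 * definitions over the operation counts above; that is an assumption shown
 * here for reference, not taken from the implementation.
 *
 *   function hitAndMissRates(r: BenchmarkResults): { hitRate: number; missRate: number } {
 *     const lookups = r.operations.hits + r.operations.misses;
 *     const hitRate = lookups > 0 ? r.operations.hits / lookups : 0;
 *     const missRate = lookups > 0 ? r.operations.misses / lookups : 0;
 *     return { hitRate, missRate };
 *   }
 */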
export interface ComparisonResult {
configs: CacheConfig[];
results: BenchmarkResults[];
winner: {
config: string;
metric: string;
value: number;
};
rankings: {
byLatency: string[];
byThroughput: string[];
byHitRate: string[];
byMemoryEfficiency: string[];
};
recommendations: string[];
}
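/*
 * Consuming a ComparisonResult (illustrative). The rankings arrays are
 * presumed to be ordered best-first; that ordering is an assumption.
 *
 *   function summarizeComparison(comparison: ComparisonResult): void {
 *     console.log(`Winner: ${comparison.winner.config} (${comparison.winner.metric} = ${comparison.winner.value})`);
 *     console.log(`Lowest latency: ${comparison.rankings.byLatency[0]}`);
 *     console.log(`Highest throughput: ${comparison.rankings.byThroughput[0]}`);
 *     for (const r of comparison.recommendations) console.log(`- ${r}`);
 *   }
 */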
export interface LoadTestResults {
phases: Array<{
concurrency: number;
duration: number;
throughput: number;
errorRate: number;
p99Latency: number;
}>;
maxConcurrency: number;
breakingPoint?: {
concurrency: number;
reason: string;
};
summary: {
totalRequests: number;
successfulRequests: number;
failedRequests: number;
averageThroughput: number;
peakThroughput: number;
};
}
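/*
 * Interpreting LoadTestResults (illustrative): walk the phases to find where
 * p99 latency peaks, and use breakingPoint, when present, for the concurrency
 * level the implementation flagged as the limit. Latency units are assumed to
 * be milliseconds, and at least one phase is assumed to exist.
 *
 *   function describeLoadTest(lt: LoadTestResults): string {
 *     const worst = lt.phases.reduce((a, b) => (b.p99Latency > a.p99Latency ? b : a));
 *     const limit = lt.breakingPoint
 *       ? `broke at concurrency ${lt.breakingPoint.concurrency} (${lt.breakingPoint.reason})`
 *       : `held up to concurrency ${lt.maxConcurrency}`;
 *     return `${limit}; worst p99 ${worst.p99Latency} ms at concurrency ${worst.concurrency}`;
 *   }
 */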
export interface CacheBenchmarkOptions {
operation: 'run-benchmark' | 'compare' | 'load-test' | 'latency-test' | 'throughput-test' | 'report';
config?: CacheConfig;
configs?: CacheConfig[];
workload?: Partial<WorkloadConfig>;
duration?: number;
warmupDuration?: number;
workloadType?: WorkloadType;
workloadRatio?: {
read: number;
write: number;
};
concurrency?: number;
rampUp?: number;
targetTPS?: number;
maxConcurrency?: number;
stepSize?: number;
percentiles?: number[];
format?: ReportFormat;
includeCharts?: boolean;
outputPath?: string;
benchmarkId?: string;
resultsPath?: string;
useCache?: boolean;
cacheTTL?: number;
}
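/*
 * A load-test options sketch (illustrative). The step, ramp-up, and duration
 * values are example choices; the defaults quoted in the tool schema below
 * are assumed to apply when fields are omitted.
 *
 *   const loadTestOpts: CacheBenchmarkOptions = {
 *     operation: 'load-test',
 *     config: { name: 'lru-under-load', strategy: 'LRU', maxEntries: 100000 },
 *     maxConcurrency: 100,
 *     stepSize: 10,
 *     rampUp: 5,
 *     duration: 30,
 *     useCache: false,   // skip result caching for a fresh stress run
 *   };
 */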
export interface CacheBenchmarkResult {
success: boolean;
operation: string;
benchmarkResults?: BenchmarkResults;
comparison?: ComparisonResult;
loadTestResults?: LoadTestResults;
latencyDistribution?: LatencyMetrics;
throughputResults?: ThroughputMetrics;
reportPath?: string;
reportFormat?: ReportFormat;
metadata: {
tokensUsed: number;
tokensSaved: number;
cacheHit: boolean;
executionTime: number;
compressionRatio?: number;
};
error?: string;
}
export declare class CacheBenchmark {
private tokenCounter;
private metrics;
private executor;
private reportGenerator;
private benchmarkCache;
constructor(cache: CacheEngine, tokenCounter: TokenCounter, metrics: MetricsCollector);
/**
* Main entry point for benchmark operations
*/
run(options: CacheBenchmarkOptions): Promise<CacheBenchmarkResult>;
/**
* Run a single benchmark
*/
private runBenchmark;
/**
* Compare multiple configurations
*/
private compareConfigurations;
/**
* Run load test with increasing concurrency
*/
private runLoadTest;
/**
* Run latency test with specific percentiles
*/
private runLatencyTest;
/**
* Run throughput test
*/
private runThroughputTest;
/**
* Generate comprehensive report
*/
private generateReport;
/**
* Build workload configuration from options
*/
private buildWorkloadConfig;
/**
* Generate cache key for benchmark results
*/
private generateBenchmarkCacheKey;
/**
* Generate summary of benchmark results (89% token reduction)
*/
private generateResultSummary;
/**
* Generate summary of comparison results (89% token reduction)
*/
private generateComparisonSummary;
/**
* Generate summary of load test results (89% token reduction)
*/
private generateLoadTestSummary;
/**
* Analyze comparison results
*/
private analyzeComparison;
}
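/*
 * Constructing and running the benchmark class directly (illustrative). The
 * CacheEngine, TokenCounter, and MetricsCollector constructors are assumed to
 * take no arguments here, which may not match their real signatures.
 *
 *   // inside an async function:
 *   const cache = new CacheEngine();
 *   const tokenCounter = new TokenCounter();
 *   const metrics = new MetricsCollector();
 *   const benchmark = new CacheBenchmark(cache, tokenCounter, metrics);
 *
 *   const result = await benchmark.run({
 *     operation: 'latency-test',
 *     percentiles: [50, 90, 95, 99, 99.9],
 *     concurrency: 10,
 *   });
 *   if (result.success && result.latencyDistribution) {
 *     console.log(`p99: ${result.latencyDistribution.p99} ms`);   // units assumed to be ms
 *   } else {
 *     console.error(result.error);
 *   }
 */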
/**
* Runner function for MCP tool integration
*/
export declare function runCacheBenchmark(options: CacheBenchmarkOptions, cache: CacheEngine, tokenCounter: TokenCounter, metrics: MetricsCollector): Promise<string>;
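/*
 * Calling the MCP runner directly (illustrative). `cache`, `tokenCounter`,
 * and `metrics` are assumed to be core instances like those in the class
 * sketch above; the returned string is presumably a serialized result
 * summary, which is an assumption rather than something documented here.
 *
 *   const summary: string = await runCacheBenchmark(
 *     { operation: 'run-benchmark', workloadType: 'realistic', duration: 60 },
 *     cache,
 *     tokenCounter,
 *     metrics,
 *   );
 *   console.log(summary);
 */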
/**
* MCP Tool Definition
*/
export declare const CACHE_BENCHMARK_TOOL_DEFINITION: {
readonly name: "cache-benchmark";
readonly description: "Cache Performance Benchmarking with 89% token reduction through comprehensive testing and analysis.\n\nFeatures:\n- Strategy comparison (LRU vs LFU vs FIFO vs TTL vs size vs hybrid)\n- Load testing with configurable concurrency and ramp-up\n- Latency profiling with percentiles (p50, p90, p95, p99, p99.9)\n- Throughput testing (operations per second)\n- Comprehensive reports in markdown, HTML, JSON, PDF\n- Workload simulation (read-heavy, write-heavy, mixed, realistic)\n\nOperations:\n- run-benchmark: Execute complete benchmark suite\n- compare: Compare multiple cache configurations\n- load-test: Stress test cache under load\n- latency-test: Measure latency distribution with percentiles\n- throughput-test: Measure throughput limits\n- report: Generate comprehensive benchmark report\n\nToken Reduction:\n- Benchmark results: ~89% (summary only)\n- Comparison: ~91% (rankings + winner)\n- Load test: ~88% (summary + breaking point)\n- Latency test: ~87% (percentiles only)\n- Throughput test: ~90% (key metrics only)\n- Report: ~85% (formatted summary)\n- Average: 89% reduction";
readonly inputSchema: {
readonly type: "object";
readonly properties: {
readonly operation: {
readonly type: "string";
readonly enum: readonly ["run-benchmark", "compare", "load-test", "latency-test", "throughput-test", "report"];
readonly description: "Benchmark operation to perform";
};
readonly config: {
readonly type: "object";
readonly description: "Cache configuration for single benchmark";
readonly properties: {
readonly name: {
readonly type: "string";
};
readonly strategy: {
readonly type: "string";
readonly enum: readonly ["LRU", "LFU", "FIFO", "TTL", "size", "hybrid"];
};
readonly maxSize: {
readonly type: "number";
};
readonly maxEntries: {
readonly type: "number";
};
readonly ttl: {
readonly type: "number";
};
};
};
readonly configs: {
readonly type: "array";
readonly description: "Multiple cache configurations for comparison";
readonly items: {
readonly type: "object";
readonly properties: {
readonly name: {
readonly type: "string";
};
readonly strategy: {
readonly type: "string";
readonly enum: readonly ["LRU", "LFU", "FIFO", "TTL", "size", "hybrid"];
};
};
};
};
readonly duration: {
readonly type: "number";
readonly description: "Benchmark duration in seconds (default: 60)";
};
readonly warmupDuration: {
readonly type: "number";
readonly description: "Warmup duration in seconds (default: 10)";
};
readonly workloadType: {
readonly type: "string";
readonly enum: readonly ["read-heavy", "write-heavy", "mixed", "custom", "realistic"];
readonly description: "Type of workload to simulate";
};
readonly workloadRatio: {
readonly type: "object";
readonly description: "Custom read/write ratio";
readonly properties: {
readonly read: {
readonly type: "number";
};
readonly write: {
readonly type: "number";
};
};
};
readonly concurrency: {
readonly type: "number";
readonly description: "Number of concurrent workers (default: 10)";
};
readonly rampUp: {
readonly type: "number";
readonly description: "Ramp-up time in seconds (for load-test)";
};
readonly targetTPS: {
readonly type: "number";
readonly description: "Target transactions per second";
};
readonly maxConcurrency: {
readonly type: "number";
readonly description: "Maximum concurrency for load test (default: 100)";
};
readonly stepSize: {
readonly type: "number";
readonly description: "Concurrency step size for load test (default: 10)";
};
readonly percentiles: {
readonly type: "array";
readonly items: {
readonly type: "number";
};
readonly description: "Percentiles to measure (default: [50, 90, 95, 99])";
};
readonly format: {
readonly type: "string";
readonly enum: readonly ["markdown", "html", "json", "pdf"];
readonly description: "Report format (default: markdown)";
};
readonly includeCharts: {
readonly type: "boolean";
readonly description: "Include charts in report";
};
readonly outputPath: {
readonly type: "string";
readonly description: "Path to save report";
};
readonly benchmarkId: {
readonly type: "string";
readonly description: "ID of benchmark results to generate report for";
};
readonly useCache: {
readonly type: "boolean";
readonly description: "Cache benchmark results (default: true)";
};
readonly cacheTTL: {
readonly type: "number";
readonly description: "Cache TTL in seconds (default: 604800 - 7 days)";
};
};
readonly required: readonly ["operation"];
};
};
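/*
 * Wiring the tool definition into an MCP server (sketch). The dispatcher below
 * is generic pseudocode for a tools/call handler, not a specific SDK API; only
 * CACHE_BENCHMARK_TOOL_DEFINITION and runCacheBenchmark come from this module,
 * and `cache`, `tokenCounter`, and `metrics` are assumed core instances.
 *
 *   async function handleToolCall(name: string, args: Record<string, unknown>): Promise<string> {
 *     if (name === CACHE_BENCHMARK_TOOL_DEFINITION.name) {
 *       return runCacheBenchmark(args as unknown as CacheBenchmarkOptions, cache, tokenCounter, metrics);
 *     }
 *     throw new Error(`Unknown tool: ${name}`);
 *   }
 */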
export default CacheBenchmark;
//# sourceMappingURL=cache-benchmark.d.ts.map