claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

github.com/ruvnet/claude-flow

ruvnet/claude-flow

584 lines (507 loc) • 15.5 kB

text/typescript

/**
 * V3 Performance Benchmark Framework
 *
 * Comprehensive benchmarking system with statistical analysis,
 * memory tracking, and regression detection capabilities.
 *
 * Target Performance Metrics:
 * - CLI Startup: <500ms (5x faster)
 * - MCP Init: <400ms (4.5x faster)
 * - Agent Spawn: <200ms (4x faster)
 * - Vector Search: <1ms (150x faster)
 * - Memory Write: <5ms (10x faster)
 * - Swarm Consensus: <100ms (5x faster)
 * - Flash Attention: 2.49x-7.47x speedup
 * - Memory Usage: <256MB (50% reduction)
 */

import { performance, PerformanceObserver } from 'perf_hooks';
import os from 'node:os';

// ============================================================================
// Types and Interfaces
// ============================================================================

/** Snapshot of the fields of `process.memoryUsage()` (all values in bytes). */
export interface MemoryUsage {
  heapUsed: number;
  heapTotal: number;
  external: number;
  arrayBuffers: number;
  rss: number;
}

/**
 * Statistics produced by a single {@link benchmark} run.
 * All timing fields are in milliseconds and are computed after IQR outlier
 * removal.
 */
export interface BenchmarkResult {
  name: string;
  /** Number of measured iterations actually executed. */
  iterations: number;
  mean: number;
  median: number;
  p95: number;
  p99: number;
  min: number;
  max: number;
  stdDev: number;
  /** `1000 / mean`, or 0 when the mean is 0. */
  opsPerSecond: number;
  /** Memory snapshot taken after the measured iterations. */
  memoryUsage: MemoryUsage;
  /** `heapUsed` after minus `heapUsed` before, in bytes; may be negative. */
  memoryDelta: number;
  /** `Date.now()` at the time the result was assembled. */
  timestamp: number;
  metadata?: Record<string, unknown>;
}

export interface BenchmarkOptions {
  /** Number of iterations (default: 100) */
  iterations?: number;
  /** Number of warmup iterations (default: 10) */
  warmup?: number;
  /** Timeout per iteration in ms (default: 30000) */
  timeout?: number;
  /** Force garbage collection between iterations */
  forceGC?: boolean;
  /** Custom metadata to attach to results */
  metadata?: Record<string, unknown>;
  /** Minimum number of runs to ensure statistical significance */
  minRuns?: number;
  /** Target time in ms for auto-calibration */
  targetTime?: number;
}

/** Result of running a list of benchmarks through {@link BenchmarkRunner.runAll}. */
export interface BenchmarkSuite {
  name: string;
  benchmarks: BenchmarkResult[];
  /** Wall-clock duration of the `runAll` call, in milliseconds. */
  totalTime: number;
  timestamp: number;
  environment: EnvironmentInfo;
}

/** Host description captured alongside benchmark results. */
export interface EnvironmentInfo {
  nodeVersion: string;
  platform: string;
  arch: string;
  /** Number of entries returned by `os.cpus()`. */
  cpus: number;
  /** Value of `os.totalmem()` (bytes). */
  memory: number;
  v8Version?: string;
}

/** One row of a baseline-vs-current comparison (see {@link compareResults}). */
export interface ComparisonResult {
  benchmark: string;
  /** Baseline mean (ms). */
  baseline: number;
  /** Current mean (ms). */
  current: number;
  /** `current - baseline` (ms); negative means faster. */
  change: number;
  changePercent: number;
  improved: boolean;
  significant: boolean;
  target?: number;
  targetMet: boolean;
}

// ============================================================================
// Statistical Functions
// ============================================================================

/**
 * Return an ascending-sorted copy of `values`; the input is not mutated.
 */
function sortAscending(values: readonly number[]): number[] {
  return [...values].sort((a, b) => a - b);
}

/**
 * Nearest-rank percentile of an array that is ALREADY sorted ascending.
 * The rank is clamped to the valid index range so percentiles <= 0 yield the
 * smallest value and percentiles >= 100 yield the largest.
 */
function percentileOfSorted(sorted: readonly number[], percentile: number): number {
  if (sorted.length === 0) return 0;
  const rank = Math.ceil((percentile / 100) * sorted.length) - 1;
  const index = Math.min(sorted.length - 1, Math.max(0, rank));
  return sorted[index]!;
}

/**
 * Calculate mean of an array of numbers (0 for an empty array)
 */
function calculateMean(values: number[]): number {
  if (values.length === 0) return 0;
  let total = 0;
  for (const value of values) total += value;
  return total / values.length;
}

/**
 * Calculate median of an array of numbers (0 for an empty array)
 */
function calculateMedian(values: number[]): number {
  if (values.length === 0) return 0;
  const sorted = sortAscending(values);
  const mid = Math.floor(sorted.length / 2);
  // Odd length: the middle element; even length: average of the two middle ones.
  return sorted.length % 2 !== 0 ? sorted[mid]! : (sorted[mid - 1]! + sorted[mid]!) / 2;
}

/**
 * Calculate percentile of an array of numbers using the nearest-rank method
 * (0 for an empty array). `percentile` is expressed on a 0-100 scale.
 */
function calculatePercentile(values: number[], percentile: number): number {
  if (values.length === 0) return 0;
  return percentileOfSorted(sortAscending(values), percentile);
}

/**
 * Calculate standard deviation of an array of numbers.
 * This is the population form (divides by N); fewer than two samples yield 0.
 */
function calculateStdDev(values: number[]): number {
  if (values.length < 2) return 0;
  const mean = calculateMean(values);
  const squaredDiffs = values.map((value) => (value - mean) ** 2);
  return Math.sqrt(calculateMean(squaredDiffs));
}

/**
 * Remove outliers using IQR method.
 *
 * Values outside `[Q1 - 1.5*IQR, Q3 + 1.5*IQR]` are dropped. With fewer than
 * four samples the input array itself is returned unchanged; otherwise a new,
 * ascending-sorted array is returned.
 */
function removeOutliers(values: number[]): number[] {
  if (values.length < 4) return values;
  // Sort once and reuse the sorted copy for both quartiles and the filter.
  const sorted = sortAscending(values);
  const q1 = percentileOfSorted(sorted, 25);
  const q3 = percentileOfSorted(sorted, 75);
  const iqr = q3 - q1;
  const lowerBound = q1 - 1.5 * iqr;
  const upperBound = q3 + 1.5 * iqr;
  return sorted.filter((value) => value >= lowerBound && value <= upperBound);
}

// ============================================================================
// Memory Utilities
// ============================================================================

/**
 * Get current memory usage
 */
function getMemoryUsage(): MemoryUsage {
  const { heapUsed, heapTotal, external, arrayBuffers, rss } = process.memoryUsage();
  return { heapUsed, heapTotal, external, arrayBuffers, rss };
}

/**
 * Format bytes to human-readable string (B, KB, MB or GB; two decimals).
 *
 * The unit is chosen from the magnitude of `bytes`, so negative values such as
 * a shrinking heap delta are scaled too (`-2048` -> `"-2.00 KB"`).
 */
export function formatBytes(bytes: number): string {
  const units = ['B', 'KB', 'MB', 'GB'];
  const sign = bytes < 0 ? '-' : '';
  let value = Math.abs(bytes);
  let unitIndex = 0;
  while (value >= 1024 && unitIndex < units.length - 1) {
    value /= 1024;
    unitIndex++;
  }
  return `${sign}${value.toFixed(2)} ${units[unitIndex]}`;
}

/**
 * Format time in milliseconds to human-readable string (ns, us, ms or s).
 *
 * The unit is chosen from the magnitude of `ms`, so negative durations keep
 * their sign but are scaled like their positive counterpart.
 */
export function formatTime(ms: number): string {
  const magnitude = Math.abs(ms);
  if (magnitude < 0.001) return `${(ms * 1000000).toFixed(2)} ns`;
  if (magnitude < 1) return `${(ms * 1000).toFixed(2)} us`;
  if (magnitude < 1000) return `${ms.toFixed(2)} ms`;
  return `${(ms / 1000).toFixed(2)} s`;
}

/**
 * Force garbage collection if available.
 * A no-op unless `global.gc` is a function.
 */
function forceGC(): void {
  if (typeof global.gc === 'function') {
    global.gc();
  }
}

// ============================================================================
// Core Benchmark Function
// ============================================================================

/**
 * Run `fn` once, rejecting with `new Error(timeoutMessage)` if it has not
 * settled within `timeoutMs`.
 *
 * The timer is always cleared once the race settles so that no pending timer
 * outlives the call (a pending timer would keep the event loop alive). Because
 * this is an async function, a synchronous throw from `fn` surfaces as a
 * rejection of the returned promise.
 */
async function runWithTimeout(
  fn: () => Promise<void> | void,
  timeoutMs: number,
  timeoutMessage: string
): Promise<void> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  try {
    // Invoke fn before arming the timer so a synchronous throw leaves no timer behind.
    const pending = fn();
    await Promise.race([
      pending,
      new Promise<never>((_, reject) => {
        timer = setTimeout(() => reject(new Error(timeoutMessage)), timeoutMs);
      }),
    ]);
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Execute a benchmark with comprehensive statistics.
 *
 * Phases:
 * 1. Warmup: `warmup` calls of `fn`; errors and timeouts are ignored.
 * 2. Calibration: one timed call; the iteration count becomes
 *    `max(minRuns, min(iterations, ceil(targetTime / calibrationTime)))`.
 * 3. Measurement: each iteration is timed individually with `performance.now()`.
 * 4. Statistics: computed on the samples after IQR outlier removal.
 *
 * @param name - Label stored on the result.
 * @param fn - Synchronous or asynchronous function under test.
 * @param options - See {@link BenchmarkOptions}.
 * @returns Timing statistics (ms) and memory information for the run.
 * @throws Error `'Calibration timeout'` / `'Iteration timeout'` when a
 *   calibration or measured call exceeds `timeout`, and any error thrown or
 *   rejected by `fn` during calibration or measurement.
 */
export async function benchmark(
  name: string,
  fn: () => Promise<void> | void,
  options: BenchmarkOptions = {}
): Promise<BenchmarkResult> {
  const {
    iterations = 100,
    warmup = 10,
    timeout = 30000,
    forceGC: doForceGC = false,
    metadata = {},
    minRuns = 10,
    targetTime = 1000,
  } = options;

  // Warmup phase: failures here are deliberately ignored (best effort).
  for (let i = 0; i < warmup; i++) {
    try {
      await runWithTimeout(fn, timeout, 'Warmup timeout');
    } catch {
      // Ignored on purpose; real failures resurface during calibration/measurement.
    }
  }

  // Auto-calibrate iterations so the measured phase stays near targetTime.
  let actualIterations = iterations;
  const calibrationStart = performance.now();
  await runWithTimeout(fn, timeout, 'Calibration timeout');
  const calibrationTime = performance.now() - calibrationStart;
  if (calibrationTime > 0) {
    const estimatedIterations = Math.ceil(targetTime / calibrationTime);
    actualIterations = Math.max(minRuns, Math.min(iterations, estimatedIterations));
  }

  // Memory before benchmark
  if (doForceGC) forceGC();
  const memoryBefore = getMemoryUsage();

  // Run benchmark
  const times: number[] = [];
  for (let i = 0; i < actualIterations; i++) {
    if (doForceGC && i % 10 === 0) forceGC();
    const iterStart = performance.now();
    await runWithTimeout(fn, timeout, 'Iteration timeout');
    times.push(performance.now() - iterStart);
  }

  // Memory after benchmark
  const memoryAfter = getMemoryUsage();

  // Calculate statistics (remove outliers for more accurate results)
  const cleanedTimes = removeOutliers(times);
  const mean = calculateMean(cleanedTimes);

  // A loop instead of Math.min(...arr)/Math.max(...arr): no argument-count limit
  // for large samples, and an empty sample reports 0 rather than +/-Infinity.
  let min = 0;
  let max = 0;
  if (cleanedTimes.length > 0) {
    min = Infinity;
    max = -Infinity;
    for (const sample of cleanedTimes) {
      if (sample < min) min = sample;
      if (sample > max) max = sample;
    }
  }

  return {
    name,
    iterations: actualIterations,
    mean,
    median: calculateMedian(cleanedTimes),
    p95: calculatePercentile(cleanedTimes, 95),
    p99: calculatePercentile(cleanedTimes, 99),
    min,
    max,
    stdDev: calculateStdDev(cleanedTimes),
    opsPerSecond: mean > 0 ? 1000 / mean : 0,
    memoryUsage: memoryAfter,
    memoryDelta: memoryAfter.heapUsed - memoryBefore.heapUsed,
    timestamp: Date.now(),
    metadata,
  };
}

// ============================================================================
// Benchmark Suite Runner
// ============================================================================

/**
 * Collects the results of several benchmarks under one suite name and can
 * print them or export them as JSON.
 */
export class BenchmarkRunner {
  private results: BenchmarkResult[] = [];
  private readonly suiteName: string;

  /**
   * @param name - Suite name used in reports and exports.
   */
  constructor(name: string) {
    this.suiteName = name;
  }

  /**
   * Run a single benchmark and add to results
   */
  async run(
    name: string,
    fn: () => Promise<void> | void,
    options?: BenchmarkOptions
  ): Promise<BenchmarkResult> {
    const result = await benchmark(name, fn, options);
    this.results.push(result);
    return result;
  }

  /**
   * Run multiple benchmarks in sequence.
   *
   * The returned suite lists every result accumulated by this runner so far
   * (including results from earlier `run`/`runAll` calls that were not
   * cleared). The list is a snapshot: later runs do not mutate it.
   */
  async runAll(
    benchmarks: Array<{
      name: string;
      fn: () => Promise<void> | void;
      options?: BenchmarkOptions;
    }>
  ): Promise<BenchmarkSuite> {
    const startTime = performance.now();

    for (const bench of benchmarks) {
      await this.run(bench.name, bench.fn, bench.options);
    }

    return {
      name: this.suiteName,
      benchmarks: [...this.results],
      totalTime: performance.now() - startTime,
      timestamp: Date.now(),
      environment: this.getEnvironmentInfo(),
    };
  }

  /**
   * Get environment information
   */
  private getEnvironmentInfo(): EnvironmentInfo {
    return {
      nodeVersion: process.version,
      platform: process.platform,
      arch: process.arch,
      cpus: os.cpus().length,
      memory: os.totalmem(),
      v8Version: process.versions.v8,
    };
  }

  /**
   * Get all results (a copy; mutating it does not affect the runner)
   */
  getResults(): BenchmarkResult[] {
    return [...this.results];
  }

  /**
   * Clear all results
   */
  clear(): void {
    this.results = [];
  }

  /**
   * Print formatted results to console
   */
  printResults(): void {
    console.log(`\n${'='.repeat(60)}`);
    console.log(`Benchmark Suite: ${this.suiteName}`);
    console.log(`${'='.repeat(60)}\n`);

    for (const result of this.results) {
      console.log(`${result.name}:`);
      console.log(` Iterations: ${result.iterations}`);
      console.log(` Mean: ${formatTime(result.mean)}`);
      console.log(` Median: ${formatTime(result.median)}`);
      console.log(` Std Dev: ${formatTime(result.stdDev)}`);
      console.log(` P95: ${formatTime(result.p95)}`);
      console.log(` P99: ${formatTime(result.p99)}`);
      console.log(` Min: ${formatTime(result.min)}`);
      console.log(` Max: ${formatTime(result.max)}`);
      console.log(` Ops/sec: ${result.opsPerSecond.toFixed(2)}`);
      console.log(` Memory Delta: ${formatBytes(result.memoryDelta)}`);
      console.log('');
    }
  }

  /**
   * Export results as JSON (pretty-printed with two-space indentation)
   */
  toJSON(): string {
    return JSON.stringify(
      {
        name: this.suiteName,
        benchmarks: this.results,
        timestamp: Date.now(),
        environment: this.getEnvironmentInfo(),
      },
      null,
      2
    );
  }
}

// ============================================================================
// Comparison Utilities
// ============================================================================

/** Minimum absolute percentage change for a difference to count as significant. */
const SIGNIFICANT_CHANGE_PERCENT = 5;

/** Minimum change, in combined standard deviations, to count as significant. */
const SIGNIFICANT_STD_DEVS = 2;

/**
 * Compare benchmark results against baseline.
 *
 * Results are matched by `name`; current results without a baseline entry are
 * skipped. When several baseline entries share a name, the first one is used.
 *
 * A change is `significant` only when it is BOTH larger than 5% of the
 * baseline mean AND larger than two combined standard deviations
 * (`sqrt(base.stdDev^2 + curr.stdDev^2)`).
 *
 * @param baseline - Reference results.
 * @param current - Results to evaluate.
 * @param targets - Optional per-benchmark mean targets (ms); a benchmark with
 *   no target is reported as `targetMet: true`.
 * @returns One comparison per current result that has a baseline.
 */
export function compareResults(
  baseline: BenchmarkResult[],
  current: BenchmarkResult[],
  targets?: Record<string, number>
): ComparisonResult[] {
  // Index the baseline once; keep the first entry per name (same as Array.find).
  const baselineByName = new Map<string, BenchmarkResult>();
  for (const entry of baseline) {
    if (!baselineByName.has(entry.name)) baselineByName.set(entry.name, entry);
  }

  const comparisons: ComparisonResult[] = [];

  for (const curr of current) {
    const base = baselineByName.get(curr.name);
    if (!base) continue;

    const change = curr.mean - base.mean;
    // No change is 0% even when the baseline mean is 0 (avoids 0/0 = NaN).
    const changePercent = change === 0 ? 0 : (change / base.mean) * 100;
    const improved = change < 0;

    // Consider significant if change is > 5% and > 2 standard deviations
    const combinedStdDev = Math.sqrt(base.stdDev ** 2 + curr.stdDev ** 2);
    const significant =
      Math.abs(changePercent) > SIGNIFICANT_CHANGE_PERCENT &&
      Math.abs(change) > SIGNIFICANT_STD_DEVS * combinedStdDev;

    const target = targets?.[curr.name];
    const targetMet = target !== undefined ? curr.mean <= target : true;

    comparisons.push({
      benchmark: curr.name,
      baseline: base.mean,
      current: curr.mean,
      change,
      changePercent,
      improved,
      significant,
      target,
      targetMet,
    });
  }

  return comparisons;
}

/**
 * Print comparison report.
 *
 * Writes one table row per comparison to the console. The status column is
 * `[IMPROVED]` / `[REGRESSED]` for significant changes and `[~]` otherwise,
 * with ` [MISSED TARGET]` appended when the target was not met.
 */
export function printComparisonReport(comparisons: ComparisonResult[]): void {
  console.log('\n' + '='.repeat(80));
  console.log('Performance Comparison Report');
  console.log('='.repeat(80) + '\n');

  console.log(
    `${'Benchmark'.padEnd(35)} ${'Baseline'.padEnd(12)} ${'Current'.padEnd(12)} ${'Change'.padEnd(12)} Status`
  );
  console.log('-'.repeat(80));

  for (const comp of comparisons) {
    const baselineStr = formatTime(comp.baseline);
    const currentStr = formatTime(comp.current);
    const changeStr = `${comp.changePercent >= 0 ? '+' : ''}${comp.changePercent.toFixed(1)}%`;

    let status = '[~]';
    if (comp.significant) {
      status = comp.improved ? '[IMPROVED]' : '[REGRESSED]';
    }
    if (!comp.targetMet) {
      status += ' [MISSED TARGET]';
    }

    console.log(
      `${comp.benchmark.padEnd(35)} ${baselineStr.padEnd(12)} ${currentStr.padEnd(12)} ${changeStr.padEnd(12)} ${status}`
    );
  }

  console.log('\n');
}

// ============================================================================
// V3 Performance Targets
// ============================================================================

/** Target values per benchmark name, in milliseconds (see inline notes). */
export const V3_PERFORMANCE_TARGETS = {
  // Startup Performance
  'cli-cold-start': 500, // <500ms (5x faster)
  'cli-warm-start': 100, // <100ms
  'mcp-server-init': 400, // <400ms (4.5x faster)
  'agent-spawn': 200, // <200ms (4x faster)

  // Memory Operations
  'vector-search': 1, // <1ms (150x faster)
  'hnsw-indexing': 10, // <10ms
  'memory-write': 5, // <5ms (10x faster)
  'cache-hit': 0.1, // <0.1ms

  // Swarm Coordination
  'agent-coordination': 50, // <50ms
  'task-decomposition': 20, // <20ms
  'consensus-latency': 100, // <100ms (5x faster)
  'message-throughput': 0.1, // <0.1ms per message

  // Attention Mechanisms
  'flash-attention': 100, // Baseline comparison target
  'multi-head-attention': 200, // Baseline comparison target

  // SONA Learning
  'sona-adaptation': 0.05, // <0.05ms
} as const;

export type PerformanceTarget = keyof typeof V3_PERFORMANCE_TARGETS;

/**
 * Check if a benchmark meets its target.
 *
 * @param benchmarkName - Key to look up in {@link V3_PERFORMANCE_TARGETS}.
 * @param value - Measured value to compare against the target.
 * @returns `met` is `value <= target`; `ratio` is `value / target`. When the
 *   name has no target, `met` is `true` and `target`/`ratio` are `undefined`.
 */
export function meetsTarget(
  benchmarkName: string,
  value: number
): { met: boolean; target: number | undefined; ratio: number | undefined } {
  // Own-property check: names such as "toString" or "constructor" must not
  // resolve to members inherited from Object.prototype.
  if (!Object.prototype.hasOwnProperty.call(V3_PERFORMANCE_TARGETS, benchmarkName)) {
    return { met: true, target: undefined, ratio: undefined };
  }

  const target: number = V3_PERFORMANCE_TARGETS[benchmarkName as PerformanceTarget];
  return {
    met: value <= target,
    target,
    ratio: value / target,
  };
}

// ============================================================================
// Export Default Runner Instance
// ============================================================================

export default {
  benchmark,
  BenchmarkRunner,
  compareResults,
  printComparisonReport,
  formatBytes,
  formatTime,
  meetsTarget,
  V3_PERFORMANCE_TARGETS,
};