UNPKG

@clawdcc/cvm-benchmark

Version:

Comprehensive benchmarking and performance analysis tools for Claude Code versions

1,317 lines (1,302 loc) 45.1 kB
// ---------------------------------------------------------------------------
// Bundler runtime helpers (emitted by tsup/esbuild).
// `var` is kept on purpose for these and for the `init_*` / module bindings
// below: the lazy-init wrappers assign to bindings declared elsewhere in the
// bundle, and the original hoisting semantics are preserved.
// ---------------------------------------------------------------------------
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
// Wraps a single-key object whose value is a module body. The returned
// `__init` runs that body at most once (`fn` is set to 0 on the first call)
// and returns the cached result afterwards.
var __esm = (fn, res) => function __init() {
  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
// Defines every key of `all` on `target` as an enumerable getter.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};

// node_modules/tsup/assets/esm_shims.js
import path from "path";
import { fileURLToPath } from "url";
var init_esm_shims = __esm({
  "node_modules/tsup/assets/esm_shims.js"() {
    "use strict";
  }
});

// src/utils/version-filter.ts

/**
 * Split a dotted version string into numeric components.
 *
 * Each component is parsed with a base-10 `parseInt`, so a suffix such as
 * "0-beta" yields 0 instead of NaN; missing or non-numeric components
 * default to 0. This keeps `compareVersions` a consistent comparator.
 *
 * @param {string} version - e.g. "2.0.37"
 * @returns {{ major: number, minor: number, patch: number }}
 */
function parseVersion(version) {
  const [major = 0, minor = 0, patch = 0] = version.split(".").map((part) => {
    const parsed = Number.parseInt(part, 10);
    return Number.isNaN(parsed) ? 0 : parsed;
  });
  return { major, minor, patch };
}

/**
 * Comparator for two version strings (major, then minor, then patch).
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} negative if a < b, positive if a > b, 0 if equal
 */
function compareVersions(a, b) {
  const vA = parseVersion(a);
  const vB = parseVersion(b);
  if (vA.major !== vB.major) return vA.major - vB.major;
  if (vA.minor !== vB.minor) return vA.minor - vB.minor;
  return vA.patch - vB.patch;
}

/**
 * Return a new array with the versions sorted ascending.
 * The input array is not mutated.
 *
 * @param {string[]} versions
 * @returns {string[]}
 */
function sortVersions(versions) {
  return [...versions].sort(compareVersions);
}

/**
 * Apply the `config.versions` filters to a list of versions.
 *
 * Filters are applied in this order: include list, min, max, exclude list;
 * the result is then sorted ascending and, if `limit` is set, cut down to
 * the last (highest) `limit` entries.
 *
 * @param {string[]} allVersions
 * @param {{ versions: { include?: string[], min?: string, max?: string,
 *           exclude?: string[], limit?: number } }} config
 * @returns {string[]} filtered versions, ascending
 */
function filterVersions(allVersions, config) {
  const { include, min, max, exclude, limit } = config.versions;
  let filtered = [...allVersions];
  if (include && include.length > 0) {
    filtered = filtered.filter((v) => include.includes(v));
  }
  if (min) {
    filtered = filtered.filter((v) => compareVersions(v, min) >= 0);
  }
  if (max) {
    filtered = filtered.filter((v) => compareVersions(v, max) <= 0);
  }
  // `exclude` may be absent on partial configs (e.g. EXAMPLE_CONFIGS).
  if (exclude && exclude.length > 0) {
    filtered = filtered.filter((v) => !exclude.includes(v));
  }
  filtered = sortVersions(filtered);
  if (limit) {
    filtered = filtered.slice(-limit);
  }
  return filtered;
}

/**
 * Build a one-line human-readable summary of the active version filters.
 *
 * @param {object} config - same shape as for `filterVersions`
 * @param {number} totalVersions - count before filtering
 * @param {number} filteredVersions - count after filtering
 * @returns {string} e.g. "4/7 versions (min=2.0.0, limit=4)"
 */
function describeVersionFilter(config, totalVersions, filteredVersions) {
  const { include, min, max, exclude, limit } = config.versions;
  const filters = [];
  if (include && include.length > 0) {
    filters.push(`include=${include.join(", ")}`);
  }
  if (min) {
    filters.push(`min=${min}`);
  }
  if (max) {
    filters.push(`max=${max}`);
  }
  if (exclude && exclude.length > 0) {
    filters.push(`exclude=${exclude.length} versions`);
  }
  if (limit) {
    filters.push(`limit=${limit}`);
  }
  const filterDesc = filters.length > 0 ? ` (${filters.join(", ")})` : "";
  return `${filteredVersions}/${totalVersions} versions${filterDesc}`;
}

var init_version_filter = __esm({
  "src/utils/version-filter.ts"() {
    "use strict";
    init_esm_shims();
    // The in-source test suite was compiled to an unreachable `if (false)`
    // branch in this bundle and has been dropped here.
  }
});

// src/utils/logger.ts
var logger_exports = {};
__export(logger_exports, {
  logger: () => logger
});
import chalk from "chalk";
var Logger, logger;
var init_logger = __esm({
  "src/utils/logger.ts"() {
    "use strict";
    init_esm_shims();
    /**
     * Console logger with a minimum level and a global silent switch.
     * Output is coloured with chalk.
     */
    Logger = class {
      level = "info";
      silent = false;
      setLevel(level) {
        this.level = level;
      }
      setSilent(silent) {
        this.silent = silent;
      }
      debug(message, ...args) {
        if (!this.silent && this.shouldLog("debug")) {
          console.log(chalk.gray(`[DEBUG] ${message}`), ...args);
        }
      }
      info(message, ...args) {
        if (!this.silent && this.shouldLog("info")) {
          console.log(chalk.blue(`\u2139 ${message}`), ...args);
        }
      }
      warn(message, ...args) {
        if (!this.silent && this.shouldLog("warn")) {
          console.warn(chalk.yellow(`\u26A0 ${message}`), ...args);
        }
      }
      error(message, ...args) {
        if (!this.silent && this.shouldLog("error")) {
          console.error(chalk.red(`\u2716 ${message}`), ...args);
        }
      }
      success(message, ...args) {
        if (!this.silent && this.shouldLog("success")) {
          console.log(chalk.green(`\u2713 ${message}`), ...args);
        }
      }
      /**
       * A message is emitted when its level is at or above the configured
       * level in this ordering. "success" is last, so it passes at every
       * configured level.
       */
      shouldLog(level) {
        const levels = ["debug", "info", "warn", "error", "success"];
        return levels.indexOf(level) >= levels.indexOf(this.level);
      }
    };
    logger = new Logger();
  }
});

// src/core/version-manager.ts
import { readdir, stat } from "fs/promises";
import { join } from "path";
import { homedir } from "os";
import { execSync, spawn } from "child_process";
var VersionManager;
var init_version_manager = __esm({
  "src/core/version-manager.ts"() {
    "use strict";
    init_esm_shims();
    init_version_filter();
    init_logger();
    /**
     * Locates and installs Claude Code versions under `<cvmDir>/versions`.
     */
    VersionManager = class {
      versionsDir;
      constructor(cvmDir = join(homedir(), ".cvm")) {
        this.versionsDir = join(cvmDir, "versions");
      }
      /**
       * Get all installed Claude Code versions from CVM.
       *
       * An entry counts as installed when it is a directory named `x.y.z`
       * and its `claude` binary path exists. Entries that cannot be
       * inspected are skipped with a warning instead of aborting the whole
       * listing.
       *
       * @returns {Promise<string[]>} installed versions, ascending; `[]` if
       *   the versions directory cannot be read
       */
      async getInstalledVersions() {
        let entries;
        try {
          entries = await readdir(this.versionsDir);
        } catch (error) {
          logger.error("Failed to read installed versions:", error);
          return [];
        }
        const versions = [];
        for (const entry of entries) {
          // Check the name first so unrelated entries are never stat()'d.
          if (!/^\d+\.\d+\.\d+$/.test(entry)) continue;
          try {
            const stats = await stat(join(this.versionsDir, entry));
            if (!stats.isDirectory()) continue;
            await stat(this.getClaudePath(entry));
            versions.push(entry);
          } catch {
            logger.warn(`Version ${entry} not fully installed, skipping`);
          }
        }
        return sortVersions(versions);
      }
      /**
       * Get all available versions from npm registry.
       *
       * Runs `npm view` synchronously, so this blocks until npm returns.
       *
       * @returns {Promise<string[]>} versions ascending, or `[]` on failure
       */
      async getAvailableVersions() {
        try {
          const output = execSync(
            "npm view @anthropic-ai/claude-code versions --json",
            { encoding: "utf8", stdio: ["pipe", "pipe", "pipe"] }
          );
          const parsed = JSON.parse(output);
          // npm prints a bare JSON string (not an array) when the package
          // has exactly one version.
          const versions = Array.isArray(parsed) ? parsed : [parsed];
          return sortVersions(versions);
        } catch (error) {
          logger.error("Failed to fetch available versions from npm:", error);
          return [];
        }
      }
      /**
       * Get versions that are available but not installed
       */
      async getMissingVersions() {
        const installed = new Set(await this.getInstalledVersions());
        const available = await this.getAvailableVersions();
        return available.filter((v) => !installed.has(v));
      }
      /**
       * Install a version using CVM (`cvm install <version>`).
       * Never rejects: resolves true if the process exits with code 0,
       * false on a non-zero exit or a spawn error.
       */
      async installVersion(version) {
        return new Promise((resolve) => {
          logger.info(`Installing version ${version}...`);
          const proc = spawn("cvm", ["install", version], {
            stdio: ["ignore", "pipe", "pipe"]
          });
          let stderr = "";
          proc.stderr.on("data", (data) => {
            stderr += data.toString();
          });
          proc.on("close", (code) => {
            if (code === 0) {
              logger.success(`Installed ${version}`);
              resolve(true);
            } else {
              logger.error(`Failed to install ${version}: ${stderr}`);
              resolve(false);
            }
          });
          proc.on("error", (error) => {
            logger.error(`Failed to install ${version}: ${error.message}`);
            resolve(false);
          });
        });
      }
      /**
       * Install multiple versions, one after another.
       * Returns array of successfully installed versions
       */
      async installVersions(versions) {
        const installed = [];
        for (const version of versions) {
          const success = await this.installVersion(version);
          if (success) {
            installed.push(version);
          }
        }
        return installed;
      }
      /**
       * Parse a version range string (e.g., "2.0.40-2.0.53" or "2.0.40-latest")
       * Returns `{ min, max }` (where `max` may be the literal "latest"),
       * or null if the string is not a range.
       */
      parseVersionRange(range) {
        const match = range.match(/^(\d+\.\d+\.\d+)-(\d+\.\d+\.\d+|latest)$/);
        if (!match) return null;
        return { min: match[1], max: match[2] };
      }
      /**
       * Get Claude binary path for a version
       */
      getClaudePath(version) {
        return join(
          this.versionsDir,
          version,
          "installed",
          "node_modules",
          ".bin",
          "claude"
        );
      }
      /**
       * Check if a version is installed (its binary path can be stat()'d)
       */
      async isInstalled(version) {
        try {
          await stat(this.getClaudePath(version));
          return true;
        } catch {
          return false;
        }
      }
      /**
       * Get version installation directory
       */
      getVersionDir(version) {
        return join(this.versionsDir, version);
      }
    };
  }
});

// src/storage/result-store.ts
import { writeFile, readFile, mkdir, readdir as readdir3 } from "fs/promises";
import { join as join2 } from "path";
import { homedir as homedir2 } from "os";
var ResultStore;
var init_result_store = __esm({
  "src/storage/result-store.ts"() {
    "use strict";
    init_esm_shims();
    /**
     * Persists benchmark output as JSON files under `<baseDir>/run-<N>/`.
     */
    ResultStore = class {
      constructor(baseDir = join2(homedir2(), ".cvm", "benchmarks")) {
        this.baseDir = baseDir;
      }
      /**
       * Save suite results to run directory (`run-<N>/results.json`)
       */
      async saveSuiteResults(runNumber, results) {
        const runDir = join2(this.baseDir, `run-${runNumber}`);
        await mkdir(runDir, { recursive: true });
        const filePath = join2(runDir, "results.json");
        await writeFile(filePath, JSON.stringify(results, null, 2));
      }
      /**
       * Save version benchmark results
       * (`run-<N>/version/benchmarks-all.json`)
       */
      async saveVersionResults(runNumber, results) {
        const versionDir = join2(this.baseDir, `run-${runNumber}`, "version");
        await mkdir(versionDir, { recursive: true });
        const filePath = join2(versionDir, "benchmarks-all.json");
        await writeFile(
          filePath,
          JSON.stringify({ results, timestamp: (/* @__PURE__ */ new Date()).toISOString() }, null, 2)
        );
      }
      /**
       * Save interactive benchmark results (individual files, one per
       * version, with dots in the version replaced by dashes)
       */
      async saveInteractiveResults(runNumber, results) {
        const interactiveDir = join2(this.baseDir, `run-${runNumber}`, "interactive");
        await mkdir(interactiveDir, { recursive: true });
        for (const result of results) {
          const fileName = `benchmark-startup-${result.version.replace(/\./g, "-")}.json`;
          const filePath = join2(interactiveDir, fileName);
          await writeFile(
            filePath,
            JSON.stringify(
              {
                data: {
                  version: result.version,
                  results: result.runs
                },
                analysis: {
                  version: result.version,
                  avg: result.avgTime,
                  min: result.minTime,
                  max: result.maxTime,
                  result: result.result
                },
                timestamp: result.timestamp
              },
              null,
              2
            )
          );
        }
      }
      /**
       * Save metadata for a run (`run-<N>/metadata.json`)
       */
      async saveMetadata(runNumber, metadata) {
        const runDir = join2(this.baseDir, `run-${runNumber}`);
        await mkdir(runDir, { recursive: true });
        const filePath = join2(runDir, "metadata.json");
        await writeFile(filePath, JSON.stringify(metadata, null, 2));
      }
      /**
       * Load suite results from run directory.
       * Returns null if the file is missing or is not valid JSON.
       */
      async loadSuiteResults(runNumber) {
        try {
          const filePath = join2(this.baseDir, `run-${runNumber}`, "results.json");
          const data = await readFile(filePath, "utf-8");
          return JSON.parse(data);
        } catch {
          return null;
        }
      }
      /**
       * Get next run number: one more than the highest existing `run-<N>`
       * entry, or 1 if there are none or the base directory is unreadable.
       */
      async getNextRunNumber() {
        try {
          const entries = await readdir3(this.baseDir);
          const runs = entries
            .filter((f) => f.startsWith("run-"))
            .map((f) => Number.parseInt(f.replace("run-", ""), 10))
            .filter((n) => !Number.isNaN(n));
          return runs.length > 0 ? Math.max(...runs) + 1 : 1;
        } catch {
          return 1;
        }
      }
      /**
       * Get all versions that have been benchmarked across all runs.
       * Only results without an `error` field are counted.
       *
       * @returns {Promise<Set<string>>}
       */
      async getBenchmarkedVersions() {
        const benchmarked = /* @__PURE__ */ new Set();
        try {
          const entries = await readdir3(this.baseDir);
          const runDirs = entries.filter((f) => f.startsWith("run-"));
          for (const runDir of runDirs) {
            const results = await this.loadSuiteResults(
              Number.parseInt(runDir.replace("run-", ""), 10)
            );
            if (results?.results) {
              for (const result of results.results) {
                if (!result.error && result.version) {
                  benchmarked.add(result.version);
                }
              }
            }
          }
        } catch {
          // Best effort: an unreadable base directory is treated as
          // "nothing benchmarked yet".
        }
        return benchmarked;
      }
    };
  }
});

// src/benchmarks/version-spawn.ts
import { spawn as spawn2 } from "child_process";

/**
 * Time a single `<claudePath> --version` invocation.
 *
 * @param {{ claudePath: string, timeout?: number }} options - `timeout` is
 *   passed to `spawn` (ms, default 10000)
 * @returns {Promise<number>} elapsed wall-clock milliseconds
 * @throws {Error} (as rejection) on spawn error or any exit other than
 *   code 0
 */
async function benchmarkVersion(options) {
  const { claudePath, timeout = 1e4 } = options;
  return new Promise((resolve, reject) => {
    const startTime = Date.now();
    const proc = spawn2(claudePath, ["--version"], {
      stdio: "ignore",
      timeout
    });
    proc.on("close", (code, signal) => {
      const elapsed = Date.now() - startTime;
      if (code === 0) {
        resolve(elapsed);
      } else if (code === null) {
        // A null exit code means the child was terminated by a signal,
        // which is what happens when the spawn `timeout` elapses.
        reject(new Error(`Process exited with code ${code} (killed by ${signal}; timeout ${timeout}ms)`));
      } else {
        reject(new Error(`Process exited with code ${code}`));
      }
    });
    proc.on("error", (error) => {
      reject(error);
    });
  });
}
var init_version_spawn = __esm({
  "src/benchmarks/version-spawn.ts"() {
    "use strict";
    init_esm_shims();
  }
});

// src/utils/cleanup.ts
import { readdir as readdir2, unlink, stat as stat2 } from "fs/promises";
import { join as join3 } from "path";
import { homedir as homedir3 } from "os";

/**
 * Map a working directory to `~/.claude/projects/<encoded>`, where the
 * encoded name is `cwd` with every "/" replaced by "-".
 */
function getProjectDir(cwd) {
  const encoded = cwd.replace(/\//g, "-");
  return join3(homedir3(), ".claude", "projects", encoded);
}

/**
 * Delete the `<sessionId>.jsonl` files for the given sessions.
 * Sessions whose file does not exist are skipped and counted as neither
 * cleaned nor failed.
 *
 * @param {string[]} sessionIds
 * @param {string} cwd - working directory the sessions were created in
 * @returns {Promise<{ cleaned: number, failed: number }>}
 */
async function cleanupSessions(sessionIds, cwd) {
  const projectDir = getProjectDir(cwd);
  let cleaned = 0;
  let failed = 0;
  for (const sessionId of sessionIds) {
    try {
      const sessionFile = join3(projectDir, `${sessionId}.jsonl`);
      try {
        await stat2(sessionFile);
      } catch {
        continue;
      }
      await unlink(sessionFile);
      cleaned++;
      logger.debug(`Cleaned up session: ${sessionId}`);
    } catch (error) {
      failed++;
      logger.warn(`Failed to cleanup session ${sessionId}:`, error);
    }
  }
  return { cleaned, failed };
}

/**
 * Total size in bytes of all `.jsonl` files in the project directory for
 * `cwd`; 0 if the directory cannot be read.
 */
async function getSessionSize(cwd) {
  const projectDir = getProjectDir(cwd);
  try {
    const files = await readdir2(projectDir);
    let totalSize = 0;
    for (const file of files) {
      if (file.endsWith(".jsonl")) {
        const filePath = join3(projectDir, file);
        const stats = await stat2(filePath);
        totalSize += stats.size;
      }
    }
    return totalSize;
  } catch {
    return 0;
  }
}

/**
 * Number of `.jsonl` files in the project directory for `cwd`; 0 if the
 * directory cannot be read.
 */
async function countSessions(cwd) {
  const projectDir = getProjectDir(cwd);
  try {
    const files = await readdir2(projectDir);
    return files.filter((f) => f.endsWith(".jsonl")).length;
  } catch {
    return 0;
  }
}
var init_cleanup = __esm({
  "src/utils/cleanup.ts"() {
    "use strict";
    init_esm_shims();
    init_logger();
  }
});

// src/utils/progress.ts
import ora from "ora";

/**
 * Format a millisecond duration as "Nms", "N.Ns", "Nm Ns" or "Nh Nm".
 */
function formatDuration(ms) {
  if (ms < 1e3) return `${ms}ms`;
  if (ms < 6e4) return `${(ms / 1e3).toFixed(1)}s`;
  if (ms < 36e5) return `${Math.floor(ms / 6e4)}m ${Math.floor(ms % 6e4 / 1e3)}s`;
  return `${Math.floor(ms / 36e5)}h ${Math.floor(ms % 36e5 / 6e4)}m`;
}

/**
 * Format a byte count using 1024-based B / KB / MB / GB units.
 */
function formatBytes(bytes) {
  if (bytes < 1024) return `${bytes}B`;
  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)}KB`;
  if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)}MB`;
  return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)}GB`;
}
var ProgressTracker;
var init_progress = __esm({
  "src/utils/progress.ts"() {
    "use strict";
    init_esm_shims();
    /**
     * Thin wrapper around an ora spinner that tracks a total item count
     * and a start time.
     */
    ProgressTracker = class {
      spinner = null;
      startTime = 0;
      total = 0;
      start(message, total) {
        this.total = total;
        this.startTime = Date.now();
        this.spinner = ora({
          text: message,
          spinner: "dots"
        }).start();
      }
      /**
       * Update the spinner text. Without `message`, a default
       * "Progress: i/n (p%) - ETA: Ns" line is shown; the ETA extrapolates
       * the average time per completed item.
       */
      update(current, message) {
        // Guard the division so a zero total does not render "NaN%".
        const percentage = this.total > 0 ? Math.round(current / this.total * 100) : 0;
        const elapsed = Date.now() - this.startTime;
        const eta = current > 0 ? Math.round(elapsed / current * (this.total - current) / 1e3) : 0;
        const text = message || `Progress: ${current}/${this.total} (${percentage}%) - ETA: ${eta}s`;
        // Test for the spinner itself, not its current text: an empty
        // current text must not block the update.
        if (this.spinner) {
          this.spinner.text = text;
        }
      }
      succeed(message) {
        const elapsed = Math.round((Date.now() - this.startTime) / 1e3);
        this.spinner?.succeed(message || `Completed ${this.total} items in ${elapsed}s`);
        this.spinner = null;
      }
      fail(message) {
        this.spinner?.fail(message || "Failed");
        this.spinner = null;
      }
      stop() {
        this.spinner?.stop();
        this.spinner = null;
      }
    };
  }
});

// src/core/benchmark-runner.ts
var benchmark_runner_exports = {};
__export(benchmark_runner_exports, {
  BenchmarkRunner: () => BenchmarkRunner
});
import { spawn as spawn3 } from "child_process";
import { fileURLToPath as fileURLToPath2 } from "url";
import { dirname, join as join4 } from "path";
import { existsSync } from "fs";

/**
 * Summary statistics for a list of timings (ms).
 * The average and standard deviation are rounded to integers; the standard
 * deviation is the population form (divides by the number of samples).
 *
 * @param {number[]} times
 * @returns {{ avgTime: number, minTime: number, maxTime: number, stdDev: number }}
 */
function summarizeTimes(times) {
  const mean = times.reduce((a, b) => a + b, 0) / times.length;
  const variance = times.reduce((a, b) => a + Math.pow(b - mean, 2), 0) / times.length;
  return {
    avgTime: Math.round(mean),
    minTime: Math.min(...times),
    maxTime: Math.max(...times),
    stdDev: Math.round(Math.sqrt(variance))
  };
}
var BenchmarkRunner;
var init_benchmark_runner = __esm({
  "src/core/benchmark-runner.ts"() {
    "use strict";
    init_esm_shims();
    init_version_manager();
    init_result_store();
    init_version_spawn();
    init_version_filter();
    init_cleanup();
    init_logger();
    init_progress();
    /**
     * Runs the spawn and interactive benchmarks over installed versions and
     * stores the results. Uses a default VersionManager and ResultStore;
     * the constructor argument is currently unused.
     */
    BenchmarkRunner = class {
      versionManager;
      resultStore;
      constructor(_config) {
        this.versionManager = new VersionManager();
        this.resultStore = new ResultStore();
      }
      /**
       * Run interactive benchmark in a separate process to avoid node-pty threading bugs.
       *
       * Spawns `dist/benchmarks/interactive-worker.js` (resolved from the
       * nearest ancestor directory containing package.json) with the current
       * Node executable, and parses the worker's stdout as JSON.
       *
       * Rejects if the project root is not found, the worker exits non-zero
       * or prints nothing, or its output is not valid JSON.
       */
      async benchmarkInteractiveIsolated(claudePath, cwd, timeout) {
        return new Promise((resolve, reject) => {
          const currentFile = fileURLToPath2(import.meta.url);
          const currentDir = dirname(currentFile);
          let projectRoot = currentDir;
          while (!existsSync(join4(projectRoot, "package.json"))) {
            const parent = dirname(projectRoot);
            // dirname() of the filesystem root returns the root itself.
            if (parent === projectRoot) throw new Error("Could not find project root");
            projectRoot = parent;
          }
          const workerScript = join4(projectRoot, "dist/benchmarks/interactive-worker.js");
          const proc = spawn3(process.execPath, [workerScript, claudePath, cwd, String(timeout)], {
            stdio: ["ignore", "pipe", "pipe"]
          });
          let stdout = "";
          let stderr = "";
          proc.stdout.on("data", (data) => {
            stdout += data.toString();
          });
          proc.stderr.on("data", (data) => {
            stderr += data.toString();
          });
          proc.on("close", (code) => {
            if (code === 0 && stdout) {
              try {
                const result = JSON.parse(stdout);
                resolve(result);
              } catch (error) {
                reject(new Error(`Failed to parse benchmark result: ${error}`));
              }
            } else {
              reject(new Error(`Benchmark worker failed with code ${code}: ${stderr}`));
            }
          });
          proc.on("error", reject);
        });
      }
      /**
       * Run benchmark suite for all filtered versions.
       *
       * Per-version failures are recorded in both `results` and `errors`
       * and do not abort the run.
       *
       * @param {object} config - benchmark configuration
       * @param {{ incremental?: boolean }} [options] - when `incremental`
       *   is set, versions already present in earlier runs are skipped
       * @returns {Promise<object>} the suite result that was written to disk
       * @throws {Error} if no versions remain after filtering
       */
      async runSuite(config, options = {}) {
        const startTime = Date.now();
        const runNumber = await this.resultStore.getNextRunNumber();
        logger.info(`Starting benchmark run #${runNumber}`);
        const allVersions = await this.versionManager.getInstalledVersions();
        let versions = filterVersions(allVersions, config);
        if (options.incremental) {
          const benchmarked = await this.resultStore.getBenchmarkedVersions();
          const beforeCount = versions.length;
          versions = versions.filter((v) => !benchmarked.has(v));
          const skipped = beforeCount - versions.length;
          if (skipped > 0) {
            logger.info(`Incremental mode: skipping ${skipped} already benchmarked versions`);
          }
        }
        logger.info(describeVersionFilter(config, allVersions.length, versions.length));
        if (versions.length === 0) {
          throw new Error("No versions match the filter criteria");
        }
        const results = [];
        const errors = [];
        const sessionIds = [];
        const progress = new ProgressTracker();
        progress.start(`Benchmarking ${versions.length} versions...`, versions.length);
        for (let i = 0; i < versions.length; i++) {
          const version = versions[i];
          progress.update(i + 1, `[${i + 1}/${versions.length}] Benchmarking ${version}...`);
          try {
            const result = await this.benchmarkVersion(version, config);
            results.push(result);
            if (result.interactiveBenchmark) {
              // Collect session ids so their files can be cleaned up below.
              result.interactiveBenchmark.runs.forEach((run) => {
                if (run.sessionId) sessionIds.push(run.sessionId);
              });
            }
          } catch (error) {
            const errorMsg = error instanceof Error ? error.message : String(error);
            logger.error(`Failed to benchmark ${version}: ${errorMsg}`);
            errors.push({ version, error: errorMsg });
            results.push({
              version,
              error: errorMsg
            });
          }
        }
        progress.succeed(`Completed ${versions.length} versions`);
        if (config.storage.cleanupSessions && sessionIds.length > 0) {
          logger.info(`Cleaning up ${sessionIds.length} test sessions...`);
          const { cleaned, failed: cleanupFailed } = await cleanupSessions(sessionIds, process.cwd());
          logger.info(`Cleaned: ${cleaned}, Failed: ${cleanupFailed}`);
        }
        const successful = results.filter((r) => !r.error).length;
        const failed = results.filter((r) => r.error).length;
        const suiteResult = {
          runNumber,
          timestamp: (/* @__PURE__ */ new Date()).toISOString(),
          config,
          results,
          errors,
          metadata: {
            totalVersions: versions.length,
            successfulVersions: successful,
            failedVersions: failed,
            duration: Date.now() - startTime
          }
        };
        await this.resultStore.saveSuiteResults(runNumber, suiteResult);
        await this.resultStore.saveMetadata(runNumber, {
          timestamp: suiteResult.timestamp,
          versionsCount: versions.length,
          config
        });
        const versionResults = results.filter((r) => r.versionBenchmark).map((r) => r.versionBenchmark);
        const interactiveResults = results.filter((r) => r.interactiveBenchmark).map((r) => r.interactiveBenchmark);
        if (versionResults.length > 0) {
          await this.resultStore.saveVersionResults(runNumber, versionResults);
        }
        if (interactiveResults.length > 0) {
          await this.resultStore.saveInteractiveResults(runNumber, interactiveResults);
        }
        logger.success(`Benchmark run #${runNumber} complete`);
        logger.info(`Successful: ${successful}, Failed: ${failed}`);
        return suiteResult;
      }
      /**
       * Benchmark a single version (both spawn and interactive).
       *
       * The spawn (`--version`) benchmark runs only when
       * `config.benchmark.runBoth` is set; its failure is logged as a
       * warning and leaves `versionBenchmark` unset. The interactive
       * benchmark always runs; its failure is rethrown.
       *
       * @returns {Promise<object>} `{ version, versionBenchmark?, interactiveBenchmark }`
       * @throws {Error} "Interactive benchmark failed: ..." if any
       *   interactive run fails
       */
      async benchmarkVersion(version, config) {
        const claudePath = this.versionManager.getClaudePath(version);
        const result = { version };
        if (config.benchmark.runBoth) {
          try {
            const runs = [];
            for (let i = 0; i < config.benchmark.runsPerVersion; i++) {
              const time = await benchmarkVersion({
                claudePath,
                timeout: config.benchmark.timeout
              });
              runs.push(time);
            }
            const stats = summarizeTimes(runs);
            result.versionBenchmark = {
              version,
              timestamp: (/* @__PURE__ */ new Date()).toISOString(),
              runs,
              ...stats
            };
          } catch (error) {
            logger.warn(`Version spawn benchmark failed for ${version}: ${error}`);
          }
        }
        try {
          const runs = [];
          for (let i = 0; i < config.benchmark.runsPerVersion; i++) {
            const run = await this.benchmarkInteractiveIsolated(
              claudePath,
              process.cwd(),
              config.benchmark.timeout
            );
            runs.push(run);
          }
          const stats = summarizeTimes(runs.map((r) => r.time));
          result.interactiveBenchmark = {
            version,
            timestamp: (/* @__PURE__ */ new Date()).toISOString(),
            runs,
            ...stats,
            // The overall state and reason are taken from the first run.
            result: runs[0].result,
            reason: runs[0].reason
          };
        } catch (error) {
          const errorMsg = error instanceof Error ? error.message : String(error);
          throw new Error(`Interactive benchmark failed: ${errorMsg}`);
        }
        return result;
      }
    };
  }
});

// src/types/config.ts
var config_exports = {};
__export(config_exports, {
  BenchmarkConfigSchema: () => BenchmarkConfigSchema,
  DEFAULT_CONFIG: () => DEFAULT_CONFIG,
  EXAMPLE_CONFIGS: () => EXAMPLE_CONFIGS
});
import { z } from "zod";
var BenchmarkConfigSchema, DEFAULT_CONFIG, EXAMPLE_CONFIGS;
var init_config = __esm({
  "src/types/config.ts"() {
    "use strict";
    init_esm_shims();
    BenchmarkConfigSchema = z.object({
      /** Benchmark configuration */
      benchmark: z.object({
        /** Number of runs per version */
        runsPerVersion: z.number().min(1).max(10).default(3),
        /** Timeout for each benchmark run (ms) */
        timeout: z.number().min(1e3).max(6e5).default(12e4),
        /** Run both version and interactive benchmarks */
        runBoth: z.boolean().default(true)
      }).default({}),
      /** Version filtering */
      versions: z.object({
        /** Minimum version to benchmark (inclusive) */
        min: z.string().regex(/^\d+\.\d+\.\d+$/).optional(),
        /** Maximum version to benchmark (inclusive) */
        max: z.string().regex(/^\d+\.\d+\.\d+$/).optional(),
        /** Maximum number of versions to benchmark (the latest N are kept) */
        limit: z.number().min(1).optional(),
        /** Specific versions to include (min/max/exclude still apply on top) */
        include: z.array(z.string()).optional(),
        /** Specific versions to exclude */
        exclude: z.array(z.string()).default([])
      }).default({}),
      /** Storage configuration */
      storage: z.object({
        /** Base directory for benchmark results */
        baseDir: z.string().default("~/.cvm/benchmarks"),
        /** Clean up old sessions after each run */
        cleanupSessions: z.boolean().default(true),
        /** Keep session files for error analysis */
        keepErrorSessions: z.boolean().default(true)
      }).default({}),
      /** Reporting configuration */
      reporting: z.object({
        /** Generate HTML reports automatically */
        autoGenerate: z.boolean().default(true),
        /** Report output directory */
        outputDir: z.string().default("./reports"),
        /** Include error details in reports */
        includeErrors: z.boolean().default(true)
      }).default({})
    });
    DEFAULT_CONFIG = {
      benchmark: {
        runsPerVersion: 3,
        timeout: 12e4,
        runBoth: true
      },
      versions: {
        exclude: []
      },
      storage: {
        baseDir: "~/.cvm/benchmarks",
        cleanupSessions: true,
        keepErrorSessions: true
      },
      reporting: {
        autoGenerate: true,
        outputDir: "./reports",
        includeErrors: true
      }
    };
    // Partial configs: each sets only the fields it names.
    EXAMPLE_CONFIGS = {
      /** Test only latest 10 versions */
      latest10: {
        versions: { limit: 10 }
      },
      /** Test only 2.x versions */
      v2Only: {
        versions: { min: "2.0.0" }
      },
      /** Test range */
      range: {
        versions: { min: "1.0.24", max: "2.0.50" }
      },
      /** Quick test (1 run per version, limit 5) */
      quick: {
        benchmark: { runsPerVersion: 1 },
        versions: { limit: 5 }
      }
    };
  }
});

// src/index.ts
init_esm_shims();
init_benchmark_runner();
init_version_manager();
init_result_store();
init_version_spawn();

// src/benchmarks/interactive-pty.ts
init_esm_shims();
init_logger();
import * as pty from "node-pty";

/** Remove CSI escape sequences of the form ESC [ digits/semicolons letter. */
function stripAnsi(text) {
  return text.replace(/\x1b\[[0-9;]*[a-zA-Z]/g, "");
}

/** True if `text` contains one of the phrases treated as a version-requirement error. */
function mentionsVersionRequirement(text) {
  return text.includes("needs update") || text.includes("newer version") || text.includes("requires") || text.includes("minimum version");
}

/**
 * Start Claude in a pseudo-terminal and measure how long it takes to
 * become interactive.
 *
 * The returned promise never rejects; it resolves with a run result whose
 * `result` field is one of:
 *  - "ready": bracketed-paste, focus-event and prompt signals were all
 *    seen (resolved 500 ms after the last one, and `time` includes that
 *    delay)
 *  - "error_detected": a version-requirement message appeared in the output
 *  - "ui_then_exit" / "exited_early": the process exited on its own, with
 *    or without having shown a prompt
 *  - "timeout": nothing above happened within `timeout` ms
 *  - "failed": the PTY could not be spawned
 *
 * @param {{ claudePath: string, cwd: string, timeout?: number }} options -
 *   `timeout` in ms, default 30000
 * @returns {Promise<object>}
 */
async function benchmarkInteractive(options) {
  const { claudePath, cwd, timeout = 3e4 } = options;
  return new Promise((resolve) => {
    const startTime = Date.now();
    let output = "";
    const signals = {
      bracketedPaste: false,
      focusEvents: false,
      prompt: false
    };
    let readyDetected = false;
    let errorDetected = false;
    let sessionId = void 0;
    let trustPromptHandled = false;
    let ptyProcess;
    try {
      ptyProcess = pty.spawn(claudePath, [], {
        name: "xterm-256color",
        cols: 80,
        rows: 30,
        cwd,
        env: process.env
      });
    } catch (error) {
      resolve({
        time: Date.now() - startTime,
        result: "failed",
        reason: `PTY spawn failed: ${error instanceof Error ? error.message : String(error)}`,
        signals,
        sessionId
      });
      return;
    }
    const timeoutId = setTimeout(() => {
      ptyProcess.kill();
      resolve({
        time: Date.now() - startTime,
        result: "timeout",
        reason: `Benchmark timed out after ${timeout}ms`,
        signals,
        sessionId
      });
    }, timeout);
    ptyProcess.onData((data) => {
      output += data;
      // Auto-accept the trust prompt once by sending Enter.
      if (!trustPromptHandled && output.includes("Do you trust the files")) {
        trustPromptHandled = true;
        logger.debug("Trust prompt detected, auto-accepting...");
        setTimeout(() => ptyProcess.write("\r"), 100);
      }
      if (!sessionId) {
        const sessionMatch = output.match(/"session_id":"([a-f0-9-]+)"/);
        if (sessionMatch) {
          sessionId = sessionMatch[1];
        }
      }
      if (mentionsVersionRequirement(output) && !errorDetected) {
        errorDetected = true;
        const cleanOutput = stripAnsi(output);
        const lines = cleanOutput.split("\n");
        const errorStartIdx = lines.findIndex((l) => mentionsVersionRequirement(l));
        // Keep one line of context before the marker and five after it.
        const errorLines = lines.slice(Math.max(0, errorStartIdx - 1), errorStartIdx + 6);
        const fullErrorMessage = errorLines.join("\n").trim();
        const versionMatch = cleanOutput.match(/(\d+\.\d+\.\d+)\s+or higher/i) || cleanOutput.match(/version\s+\((\d+\.\d+\.\d+)/i) || cleanOutput.match(/requires\s+(\d+\.\d+\.\d+)/i) || cleanOutput.match(/minimum\s+version[:\s]+(\d+\.\d+\.\d+)/i) || cleanOutput.match(/v?(\d+\.\d+\.\d+)\+/);
        const minVersion = versionMatch ? versionMatch[1] : null;
        const EXPECTED_MIN_VERSION = "1.0.24";
        if (minVersion && minVersion !== EXPECTED_MIN_VERSION) {
          logger.warn(`Minimum version changed: expected ${EXPECTED_MIN_VERSION}, found ${minVersion}`);
        }
        clearTimeout(timeoutId);
        ptyProcess.kill();
        resolve({
          time: Date.now() - startTime,
          result: "error_detected",
          reason: "version_requirement_not_met",
          minVersionRequired: minVersion || EXPECTED_MIN_VERSION,
          errorMessage: fullErrorMessage,
          rawOutput: cleanOutput.substring(0, 2e3),
          sessionId
        });
        return;
      }
      // Readiness signals: the terminal enabling bracketed paste and focus
      // reporting, plus a ">" followed by whitespace in the visible text.
      if (data.includes("\x1B[?2004h") && !signals.bracketedPaste) {
        signals.bracketedPaste = true;
      }
      if (data.includes("\x1B[?1004h") && !signals.focusEvents) {
        signals.focusEvents = true;
      }
      const stripped = stripAnsi(data);
      if (/>\s/.test(stripped) && !signals.prompt) {
        signals.prompt = true;
      }
      if (signals.bracketedPaste && signals.focusEvents && signals.prompt && !readyDetected) {
        readyDetected = true;
        setTimeout(() => {
          clearTimeout(timeoutId);
          ptyProcess.kill();
          resolve({
            time: Date.now() - startTime,
            result: "ready",
            reason: "all terminal signals received and process stable",
            signals: { ...signals },
            sessionId
          });
        }, 500);
      }
    });
    ptyProcess.onExit(({ exitCode }) => {
      // The error and ready paths resolve the promise themselves.
      if (errorDetected || readyDetected) return;
      clearTimeout(timeoutId);
      const elapsed = Date.now() - startTime;
      if (signals.prompt) {
        resolve({
          time: elapsed,
          result: "ui_then_exit",
          reason: "showed prompt but immediately exited",
          signals: { ...signals },
          exitCode,
          sessionId
        });
        return;
      }
      resolve({
        time: elapsed,
        result: "exited_early",
        reason: "process exited before showing prompt",
        signals: { ...signals },
        exitCode,
        sessionId
      });
    });
  });
}

// src/index.ts
init_version_filter();
init_cleanup();
init_logger();
init_progress();
init_config();

// src/types/benchmark.ts
init_esm_shims();
import { z as z2 } from "zod";
var BenchmarkResultState = z2.enum([
  "ready",
  // Version started successfully and is interactive
  "error_detected",
  // Version shows error message (< 1.0.24)
  "ui_then_exit",
  // Shows UI but immediately exits
  "exited_early",
  // Exited before showing prompt
  "timeout",
  // Benchmark timed out
  "failed"
  // Benchmark failed with error
]);
var TerminalSignalsSchema = z2.object({
  bracketedPaste: z2.boolean(),
  focusEvents: z2.boolean(),
  prompt: z2.boolean()
});
var BenchmarkRunResultSchema = z2.object({
  /** Time taken for this run (ms) */
  time: z2.number(),
  /** Result state */
  result: BenchmarkResultState,
  /** Human-readable reason */
  reason: z2.string(),
  /** Terminal signals (PTY benchmark only) */
  signals: TerminalSignalsSchema.optional(),
  /** Exit code (if exited) */
  exitCode: z2.number().optional(),
  /** Session ID (if created) */
  sessionId: z2.string().optional(),
  /** Minimum version required (if error_detected) */
  minVersionRequired: z2.string().optional(),
  /** Error message (if error_detected) */
  errorMessage: z2.string().optional(),
  /** Raw output for debugging */
  rawOutput: z2.string().optional()
});
var VersionBenchmarkResultSchema = z2.object({
  version: z2.string(),
  timestamp: z2.string(),
  runs: z2.array(z2.number()),
  avgTime: z2.number(),
  minTime: z2.number(),
  maxTime: z2.number(),
  stdDev: z2.number()
});
var InteractiveBenchmarkResultSchema = z2.object({
  version: z2.string(),
  timestamp: z2.string(),
  runs: z2.array(BenchmarkRunResultSchema),
  avgTime: z2.number(),
  minTime: z2.number(),
  maxTime: z2.number(),
  stdDev: z2.number(),
  result: BenchmarkResultState,
  reason: z2.string()
});
var CombinedBenchmarkResultSchema = z2.object({
  version: z2.string(),
  versionBenchmark: VersionBenchmarkResultSchema.optional(),
  interactiveBenchmark: InteractiveBenchmarkResultSchema.optional(),
  error: z2.string().optional()
});
var BenchmarkSuiteResultSchema = z2.object({
  runNumber: z2.number(),
  timestamp: z2.string(),
  config: z2.any(),
  // BenchmarkConfig
  results: z2.array(CombinedBenchmarkResultSchema),
  errors: z2.array(z2.object({
    version: z2.string(),
    error: z2.string()
  })),
  metadata: z2.object({
    totalVersions: z2.number(),
    successfulVersions: z2.number(),
    failedVersions: z2.number(),
    duration: z2.number()
  })
});

// src/index.ts
/**
 * Plugin descriptor: one `benchmark` command that runs the suite with
 * DEFAULT_CONFIG, and an `afterInstall` hook that logs a hint.
 */
var plugin = {
  metadata: {
    name: "benchmark",
    version: "2.0.0",
    description: "Benchmark and analyze Claude Code performance",
    author: "clawdcc"
  },
  commands: [
    {
      name: "benchmark",
      description: "Run benchmarks on Claude Code versions",
      handler: async (_args) => {
        const { BenchmarkRunner: BenchmarkRunner2 } = await Promise.resolve().then(() => (init_benchmark_runner(), benchmark_runner_exports));
        const { DEFAULT_CONFIG: DEFAULT_CONFIG2 } = await Promise.resolve().then(() => (init_config(), config_exports));
        const { logger: logger2 } = await Promise.resolve().then(() => (init_logger(), logger_exports));
        try {
          const runner = new BenchmarkRunner2();
          const result = await runner.runSuite(DEFAULT_CONFIG2);
          logger2.success("Benchmark complete!");
          logger2.info(`Results: ~/.cvm/benchmarks/run-${result.runNumber}/`);
          return result;
        } catch (error) {
          logger2.error("Benchmark failed:", error);
          throw error;
        }
      }
    }
  ],
  hooks: {
    afterInstall: async (version) => {
      const { logger: logger2 } = await Promise.resolve().then(() => (init_logger(), logger_exports));
      logger2.info(`Version ${version} installed. Run: cvm benchmark`);
    }
  }
};
export {
  BenchmarkConfigSchema,
  BenchmarkResultState,
  BenchmarkRunResultSchema,
  BenchmarkRunner,
  BenchmarkSuiteResultSchema,
  CombinedBenchmarkResultSchema,
  DEFAULT_CONFIG,
  EXAMPLE_CONFIGS,
  InteractiveBenchmarkResultSchema,
  ProgressTracker,
  ResultStore,
  TerminalSignalsSchema,
  VersionBenchmarkResultSchema,
  VersionManager,
  benchmarkInteractive,
  benchmarkVersion,
  cleanupSessions,
  compareVersions,
  countSessions,
  describeVersionFilter,
  filterVersions,
  formatBytes,
  formatDuration,
  getSessionSize,
  logger,
  plugin,
  sortVersions
};
//# sourceMappingURL=index.js.map