@clawdcc/cvm-benchmark
Comprehensive benchmarking and performance analysis tools for Claude Code versions
1,317 lines (1,302 loc) • 45.1 kB
JavaScript
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __esm = (fn, res) => function __init() {
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// node_modules/tsup/assets/esm_shims.js
import path from "path";
import { fileURLToPath } from "url";
var init_esm_shims = __esm({
"node_modules/tsup/assets/esm_shims.js"() {
"use strict";
}
});
// src/utils/version-filter.ts
function parseVersion(version) {
const [major, minor, patch] = version.split(".").map(Number);
return { major, minor, patch };
}
function compareVersions(a, b) {
const vA = parseVersion(a);
const vB = parseVersion(b);
if (vA.major !== vB.major) return vA.major - vB.major;
if (vA.minor !== vB.minor) return vA.minor - vB.minor;
return vA.patch - vB.patch;
}
function sortVersions(versions) {
return [...versions].sort(compareVersions);
}
function filterVersions(allVersions, config) {
let filtered = [...allVersions];
if (config.versions.include && config.versions.include.length > 0) {
filtered = filtered.filter((v) => config.versions.include.includes(v));
}
if (config.versions.min) {
filtered = filtered.filter((v) => compareVersions(v, config.versions.min) >= 0);
}
if (config.versions.max) {
filtered = filtered.filter((v) => compareVersions(v, config.versions.max) <= 0);
}
if (config.versions.exclude.length > 0) {
filtered = filtered.filter((v) => !config.versions.exclude.includes(v));
}
filtered = sortVersions(filtered);
if (config.versions.limit) {
filtered = filtered.slice(-config.versions.limit);
}
return filtered;
}
function describeVersionFilter(config, totalVersions, filteredVersions) {
const filters = [];
if (config.versions.include && config.versions.include.length > 0) {
filters.push(`include=${config.versions.include.join(", ")}`);
}
if (config.versions.min) {
filters.push(`min=${config.versions.min}`);
}
if (config.versions.max) {
filters.push(`max=${config.versions.max}`);
}
if (config.versions.exclude.length > 0) {
filters.push(`exclude=${config.versions.exclude.length} versions`);
}
if (config.versions.limit) {
filters.push(`limit=${config.versions.limit}`);
}
const filterDesc = filters.length > 0 ? ` (${filters.join(", ")})` : "";
return `${filteredVersions}/${totalVersions} versions${filterDesc}`;
}
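// Usage sketch (illustrative only, not executed by the bundle): compareVersions
// compares each semver segment numerically, so "2.0.10" ranks above "2.0.9",
// and filterVersions applies include -> min -> max -> exclude -> sort -> limit.
//   sortVersions(["2.0.10", "1.0.5", "2.0.9"]);   // ["1.0.5", "2.0.9", "2.0.10"]
//   filterVersions(["1.0.5", "2.0.1", "2.0.10"], {
//     versions: { min: "2.0.0", limit: 1, exclude: [] }
//   });                                            // ["2.0.10"] (latest N after filtering)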
var init_version_filter = __esm({
"src/utils/version-filter.ts"() {
"use strict";
init_esm_shims();
if (false) {
const { describe, it, expect } = void 0;
describe("parseVersion", () => {
it("should parse semantic versions correctly", () => {
expect(parseVersion("1.2.3")).toEqual({ major: 1, minor: 2, patch: 3 });
expect(parseVersion("0.2.120")).toEqual({ major: 0, minor: 2, patch: 120 });
expect(parseVersion("2.0.50")).toEqual({ major: 2, minor: 0, patch: 50 });
});
});
describe("compareVersions", () => {
it("should compare major versions", () => {
expect(compareVersions("2.0.0", "1.0.0")).toBeGreaterThan(0);
expect(compareVersions("1.0.0", "2.0.0")).toBeLessThan(0);
});
it("should compare minor versions", () => {
expect(compareVersions("1.2.0", "1.1.0")).toBeGreaterThan(0);
expect(compareVersions("1.1.0", "1.2.0")).toBeLessThan(0);
});
it("should compare patch versions", () => {
expect(compareVersions("1.0.10", "1.0.9")).toBeGreaterThan(0);
expect(compareVersions("1.0.9", "1.0.10")).toBeLessThan(0);
});
it("should return 0 for equal versions", () => {
expect(compareVersions("1.2.3", "1.2.3")).toBe(0);
});
});
describe("sortVersions", () => {
it("should sort versions in ascending order", () => {
const unsorted = ["2.0.10", "1.0.5", "2.0.1", "0.2.120", "1.0.24"];
const sorted = sortVersions(unsorted);
expect(sorted).toEqual(["0.2.120", "1.0.5", "1.0.24", "2.0.1", "2.0.10"]);
});
it("should not mutate original array", () => {
const original = ["2.0.10", "1.0.5"];
const sorted = sortVersions(original);
expect(original).toEqual(["2.0.10", "1.0.5"]);
expect(sorted).toEqual(["1.0.5", "2.0.10"]);
});
});
describe("filterVersions", () => {
const allVersions = ["0.2.120", "1.0.5", "1.0.24", "2.0.1", "2.0.10", "2.0.37", "2.0.50"];
it("should filter by min version", () => {
const config = {
benchmark: { runsPerVersion: 3, timeout: 12e4, runBoth: true },
versions: { min: "2.0.0", exclude: [] },
storage: { baseDir: "~/.cvm/benchmarks", cleanupSessions: true, keepErrorSessions: true },
reporting: { autoGenerate: true, outputDir: "./reports", includeErrors: true }
};
const result = filterVersions(allVersions, config);
expect(result).toEqual(["2.0.1", "2.0.10", "2.0.37", "2.0.50"]);
});
it("should filter by max version", () => {
const config = {
benchmark: { runsPerVersion: 3, timeout: 12e4, runBoth: true },
versions: { max: "1.0.24", exclude: [] },
storage: { baseDir: "~/.cvm/benchmarks", cleanupSessions: true, keepErrorSessions: true },
reporting: { autoGenerate: true, outputDir: "./reports", includeErrors: true }
};
const result = filterVersions(allVersions, config);
expect(result).toEqual(["0.2.120", "1.0.5", "1.0.24"]);
});
it("should filter by min and max", () => {
const config = {
benchmark: { runsPerVersion: 3, timeout: 12e4, runBoth: true },
versions: { min: "1.0.0", max: "2.0.10", exclude: [] },
storage: { baseDir: "~/.cvm/benchmarks", cleanupSessions: true, keepErrorSessions: true },
reporting: { autoGenerate: true, outputDir: "./reports", includeErrors: true }
};
const result = filterVersions(allVersions, config);
expect(result).toEqual(["1.0.5", "1.0.24", "2.0.1", "2.0.10"]);
});
it("should apply limit (latest N)", () => {
const config = {
benchmark: { runsPerVersion: 3, timeout: 12e4, runBoth: true },
versions: { limit: 3, exclude: [] },
storage: { baseDir: "~/.cvm/benchmarks", cleanupSessions: true, keepErrorSessions: true },
reporting: { autoGenerate: true, outputDir: "./reports", includeErrors: true }
};
const result = filterVersions(allVersions, config);
expect(result).toEqual(["2.0.10", "2.0.37", "2.0.50"]);
});
it("should apply exclusions", () => {
const config = {
benchmark: { runsPerVersion: 3, timeout: 12e4, runBoth: true },
versions: { exclude: ["1.0.5", "2.0.10"] },
storage: { baseDir: "~/.cvm/benchmarks", cleanupSessions: true, keepErrorSessions: true },
reporting: { autoGenerate: true, outputDir: "./reports", includeErrors: true }
};
const result = filterVersions(allVersions, config);
expect(result).toEqual(["0.2.120", "1.0.24", "2.0.1", "2.0.37", "2.0.50"]);
});
it("should prioritize include list", () => {
const config = {
benchmark: { runsPerVersion: 3, timeout: 12e4, runBoth: true },
versions: { include: ["2.0.37", "2.0.50"], exclude: [] },
storage: { baseDir: "~/.cvm/benchmarks", cleanupSessions: true, keepErrorSessions: true },
reporting: { autoGenerate: true, outputDir: "./reports", includeErrors: true }
};
const result = filterVersions(allVersions, config);
expect(result).toEqual(["2.0.37", "2.0.50"]);
});
});
}
}
});
// src/utils/logger.ts
var logger_exports = {};
__export(logger_exports, {
logger: () => logger
});
import chalk from "chalk";
var Logger, logger;
var init_logger = __esm({
"src/utils/logger.ts"() {
"use strict";
init_esm_shims();
Logger = class {
level = "info";
silent = false;
setLevel(level) {
this.level = level;
}
setSilent(silent) {
this.silent = silent;
}
debug(message, ...args) {
if (!this.silent && this.shouldLog("debug")) {
console.log(chalk.gray(`[DEBUG] ${message}`), ...args);
}
}
info(message, ...args) {
if (!this.silent && this.shouldLog("info")) {
console.log(chalk.blue(`\u2139 ${message}`), ...args);
}
}
warn(message, ...args) {
if (!this.silent && this.shouldLog("warn")) {
console.warn(chalk.yellow(`\u26A0 ${message}`), ...args);
}
}
error(message, ...args) {
if (!this.silent && this.shouldLog("error")) {
console.error(chalk.red(`\u2716 ${message}`), ...args);
}
}
success(message, ...args) {
if (!this.silent && this.shouldLog("success")) {
console.log(chalk.green(`\u2713 ${message}`), ...args);
}
}
shouldLog(level) {
const levels = ["debug", "info", "warn", "error", "success"];
return levels.indexOf(level) >= levels.indexOf(this.level);
}
};
logger = new Logger();
}
});
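// Usage sketch (illustrative only): shouldLog compares positions in
// ["debug", "info", "warn", "error", "success"], so raising the level
// suppresses everything earlier in that list.
//   logger.setLevel("warn");
//   logger.debug("hidden");   // filtered out (debug < warn)
//   logger.error("shown");    // printed in red with a ✖ prefix
//   logger.setSilent(true);   // mutes all output regardless of level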
// src/core/version-manager.ts
import { readdir, stat } from "fs/promises";
import { join } from "path";
import { homedir } from "os";
import { execSync, spawn } from "child_process";
var VersionManager;
var init_version_manager = __esm({
"src/core/version-manager.ts"() {
"use strict";
init_esm_shims();
init_version_filter();
init_logger();
VersionManager = class {
versionsDir;
constructor(cvmDir = join(homedir(), ".cvm")) {
this.versionsDir = join(cvmDir, "versions");
}
/**
* Get all installed Claude Code versions from CVM
*/
async getInstalledVersions() {
try {
const entries = await readdir(this.versionsDir);
const versions = [];
for (const entry of entries) {
const entryPath = join(this.versionsDir, entry);
const stats = await stat(entryPath);
if (stats.isDirectory() && /^\d+\.\d+\.\d+$/.test(entry)) {
const claudePath = join(
entryPath,
"installed",
"node_modules",
".bin",
"claude"
);
try {
await stat(claudePath);
versions.push(entry);
} catch {
logger.warn(`Version ${entry} not fully installed, skipping`);
}
}
}
return sortVersions(versions);
} catch (error) {
logger.error("Failed to read installed versions:", error);
return [];
}
}
/**
* Get all available versions from npm registry
*/
async getAvailableVersions() {
try {
const output = execSync(
"npm view @anthropic-ai/claude-code versions --json",
{ encoding: "utf8", stdio: ["pipe", "pipe", "pipe"] }
);
const versions = JSON.parse(output);
return sortVersions(versions);
} catch (error) {
logger.error("Failed to fetch available versions from npm:", error);
return [];
}
}
/**
* Get versions that are available but not installed
*/
async getMissingVersions() {
const installed = new Set(await this.getInstalledVersions());
const available = await this.getAvailableVersions();
return available.filter((v) => !installed.has(v));
}
/**
* Install a version using CVM
* Returns true if installation succeeded
*/
async installVersion(version) {
return new Promise((resolve) => {
logger.info(`Installing version ${version}...`);
const proc = spawn("cvm", ["install", version], {
stdio: ["ignore", "pipe", "pipe"]
});
let stderr = "";
proc.stderr.on("data", (data) => {
stderr += data.toString();
});
proc.on("close", (code) => {
if (code === 0) {
logger.success(`Installed ${version}`);
resolve(true);
} else {
logger.error(`Failed to install ${version}: ${stderr}`);
resolve(false);
}
});
proc.on("error", (error) => {
logger.error(`Failed to install ${version}: ${error.message}`);
resolve(false);
});
});
}
/**
* Install multiple versions
* Returns array of successfully installed versions
*/
async installVersions(versions) {
const installed = [];
for (const version of versions) {
const success = await this.installVersion(version);
if (success) {
installed.push(version);
}
}
return installed;
}
/**
* Parse a version range string (e.g., "2.0.40-2.0.53" or "2.0.40-latest")
* Returns { min, max } (max may be "latest") or null if not a range
*/
parseVersionRange(range) {
const match = range.match(/^(\d+\.\d+\.\d+)-(\d+\.\d+\.\d+|latest)$/);
if (!match) return null;
return { min: match[1], max: match[2] };
}
/**
* Get Claude binary path for a version
*/
getClaudePath(version) {
return join(
this.versionsDir,
version,
"installed",
"node_modules",
".bin",
"claude"
);
}
/**
* Check if a version is installed
*/
async isInstalled(version) {
try {
await stat(this.getClaudePath(version));
return true;
} catch {
return false;
}
}
/**
* Get version installation directory
*/
getVersionDir(version) {
return join(this.versionsDir, version);
}
};
}
});
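// Usage sketch (illustrative only): VersionManager reads installed versions
// from ~/.cvm/versions/<semver>/installed/node_modules/.bin/claude and can
// expand a range string before installing what is missing.
//   const vm = new VersionManager();
//   await vm.getInstalledVersions();        // e.g. ["1.0.24", "2.0.50"] (sorted ascending)
//   vm.parseVersionRange("2.0.40-latest");  // { min: "2.0.40", max: "latest" }
//   vm.getClaudePath("2.0.50");             // ".../.cvm/versions/2.0.50/installed/node_modules/.bin/claude"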
// src/storage/result-store.ts
import { writeFile, readFile, mkdir } from "fs/promises";
import { join as join2 } from "path";
import { homedir as homedir2 } from "os";
var ResultStore;
var init_result_store = __esm({
"src/storage/result-store.ts"() {
"use strict";
init_esm_shims();
ResultStore = class {
constructor(baseDir = join2(homedir2(), ".cvm", "benchmarks")) {
this.baseDir = baseDir;
}
/**
* Save suite results to run directory
*/
async saveSuiteResults(runNumber, results) {
const runDir = join2(this.baseDir, `run-${runNumber}`);
await mkdir(runDir, { recursive: true });
const filePath = join2(runDir, "results.json");
await writeFile(filePath, JSON.stringify(results, null, 2));
}
/**
* Save version benchmark results
*/
async saveVersionResults(runNumber, results) {
const versionDir = join2(this.baseDir, `run-${runNumber}`, "version");
await mkdir(versionDir, { recursive: true });
const filePath = join2(versionDir, "benchmarks-all.json");
await writeFile(
filePath,
JSON.stringify({ results, timestamp: (/* @__PURE__ */ new Date()).toISOString() }, null, 2)
);
}
/**
* Save interactive benchmark results (individual files)
*/
async saveInteractiveResults(runNumber, results) {
const interactiveDir = join2(this.baseDir, `run-${runNumber}`, "interactive");
await mkdir(interactiveDir, { recursive: true });
for (const result of results) {
const fileName = `benchmark-startup-${result.version.replace(/\./g, "-")}.json`;
const filePath = join2(interactiveDir, fileName);
await writeFile(
filePath,
JSON.stringify(
{
data: {
version: result.version,
results: result.runs
},
analysis: {
version: result.version,
avg: result.avgTime,
min: result.minTime,
max: result.maxTime,
result: result.result
},
timestamp: result.timestamp
},
null,
2
)
);
}
}
/**
* Save metadata for a run
*/
async saveMetadata(runNumber, metadata) {
const runDir = join2(this.baseDir, `run-${runNumber}`);
await mkdir(runDir, { recursive: true });
const filePath = join2(runDir, "metadata.json");
await writeFile(filePath, JSON.stringify(metadata, null, 2));
}
/**
* Load suite results from run directory
*/
async loadSuiteResults(runNumber) {
try {
const filePath = join2(this.baseDir, `run-${runNumber}`, "results.json");
const data = await readFile(filePath, "utf-8");
return JSON.parse(data);
} catch {
return null;
}
}
/**
* Get next run number
*/
async getNextRunNumber() {
try {
const { readdir: readdir3 } = await import("fs/promises");
const entries = await readdir3(this.baseDir);
const runs = entries.filter((f) => f.startsWith("run-")).map((f) => parseInt(f.replace("run-", ""))).filter((n) => !isNaN(n));
return runs.length > 0 ? Math.max(...runs) + 1 : 1;
} catch {
return 1;
}
}
/**
* Get all versions that have been benchmarked across all runs
*/
async getBenchmarkedVersions() {
const benchmarked = /* @__PURE__ */ new Set();
try {
const { readdir: readdir3 } = await import("fs/promises");
const entries = await readdir3(this.baseDir);
const runDirs = entries.filter((f) => f.startsWith("run-"));
for (const runDir of runDirs) {
const results = await this.loadSuiteResults(parseInt(runDir.replace("run-", "")));
if (results?.results) {
for (const result of results.results) {
if (!result.error && result.version) {
benchmarked.add(result.version);
}
}
}
}
} catch {
}
return benchmarked;
}
};
}
});
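// Usage sketch (illustrative only): results are grouped per run under
// ~/.cvm/benchmarks/run-<N>/ (results.json, metadata.json, version/, interactive/),
// and getNextRunNumber picks max(existing N) + 1.
//   const store = new ResultStore();
//   const run = await store.getNextRunNumber();      // 1 on a fresh directory
//   await store.saveSuiteResults(run, suiteResult);  // suiteResult: whatever runSuite returned
//   await store.loadSuiteResults(run);               // parsed object, or null if missing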
// src/benchmarks/version-spawn.ts
import { spawn as spawn2 } from "child_process";
async function benchmarkVersion(options) {
const { claudePath, timeout = 1e4 } = options;
return new Promise((resolve, reject) => {
const startTime = Date.now();
const proc = spawn2(claudePath, ["--version"], {
stdio: "ignore",
timeout
});
proc.on("close", (code) => {
const elapsed = Date.now() - startTime;
if (code === 0) {
resolve(elapsed);
} else {
reject(new Error(`Process exited with code ${code}`));
}
});
proc.on("error", (error) => {
reject(error);
});
});
}
var init_version_spawn = __esm({
"src/benchmarks/version-spawn.ts"() {
"use strict";
init_esm_shims();
}
});
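// Usage sketch (illustrative only): benchmarkVersion times a bare
// `claude --version` spawn and resolves with elapsed milliseconds, or
// rejects on a non-zero exit code or spawn error.
//   const ms = await benchmarkVersion({
//     claudePath: "/path/to/.bin/claude",  // hypothetical path
//     timeout: 10000                       // default is 10s (1e4)
//   });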
// src/utils/cleanup.ts
import { readdir as readdir2, unlink, stat as stat2 } from "fs/promises";
import { join as join3 } from "path";
import { homedir as homedir3 } from "os";
function getProjectDir(cwd) {
const encoded = cwd.replace(/\//g, "-");
return join3(homedir3(), ".claude", "projects", encoded);
}
async function cleanupSessions(sessionIds, cwd) {
const projectDir = getProjectDir(cwd);
let cleaned = 0;
let failed = 0;
for (const sessionId of sessionIds) {
try {
const sessionFile = join3(projectDir, `${sessionId}.jsonl`);
try {
await stat2(sessionFile);
} catch {
continue;
}
await unlink(sessionFile);
cleaned++;
logger.debug(`Cleaned up session: ${sessionId}`);
} catch (error) {
failed++;
logger.warn(`Failed to cleanup session ${sessionId}:`, error);
}
}
return { cleaned, failed };
}
async function getSessionSize(cwd) {
const projectDir = getProjectDir(cwd);
try {
const files = await readdir2(projectDir);
let totalSize = 0;
for (const file of files) {
if (file.endsWith(".jsonl")) {
const filePath = join3(projectDir, file);
const stats = await stat2(filePath);
totalSize += stats.size;
}
}
return totalSize;
} catch {
return 0;
}
}
async function countSessions(cwd) {
const projectDir = getProjectDir(cwd);
try {
const files = await readdir2(projectDir);
return files.filter((f) => f.endsWith(".jsonl")).length;
} catch {
return 0;
}
}
var init_cleanup = __esm({
"src/utils/cleanup.ts"() {
"use strict";
init_esm_shims();
init_logger();
}
});
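// Usage sketch (illustrative only): session files live under
// ~/.claude/projects/<cwd with "/" replaced by "-">/<sessionId>.jsonl.
//   await countSessions(process.cwd());                 // number of *.jsonl files
//   await getSessionSize(process.cwd());                // total bytes across them
//   await cleanupSessions(["abc-123"], process.cwd());  // hypothetical id; returns { cleaned, failed }, missing files are skipped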
// src/utils/progress.ts
import ora from "ora";
function formatDuration(ms) {
if (ms < 1e3) return `${ms}ms`;
if (ms < 6e4) return `${(ms / 1e3).toFixed(1)}s`;
if (ms < 36e5) return `${Math.floor(ms / 6e4)}m ${Math.floor(ms % 6e4 / 1e3)}s`;
return `${Math.floor(ms / 36e5)}h ${Math.floor(ms % 36e5 / 6e4)}m`;
}
function formatBytes(bytes) {
if (bytes < 1024) return `${bytes}B`;
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)}KB`;
if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)}MB`;
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)}GB`;
}
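// Worked examples (illustrative only) of the formatting thresholds above:
//   formatDuration(850);      // "850ms"
//   formatDuration(9500);     // "9.5s"
//   formatDuration(125000);   // "2m 5s"
//   formatDuration(4500000);  // "1h 15m"
//   formatBytes(2048);        // "2.0KB"
//   formatBytes(45100);       // "44.0KB"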
var ProgressTracker;
var init_progress = __esm({
"src/utils/progress.ts"() {
"use strict";
init_esm_shims();
ProgressTracker = class {
spinner = null;
startTime = 0;
total = 0;
start(message, total) {
this.total = total;
this.startTime = Date.now();
this.spinner = ora({
text: message,
spinner: "dots"
}).start();
}
update(current, message) {
const percentage = Math.round(current / this.total * 100);
const elapsed = Date.now() - this.startTime;
const eta = current > 0 ? Math.round(elapsed / current * (this.total - current) / 1e3) : 0;
const text = message || `Progress: ${current}/${this.total} (${percentage}%) - ETA: ${eta}s`;
if (this.spinner) {
this.spinner.text = text;
}
}
succeed(message) {
const elapsed = Math.round((Date.now() - this.startTime) / 1e3);
this.spinner?.succeed(message || `Completed ${this.total} items in ${elapsed}s`);
this.spinner = null;
}
fail(message) {
this.spinner?.fail(message || "Failed");
this.spinner = null;
}
stop() {
this.spinner?.stop();
this.spinner = null;
}
};
}
});
// src/core/benchmark-runner.ts
var benchmark_runner_exports = {};
__export(benchmark_runner_exports, {
BenchmarkRunner: () => BenchmarkRunner
});
import { spawn as spawn3 } from "child_process";
import { fileURLToPath as fileURLToPath2 } from "url";
import { dirname, join as join4 } from "path";
import { existsSync } from "fs";
var BenchmarkRunner;
var init_benchmark_runner = __esm({
"src/core/benchmark-runner.ts"() {
"use strict";
init_esm_shims();
init_version_manager();
init_result_store();
init_version_spawn();
init_version_filter();
init_cleanup();
init_logger();
init_progress();
BenchmarkRunner = class {
versionManager;
resultStore;
constructor(_config) {
this.versionManager = new VersionManager();
this.resultStore = new ResultStore();
}
/**
* Run interactive benchmark in a separate process to avoid node-pty threading bugs
*/
async benchmarkInteractiveIsolated(claudePath, cwd, timeout) {
return new Promise((resolve, reject) => {
const currentFile = fileURLToPath2(import.meta.url);
const currentDir = dirname(currentFile);
let projectRoot = currentDir;
while (!existsSync(join4(projectRoot, "package.json"))) {
const parent = dirname(projectRoot);
if (parent === projectRoot) throw new Error("Could not find project root");
projectRoot = parent;
}
const workerScript = join4(projectRoot, "dist/benchmarks/interactive-worker.js");
const proc = spawn3(process.execPath, [workerScript, claudePath, cwd, String(timeout)], {
stdio: ["ignore", "pipe", "pipe"]
});
let stdout = "";
let stderr = "";
proc.stdout.on("data", (data) => {
stdout += data.toString();
});
proc.stderr.on("data", (data) => {
stderr += data.toString();
});
proc.on("close", (code) => {
if (code === 0 && stdout) {
try {
const result = JSON.parse(stdout);
resolve(result);
} catch (error) {
reject(new Error(`Failed to parse benchmark result: ${error}`));
}
} else {
reject(new Error(`Benchmark worker failed with code ${code}: ${stderr}`));
}
});
proc.on("error", reject);
});
}
/**
* Run benchmark suite for all filtered versions
*/
async runSuite(config, options = {}) {
const startTime = Date.now();
const runNumber = await this.resultStore.getNextRunNumber();
logger.info(`Starting benchmark run #${runNumber}`);
const allVersions = await this.versionManager.getInstalledVersions();
let versions = filterVersions(allVersions, config);
if (options.incremental) {
const benchmarked = await this.resultStore.getBenchmarkedVersions();
const beforeCount = versions.length;
versions = versions.filter((v) => !benchmarked.has(v));
const skipped = beforeCount - versions.length;
if (skipped > 0) {
logger.info(`Incremental mode: skipping ${skipped} already benchmarked versions`);
}
}
logger.info(describeVersionFilter(config, allVersions.length, versions.length));
if (versions.length === 0) {
throw new Error("No versions match the filter criteria");
}
const results = [];
const errors = [];
const sessionIds = [];
const progress = new ProgressTracker();
progress.start(`Benchmarking ${versions.length} versions...`, versions.length);
for (let i = 0; i < versions.length; i++) {
const version = versions[i];
progress.update(i + 1, `[${i + 1}/${versions.length}] Benchmarking ${version}...`);
try {
const result = await this.benchmarkVersion(version, config);
results.push(result);
if (result.interactiveBenchmark) {
result.interactiveBenchmark.runs.forEach((run) => {
if (run.sessionId) sessionIds.push(run.sessionId);
});
}
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
logger.error(`Failed to benchmark ${version}: ${errorMsg}`);
errors.push({ version, error: errorMsg });
results.push({
version,
error: errorMsg
});
}
}
progress.succeed(`Completed ${versions.length} versions`);
if (config.storage.cleanupSessions && sessionIds.length > 0) {
logger.info(`Cleaning up ${sessionIds.length} test sessions...`);
const { cleaned, failed: failed2 } = await cleanupSessions(sessionIds, process.cwd());
logger.info(`Cleaned: ${cleaned}, Failed: ${failed2}`);
}
const successful = results.filter((r) => !r.error).length;
const failed = results.filter((r) => r.error).length;
const suiteResult = {
runNumber,
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
config,
results,
errors,
metadata: {
totalVersions: versions.length,
successfulVersions: successful,
failedVersions: failed,
duration: Date.now() - startTime
}
};
await this.resultStore.saveSuiteResults(runNumber, suiteResult);
await this.resultStore.saveMetadata(runNumber, {
timestamp: suiteResult.timestamp,
versionsCount: versions.length,
config
});
const versionResults = results.filter((r) => r.versionBenchmark).map((r) => r.versionBenchmark);
const interactiveResults = results.filter((r) => r.interactiveBenchmark).map((r) => r.interactiveBenchmark);
if (versionResults.length > 0) {
await this.resultStore.saveVersionResults(runNumber, versionResults);
}
if (interactiveResults.length > 0) {
await this.resultStore.saveInteractiveResults(runNumber, interactiveResults);
}
logger.success(`Benchmark run #${runNumber} complete`);
logger.info(`Successful: ${successful}, Failed: ${failed}`);
return suiteResult;
}
/**
* Benchmark a single version (both spawn and interactive)
*/
async benchmarkVersion(version, config) {
const claudePath = this.versionManager.getClaudePath(version);
const result = { version };
if (config.benchmark.runBoth) {
try {
const runs = [];
for (let i = 0; i < config.benchmark.runsPerVersion; i++) {
const time = await benchmarkVersion({
claudePath,
timeout: config.benchmark.timeout
});
runs.push(time);
}
const avgTime = Math.round(runs.reduce((a, b) => a + b, 0) / runs.length);
const minTime = Math.min(...runs);
const maxTime = Math.max(...runs);
const mean = runs.reduce((a, b) => a + b, 0) / runs.length;
const variance = runs.reduce((a, b) => a + Math.pow(b - mean, 2), 0) / runs.length;
const stdDev = Math.round(Math.sqrt(variance));
result.versionBenchmark = {
version,
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
runs,
avgTime,
minTime,
maxTime,
stdDev
};
} catch (error) {
logger.warn(`Version spawn benchmark failed for ${version}: ${error}`);
}
}
try {
const runs = [];
for (let i = 0; i < config.benchmark.runsPerVersion; i++) {
const run = await this.benchmarkInteractiveIsolated(
claudePath,
process.cwd(),
config.benchmark.timeout
);
runs.push(run);
}
const times = runs.map((r) => r.time);
const avgTime = Math.round(times.reduce((a, b) => a + b, 0) / times.length);
const minTime = Math.min(...times);
const maxTime = Math.max(...times);
const mean = times.reduce((a, b) => a + b, 0) / times.length;
const variance = times.reduce((a, b) => a + Math.pow(b - mean, 2), 0) / times.length;
const stdDev = Math.round(Math.sqrt(variance));
result.interactiveBenchmark = {
version,
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
runs,
avgTime,
minTime,
maxTime,
stdDev,
result: runs[0].result,
reason: runs[0].reason
};
} catch (error) {
const errorMsg = error instanceof Error ? error.message : String(error);
throw new Error(`Interactive benchmark failed: ${errorMsg}`);
}
return result;
}
};
}
});
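// Usage sketch (illustrative only): runSuite filters the installed versions,
// benchmarks each one (spawn timing plus isolated interactive PTY runs),
// cleans up sessions if configured, and persists everything via ResultStore.
//   const runner = new BenchmarkRunner();
//   const suite = await runner.runSuite(DEFAULT_CONFIG, { incremental: true });
//   suite.metadata.duration;  // total wall-clock ms for the run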
// src/types/config.ts
var config_exports = {};
__export(config_exports, {
BenchmarkConfigSchema: () => BenchmarkConfigSchema,
DEFAULT_CONFIG: () => DEFAULT_CONFIG,
EXAMPLE_CONFIGS: () => EXAMPLE_CONFIGS
});
import { z } from "zod";
var BenchmarkConfigSchema, DEFAULT_CONFIG, EXAMPLE_CONFIGS;
var init_config = __esm({
"src/types/config.ts"() {
"use strict";
init_esm_shims();
BenchmarkConfigSchema = z.object({
/** Benchmark configuration */
benchmark: z.object({
/** Number of runs per version */
runsPerVersion: z.number().min(1).max(10).default(3),
/** Timeout for each benchmark run (ms) */
timeout: z.number().min(1e3).max(6e5).default(12e4),
/** Run both version and interactive benchmarks */
runBoth: z.boolean().default(true)
}).default({}),
/** Version filtering */
versions: z.object({
/** Minimum version to benchmark (inclusive) */
min: z.string().regex(/^\d+\.\d+\.\d+$/).optional(),
/** Maximum version to benchmark (inclusive) */
max: z.string().regex(/^\d+\.\d+\.\d+$/).optional(),
/** Maximum number of versions to benchmark */
limit: z.number().min(1).optional(),
/** Specific versions to include (overrides min/max) */
include: z.array(z.string()).optional(),
/** Specific versions to exclude */
exclude: z.array(z.string()).default([])
}).default({}),
/** Storage configuration */
storage: z.object({
/** Base directory for benchmark results */
baseDir: z.string().default("~/.cvm/benchmarks"),
/** Clean up old sessions after each run */
cleanupSessions: z.boolean().default(true),
/** Keep session files for error analysis */
keepErrorSessions: z.boolean().default(true)
}).default({}),
/** Reporting configuration */
reporting: z.object({
/** Generate HTML reports automatically */
autoGenerate: z.boolean().default(true),
/** Report output directory */
outputDir: z.string().default("./reports"),
/** Include error details in reports */
includeErrors: z.boolean().default(true)
}).default({})
});
DEFAULT_CONFIG = {
benchmark: {
runsPerVersion: 3,
timeout: 12e4,
runBoth: true
},
versions: {
exclude: []
},
storage: {
baseDir: "~/.cvm/benchmarks",
cleanupSessions: true,
keepErrorSessions: true
},
reporting: {
autoGenerate: true,
outputDir: "./reports",
includeErrors: true
}
};
EXAMPLE_CONFIGS = {
/** Test only latest 10 versions */
latest10: {
versions: {
limit: 10
}
},
/** Test only 2.x versions */
v2Only: {
versions: {
min: "2.0.0"
}
},
/** Test range */
range: {
versions: {
min: "1.0.24",
max: "2.0.50"
}
},
/** Quick test (1 run per version, limit 5) */
quick: {
benchmark: {
runsPerVersion: 1
},
versions: {
limit: 5
}
}
};
}
});
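// Usage sketch (illustrative only): every section of BenchmarkConfigSchema has
// defaults, so parsing an empty object yields the same shape as DEFAULT_CONFIG,
// and the presets can be layered on top before parsing.
//   BenchmarkConfigSchema.parse({});                       // fully-defaulted config
//   BenchmarkConfigSchema.parse(EXAMPLE_CONFIGS.latest10); // latest 10 versions, other defaults applied
//   BenchmarkConfigSchema.parse({ versions: { min: "2.0.0" } });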
// src/index.ts
init_esm_shims();
init_benchmark_runner();
init_version_manager();
init_result_store();
init_version_spawn();
// src/benchmarks/interactive-pty.ts
init_esm_shims();
init_logger();
import * as pty from "node-pty";
async function benchmarkInteractive(options) {
const { claudePath, cwd, timeout = 3e4 } = options;
return new Promise((resolve) => {
const startTime = Date.now();
let output = "";
const signals = {
bracketedPaste: false,
focusEvents: false,
prompt: false
};
let readyDetected = false;
let errorDetected = false;
let sessionId = void 0;
let trustPromptHandled = false;
let ptyProcess;
try {
ptyProcess = pty.spawn(claudePath, [], {
name: "xterm-256color",
cols: 80,
rows: 30,
cwd,
env: process.env
});
} catch (error) {
resolve({
time: Date.now() - startTime,
result: "failed",
reason: `PTY spawn failed: ${error instanceof Error ? error.message : String(error)}`,
signals,
sessionId
});
return;
}
const timeoutId = setTimeout(() => {
ptyProcess.kill();
resolve({
time: Date.now() - startTime,
result: "timeout",
reason: `Benchmark timed out after ${timeout}ms`,
signals,
sessionId
});
}, timeout);
ptyProcess.onData((data) => {
output += data;
if (!trustPromptHandled && output.includes("Do you trust the files")) {
trustPromptHandled = true;
logger.debug("Trust prompt detected, auto-accepting...");
setTimeout(() => ptyProcess.write("\r"), 100);
}
if (!sessionId) {
const sessionMatch = output.match(/"session_id":"([a-f0-9-]+)"/);
if (sessionMatch) {
sessionId = sessionMatch[1];
}
}
if ((output.includes("needs update") || output.includes("newer version") || output.includes("requires") || output.includes("minimum version")) && !errorDetected) {
errorDetected = true;
const cleanOutput = output.replace(/\x1b\[[0-9;]*[a-zA-Z]/g, "");
const lines = cleanOutput.split("\n");
const errorStartIdx = lines.findIndex(
(l) => l.includes("needs update") || l.includes("newer version") || l.includes("requires") || l.includes("minimum version")
);
const errorLines = lines.slice(Math.max(0, errorStartIdx - 1), errorStartIdx + 6);
const fullErrorMessage = errorLines.join("\n").trim();
const versionMatch = cleanOutput.match(/(\d+\.\d+\.\d+)\s+or higher/i) || cleanOutput.match(/version\s+\((\d+\.\d+\.\d+)/i) || cleanOutput.match(/requires\s+(\d+\.\d+\.\d+)/i) || cleanOutput.match(/minimum\s+version[:\s]+(\d+\.\d+\.\d+)/i) || cleanOutput.match(/v?(\d+\.\d+\.\d+)\+/);
const minVersion = versionMatch ? versionMatch[1] : null;
const EXPECTED_MIN_VERSION = "1.0.24";
if (minVersion && minVersion !== EXPECTED_MIN_VERSION) {
logger.warn(`Minimum version changed: expected ${EXPECTED_MIN_VERSION}, found ${minVersion}`);
}
clearTimeout(timeoutId);
ptyProcess.kill();
resolve({
time: Date.now() - startTime,
result: "error_detected",
reason: "version_requirement_not_met",
minVersionRequired: minVersion || EXPECTED_MIN_VERSION,
errorMessage: fullErrorMessage,
rawOutput: cleanOutput.substring(0, 2e3),
sessionId
});
return;
}
if (data.includes("\x1B[?2004h") && !signals.bracketedPaste) {
signals.bracketedPaste = true;
}
if (data.includes("\x1B[?1004h") && !signals.focusEvents) {
signals.focusEvents = true;
}
const stripped = data.replace(/\x1b\[[0-9;]*[a-zA-Z]/g, "");
if (/>\s/.test(stripped) && !signals.prompt) {
signals.prompt = true;
}
if (signals.bracketedPaste && signals.focusEvents && signals.prompt && !readyDetected) {
readyDetected = true;
setTimeout(() => {
clearTimeout(timeoutId);
ptyProcess.kill();
resolve({
time: Date.now() - startTime,
result: "ready",
reason: "all terminal signals received and process stable",
signals: { ...signals },
sessionId
});
}, 500);
}
});
ptyProcess.onExit(({ exitCode }) => {
if (errorDetected || readyDetected) return;
clearTimeout(timeoutId);
const elapsed = Date.now() - startTime;
if (signals.prompt) {
resolve({
time: elapsed,
result: "ui_then_exit",
reason: "showed prompt but immediately exited",
signals: { ...signals },
exitCode,
sessionId
});
return;
}
resolve({
time: elapsed,
result: "exited_early",
reason: "process exited before showing prompt",
signals: { ...signals },
exitCode,
sessionId
});
});
});
}
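// Usage sketch (illustrative only): benchmarkInteractive spawns the binary in a
// PTY, auto-accepts the trust prompt, and resolves once bracketed paste, focus
// events, and a "> " prompt have all been seen (result: "ready"), or with
// "error_detected" / "timeout" / "exited_early" / "ui_then_exit" otherwise.
//   const run = await benchmarkInteractive({
//     claudePath: "/path/to/.bin/claude",  // hypothetical path
//     cwd: process.cwd(),
//     timeout: 30000
//   });
//   run.time;     // ms until the terminal settled
//   run.signals;  // { bracketedPaste, focusEvents, prompt } when available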
// src/index.ts
init_version_filter();
init_cleanup();
init_logger();
init_progress();
init_config();
// src/types/benchmark.ts
init_esm_shims();
import { z as z2 } from "zod";
var BenchmarkResultState = z2.enum([
"ready",
// Version started successfully and is interactive
"error_detected",
// Version shows error message (< 1.0.24)
"ui_then_exit",
// Shows UI but immediately exits
"exited_early",
// Exited before showing prompt
"timeout",
// Benchmark timed out
"failed"
// Benchmark failed with error
]);
var TerminalSignalsSchema = z2.object({
bracketedPaste: z2.boolean(),
focusEvents: z2.boolean(),
prompt: z2.boolean()
});
var BenchmarkRunResultSchema = z2.object({
/** Time taken for this run (ms) */
time: z2.number(),
/** Result state */
result: BenchmarkResultState,
/** Human-readable reason */
reason: z2.string(),
/** Terminal signals (PTY benchmark only) */
signals: TerminalSignalsSchema.optional(),
/** Exit code (if exited) */
exitCode: z2.number().optional(),
/** Session ID (if created) */
sessionId: z2.string().optional(),
/** Minimum version required (if error_detected) */
minVersionRequired: z2.string().optional(),
/** Error message (if error_detected) */
errorMessage: z2.string().optional(),
/** Raw output for debugging */
rawOutput: z2.string().optional()
});
var VersionBenchmarkResultSchema = z2.object({
version: z2.string(),
timestamp: z2.string(),
runs: z2.array(z2.number()),
avgTime: z2.number(),
minTime: z2.number(),
maxTime: z2.number(),
stdDev: z2.number()
});
var InteractiveBenchmarkResultSchema = z2.object({
version: z2.string(),
timestamp: z2.string(),
runs: z2.array(BenchmarkRunResultSchema),
avgTime: z2.number(),
minTime: z2.number(),
maxTime: z2.number(),
stdDev: z2.number(),
result: BenchmarkResultState,
reason: z2.string()
});
var CombinedBenchmarkResultSchema = z2.object({
version: z2.string(),
versionBenchmark: VersionBenchmarkResultSchema.optional(),
interactiveBenchmark: InteractiveBenchmarkResultSchema.optional(),
error: z2.string().optional()
});
var BenchmarkSuiteResultSchema = z2.object({
runNumber: z2.number(),
timestamp: z2.string(),
config: z2.any(),
// BenchmarkConfig
results: z2.array(CombinedBenchmarkResultSchema),
errors: z2.array(z2.object({
version: z2.string(),
error: z2.string()
})),
metadata: z2.object({
totalVersions: z2.number(),
successfulVersions: z2.number(),
failedVersions: z2.number(),
duration: z2.number()
})
});
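// Usage sketch (illustrative only): the schemas double as runtime validators
// for stored results, e.g. when re-reading a run-<N>/results.json file.
//   const parsed = BenchmarkSuiteResultSchema.safeParse(JSON.parse(raw)); // raw: file contents read elsewhere
//   if (parsed.success) {
//     parsed.data.results.filter((r) => !r.error);  // successfully benchmarked versions
//   }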
// src/index.ts
var plugin = {
metadata: {
name: "benchmark",
version: "2.0.0",
description: "Benchmark and analyze Claude Code performance",
author: "clawdcc"
},
commands: [
{
name: "benchmark",
description: "Run benchmarks on Claude Code versions",
handler: async (_args) => {
const { BenchmarkRunner: BenchmarkRunner2 } = await Promise.resolve().then(() => (init_benchmark_runner(), benchmark_runner_exports));
const { DEFAULT_CONFIG: DEFAULT_CONFIG2 } = await Promise.resolve().then(() => (init_config(), config_exports));
const { logger: logger2 } = await Promise.resolve().then(() => (init_logger(), logger_exports));
try {
const runner = new BenchmarkRunner2();
const result = await runner.runSuite(DEFAULT_CONFIG2);
logger2.success("Benchmark complete!");
logger2.info(`Results: ~/.cvm/benchmarks/run-${result.runNumber}/`);
return result;
} catch (error) {
logger2.error("Benchmark failed:", error);
throw error;
}
}
}
],
hooks: {
afterInstall: async (version) => {
const { logger: logger2 } = await Promise.resolve().then(() => (init_logger(), logger_exports));
logger2.info(`Version ${version} installed. Run: cvm benchmark`);
}
}
};
export {
BenchmarkConfigSchema,
BenchmarkResultState,
BenchmarkRunResultSchema,
BenchmarkRunner,
BenchmarkSuiteResultSchema,
CombinedBenchmarkResultSchema,
DEFAULT_CONFIG,
EXAMPLE_CONFIGS,
InteractiveBenchmarkResultSchema,
ProgressTracker,
ResultStore,
TerminalSignalsSchema,
VersionBenchmarkResultSchema,
VersionManager,
benchmarkInteractive,
benchmarkVersion,
cleanupSessions,
compareVersions,
countSessions,
describeVersionFilter,
filterVersions,
formatBytes,
formatDuration,
getSessionSize,
logger,
plugin,
sortVersions
};
//# sourceMappingURL=index.js.map