/*
 * cmte
 * Version: (not specified)
 * Design by Committee™ except it's just you and LLMs
 * 173 lines (144 loc) • 6.55 kB
 * JavaScript
 */
import { spawn } from 'child_process';
import path from 'path';
import { fileURLToPath } from 'url';
import dotenv from 'dotenv';
import { expect } from 'chai';
// Read .env first: the environment lookups in this file (API key, concurrency limit)
// happen after this call.
dotenv.config();

// ES modules do not provide __filename/__dirname, so rebuild them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The parent of this file's directory; the CLI entry point is resolved against it.
const ROOT_DIR = path.resolve(__dirname, '..');

// Concurrency ceiling the log analysis checks against; falls back to 10 when the
// environment variable is unset or empty.
const MAX_PARALLEL_REQUESTS = Number.parseInt(process.env.MAX_PARALLEL_REQUESTS || '10', 10);
/**
 * Runs the cmte CLI in a child `node` process and captures everything it prints.
 *
 * The flags `--debug` and `--prompts` are always passed ahead of the caller's
 * arguments. stdin is inherited from this process; stdout and stderr are piped
 * and buffered in memory.
 *
 * @param {string[]} args - Extra command-line arguments appended after the fixed flags.
 * @returns {Promise<{stdout: string, stderr: string, code: (number|null)}>}
 *   Resolves once the child has closed, whatever its exit code (Node reports
 *   `null` as the code when the child was terminated by a signal). Rejects only
 *   when the child process emits an 'error' event (e.g. it could not be spawned).
 */
async function runCLIWithDebug(args) {
  return new Promise((resolve, reject) => {
    const commandArgs = [
      path.join(ROOT_DIR, 'src/bin/cmte.js'),
      '--debug', // Ensure debug logging is enabled
      '--prompts', // Optional: save prompts if helpful
      ...args,
    ];
    console.log(`[Test] Running command: node ${commandArgs.join(' ')}`);

    const committee = spawn('node', commandArgs, {
      stdio: ['inherit', 'pipe', 'pipe'],
      env: { ...process.env }, // Pass environment variables
    });

    const captured = { stdout: '', stderr: '' };
    for (const name of ['stdout', 'stderr']) {
      const stream = committee[name];
      // Decode at the stream level so a multi-byte UTF-8 character that straddles
      // two pipe chunks is reassembled instead of turning into U+FFFD garbage.
      stream.setEncoding('utf8');
      stream.on('data', (chunk) => {
        captured[name] += chunk;
      });
      stream.on('end', () => console.log(`[Test] ${name} stream ended.`));
      stream.on('error', (err) => console.error(`[Test] ${name} error:`, err)); // Log errors
    }

    // 'close' is emitted only after the process has ended AND its stdio streams
    // have been closed, so all output has been collected by the time it fires.
    // Resolving here (rather than waiting on per-stream 'end' flags) also avoids
    // hanging forever if a pipe errors out and never emits 'end'.
    committee.on('close', (code) => {
      console.log(`[Test] CLI process exited (closed) with code ${code}`);
      console.log(`[Test] All streams ended and process closed with code ${code}. Resolving.`);
      if (code !== 0) {
        console.error('[Test] CLI Error Output:\n', captured.stderr);
      }
      resolve({ stdout: captured.stdout, stderr: captured.stderr, code });
    });

    committee.on('error', (err) => {
      console.error('[Test] Failed to start or communicate with CLI process:', err);
      reject(err); // Reject on spawn error
    });
  });
}
/**
 * Scans CLI log output for "[Parallelism] Starting request <id>. Active: <n>"
 * markers and asserts that requests overlapped without exceeding the limit.
 *
 * Three expectations are checked, in order:
 *   1. the highest reported "Active" count is greater than 1,
 *   2. that highest count is at most `maxConcurrent`,
 *   3. at least one request started with a reported "Active" count above 1.
 *
 * @param {string} logOutput - Captured log text to scan.
 * @param {number} maxConcurrent - Configured upper bound on concurrent requests.
 * @returns {void}
 * @throws Whatever `expect` throws when one of the expectations fails.
 */
function analyzeParallelism(logOutput, maxConcurrent) {
  const startRegex = /\[Parallelism\] Starting request (\S+)\. Active: (\d+)/g;

  console.log(`[Analysis] Analyzing logs for max concurrency of ${maxConcurrent}...`);

  let maxObservedActive = 0;
  let concurrentStarts = 0;
  // matchAll iterates over a private copy of the regex, so no lastIndex state is
  // carried around. String() mirrors the coercion RegExp#exec applies to its input.
  for (const [, , active] of String(logOutput).matchAll(startRegex)) {
    const activeCount = Number.parseInt(active, 10);
    maxObservedActive = Math.max(maxObservedActive, activeCount);
    if (activeCount > 1) {
      concurrentStarts++; // Count instances where a request starts while others are active
    }
  }

  console.log(`[Analysis] Max observed active requests reported in logs: ${maxObservedActive}`);
  console.log(`[Analysis] Number of requests started while others were active: ${concurrentStarts}`);

  // Basic checks
  expect(maxObservedActive, 'Max observed active requests should be greater than 1 for parallelism').to.be.greaterThan(1);
  expect(maxObservedActive, 'Max observed active requests should not exceed configured limit').to.be.at.most(maxConcurrent);
  expect(concurrentStarts, 'Should have observed requests starting while others were active').to.be.greaterThan(0);

  // More detailed analysis could be added here (e.g., tracking specific request IDs and their overlap)
  console.log(`[Analysis] Parallelism checks passed (based on log analysis).`);
}
/**
 * Drives the parallel load test: runs the parallel-load-test workflow through the
 * CLI and checks the captured stdout for evidence of concurrent requests.
 *
 * Returns early (after a warning) when ANTHROPIC_API_KEY is not set. The CLI run
 * and log analysis are raced against a 15-minute timer. Any failure, including the
 * timer firing, is logged and ends the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function runParallelTest() {
  const hasApiKey = Boolean(process.env.ANTHROPIC_API_KEY);
  if (!hasApiKey) {
    console.warn('[SKIP] ANTHROPIC_API_KEY not set. Skipping parallel E2E test against real API.');
    return;
  }

  console.log('[Test] Starting parallel load test using Claude API...');

  // Workflow definition used for the load test, resolved relative to this file.
  const workflowPath = path.resolve(__dirname, 'workflows/parallel-load-test/workflow.yaml');

  // Generous upper bound for the whole run.
  const TIMEOUT_MINUTES = 15;
  const TEST_TIMEOUT_MS = TIMEOUT_MINUTES * 60 * 1000;

  // Keep the timer handle so it can be cancelled once the race settles.
  let timeoutHandle = null;
  const timeoutPromise = new Promise((_resolve, reject) => {
    timeoutHandle = setTimeout(() => {
      reject(new Error(`Test timed out after ${TEST_TIMEOUT_MS / 1000 / 60} minutes`));
    }, TEST_TIMEOUT_MS);
  });

  // The actual work: run the CLI (without --local, so the default Claude backend
  // is used), then inspect its stdout.
  const runWorkflowAndAnalyze = async () => {
    console.log('[Test] Running workflow via CLI... This may take several minutes.');
    const outcome = await runCLIWithDebug([workflowPath]);

    if (outcome.code !== 0) {
      console.error(`[Test] Workflow execution failed with code ${outcome.code}. Cannot reliably analyze parallelism.`);
      console.error('Stderr:', outcome.stderr);
      throw new Error(`Workflow failed with code ${outcome.code}`);
    }

    console.log('[Test] Workflow finished. Analyzing logs...');
    analyzeParallelism(outcome.stdout, MAX_PARALLEL_REQUESTS);
    console.log('[Test] Parallel E2E test completed successfully.');
  };

  try {
    await Promise.race([runWorkflowAndAnalyze(), timeoutPromise]);
  } catch (error) {
    console.error('[Test] Parallel E2E test failed:', error);
    process.exit(1); // Exit with error code
  } finally {
    // Cancel the pending timer whatever the outcome.
    if (timeoutHandle) {
      clearTimeout(timeoutHandle);
      console.log('[Test] Timeout cleared.');
    }
  }
}
// Start the test as soon as this module is loaded. The returned promise is not
// awaited here; runParallelTest logs test failures and exits with code 1 itself.
runParallelTest();