UNPKG

cmte

Version:

Design by Committee™ except it's just you and LLMs

173 lines (144 loc) 6.55 kB
import { spawn } from 'child_process';
import path from 'path';
import { fileURLToPath } from 'url';
import dotenv from 'dotenv';
import { expect } from 'chai';

// Load .env so the API key and the concurrency limit can be supplied from a file.
dotenv.config();

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const ROOT_DIR = path.resolve(__dirname, '..');

// Upper bound on concurrently active requests that the log analysis will accept.
// A missing, empty, or non-numeric MAX_PARALLEL_REQUESTS falls back to the default
// instead of producing NaN (which would make the numeric assertions below blow up).
const DEFAULT_MAX_PARALLEL_REQUESTS = 10;
const parsedMaxParallelRequests = Number.parseInt(process.env.MAX_PARALLEL_REQUESTS || '', 10);
const MAX_PARALLEL_REQUESTS = Number.isNaN(parsedMaxParallelRequests)
  ? DEFAULT_MAX_PARALLEL_REQUESTS
  : parsedMaxParallelRequests;

/**
 * Runs the cmte CLI (`src/bin/cmte.js`) under `node` with `--debug` and `--prompts`
 * prepended to the given arguments, and captures everything it writes.
 *
 * @param {string[]} args - Extra CLI arguments appended after the fixed flags.
 * @returns {Promise<{stdout: string, stderr: string, code: number|null}>} Resolves once
 *   the child has closed. `code` is the value reported by the child's 'close' event.
 *   A non-zero code still resolves (callers decide how to treat it); the promise only
 *   rejects when the child emits 'error'.
 */
function runCLIWithDebug(args) {
  return new Promise((resolve, reject) => {
    const commandArgs = [
      path.join(ROOT_DIR, 'src/bin/cmte.js'),
      '--debug', // Ensure debug logging is enabled
      '--prompts', // Optional: save prompts if helpful
      ...args,
    ];

    console.log(`[Test] Running command: node ${commandArgs.join(' ')}`);

    const committee = spawn('node', commandArgs, {
      stdio: ['inherit', 'pipe', 'pipe'],
      env: { ...process.env }, // Pass environment variables
    });

    let stdoutData = '';
    let stderrData = '';

    // Decode at the stream level so a multi-byte UTF-8 character that straddles two
    // chunks is not corrupted (per-chunk Buffer#toString() would mangle it).
    committee.stdout.setEncoding('utf8');
    committee.stderr.setEncoding('utf8');

    committee.stdout.on('data', (chunk) => {
      stdoutData += chunk;
    });
    committee.stdout.on('end', () => {
      console.log('[Test] stdout stream ended.');
    });
    committee.stdout.on('error', (err) => console.error('[Test] stdout error:', err));

    committee.stderr.on('data', (chunk) => {
      stderrData += chunk;
    });
    committee.stderr.on('end', () => {
      console.log('[Test] stderr stream ended.');
    });
    committee.stderr.on('error', (err) => console.error('[Test] stderr error:', err));

    // 'close' is emitted only after the process has ended AND its stdio streams have
    // been closed, so it is the single point at which the captured output is complete.
    // Resolving here (rather than also waiting on per-stream 'end' flags) avoids a hang
    // if a pipe is torn down without ever emitting 'end'.
    committee.on('close', (code) => {
      console.log(`[Test] CLI process exited (closed) with code ${code}`);
      console.log(`[Test] All streams ended and process closed with code ${code}. Resolving.`);
      if (code !== 0) {
        console.error('[Test] CLI Error Output:\n', stderrData);
      }
      resolve({ stdout: stdoutData, stderr: stderrData, code });
    });

    committee.on('error', (err) => {
      console.error('[Test] Failed to start or communicate with CLI process:', err);
      reject(err); // Reject on spawn error
    });
  });
}

/**
 * Scans CLI debug output for "[Parallelism] Starting request <id>. Active: <n>" markers
 * and asserts that requests actually overlapped without exceeding the configured limit.
 *
 * @param {string} logOutput - Captured CLI output to scan.
 * @param {number} maxConcurrent - Highest active-request count that is acceptable.
 * @returns {void}
 * @throws {Error} chai assertion error when no overlap was observed (max active <= 1,
 *   or no request started while another was active) or when the reported active count
 *   exceeds `maxConcurrent`.
 */
function analyzeParallelism(logOutput, maxConcurrent) {
  const startRegex = /\[Parallelism\] Starting request (\S+)\. Active: (\d+)/g;

  let maxObservedActive = 0;
  let concurrentStarts = 0;

  console.log(`[Analysis] Analyzing logs for max concurrency of ${maxConcurrent}...`);

  // Find the highest reported active count and how often a request started while
  // at least one other request was already in flight.
  for (const match of logOutput.matchAll(startRegex)) {
    const activeCount = Number.parseInt(match[2], 10);
    maxObservedActive = Math.max(maxObservedActive, activeCount);
    if (activeCount > 1) {
      concurrentStarts += 1;
    }
  }

  console.log(`[Analysis] Max observed active requests reported in logs: ${maxObservedActive}`);
  console.log(`[Analysis] Number of requests started while others were active: ${concurrentStarts}`);

  // Basic checks
  expect(maxObservedActive, 'Max observed active requests should be greater than 1 for parallelism').to.be.greaterThan(1);
  expect(maxObservedActive, 'Max observed active requests should not exceed configured limit').to.be.at.most(maxConcurrent);
  expect(concurrentStarts, 'Should have observed requests starting while others were active').to.be.greaterThan(0);

  // More detailed analysis could be added here (e.g., tracking specific request IDs and their overlap)
  console.log(`[Analysis] Parallelism checks passed (based on log analysis).`);
}

/**
 * Main test execution: runs the parallel-load-test workflow through the CLI and checks
 * the resulting logs for evidence of bounded parallelism.
 *
 * Returns early (with a warning) when ANTHROPIC_API_KEY is not set. On any failure,
 * including the overall timeout, the error is logged and the process exits with code 1.
 *
 * @returns {Promise<void>}
 */
async function runParallelTest() {
  // Skip if Anthropic key is not set
  if (!process.env.ANTHROPIC_API_KEY) {
    console.warn('[SKIP] ANTHROPIC_API_KEY not set. Skipping parallel E2E test against real API.');
    return;
  }

  console.log('[Test] Starting parallel load test using Claude API...');

  // Point to the new parallel load test workflow
  const workflowPath = path.resolve(__dirname, 'workflows/parallel-load-test/workflow.yaml');

  // Set a generous timeout for the entire test
  const TEST_TIMEOUT_MS = 15 * 60 * 1000; // 15 minutes
  let timeoutHandle = null;
  const timeoutPromise = new Promise((_, reject) => {
    timeoutHandle = setTimeout(() => {
      reject(new Error(`Test timed out after ${TEST_TIMEOUT_MS / 1000 / 60} minutes`));
    }, TEST_TIMEOUT_MS);
  });

  const runAndAnalyze = async () => {
    // Run the workflow - DO NOT use --local, use default Claude
    console.log('[Test] Running workflow via CLI... This may take several minutes.');
    const { stdout, stderr, code } = await runCLIWithDebug([workflowPath]);

    if (code !== 0) {
      console.error(`[Test] Workflow execution failed with code ${code}. Cannot reliably analyze parallelism.`);
      console.error('Stderr:', stderr);
      throw new Error(`Workflow failed with code ${code}`);
    }

    console.log('[Test] Workflow finished. Analyzing logs...');

    // Analyze the stdout logs for parallelism markers
    analyzeParallelism(stdout, MAX_PARALLEL_REQUESTS);

    console.log('[Test] Parallel E2E test completed successfully.');
  };

  try {
    await Promise.race([runAndAnalyze(), timeoutPromise]);
  } catch (error) {
    console.error('[Test] Parallel E2E test failed:', error);
    // Hard exit: after a timeout the CLI child may still be running and would otherwise
    // keep this process alive. Note that process.exit() terminates immediately, so the
    // `finally` block below only runs on the success path.
    process.exit(1);
  } finally {
    // Clear the pending timer so a successful run can exit promptly.
    if (timeoutHandle) {
      clearTimeout(timeoutHandle);
      console.log('[Test] Timeout cleared.');
    }
  }
}

runParallelTest();