// @mettamatt/code-reasoning
// Version:
// Enhanced MCP server for code reasoning using sequential thinking methodology, optimized for programming tasks
// 584 lines (583 loc) • 23.7 kB
// JavaScript
/**
* Integrated Test Runner for Code-Reasoning
*
* A streamlined testing solution that manages server and client processes
* while ensuring proper JSON-RPC protocol implementation. Key features:
*
* 1. Properly handles JSON-RPC notifications (no waiting for responses)
* 2. Captures all communication in dedicated log files
* 3. Provides clear test result summaries
* 4. Works reliably without requiring separate terminals
*
* This approach solves the test visibility issues caused by StdioClientTransport
* capturing stdout for JSON-RPC communication.
*/
import * as fs from 'fs';
import * as path from 'path';
import { fileURLToPath } from 'url';
import { spawn } from 'child_process';
/**
 * Locate the project root by walking up from `startDir` until a directory
 * containing `package.json` is found.
 *
 * Every ancestor is inspected, including the filesystem root itself. The walk
 * ends when `path.dirname` no longer changes the path, which also guarantees
 * termination for relative start directories (where dirname settles on '.').
 *
 * @param {string} startDir - Directory to start searching from.
 * @returns {string} The nearest directory (`startDir` or one of its ancestors)
 *   that contains a package.json, or the parent of `startDir` if none does.
 */
function findProjectRoot(startDir) {
  let currentDir = startDir;
  for (;;) {
    if (fs.existsSync(path.join(currentDir, 'package.json'))) {
      return currentDir;
    }
    const parentDir = path.dirname(currentDir);
    if (parentDir === currentDir) {
      // dirname is a fixed point at the top of the tree: nothing left to check
      break;
    }
    currentDir = parentDir;
  }
  // No package.json anywhere above startDir: fall back to its parent directory
  return path.join(startDir, '..');
}
// ES modules have no built-in __filename/__dirname, so derive them from the module URL
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const projectRoot = findProjectRoot(__dirname);
// Report the detected root on stderr to help debug path problems
console.error(`Detected project root: ${projectRoot}`);
// Output locations for the run log and the JSON result file
const logsDir = path.join(projectRoot, 'logs');
const testResultsDir = path.join(projectRoot, 'test-results');
// Create whichever output directories are missing
[logsDir, testResultsDir]
  .filter((dir) => !fs.existsSync(dir))
  .forEach((dir) => {
    fs.mkdirSync(dir, { recursive: true });
  });
// ISO timestamp with ':' and '.' replaced by '-' for use inside file names
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
// Per-run output files
const logFile = path.join(logsDir, `custom-test-${timestamp}.log`);
const resultFile = path.join(testResultsDir, `custom-result-${timestamp}.json`);
// Append-mode stream that receives every log line
const logStream = fs.createWriteStream(logFile, { flags: 'a' });
/**
 * Write a message to the run log (prefixed with an ISO timestamp) and echo the
 * bare message to stderr.
 *
 * The file write is skipped once the log stream is no longer writable, so
 * late callers still reach stderr without writing to a closed stream.
 *
 * @param {string} message - Text to record.
 */
function log(message) {
  const stampedLine = `${new Date().toISOString()} ${message}\n`;
  if (logStream.writable) {
    logStream.write(stampedLine);
  }
  console.error(message);
}
// Command-line handling: the first argument that is not a flag selects the
// scenario (default 'basic'); --verbose / -v turns on verbose logging
const args = process.argv.slice(2);
const options = {
  scenario: args.find((arg) => !arg.startsWith('-')) || 'basic',
  verbose: args.some((arg) => arg === '--verbose' || arg === '-v'),
};
// Announce the run configuration
for (const startupLine of [
  `Code-Reasoning E2E Test Runner starting`,
  `Log file: ${logFile}`,
  `Result file: ${resultFile}`,
  `Running scenario: ${options.scenario}`,
  `Verbose mode: ${options.verbose}`,
]) {
  log(startupLine);
}
// Test scenarios, keyed by the name accepted on the command line.
//
// Each scenario provides:
//   name / description    - human-readable labels written to the log and results
//   thoughts              - ordered list of payloads; each one is sent as the
//                           `arguments` of a `tools/call` request for the
//                           'code-reasoning' tool
//   expectedSuccessCount  - number of thoughts expected to succeed
//   expectedErrorCount    - number of thoughts expected to fail
//
// The expected counts are compared with the observed counts in main() and the
// outcome of that comparison is logged.
const testScenarios = {
// Linear sequence: thoughts 1..4 with no branching or revision fields.
basic: {
name: 'Basic thought flow',
description: 'Tests a linear sequence of thoughts without branches or revisions',
thoughts: [
{
thought: "First step: Define the problem we're trying to solve.",
thought_number: 1,
total_thoughts: 4,
next_thought_needed: true,
},
{
thought: 'Second step: Break down the problem into smaller parts.',
thought_number: 2,
total_thoughts: 4,
next_thought_needed: true,
},
{
thought: 'Third step: Analyze each part and identify solutions.',
thought_number: 3,
total_thoughts: 4,
next_thought_needed: true,
},
{
thought: 'Fourth step: Combine the solutions into a comprehensive approach.',
thought_number: 4,
total_thoughts: 4,
next_thought_needed: false,
},
],
expectedSuccessCount: 4,
expectedErrorCount: 0,
},
// Branching: thought numbers 3 and 4 each appear twice, once on the main line
// and once tagged with branch_id 'B1' (entries carrying branch_from_thought).
branch: {
name: 'Thought flow with branching',
description: 'Tests the ability to branch into alternative thought paths',
thoughts: [
{
thought: 'Main approach: Define the problem scope.',
thought_number: 1,
total_thoughts: 5,
next_thought_needed: true,
},
{
thought: 'Identify potential solution methodologies.',
thought_number: 2,
total_thoughts: 5,
next_thought_needed: true,
},
{
thought: 'Alternative approach: Consider the problem from first principles.',
thought_number: 3,
total_thoughts: 5,
branch_from_thought: 1,
branch_id: 'B1',
next_thought_needed: true,
},
{
thought: 'Continue main approach: Evaluate methodologies against requirements.',
thought_number: 3,
total_thoughts: 5,
next_thought_needed: true,
},
{
thought: 'B1: Break down the problem to its fundamental components.',
thought_number: 4,
total_thoughts: 5,
branch_from_thought: 3,
branch_id: 'B1',
next_thought_needed: true,
},
{
thought: 'Final main approach: Select the most appropriate methodology.',
thought_number: 4,
total_thoughts: 5,
next_thought_needed: false,
},
{
thought: 'B1: Construct solution directly from fundamental components.',
thought_number: 5,
total_thoughts: 5,
branch_from_thought: 4,
branch_id: 'B1',
next_thought_needed: false,
},
],
expectedSuccessCount: 7,
expectedErrorCount: 0,
},
// Revision: thoughts 3 and 4 set is_revision and point back at thoughts 1 and 2
// through revises_thought; total_thoughts grows from 4 to 5 at thought 3.
revision: {
name: 'Thought flow with revision',
description: 'Tests the ability to revise previous thoughts',
thoughts: [
{
thought: 'Initial analysis of the problem domain.',
thought_number: 1,
total_thoughts: 4,
next_thought_needed: true,
},
{
thought: 'Proposed solution based on initial understanding.',
thought_number: 2,
total_thoughts: 4,
next_thought_needed: true,
},
{
thought: 'Wait, I made an error in my initial analysis. The problem actually involves asynchronous operations.',
thought_number: 3,
total_thoughts: 5,
is_revision: true,
revises_thought: 1,
next_thought_needed: true,
},
{
thought: 'Given the asynchronous nature, we need to revise our solution approach.',
thought_number: 4,
total_thoughts: 5,
is_revision: true,
revises_thought: 2,
next_thought_needed: true,
},
{
thought: 'Final solution incorporating asynchronous handling and error management.',
thought_number: 5,
total_thoughts: 5,
next_thought_needed: false,
},
],
expectedSuccessCount: 5,
expectedErrorCount: 0,
},
// Error handling: five malformed payloads followed by one valid payload.
// main() matches this scenario by its `name` and counts its outcomes from the
// `result.isError` flag of each response instead of the JSON-RPC `error` member.
error: {
name: 'Error handling tests',
description: 'Tests various error conditions and edge cases',
thoughts: [
// Missing required field (thought)
{
thought_number: 1,
total_thoughts: 3,
next_thought_needed: true,
},
// Missing required field (next_thought_needed)
{
thought: 'This is missing next_thought_needed field',
thought_number: 1,
total_thoughts: 3,
},
// Invalid type (thought_number as string)
{
thought: 'Invalid thought_number type',
thought_number: '1',
total_thoughts: 3,
next_thought_needed: true,
},
// Exceeding max thought number (>20)
{
thought: 'This exceeds the maximum thought number',
thought_number: 21,
total_thoughts: 25,
next_thought_needed: true,
},
// Invalid branch reference (non-existent thought)
{
thought: 'This references a non-existent branch point',
thought_number: 1,
total_thoughts: 3,
branch_from_thought: 100,
branch_id: 'INVALID',
next_thought_needed: true,
},
// Valid thought (to ensure the server is still functional)
{
thought: 'This is a valid thought after error tests',
thought_number: 1,
total_thoughts: 1,
next_thought_needed: false,
},
],
expectedSuccessCount: 1,
expectedErrorCount: 5,
},
// Performance: 20 generated thoughts numbered 1..20, each with a long text body.
perf: {
name: 'Performance testing',
description: 'Tests performance with a long sequence of thoughts',
thoughts: Array.from({ length: 20 }, (_, i) => ({
thought: `Performance test thought ${i + 1}. This is a longer thought to simulate more realistic content that the server would process in a real-world scenario. Including more text increases the processing load slightly to better measure performance characteristics.`,
thought_number: i + 1,
total_thoughts: 20,
next_thought_needed: i < 19, // Only the last one has next_thought_needed: false
})),
expectedSuccessCount: 20,
expectedErrorCount: 0,
},
};
/**
 * Spawn the server (`node dist/index.js --debug`) as a child process with
 * piped stdio and resolve with it once the startup delay has elapsed.
 *
 * The server's stderr is forwarded to the log with a `[SERVER]` prefix.
 * There is no readiness handshake: the child is considered ready after
 * `startupDelayMs` unless it exits or fails to spawn first.
 *
 * @param {number} [startupDelayMs=5000] - How long to wait before treating the
 *   server as initialized.
 * @returns {Promise<import('child_process').ChildProcess>} The running child.
 * @throws {Error} (as a rejection) when the entry point is missing, the child
 *   cannot be spawned, or it exits during the startup delay.
 */
async function createSocketConnection(startupDelayMs = 5000) {
  log('Starting server process...');
  const indexJsPath = path.join(projectRoot, 'dist/index.js');
  if (!fs.existsSync(indexJsPath)) {
    throw new Error(`Server entry point not found at ${indexJsPath}`);
  }
  log(`Using server entry point: ${indexJsPath}`);
  const serverProcess = spawn('node', [indexJsPath, '--debug'], {
    stdio: 'pipe',
    shell: false,
  });
  // Forward server diagnostics to the log
  serverProcess.stderr?.on('data', (data) => {
    const message = data.toString();
    log(`[SERVER] ${message.trim()}`);
  });
  return new Promise((resolve, reject) => {
    const startupTimer = setTimeout(() => {
      log('Server initialization period complete');
      resolve(serverProcess);
    }, startupDelayMs);
    // Without an 'error' listener a failed spawn (e.g. `node` not on PATH)
    // surfaces as an unhandled 'error' event and crashes the runner.
    serverProcess.on('error', (error) => {
      clearTimeout(startupTimer);
      log(`Server process error: ${error.message}`);
      reject(error);
    });
    // The listener stays attached after startup so the final exit is still
    // logged; reject() is a no-op once the promise has resolved.
    serverProcess.on('exit', (code) => {
      // Prevents the "initialization period complete" log after an early exit
      clearTimeout(startupTimer);
      log(`Server process exited with code ${code}`);
      reject(new Error(`Server exited unexpectedly with code ${code}`));
    });
  });
}
/**
 * Run a single scenario against an already-started server process.
 *
 * Performs the JSON-RPC handshake (initialize request, initialized
 * notification, tools/list), checks that a tool named 'code-reasoning' is
 * advertised, then sends every thought of the scenario as a `tools/call`
 * request, one after another.
 *
 * @param {object} serverProcess - Child process handed to the send helpers.
 * @param {{name: string, description: string, thoughts: object[]}} scenario
 * @returns {Promise<object>} Summary with `name`, `description`, per-thought
 *   entries in `thoughts`, the `successful` flag, `totalThoughts` and
 *   `successfulThoughts`.
 */
async function runTestScenario(serverProcess, scenario) {
  const { name, description, thoughts } = scenario;
  log(`Running test scenario: ${name}`);
  log(description);

  const results = {
    name,
    description,
    thoughts: [],
    successful: true,
    totalThoughts: thoughts.length,
    successfulThoughts: 0,
  };

  // Handshake step 1: initialize request (id 0)
  log('Sending initialize request...');
  const initializeRequest = {
    jsonrpc: '2.0',
    id: 0,
    method: 'initialize',
    params: {
      protocolVersion: '2024-11-05',
      capabilities: {},
      clientInfo: {
        name: 'custom-test-client',
        version: '1.0.0',
      },
    },
  };
  const initResponse = await sendJsonRpcMessage(serverProcess, initializeRequest);
  log(`Initialize response: ${JSON.stringify(initResponse)}`);

  // Handshake step 2: initialized notification (no response expected)
  log('Sending initialized notification...');
  const initializedNotification = {
    jsonrpc: '2.0',
    method: 'notifications/initialized',
  };
  await sendJsonRpcNotification(serverProcess, initializedNotification);

  // Handshake step 3: list the tools (id 1)
  log('Requesting tools list...');
  const toolsRequest = {
    jsonrpc: '2.0',
    id: 1,
    method: 'tools/list',
    params: {},
  };
  const toolsResponse = await sendJsonRpcMessage(serverProcess, toolsRequest);
  log(`Tools response: ${JSON.stringify(toolsResponse)}`);

  // Without the code-reasoning tool there is nothing to exercise
  const advertisedTools = toolsResponse?.result?.tools;
  const tools = Array.isArray(advertisedTools) ? advertisedTools : [];
  const hasCodeReasoningTool = tools.some((tool) => tool.name === 'code-reasoning');
  if (!hasCodeReasoningTool) {
    log('Error: code-reasoning tool not found!');
    results.successful = false;
    return results;
  }
  log('Found code-reasoning tool, running thoughts...');

  for (const [index, thought] of thoughts.entries()) {
    const ordinal = index + 1;
    const preview = thought.thought?.substring?.(0, 50) || 'undefined';
    log(`Sending thought #${ordinal}/${thoughts.length}: ${preview}...`);
    try {
      const response = await sendJsonRpcMessage(serverProcess, {
        jsonrpc: '2.0',
        id: index + 2, // ids 0 and 1 are taken by initialize and tools/list
        method: 'tools/call',
        params: {
          name: 'code-reasoning',
          arguments: thought,
        },
      });
      // A thought counts as successful when the response has no `error` member
      const success = !response.error;
      if (!success) {
        log(`✗ Thought #${ordinal} failed: ${response.error?.message || 'Unknown error'}`);
        results.successful = false;
      } else {
        log(`✓ Thought #${ordinal} succeeded`);
        results.successfulThoughts += 1;
      }
      results.thoughts.push({ thought, response, success });
      if (options.verbose) {
        log(`Response: ${JSON.stringify(response)}`);
      }
    } catch (error) {
      // Transport-level failure (e.g. timeout): record it and keep going
      const reason = error instanceof Error ? error.message : String(error);
      log(`Error sending thought #${ordinal}: ${reason}`);
      results.successful = false;
      results.thoughts.push({ thought, error: reason, success: false });
    }
  }

  return results;
}
/**
 * Write a JSON-RPC notification to the server's stdin as one newline-terminated
 * JSON line. Notifications carry no `id`, so no response is awaited.
 *
 * @param {object} serverProcess - Object exposing the server's `stdin` stream.
 * @param {object} message - Notification to serialize and send.
 * @returns {Promise<void>} Settles as soon as the write has been issued.
 * @throws {Error} (as a rejection) when `serverProcess.stdin` is not available.
 */
async function sendJsonRpcNotification(serverProcess, message) {
  const payload = `${JSON.stringify(message)}\n`;
  const { stdin } = serverProcess;
  if (!stdin) {
    throw new Error('Server process stdin is not available');
  }
  stdin.write(payload);
  if (options.verbose) {
    log(`Sent notification: ${message.method}`);
  }
}
/**
 * Send a JSON-RPC request to the server and resolve with the response whose
 * `id` equals `message.id`.
 *
 * Server output is accumulated as bytes across `data` events and consumed one
 * newline-terminated line at a time, so a response that arrives split over
 * several chunks (even in the middle of a multi-byte character), or together
 * with other messages, is still recognised. Lines that are not JSON, or that
 * belong to another request, are ignored. The stdout listener and the timer
 * are always released when the promise settles.
 *
 * @param {object} serverProcess - Object exposing `stdin` (with `write`) and
 *   `stdout` (an emitter of `data` events).
 * @param {object} message - Request to send; a message without an `id` can
 *   never be matched and therefore ends in the timeout.
 * @param {number} [timeoutMs=10000] - How long to wait for the response.
 * @returns {Promise<object>} The parsed matching response.
 * @throws {Error} (as a rejection) when stdin/stdout are unavailable, the
 *   write fails synchronously, or no matching response arrives in time.
 */
async function sendJsonRpcMessage(serverProcess, message, timeoutMs = 10000) {
  return new Promise((resolve, reject) => {
    const { stdin, stdout } = serverProcess;
    // Checked before any timer or listener exists so nothing is left behind
    if (!stdout || !stdin) {
      reject(new Error('Server process stdout or stdin is not available'));
      return;
    }

    let pendingBytes = Buffer.alloc(0);
    let timer;

    // Returns the parsed response when `text` is the answer to `message`
    const parseMatchingResponse = (text) => {
      if (!text.trim()) {
        return undefined;
      }
      try {
        const parsed = JSON.parse(text);
        const isMatch =
          message.id !== undefined &&
          parsed !== null &&
          typeof parsed === 'object' &&
          parsed.id === message.id;
        return isMatch ? parsed : undefined;
      } catch {
        // Not JSON (or an incomplete fragment): not our response
        return undefined;
      }
    };

    const onData = (chunk) => {
      const bytes = typeof chunk === 'string' ? Buffer.from(chunk, 'utf8') : Buffer.from(chunk);
      pendingBytes = Buffer.concat([pendingBytes, bytes]);
      // Consume every complete line; decoding happens per line so multi-byte
      // characters split across chunks are reassembled first.
      let newlineIndex = pendingBytes.indexOf(0x0a);
      while (newlineIndex !== -1) {
        const line = pendingBytes.subarray(0, newlineIndex).toString('utf8');
        pendingBytes = pendingBytes.subarray(newlineIndex + 1);
        const response = parseMatchingResponse(line);
        if (response !== undefined) {
          release();
          resolve(response);
          return;
        }
        newlineIndex = pendingBytes.indexOf(0x0a);
      }
      // Also accept a complete response that was sent without a trailing
      // newline; an incomplete fragment fails to parse and stays buffered.
      const tailResponse = parseMatchingResponse(pendingBytes.toString('utf8'));
      if (tailResponse !== undefined) {
        release();
        resolve(tailResponse);
      }
    };

    // Detach the listener and stop the timer exactly once the call settles
    const release = () => {
      clearTimeout(timer);
      stdout.removeListener('data', onData);
    };

    timer = setTimeout(() => {
      release();
      reject(new Error('Timeout waiting for server response'));
    }, timeoutMs);

    // Listen before writing so a fast (even synchronous) reply is not missed
    stdout.on('data', onData);
    try {
      stdin.write(`${JSON.stringify(message)}\n`);
    } catch (error) {
      release();
      reject(error);
    }
  });
}
/**
 * Log whether a scenario's observed success/error counts match its
 * `expectedSuccessCount` / `expectedErrorCount`. Does nothing when the
 * scenario defines no `expectedSuccessCount`.
 *
 * For the scenario named 'Error handling tests' the counts come from the
 * `result.isError` flag of each response; for every other scenario they come
 * from the tallies collected while running the thoughts.
 */
function reportExpectedCounts(scenario, results) {
  if (scenario.expectedSuccessCount === undefined) {
    return;
  }
  const isErrorScenario = scenario.name === 'Error handling tests';
  const countByIsError = (flag) =>
    results.thoughts.filter((entry) => entry.response?.result?.isError === flag).length;
  const successCount = isErrorScenario ? countByIsError(false) : results.successfulThoughts;
  const errorCount = isErrorScenario
    ? countByIsError(true)
    : results.totalThoughts - results.successfulThoughts;

  if (successCount === scenario.expectedSuccessCount) {
    log(`✓ Success count matches expected (${scenario.expectedSuccessCount})`);
  } else {
    log(`✗ Success count ${successCount} doesn't match expected ${scenario.expectedSuccessCount}`);
  }

  // Outside the error scenario the error check is optional
  if (!isErrorScenario && scenario.expectedErrorCount === undefined) {
    return;
  }
  if (errorCount === scenario.expectedErrorCount) {
    log(`✓ Error count matches expected (${scenario.expectedErrorCount})`);
  } else {
    log(`✗ Error count ${errorCount} doesn't match expected ${scenario.expectedErrorCount}`);
  }
}

/**
 * Detach the output listeners from the server process and kill it.
 * Safe to call when the server was never started.
 */
function stopServer(serverProcess) {
  if (!serverProcess) {
    return;
  }
  // Removing the listeners first prevents callbacks after the streams close
  serverProcess.stdout?.removeAllListeners('data');
  serverProcess.stderr?.removeAllListeners('data');
  serverProcess.kill();
}

/**
 * Close the log stream and exit with `exitCode` once buffered log lines have
 * been flushed. `process.exitCode` is set as well, so the code is kept even
 * if the process ends before the flush callback runs.
 */
function exitAfterLogFlush(exitCode) {
  process.exitCode = exitCode;
  logStream.end(() => process.exit(exitCode));
}

/**
 * Entry point: validate the requested scenario, start the server, run the
 * selected scenario(s), write the combined results to `resultFile`, log a
 * summary, then stop the server and close the log.
 *
 * Exits with code 1 for an unknown scenario or an unexpected error.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Validate before spawning anything, so a typo fails fast and leaves no
  // child process behind. Own-property check: names such as 'toString' must
  // not be accepted just because they exist on Object.prototype.
  const scenariosToRun = options.scenario === 'all' ? Object.keys(testScenarios) : [options.scenario];
  const isKnownScenario = (key) => Object.prototype.hasOwnProperty.call(testScenarios, key);
  if (!scenariosToRun.every(isKnownScenario)) {
    log(`Error: Unknown scenario '${options.scenario}'`);
    log(`Available scenarios: ${Object.keys(testScenarios).join(', ')}`);
    exitAfterLogFlush(1);
    return;
  }

  let serverProcess;
  try {
    serverProcess = await createSocketConnection();

    // Results storage for multiple scenarios
    const allResults = [];
    let totalSuccessfulThoughts = 0;
    let totalThoughts = 0;

    for (const scenarioKey of scenariosToRun) {
      const scenario = testScenarios[scenarioKey];
      log(`\n=== Running test scenario: ${scenario.name} ===`);
      log(scenario.description);
      const results = await runTestScenario(serverProcess, scenario);
      allResults.push(results);
      totalSuccessfulThoughts += results.successfulThoughts;
      totalThoughts += results.totalThoughts;

      // Individual scenario summary
      log(`\n--- ${scenario.name} Results ---`);
      log(`Successful thoughts: ${results.successfulThoughts}/${results.totalThoughts}`);
      log(`Status: ${results.successful ? 'SUCCESS' : 'FAILURE'}`);
      reportExpectedCounts(scenario, results);
    }

    // Save all results to file
    const overallSuccess = totalSuccessfulThoughts === totalThoughts;
    const combinedResults = {
      timestamp: new Date().toISOString(),
      scenarios: allResults,
      totalScenarios: scenariosToRun.length,
      totalThoughts,
      totalSuccessfulThoughts,
      overallSuccess,
    };
    fs.writeFileSync(resultFile, JSON.stringify(combinedResults, null, 2));
    log(`Results saved to ${resultFile}`);

    // Overall summary
    log('\n=== Overall Test Results Summary ===');
    log(`Scenarios run: ${scenariosToRun.length}`);
    log(`Total thoughts: ${totalThoughts}`);
    log(`Successful thoughts: ${totalSuccessfulThoughts}/${totalThoughts}`);
    log(`Overall status: ${overallSuccess ? 'SUCCESS' : 'FAILURE'}`);
    // NOTE(review): failed scenarios and unmet expectations are only reported
    // in the log and result file; the exit code stays 0. Confirm whether the
    // runner should exit non-zero in that case.

    stopServer(serverProcess);
    // Give a small delay to ensure no more events are processed
    await new Promise((resolve) => setTimeout(resolve, 100));
    logStream.end();
  } catch (error) {
    log(`Error: ${error instanceof Error ? error.message : String(error)}`);
    // Do not leave the server running when the run aborts
    stopServer(serverProcess);
    exitAfterLogFlush(1);
  }
}
// Start the runner; anything that escapes main() is logged and ends the
// process with exit code 1
main().catch((fatal) => {
  const reason = fatal instanceof Error ? fatal.message : String(fatal);
  log(`Fatal error: ${reason}`);
  logStream.end();
  process.exit(1);
});