UNPKG

@cyqlelabs/mcp-dual-cycle-reasoner

Version:

MCP server implementing dual-cycle metacognitive reasoning framework for autonomous agents

388 lines (338 loc) 12.9 kB
import { describe, it, expect, beforeAll, afterAll, beforeEach } from '@jest/globals'; import { ChildProcess } from 'child_process'; import { readFileSync } from 'fs'; import { join } from 'path'; import { MCPClient } from 'mcp-client'; // Load test fixtures const loadFixture = (filename: string) => { const fixturePath = join(__dirname, 'fixtures', filename); return JSON.parse(readFileSync(fixturePath, 'utf-8')); }; const complexScenarioFixture = loadFixture('browser_use_complex_scenario.json'); const loopFixture = loadFixture('browser_use_loop_fixture.json'); const scrollFixture = loadFixture('browser_use_scroll_fixture.json'); /** * Live MCP Server Integration Tests * * These tests spin up the actual MCP server process and interact with it * using the MCP protocol, providing a near-real-life scenario for testing * the dual-cycle reasoning framework. */ describe('Live MCP Server Integration', () => { let serverProcess: ChildProcess; let mcpClient: MCPClient; let serverReady = false; beforeAll(async () => { // Create MCP client mcpClient = new MCPClient({ name: 'test-client', version: '1.0.0', }); // Connect to the MCP server using stdio await mcpClient.connect({ type: 'stdio', command: 'node', args: ['build/server.js', '--stdio'], }); serverReady = true; }, 35000); afterAll(async () => { if (mcpClient) { await mcpClient.close(); } }); beforeEach(async () => { // Reset the engine before each test await mcpClient.callTool({ name: 'reset_engine', arguments: {}, }); }); describe('Complex Scenario Live Tests', () => { it('should handle complete workflow with complex scenario fixture', async () => { const trace = complexScenarioFixture.cognitive_trace; // Start monitoring const startResult = await mcpClient.callTool({ name: 'start_monitoring', arguments: { goal: trace.goal, initial_beliefs: [ 'Pricing information should be accessible', 'Modal interactions may be required', ], }, }); expect((startResult as any).content[0].text).toContain('✅ Metacognitive monitoring started'); // Process the trace actions one by one let processResult; let result; for (const action of trace.recent_actions) { processResult = await mcpClient.callTool({ name: 'process_trace_update', arguments: { last_action: action, current_context: trace.current_context, goal: trace.goal, window_size: 10, }, }); // Parse result and break if intervention is required const responseText = (processResult as any).content[0].text; try { result = JSON.parse(responseText); if (result.intervention_required) { break; } } catch (parseError) { console.error('Failed to parse response:', responseText); throw parseError; } } // The complex scenario should trigger intervention due to loop detection expect(result.intervention_required).toBe(true); expect(result.loop_detected?.detected).toBe(true); // Get monitoring status const statusResult = await mcpClient.callTool({ name: 'get_monitoring_status', arguments: {}, }); const status = JSON.parse((statusResult as any).content[0].text); expect(status.is_monitoring).toBe(true); expect(status.current_goal).toBe(trace.goal); expect(status.trace_length).toBeLessThanOrEqual(trace.recent_actions.length); }); it('should store and retrieve experience in live environment', async () => { // Store experience const storeResult = await mcpClient.callTool({ name: 'store_experience', arguments: { problem_description: 'Complex pricing table navigation', solution: 'Navigate to pricing page and interact with comparison modal', outcome: true, }, }); expect((storeResult as any).content[0].text).toContain('✅ Experience stored'); // Retrieve similar cases const retrieveResult = await mcpClient.callTool({ name: 'retrieve_similar_cases', arguments: { problem_description: 'pricing comparison task', max_results: 3, }, }); const cases = JSON.parse((retrieveResult as any).content[0].text); expect(cases.length).toBeGreaterThan(0); expect(cases[0]).toHaveProperty('problem_description'); expect(cases[0]).toHaveProperty('solution'); }, 15000); // 15 second timeout for NLP processing }); describe('Loop Detection Live Tests', () => { it('should detect and handle loops in live environment', async () => { const trace = loopFixture.cognitive_trace; // Start monitoring await mcpClient.callTool({ name: 'start_monitoring', arguments: { goal: trace.goal, initial_beliefs: ['Download button should be visible', 'Page content is accessible'], }, }); // Process the trace actions one by one (should detect loop) let processResult; for (const action of trace.recent_actions) { processResult = await mcpClient.callTool({ name: 'process_trace_update', arguments: { last_action: action, current_context: trace.current_context, goal: trace.goal, window_size: 10, }, }); // Break if intervention is required const responseText = (processResult as any).content[0].text; try { const result = JSON.parse(responseText); if (result.intervention_required) { break; } } catch (parseError) { console.error('Failed to parse loop detection response:', responseText); throw parseError; } } const result = JSON.parse((processResult as any).content[0].text); expect(result.intervention_required).toBe(true); expect(result.loop_detected?.detected).toBe(true); expect(result.loop_detected?.type).toMatch( /state_invariance|action_repetition|progress_stagnation/ ); expect(result.explanation).toBeDefined(); // Additional step: Call detect_loop to verify accumulated actions const detectResult = await mcpClient.callTool({ name: 'detect_loop', arguments: { current_context: trace.current_context, goal: trace.goal, detection_method: 'hybrid', }, }); const detectResultText = (detectResult as any).content[0].text; // Check if it's an error message or valid JSON if (detectResultText.startsWith('❌ Error executing')) { // Handle error case - test should fail with a descriptive message throw new Error(`Loop detection failed: ${detectResultText}`); } const loopResult = JSON.parse(detectResultText); expect(loopResult.detected).toBe(true); expect(loopResult.type).toBeDefined(); expect(loopResult.confidence).toBeGreaterThan(0); expect(loopResult.details).toBeDefined(); }, 15000); }); describe('Configuration Live Tests', () => { it('should configure detection parameters in live environment', async () => { const configResult = await mcpClient.callTool({ name: 'configure_detection', arguments: { progress_indicators: ['success', 'found', 'completed', 'navigated'], min_actions_for_detection: 3, alternating_threshold: 0.4, repetition_threshold: 0.3, progress_threshold_adjustment: 0.1, }, }); expect((configResult as any).content[0].text).toContain('⚙️ Detection configuration updated'); expect((configResult as any).content[0].text).toContain('Min actions for detection: 3'); expect((configResult as any).content[0].text).toContain('Alternating threshold: 0.4'); }); it('should handle different detection methods', async () => { const trace = scrollFixture.cognitive_trace; // Test statistical detection // First populate the internal trace by processing actions await mcpClient.callTool({ name: 'start_monitoring', arguments: { goal: trace.goal, initial_beliefs: [], }, }); // Add actions to internal trace for (const action of trace.recent_actions) { await mcpClient.callTool({ name: 'process_trace_update', arguments: { last_action: action, current_context: trace.current_context, goal: trace.goal, }, }); } const statisticalResult = await mcpClient.callTool({ name: 'detect_loop', arguments: { current_context: trace.current_context, goal: trace.goal, detection_method: 'statistical', }, }); const statisticalResponseText = (statisticalResult as any).content[0].text; let statisticalLoop; try { statisticalLoop = JSON.parse(statisticalResponseText); } catch (parseError) { console.error('Failed to parse statistical detection response:', statisticalResponseText); throw parseError; } expect(statisticalLoop).toBeDefined(); // Test pattern detection const patternResult = await mcpClient.callTool({ name: 'detect_loop', arguments: { current_context: trace.current_context, goal: trace.goal, detection_method: 'pattern', }, }); const patternLoop = JSON.parse((patternResult as any).content[0].text); expect(patternLoop).toBeDefined(); }, 15000); }); describe('Session Management Live Tests', () => { it('should handle session lifecycle in live environment', async () => { const trace = scrollFixture.cognitive_trace; // Start monitoring const startResult = await mcpClient.callTool({ name: 'start_monitoring', arguments: { goal: trace.goal, initial_beliefs: ['Scroll pattern should find target', 'Page content is dynamic'], }, }); expect((startResult as any).content[0].text).toContain('✅ Metacognitive monitoring started'); // Process partial trace const partialActions = trace.recent_actions.slice(0, 3); for (const action of partialActions) { await mcpClient.callTool({ name: 'process_trace_update', arguments: { last_action: action, current_context: trace.current_context, goal: trace.goal, window_size: 10, }, }); } // Get status const statusResult = await mcpClient.callTool({ name: 'get_monitoring_status', arguments: {}, }); const status = JSON.parse((statusResult as any).content[0].text); expect(status.is_monitoring).toBe(true); expect(status.trace_length).toBe(3); // Stop monitoring const stopResult = await mcpClient.callTool({ name: 'stop_monitoring', arguments: {}, }); expect((stopResult as any).content[0].text).toContain('🛑 Monitoring stopped'); expect((stopResult as any).content[0].text).toContain('Goal:'); expect((stopResult as any).content[0].text).toContain('Total interventions:'); }); }); describe('Error Handling Live Tests', () => { it('should handle invalid requests gracefully', async () => { // Test invalid tool name - should throw McpError try { await mcpClient.callTool({ name: 'nonexistent_tool', arguments: {}, }); fail('Expected McpError to be thrown'); } catch (error: any) { expect(error.name).toBe('McpError'); expect(error.message).toContain('Unknown tool'); expect(error.message).toContain('nonexistent_tool'); } }); it('should handle malformed trace data', async () => { // Test malformed parameters - should throw McpError try { await mcpClient.callTool({ name: 'process_trace_update', arguments: { // Missing required fields - last_action and goal are required current_context: 'test', }, }); fail('Expected McpError to be thrown'); } catch (error: any) { expect(error.name).toBe('McpError'); expect(error.message).toContain('parameter validation failed'); expect(error.message).toContain('last_action: Required'); expect(error.message).toContain('goal: Required'); } }); }); });