@cyqlelabs/mcp-dual-cycle-reasoner
Version:
MCP server implementing dual-cycle metacognitive reasoning framework for autonomous agents
692 lines (576 loc) • 25.1 kB
text/typescript
// @ts-nocheck
import { describe, it, expect, beforeEach, afterEach, jest } from '@jest/globals';
import { readFileSync } from 'fs';
import { join } from 'path';
// Stub out chalk so Jest does not load the real package (avoids ES module issues).
jest.mock('chalk', () => {
  // Each color helper returns its input string unchanged.
  const passthrough = (str: string) => str;
  return {
    __esModule: true,
    default: {
      blue: passthrough,
      green: passthrough,
      yellow: passthrough,
      red: passthrough,
      gray: passthrough,
      magenta: passthrough,
      cyan: passthrough,
    },
  };
});
// Replace uuid's v4 generator with one that always returns the same fixed id.
jest.mock('uuid', () => {
  const v4 = () => 'test-uuid-1234';
  return { v4 };
});
// Stand-in for the semantic analyzer module: every method is a jest.fn that returns
// canned data, which avoids the real analyzer's initialization during tests.
jest.mock('../src/semantic-analyzer', () => {
  // Length of each fake embedding vector.
  const EMBEDDING_DIM = 384;

  // Builds a size x size matrix with 1.0 on the diagonal and 0.7 everywhere else.
  const fakeSimilarityMatrix = (size) =>
    Array.from({ length: size }, (_row, i) =>
      Array.from({ length: size }, (_col, j) => (i === j ? 1.0 : 0.7))
    );

  const semanticAnalyzer = {
    initialize: jest.fn(async () => {}),
    analyzeTextPair: jest.fn(async () => {
      return { similarity: 0.8, confidence: 0.9, sentiment: 0.1 };
    }),
    assessActionOutcome: jest.fn(async () => {
      return { success_probability: 0.7, confidence: 0.8, sentiment: 0.2 };
    }),
    assessBeliefContradiction: jest.fn(async () => {
      return { contradicts: true, confidence: 0.8, reasoning: 'Evidence contradicts belief' };
    }),
    extractSemanticFeatures: jest.fn(async () => {
      return {
        intents: ['performing action', 'checking status'],
        sentiment: 'positive',
        confidence: 0.8,
      };
    }),
    calculateSemanticSimilarity: jest.fn(async () => {
      return { similarity: 0.7, confidence: 0.8, reasoning: 'Semantic similarity analysis' };
    }),
    // One constant vector per input text.
    getBatchEmbeddings: jest.fn(async (texts) =>
      texts.map(() => new Array(EMBEDDING_DIM).fill(0.5))
    ),
    computeSimilarityMatrix: jest.fn(async (texts) => fakeSimilarityMatrix(texts.length)),
    isReady: jest.fn(() => true),
  };

  return { semanticAnalyzer };
});
import { DualCycleEngine } from '../src/dual-cycle-engine';
import { CognitiveTrace, SentinelConfig } from '../src/types';
// Reads `<this directory>/fixtures/<filename>` as UTF-8 text and returns the parsed JSON.
// Throws if the file cannot be read or does not contain valid JSON.
const loadFixture = (filename: string) => {
  const rawJson = readFileSync(join(__dirname, 'fixtures', filename), { encoding: 'utf-8' });
  return JSON.parse(rawJson);
};
// Parse the three browser-use fixtures once, at module load, for use by all tests below.
const [complexScenarioFixture, loopFixture, scrollFixture] = [
  'browser_use_complex_scenario.json',
  'browser_use_loop_fixture.json',
  'browser_use_scroll_fixture.json',
].map((fixtureName) => loadFixture(fixtureName));
describe('DualCycleEngine with Fixtures', () => {
let engine: DualCycleEngine;
let config: Partial<SentinelConfig>;
beforeEach(async () => {
  // Baseline configuration; several tests spread `config` and override individual fields.
  const baseline: Partial<SentinelConfig> = {
    progress_indicators: ['success', 'found', 'completed', 'navigated'],
    min_actions_for_detection: 3,
    alternating_threshold: 0.4,
    repetition_threshold: 0.3,
    progress_threshold_adjustment: 0.1,
  };
  config = baseline;
  engine = new DualCycleEngine(baseline);

  // Resolve the (mocked) analyzer module and run its initialize() before every test.
  const analyzerModule = await import('../src/semantic-analyzer');
  await analyzerModule.semanticAnalyzer.initialize();
});
// Reset the shared engine after every test so state does not carry over to the next one.
afterEach(function resetSharedEngine() {
  engine.reset();
});
describe('Complex Scenario Fixture Tests', () => {
it('should successfully process complex scenario trace and detect state invariance', async () => {
  const trace: CognitiveTrace = complexScenarioFixture.cognitive_trace;

  // Start monitoring with no initial beliefs.
  await engine.startMonitoring(trace.goal, []);

  // Feed the actions one at a time and stop at the first requested intervention.
  // The default value already covers an empty action list, so the assertions below
  // fail with a clear message instead of throwing on `undefined`.
  let result: any = { intervention_required: false, loop_detected: { detected: false } };
  for (const action of trace.recent_actions) {
    result = await engine.processTraceUpdate(action, trace.current_context, trace.goal);
    if (result.intervention_required) {
      break;
    }
  }

  expect(result).toBeDefined();
  // The complex scenario should trigger loop detection due to repetitive actions.
  expect(result.intervention_required).toBe(true);
  expect(result.loop_detected?.detected).toBe(true);

  // Monitoring is still active and bound to the fixture's goal.
  const status = engine.getMonitoringStatus();
  expect(status.is_monitoring).toBe(true);
  expect(status.current_goal).toBe(trace.goal);
  // The loop above may stop early, so the recorded trace can be shorter than the fixture.
  expect(status.trace_length).toBeLessThanOrEqual(trace.recent_actions.length);
  expect(status.trace_length).toBeGreaterThan(0);
});
it('should detect progress indicators in complex scenario', async () => {
  const trace: CognitiveTrace = complexScenarioFixture.cognitive_trace;
  const { goal, recent_actions: actions, current_context: context } = trace;

  // Dedicated engine whose progress indicators are two action names.
  const progressEngine = new DualCycleEngine({
    ...config,
    progress_indicators: ['click_element_by_index', 'extract_structured_data'],
  });
  await progressEngine.startMonitoring(goal, []);

  // Submit actions in order; stop as soon as an intervention is requested.
  let outcome;
  for (let step = 0; step < actions.length; step += 1) {
    outcome = await progressEngine.processTraceUpdate(actions[step], context, goal);
    if (outcome.intervention_required) {
      break;
    }
  }

  // Even with progress indicators, repetitive actions should still be detected.
  expect(outcome.intervention_required).toBe(true);
  expect(outcome.loop_detected?.detected).toBe(true);
});
it('should store and retrieve experience from complex scenario', async () => {
  // Store one experience directly on the engine's adjudicator (a non-public member,
  // hence the `any` cast).
  const experience = {
    problem_description: 'Finding pricing comparison table',
    solution: 'Navigate to pricing page and open comparison modal',
    outcome: true,
  };
  const adjudicator = (engine as any).adjudicator;
  await adjudicator.storeExperience(experience);

  // Query through the engine's public API, asking for at most 3 cases.
  const similarCases = await engine.getSimilarCases('pricing table comparison', 3);
  expect(similarCases).toBeDefined();
  expect(similarCases.length).toBeGreaterThan(0);
  expect(similarCases[0].problem_description).toContain('pricing');
});
});
describe('Loop Detection Fixture Tests', () => {
it('should detect loops in repetitive scrolling pattern', async () => {
  const trace: CognitiveTrace = loopFixture.cognitive_trace;
  const { goal, current_context: context } = trace;
  await engine.startMonitoring(goal, []);

  // Replay the fixture action by action until the engine asks for an intervention.
  let lastUpdate;
  for (const step of trace.recent_actions) {
    lastUpdate = await engine.processTraceUpdate(step, context, goal);
    if (lastUpdate.intervention_required) {
      break;
    }
  }

  expect(lastUpdate).toBeDefined();
  expect(lastUpdate.intervention_required).toBe(true);

  const loop = lastUpdate.loop_detected;
  expect(loop?.detected).toBe(true);
  expect(loop?.type).toMatch(/state_invariance|action_repetition|progress_stagnation/);
  expect(loop?.confidence).toBeGreaterThan(0.5);
});
it('should confirm browser_use_loop_fixture.json triggers loop detection', async () => {
  // Critical regression check: this fixture is expected to trigger a loop every time.
  const trace: CognitiveTrace = loopFixture.cognitive_trace;
  await engine.startMonitoring(trace.goal, []);

  // Walk the actions by index, stopping at the first intervention request.
  const actions = trace.recent_actions;
  let update;
  let index = 0;
  while (index < actions.length) {
    update = await engine.processTraceUpdate(actions[index], trace.current_context, trace.goal);
    if (update.intervention_required) {
      break;
    }
    index += 1;
  }

  // The fixture must produce a confident detection that carries details.
  expect(update.intervention_required).toBe(true);
  expect(update.loop_detected?.detected).toBe(true);
  expect(update.loop_detected?.confidence).toBeGreaterThan(0.5);
  expect(update.loop_detected?.details).toBeDefined();
});
it('should confirm complex scenario triggers state invariance detection', async () => {
  // Starts monitoring for the trace's goal, then feeds its actions to the shared engine
  // one at a time. Returns the last result, stopping early once an intervention is
  // requested; an empty action list yields the "nothing detected" default.
  const runUntilIntervention = async (trace: CognitiveTrace) => {
    await engine.startMonitoring(trace.goal, []);
    let latest: any = { intervention_required: false, loop_detected: { detected: false } };
    for (const step of trace.recent_actions) {
      latest = await engine.processTraceUpdate(step, trace.current_context, trace.goal);
      if (latest.intervention_required) {
        break;
      }
    }
    return latest;
  };

  // Part 1: the complex scenario is expected to be flagged as one of the known loop types.
  const complexResult = await runUntilIntervention(complexScenarioFixture.cognitive_trace);
  expect(complexResult.intervention_required).toBe(true);
  expect(complexResult.loop_detected?.detected).toBe(true);
  expect(complexResult.loop_detected?.type).toMatch(
    /state_invariance|action_repetition|progress_stagnation/
  );

  // Clear engine state before replaying the second fixture.
  engine.reset();

  // Part 2: the scroll fixture is expected to be flagged as well.
  const scrollResult = await runUntilIntervention(scrollFixture.cognitive_trace);
  expect(scrollResult.intervention_required).toBe(true);
  expect(scrollResult.loop_detected?.detected).toBe(true);
});
it('should use hybrid detection method for loop fixture', async () => {
  // Call the engine's internal sentinel directly (non-public member, hence the cast)
  // with the whole fixture trace.
  const { sentinel } = engine as any;
  const fixtureTrace: CognitiveTrace = loopFixture.cognitive_trace;
  const detection = await sentinel.detectLoop(fixtureTrace, 'hybrid');

  expect(detection.detected).toBe(true);
  expect(detection.type).toBe('progress_stagnation');
  expect(detection.details.metrics.diversity).toBeDefined();

  // Both scroll directions must be reported among the involved actions.
  expect(detection.actions_involved).toBeDefined();
  expect(detection.actions_involved!.length).toBeGreaterThan(0);
  const expectedActions = expect.arrayContaining(['scroll_down', 'scroll_up']);
  expect(detection.actions_involved).toEqual(expectedActions);
});
it('should detect loop and retrieve similar cases for recovery', async () => {
  const { sentinel, adjudicator } = engine as any;
  const fixtureTrace: CognitiveTrace = loopFixture.cognitive_trace;

  // Step 1: the sentinel flags the fixture as a loop with non-zero confidence.
  const detection = await sentinel.detectLoop(fixtureTrace, 'hybrid');
  expect(detection).toBeDefined();
  expect(detection.detected).toBe(true);
  expect(detection.confidence).toBeGreaterThan(0);

  // Step 2: record a related experience on the adjudicator.
  await adjudicator.storeExperience({
    problem_description: 'Stuck in scrolling loop trying to find element',
    solution: 'Use alternative navigation method',
    outcome: true,
  });

  // Step 3: look it up through the engine, asking for at most 3 cases.
  const matches = await engine.getSimilarCases('scrolling loop navigation', 3);
  expect(matches).toBeDefined();
  expect(matches.length).toBeGreaterThan(0);
  expect(matches[0].problem_description).toContain('scrolling');
});
it('should handle recovery outcome updates', async () => {
  const { goal, recent_actions: actions, current_context: context } =
    loopFixture.cognitive_trace as CognitiveTrace;
  await engine.startMonitoring(goal, []);

  // Unlike most tests here, replay every action without stopping at an intervention.
  for (let i = 0; i < actions.length; i += 1) {
    await engine.processTraceUpdate(actions[i], context, goal);
  }

  // At least one intervention must have been counted along the way.
  expect(engine.getMonitoringStatus().intervention_count).toBeGreaterThan(0);
});
});
describe('Scroll Fixture Tests', () => {
it('should handle successful scroll-to-find pattern', async () => {
  const trace: CognitiveTrace = scrollFixture.cognitive_trace;
  const { goal, current_context: context } = trace;
  await engine.startMonitoring(goal, []);

  // Replay the scroll fixture until the engine requests an intervention.
  let lastUpdate;
  for (const step of trace.recent_actions) {
    lastUpdate = await engine.processTraceUpdate(step, context, goal);
    if (lastUpdate.intervention_required) {
      break;
    }
  }

  expect(lastUpdate).toBeDefined();
  // The scroll pattern with repetitive actions should trigger loop detection.
  expect(lastUpdate.intervention_required).toBe(true);
  expect(lastUpdate.loop_detected?.detected).toBe(true);
});
it('should detect successful task completion pattern', async () => {
  const trace: CognitiveTrace = scrollFixture.cognitive_trace;

  // Separate engine that treats the click action name as its only progress indicator.
  const successEngine = new DualCycleEngine({
    ...config,
    progress_indicators: ['click_element_by_index'],
  });
  await successEngine.startMonitoring(trace.goal, []);

  // Walk the actions by index, stopping at the first intervention request.
  const actions = trace.recent_actions;
  let update;
  let index = 0;
  while (index < actions.length) {
    update = await successEngine.processTraceUpdate(
      actions[index],
      trace.current_context,
      trace.goal
    );
    if (update.intervention_required) {
      break;
    }
    index += 1;
  }

  // Even with progress indicators, repetitive actions should still be detected.
  expect(update.intervention_required).toBe(true);
  expect(update.loop_detected?.detected).toBe(true);
});
it('should perform statistical analysis on scroll pattern', async () => {
  const trace: CognitiveTrace = scrollFixture.cognitive_trace;
  const sentinel = (engine as any).sentinel;

  // detectLoop is awaited everywhere else in this suite; without `await` here the
  // variable would hold a Promise, `statistical_metrics` would always be undefined,
  // and the metric assertions below could never run.
  const loopResult = await sentinel.detectLoop(trace, 'statistical');
  expect(loopResult).toBeDefined();

  // Metrics are only checked when the result carries them.
  if (loopResult.statistical_metrics) {
    expect(loopResult.statistical_metrics.entropy_score).toBeDefined();
    expect(loopResult.statistical_metrics.variance_score).toBeDefined();
  }
});
});
describe('Configuration Tests', () => {
it('should handle different detection thresholds', async () => {
  // Same baseline as the shared config, but with lower alternating/repetition thresholds.
  const strictEngine = new DualCycleEngine({
    ...config,
    alternating_threshold: 0.2,
    repetition_threshold: 0.1,
  });

  const fixtureTrace = loopFixture.cognitive_trace;
  await strictEngine.startMonitoring(fixtureTrace.goal, []);

  // Replay the loop fixture until an intervention is requested.
  let update;
  for (const step of fixtureTrace.recent_actions) {
    update = await strictEngine.processTraceUpdate(
      step,
      fixtureTrace.current_context,
      fixtureTrace.goal
    );
    if (update.intervention_required) {
      break;
    }
  }

  expect(update.intervention_required).toBe(true);
  expect(update.loop_detected?.confidence).toBeGreaterThan(0.5);
});
it('should adjust thresholds based on progress indicators', async () => {
  const progressEngine = new DualCycleEngine({
    ...config,
    progress_indicators: ['found_element', 'click_success'],
    progress_threshold_adjustment: 0.5,
    min_actions_for_detection: 3,
  });

  // Hand-built trace whose last two actions match the configured progress indicators.
  const progressTrace: CognitiveTrace = {
    recent_actions: ['scroll_down', 'found_element', 'click_success'],
    current_context: 'different_context',
    goal: 'Find and click button',
  };
  // A distinct context per action, to avoid state invariance.
  const contexts = ['searching', 'found_target', 'clicking'];
  const fallbackContext = contexts[contexts.length - 1];

  await progressEngine.startMonitoring(progressTrace.goal, []);

  // Pair each action with its context (falling back to the last context) and stop at
  // the first intervention request.
  let update;
  for (const [position, step] of progressTrace.recent_actions.entries()) {
    const stepContext = contexts[position] || fallbackContext;
    update = await progressEngine.processTraceUpdate(step, stepContext, progressTrace.goal);
    if (update.intervention_required) {
      break;
    }
  }

  // With proper progress indicators, this sequence should NOT trigger loop detection.
  expect(update.intervention_required).toBe(false);
  expect(update.loop_detected?.detected).toBe(false);
});
});
describe('Integration Tests', () => {
it('should handle complete workflow from monitoring to recovery', async () => {
  const trace: CognitiveTrace = loopFixture.cognitive_trace;
  const { goal, current_context: context } = trace;

  // Begin monitoring with two initial beliefs.
  const initialBeliefs = ['Button should be visible', 'Page loads quickly'];
  await engine.startMonitoring(goal, initialBeliefs);

  // Replay the fixture until the engine requests an intervention.
  let update;
  for (const step of trace.recent_actions) {
    update = await engine.processTraceUpdate(step, context, goal);
    if (update.intervention_required) {
      break;
    }
  }

  expect(update.intervention_required).toBe(true);
  expect(update.loop_detected?.detected).toBe(true);
  expect(update.explanation).toBeDefined();

  // Because the replay stops at the first intervention, exactly one must be counted.
  const { intervention_count: interventions } = engine.getMonitoringStatus();
  expect(interventions).toBe(1);
});
it('should maintain session state across multiple trace updates', async () => {
  const scrollTrace: CognitiveTrace = scrollFixture.cognitive_trace;
  const { goal, current_context: context, recent_actions: allActions } = scrollTrace;
  await engine.startMonitoring(goal, []);

  // Sends a batch of actions to the engine, stopping at the first intervention request.
  // Returns the last result, or `fallback` when the batch is empty.
  const feed = async (batch: CognitiveTrace['recent_actions'], fallback: any) => {
    let latest = fallback;
    for (const step of batch) {
      latest = await engine.processTraceUpdate(step, context, goal);
      if (latest.intervention_required) {
        break;
      }
    }
    return latest;
  };

  // First batch: only the first two actions of the fixture.
  const result1 = await feed(allActions.slice(0, 2), {
    intervention_required: false,
    loop_detected: { detected: false },
  });
  // First 2 actions might trigger a loop if they're repetitive, so only require a result.
  expect(result1).toBeDefined();

  // Second batch: just the remaining actions, sent to the same monitoring session.
  const result2 = await feed(allActions.slice(2), result1);

  // Final state should be intervention required due to accumulated repetitive actions.
  expect(result2).toBeDefined();
  expect(result2.intervention_required).toBe(true);

  const { trace_length: recorded } = engine.getMonitoringStatus();
  expect(recorded).toBeLessThanOrEqual(allActions.length);
  expect(recorded).toBeGreaterThan(0);
});
it('should reset engine state properly', async () => {
  const { goal, recent_actions: actions, current_context: context } =
    complexScenarioFixture.cognitive_trace as CognitiveTrace;

  // Build up some state: start a session and replay every fixture action.
  await engine.startMonitoring(goal, ['Initial belief']);
  for (let i = 0; i < actions.length; i += 1) {
    await engine.processTraceUpdate(actions[i], context, goal);
  }

  engine.reset();

  // After reset, monitoring is off and both counters are back at zero.
  const {
    is_monitoring: monitoring,
    trace_length: recorded,
    intervention_count: interventions,
  } = engine.getMonitoringStatus();
  expect(monitoring).toBe(false);
  expect(recorded).toBe(0);
  expect(interventions).toBe(0);
});
});
describe('Actions Accumulation Tests', () => {
it('should accumulate actions through complete monitoring workflow', async () => {
  const goal = 'Find and click download button';
  const context = 'Looking for download button';

  // Step 1: start monitoring; the session is active and the trace starts empty.
  await engine.startMonitoring(goal, ['Download button is visible on page']);
  const initialStatus = engine.getMonitoringStatus();
  expect(initialStatus.is_monitoring).toBe(true);
  expect(initialStatus.trace_length).toBe(0);

  // Step 2: submit the same action five times via processTraceUpdate.
  const actions = new Array(5).fill('scroll_down');
  let finalResult;
  for (let i = 0; i < actions.length; i += 1) {
    finalResult = await engine.processTraceUpdate(actions[i], context, goal);
    // The trace must be non-empty after every submitted action.
    expect(engine.getMonitoringStatus().trace_length).toBeGreaterThan(0);
    // Stop early once a loop is reported.
    if (finalResult.intervention_required) {
      break;
    }
  }

  // The recorded trace is non-empty and never longer than what was submitted.
  const accumulated = engine.getMonitoringStatus();
  expect(accumulated.trace_length).toBeGreaterThan(0);
  expect(accumulated.trace_length).toBeLessThanOrEqual(actions.length);

  // Step 3: run the sentinel's loop detection on the engine's accumulated trace.
  const { sentinel } = engine as any;
  const enrichedTrace = engine.getEnrichedCurrentTrace();
  expect(enrichedTrace).toBeDefined();
  expect(enrichedTrace.recent_actions).toBeDefined();
  expect(enrichedTrace.recent_actions.length).toBeGreaterThan(0);

  const detection = await sentinel.detectLoop(enrichedTrace, 'hybrid');
  expect(detection).toBeDefined();
  expect(detection.detected).toBe(true);
  expect(detection.type).toBeDefined();
  expect(detection.confidence).toBeGreaterThan(0);

  // The incremental updates must have reported the loop as well.
  expect(finalResult.intervention_required).toBe(true);
  expect(finalResult.loop_detected?.detected).toBe(true);

  // Step 4: stop monitoring.
  engine.stopMonitoring();
  expect(engine.getMonitoringStatus().is_monitoring).toBe(false);
});
it('should verify individual action accumulation during monitoring', async () => {
  const goal = 'Navigate to pricing page';
  await engine.startMonitoring(goal, []);

  // Each entry is [action, context supplied alongside that action].
  const steps = [
    ['click_element_by_index', 'Clicked navigation link'],
    ['wait_for_page_load', 'Waiting for page to load'],
    ['scroll_down', 'Scrolling to find pricing'],
    ['click_element_by_index', 'Clicked pricing tab'],
    ['wait_for_page_load', 'Waiting for content'],
    ['scroll_down', 'Scrolling for more details'],
  ];

  for (const [position, [action, context]] of steps.entries()) {
    const update = await engine.processTraceUpdate(action, context, goal);
    const expectedLength = position + 1;

    // The reported trace length grows by exactly one per submitted action.
    expect(engine.getMonitoringStatus().trace_length).toBe(expectedLength);

    // The enriched trace holds every action so far, with the newest one last.
    const recorded = engine.getEnrichedCurrentTrace().recent_actions;
    expect(recorded).toBeDefined();
    expect(recorded.length).toBe(expectedLength);
    expect(recorded[position]).toBe(action);

    // Stop submitting once the engine requests an intervention.
    if (update.intervention_required) {
      break;
    }
  }

  // Final check: something was recorded, and never more than what was submitted.
  const finalLength = engine.getMonitoringStatus().trace_length;
  expect(finalLength).toBeGreaterThan(0);
  expect(finalLength).toBeLessThanOrEqual(steps.length);

  engine.stopMonitoring();
});
});
});