UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

246 lines 12.2 kB
/**
 * Smoke tests for the GAIA convergence layer (gaia-convergence.ts).
 *
 * 10 assertions covering:
 *   T1: max_turns hit → checkConvergenceTriggers returns 'max_turns'
 *   T2: same tool+args 3× in window → returns 'loop'
 *   T3: 120k tokens → returns 'token_overflow'
 *   T4: forceCommit with prose-embedded answer in history → extraction succeeds
 *   T5: forceCommit with explicit FINAL_ANSWER: X in response → returns X
 *   T6: forceCommit on truly empty/looping conversation → returns null (graceful)
 *   T7: argsHash deterministic — same args → same hash
 *   T8: argsHash distinguishes — different args → different hash
 *   T9: ConvergenceState records turn counts correctly via recordTurn
 *   T10: Anti-loop window correctly slides — only last 5 turns matter
 *
 * Two bonus cases (T11, T12) exercise the text-extraction helpers directly.
 *
 * Refs: #2156, iter 62
 */
import assert from 'node:assert';
import {
  checkConvergenceTriggers,
  forceCommit,
  argsHash,
  createConvergenceState,
  recordTurn,
  extractFinalAnswerFromText,
  extractFromPriorMessages,
  TOKEN_OVERFLOW_THRESHOLD,
  LOOP_WINDOW_SIZE,
} from './gaia-convergence.js';

// ---------------------------------------------------------------------------
// Test harness
// ---------------------------------------------------------------------------
let passed = 0;
let failed = 0;

// Promises of tests started through test() that have not settled yet.
// The summary awaits these instead of guessing with a timer.
const pending = [];

/**
 * Log a passing test and bump the pass counter.
 *
 * @param {string} name - Human-readable test name.
 */
function recordPass(name) {
  console.log(` PASS ${name}`);
  passed++;
}

/**
 * Log a failing test and bump the failure counter.
 *
 * @param {string} name - Human-readable test name.
 * @param {unknown} err - Whatever the test threw or rejected with.
 */
function recordFail(name, err) {
  // `err?.` guards against a thrown null/undefined, which would otherwise
  // make the failure reporter itself throw.
  console.error(` FAIL ${name}: ${err?.message ?? err}`);
  failed++;
}

/**
 * Run a test body and record the outcome.
 *
 * The body may be synchronous or may return a thenable. A synchronous throw
 * is recorded as a failure rather than escaping to the caller; a thenable is
 * tracked in `pending` so the summary can wait for it to settle.
 *
 * @param {string} name - Human-readable test name.
 * @param {() => unknown} fn - Test body; throws (or rejects) on failure.
 */
function test(name, fn) {
  let result;
  try {
    // The call itself must sit inside the try: this is where a failing
    // synchronous assertion throws.
    result = fn();
  } catch (err) {
    recordFail(name, err);
    return;
  }

  if (result && typeof result.then === 'function') {
    // Both handlers are supplied, so the tracked promise never rejects.
    pending.push(
      result.then(
        () => recordPass(name),
        (err) => recordFail(name, err),
      ),
    );
    return;
  }

  recordPass(name);
}

/**
 * Run an async test body to completion and record the outcome.
 *
 * @param {string} name - Human-readable test name.
 * @param {() => Promise<unknown>} fn - Async test body; rejects on failure.
 * @returns {Promise<void>} Resolves once the outcome has been recorded.
 */
async function testAsync(name, fn) {
  try {
    await fn();
    recordPass(name);
  } catch (err) {
    recordFail(name, err);
  }
}

// ---------------------------------------------------------------------------
// T1: max_turns hit → returns 'max_turns'
// ---------------------------------------------------------------------------
test('T1: max_turns hit returns max_turns', () => {
  const state = {
    turnCount: 12,
    totalTokens: 1000,
    toolCalls: [],
    detectedFailureMode: null,
  };
  const result = checkConvergenceTriggers(state, 12);
  assert.strictEqual(result, 'max_turns', `Expected 'max_turns', got ${result}`);
});

// ---------------------------------------------------------------------------
// T2: same tool+args 3× in last 5 turns → returns 'loop'
// ---------------------------------------------------------------------------
test('T2: loop detection — same tool+args 3x in window', () => {
  const hash = argsHash('web_search', { query: 'test' });
  const state = {
    turnCount: 5,
    totalTokens: 5000,
    toolCalls: [
      { name: 'web_search', argsHash: hash, turn: 1 },
      { name: 'web_search', argsHash: hash, turn: 2 },
      { name: 'web_search', argsHash: hash, turn: 3 },
    ],
    detectedFailureMode: null,
  };
  const result = checkConvergenceTriggers(state, 15);
  assert.strictEqual(result, 'loop', `Expected 'loop', got ${result}`);
});

// ---------------------------------------------------------------------------
// T3: 120k tokens → returns 'token_overflow'
// ---------------------------------------------------------------------------
test('T3: token_overflow at threshold', () => {
  const state = {
    turnCount: 5,
    totalTokens: TOKEN_OVERFLOW_THRESHOLD,
    toolCalls: [],
    detectedFailureMode: null,
  };
  const result = checkConvergenceTriggers(state, 20);
  assert.strictEqual(result, 'token_overflow', `Expected 'token_overflow', got ${result}`);
});

// ---------------------------------------------------------------------------
// T4: forceCommit with prose-embedded answer in prior history → extraction succeeds
// ---------------------------------------------------------------------------
await testAsync('T4: forceCommit — prose-embedded FINAL_ANSWER in prior history', async () => {
  const messages = [
    { role: 'user', content: 'What is 2+2?' },
    {
      role: 'assistant',
      content: 'Let me think about this. The answer is definitely 4. FINAL_ANSWER: 4',
    },
    { role: 'user', content: 'tool results...' },
    { role: 'assistant', content: 'I got confused. Let me recalculate.' },
  ];
  // callModel returns text with no FINAL_ANSWER marker, so the answer has to
  // come from the earlier assistant message in the history.
  const callModel = async () => 'I need to think more about this.';
  const result = await forceCommit(messages, callModel, 'max_turns');
  assert.strictEqual(result.answer, '4', `Expected '4', got ${result.answer}`);
  assert.strictEqual(result.usedFallback, true, 'Expected usedFallback=true for history scan');
});

// ---------------------------------------------------------------------------
// T5: forceCommit with explicit FINAL_ANSWER: X in forced response → returns X
// ---------------------------------------------------------------------------
await testAsync('T5: forceCommit — explicit FINAL_ANSWER in forced-commit response', async () => {
  const messages = [
    { role: 'user', content: 'What is the capital of France?' },
    { role: 'assistant', content: 'I need to search for this.' },
  ];
  // callModel returns a response WITH FINAL_ANSWER
  const callModel = async () => 'Based on my knowledge, the capital is Paris. FINAL_ANSWER: Paris';
  const result = await forceCommit(messages, callModel, 'max_turns');
  assert.strictEqual(result.answer, 'Paris', `Expected 'Paris', got ${result.answer}`);
  assert.strictEqual(result.usedFallback, false, 'Expected usedFallback=false when direct extraction');
  assert.strictEqual(result.triggerMode, 'max_turns');
});

// ---------------------------------------------------------------------------
// T6: forceCommit on truly empty conversation → returns null (graceful)
// ---------------------------------------------------------------------------
await testAsync('T6: forceCommit — graceful null on empty conversation', async () => {
  const messages = [
    { role: 'user', content: 'What is something unknowable?' },
    { role: 'assistant', content: 'I have been calling tools repeatedly with no progress.' },
  ];
  // callModel returns nothing useful
  const callModel = async () => 'I cannot determine the answer to this question.';
  const result = await forceCommit(messages, callModel, 'loop');
  assert.strictEqual(result.answer, null, `Expected null, got ${result.answer}`);
  assert.strictEqual(result.triggerMode, 'loop');
});

// ---------------------------------------------------------------------------
// T7: argsHash deterministic — same args → same hash
// ---------------------------------------------------------------------------
test('T7: argsHash is deterministic', () => {
  const h1 = argsHash('web_search', { query: 'hello world', limit: 5 });
  const h2 = argsHash('web_search', { query: 'hello world', limit: 5 });
  assert.strictEqual(h1, h2, 'Same inputs must produce identical hash');
  assert.ok(h1.length === 16, `Hash should be 16 hex chars, got ${h1.length}`);
});

// ---------------------------------------------------------------------------
// T8: argsHash distinguishes — different args → different hash
// ---------------------------------------------------------------------------
test('T8: argsHash distinguishes different args', () => {
  const h1 = argsHash('web_search', { query: 'hello' });
  const h2 = argsHash('web_search', { query: 'world' });
  assert.notStrictEqual(h1, h2, 'Different args must produce different hash');
  const h3 = argsHash('calculator', { query: 'hello' });
  assert.notStrictEqual(h1, h3, 'Different tool names must produce different hash');
});

// ---------------------------------------------------------------------------
// T9: ConvergenceState records turn counts correctly via recordTurn
// ---------------------------------------------------------------------------
test('T9: recordTurn increments turnCount and totalTokens', () => {
  const state = createConvergenceState();
  assert.strictEqual(state.turnCount, 0);
  assert.strictEqual(state.totalTokens, 0);
  assert.strictEqual(state.toolCalls.length, 0);

  recordTurn(state, 1500, [
    { name: 'web_search', args: { query: 'test' } },
    { name: 'calculator', args: { expr: '2+2' } },
  ]);
  assert.strictEqual(state.turnCount, 1, `Expected turnCount=1, got ${state.turnCount}`);
  assert.strictEqual(state.totalTokens, 1500, `Expected totalTokens=1500, got ${state.totalTokens}`);
  assert.strictEqual(state.toolCalls.length, 2, `Expected 2 tool calls recorded, got ${state.toolCalls.length}`);
  assert.strictEqual(state.toolCalls[0].name, 'web_search');
  assert.strictEqual(state.toolCalls[0].turn, 1);

  recordTurn(state, 2000, []);
  assert.strictEqual(state.turnCount, 2);
  assert.strictEqual(state.totalTokens, 3500);
});

// ---------------------------------------------------------------------------
// T10: Anti-loop window correctly slides — only last LOOP_WINDOW_SIZE turns matter
// ---------------------------------------------------------------------------
test('T10: loop window slides — old calls outside window do not trigger', () => {
  const hash = argsHash('web_search', { query: 'test' });
  // 4 repeated calls followed by 5 different calls — window is last 5 only
  const oldCalls = Array.from({ length: 4 }, (_, i) => ({
    name: 'web_search',
    argsHash: hash,
    turn: i + 1,
  }));
  const recentCalls = Array.from({ length: LOOP_WINDOW_SIZE }, (_, i) => ({
    name: `tool_${i}`,
    argsHash: argsHash(`tool_${i}`, {}),
    turn: i + 5,
  }));
  const state = {
    turnCount: 9,
    totalTokens: 9000,
    toolCalls: [...oldCalls, ...recentCalls],
    detectedFailureMode: null,
  };
  const result = checkConvergenceTriggers(state, 20);
  assert.strictEqual(result, null, `Expected null (old calls outside window), got ${result}`);
});

// ---------------------------------------------------------------------------
// Bonus: extractFinalAnswerFromText edge cases
// ---------------------------------------------------------------------------
test('T11 (bonus): extractFinalAnswerFromText handles multiline and trailing content', () => {
  const text = 'After much deliberation:\nFINAL_ANSWER: 42\nSome extra text';
  const result = extractFinalAnswerFromText(text);
  assert.strictEqual(result, '42', `Expected '42', got ${result}`);
});

test('T12 (bonus): extractFromPriorMessages scans in reverse order', () => {
  const messages = [
    { role: 'user', content: 'Question' },
    { role: 'assistant', content: 'First answer FINAL_ANSWER: wrong' },
    { role: 'user', content: 'tool results' },
    { role: 'assistant', content: 'After more research FINAL_ANSWER: correct' },
  ];
  // Should return 'correct' (last assistant message first in reverse scan)
  const result = extractFromPriorMessages(messages);
  assert.strictEqual(result, 'correct', `Expected 'correct' (most recent), got ${result}`);
});

// ---------------------------------------------------------------------------
// Summary
// ---------------------------------------------------------------------------
// Wait for every promise-returning test started via test() to settle, so the
// counters are final before the summary is printed.
await Promise.all(pending);

const total = passed + failed;
console.log('');
console.log('=== GAIA Convergence Layer Smoke Test ===');
console.log(`Pass rate: ${passed}/${total}`);
console.log(`Status: ${failed === 0 ? 'ALL PASSED' : `${failed} FAILED`}`);
if (failed > 0) {
  process.exit(1);
}
//# sourceMappingURL=gaia-convergence.smoke.js.map