UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

228 lines 13.7 kB
/**
 * Smoke tests for gaia-decomposer.ts (ADR-135 Track E)
 *
 * All HTTP calls are mocked — no live API calls, no cost.
 * Covers 7 scenarios:
 *   1. Atomic question → decomposed=false, single sub-question = original
 *   2. 3-step question → decomposed=true, 3 sub-questions in dependency order
 *   3. Malformed JSON response → fallback to atomic (graceful degradation)
 *   4. API error → fallback to atomic (graceful degradation)
 *   5. synthesizeFromSubAnswers — atomic question returns sub-answer directly (no API call)
 *   6. synthesizeFromSubAnswers — valid JSON response → finalAnswer + reasoning
 *   7. synthesizeFromSubAnswers — malformed JSON → fallback to last sub-answer
 *
 * Run with:
 *   npx tsx src/benchmarks/gaia-decomposer.smoke.ts
 *
 * Expected cost: $0 (all mocked).
 */
import { decomposeQuestion, synthesizeFromSubAnswers } from './gaia-decomposer.js';

/**
 * Replace `globalThis.fetch` with a stub that resolves to whatever `responder` returns.
 *
 * The stub is `async` so that a responder which throws produces a rejected
 * promise instead of a synchronous throw out of `fetch()`.
 *
 * @param {(url: unknown, init: unknown) => unknown} responder - Called with the
 *   arguments passed to `fetch`; its return value becomes the resolved response.
 */
function installFetchMock(responder) {
  globalThis.fetch = async (url, init) => responder(url, init);
}

/**
 * Build a fake successful (HTTP 200) response object.
 *
 * @param {unknown} jsonBody - Value returned by `json()` and, stringified, by `text()`.
 * @returns {{ok: boolean, status: number, text: () => Promise<string>, json: () => Promise<unknown>}}
 */
function mockSuccessResponse(jsonBody) {
  const bodyStr = JSON.stringify(jsonBody);
  return {
    ok: true,
    status: 200,
    text: () => Promise.resolve(bodyStr),
    json: () => Promise.resolve(jsonBody),
  };
}

/**
 * Build a fake failed response object whose `json()` rejects.
 *
 * @param {number} status - HTTP status code to report.
 * @returns {{ok: boolean, status: number, text: () => Promise<string>, json: () => Promise<never>}}
 */
function mockErrorResponse(status) {
  return {
    ok: false,
    status,
    text: () => Promise.resolve(`HTTP ${status} error`),
    json: () => Promise.reject(new Error('error response has no JSON')),
  };
}

/**
 * Build a minimal Anthropic Messages API response shape.
 *
 * @param {string} text - Text placed in the single `content` entry.
 * @param {number} [inputTokens=100] - Reported `usage.input_tokens`.
 * @param {number} [outputTokens=50] - Reported `usage.output_tokens`.
 * @returns {object} Plain object mimicking a Messages API reply.
 */
function anthropicTextResponse(text, inputTokens = 100, outputTokens = 50) {
  return {
    id: 'msg_test',
    type: 'message',
    role: 'assistant',
    content: [{ type: 'text', text }],
    model: 'claude-haiku-4-5',
    stop_reason: 'end_turn',
    usage: { input_tokens: inputTokens, output_tokens: outputTokens },
  };
}

// ---------------------------------------------------------------------------
// Test runner
// ---------------------------------------------------------------------------
const PASS = '\x1b[32mPASS\x1b[0m';
const FAIL = '\x1b[31mFAIL\x1b[0m';
let failures = 0;
let passed = 0;

/**
 * Record one assertion: print a PASS/FAIL line and bump the matching counter.
 *
 * @param {string} label - Description printed next to the verdict.
 * @param {unknown} condition - Truthy counts as a pass, falsy as a failure.
 */
function check(label, condition) {
  if (condition) {
    console.log(` ${PASS} ${label}`);
    passed++;
  } else {
    console.log(` ${FAIL} ${label}`);
    failures++;
  }
}

// ---------------------------------------------------------------------------
// Smoke tests
// ---------------------------------------------------------------------------

/**
 * Run all smoke scenarios against mocked HTTP responses.
 *
 * Prints one line per assertion plus a summary, and calls `process.exit(1)`
 * when any assertion failed. `globalThis.fetch` and
 * `process.env.ANTHROPIC_API_KEY` are restored to their prior values once the
 * scenarios finish (or throw), so importing and calling this function does not
 * leave mocks installed.
 *
 * @returns {Promise<void>}
 */
async function runSmoke() {
  // Counters are module-level; reset them so repeated calls report their own totals.
  failures = 0;
  passed = 0;

  const originalFetch = globalThis.fetch;
  const originalApiKey = process.env.ANTHROPIC_API_KEY;

  console.log('\n=== gaia-decomposer smoke (mocked, $0) ===\n');

  // Set a fake API key so resolveApiKey() doesn't throw
  process.env.ANTHROPIC_API_KEY = 'sk-ant-test-key';

  try {
    // ───────────────────────────────────────────────────────────────────────
    // Test 1: Atomic question → decomposed=false, single sub-question
    // ───────────────────────────────────────────────────────────────────────
    console.log('-- Test 1: Atomic question --');
    {
      const atomicPayload = {
        decomposed: false,
        subQuestions: ['What year was the Eiffel Tower built?'],
        synthesisHint: 'Use directly.',
      };
      installFetchMock(() => mockSuccessResponse(anthropicTextResponse(JSON.stringify(atomicPayload))));
      const result = await decomposeQuestion('What year was the Eiffel Tower built?');
      check('decomposed=false for atomic question', result.decomposed === false);
      check(
        'single sub-question equals original',
        result.subQuestions.length === 1 &&
          result.subQuestions[0] === 'What year was the Eiffel Tower built?',
      );
      check('cost > 0 (Haiku tokens consumed)', result.cost > 0);
    }

    // ───────────────────────────────────────────────────────────────────────
    // Test 2: 3-step complex question → decomposed=true, 3 sub-questions in order
    // ───────────────────────────────────────────────────────────────────────
    console.log('\n-- Test 2: 3-step complex question --');
    {
      const complexPayload = {
        decomposed: true,
        subQuestions: [
          'What year was the Eiffel Tower built?',
          'What decade contains the year 1889?',
          'Who directed the highest-grossing film of the 1880s?',
        ],
        synthesisHint: 'The answer to sub-question 3 is the final answer.',
      };
      installFetchMock(() => mockSuccessResponse(anthropicTextResponse(JSON.stringify(complexPayload))));
      const question = 'Who directed the highest-grossing film of the decade the Eiffel Tower was built?';
      const result = await decomposeQuestion(question);
      check('decomposed=true for 3-step question', result.decomposed === true);
      check('3 sub-questions returned', result.subQuestions.length === 3);
      check(
        'first sub-question asks about Eiffel Tower year',
        result.subQuestions[0].toLowerCase().includes('eiffel tower'),
      );
      check('synthesisHint is non-empty', result.synthesisHint.length > 0);
    }

    // ───────────────────────────────────────────────────────────────────────
    // Test 3: Malformed JSON response → fallback to atomic
    // ───────────────────────────────────────────────────────────────────────
    console.log('\n-- Test 3: Malformed JSON fallback --');
    {
      installFetchMock(() => mockSuccessResponse(anthropicTextResponse('This is not JSON at all { broken')));
      const question = 'What is the capital of France?';
      const result = await decomposeQuestion(question);
      check('malformed JSON → decomposed=false', result.decomposed === false);
      check('malformed JSON → sub-question is original', result.subQuestions[0] === question);
    }

    // ───────────────────────────────────────────────────────────────────────
    // Test 4: API error → fallback to atomic
    // ───────────────────────────────────────────────────────────────────────
    console.log('\n-- Test 4: API error fallback --');
    {
      installFetchMock(() => mockErrorResponse(500));
      const question = 'How many planets are in the solar system?';
      const result = await decomposeQuestion(question);
      check('API error → decomposed=false', result.decomposed === false);
      check('API error → sub-question is original', result.subQuestions[0] === question);
      check('API error → cost=0', result.cost === 0);
    }

    // ───────────────────────────────────────────────────────────────────────
    // Test 5: synthesizeFromSubAnswers — atomic question (decomposed=false)
    //         returns sub-answer directly WITHOUT an API call
    // ───────────────────────────────────────────────────────────────────────
    console.log('\n-- Test 5: synthesize — atomic passthrough (no API call) --');
    {
      // Install a fetch mock that records whether it was called — it should NOT be.
      let fetchCalled = false;
      installFetchMock(() => {
        fetchCalled = true;
        return mockSuccessResponse({ finalAnswer: 'unexpected', reasoning: 'should not reach here' });
      });
      const atomicDecomposed = {
        originalQuestion: 'What year was the Eiffel Tower built?',
        subQuestions: ['What year was the Eiffel Tower built?'],
        synthesisHint: 'Use directly.',
        decomposed: false,
        cost: 0,
      };
      const result = await synthesizeFromSubAnswers(atomicDecomposed, ['1889']);
      check('atomic synthesize returns sub-answer directly', result.finalAnswer === '1889');
      check('atomic synthesize has cost=0', result.cost === 0);
      check('atomic synthesize did NOT call API', fetchCalled === false);
    }

    // ───────────────────────────────────────────────────────────────────────
    // Test 6: synthesizeFromSubAnswers — valid JSON → finalAnswer + reasoning
    // ───────────────────────────────────────────────────────────────────────
    console.log('\n-- Test 6: synthesize — valid JSON response --');
    {
      const synthPayload = {
        finalAnswer: 'Georges Méliès',
        reasoning: 'The highest-grossing film of the 1880s was X. Its director was Georges Méliès.',
      };
      installFetchMock(() => mockSuccessResponse(anthropicTextResponse(JSON.stringify(synthPayload))));
      const decomposed = {
        originalQuestion: 'Who directed the highest-grossing film of the decade the Eiffel Tower was built?',
        subQuestions: [
          'What year was the Eiffel Tower built?',
          'What decade contains 1889?',
          'Who directed the highest-grossing film of the 1880s?',
        ],
        synthesisHint: 'The director from sub-question 3 is the answer.',
        decomposed: true,
        cost: 0.0003,
      };
      const result = await synthesizeFromSubAnswers(decomposed, ['1889', '1880s', 'Georges Méliès']);
      check('synthesize returns correct finalAnswer', result.finalAnswer === 'Georges Méliès');
      check('synthesize reasoning is non-empty', result.reasoning.length > 0);
      check('synthesize cost > 0 (Sonnet tokens consumed)', result.cost > 0);
    }

    // ───────────────────────────────────────────────────────────────────────
    // Test 7: synthesizeFromSubAnswers — malformed JSON → fallback to last sub-answer
    // ───────────────────────────────────────────────────────────────────────
    console.log('\n-- Test 7: synthesize — malformed JSON fallback --');
    {
      installFetchMock(() => mockSuccessResponse(anthropicTextResponse('{ bad json here ]]}')));
      const decomposed = {
        originalQuestion: 'How many people live in Tokyo?',
        subQuestions: [
          'What is the population of Tokyo metropolitan area?',
          'What is the population of Tokyo proper?',
        ],
        synthesisHint: 'Use the Tokyo proper figure.',
        decomposed: true,
        cost: 0.0003,
      };
      const result = await synthesizeFromSubAnswers(decomposed, [
        '37.4 million (metropolitan)',
        '13.96 million (proper)',
      ]);
      check('malformed JSON fallback → returns last sub-answer', result.finalAnswer === '13.96 million (proper)');
      check(
        'malformed JSON fallback → reasoning mentions parse error',
        result.reasoning.toLowerCase().includes('parse error'),
      );
    }
  } finally {
    // Undo the global mutations made above, even when a scenario throws.
    globalThis.fetch = originalFetch;
    if (originalApiKey === undefined) {
      // Assigning undefined to process.env would store the string "undefined".
      delete process.env.ANTHROPIC_API_KEY;
    } else {
      process.env.ANTHROPIC_API_KEY = originalApiKey;
    }
  }

  // ─────────────────────────────────────────────────────────────────────────
  // Summary
  // ─────────────────────────────────────────────────────────────────────────
  const total = passed + failures;
  const verdict = failures === 0 ? 'PASSED' : `FAILED (${failures}/${total} assertion(s))`;
  console.log(`\n=== smoke ${verdict} — ${passed}/${total} passed ===\n`);
  if (failures > 0) process.exit(1);
}

// Run when executed directly
const isMain =
  process.argv[1] &&
  (process.argv[1].endsWith('gaia-decomposer.smoke.ts') ||
    process.argv[1].endsWith('gaia-decomposer.smoke.js'));
if (isMain) {
  runSmoke().catch((err) => {
    console.error('Smoke failed with exception:', err);
    process.exit(1);
  });
}

export { runSmoke };
//# sourceMappingURL=gaia-decomposer.smoke.js.map