UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

131 lines 7.01 kB
/**
 * Smoke tests for gaia-mode-router.ts — iter 58 (#2156)
 *
 * Synthetic question texts covering each routing rule.
 * All 10 single-question cases must pass, plus the batch summary check
 * (no API calls — purely rule-based).
 *
 * Run:
 *   npx ts-node src/benchmarks/gaia-mode-router.smoke.ts
 *   # or after build:
 *   node dist/src/benchmarks/gaia-mode-router.smoke.js
 *
 * Refs: iter 58, #2156
 */
import { routeQuestion, routeQuestions } from './gaia-mode-router.js';

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/** Number of assertions that held so far. */
let passed = 0;
/** Number of assertions that did not hold (or groups that threw) so far. */
let failed = 0;

/**
 * Record one check: print PASS/FAIL and bump the matching counter.
 * Never throws, so one failing check does not stop the remaining ones.
 *
 * @param {unknown} condition - Truthy means the check passed.
 * @param {string} label - Short description printed with the result.
 * @param {string} [detail] - Extra context appended to FAIL lines only.
 */
function assert(condition, label, detail) {
  if (condition) {
    console.log(` PASS ${label}`);
    passed++;
  } else {
    console.error(` FAIL ${label}${detail ? ': ' + detail : ''}`);
    failed++;
  }
}

/**
 * Build a minimal GaiaQuestion for routing tests.
 *
 * @param {string} text - Question text; its first 12 characters (non-word
 *   characters replaced by '-') also form the task id suffix.
 * @param {string|null} [fileName=null] - Attachment file name, or null for none.
 * @returns {object} Question record with a `/tmp/<fileName>` path when a file
 *   name is given and `null` otherwise.
 */
function q(text, fileName = null) {
  return {
    task_id: `smoke-${text.slice(0, 12).replace(/\W/g, '-')}`,
    level: 1,
    question: text,
    final_answer: 'irrelevant',
    file_name: fileName,
    file_path: fileName ? `/tmp/${fileName}` : null,
  };
}

/**
 * Run one test group, converting an unexpected exception into a recorded
 * failure so the remaining groups and the final summary still run.
 *
 * @param {Function} testFn - Zero-argument test group function.
 */
function runGroup(testFn) {
  try {
    testFn();
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    assert(false, `${testFn.name} threw`, message);
  }
}

// ---------------------------------------------------------------------------
// Test cases — one per routing rule plus edge cases
// ---------------------------------------------------------------------------

/** Questions carrying an attachment are expected to route to ToolCalling. */
function testAttachmentRule() {
  console.log('\n-- Rule 1: attachment → ToolCalling --');

  const d = routeQuestion(q('What is the total in column B of the spreadsheet?', 'data.xlsx'));
  assert(d.mode === 'ToolCalling', 'xlsx attachment → ToolCalling', `got mode=${d.mode}`);
  assert(d.rule === 'attachment', 'rule is attachment', `got rule=${d.rule}`);

  const d2 = routeQuestion(q('Describe the image shown.', 'photo.png'));
  assert(d2.mode === 'ToolCalling', 'png attachment → ToolCalling', `got mode=${d2.mode}`);
  assert(d2.rule === 'attachment', 'rule is attachment for image', `got rule=${d2.rule}`);
}

/** Questions with web-lookup wording are expected to route to ToolCalling. */
function testWebRetrievalRule() {
  console.log('\n-- Rule 2: web retrieval → ToolCalling --');

  const d1 = routeQuestion(q('According to Wikipedia, what year was the Eiffel Tower built?'));
  assert(d1.mode === 'ToolCalling', 'wikipedia keyword → ToolCalling', `got mode=${d1.mode}`);
  assert(d1.rule === 'web_retrieval', 'rule is web_retrieval', `got rule=${d1.rule}`);

  const d2 = routeQuestion(q('Visit the website and find the CEO of the company listed.'));
  assert(d2.mode === 'ToolCalling', 'visit/website keyword → ToolCalling', `got mode=${d2.mode}`);
  assert(d2.rule === 'web_retrieval', 'rule is web_retrieval for browse', `got rule=${d2.rule}`);

  const d3 = routeQuestion(q('Based on the article published in Nature in 2021, what was the main finding?'));
  assert(d3.mode === 'ToolCalling', 'article/published → ToolCalling', `got mode=${d3.mode}`);
}

/** Computation / logic questions are expected to route to CodeAgent. */
function testPureReasoningRule() {
  console.log('\n-- Rule 3: pure reasoning → CodeAgent --');

  const d1 = routeQuestion(q('Calculate the volume of a sphere with radius 7.'));
  assert(d1.mode === 'CodeAgent', 'calculate keyword → CodeAgent', `got mode=${d1.mode}`);
  assert(d1.rule === 'pure_reasoning', 'rule is pure_reasoning', `got rule=${d1.rule}`);

  const d2 = routeQuestion(q('How many combinations of 3 items can be chosen from a set of 10?'));
  assert(d2.mode === 'CodeAgent', 'combinations keyword → CodeAgent', `got mode=${d2.mode}`);

  const d3 = routeQuestion(q('Solve the logic puzzle: Alice is taller than Bob. Bob is shorter than Carol. Who is tallest?'));
  assert(d3.mode === 'CodeAgent', 'logic puzzle keyword → CodeAgent', `got mode=${d3.mode}`);
}

/** A question longer than 400 characters is expected to route to ToolCalling. */
function testLongQuestionRule() {
  console.log('\n-- Rule 4: long question → ToolCalling --');

  // Build a question > 400 chars with no explicit keywords
  const longText =
    'A researcher is studying the behavior of a particular bird species that lives in a mountainous region. ' +
    'The researcher has collected data over five years and wants to understand the migration patterns. ' +
    'Using the provided observations, determine whether the species migrates seasonally or stays year-round. ' +
    'Provide the specific months during which migration occurs based on the data collected. ' +
    'Be precise about the start and end months for each migratory period.';
  assert(longText.length > 400, 'sanity: text is > 400 chars', `length=${longText.length}`);

  const d = routeQuestion(q(longText));
  assert(d.mode === 'ToolCalling', 'long question → ToolCalling', `got mode=${d.mode}`);
  assert(d.rule === 'long_question', 'rule is long_question', `got rule=${d.rule}`);
}

/** A short factual question matching no other rule is expected to hit the default. */
function testDefaultRule() {
  console.log('\n-- Rule 5: default → ToolCalling --');

  const d = routeQuestion(q('What is the capital of France?'));
  assert(d.mode === 'ToolCalling', 'short factual → ToolCalling (default)', `got mode=${d.mode}`);
  assert(d.rule === 'default', 'rule is default', `got rule=${d.rule}`);
}

/** Batch routing must return one decision per question and consistent summary counts. */
function testBatchRouting() {
  console.log('\n-- Batch: routeQuestions summary --');

  const questions = [
    q('What is the capital of France?'), // default → ToolCalling
    q('Calculate the sum of 1 to 100 using Gauss formula.'), // pure_reasoning → CodeAgent
    q('Visit the NASA website and find the most recent mission.'), // web_retrieval → ToolCalling
    q('What is the total in the file?', 'data.xlsx'), // attachment → ToolCalling
  ];
  const { decisions, summary } = routeQuestions(questions);

  assert(summary.total === 4, 'summary.total = 4', `got ${summary.total}`);
  assert(summary.codeAgent === 1, 'exactly 1 CodeAgent', `got ${summary.codeAgent}`);
  assert(summary.toolCalling === 3, 'exactly 3 ToolCalling', `got ${summary.toolCalling}`);
  assert(decisions.length === 4, 'decisions length = 4', `got ${decisions.length}`);

  // Report the observed count on failure, matching the other assertions.
  const { byRule } = summary;
  assert(byRule.pure_reasoning === 1, 'byRule.pure_reasoning = 1', `got ${byRule.pure_reasoning}`);
  assert(byRule.attachment === 1, 'byRule.attachment = 1', `got ${byRule.attachment}`);
  assert(byRule.web_retrieval === 1, 'byRule.web_retrieval = 1', `got ${byRule.web_retrieval}`);
  assert(byRule.default === 1, 'byRule.default = 1', `got ${byRule.default}`);
}

// ---------------------------------------------------------------------------
// Main
// ---------------------------------------------------------------------------

/**
 * Run every test group in order, print the pass/total summary, and mark the
 * process as failed (exit status 1) when any check did not hold.
 */
function main() {
  console.log('=== gaia-mode-router smoke tests ===');
  console.log('(no API calls — rule-based only)\n');

  const groups = [
    testAttachmentRule,
    testWebRetrievalRule,
    testPureReasoningRule,
    testLongQuestionRule,
    testDefaultRule,
    testBatchRouting,
  ];
  for (const group of groups) {
    runGroup(group);
  }

  const total = passed + failed;
  console.log(`\n=== Results: ${passed}/${total} passed ===`);
  if (failed > 0) {
    console.error(`${failed} tests FAILED`);
    // Set the exit code instead of calling process.exit(1) so buffered
    // stdout/stderr output is flushed before the process ends.
    process.exitCode = 1;
  }
}

main();
//# sourceMappingURL=gaia-mode-router.smoke.js.map