claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
177 lines • 7.27 kB
JavaScript
/**
* GAIA Agent Contrastive Wrapper — iter 49.5
*
* Thin wrapper around runGaiaAgent that adds three ruflo intelligence hooks:
* 1. PRE: memory_search_unified for relevant patterns
* 2. DURING: trajectory recording (start/step/end) via memory store
* 3. POST: memory_store of question+answer+verdict for future recall
*
* Purpose: measure whether ruflo's AgentDB intelligence (5,930 entries,
* 26,490 patterns) provides a measurable lift on GAIA L1 when injected
* as system context before the agent loop.
*
* Design constraints:
* - Does NOT change the underlying agent loop logic
* - Does NOT enable ADR-135 tracks (critic, decomposition, voting, MoE, KG)
* - Pure addition: memory before, trajectory during, store after
* - Graceful fallback: any hook failure is logged but does NOT fail the question
*
* Refs: ADR-133, iter 49.5, #2156
*/
import { execSync } from 'node:child_process';
import { runGaiaAgent } from './gaia-agent.js';
// ---------------------------------------------------------------------------
// Ruflo CLI helpers (best-effort — never throw)
// ---------------------------------------------------------------------------
const CLI_TIMEOUT_MS = 10_000;
const GAIA_NAMESPACE = 'gaia-l1-questions';
/**
* Run memory search via CLI and return up to `limit` result summaries.
* Returns empty array on any error (graceful fallback).
*/
function memorySearch(query, limit) {
try {
const raw = execSync(`npx @claude-flow/cli@latest memory search --query ${JSON.stringify(query)} --limit ${limit}`, { encoding: 'utf-8', timeout: CLI_TIMEOUT_MS, stdio: ['pipe', 'pipe', 'pipe'] });
// Parse table rows: extract Preview column (5th column in pipe-separated table)
const rows = raw
.split('\n')
.filter((line) => line.startsWith('|') && !line.includes('Key') && !line.includes('---'))
.map((line) => {
const cols = line.split('|').map((c) => c.trim()).filter(Boolean);
// cols: [Key, Score, Namespace, Preview]
return cols[3] ?? '';
})
.filter((preview) => preview.length > 0 && !preview.includes('Preview'));
return rows.slice(0, limit);
}
catch {
return [];
}
}
/**
* Store a key/value in memory under the gaia namespace.
* Best-effort — swallows errors.
*/
function memoryStore(key, value) {
try {
const valueStr = JSON.stringify(value);
execSync(`npx @claude-flow/cli@latest memory store --key ${JSON.stringify(key)} --value ${JSON.stringify(valueStr)} --namespace ${GAIA_NAMESPACE}`, { encoding: 'utf-8', timeout: CLI_TIMEOUT_MS, stdio: ['pipe', 'pipe', 'pipe'] });
return true;
}
catch {
return false;
}
}
/**
* Record a trajectory event (start/step/end) via memory store.
* This is a lightweight proxy — the MCP trajectory tools are not available
* from inside the benchmark harness. We persist the trajectory as memory
* entries so they can be recalled later.
*/
function trajectoryRecord(type, data) {
try {
const key = `traj-gaia-l1-${type}-${Date.now()}`;
execSync(`npx @claude-flow/cli@latest memory store --key ${JSON.stringify(key)} --value ${JSON.stringify(JSON.stringify(data))} --namespace trajectories`, { encoding: 'utf-8', timeout: CLI_TIMEOUT_MS, stdio: ['pipe', 'pipe', 'pipe'] });
}
catch {
// Best-effort — trajectory recording never blocks progress
}
}
// ---------------------------------------------------------------------------
// System prompt injection
// ---------------------------------------------------------------------------
/**
* Build a context prefix to prepend to the system prompt when patterns
* are found in memory. Returns empty string when patterns is empty.
*/
function buildMemoryContextPrefix(patterns) {
if (patterns.length === 0)
return '';
return ('Relevant patterns from prior ruflo work:\n' +
patterns.map((p) => `- ${p.slice(0, 120)}`).join('\n') +
'\n\n');
}
// ---------------------------------------------------------------------------
// Contrastive wrapper
// ---------------------------------------------------------------------------
/**
* Wrap runGaiaAgent with ruflo intelligence hooks.
*
* Hook points:
* 1. memory_search before the agent loop (context injection)
* 2. trajectory-start/end records via memory store
* 3. memory_store after the final answer
*
* The underlying agent loop is called unchanged.
*/
export async function runGaiaAgentContrastive(question, options, stats) {
const questionId = question.task_id;
// ---- 1. PRE: memory_search for relevant patterns ----------------------
stats.memorySearchCalls++;
const patterns = memorySearch(question.question, 3);
const patternsInjected = patterns.length;
if (patterns.length > 0) {
stats.memorySearchHits++;
stats.totalPatternsInjected += patterns.length;
}
// ---- 2. TRAJECTORY: start recording -----------------------------------
trajectoryRecord('start', {
task: 'gaia-l1',
question_id: questionId,
question_snippet: question.question.slice(0, 80),
startedAt: new Date().toISOString(),
patternsInjected,
});
// ---- Build patched options with memory context injected ---------------
// We inject context by wrapping the question text (system prompt cannot be
// overridden via GaiaAgentOptions today; instead we prepend context to
// the question text as a user-visible hint). This is intentionally minimal.
const contextPrefix = buildMemoryContextPrefix(patterns);
const patchedQuestion = contextPrefix
? { ...question, question: contextPrefix + question.question }
: question;
// ---- Run the actual agent loop (unchanged) ----------------------------
const result = await runGaiaAgent(patchedQuestion, options);
// ---- 3. POST: store question+answer+verdict ---------------------------
const questionKey = `gaia-l1-q${questionId}-${Date.now()}`;
const stored = memoryStore(questionKey, {
question: question.question,
answer: result.finalAnswer,
questionId,
model: options.model ?? 'claude-sonnet-4-6',
turns: result.turns,
timedOut: result.timedOut ?? false,
storedAt: new Date().toISOString(),
});
if (stored)
stats.memoryStoreCount++;
// ---- TRAJECTORY: end recording ----------------------------------------
trajectoryRecord('end', {
task: 'gaia-l1',
question_id: questionId,
answer: result.finalAnswer,
turns: result.turns,
timedOut: result.timedOut ?? false,
endedAt: new Date().toISOString(),
});
stats.trajectoryCount++;
return {
...result,
patternsInjected,
trajectoryRecorded: true,
memoryStored: stored,
};
}
/**
* Create a fresh ContrastiveStats counter object.
*/
export function createContrastiveStats() {
return {
memorySearchCalls: 0,
memorySearchHits: 0,
memoryStoreCount: 0,
trajectoryCount: 0,
totalPatternsInjected: 0,
};
}
//# sourceMappingURL=gaia-agent-contrastive.js.map