claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
281 lines • 11.9 kB
JavaScript
/**
* GAIA Causal Failure-Avoidance Memory — ADR-135 Track I
*
* Records causal edges after each failed GAIA trajectory:
* "trying tool X on question type Y → caused failure Z"
*
* Before each new question, retrieves matching causal edges and injects
* an "avoid these approaches" hint into the agent's system prompt.
*
* This is one of ruflo's 6 architectural primitives distinguishing it
* from HAL: HAL is stateless across runs; ruflo accumulates causal memory.
*
* Storage: JSONL file at ~/.cache/ruflo/gaia/causal-edges.jsonl
* - Simple, portable, no runtime dependency on AgentDB
* - Production upgrade path: switch to AgentDB causal-edge MCP controller
* (`mcp__claude-flow__agentdb_causal-edge`) for cross-session persistence
* and embedding-based similarity matching.
*
* Expected lift:
* - First run (no edges yet): +0pp (empty hint → no overhead)
* - After 5+ runs (warm-up): +2-5pp compound
*
* NOT wired into gaia-bench.ts here — wiring is a follow-up PR once all
* in-flight iterators (29/31/34/35/37) have landed to avoid conflicts.
*
* Refs: ADR-135, ADR-133, #2156
*/
import * as crypto from 'node:crypto';
import * as fs from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';
import * as readline from 'node:readline';
// ---------------------------------------------------------------------------
// Internal helpers
// ---------------------------------------------------------------------------
const DEFAULT_STORE_SUFFIX = path.join('.cache', 'ruflo', 'gaia', 'causal-edges.jsonl');
const DEFAULT_MAX_EDGES = 5;
/** Resolve the JSONL store path from options, defaulting to ~/.cache/ruflo/… */
function resolveStorePath(options) {
if (options?.storePath) {
return options.storePath;
}
return path.join(os.homedir(), DEFAULT_STORE_SUFFIX);
}
/**
* Compute a deterministic question signature.
*
* Algorithm (v1, hash-based):
* 1. Lower-case the question text.
* 2. Collapse runs of whitespace to a single space and trim.
* 3. SHA-256 → first 16 hex characters (64-bit prefix, collision-unlikely
* for the ~450-question GAIA validation set).
*
* Future (v2, embedding-based): replace with RuVector cosine similarity so
* semantically similar questions (paraphrases, translated variants) share
* causal edges across runs.
*/
export function computeQuestionSignature(questionText) {
const normalised = questionText.toLowerCase().replace(/\s+/g, ' ').trim();
return crypto.createHash('sha256').update(normalised, 'utf8').digest('hex').slice(0, 16);
}
/** Derive a failure type from a completed agent result + known correctness. */
export function inferFailureType(result, wasCorrect) {
if (wasCorrect) {
return null; // Not a failure — callers should skip this question.
}
if (result.timedOut === true) {
return 'timeout';
}
if (result.error) {
return 'tool_error';
}
if (result.finalAnswer === null || result.finalAnswer.trim() === '') {
return 'empty_result';
}
return 'wrong_answer';
}
/**
* Parse one JSONL line into a CausalEdge.
* Returns null if the line is empty, a comment, or malformed.
*/
function parseLine(line) {
const trimmed = line.trim();
if (trimmed === '' || trimmed.startsWith('//') || trimmed.startsWith('#')) {
return null;
}
try {
const parsed = JSON.parse(trimmed);
// Minimal shape guard — avoids crashing on partially-written lines.
if (typeof parsed.questionSignature !== 'string' ||
typeof parsed.failedTool !== 'string' ||
typeof parsed.failedTrajectoryStep !== 'string' ||
typeof parsed.observedFailureType !== 'string' ||
typeof parsed.createdAt !== 'string' ||
typeof parsed.occurrenceCount !== 'number') {
return null;
}
return parsed;
}
catch {
// Corrupted line — skip gracefully.
return null;
}
}
/** Read all valid CausalEdge entries from the JSONL store. */
async function readAllEdges(storePath) {
if (!fs.existsSync(storePath)) {
return [];
}
const edges = [];
const fileStream = fs.createReadStream(storePath, { encoding: 'utf8' });
const rl = readline.createInterface({ input: fileStream, crlfDelay: Infinity });
for await (const line of rl) {
const edge = parseLine(line);
if (edge !== null) {
edges.push(edge);
}
}
return edges;
}
/** Write all edges back to the JSONL store (full rewrite for upsert support). */
function writeAllEdges(storePath, edges) {
const dir = path.dirname(storePath);
fs.mkdirSync(dir, { recursive: true });
const content = edges.map((e) => JSON.stringify(e)).join('\n') + (edges.length > 0 ? '\n' : '');
fs.writeFileSync(storePath, content, { encoding: 'utf8' });
}
// ---------------------------------------------------------------------------
// Trajectory analysis
// ---------------------------------------------------------------------------
/**
* Inspect a failed trajectory and extract tool-level failure events.
*
* The agent result carries `toolCallsByName` (tool → call count). For a
* failed run we attribute the failure to the most-used tool, since that is
* the tool whose repeated use did not converge to a correct answer.
*
* Returns an array of (tool, stepDescription, failureType) triples.
* Empty array when there is nothing attributable.
*/
function extractFailureEvents(result, failureType) {
const events = [];
const toolCalls = result.toolCallsByName;
if (!toolCalls || Object.keys(toolCalls).length === 0) {
// No tool calls at all — record a synthetic "no_tool" event.
events.push({
tool: 'no_tool_called',
step: `Agent failed with type=${failureType} without making any tool calls`,
type: failureType,
});
return events;
}
// Attribute one event per tool that was called at least once.
for (const [toolName, callCount] of Object.entries(toolCalls)) {
const step = buildStepDescription(toolName, callCount, failureType);
events.push({ tool: toolName, step, type: failureType });
}
return events;
}
function buildStepDescription(toolName, callCount, failureType) {
const timesStr = callCount === 1 ? 'once' : `${callCount} times`;
switch (failureType) {
case 'empty_result':
return `${toolName} called ${timesStr} but returned empty/no-result`;
case 'timeout':
return `${toolName} called ${timesStr} but agent timed out before converging`;
case 'tool_error':
return `${toolName} called ${timesStr} but raised an execution error`;
case 'wrong_answer':
return `${toolName} called ${timesStr} but final answer was incorrect`;
default:
return `${toolName} called ${timesStr}; failure type=${failureType}`;
}
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
* After a GAIA trajectory completes, analyse it for causal failure patterns
* and persist each observed edge in the JSONL store.
*
* Behaviour:
* - If `wasCorrect === true`, no edges are written (zero overhead).
* - Each (signature, tool, step) triple is deduplicated: if the same triple
* already exists in the store, its `occurrenceCount` is incremented in
* place rather than appending a new line.
* - Edges beyond `maxEdgesPerSignature` (default 5) per signature are
* discarded to keep the store bounded.
*
* @param question - The GaiaQuestion that was attempted.
* @param result - The agent result from runGaiaAgent().
* @param wasCorrect - Whether the final answer was judged correct.
* @param options - Optional store path and limits.
* @returns Number of edges written/updated and the resolved store path.
*/
export async function recordCausalFailures(question, result, wasCorrect, options) {
const storePath = resolveStorePath(options);
const maxEdges = options?.maxEdgesPerSignature ?? DEFAULT_MAX_EDGES;
const failureType = inferFailureType(result, wasCorrect);
if (failureType === null) {
// Correct answer — nothing to record.
return { edgesRecorded: 0, storePath };
}
const signature = computeQuestionSignature(question.question);
const failureEvents = extractFailureEvents(result, failureType);
if (failureEvents.length === 0) {
return { edgesRecorded: 0, storePath };
}
// Load existing edges, upsert, cap, rewrite.
const existing = await readAllEdges(storePath);
let edgesRecorded = 0;
for (const event of failureEvents) {
// Count how many edges already exist for this signature.
const sigEdges = existing.filter((e) => e.questionSignature === signature);
// Check for exact duplicate (signature + tool + step).
const dupIdx = existing.findIndex((e) => e.questionSignature === signature &&
e.failedTool === event.tool &&
e.failedTrajectoryStep === event.step);
if (dupIdx >= 0) {
// Increment occurrence count in place.
existing[dupIdx].occurrenceCount += 1;
edgesRecorded++;
continue;
}
// New edge — only add if we haven't hit the per-signature cap.
if (sigEdges.length >= maxEdges) {
continue; // Cap reached; discard.
}
const newEdge = {
questionSignature: signature,
failedTool: event.tool,
failedTrajectoryStep: event.step,
observedFailureType: event.type,
createdAt: new Date().toISOString(),
occurrenceCount: 1,
};
existing.push(newEdge);
edgesRecorded++;
}
if (edgesRecorded > 0) {
writeAllEdges(storePath, existing);
}
return { edgesRecorded, storePath };
}
/**
* Before running a new question, retrieve causal edges from prior failures
* that match the question's signature and format them as a system-prompt hint.
*
* Return contract:
* - No edges matched → `{ hint: '', edgesMatched: 0 }` — caller MUST NOT
* inject an empty hint (wastes tokens; may confuse the model).
* - 1+ edges matched → `{ hint: '[PRIOR FAILURES] …', edgesMatched: N }`.
*
* @param question - The GaiaQuestion about to be attempted.
* @param options - Optional store path and limits.
* @returns Formatted hint string and match count.
*/
export async function retrieveCausalHints(question, options) {
const storePath = resolveStorePath(options);
const maxEdges = options?.maxEdgesPerSignature ?? DEFAULT_MAX_EDGES;
const signature = computeQuestionSignature(question.question);
const allEdges = await readAllEdges(storePath);
// Filter to this question's signature, sort by occurrence count descending
// so the most-reinforced warnings appear first.
const matched = allEdges
.filter((e) => e.questionSignature === signature)
.sort((a, b) => b.occurrenceCount - a.occurrenceCount)
.slice(0, maxEdges);
if (matched.length === 0) {
return { hint: '', edgesMatched: 0 };
}
const lines = matched.map((e) => {
const times = e.occurrenceCount === 1 ? '1 time' : `${e.occurrenceCount} times`;
return ` - ${e.failedTool} failed ${times} (${e.observedFailureType}): ${e.failedTrajectoryStep}`;
});
const hint = '[PRIOR FAILURES] On similar questions, these approaches failed:\n' +
lines.join('\n') +
'\nAvoid repeating these patterns. Try alternative tools or approaches.';
return { hint, edgesMatched: matched.length };
}
//# sourceMappingURL=gaia-causal-memory.js.map