claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
133 lines • 5.74 kB
TypeScript
/**
* GAIA Causal Failure-Avoidance Memory — ADR-135 Track I
*
* Records causal edges after each failed GAIA trajectory:
* "trying tool X on question type Y → caused failure Z"
*
* Before each new question, retrieves matching causal edges and injects
* an "avoid these approaches" hint into the agent's system prompt.
*
* This is one of ruflo's 6 architectural primitives distinguishing it
* from HAL: HAL is stateless across runs; ruflo accumulates causal memory.
*
* Storage: JSONL file at ~/.cache/ruflo/gaia/causal-edges.jsonl
* - Simple, portable, no runtime dependency on AgentDB
* - Production upgrade path: switch to AgentDB causal-edge MCP controller
* (`mcp__claude-flow__agentdb_causal-edge`) for cross-session persistence
* and embedding-based similarity matching.
*
* Expected lift:
* - First run (no edges yet): +0pp (empty hint → no overhead)
* - After 5+ runs (warm-up): +2-5pp compound
*
* NOT wired into gaia-bench.ts here — wiring is a follow-up PR once all
* in-flight iterators (29/31/34/35/37) have landed to avoid conflicts.
*
* Refs: ADR-135, ADR-133, #2156
*/
import type { GaiaQuestion } from './gaia-loader.js';
import type { GaiaAgentResult } from './gaia-agent.js';
/**
* Observation failure categories derived from trajectory analysis.
*
* This union is the type of `CausalEdge.observedFailureType` and the
* non-null return type of `inferFailureType`.
*
* NOTE(review): the exact rule that maps a trajectory to each category
* lives in the implementation, which is not visible from this
* declaration file.
*/
export type FailureType = 'empty_result' | 'timeout' | 'wrong_answer' | 'tool_error';
/**
* A causal edge: "in a question of signature S, using tool T in way W
* caused failure F."
*
* Edges are deduplicated on the (questionSignature, failedTool,
* failedTrajectoryStep) triple: when the same triple is observed again,
* `occurrenceCount` is incremented instead of a duplicate edge being stored.
*/
export interface CausalEdge {
/**
* Deterministic hash of the normalised question text
* (see `computeQuestionSignature`).
*/
questionSignature: string;
/** Tool name that failed (e.g. 'web_search', 'python_exec'). */
failedTool: string;
/** Brief description of what the failing step attempted. */
failedTrajectoryStep: string;
/** Categorised failure type. */
observedFailureType: FailureType;
/** ISO-8601 timestamp of the first observation of this edge. */
createdAt: string;
/**
* Number of times this edge has been observed; increments when the same
* edge is observed in a subsequent run.
*/
occurrenceCount: number;
}
/**
* Options for causal-memory operations.
*
* Accepted as the optional last argument of both `recordCausalFailures`
* and `retrieveCausalHints`; every field is optional.
*/
export interface CausalMemoryOptions {
/**
* Override the JSONL store path.
* Default: ~/.cache/ruflo/gaia/causal-edges.jsonl
*/
storePath?: string;
/**
* Maximum edges to return per question signature when retrieving hints.
* The documentation of `recordCausalFailures` also cites this limit as the
* per-signature cap beyond which edges are discarded when recording.
* Default: 5
*/
maxEdgesPerSignature?: number;
/**
* Signature similarity threshold (0–1). Currently unused for the simple
* hash-based implementation; reserved for future RuVector upgrade.
* Default: 0.7
*/
similarityThreshold?: number;
}
/** Result of a recordCausalFailures call. */
export interface RecordResult {
/**
* Number of edges written or updated by the call.
* NOTE(review): presumably 0 when the answer was correct, since no edges
* are written in that case — confirm against the implementation.
*/
edgesRecorded: number;
/** The resolved path of the JSONL store used by the call. */
storePath: string;
}
/** Result of a retrieveCausalHints call. */
export interface RetrieveResult {
/**
* System-prompt-ready hint string.
* Empty string when no edges match (caller must not inject empty hints).
*/
hint: string;
/** Number of stored edges that matched the question; 0 when none matched. */
edgesMatched: number;
}
/**
* Compute a deterministic question signature.
*
* Algorithm (v1, hash-based):
* 1. Lower-case the question text.
* 2. Collapse runs of whitespace to a single space and trim.
* 3. SHA-256 → first 16 hex characters (64-bit prefix, collision-unlikely
* for the ~450-question GAIA validation set).
*
* Future (v2, embedding-based): replace with RuVector cosine similarity so
* semantically similar questions (paraphrases, translated variants) share
* causal edges across runs.
*
* @param questionText - Raw question text; it is normalised (steps 1-2)
* before hashing.
* @returns The signature: the first 16 hex characters of the SHA-256 digest
* of the normalised text.
*/
export declare function computeQuestionSignature(questionText: string): string;
/**
* Derive a failure type from a completed agent result + known correctness.
*
* @param result - The completed agent result to classify.
* @param wasCorrect - Whether the final answer was judged correct.
* @returns The inferred failure category, or `null`.
* NOTE(review): `null` presumably means no failure applies (e.g. the answer
* was correct) — confirm against the implementation, which is not visible
* from this declaration file.
*/
export declare function inferFailureType(result: GaiaAgentResult, wasCorrect: boolean): FailureType | null;
/**
* After a GAIA trajectory completes, analyse it for causal failure patterns
* and persist each observed edge in the JSONL store.
*
* Behaviour:
* - If `wasCorrect === true`, no edges are written (zero overhead).
* - Each (signature, tool, step) triple is deduplicated: if the same triple
* already exists in the store, its `occurrenceCount` is incremented in
* place rather than appending a new line.
* - Edges beyond `maxEdgesPerSignature` (default 5) per signature are
* discarded to keep the store bounded.
*
* NOTE(review): which edges are kept when the per-signature limit is
* exceeded, and how store I/O errors surface (rejection vs. swallow), is not
* specified here — confirm against the implementation.
*
* @param question - The GaiaQuestion that was attempted.
* @param result - The agent result from runGaiaAgent().
* @param wasCorrect - Whether the final answer was judged correct.
* @param options - Optional store path and limits.
* @returns A promise resolving to the number of edges written/updated and
* the resolved store path.
*/
export declare function recordCausalFailures(question: GaiaQuestion, result: GaiaAgentResult, wasCorrect: boolean, options?: CausalMemoryOptions): Promise<RecordResult>;
/**
* Before running a new question, retrieve causal edges from prior failures
* that match the question's signature and format them as a system-prompt hint.
*
* Return contract:
* - No edges matched → `{ hint: '', edgesMatched: 0 }` — caller MUST NOT
* inject an empty hint (wastes tokens; may confuse the model).
* - 1+ edges matched → `{ hint: '[PRIOR FAILURES] …', edgesMatched: N }`.
*
* The number of edges returned per signature is capped by
* `options.maxEdgesPerSignature` (documented default: 5).
*
* @param question - The GaiaQuestion about to be attempted.
* @param options - Optional store path and limits.
* @returns A promise resolving to the formatted hint string and match count.
*/
export declare function retrieveCausalHints(question: GaiaQuestion, options?: CausalMemoryOptions): Promise<RetrieveResult>;
//# sourceMappingURL=gaia-causal-memory.d.ts.map