aiwg
Version:
Cognitive architecture for AI-augmented software development with structured memory, ensemble validation, and closed-loop correction. FAIR-aligned artifacts, 84% cost reduction via human-in-the-loop, standards adopted by 100+ organizations.
529 lines (528 loc) • 18.3 kB
JSON
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://aiwg.io/schemas/ralph/reflection-memory.json",
"title": "Reflexion Episodic Memory Schema",
"description": "Structured episodic memory for Ralph loops following Reflexion's three-model architecture (Actor, Evaluator, Self-Reflection). See REF-021 for theoretical foundation.",
"type": "object",
"required": [
"loop_id",
"iteration",
"timestamp",
"actor_output",
"evaluator_output",
"self_reflection",
"memory_metadata"
],
"properties": {
"loop_id": {
"type": "string",
"description": "Unique identifier for the Ralph loop (e.g., 'ralph-research-issues-2026-01-25')",
"pattern": "^ralph-[a-z0-9-]+$"
},
"iteration": {
"type": "integer",
"description": "Zero-indexed iteration number (0 = first attempt)",
"minimum": 0
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "ISO 8601 timestamp when reflection was generated"
},
"task_description": {
"type": "string",
"description": "Natural language description of the task being attempted"
},
"actor_output": {
"type": "object",
"description": "Output from Actor model (Ma): actions taken and code changes made",
"required": ["actions", "rationale"],
"properties": {
"actions": {
"type": "array",
"description": "Sequence of actions taken in this iteration",
"items": {
"type": "object",
"required": ["type", "description"],
"properties": {
"type": {
"type": "string",
"enum": [
"code_modification",
"file_creation",
"file_deletion",
"test_execution",
"command_execution",
"api_call",
"other"
],
"description": "Type of action performed"
},
"description": {
"type": "string",
"description": "Natural language description of the action"
},
"file_path": {
"type": "string",
"description": "Path to file affected by this action (if applicable)"
},
"changes": {
"type": "object",
"description": "Details of code changes made",
"properties": {
"additions": {
"type": "integer",
"description": "Lines of code added"
},
"deletions": {
"type": "integer",
"description": "Lines of code deleted"
},
"diff": {
"type": "string",
"description": "Unified diff format of changes"
}
}
},
"command": {
"type": "string",
"description": "Command executed (if type is command_execution)"
},
"timestamp": {
"type": "string",
"format": "date-time",
"description": "When this specific action was taken"
}
}
}
},
"rationale": {
"type": "string",
"description": "Actor's reasoning for chosen actions and approach"
},
"strategy": {
"type": "string",
"description": "High-level strategy employed in this iteration"
},
"files_modified": {
"type": "array",
"description": "List of all files touched in this iteration",
"items": {
"type": "string"
}
},
"total_changes": {
"type": "object",
"description": "Aggregate statistics for this iteration",
"properties": {
"files_changed": {
"type": "integer"
},
"lines_added": {
"type": "integer"
},
"lines_deleted": {
"type": "integer"
}
}
}
}
},
"evaluator_output": {
"type": "object",
"description": "Output from Evaluator model (Me): verification results and error signals",
"required": ["passed", "verification_type"],
"properties": {
"passed": {
"type": "boolean",
"description": "Whether the iteration passed verification (true = success, false = retry needed)"
},
"verification_type": {
"type": "string",
"enum": [
"unit_tests",
"integration_tests",
"type_check",
"lint",
"compilation",
"heuristic",
"external_api",
"manual_review",
"combined"
],
"description": "Type of verification performed (follows Reflexion paper Section 3.2)"
},
"results": {
"type": "array",
"description": "Individual verification results from external tools",
"items": {
"type": "object",
"required": ["tool", "status"],
"properties": {
"tool": {
"type": "string",
"description": "Name of verification tool (e.g., 'npm test', 'tsc', 'eslint')"
},
"status": {
"type": "string",
"enum": ["pass", "fail", "error", "skip"],
"description": "Result status"
},
"exit_code": {
"type": "integer",
"description": "Exit code from tool execution"
},
"stdout": {
"type": "string",
"description": "Standard output from tool"
},
"stderr": {
"type": "string",
"description": "Standard error from tool"
},
"duration_ms": {
"type": "integer",
"description": "Execution time in milliseconds"
}
}
}
},
"errors": {
"type": "array",
"description": "Structured error information for failed verifications",
"items": {
"type": "object",
"required": ["type", "message"],
"properties": {
"type": {
"type": "string",
"enum": [
"syntax_error",
"type_error",
"test_failure",
"lint_error",
"runtime_error",
"logic_error",
"timeout",
"other"
],
"description": "Category of error"
},
"message": {
"type": "string",
"description": "Error message from verification tool"
},
"file": {
"type": "string",
"description": "File where error occurred"
},
"line": {
"type": "integer",
"description": "Line number where error occurred"
},
"column": {
"type": "integer",
"description": "Column number where error occurred"
},
"stack_trace": {
"type": "string",
"description": "Stack trace if available"
},
"severity": {
"type": "string",
"enum": ["error", "warning", "info"],
"description": "Severity level"
},
"rule": {
"type": "string",
"description": "Linter rule or test name that failed"
}
}
}
},
"reward_signal": {
"type": "number",
"description": "Scalar reward (0.0 = complete failure, 1.0 = complete success). Following Reflexion paper Section 3.2.",
"minimum": 0,
"maximum": 1
},
"metrics": {
"type": "object",
"description": "Quantitative metrics from verification",
"properties": {
"tests_passed": {
"type": "integer"
},
"tests_failed": {
"type": "integer"
},
"tests_total": {
"type": "integer"
},
"coverage_percentage": {
"type": "number",
"minimum": 0,
"maximum": 100
},
"lint_errors": {
"type": "integer"
},
"lint_warnings": {
"type": "integer"
},
"type_errors": {
"type": "integer"
}
}
}
}
},
"self_reflection": {
"type": "object",
"description": "Output from Self-Reflection model (Msr): verbal analysis of what went wrong and lessons learned. This is the core of Reflexion's episodic memory (see REF-021 Section 3.3).",
"required": ["reflection_text"],
"properties": {
"reflection_text": {
"type": "string",
"description": "Natural language self-reflection following Reflexion pattern: (1) credit assignment - which actions failed, (2) causal reasoning - why they failed, (3) actionable insights - what to do differently. Written in first person."
},
"credit_assignment": {
"type": "object",
"description": "Identification of specific failing actions in trajectory",
"properties": {
"failing_action_indices": {
"type": "array",
"description": "Indices of actions in actor_output.actions that caused failure",
"items": {
"type": "integer"
}
},
"root_cause": {
"type": "string",
"description": "Identified root cause of failure"
},
"failure_category": {
"type": "string",
"enum": [
"hallucination",
"inefficient_planning",
"incorrect_assumption",
"incomplete_implementation",
"edge_case_miss",
"integration_error",
"configuration_error",
"logic_error",
"other"
],
"description": "Category of failure (inspired by Reflexion AlfWorld error analysis)"
}
}
},
"causal_reasoning": {
"type": "string",
"description": "Explanation of why the identified actions led to failure"
},
"actionable_insights": {
"type": "array",
"description": "Specific concrete steps to take in next iteration",
"items": {
"type": "string"
}
},
"lessons_learned": {
"type": "array",
"description": "General lessons applicable to future similar tasks",
"items": {
"type": "string"
}
},
"confidence": {
"type": "number",
"description": "Self-assessed confidence in this reflection's accuracy (0.0 = low, 1.0 = high)",
"minimum": 0,
"maximum": 1
},
"related_reflections": {
"type": "array",
"description": "References to previous reflections that informed this one",
"items": {
"type": "integer",
"description": "Iteration number of related reflection"
}
}
}
},
"memory_metadata": {
"type": "object",
"description": "Metadata about the episodic memory sliding window (Ω capacity from Reflexion paper)",
"required": ["omega_capacity", "current_memory_size"],
"properties": {
"omega_capacity": {
"type": "integer",
"description": "Maximum number of reflections to keep in memory (Ω parameter from Reflexion). Typical values: 1 for programming, 3 for decision-making/reasoning.",
"minimum": 1,
"maximum": 10,
"default": 3
},
"current_memory_size": {
"type": "integer",
"description": "Current number of reflections in sliding window",
"minimum": 0
},
"reflections_in_context": {
"type": "array",
"description": "Iteration numbers of reflections currently in memory window",
"items": {
"type": "integer"
}
},
"window_policy": {
"type": "string",
"enum": ["fifo", "recency", "relevance_weighted"],
"description": "Policy for sliding window maintenance",
"default": "fifo"
},
"total_reflections_generated": {
"type": "integer",
"description": "Total reflections generated across all iterations (may exceed Ω)",
"minimum": 0
}
}
},
"context_injected": {
"type": "boolean",
"description": "Whether previous reflections from memory window were injected into this iteration's context",
"default": false
},
"previous_reflections_used": {
"type": "array",
"description": "Iteration numbers of previous reflections that informed this attempt",
"items": {
"type": "integer"
}
},
"performance_delta": {
"type": "object",
"description": "Performance comparison to previous iteration",
"properties": {
"reward_change": {
"type": "number",
"description": "Change in reward signal from previous iteration (positive = improvement)"
},
"error_count_change": {
"type": "integer",
"description": "Change in error count from previous iteration (negative = improvement)"
},
"is_improvement": {
"type": "boolean",
"description": "Whether this iteration showed measurable improvement"
}
}
},
"notes": {
"type": "string",
"description": "Optional free-form notes about this iteration"
}
},
"examples": [
{
"loop_id": "ralph-test-coverage-2026-01-25",
"iteration": 1,
"timestamp": "2026-01-25T10:30:00Z",
"task_description": "Increase test coverage to 80% for authentication module",
"actor_output": {
"actions": [
{
"type": "code_modification",
"description": "Added unit tests for login function",
"file_path": "/test/unit/auth/login.test.ts",
"changes": {
"additions": 45,
"deletions": 0,
"diff": "..."
},
"timestamp": "2026-01-25T10:25:00Z"
},
{
"type": "test_execution",
"description": "Ran npm test to verify new tests",
"command": "npm test",
"timestamp": "2026-01-25T10:28:00Z"
}
],
"rationale": "I added comprehensive tests for the login function including edge cases for empty inputs and invalid credentials.",
"strategy": "Test-first development: write failing tests, then implement fixes",
"files_modified": ["/test/unit/auth/login.test.ts"],
"total_changes": {
"files_changed": 1,
"lines_added": 45,
"lines_deleted": 0
}
},
"evaluator_output": {
"passed": false,
"verification_type": "unit_tests",
"results": [
{
"tool": "npm test",
"status": "fail",
"exit_code": 1,
"stderr": "TypeError: Cannot read property 'map' of undefined",
"duration_ms": 2340
}
],
"errors": [
{
"type": "test_failure",
"message": "should handle empty API response",
"file": "/test/unit/auth/login.test.ts",
"line": 34,
"severity": "error",
"rule": "login-empty-response"
}
],
"reward_signal": 0.3,
"metrics": {
"tests_passed": 3,
"tests_failed": 2,
"tests_total": 5,
"coverage_percentage": 65
}
},
"self_reflection": {
"reflection_text": "In my previous attempt, I tried to map over userData without checking if it exists. The error occurred because the API response was empty in the test case. I should add a null check before the map operation. In the next attempt, I will verify userData exists and return an empty array if it doesn't.",
"credit_assignment": {
"failing_action_indices": [0],
"root_cause": "Missing null check for API response",
"failure_category": "edge_case_miss"
},
"causal_reasoning": "The test case 'should handle empty API response' failed because I assumed userData would always be defined. When the API returns empty, userData is undefined and calling .map() throws TypeError.",
"actionable_insights": [
"Add null check: if (!userData) return []",
"Add test case for undefined userData first",
"Review other API response handlers for same pattern"
],
"lessons_learned": [
"Always validate API responses before processing",
"Empty responses are a common edge case in authentication flows"
],
"confidence": 0.9,
"related_reflections": []
},
"memory_metadata": {
"omega_capacity": 3,
"current_memory_size": 1,
"reflections_in_context": [1],
"window_policy": "fifo",
"total_reflections_generated": 1
},
"context_injected": false,
"previous_reflections_used": [],
"performance_delta": {
"reward_change": -0.2,
"error_count_change": 2,
"is_improvement": false
}
}
]
}