aiwg
Version:
Cognitive architecture for AI-augmented software development with structured memory, ensemble validation, and closed-loop correction. FAIR-aligned artifacts, 84% cost reduction via human-in-the-loop, standards adopted by 100+ organizations.
396 lines (356 loc) • 10.2 kB
YAML
# Workflow Checkpoint Schema
# Based on REF-058 R-LAM (systematic checkpoint/recovery)
# Issue: #112, #268 (multi-loop support)
# Schema identity: JSON Schema dialect, canonical ID, title and summary.
$schema: "https://json-schema.org/draft/2020-12/schema"
$id: "https://aiwg.io/schemas/checkpoint/v2"
title: "Workflow Checkpoint Schema"
description: |
  Systematic checkpoint and recovery for workflows to enable resumption
  from last good state after failures. Based on R-LAM findings.
  Version 2 adds multi-loop support with loop_id field.

# A checkpoint is an object; the four fields below are mandatory.
type: object
required: [checkpoint_id, workflow_id, created_at, state]
# Field definitions for a checkpoint document.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; `variables` and `agent_state` are assumed to live under
# `state.properties` — confirm against the canonical file.
properties:
  checkpoint_id:
    type: string
    format: uuid
    description: "Unique checkpoint identifier"

  workflow_id:
    type: string
    description: "ID of the workflow being checkpointed"

  loop_id:
    # Draft 2020-12 has no `nullable` keyword (that is OpenAPI 3.0), so a
    # validator ignores it and `type: string` alone rejects `loop_id: null`.
    # Nullability is expressed by listing "null" in `type`; `pattern` only
    # constrains string instances, so null still validates.
    type: [string, "null"]
    pattern: "^ralph-[a-z0-9-]+-[a-f0-9]{8}$"
    description: |
      Loop ID for multi-loop support (v2). Required for ralph_loop workflows.
      Format: ralph-{slug}-{uuid8}
      Null for non-Ralph or legacy single-loop checkpoints.

  workflow_type:
    type: string
    enum:
      - ralph_loop
      - flow_command
      - skill_execution
      - agent_task
    description: "Type of workflow"

  created_at:
    type: string
    format: date-time
    description: "When checkpoint was created"

  trigger:
    type: string
    enum:
      - manual          # User-initiated
      - automatic       # System-initiated
      - phase_boundary  # At phase transition
      - iteration       # At Ralph iteration
      - pre_risky_op    # Before risky operation
      - error_recovery  # During error recovery
    description: "What triggered the checkpoint"

  # Snapshot of the workflow; phase, progress and artifacts are mandatory.
  state:
    type: object
    required: [phase, progress, artifacts]
    description: "Complete workflow state"
    properties:
      phase:
        type: string
        description: "Current SDLC phase"
      step:
        type: string
        description: "Current step within phase"
      progress:
        type: object
        properties:
          total_steps:
            type: integer
          completed_steps:
            type: integer
          percentage:
            type: number
            minimum: 0
            maximum: 100
        description: "Progress tracking"
      iteration:
        type: object
        properties:
          current:
            type: integer
          max:
            type: integer
          failures:
            type: integer
        description: "Ralph iteration state (if applicable)"
      artifacts:
        type: array
        items:
          type: object
          properties:
            path:
              type: string
            hash:
              type: string
            status:
              type: string
              enum: [created, modified, deleted, unchanged]
        description: "Artifact state at checkpoint"
      variables:
        type: object
        additionalProperties: true
        description: "Workflow variables and context"
      agent_state:
        type: object
        properties:
          current_agent:
            type: string
          memory:
            type: object
          pending_actions:
            type: array
            items:
              type: string
        description: "Agent execution state"

  # Shape is defined once under $defs/ExecutionConfig and referenced here.
  execution_config:
    $ref: "#/$defs/ExecutionConfig"
    description: "Configuration for reproducibility"

  metadata:
    type: object
    properties:
      size_bytes:
        type: integer
      compression:
        type: string
        enum: [none, gzip, lz4]
      retention_days:
        type: integer
        default: 30
      tags:
        type: array
        items:
          type: string
      schema_version:
        type: string
        # Quoted so the versions stay strings rather than floats.
        enum: ["1.0", "2.0"]
        default: "2.0"
        description: "Schema version for migration tracking"
    description: "Checkpoint metadata"
# Reusable sub-schemas; ExecutionConfig is the target of the
# "#/$defs/ExecutionConfig" reference used by `execution_config`.
$defs:
  ExecutionConfig:
    type: object
    description: "Configuration snapshot for reproducibility"
    properties:
      model:
        type: string
        description: "Model ID (e.g., claude-3-opus)"

      temperature:
        type: number
        minimum: 0
        maximum: 2

      seed:
        type: integer
        description: "Random seed if set"

      execution_mode:
        type: string
        enum:
          - strict
          - seeded
          - logged
          - default

      agent:
        type: string
        description: "Active agent"

      tools:
        type: array
        items:
          type: string
        description: "Available tools"

      inputs:
        type: object
        additionalProperties: true
        description: "Input values"
# Recovery configuration
# `strategies` lists the named ways of picking a checkpoint, each with a
# description and its list of steps; `restore_sequence` lists the restore
# steps themselves.
recovery:
  strategies:
    last_checkpoint:
      description: "Restore most recent checkpoint"
      steps:
        - load_checkpoint
        - restore_state
        - resume_execution

    select_checkpoint:
      description: "Let user choose checkpoint"
      steps:
        - list_checkpoints
        - user_selects
        - load_checkpoint
        - restore_state
        - resume_execution

    smart_rollback:
      description: "Analyze and select best checkpoint"
      steps:
        - analyze_failure
        - find_safe_checkpoint
        - load_checkpoint
        - restore_state
        - resume_execution

  restore_sequence:
    - validate_checkpoint_integrity
    - restore_artifacts
    - restore_variables
    - restore_agent_state
    - verify_state
    - resume_or_fail
# Checkpoint lifecycle
lifecycle:
  # Events listed as automatic checkpoint triggers.
  creation:
    auto_triggers:
      - phase_start
      - artifact_complete
      - before_external_call
      - iteration_boundary

  # Retention defaults and the policy applied per workflow outcome.
  retention:
    default_count: 5
    default_days: 30
    on_success: keep_latest
    on_failure: keep_all_recent

  # Cleanup settings.
  cleanup:
    schedule: daily
    preserve_tagged: true
    compress_old: true
# Storage configuration
# Location and file-name templates for checkpoints, plus the on-disk format.
storage:
  # Multi-loop path structure (v2)
  multi_loop_location: ".aiwg/ralph/loops/{loop_id}/checkpoints/"
  multi_loop_naming: "iteration-{iteration:03d}.json.gz"

  # Legacy single-loop path (v1, deprecated)
  legacy_location: ".aiwg/checkpoints/"
  legacy_naming: "{workflow_id}-{timestamp}-{trigger}.json.gz"

  # Active format
  format: json
  compression: gzip
# Path Resolution
# A loop_id selects the multi-loop location; both remaining rules resolve to
# the legacy location.
path_resolution:
  rules:
    - if_loop_id_present: use_multi_loop_location
    - if_workflow_type_ralph_loop_and_no_loop_id: use_legacy_location
    - else: use_legacy_location

  # One sample path per location.
  examples:
    multi_loop: ".aiwg/ralph/loops/ralph-fix-tests-a1b2c3d4/checkpoints/iteration-005.json.gz"
    legacy: ".aiwg/checkpoints/ralph-research-impl-2026-01-25T15-00-00Z-iteration.json.gz"
# Backward Compatibility
# Rules for reading and writing checkpoints that carry no loop_id, followed by
# the validation flags for loop_id.
# NOTE(review): indentation is missing in this copy, so the nesting of
# migration_on_read, migration_on_write and validation cannot be determined
# from here — restore it from the canonical file before relying on it.
backward_compatibility:
single_loop_checkpoints:
description: |
Checkpoints without loop_id are valid for single-loop workflows.
When loading, if loop_id is null and workflow_type is ralph_loop,
treat as legacy single-loop checkpoint.
migration_on_read:  # entries that apply when a checkpoint is loaded
- if_loop_id_null_and_ralph_loop: treat_as_single_loop
- load_from_legacy_location
- no_automatic_migration_to_multi_loop
migration_on_write:  # path chosen by loop_id presence; version stamped "2.0"
- if_loop_id_provided: use_multi_loop_path
- if_loop_id_null: use_legacy_path
- set_metadata.schema_version: "2.0"
validation:
# Validate loop_id if workflow_type is ralph_loop in multi-loop mode
ralph_loop_requires_loop_id_in_multi_loop: true
# A null loop_id stays acceptable for legacy checkpoints.
allow_null_loop_id_for_legacy: true
# Example checkpoints
# checkpoint_id values are well-formed UUIDs so that each example satisfies
# the `format: uuid` constraint the schema places on that field.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been stripped; `variables` is assumed to belong to `state` — confirm against
# the canonical file.
examples:
  # Multi-loop checkpoint (v2)
  - checkpoint_id: "0b5c1f6e-3a2d-4c7b-9e8f-1a2b3c4d5e6f"
    workflow_id: "ralph-fix-tests-a1b2c3d4"
    loop_id: "ralph-fix-tests-a1b2c3d4"
    workflow_type: ralph_loop
    created_at: "2026-02-02T21:05:00Z"
    trigger: iteration
    state:
      phase: construction
      step: implement_issue
      progress:
        total_steps: 200
        completed_steps: 5
        percentage: 2.5
      iteration:
        current: 5
        max: 200
        failures: 0
      artifacts:
        - path: "src/auth/login.ts"
          hash: "abc123..."
          status: modified
      variables:
        current_issue: "#268"
        implementation_approach: "multi-loop"
    execution_config:
      model: "claude-sonnet-4.5"
      temperature: 0
      execution_mode: strict
      agent: "Software Implementer"
    metadata:
      size_bytes: 4096
      compression: gzip
      retention_days: 30
      schema_version: "2.0"

  # Legacy single-loop checkpoint (v1, backward compatible)
  - checkpoint_id: "7d9e2a4c-5b1f-4e3a-8c6d-9f0a1b2c3d4e"
    workflow_id: "ralph-research-impl"
    loop_id: null
    workflow_type: ralph_loop
    created_at: "2026-01-25T15:00:00Z"
    trigger: iteration
    state:
      phase: construction
      step: implement_issue
      progress:
        total_steps: 10
        completed_steps: 3
        percentage: 30
      iteration:
        current: 5
        max: 200
        failures: 0
      artifacts:
        - path: "agentic/code/frameworks/sdlc-complete/schemas/research/checkpoint.yaml"
          hash: "abc123..."
          status: created
      variables:
        current_issue: "#112"
        implementation_approach: "agentic"
    execution_config:
      model: "claude-3-opus"
      temperature: 0
      execution_mode: strict
      agent: "Software Implementer"
    metadata:
      size_bytes: 4096
      compression: gzip
      retention_days: 30
      schema_version: "1.0"
# Migration Guide
# Describes the v1 -> v2 change (multi-loop support) and the compatibility
# flags between the two versions.
# NOTE(review): indentation is missing in this copy; whether `compatibility`
# belongs to `from_v1_to_v2` or directly to `migration_guide` cannot be
# determined from here — confirm against the canonical file.
migration_guide:
from_v1_to_v2:
description: "Adding multi-loop support"
breaking_changes: false  # the v1 -> v2 change is declared non-breaking
steps:
- add_loop_id_field_nullable
- update_storage_path_logic
- maintain_legacy_path_support
- no_automatic_migration_required
compatibility:
v1_checkpoints_readable: true
v2_checkpoints_backward_compatible: true
mixed_versions_supported: true
# References
# Entries stay quoted: a leading "@" or "#" is not valid in a plain scalar.
references:
  research:
    - "@.aiwg/research/findings/REF-058-r-lam.md"

  implementation:
    - "#112"  # Original checkpoint implementation
    - "#268"  # Multi-loop support

  related:
    - "@agentic/code/frameworks/sdlc-complete/schemas/flows/error-handling.yaml"
    - "@agentic/code/addons/ralph/schemas/reflection-memory.json"
    - "@agentic/code/addons/ralph/schemas/loop-registry.yaml"
    - "@agentic/code/addons/ralph/schemas/loop-state.yaml"