aiwg
Version:
Cognitive architecture for AI-augmented software development with structured memory, ensemble validation, and closed-loop correction. FAIR-aligned artifacts, 84% cost reduction via human-in-the-loop, standards adopted by 100+ organizations.
831 lines (710 loc) • 20.2 kB
YAML
# Regression Learning Schema
# Based on REF-013 MetaGPT and REF-015 Self-Refine
# Finding: Cross-task learning improves regression detection over time
# Integrates with Ralph's debug memory for continuous improvement
# Root schema: describes one regression-learning document. Four collections
# are mandatory (learning_id, patterns, hotspots, test_effectiveness); the
# remaining properties are optional.
# NOTE(review): the nesting in this section was reconstructed from a listing
# that had lost all indentation - confirm against the canonical file.
$schema: "https://json-schema.org/draft/2020-12/schema"
$id: "https://aiwg.io/schemas/regression-learning/v1"
title: "Regression Learning Schema"
description: |
  Schema for cross-task learning that improves regression detection through
  pattern recognition, test prioritization, and historical analysis. Stores
  accumulated knowledge from past regressions to prevent future occurrences.
  Key principles:
  - Learn from every regression
  - Build pattern taxonomy
  - Prioritize high-value tests
  - Predict regression risk
  - Share knowledge across projects
type: object
required:
  - learning_id
  - patterns
  - hotspots
  - test_effectiveness
properties:
  learning_id:
    type: string
    format: uuid
    description: "Unique identifier for this learning session"
  created_at:
    type: string
    format: date-time
  last_updated:
    type: string
    format: date-time
  # Each collection below delegates its item shape to a definition in $defs.
  patterns:
    type: array
    items:
      $ref: "#/$defs/RegressionPattern"
    description: "Learned regression patterns"
  hotspots:
    type: array
    items:
      $ref: "#/$defs/CodeHotspot"
    description: "High-risk code areas"
  test_effectiveness:
    type: array
    items:
      $ref: "#/$defs/TestEffectiveness"
    description: "Test value metrics"
  fix_templates:
    type: array
    items:
      $ref: "#/$defs/FixTemplate"
    description: "Reusable fix patterns"
  predictions:
    type: array
    items:
      $ref: "#/$defs/RegressionPrediction"
    description: "Risk predictions for code changes"
  metadata:
    type: object
    properties:
      project:
        type: string
        description: "Project identifier"
      agent:
        type: string
        description: "Agent that performed learning"
      total_regressions_analyzed:
        type: integer
      # NOTE(review): assumed to belong to metadata rather than the root
      # object - confirm.
      learning_confidence:
        type: number
        minimum: 0
        maximum: 1
# Reusable definitions referenced from the root `properties` via $ref.
# NOTE(review): the nesting in this section was reconstructed from a listing
# that had lost all indentation; placements that could not be determined
# unambiguously carry their own NOTE(review) comment.
# NOTE(review): `regression_id` is constrained by "^REG-[0-9]{4}$" in
# RegressionPattern.occurrences and TestEffectiveness.detected_regressions,
# but is an unconstrained string in CodeHotspot.regression_history,
# TestEffectiveness.missed_regressions, and RegressionPrediction. Consider
# aligning them once existing data has been checked.
$defs:
  # A recurring class of regression together with every recorded occurrence.
  RegressionPattern:
    type: object
    required:
      - pattern_id
      - pattern_name
      - category
      - occurrences
    description: "Identified recurring regression pattern"
    properties:
      pattern_id:
        type: string
        pattern: "^RP-[0-9]{3}$"
        description: "Pattern identifier (RP-XXX format)"
        examples:
          - "RP-001"
          - "RP-042"
      pattern_name:
        type: string
        description: "Human-readable pattern name"
        examples:
          - "null_access_without_check"
          - "type_mismatch_string_number"
          - "off_by_one_array_bounds"
      category:
        type: string
        enum:
          - null_undefined_access
          - type_mismatch
          - off_by_one
          - race_condition
          - missing_validation
          - logic_error
          - resource_leak
          - missing_error_handling
          - api_misuse
          - configuration_error
          - dependency_issue
          - other
        description: "Pattern category"
      occurrences:
        type: array
        items:
          type: object
          required:
            - regression_id
            - file
            - root_cause
            - fix
          properties:
            regression_id:
              type: string
              pattern: "^REG-[0-9]{4}$"
              description: "Reference to regression record"
            file:
              type: string
              description: "File where regression occurred"
            line:
              type: integer
              description: "Line number"
            function:
              type: string
              description: "Function/method name"
            root_cause:
              type: string
              description: "Root cause description"
            fix:
              type: string
              description: "How it was fixed"
            effectiveness:
              type: number
              minimum: 0
              maximum: 1
              description: "Fix success rate (0-1)"
            fix_time_hours:
              type: number
              description: "Time to fix in hours"
            recurred:
              type: boolean
              description: "Did this regression recur?"
      statistics:
        type: object
        properties:
          total_occurrences:
            type: integer
            minimum: 1
          avg_fix_time_hours:
            type: number
            description: "Average time to fix"
          recurrence_rate:
            type: number
            minimum: 0
            maximum: 1
            description: "Rate of recurrence after fix"
          # NOTE(review): assumed to be a child of `statistics`; it could
          # also be a direct RegressionPattern property - confirm.
          detection_methods:
            type: object
            properties:
              automated_test:
                type: integer
              manual_test:
                type: integer
              production:
                type: integer
              code_review:
                type: integer
              ci_cd:
                type: integer
      fix_template:
        type: object
        properties:
          pattern:
            type: string
            description: "Template code showing fix pattern"
          applicability_rules:
            type: array
            items:
              type: object
              properties:
                rule_type:
                  type: string
                  enum: [language, error_pattern, context, file_pattern]
                value:
                  type: string
            description: "When this template applies"
          examples:
            type: array
            items:
              type: object
              properties:
                before:
                  type: string
                after:
                  type: string
                explanation:
                  type: string
      confidence:
        type: number
        minimum: 0
        maximum: 1
        description: "Confidence in this pattern (based on occurrences)"
      last_updated:
        type: string
        format: date-time

  # A file flagged as high-risk, with its regression history and the
  # measurements behind its risk score.
  CodeHotspot:
    type: object
    required:
      - file
      - risk_score
      - regression_history
    description: "High-risk code location"
    properties:
      file:
        type: string
        description: "File path"
      risk_score:
        type: number
        minimum: 0
        maximum: 10
        description: "Risk score (0-10 scale)"
      regression_history:
        type: array
        items:
          type: object
          properties:
            regression_id:
              type: string
            category:
              type: string
            severity:
              type: string
            detected_at:
              type: string
              format: date-time
        description: "Past regressions in this file"
      metrics:
        type: object
        properties:
          total_regressions:
            type: integer
          regressions_per_kloc:
            type: number
            description: "Regressions per 1000 lines of code"
          recent_changes:
            type: object
            properties:
              last_30_days:
                type: integer
              last_7_days:
                type: integer
          complexity_score:
            type: number
            minimum: 0
            maximum: 100
            description: "Code complexity (cyclomatic)"
          test_coverage_percent:
            type: number
            minimum: 0
            maximum: 100
          contributors:
            type: integer
            description: "Number of contributors"
      risk_factors:
        type: object
        properties:
          high_complexity:
            type: boolean
          frequent_changes:
            type: boolean
          multiple_regressions:
            type: boolean
          low_test_coverage:
            type: boolean
          critical_path:
            type: boolean
          many_contributors:
            type: boolean
      recommended_actions:
        type: array
        items:
          type: string
        description: "Actionable recommendations"
      test_priority:
        type: string
        enum: [critical, high, medium, low]
        description: "Test execution priority"

  # Per-test record of which regressions the test caught or missed.
  TestEffectiveness:
    type: object
    required:
      - test_path
      - regression_detection_rate
    description: "Effectiveness metrics for a test"
    properties:
      test_path:
        type: string
        description: "Path to test file"
      regression_detection_rate:
        type: number
        minimum: 0
        maximum: 1
        description: "Percentage of regressions caught (0-1)"
      detected_regressions:
        type: array
        items:
          type: string
          pattern: "^REG-[0-9]{4}$"
        description: "Regressions this test caught"
      missed_regressions:
        type: array
        items:
          type: object
          properties:
            regression_id:
              type: string
            reason:
              type: string
              description: "Why test missed this regression"
        description: "Regressions this test should have caught"
      false_positive_rate:
        type: number
        minimum: 0
        maximum: 1
        description: "Rate of false alarms"
      execution_stats:
        type: object
        properties:
          avg_duration_ms:
            type: number
            description: "Average execution time"
          success_rate:
            type: number
            minimum: 0
            maximum: 1
            description: "Rate of passing (stability)"
          # NOTE(review): assumed to be a child of `execution_stats`; it
          # could also be a direct TestEffectiveness property - confirm.
          flakiness_score:
            type: number
            minimum: 0
            maximum: 1
            description: "Test flakiness (0=stable, 1=very flaky)"
      priority_score:
        type: number
        minimum: 0
        maximum: 10
        description: "Test value score (0-10)"
      recommendations:
        type: array
        items:
          type: string
        description: "How to improve this test"

  # A reusable fix, the contexts it applies to, and how often it worked.
  FixTemplate:
    type: object
    required:
      - template_id
      - name
      - pattern
    description: "Reusable fix pattern"
    properties:
      template_id:
        type: string
        pattern: "^FT-[0-9]{3}$"
        description: "Fix template identifier"
      name:
        type: string
        description: "Template name"
        examples:
          - "null-check"
          - "type-validation"
          - "error-handling"
      pattern:
        type: string
        description: "Template code"
      applies_to:
        type: object
        properties:
          categories:
            type: array
            items:
              type: string
          languages:
            type: array
            items:
              type: string
          error_patterns:
            type: array
            items:
              type: string
      examples:
        type: array
        items:
          type: object
          properties:
            before:
              type: string
            after:
              type: string
            context:
              type: string
      effectiveness:
        type: number
        minimum: 0
        maximum: 1
        description: "Success rate when applied"
      usage_count:
        type: integer
        description: "Times this template was used"

  # A risk prediction for one changed file; `actual_outcome` records what
  # really happened so the prediction can be scored afterwards.
  RegressionPrediction:
    type: object
    required:
      - prediction_id
      - file
      - risk_level
      - confidence
    description: "Prediction of regression risk"
    properties:
      prediction_id:
        type: string
        format: uuid
      timestamp:
        type: string
        format: date-time
      file:
        type: string
        description: "File being analyzed"
      change_type:
        type: string
        enum: [addition, modification, deletion, rename]
      lines_changed:
        type: integer
      risk_level:
        type: string
        enum: [critical, high, medium, low]
        description: "Predicted risk level"
      confidence:
        type: number
        minimum: 0
        maximum: 1
        description: "Confidence in prediction"
      risk_factors:
        type: array
        items:
          type: object
          properties:
            factor:
              type: string
              description: "Risk factor description"
            weight:
              type: number
              minimum: 0
              maximum: 1
        description: "Factors contributing to risk"
      similar_past_changes:
        type: array
        items:
          type: object
          properties:
            commit:
              type: string
            lines_changed:
              type: integer
            result:
              type: string
              enum: [clean, regression]
            regression_id:
              type: string
            time_to_detect_hours:
              type: number
        description: "Similar changes from history"
      recommendations:
        type: array
        items:
          type: string
        description: "Recommended actions"
      suggested_tests:
        type: array
        items:
          type: string
        description: "Tests to run for this change"
      actual_outcome:
        type: object
        properties:
          regression_occurred:
            type: boolean
          regression_id:
            type: string
          prediction_correct:
            type: boolean
        description: "Actual outcome (for learning)"
# Test Prioritization Algorithm
# Weighted factors for ranking tests (the five weights sum to 1.00) and the
# score thresholds that map onto the four priority levels.
# NOTE(review): presumably each threshold is the minimum score on the 0-10
# scale for its level - confirm against the consumer.
test_prioritization:
  description: "Algorithm for prioritizing test execution"
  factors:
    code_change_correlation:
      weight: 0.30
      description: "How often this test catches changes in affected code"
    historical_regression_detection:
      weight: 0.25
      description: "Past regression detection rate"
    code_hotspot_coverage:
      weight: 0.20
      description: "Whether test covers high-risk areas"
    recent_failure_trend:
      weight: 0.15
      description: "Recent failure patterns"
    execution_efficiency:
      weight: 0.10
      description: "Test execution speed"
  priority_levels:
    critical:
      threshold: 8.0
      description: "Run first, never skip"
    high:
      threshold: 6.0
      description: "Run early"
    medium:
      threshold: 4.0
      description: "Standard priority"
    low:
      threshold: 0.0
      description: "Run if time allows, may skip in fast mode"
# Cross-Project Learning
# Which pattern categories may be shared between projects, the privacy
# switches that apply to sharing, and where the shared store is kept.
cross_project_learning:
  description: "Share learning across projects"
  # Lists ten category names; `dependency_issue` and `other` from the
  # RegressionPattern category enum are not included here.
  shared_patterns:
    - null_undefined_access
    - type_mismatch
    - missing_validation
    - off_by_one
    - race_condition
    - logic_error
    - resource_leak
    - missing_error_handling
    - api_misuse
    - configuration_error
  privacy:
    anonymize_file_paths: true
    anonymize_business_logic: true
    share_only_patterns: true
    require_opt_in: true
  storage:
    location: "~/.aiwg/global-learning/"
    format: yaml
    encryption: optional
# Metrics
# Target values for judging the learning system; each entry gives a
# description, a target, and the ratio used to calculate it.
# NOTE(review): all six metrics are assumed to be children of
# `learning_effectiveness` - the listing this was reconstructed from had
# lost its indentation, so confirm.
metrics:
  learning_effectiveness:
    pattern_accuracy:
      description: "Pattern matching accuracy"
      target: 0.85
      calculation: "correct_matches / total_matches"
    fix_template_success_rate:
      description: "Fix template effectiveness"
      target: 0.90
      calculation: "successful_fixes / total_applications"
    prediction_accuracy:
      description: "Risk prediction accuracy"
      target: 0.75
      calculation: "correct_predictions / total_predictions"
    test_prioritization_efficiency:
      description: "Time saved by prioritization"
      target: 0.50
      calculation: "time_saved / baseline_time"
    false_positive_rate:
      description: "Rate of spurious alerts"
      target: 0.10
      calculation: "false_positives / total_predictions"
    cross_project_reuse:
      description: "Pattern reuse across projects"
      target: 0.30
      calculation: "patterns_reused / total_patterns"
# Integration Hooks
# Ordered action lists per Ralph event, plus links to the two related
# schemas this one integrates with.
# NOTE(review): `debug_memory` and `regression_schema` are assumed to be
# siblings of `ralph_hooks` (not hook events) because they carry
# enabled/integration_path settings rather than action lists - confirm.
integration:
  ralph_hooks:
    on_test_failure:
      - query_similar_past_failures
      - retrieve_fix_templates
      - apply_learned_patterns
      - update_pattern_effectiveness
    on_test_success:
      - record_successful_fix
      - increment_fix_effectiveness
      - update_confidence_scores
    on_regression_detected:
      - create_regression_record
      - analyze_root_cause
      - store_in_learning_memory
      - update_hotspot_scores
      - adjust_test_priorities
  debug_memory:
    enabled: true
    integration_path: "@agentic/code/addons/ralph/schemas/debug-memory.yaml"
    cross_session_learning: true
  regression_schema:
    enabled: true
    integration_path: "@agentic/code/frameworks/sdlc-complete/schemas/testing/regression.yaml"
    baseline_integration: true
# Storage Structure
# On-disk locations for each kind of learning artifact, retention periods,
# and which indexes are enabled.
storage:
  # Entries ending in "/" are directories; the others are single YAML files.
  paths:
    patterns: ".aiwg/ralph/learning/regression-patterns.yaml"
    hotspots: ".aiwg/ralph/learning/code-hotspots.yaml"
    test_effectiveness: ".aiwg/ralph/learning/test-effectiveness.yaml"
    fix_templates: ".aiwg/ralph/learning/fix-templates/"
    predictions: ".aiwg/ralph/learning/predictions/"
    analytics: ".aiwg/ralph/learning/analytics/"
  # NOTE(review): only the first value is annotated with a unit; the
  # unannotated values are presumably days as well - confirm.
  retention:
    regression_records: 365  # days
    pattern_library: -1  # forever
    effectiveness_metrics: 180
    predictions: 90
  indexing:
    by_pattern_category: true
    by_file_path: true
    by_regression_id: true
    by_risk_score: true
# Examples
# One sample instance each for RegressionPattern, CodeHotspot, and
# RegressionPrediction.
# NOTE(review): at the root of a JSON Schema document `examples` is a
# standard annotation keyword whose value must be an array; this named
# mapping would fail meta-schema validation. Left unchanged here because
# consumers may read these keys by name - consider renaming the section.
examples:
  pattern_example:
    pattern_id: "RP-001"
    pattern_name: "null_access_without_check"
    category: null_undefined_access
    # Only one occurrence is listed although statistics.total_occurrences
    # is 3; presumably the list is abbreviated for illustration.
    occurrences:
      - regression_id: "REG-0023"
        file: "src/auth/validate.ts"
        line: 15
        function: "validateInput"
        root_cause: "Missing null check before property access"
        fix: "Added early return for null/undefined"
        effectiveness: 1.0
        fix_time_hours: 1.2
        recurred: false
    statistics:
      total_occurrences: 3
      avg_fix_time_hours: 1.5
      recurrence_rate: 0.0
    confidence: 0.95
  hotspot_example:
    file: "src/auth/validate.ts"
    risk_score: 8.5
    regression_history:
      - regression_id: "REG-0023"
        category: null_undefined_access
        severity: high
    metrics:
      total_regressions: 3
      regressions_per_kloc: 2.1
      test_coverage_percent: 65
    risk_factors:
      high_complexity: true
      frequent_changes: true
      multiple_regressions: true
    test_priority: critical
  prediction_example:
    # RegressionPrediction.prediction_id declares `format: uuid`, so the
    # sample value must be a well-formed UUID.
    prediction_id: "7c9e6679-7425-40de-944b-e07fc1f90ae7"
    file: "src/payments/process.ts"
    change_type: modification
    lines_changed: 45
    risk_level: high
    confidence: 0.82
    risk_factors:
      - factor: "Known hotspot (REG-0034, REG-0067)"
        weight: 0.35
      - factor: "Large change (45 lines)"
        weight: 0.25
    recommendations:
      - "Run full payment test suite"
      - "Add integration tests for new code paths"
    suggested_tests:
      - "test/payments/process.test.ts"
      - "test/integration/payment-flow.test.ts"
# Validation Rules
# Thresholds applied when creating patterns, scoring hotspots, and
# accepting predictions.
validation:
  pattern_creation:
    min_occurrences: 2
    min_confidence: 0.70
  # The risk-score bounds match the 0-10 range declared for
  # CodeHotspot.risk_score.
  hotspot_scoring:
    factors_required: 3
    min_risk_score: 0.0
    max_risk_score: 10.0
  prediction_acceptance:
    min_confidence: 0.60
    require_similar_history: false
# References
# Related research notes, schemas, rules, and skills. Every entry is quoted
# because a plain YAML scalar may not begin with "@".
references:
  research:
    - "@.aiwg/research/findings/REF-013-metagpt.md"
    - "@.aiwg/research/findings/REF-015-self-refine.md"
  schemas:
    - "@agentic/code/addons/ralph/schemas/debug-memory.yaml"
    - "@agentic/code/frameworks/sdlc-complete/schemas/testing/regression.yaml"
    - "@agentic/code/addons/ralph/schemas/iteration-analytics.yaml"
  rules:
    - "@.claude/rules/executable-feedback.md"
    - "@.claude/rules/best-output-selection.md"
  skills:
    - "@agentic/code/frameworks/sdlc-complete/skills/regression-learning/SKILL.md"
    - "@agentic/code/frameworks/sdlc-complete/skills/regression-baseline/SKILL.md"
    - "@agentic/code/frameworks/sdlc-complete/skills/regression-bisect/SKILL.md"