aiwg

Version:

Cognitive architecture for AI-augmented software development with structured memory, ensemble validation, and closed-loop correction. FAIR-aligned artifacts, 84% cost reduction via human-in-the-loop, standards adopted by 100+ organizations.

aiwg.io

jmagly/aiwg

831 lines (710 loc) • 20.2 kB

YAML

# Regression Learning Schema
# Based on REF-013 MetaGPT and REF-015 Self-Refine
# Finding: Cross-task learning improves regression detection over time
# Integrates with Ralph's debug memory for continuous improvement
#
# Layout of this document:
#   1. JSON Schema proper ($schema .. $defs) describing a learning record.
#   2. Additional configuration sections (test_prioritization, cross_project_learning,
#      metrics, integration, storage, examples, validation, references).
#      NOTE(review): these are not JSON Schema vocabulary keywords; presumably they
#      are consumed by AIWG tooling rather than by a schema validator - TODO confirm.

$schema: "https://json-schema.org/draft/2020-12/schema"
$id: "https://aiwg.io/schemas/regression-learning/v1"
title: "Regression Learning Schema"
description: |
  Schema for cross-task learning that improves regression detection through
  pattern recognition, test prioritization, and historical analysis.

  Stores accumulated knowledge from past regressions to prevent future occurrences.

  Key principles:
  - Learn from every regression
  - Build pattern taxonomy
  - Prioritize high-value tests
  - Predict regression risk
  - Share knowledge across projects

type: object
required:
  - learning_id
  - patterns
  - hotspots
  - test_effectiveness

properties:
  learning_id:
    type: string
    format: uuid
    description: "Unique identifier for this learning session"

  created_at:
    type: string
    format: date-time

  last_updated:
    type: string
    format: date-time

  patterns:
    type: array
    items:
      $ref: "#/$defs/RegressionPattern"
    description: "Learned regression patterns"

  hotspots:
    type: array
    items:
      $ref: "#/$defs/CodeHotspot"
    description: "High-risk code areas"

  test_effectiveness:
    type: array
    items:
      $ref: "#/$defs/TestEffectiveness"
    description: "Test value metrics"

  fix_templates:
    type: array
    items:
      $ref: "#/$defs/FixTemplate"
    description: "Reusable fix patterns"

  predictions:
    type: array
    items:
      $ref: "#/$defs/RegressionPrediction"
    description: "Risk predictions for code changes"

  metadata:
    type: object
    properties:
      project:
        type: string
        description: "Project identifier"
      agent:
        type: string
        description: "Agent that performed learning"
      total_regressions_analyzed:
        type: integer
      learning_confidence:
        type: number
        minimum: 0
        maximum: 1

$defs:
  # A recurring class of regression, with every recorded occurrence and
  # aggregate statistics derived from those occurrences.
  RegressionPattern:
    type: object
    required:
      - pattern_id
      - pattern_name
      - category
      - occurrences
    description: "Identified recurring regression pattern"
    properties:
      pattern_id:
        type: string
        pattern: "^RP-[0-9]{3}$"
        description: "Pattern identifier (RP-XXX format)"
        examples:
          - "RP-001"
          - "RP-042"

      pattern_name:
        type: string
        description: "Human-readable pattern name"
        examples:
          - "null_access_without_check"
          - "type_mismatch_string_number"
          - "off_by_one_array_bounds"

      category:
        type: string
        enum:
          - null_undefined_access
          - type_mismatch
          - off_by_one
          - race_condition
          - missing_validation
          - logic_error
          - resource_leak
          - missing_error_handling
          - api_misuse
          - configuration_error
          - dependency_issue
          - other
        description: "Pattern category"

      occurrences:
        type: array
        items:
          type: object
          required:
            - regression_id
            - file
            - root_cause
            - fix
          properties:
            regression_id:
              type: string
              pattern: "^REG-[0-9]{4}$"
              description: "Reference to regression record"
            file:
              type: string
              description: "File where regression occurred"
            line:
              type: integer
              description: "Line number"
            function:
              type: string
              description: "Function/method name"
            root_cause:
              type: string
              description: "Root cause description"
            fix:
              type: string
              description: "How it was fixed"
            effectiveness:
              type: number
              minimum: 0
              maximum: 1
              description: "Fix success rate (0-1)"
            fix_time_hours:
              type: number
              description: "Time to fix in hours"
            recurred:
              type: boolean
              description: "Did this regression recur?"

      statistics:
        type: object
        properties:
          total_occurrences:
            type: integer
            minimum: 1
          avg_fix_time_hours:
            type: number
            description: "Average time to fix"
          recurrence_rate:
            type: number
            minimum: 0
            maximum: 1
            description: "Rate of recurrence after fix"
          # Counts of how occurrences were detected, keyed by detection channel.
          detection_methods:
            type: object
            properties:
              automated_test:
                type: integer
              manual_test:
                type: integer
              production:
                type: integer
              code_review:
                type: integer
              ci_cd:
                type: integer

      fix_template:
        type: object
        properties:
          pattern:
            type: string
            description: "Template code showing fix pattern"
          applicability_rules:
            type: array
            items:
              type: object
              properties:
                rule_type:
                  type: string
                  enum: [language, error_pattern, context, file_pattern]
                value:
                  type: string
            description: "When this template applies"
          examples:
            type: array
            items:
              type: object
              properties:
                before:
                  type: string
                after:
                  type: string
                explanation:
                  type: string

      confidence:
        type: number
        minimum: 0
        maximum: 1
        description: "Confidence in this pattern (based on occurrences)"

      last_updated:
        type: string
        format: date-time

  # A file flagged as regression-prone, with its history and risk metrics.
  CodeHotspot:
    type: object
    required:
      - file
      - risk_score
      - regression_history
    description: "High-risk code location"
    properties:
      file:
        type: string
        description: "File path"

      risk_score:
        type: number
        minimum: 0
        maximum: 10
        description: "Risk score (0-10 scale)"

      regression_history:
        type: array
        items:
          type: object
          properties:
            regression_id:
              type: string
            category:
              type: string
            severity:
              type: string
            detected_at:
              type: string
              format: date-time
        description: "Past regressions in this file"

      metrics:
        type: object
        properties:
          total_regressions:
            type: integer
          regressions_per_kloc:
            type: number
            description: "Regressions per 1000 lines of code"
          recent_changes:
            type: object
            properties:
              last_30_days:
                type: integer
              last_7_days:
                type: integer
          complexity_score:
            type: number
            minimum: 0
            maximum: 100
            description: "Code complexity (cyclomatic)"
          test_coverage_percent:
            type: number
            minimum: 0
            maximum: 100
          contributors:
            type: integer
            description: "Number of contributors"

      risk_factors:
        type: object
        properties:
          high_complexity:
            type: boolean
          frequent_changes:
            type: boolean
          multiple_regressions:
            type: boolean
          low_test_coverage:
            type: boolean
          critical_path:
            type: boolean
          many_contributors:
            type: boolean

      recommended_actions:
        type: array
        items:
          type: string
        description: "Actionable recommendations"

      test_priority:
        type: string
        enum: [critical, high, medium, low]
        description: "Test execution priority"

  # Per-test record of how well a test catches regressions.
  TestEffectiveness:
    type: object
    required:
      - test_path
      - regression_detection_rate
    description: "Effectiveness metrics for a test"
    properties:
      test_path:
        type: string
        description: "Path to test file"

      regression_detection_rate:
        type: number
        minimum: 0
        maximum: 1
        description: "Percentage of regressions caught (0-1)"

      detected_regressions:
        type: array
        items:
          type: string
          pattern: "^REG-[0-9]{4}$"
        description: "Regressions this test caught"

      missed_regressions:
        type: array
        items:
          type: object
          properties:
            regression_id:
              type: string
            reason:
              type: string
              description: "Why test missed this regression"
        description: "Regressions this test should have caught"

      false_positive_rate:
        type: number
        minimum: 0
        maximum: 1
        description: "Rate of false alarms"

      execution_stats:
        type: object
        properties:
          avg_duration_ms:
            type: number
            description: "Average execution time"
          success_rate:
            type: number
            minimum: 0
            maximum: 1
            description: "Rate of passing (stability)"
          flakiness_score:
            type: number
            minimum: 0
            maximum: 1
            description: "Test flakiness (0=stable, 1=very flaky)"

      priority_score:
        type: number
        minimum: 0
        maximum: 10
        description: "Test value score (0-10)"

      recommendations:
        type: array
        items:
          type: string
        description: "How to improve this test"

  # A reusable fix, identified independently of any single regression pattern.
  FixTemplate:
    type: object
    required:
      - template_id
      - name
      - pattern
    description: "Reusable fix pattern"
    properties:
      template_id:
        type: string
        pattern: "^FT-[0-9]{3}$"
        description: "Fix template identifier"

      name:
        type: string
        description: "Template name"
        examples:
          - "null-check"
          - "type-validation"
          - "error-handling"

      pattern:
        type: string
        description: "Template code"

      applies_to:
        type: object
        properties:
          categories:
            type: array
            items:
              type: string
          languages:
            type: array
            items:
              type: string
          error_patterns:
            type: array
            items:
              type: string

      examples:
        type: array
        items:
          type: object
          properties:
            before:
              type: string
            after:
              type: string
            context:
              type: string

      effectiveness:
        type: number
        minimum: 0
        maximum: 1
        description: "Success rate when applied"

      usage_count:
        type: integer
        description: "Times this template was used"

  # A risk forecast for one changed file; actual_outcome is filled in later
  # so the prediction can be scored.
  RegressionPrediction:
    type: object
    required:
      - prediction_id
      - file
      - risk_level
      - confidence
    description: "Prediction of regression risk"
    properties:
      prediction_id:
        type: string
        format: uuid

      timestamp:
        type: string
        format: date-time

      file:
        type: string
        description: "File being analyzed"

      change_type:
        type: string
        enum: [addition, modification, deletion, rename]

      lines_changed:
        type: integer

      risk_level:
        type: string
        enum: [critical, high, medium, low]
        description: "Predicted risk level"

      confidence:
        type: number
        minimum: 0
        maximum: 1
        description: "Confidence in prediction"

      risk_factors:
        type: array
        items:
          type: object
          properties:
            factor:
              type: string
              description: "Risk factor description"
            weight:
              type: number
              minimum: 0
              maximum: 1
        description: "Factors contributing to risk"

      similar_past_changes:
        type: array
        items:
          type: object
          properties:
            commit:
              type: string
            lines_changed:
              type: integer
            result:
              type: string
              enum: [clean, regression]
            regression_id:
              type: string
            time_to_detect_hours:
              type: number
        description: "Similar changes from history"

      recommendations:
        type: array
        items:
          type: string
        description: "Recommended actions"

      suggested_tests:
        type: array
        items:
          type: string
        description: "Tests to run for this change"

      actual_outcome:
        type: object
        properties:
          regression_occurred:
            type: boolean
          regression_id:
            type: string
          prediction_correct:
            type: boolean
        description: "Actual outcome (for learning)"

# Test Prioritization Algorithm
test_prioritization:
  description: "Algorithm for prioritizing test execution"

  # The five factor weights below sum to 1.00.
  factors:
    code_change_correlation:
      weight: 0.30
      description: "How often this test catches changes in affected code"
    historical_regression_detection:
      weight: 0.25
      description: "Past regression detection rate"
    code_hotspot_coverage:
      weight: 0.20
      description: "Whether test covers high-risk areas"
    recent_failure_trend:
      weight: 0.15
      description: "Recent failure patterns"
    execution_efficiency:
      weight: 0.10
      description: "Test execution speed"

  # NOTE(review): thresholds look like lower bounds on the 0-10 priority_score
  # defined in TestEffectiveness - confirm against the consuming code.
  priority_levels:
    critical:
      threshold: 8.0
      description: "Run first, never skip"
    high:
      threshold: 6.0
      description: "Run early"
    medium:
      threshold: 4.0
      description: "Standard priority"
    low:
      threshold: 0.0
      description: "Run if time allows, may skip in fast mode"

# Cross-Project Learning
cross_project_learning:
  description: "Share learning across projects"

  # Lists ten of the twelve RegressionPattern categories; dependency_issue and
  # other are not listed here.
  shared_patterns:
    - null_undefined_access
    - type_mismatch
    - missing_validation
    - off_by_one
    - race_condition
    - logic_error
    - resource_leak
    - missing_error_handling
    - api_misuse
    - configuration_error

  privacy:
    anonymize_file_paths: true
    anonymize_business_logic: true
    share_only_patterns: true
    require_opt_in: true

  storage:
    location: "~/.aiwg/global-learning/"
    format: yaml
    encryption: optional

# Metrics
metrics:
  learning_effectiveness:
    pattern_accuracy:
      description: "Pattern matching accuracy"
      target: 0.85
      calculation: "correct_matches / total_matches"

    fix_template_success_rate:
      description: "Fix template effectiveness"
      target: 0.90
      calculation: "successful_fixes / total_applications"

    prediction_accuracy:
      description: "Risk prediction accuracy"
      target: 0.75
      calculation: "correct_predictions / total_predictions"

    test_prioritization_efficiency:
      description: "Time saved by prioritization"
      target: 0.50
      calculation: "time_saved / baseline_time"

    false_positive_rate:
      description: "Rate of spurious alerts"
      target: 0.10
      calculation: "false_positives / total_predictions"

    cross_project_reuse:
      description: "Pattern reuse across projects"
      target: 0.30
      calculation: "patterns_reused / total_patterns"

# Integration Hooks
integration:
  ralph_hooks:
    on_test_failure:
      - query_similar_past_failures
      - retrieve_fix_templates
      - apply_learned_patterns
      - update_pattern_effectiveness

    on_test_success:
      - record_successful_fix
      - increment_fix_effectiveness
      - update_confidence_scores

    on_regression_detected:
      - create_regression_record
      - analyze_root_cause
      - store_in_learning_memory
      - update_hotspot_scores
      - adjust_test_priorities

  debug_memory:
    enabled: true
    integration_path: "@agentic/code/addons/ralph/schemas/debug-memory.yaml"
    cross_session_learning: true

  regression_schema:
    enabled: true
    integration_path: "@agentic/code/frameworks/sdlc-complete/schemas/testing/regression.yaml"
    baseline_integration: true

# Storage Structure
storage:
  paths:
    patterns: ".aiwg/ralph/learning/regression-patterns.yaml"
    hotspots: ".aiwg/ralph/learning/code-hotspots.yaml"
    test_effectiveness: ".aiwg/ralph/learning/test-effectiveness.yaml"
    fix_templates: ".aiwg/ralph/learning/fix-templates/"
    predictions: ".aiwg/ralph/learning/predictions/"
    analytics: ".aiwg/ralph/learning/analytics/"

  retention:
    regression_records: 365  # days
    pattern_library: -1  # forever
    effectiveness_metrics: 180
    predictions: 90

  indexing:
    by_pattern_category: true
    by_file_path: true
    by_regression_id: true
    by_risk_score: true

# Examples
# NOTE(review): JSON Schema 2020-12 defines `examples` as an array-valued
# annotation keyword, while this top-level key holds a mapping. Strict
# meta-schema validation may reject it - confirm with the consuming tooling
# before renaming, since readers may depend on the key name.
examples:
  pattern_example:
    pattern_id: "RP-001"
    pattern_name: "null_access_without_check"
    category: null_undefined_access
    occurrences:
      - regression_id: "REG-0023"
        file: "src/auth/validate.ts"
        line: 15
        function: "validateInput"
        root_cause: "Missing null check before property access"
        fix: "Added early return for null/undefined"
        effectiveness: 1.0
        fix_time_hours: 1.2
        recurred: false
    statistics:
      total_occurrences: 3
      avg_fix_time_hours: 1.5
      recurrence_rate: 0.0
    confidence: 0.95

  hotspot_example:
    file: "src/auth/validate.ts"
    risk_score: 8.5
    regression_history:
      - regression_id: "REG-0023"
        category: null_undefined_access
        severity: high
    metrics:
      total_regressions: 3
      regressions_per_kloc: 2.1
      test_coverage_percent: 65
    risk_factors:
      high_complexity: true
      frequent_changes: true
      multiple_regressions: true
    test_priority: critical

  prediction_example:
    # A well-formed UUID, as required by `format: uuid` on
    # RegressionPrediction.prediction_id.
    prediction_id: "3f2504e0-4f89-41d3-9a0c-0305e82c3301"
    file: "src/payments/process.ts"
    change_type: modification
    lines_changed: 45
    risk_level: high
    confidence: 0.82
    risk_factors:
      - factor: "Known hotspot (REG-0034, REG-0067)"
        weight: 0.35
      - factor: "Large change (45 lines)"
        weight: 0.25
    recommendations:
      - "Run full payment test suite"
      - "Add integration tests for new code paths"
    suggested_tests:
      - "test/payments/process.test.ts"
      - "test/integration/payment-flow.test.ts"

# Validation Rules
validation:
  pattern_creation:
    min_occurrences: 2
    min_confidence: 0.70

  hotspot_scoring:
    factors_required: 3
    min_risk_score: 0.0
    max_risk_score: 10.0

  prediction_acceptance:
    min_confidence: 0.60
    require_similar_history: false

# References
references:
  research:
    - "@.aiwg/research/findings/REF-013-metagpt.md"
    - "@.aiwg/research/findings/REF-015-self-refine.md"
  schemas:
    - "@agentic/code/addons/ralph/schemas/debug-memory.yaml"
    - "@agentic/code/frameworks/sdlc-complete/schemas/testing/regression.yaml"
    - "@agentic/code/addons/ralph/schemas/iteration-analytics.yaml"
  rules:
    - "@.claude/rules/executable-feedback.md"
    - "@.claude/rules/best-output-selection.md"
  skills:
    - "@agentic/code/frameworks/sdlc-complete/skills/regression-learning/SKILL.md"
    - "@agentic/code/frameworks/sdlc-complete/skills/regression-baseline/SKILL.md"
    - "@agentic/code/frameworks/sdlc-complete/skills/regression-bisect/SKILL.md"