aiwg
Version:
Cognitive architecture for AI-augmented software development with structured memory, ensemble validation, and closed-loop correction. FAIR-aligned artifacts, 84% cost reduction via human-in-the-loop, standards adopted by 100+ organizations.
770 lines (722 loc) • 23.2 kB
YAML
# Regression Testing Schema
# Based on REF-013 MetaGPT
# Finding: Executable feedback prevents cascading regressions
# Issue: #101
# Root schema: one regression-tracking document. The three structured
# members (record, baseline, report) are defined under $defs and pulled in
# by reference.
$schema: "https://json-schema.org/draft/2020-12/schema"
$id: "https://aiwg.io/schemas/regression-testing/v1"
title: "Regression Testing Schema"
description: |
  Schema for tracking and managing regression test cases that prevent
  previously fixed bugs from reoccurring. Implements MetaGPT's executable
  feedback pattern extended to regression prevention.
  Key principles:
  - Every bug fix generates a regression test
  - Regression tests are never removed
  - Baselines are preserved for comparison
  - Failures trigger immediate investigation
type: object
# `report` and `metadata` are optional; everything else must be present.
required: [regression_id, record, baseline]
properties:
  regression_id:
    type: string
    # "REG-" followed by exactly four digits.
    pattern: "^REG-[0-9]{4}$"
    description: "Unique regression test identifier (REG-XXXX format)"
    examples: ["REG-0001", "REG-0042"]
  record:
    $ref: "#/$defs/RegressionRecord"
    description: "The regression test record"
  baseline:
    $ref: "#/$defs/RegressionBaseline"
    description: "Baseline snapshot for comparison"
  report:
    $ref: "#/$defs/RegressionReport"
    description: "Generated regression report"
  # Free-form bookkeeping; none of these fields is required.
  metadata:
    type: object
    properties:
      created_at:
        type: string
        format: date-time
      created_by:
        type: string
        description: "Agent or human who created this record"
      last_updated:
        type: string
        format: date-time
      tags:
        type: array
        items:
          type: string
        description: "Tags for categorization"
# Reusable definitions referenced from the root schema via "#/$defs/<Name>".
$defs:
  # A single tracked regression: its classification, the commit that
  # introduced it, baseline vs. current snapshots, impact, and the
  # detection / resolution bookkeeping.
  RegressionRecord:
    type: object
    required:
      - id
      - type
      - severity
      - status
      - introduced_by
      - baseline
      - current
      - impact_analysis
    properties:
      id:
        type: string
        # Same REG-XXXX shape as the root-level regression_id.
        pattern: "^REG-[0-9]{4}$"
        description: "Regression identifier matching parent"
      type:
        type: string
        enum:
          - functional   # Functional behavior changed
          - performance  # Performance degraded
          - behavioral   # Behavioral change detected
          - visual       # UI/visual change detected
          - security     # Security regression
          - api          # API contract changed
          - data         # Data format or schema changed
        description: "Type of regression detected"
      severity:
        type: string
        enum:
          - critical  # System unusable or security breach
          - high      # Major functionality broken
          - medium    # Partial functionality broken
          - low       # Minor issue or edge case
        description: "Severity of the regression"
      status:
        type: string
        enum:
          - detected        # Regression detected, not yet investigated
          - confirmed       # Regression confirmed as actual issue
          - analyzing       # Root cause analysis in progress
          - fixing          # Fix in development
          - fixed           # Fix implemented and tested
          - verified        # Fix verified in production
          - false_positive  # Not actually a regression
          - wont_fix        # Acknowledged but won't fix
        description: "Current status of the regression"
      introduced_by:
        type: object
        required: [commit, timestamp]
        properties:
          commit:
            type: string
            description: "Git commit hash that introduced the regression"
          timestamp:
            type: string
            format: date-time
          author:
            type: string
            description: "Author of the commit"
          pull_request:
            type: string
            description: "PR number if applicable"
          agent:
            type: string
            description: "Agent that generated the code if applicable"
      # Expected ("before") state. Note that snapshot_id carries no pattern
      # here, unlike RegressionBaseline.snapshot_id.
      baseline:
        type: object
        required: [snapshot_id, timestamp]
        properties:
          snapshot_id:
            type: string
            description: "Reference to baseline snapshot"
          timestamp:
            type: string
            format: date-time
          commit:
            type: string
            description: "Git commit of baseline"
          behavior:
            type: string
            description: "Description of expected baseline behavior"
          artifacts:
            type: array
            items:
              type: string
            description: "Paths to baseline artifacts (screenshots, logs, etc.)"
      # Observed ("after") state; same shape as `baseline` above.
      current:
        type: object
        required: [snapshot_id, timestamp]
        properties:
          snapshot_id:
            type: string
            description: "Reference to current snapshot"
          timestamp:
            type: string
            format: date-time
          commit:
            type: string
            description: "Git commit where regression detected"
          behavior:
            type: string
            description: "Description of observed current behavior"
          artifacts:
            type: array
            items:
              type: string
            description: "Paths to current artifacts showing regression"
      impact_analysis:
        type: object
        required: [user_impact, business_impact]
        properties:
          user_impact:
            type: string
            enum:
              - blocking  # Users cannot complete tasks
              - degraded  # Users experience reduced functionality
              - annoying  # Users experience inconvenience
              - minimal   # Most users won't notice
            description: "Impact on user experience"
          business_impact:
            type: string
            enum:
              - critical  # Revenue loss or legal risk
              - high      # Customer satisfaction at risk
              - medium    # Minor customer complaints expected
              - low       # Minimal business impact
            description: "Impact on business operations"
          affected_features:
            type: array
            items:
              type: string
            description: "List of features affected by this regression"
          affected_users:
            type: object
            properties:
              percentage:
                type: number
                minimum: 0
                maximum: 100
                description: "Percentage of users affected"
              segments:
                type: array
                items:
                  type: string
                description: "User segments affected (e.g., 'premium', 'mobile')"
          workaround:
            type: object
            properties:
              exists:
                type: boolean
              # A property literally named "description", not an annotation.
              description:
                type: string
              effort:
                type: string
                enum: [trivial, easy, moderate, difficult, impossible]
      detection:
        type: object
        properties:
          method:
            type: string
            enum:
              - automated_test  # Detected by automated test suite
              - manual_test     # Detected by manual testing
              - production      # Detected in production
              - code_review     # Detected during code review
              - ci_cd           # Detected by CI/CD pipeline
            description: "How the regression was detected"
          detected_at:
            type: string
            format: date-time
          detected_by:
            type: string
            description: "Agent, user, or system that detected it"
          test_case:
            type: string
            description: "Test case that caught the regression if applicable"
      resolution:
        type: object
        properties:
          root_cause:
            type: string
            description: "Root cause analysis of the regression"
          fix_description:
            type: string
            description: "Description of the fix applied"
          fix_commit:
            type: string
            description: "Git commit hash of the fix"
          fix_applied_at:
            type: string
            format: date-time
          # NOTE(review): `verification` and `prevention` are placed under
          # `resolution` here; the copy this was restored from had lost its
          # indentation, so they may instead be siblings of `resolution` —
          # confirm against the canonical schema.
          verification:
            type: object
            properties:
              verified_by:
                type: string
              verified_at:
                type: string
                format: date-time
              verification_method:
                type: string
                enum: [automated, manual, production_monitoring]
          prevention:
            type: object
            properties:
              test_added:
                type: boolean
                description: "Regression test added to prevent recurrence"
              test_path:
                type: string
                description: "Path to the regression test"
              guardrails_added:
                type: array
                items:
                  type: string
                description: "Additional guardrails to prevent similar regressions"

  # Stored baseline snapshot. `artifacts` groups captured outputs by the
  # same categories used for regression `type` (minus behavioral/security).
  RegressionBaseline:
    type: object
    required:
      - snapshot_id
      - timestamp
      - artifacts
    description: "Baseline snapshot for regression comparison"
    properties:
      snapshot_id:
        type: string
        description: "Unique identifier for this baseline"
        # "baseline-", four digits, "-", eight lowercase hex characters.
        pattern: "^baseline-[0-9]{4}-[a-f0-9]{8}$"
        examples:
          - "baseline-0001-a1b2c3d4"
      timestamp:
        type: string
        format: date-time
        description: "When this baseline was created"
      commit:
        type: string
        description: "Git commit of the baseline"
      artifacts:
        type: object
        properties:
          functional:
            type: array
            items:
              type: object
              properties:
                path:
                  type: string
                  description: "Path to functional test output"
                checksum:
                  type: string
                  description: "SHA-256 checksum for integrity"
                # A property literally named "description".
                description:
                  type: string
            description: "Functional test outputs"
          performance:
            type: array
            items:
              type: object
              properties:
                metric:
                  type: string
                  description: "Performance metric name"
                value:
                  type: number
                unit:
                  type: string
                threshold:
                  type: object
                  properties:
                    max_degradation_percent:
                      type: number
                      description: "Maximum acceptable degradation %"
            description: "Performance benchmarks"
          visual:
            type: array
            items:
              type: object
              properties:
                path:
                  type: string
                  description: "Path to screenshot or visual snapshot"
                checksum:
                  type: string
                viewport:
                  type: object
                  properties:
                    width:
                      type: integer
                    height:
                      type: integer
            description: "Visual snapshots (screenshots)"
          api:
            type: array
            items:
              type: object
              properties:
                endpoint:
                  type: string
                request:
                  type: object
                response:
                  type: object
                  properties:
                    status:
                      type: integer
                    body:
                      type: object
                    headers:
                      type: object
            description: "API contract snapshots"
          data:
            type: array
            items:
              type: object
              properties:
                schema_path:
                  type: string
                sample_data_path:
                  type: string
                validation_rules:
                  type: array
                  items:
                    type: string
            description: "Data format snapshots"
      environment:
        type: object
        properties:
          platform:
            type: string
            description: "Platform or OS"
          runtime_version:
            type: string
            description: "Runtime version (Node, Python, etc.)"
          dependencies:
            type: object
            description: "Key dependency versions"
      metadata:
        type: object
        properties:
          created_by:
            type: string
          label:
            type: string
            description: "Human-readable label (e.g., 'v2.1.0 release baseline')"
          notes:
            type: string

  # Aggregated report over a period: counts, critical items, trends, and
  # recommendations. No top-level field is required; only `summary` has
  # required members.
  RegressionReport:
    type: object
    description: "Generated regression test report"
    properties:
      report_id:
        type: string
        description: "Unique report identifier"
      generated_at:
        type: string
        format: date-time
      period:
        type: object
        properties:
          start:
            type: string
            format: date-time
          end:
            type: string
            format: date-time
      summary:
        type: object
        required: [total_regressions, by_status, by_severity, by_type]
        properties:
          total_regressions:
            type: integer
            minimum: 0
          # Every per-bucket counter below is bounded at zero, matching
          # total_regressions: a count of regressions cannot be negative.
          # Bucket names mirror RegressionRecord.status.
          by_status:
            type: object
            properties:
              detected: {type: integer, minimum: 0}
              confirmed: {type: integer, minimum: 0}
              analyzing: {type: integer, minimum: 0}
              fixing: {type: integer, minimum: 0}
              fixed: {type: integer, minimum: 0}
              verified: {type: integer, minimum: 0}
              false_positive: {type: integer, minimum: 0}
              wont_fix: {type: integer, minimum: 0}
          # Bucket names mirror RegressionRecord.severity.
          by_severity:
            type: object
            properties:
              critical: {type: integer, minimum: 0}
              high: {type: integer, minimum: 0}
              medium: {type: integer, minimum: 0}
              low: {type: integer, minimum: 0}
          # Bucket names mirror RegressionRecord.type.
          by_type:
            type: object
            properties:
              functional: {type: integer, minimum: 0}
              performance: {type: integer, minimum: 0}
              behavioral: {type: integer, minimum: 0}
              visual: {type: integer, minimum: 0}
              security: {type: integer, minimum: 0}
              api: {type: integer, minimum: 0}
              data: {type: integer, minimum: 0}
      # Condensed entries: plain strings here, not the enums used by
      # RegressionRecord.
      critical_regressions:
        type: array
        items:
          type: object
          properties:
            regression_id:
              type: string
            type:
              type: string
            severity:
              type: string
            status:
              type: string
            introduced_by:
              type: object
            impact:
              type: string
        description: "List of critical severity regressions"
      trends:
        type: object
        properties:
          regression_rate:
            type: object
            properties:
              current_period:
                type: number
                description: "Regressions per commit in current period"
              previous_period:
                type: number
              trend:
                type: string
                enum: [improving, stable, degrading]
          mean_time_to_detect:
            type: object
            properties:
              current_hours:
                type: number
              previous_hours:
                type: number
              trend:
                type: string
                enum: [improving, stable, degrading]
          mean_time_to_fix:
            type: object
            properties:
              current_hours:
                type: number
              previous_hours:
                type: number
              trend:
                type: string
                enum: [improving, stable, degrading]
      recommendations:
        type: array
        items:
          type: object
          properties:
            category:
              type: string
              enum:
                - test_coverage
                - code_review
                - automation
                - process
                - tooling
            recommendation:
              type: string
            priority:
              type: string
              enum: [high, medium, low]
            estimated_impact:
              type: string
# Protocol for Regression Testing
#
# 1. BASELINE CREATION
# - Create baseline after each stable release
# - Capture all artifact types (functional, performance, visual, API, data)
# - Store baselines with checksums for integrity
# - Label baselines clearly (e.g., "v2.1.0 release")
#
# 2. REGRESSION DETECTION
# - Run regression suite on every commit (CI/CD)
# - Compare current state to baseline
# - Automatically create regression record if deviation detected
# - Classify by type and severity
#
# 3. REGRESSION ANALYSIS
# - Bisect commits to find introduction point
# - Analyze root cause
# - Assess user and business impact
# - Determine if workaround exists
#
# 4. REGRESSION FIX
# - Fix the regression
# - Add regression test to prevent recurrence
# - Verify fix against baseline
# - Update regression record to "verified"
#
# 5. REPORTING
# - Generate regression reports periodically
# - Track trends (regression rate, time to detect, time to fix)
# - Surface critical regressions immediately
# - Provide actionable recommendations
# Integration with Executable Feedback
#
# This schema integrates with executable-feedback.yaml:
# - Every bug fix MUST generate a regression test
# - Regression tests are added to the test suite
# - Executable feedback loop ensures regression tests pass
# - Debug memory tracks regression patterns
# Metrics
#
# Track these metrics for regression management:
#
# | Metric | Target | Purpose |
# |---------------------------|----------|----------------------------------|
# | regression_rate | <1/100 | Regressions per commit |
# | mean_time_to_detect | <24h | How quickly regressions found |
# | mean_time_to_fix | <48h | How quickly regressions resolved |
# | false_positive_rate | <5% | Accuracy of detection |
# | test_effectiveness | >95% | % regressions caught by tests |
# | recurrence_rate | <1% | Same regression occurring twice |
# Examples
# Two worked documents, keyed by scenario name. Each one carries the three
# members the root schema requires: regression_id, record, baseline.
#
# NOTE(review): JSON Schema 2020-12 defines the `examples` keyword as an
# array. The named-mapping form is kept so lookups by scenario name keep
# working — confirm with consumers before converting it to a list.
examples:
  functional_regression:
    regression_id: "REG-0001"
    record:
      id: "REG-0001"
      type: functional
      severity: high
      status: fixed
      introduced_by:
        commit: "a1b2c3d4"
        timestamp: "2026-01-24T15:30:00Z"
        author: "software-implementer-agent"
        pull_request: "#142"
      baseline:
        snapshot_id: "baseline-0001-a1b2c3d4"
        timestamp: "2026-01-20T10:00:00Z"
        # Hex-only so it reads as a real abbreviated git hash.
        commit: "e5f6a7b8"
        behavior: "User can log in with valid credentials"
      current:
        snapshot_id: "current-0001-x9y8z7w6"
        timestamp: "2026-01-24T16:00:00Z"
        commit: "a1b2c3d4"
        behavior: "Login fails with valid credentials"
      impact_analysis:
        user_impact: blocking
        business_impact: critical
        affected_features: ["user authentication", "session management"]
        affected_users:
          percentage: 100
          segments: ["all users"]
      detection:
        method: automated_test
        detected_at: "2026-01-24T16:00:00Z"
        detected_by: "ci-pipeline"
        test_case: "test/integration/auth/login.test.ts"
      resolution:
        root_cause: "Password validation logic inverted (! operator added incorrectly)"
        fix_description: "Removed erroneous negation in password check"
        fix_commit: "c9d8e7f6"
        fix_applied_at: "2026-01-24T17:00:00Z"
        # NOTE(review): `verification` and `prevention` are shown nested
        # under `resolution`; the copy this was restored from had lost its
        # indentation — confirm the nesting against the schema definition.
        verification:
          verified_by: "test-engineer-agent"
          verified_at: "2026-01-24T17:05:00Z"
          verification_method: automated
        prevention:
          test_added: true
          test_path: "test/regression/REG-0001-login-validation.test.ts"
    baseline:
      snapshot_id: "baseline-0001-a1b2c3d4"
      timestamp: "2026-01-20T10:00:00Z"
      commit: "e5f6a7b8"
      artifacts:
        functional:
          - path: ".aiwg/testing/baselines/auth-login-success.json"
            checksum: "sha256:abc123..."
            description: "Expected login success response"

  performance_regression:
    regression_id: "REG-0002"
    record:
      id: "REG-0002"
      type: performance
      severity: medium
      status: analyzing
      introduced_by:
        commit: "b2c3d4e5"
        timestamp: "2026-01-25T09:00:00Z"
      baseline:
        # Must equal the top-level baseline.snapshot_id below.
        snapshot_id: "baseline-0002-c3d4e5f6"
        timestamp: "2026-01-20T10:00:00Z"
        behavior: "API response time <200ms at p95"
      current:
        snapshot_id: "current-perf-0002"
        timestamp: "2026-01-25T09:15:00Z"
        behavior: "API response time 850ms at p95"
      impact_analysis:
        user_impact: degraded
        business_impact: medium
        affected_features: ["search API"]
        affected_users:
          percentage: 30
          segments: ["power users", "API consumers"]
      detection:
        method: ci_cd
        detected_at: "2026-01-25T09:15:00Z"
        detected_by: "performance-monitoring"
    baseline:
      # Follows the schema's baseline id pattern:
      # ^baseline-[0-9]{4}-[a-f0-9]{8}$
      snapshot_id: "baseline-0002-c3d4e5f6"
      timestamp: "2026-01-20T10:00:00Z"
      artifacts:
        performance:
          - metric: "api_response_p95"
            value: 185
            unit: "milliseconds"
            threshold:
              max_degradation_percent: 20
# Validation Rules
#
# Before marking a regression as verified:
# - [ ] Root cause identified and documented
# - [ ] Fix implemented and committed
# - [ ] Regression test added to test suite
# - [ ] All tests (including new regression test) passing
# - [ ] Baseline comparison shows regression eliminated
# - [ ] Impact analysis completed
# - [ ] Fix verified in production (if applicable)
# References
# Cross-references grouped by kind. Every entry is quoted because it starts
# with "@" or "#", which a plain YAML scalar cannot begin with.
references:
  # Research finding this schema is based on.
  research:
    - "@.aiwg/research/findings/REF-013-metagpt.md"
  # Related schemas.
  schemas:
    - "@agentic/code/frameworks/sdlc-complete/schemas/flows/executable-feedback.yaml"
    - "@agentic/code/addons/ralph/schemas/debug-memory.yaml"
    - "@agentic/code/addons/ralph/schemas/iteration-analytics.yaml"
  rules:
    - "@.claude/rules/executable-feedback.md"
  # Issue number, as listed in the file header.
  implementation:
    - "#101"
  guide:
    - "@.aiwg/testing/docs/regression-testing-guide.md"