aiwg

Version:

Cognitive architecture for AI-augmented software development with structured memory, ensemble validation, and closed-loop correction. FAIR-aligned artifacts, 84% cost reduction via human-in-the-loop, standards adopted by 100+ organizations.

aiwg.io

jmagly/aiwg

1,158 lines (1,028 loc) • 29.9 kB

YAML

# Regression Metrics Dashboard Schema
# Based on REF-013 MetaGPT
# Finding: Executable feedback and metrics enable continuous quality improvement
# Issue: #101
#
# Layout of this document:
#   - Root schema: the four required configuration objects plus an optional
#     `current_snapshot`.
#   - `$defs`: reusable definitions referenced through `$ref`.
#   - `examples` / `references`: non-validating companion data.
#
# NOTE(review): the top-level `examples` value is a mapping of named examples,
# whereas the JSON Schema `examples` annotation is specified as an array. It is
# kept as a mapping so that consumers addressing examples by name keep
# working; confirm whether strict meta-schema validation is required.

$schema: "https://json-schema.org/draft/2020-12/schema"
$id: "https://aiwg.io/schemas/regression-dashboard/v1"
title: "Regression Metrics Dashboard Schema"
description: |
  Schema for regression metrics dashboard that tracks and visualizes
  regression testing health. Integrates with regression.yaml to provide
  actionable insights for Metrics Analyst agent.

  Key principles:
  - Real-time visibility into regression health
  - Trend analysis to detect quality degradation
  - Actionable alerts for intervention
  - Component-level breakdown for targeted improvement

type: object
required:
  - dashboard_config
  - metrics_definitions
  - alerts_config
  - data_sources

properties:
  dashboard_config:
    $ref: "#/$defs/DashboardConfig"
    description: "Dashboard layout and configuration"

  metrics_definitions:
    $ref: "#/$defs/MetricsDefinitions"
    description: "Definitions for all tracked metrics"

  alerts_config:
    $ref: "#/$defs/AlertsConfig"
    description: "Alert thresholds and notification settings"

  data_sources:
    $ref: "#/$defs/DataSources"
    description: "Where to pull data from"

  current_snapshot:
    $ref: "#/$defs/DashboardSnapshot"
    description: "Current dashboard state"

$defs:
  # Layout, refresh cadence and default time window of the dashboard.
  DashboardConfig:
    type: object
    required:
      - layout
      - refresh_rate
      - time_range
    properties:
      layout:
        type: object
        properties:
          sections:
            type: array
            items:
              type: object
              properties:
                id:
                  type: string
                  enum:
                    - summary_cards
                    - trend_charts
                    - heatmap
                    - leaderboard
                    - recent_regressions
                    - alert_banner
                order:
                  type: integer
                  description: "Display order (1 = top)"
                visible:
                  type: boolean
                  default: true
                size:
                  type: string
                  enum: [small, medium, large, full_width]
            description: "Dashboard sections in display order"
          theme:
            type: string
            enum: [light, dark, auto]
            default: auto
          compact_mode:
            type: boolean
            default: false
            description: "Use compact layout for more data density"

      refresh_rate:
        type: object
        properties:
          auto_refresh:
            type: boolean
            default: true
          interval_seconds:
            type: integer
            default: 300
            minimum: 30
            description: "Auto-refresh interval"
          real_time_metrics:
            type: array
            items:
              type: string
            description: "Metrics that update in real-time (e.g., active regressions)"

      time_range:
        type: object
        properties:
          default_range:
            type: string
            enum:
              - last_24h
              - last_week
              - last_sprint
              - last_month
              - last_quarter
              - custom
            default: last_week
          custom_range:
            type: object
            properties:
              start:
                type: string
                format: date-time
              end:
                type: string
                format: date-time
          comparison_mode:
            type: boolean
            default: true
            description: "Show comparison to previous period"
          granularity:
            type: string
            enum: [hourly, daily, weekly, sprint]
            default: daily

  # The set of tracked metrics; each entry is a MetricDefinition.
  # The first six are mandatory, the remaining three are optional.
  MetricsDefinitions:
    type: object
    required:
      - regression_rate
      - mttd
      - mttf
      - escape_rate
      - recurrence_rate
      - coverage_by_type
    properties:
      regression_rate:
        $ref: "#/$defs/MetricDefinition"
        description: "Regressions detected per time period"
      mttd:
        $ref: "#/$defs/MetricDefinition"
        description: "Mean Time to Detect (hours)"
      mttf:
        $ref: "#/$defs/MetricDefinition"
        description: "Mean Time to Fix (hours)"
      escape_rate:
        $ref: "#/$defs/MetricDefinition"
        description: "Percentage of regressions reaching production"
      recurrence_rate:
        $ref: "#/$defs/MetricDefinition"
        description: "Percentage of regressions recurring"
      coverage_by_type:
        $ref: "#/$defs/MetricDefinition"
        description: "Test coverage breakdown by regression type"
      velocity_impact:
        $ref: "#/$defs/MetricDefinition"
        description: "Regression impact on sprint velocity"
      false_positive_rate:
        $ref: "#/$defs/MetricDefinition"
        description: "Percentage of regression detections that were false positives"
      test_effectiveness:
        $ref: "#/$defs/MetricDefinition"
        description: "Percentage of regressions caught by automated tests"

  # One metric: identity, unit, how it is calculated, and how it is shown.
  MetricDefinition:
    type: object
    required:
      - metric_id
      - display_name
      - unit
      - calculation
    properties:
      metric_id:
        type: string
        description: "Unique identifier for this metric"
      display_name:
        type: string
        description: "Human-readable name"
      description:
        type: string
        description: "Explanation of what this metric measures"
      unit:
        type: string
        enum:
          - count
          - percentage
          - hours
          - days
          - rate
          - ratio
        description: "Unit of measurement"
      calculation:
        type: object
        required:
          - formula
        properties:
          formula:
            type: string
            description: "Calculation formula in plain text"
            examples:
              - "total_regressions / total_commits"
              - "(detection_timestamp - introduction_timestamp) / 3600000"
              - "(production_regressions / total_regressions) * 100"
          aggregation:
            type: string
            enum: [sum, average, median, p95, p99, count, max, min]
            default: average
          # NOTE(review): placed under `calculation` next to `aggregation`;
          # the nesting could not be confirmed from the collapsed source.
          filters:
            type: object
            properties:
              severity:
                type: array
                items:
                  type: string
                  enum: [critical, high, medium, low]
              status:
                type: array
                items:
                  type: string
              regression_type:
                type: array
                items:
                  type: string
      thresholds:
        type: object
        properties:
          excellent:
            type: number
            description: "Value considered excellent (green)"
          good:
            type: number
            description: "Value considered good (light green)"
          acceptable:
            type: number
            description: "Value considered acceptable (yellow)"
          concerning:
            type: number
            description: "Value considered concerning (orange)"
          critical:
            type: number
            description: "Value considered critical (red)"
      trend:
        type: object
        properties:
          direction:
            type: string
            enum: [improving, stable, degrading]
          change_percent:
            type: number
            description: "Percentage change from previous period"
          previous_value:
            type: number
      visualization:
        type: object
        properties:
          type:
            type: string
            enum:
              - number_card
              - line_chart
              - bar_chart
              - heatmap
              - pie_chart
              - gauge
            default: number_card
          color_scheme:
            type: string
            enum: [traffic_light, gradient, monochrome]
            default: traffic_light

  # Alert thresholds per metric, notification channels, escalation rules
  # and duplicate-alert suppression.
  AlertsConfig:
    type: object
    required:
      - thresholds
      - notification_channels
    properties:
      enabled:
        type: boolean
        default: true

      thresholds:
        type: object
        properties:
          regression_rate:
            type: object
            properties:
              critical:
                type: number
                default: 10
                description: "Regressions per week threshold"
              high:
                type: number
                default: 5
          mttd:
            type: object
            properties:
              critical:
                type: number
                default: 72
                description: "Hours threshold"
              high:
                type: number
                default: 48
          mttf:
            type: object
            properties:
              critical:
                type: number
                default: 120
                description: "Hours threshold"
              high:
                type: number
                default: 72
          escape_rate:
            type: object
            properties:
              critical:
                type: number
                default: 10
                description: "Percentage threshold"
              high:
                type: number
                default: 5
          recurrence_rate:
            type: object
            properties:
              critical:
                type: number
                default: 5
                description: "Percentage threshold"
              high:
                type: number
                default: 2
          active_critical_regressions:
            type: object
            properties:
              critical:
                type: number
                default: 1
                description: "Count threshold"

      notification_channels:
        type: array
        items:
          type: object
          properties:
            channel:
              type: string
              enum:
                - dashboard_banner
                - email
                - slack
                - issue_comment
                - webhook
            severity_levels:
              type: array
              items:
                type: string
                enum: [critical, high, medium, low]
              default: [critical, high]
            config:
              type: object
              description: "Channel-specific configuration"

      escalation:
        type: object
        properties:
          enabled:
            type: boolean
            default: true
          rules:
            type: array
            items:
              type: object
              properties:
                condition:
                  type: string
                  description: "Condition that triggers escalation"
                  examples:
                    - "critical_regression_open > 24h"
                    - "mttf_weekly > 120h"
                    - "escape_rate > 10%"
                escalate_to:
                  type: array
                  items:
                    type: string
                  description: "Who to escalate to"
                notification_template:
                  type: string

      alert_suppression:
        type: object
        properties:
          enabled:
            type: boolean
            default: true
          cooldown_minutes:
            type: integer
            default: 60
            description: "Minimum time between duplicate alerts"

  # Locations and endpoints the dashboard reads its data from.
  # Only `regression_records` is required.
  DataSources:
    type: object
    required:
      - regression_records
    properties:
      regression_records:
        type: object
        properties:
          path:
            type: string
            default: ".aiwg/testing/regressions/"
            description: "Path to regression records"
          schema:
            type: string
            default: "@agentic/code/frameworks/sdlc-complete/schemas/testing/regression.yaml"

      git_history:
        type: object
        properties:
          enabled:
            type: boolean
            default: true
          repository_path:
            type: string
            default: "."
          commit_range:
            type: string
            description: "Git commit range to analyze"

      ci_cd_pipeline:
        type: object
        properties:
          enabled:
            type: boolean
            default: true
          api_endpoint:
            type: string
          metrics:
            type: array
            items:
              type: string
            description: "Metrics to pull from CI/CD"
            examples:
              - "test_pass_rate"
              - "build_duration"
              - "pipeline_failures"

      test_results:
        type: object
        properties:
          path:
            type: string
            default: ".aiwg/testing/results/"
          format:
            type: string
            enum: [junit, json, tap]
            default: junit

      issue_tracker:
        type: object
        properties:
          enabled:
            type: boolean
            default: false
          api_endpoint:
            type: string
          label_filter:
            type: array
            items:
              type: string
            default: ["regression", "bug"]

      production_monitoring:
        type: object
        properties:
          enabled:
            type: boolean
            default: false
          api_endpoint:
            type: string
          metrics:
            type: array
            items:
              type: string

  DashboardSnapshot:
    type: object
    description: "Current state of the dashboard"
    properties:
      snapshot_id:
        type: string
        format: uuid
      generated_at:
        type: string
        format: date-time
      time_period:
        type: object
        properties:
          start:
            type: string
            format: date-time
          end:
            type: string
            format: date-time
          label:
            type: string
            description: "Human-readable label (e.g., 'Last Week')"
      summary_cards:
        $ref: "#/$defs/SummaryCards"
      trend_charts:
        $ref: "#/$defs/TrendCharts"
      heatmap:
        $ref: "#/$defs/ComponentHeatmap"
      leaderboard:
        $ref: "#/$defs/ProblemAreaLeaderboard"
      recent_regressions:
        $ref: "#/$defs/RecentRegressions"
      alerts:
        type: array
        items:
          $ref: "#/$defs/Alert"

  SummaryCards:
    type: object
    description: "Key metrics displayed as summary cards"
    properties:
      regression_rate:
        $ref: "#/$defs/MetricCard"
      mttd:
        $ref: "#/$defs/MetricCard"
      mttf:
        $ref: "#/$defs/MetricCard"
      escape_rate:
        $ref: "#/$defs/MetricCard"
      active_regressions:
        $ref: "#/$defs/MetricCard"
      recurrence_rate:
        $ref: "#/$defs/MetricCard"

  # A single summary card: current value, trend versus the previous period,
  # threshold-derived status and optional sparkline data.
  MetricCard:
    type: object
    properties:
      value:
        type: number
        description: "Current value"
      unit:
        type: string
      trend:
        type: object
        properties:
          direction:
            type: string
            enum: [up, down, stable]
          change_percent:
            type: number
          previous_value:
            type: number
      status:
        type: string
        enum: [excellent, good, acceptable, concerning, critical]
        description: "Status based on thresholds"
      sparkline:
        type: array
        items:
          type: number
        description: "Mini chart data for last N periods"

  TrendCharts:
    type: object
    description: "Time-series trend visualizations"
    properties:
      regression_rate_over_time:
        $ref: "#/$defs/TimeSeriesData"
      mttd_over_time:
        $ref: "#/$defs/TimeSeriesData"
      mttf_over_time:
        $ref: "#/$defs/TimeSeriesData"
      severity_distribution:
        $ref: "#/$defs/TimeSeriesData"
      type_distribution:
        $ref: "#/$defs/TimeSeriesData"

  TimeSeriesData:
    type: object
    properties:
      data_points:
        type: array
        items:
          type: object
          properties:
            timestamp:
              type: string
              format: date-time
            value:
              type: number
            label:
              type: string
      comparison_data:
        type: array
        items:
          type: object
        description: "Data from previous period for comparison"
      annotations:
        type: array
        items:
          type: object
          properties:
            timestamp:
              type: string
              format: date-time
            label:
              type: string
            type:
              type: string
              enum: [release, incident, milestone]
        description: "Notable events marked on chart"

  ComponentHeatmap:
    type: object
    description: "Heatmap showing which components have most regressions"
    properties:
      components:
        type: array
        items:
          type: object
          required:
            - component_name
            - regression_count
          properties:
            component_name:
              type: string
              description: "Component or module name"
            regression_count:
              type: integer
              description: "Number of regressions in this component"
            severity_breakdown:
              type: object
              properties:
                critical:
                  type: integer
                high:
                  type: integer
                medium:
                  type: integer
                low:
                  type: integer
            trend:
              type: string
              enum: [improving, stable, degrading]
            color_intensity:
              type: number
              minimum: 0
              maximum: 1
              description: "Heat intensity (0 = cold, 1 = hot)"
      layout:
        type: string
        enum: [grid, treemap, sunburst]
        default: grid

  ProblemAreaLeaderboard:
    type: object
    description: "Top problematic areas ranked by regression frequency"
    properties:
      ranking_metric:
        type: string
        enum:
          - regression_count
          - severity_weighted_count
          - mttf
          - recurrence_rate
        default: severity_weighted_count
      top_areas:
        type: array
        items:
          type: object
          required:
            - rank
            - area
            - score
          properties:
            rank:
              type: integer
              minimum: 1
            area:
              type: string
              description: "Component, module, or feature area"
            score:
              type: number
              description: "Calculated score based on ranking metric"
            regression_count:
              type: integer
            critical_count:
              type: integer
            average_mttf:
              type: number
              description: "Average time to fix (hours)"
            recurrence_count:
              type: integer
            trend:
              type: string
              enum: [improving, stable, degrading]
            recommended_actions:
              type: array
              items:
                type: string
              description: "Suggested improvements for this area"
      max_displayed:
        type: integer
        default: 10
        description: "Maximum number of areas to show"

  RecentRegressions:
    type: object
    description: "List of recent regression detections"
    properties:
      regressions:
        type: array
        items:
          type: object
          required:
            - regression_id
            - type
            - severity
            - status
            - detected_at
          properties:
            regression_id:
              type: string
            type:
              type: string
            severity:
              type: string
            status:
              type: string
            detected_at:
              type: string
              format: date-time
            age_hours:
              type: number
              description: "Hours since detection"
            component:
              type: string
            introduced_by:
              type: object
              properties:
                commit:
                  type: string
                author:
                  type: string
            # NOTE(review): placed on the regression entry rather than inside
            # `introduced_by`, following its description; confirm the nesting.
            link:
              type: string
              description: "Link to full regression record"
      max_displayed:
        type: integer
        default: 20
      filters:
        type: object
        properties:
          severity:
            type: array
            items:
              type: string
          status:
            type: array
            items:
              type: string
          time_range:
            type: string

  Alert:
    type: object
    required:
      - alert_id
      - severity
      - message
      - triggered_at
    properties:
      alert_id:
        type: string
        format: uuid
      severity:
        type: string
        enum: [critical, high, medium, low]
      message:
        type: string
        description: "Alert message"
      triggered_at:
        type: string
        format: date-time
      metric:
        type: string
        description: "Metric that triggered the alert"
      current_value:
        type: number
      threshold_value:
        type: number
      recommended_action:
        type: string
        description: "Suggested remediation"
      status:
        type: string
        enum: [active, acknowledged, resolved, suppressed]
        default: active
      acknowledged_by:
        type: string
      acknowledged_at:
        type: string
        format: date-time

# Protocol for Dashboard Usage
#
# 1. DATA COLLECTION
#    - Pull regression records from .aiwg/testing/regressions/
#    - Pull test results from CI/CD pipeline
#    - Pull git commit history
#    - Optional: Pull production monitoring data
#
# 2. METRIC CALCULATION
#    - Calculate all defined metrics per formulas
#    - Apply filters and aggregations
#    - Compute trends vs previous period
#    - Classify status based on thresholds
#
# 3. VISUALIZATION GENERATION
#    - Render summary cards with current values and trends
#    - Generate trend charts with time-series data
#    - Build component heatmap
#    - Compile problem area leaderboard
#    - List recent regressions
#
# 4. ALERT EVALUATION
#    - Check metric values against alert thresholds
#    - Generate alerts for threshold violations
#    - Apply suppression rules
#    - Send notifications via configured channels
#
# 5. DASHBOARD REFRESH
#    - Auto-refresh at configured interval
#    - On-demand refresh via manual trigger
#    - Real-time updates for critical metrics
#
# 6. ACTION WORKFLOW
#    - User clicks metric for drill-down
#    - User investigates regression from recent list
#    - User acknowledges alerts
#    - User exports data for reporting

# Integration with Metrics Analyst Agent
#
# The Metrics Analyst agent (@.claude/agents/metrics-analyst.md):
# - Monitors this dashboard for threshold violations
# - Generates periodic regression reports
# - Identifies trends requiring intervention
# - Recommends process improvements based on metrics
# - Escalates critical issues to relevant agents

# Metrics Formulas
#
# regression_rate = total_regressions / time_period_commits
# mttd = avg(detection_timestamp - introduction_timestamp) in hours
# mttf = avg(fix_timestamp - detection_timestamp) in hours
# escape_rate = (production_regressions / total_regressions) * 100
# recurrence_rate = (recurring_regressions / total_regressions) * 100
# coverage_by_type = count_by_type / total_regressions * 100
# velocity_impact = (story_points_lost_to_regressions / total_story_points) * 100
# false_positive_rate = (false_positives / total_detections) * 100
# test_effectiveness = (automated_detections / total_detections) * 100

# Threshold Recommendations
#
# | Metric              | Excellent | Good | Acceptable | Concerning | Critical |
# |---------------------|-----------|------|------------|------------|----------|
# | regression_rate     | <1/wk     | <3   | <5         | <10        | >=10     |
# | mttd (hours)        | <12       | <24  | <48        | <72        | >=72     |
# | mttf (hours)        | <24       | <48  | <72        | <120       | >=120    |
# | escape_rate (%)     | <1        | <3   | <5         | <10        | >=10     |
# | recurrence_rate (%) | 0         | <1   | <2         | <5         | >=5      |
# | test_effectiveness  | >95       | >90  | >80        | >70        | <=70     |

# Examples

examples:
  weekly_dashboard:
    dashboard_config:
      layout:
        sections:
          - id: alert_banner
            order: 1
            visible: true
            size: full_width
          - id: summary_cards
            order: 2
            visible: true
            size: full_width
          - id: trend_charts
            order: 3
            visible: true
            size: large
          - id: heatmap
            order: 4
            visible: true
            size: medium
          - id: leaderboard
            order: 5
            visible: true
            size: medium
          - id: recent_regressions
            order: 6
            visible: true
            size: full_width
      refresh_rate:
        auto_refresh: true
        interval_seconds: 300
      time_range:
        default_range: last_week
        comparison_mode: true

    metrics_definitions:
      regression_rate:
        metric_id: "regression_rate"
        display_name: "Regression Rate"
        description: "Number of regressions detected per week"
        unit: rate
        calculation:
          formula: "total_regressions / weeks_in_period"
          aggregation: average
        thresholds:
          excellent: 1
          good: 3
          acceptable: 5
          concerning: 10
          critical: 15
        visualization:
          type: number_card
          color_scheme: traffic_light
      # The entries below carry only the fields MetricDefinition requires, so
      # that this example satisfies MetricsDefinitions.required. Formulas
      # follow the "Metrics Formulas" section of this file.
      mttd:
        metric_id: "mttd"
        display_name: "Mean Time to Detect"
        description: "Mean Time to Detect (hours)"
        unit: hours
        calculation:
          formula: "(detection_timestamp - introduction_timestamp) / 3600000"
          aggregation: average
      mttf:
        metric_id: "mttf"
        display_name: "Mean Time to Fix"
        description: "Mean Time to Fix (hours)"
        unit: hours
        calculation:
          formula: "(fix_timestamp - detection_timestamp) / 3600000"
          aggregation: average
      escape_rate:
        metric_id: "escape_rate"
        display_name: "Escape Rate"
        description: "Percentage of regressions reaching production"
        unit: percentage
        calculation:
          formula: "(production_regressions / total_regressions) * 100"
      recurrence_rate:
        metric_id: "recurrence_rate"
        display_name: "Recurrence Rate"
        description: "Percentage of regressions recurring"
        unit: percentage
        calculation:
          formula: "(recurring_regressions / total_regressions) * 100"
      coverage_by_type:
        metric_id: "coverage_by_type"
        display_name: "Coverage by Type"
        description: "Test coverage breakdown by regression type"
        unit: percentage
        calculation:
          formula: "count_by_type / total_regressions * 100"

    alerts_config:
      enabled: true
      thresholds:
        regression_rate:
          critical: 10
          high: 5
        mttd:
          critical: 72
          high: 48
        mttf:
          critical: 120
          high: 72
        escape_rate:
          critical: 10
          high: 5
        active_critical_regressions:
          critical: 1
      notification_channels:
        - channel: dashboard_banner
          severity_levels: [critical, high]
        - channel: slack
          severity_levels: [critical]
          config:
            webhook_url: "https://hooks.slack.com/services/..."
            channel: "#engineering-alerts"

    data_sources:
      regression_records:
        path: ".aiwg/testing/regressions/"
        schema: "@agentic/code/frameworks/sdlc-complete/schemas/testing/regression.yaml"
      git_history:
        enabled: true
        repository_path: "."
      ci_cd_pipeline:
        enabled: true
        api_endpoint: "https://ci.example.com/api"
      test_results:
        path: ".aiwg/testing/results/"
        format: junit

  current_snapshot_example:
    # DashboardSnapshot.snapshot_id declares `format: uuid`, so the example
    # uses a well-formed UUID.
    snapshot_id: "7f3c2a1e-5b4d-4c8a-9e6f-2d1b0a3c4e5f"
    generated_at: "2026-01-25T14:30:00Z"
    time_period:
      start: "2026-01-18T00:00:00Z"
      end: "2026-01-25T00:00:00Z"
      label: "Last Week (Jan 18-25)"

    summary_cards:
      regression_rate:
        value: 3
        unit: "per week"
        trend:
          direction: down
          change_percent: -25
          previous_value: 4
        status: good
      mttd:
        value: 18
        unit: "hours"
        trend:
          direction: down
          change_percent: -10
          previous_value: 20
        # 18h falls in the "good" band (<24); "excellent" requires <12 per the
        # Threshold Recommendations table.
        status: good
      mttf:
        value: 36
        unit: "hours"
        trend:
          direction: down
          change_percent: -20
          previous_value: 45
        status: good
      escape_rate:
        value: 2
        unit: "percent"
        trend:
          direction: stable
          change_percent: 0
          previous_value: 2
        status: good
      active_regressions:
        value: 2
        unit: "count"
        trend:
          direction: down
          change_percent: -50
          previous_value: 4
        status: acceptable

    alerts:
      # Alert.alert_id declares `format: uuid`, so the example uses a
      # well-formed UUID.
      - alert_id: "c1a4e8b2-3d5f-4a6b-8c7d-9e0f1a2b3c4d"
        severity: high
        message: "MTTF for security regressions exceeds 72h threshold"
        triggered_at: "2026-01-25T10:00:00Z"
        metric: "mttf"
        current_value: 84
        threshold_value: 72
        recommended_action: "Prioritize security regression fixes. Consider dedicated security engineer assignment."
        status: active

# Validation Checklist
#
# Before deploying dashboard:
# - [ ] All data sources configured and accessible
# - [ ] Metric formulas validated against test data
# - [ ] Thresholds calibrated to project context
# - [ ] Alert channels tested
# - [ ] Dashboard sections render correctly
# - [ ] Auto-refresh working
# - [ ] Drill-down navigation functional

# References

references:
  research:
    - "@.aiwg/research/findings/REF-013-metagpt.md"
  schemas:
    - "@agentic/code/frameworks/sdlc-complete/schemas/testing/regression.yaml"
    - "@agentic/code/addons/ralph/schemas/iteration-analytics.yaml"
  agents:
    - "@.claude/agents/metrics-analyst.md"
  rules:
    - "@.claude/rules/executable-feedback.md"
  implementation:
    - "#101"