# Package: @cloudkinetix/bmad-enhanced
# Version: (not recorded)
# Cloud-Kinetix enhanced fork of BMAD-METHOD - Breakthrough Method of Agile AI-driven Development with robust versioning and unified validation.
# Listing: 275 lines (260 loc) • 10.1 kB, YAML
# Workflow name, display title, and one-sentence summary.
name: gitlab-ci-debugging
title: "GitLab CI/CD Debugging and Issue Resolution"
description: >-
  Systematic workflow for diagnosing and resolving GitLab CI/CD pipeline
  issues with intelligent root cause analysis and cross-pack integration
  support

# Agents referenced by the phases of this workflow.
agents:
  - glab
  - dev
  - architect
# Workflow-level planning metadata: overall effort, complexity, what must be
# in place before starting, and how success is judged.
# NOTE(review): nesting `prerequisites` and `success_metrics` under `metadata`
# is an assumption - confirm against the workflow schema.
# NOTE(review): the per-phase `estimated_duration` values under `phases` sum
# to roughly 3.75-11 hours, which does not match the range given here -
# confirm which figure is intended.
metadata:
  estimated_duration: "2-8 hours"
  complexity: "medium"
  prerequisites:
    - Access to failing GitLab pipeline
    - GitLab CLI authenticated
    - Pipeline logs and error messages available
  success_metrics:
    - Pipeline failures resolved
    - Root causes identified and documented
    - Prevention measures implemented
    - Team knowledge updated
# Debugging phases, keyed by phase id. Every phase after the first names its
# predecessor in `dependencies`, forming a single linear chain:
#   issue_discovery -> root_cause_analysis -> solution_design
#   -> implementation -> integration_sync -> validation_and_monitoring
# Each phase lists the agents involved, the tasks to run, its decision points
# or checkpoints, the criteria for completion, and the artifacts it outputs.
phases:
  # First phase: has no `dependencies` entry.
  issue_discovery:
    title: "Issue Discovery and Initial Analysis"
    description: "Identify and categorize pipeline failures with initial impact assessment"
    estimated_duration: "30-60 minutes"
    agents: ["glab"]
    tasks:
      - analyze-pipeline-failures
      - monitor-pipeline-status
    decision_points:
      - failure_severity_assessment:
          question: "Is this a critical failure blocking development?"
          options:
            critical: "Proceed with urgent resolution path"
            moderate: "Continue with standard debugging workflow"
            minor: "Consider deferring or batching with other fixes"
    success_criteria:
      - Failure patterns identified
      - Impact assessment completed
      - Initial root cause hypotheses formed
      - Resolution priority established
    outputs:
      - failure_analysis_report
      - priority_classification
      - initial_root_cause_hypotheses

  root_cause_analysis:
    title: "Deep Root Cause Analysis"
    description: "Comprehensive analysis of failure causes using logs, configuration, and historical patterns"
    estimated_duration: "1-3 hours"
    dependencies: ["issue_discovery"]
    agents: ["glab", "dev"]
    tasks:
      - debug-ci-configuration
      - analyze-pipeline-failures
    decision_points:
      - analysis_depth_decision:
          question: "Is additional analysis needed beyond standard failure patterns?"
          options:
            deep_analysis: "Engage architect for complex system issues"
            standard_analysis: "Continue with standard resolution"
    success_criteria:
      - Root causes identified with confidence
      - Configuration issues documented
      - Environmental factors assessed
      - Historical pattern analysis completed
    outputs:
      - detailed_root_cause_analysis
      - configuration_issues_list
      - environmental_factor_assessment

  solution_design:
    title: "Solution Design and Planning"
    description: "Design comprehensive solution addressing root causes and preventing recurrence"
    estimated_duration: "30-90 minutes"
    dependencies: ["root_cause_analysis"]
    agents: ["dev", "architect"]
    tasks:
      - create-gitlab-workflow-plan
    decision_points:
      - solution_complexity_review:
          question: "Does the solution require architectural changes?"
          options:
            architectural_changes: "Involve architect for system design"
            configuration_changes: "Proceed with configuration fixes"
            simple_fixes: "Apply direct fixes to pipeline"
    success_criteria:
      - Solution approach defined
      - Implementation plan created
      - Risk assessment completed
      - Testing strategy established
    outputs:
      - solution_design_document
      - implementation_plan
      - risk_mitigation_strategy

  # The only phase that declares phase-local `checkpoints` instead of
  # `decision_points`.
  implementation:
    title: "Solution Implementation"
    description: "Implement fixes and improvements with proper testing and validation"
    estimated_duration: "1-4 hours"
    dependencies: ["solution_design"]
    agents: ["dev", "glab"]
    tasks:
      - debug-ci-configuration
      - monitor-pipeline-status
    checkpoints:
      - configuration_updated:
          description: "CI configuration changes applied"
          validation: "Configuration passes lint validation"
      - initial_testing:
          description: "Basic functionality testing completed"
          validation: "Pipeline executes without syntax errors"
    success_criteria:
      - Fixes implemented according to plan
      - Configuration changes validated
      - Initial testing successful
      - No regression introduced
    outputs:
      - updated_ci_configuration
      - implementation_validation_results

  integration_sync:
    title: "Cross-Pack Integration Synchronization"
    description: "Update related systems and notify stakeholders of resolution progress"
    estimated_duration: "15-30 minutes"
    dependencies: ["implementation"]
    agents: ["glab"]
    tasks:
      - sync-ci-status-to-jira
      - coordinate-parallel-ci
    decision_points:
      - integration_update_scope:
          question: "Which integration systems need to be updated?"
          options:
            all_integrations: "Update all connected systems"
            critical_only: "Update only critical integrations"
            manual_notify: "Use manual notification process"
    success_criteria:
      - JIRA issues updated with resolution status
      - Parallel development teams notified
      - Integration systems synchronized
      - Stakeholders informed
    outputs:
      - integration_update_summary
      - stakeholder_notifications

  validation_and_monitoring:
    title: "Solution Validation and Monitoring Setup"
    description: "Validate the complete solution and establish monitoring to prevent recurrence"
    estimated_duration: "30-60 minutes"
    dependencies: ["integration_sync"]
    agents: ["glab", "dev"]
    tasks:
      - monitor-pipeline-status
      - generate-ci-health-report
    decision_points:
      - validation_scope:
          question: "What level of validation testing is required?"
          options:
            comprehensive: "Full end-to-end testing including edge cases"
            standard: "Standard functionality testing"
            minimal: "Basic smoke testing"
    success_criteria:
      - Solution thoroughly validated
      - Monitoring established
      - Documentation updated
      - Knowledge shared with team
    outputs:
      - validation_test_results
      - monitoring_configuration
      - updated_documentation
# Workflow-level checkpoints: one entry per phase, each giving the checkpoint
# id and the condition that validates it.
checkpoints:
  - phase: issue_discovery
    checkpoint: failure_identified
    validation: "Pipeline failure categorized and impact assessed"
  - phase: root_cause_analysis
    checkpoint: root_cause_confirmed
    validation: "Root cause identified with supporting evidence"
  - phase: solution_design
    checkpoint: solution_approved
    validation: "Solution design reviewed and approved by team"
  - phase: implementation
    checkpoint: fix_deployed
    validation: "Fix implemented and basic testing passed"
  - phase: integration_sync
    checkpoint: integrations_updated
    validation: "All relevant integration systems notified and updated"
  - phase: validation_and_monitoring
    checkpoint: solution_validated
    validation: "Complete solution validated and monitoring established"
# Quality gates attached to individual phases. `blocking` is a real boolean;
# only `integration_verification` is non-blocking. No gate is declared for
# issue_discovery or validation_and_monitoring.
quality_gates:
  - gate: root_cause_validation
    criteria: "Root cause analysis supported by clear evidence"
    phase: root_cause_analysis
    blocking: true
  - gate: solution_review
    criteria: "Solution design addresses root cause and prevents recurrence"
    phase: solution_design
    blocking: true
  - gate: regression_testing
    criteria: "Implementation does not introduce new issues"
    phase: implementation
    blocking: true
  - gate: integration_verification
    criteria: "All integration updates successful"
    phase: integration_sync
    blocking: false
# Known risks, each paired with its mitigation and the phase it applies to.
risk_mitigation:
  - risk: "Solution introduces new failures"
    mitigation: "Implement incremental changes with rollback capability"
    phase: implementation
  - risk: "Incomplete root cause analysis"
    mitigation: "Use multiple analysis methods and historical pattern review"
    phase: root_cause_analysis
  - risk: "Integration update failures"
    mitigation: "Test integration updates in isolation before full deployment"
    phase: integration_sync
# Tooling and access needed to run the workflow, grouped into required tools,
# recommended tools, and external dependencies.
tools_and_resources:
  required_tools:
    - GitLab CLI (glab)
    - Git access
    - Pipeline log access
  recommended_tools:
    - GitLab CI linter
    - Log analysis tools
    - Collaboration tools for team communication
  external_dependencies:
    - GitLab repository access
    - CI/CD pipeline execution environment
    - Integration system access (JIRA, etc.)
# Escalation rules: the triggering condition, who or what to engage, and the
# time frame in which to do so.
escalation_paths:
  - trigger: "Critical system-wide failure"
    escalation: "Engage architect and senior dev immediately"
    timeline: "Within 15 minutes"
  - trigger: "Complex architectural issues identified"
    escalation: "Include architect in solution design phase"
    timeline: "Before implementation begins"
  - trigger: "Multiple failed resolution attempts"
    escalation: "Senior team review and alternative approach"
    timeline: "After 2 failed attempts"
# Common failure patterns with their typical resolution and average duration.
success_patterns:
  - pattern: "Configuration syntax errors"
    typical_resolution: "CI configuration debugging and validation"
    average_duration: "1-2 hours"
  - pattern: "Dependency or environment issues"
    typical_resolution: "Environment analysis and dependency updates"
    average_duration: "2-4 hours"
  - pattern: "Test failures or flaky tests"
    typical_resolution: "Test analysis and stabilization"
    average_duration: "2-6 hours"
# Integration hooks, grouped by target system. Each hook names the phase it is
# attached to and the action to perform for that phase.
integration_hooks:
  jira_integration:
    - phase: issue_discovery
      action: "Create or update JIRA issue with failure details"
    - phase: validation_and_monitoring
      action: "Update JIRA with resolution and close if appropriate"
  parallel_dev_integration:
    - phase: issue_discovery
      action: "Notify parallel development teams of potential blocking issues"
    - phase: validation_and_monitoring
      action: "Confirm resolution across all parallel development streams"
  core_bmad_integration:
    - phase: solution_design
      action: "Integrate solution into development workflow planning"
    - phase: validation_and_monitoring
      action: "Update project documentation and knowledge base"