# agentic-data-stack-community
# Version: (unspecified)
# AI Agentic Data Stack Framework - Community Edition. Open-source data engineering framework with 4 core agents, essential templates, and 3-dimensional quality validation.
# 389 lines (338 loc) • 13 kB
# YAML
# Data Analysis Template
# Standardized template for conducting comprehensive data analysis across the AI Agentic Data Stack Framework
# Template identity: id, display name, version, category and search tags.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been lost. Left flat, `name`, `version` and `description` here collide with
# the identically named keys under `template` - confirm against upstream.
metadata:
  template_id: "data-analysis-tmpl"
  name: "Data Analysis Template"
  # Quoted so the version stays a string rather than being parsed as a number.
  version: "1.0.0"
  description: "Comprehensive template for structured data analysis with statistical methods and insights generation"
  category: "analytics"
  tags: ["analysis", "statistics", "insights", "exploration", "reporting"]
  created_by: "AI Agentic Data Stack Framework"
  # Quoted so the date stays a string rather than a YAML timestamp.
  created_date: "2025-01-23"
# Template descriptor plus the list of sections the template declares.
# Each entry in `sections` is a mapping of name, description and required flag;
# every section listed here is marked required.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been lost - confirm against upstream.
# NOTE(review): this file also defines top-level `documentation` and
# `template_metadata` mappings that are not listed in `sections`; confirm
# whether that omission is intentional.
template:
  name: "Data Analysis Template"
  description: "Template for conducting systematic data analysis"
  version: "1.0.0"
  sections:
    - name: "analysis_overview"
      description: "Analysis objectives and scope definition"
      required: true
    - name: "data_exploration"
      description: "Initial data exploration and profiling"
      required: true
    - name: "analysis_methods"
      description: "Statistical methods and analytical techniques"
      required: true
    - name: "results_findings"
      description: "Analysis results and key findings"
      required: true
    - name: "insights_recommendations"
      description: "Business insights and actionable recommendations"
      required: true
    - name: "quality_validation"
      description: "Analysis quality checks and validation"
      required: true
    - name: "validation_rules"
      description: "Template validation requirements"
      required: true
# Analysis Overview
# Identifies the analysis and records its objectives, context, scope and
# stakeholders. Every value is a `${...}` placeholder; presumably these are
# filled in by a substitution step before the file is consumed - confirm.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been lost - confirm against upstream.
analysis_overview:
  # Basic Information
  analysis_id: "${analysis_id}"
  analysis_name: "${analysis_name}"
  description: "${analysis_description}"
  version: "${analysis_version}"

  # Objectives and Scope
  objectives:
    primary_objective: "${primary_objective}"
    secondary_objectives: ["${secondary_objectives}"]
    business_questions: ["${business_questions}"]
    hypotheses: ["${hypotheses}"]

  # Analysis Context
  context:
    business_context: "${business_context}"
    problem_statement: "${problem_statement}"
    expected_outcomes: ["${expected_outcomes}"]
    success_criteria: ["${success_criteria}"]

  # Scope Definition
  scope:
    time_period: "${analysis_time_period}"
    data_scope: "${data_scope}"
    geographical_scope: "${geographical_scope}"
    exclusions: ["${scope_exclusions}"]

  # Stakeholders
  stakeholders:
    analysis_owner: "${analysis_owner}"
    business_sponsor: "${business_sponsor}"
    data_consumers: ["${data_consumers}"]
    reviewers: ["${analysis_reviewers}"]
# Data Exploration
# Describes the input data sources, per-field profiling statistics grouped by
# field kind (numeric, categorical, date/time), and a data quality assessment.
# Placeholders written without quotes (e.g. `${record_count}`) are kept
# unquoted; presumably this lets the substituted value keep its native YAML
# type (number/boolean) - confirm before quoting them.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been lost. In particular, `characteristics` is assumed to belong to each
# data source entry, and `null_count` / `null_percentage` are assumed to sit
# inside `statistics` - confirm against upstream.
data_exploration:
  # Data Sources
  data_sources:
    - source_id: "${source_id}"
      source_name: "${source_name}"
      source_type: "${source_type}"  # database, file, api, stream
      data_location: "${data_location}"
      access_method: "${access_method}"

      # Data Characteristics
      characteristics:
        record_count: ${record_count}
        field_count: ${field_count}
        data_size: "${data_size}"
        update_frequency: "${update_frequency}"

  # Data Profiling
  data_profiling:
    # Numeric Fields
    numeric_fields:
      - field_name: "${field_name}"
        data_type: "${data_type}"
        statistics:
          mean: ${field_mean}
          median: ${field_median}
          mode: ${field_mode}
          std_deviation: ${field_std_dev}
          min_value: ${field_min}
          max_value: ${field_max}
          quartiles: ["${q1}", "${q2}", "${q3}"]
          outliers_count: ${outliers_count}
          null_count: ${null_count}
          null_percentage: ${null_percentage}

    # Categorical Fields
    categorical_fields:
      - field_name: "${field_name}"
        data_type: "${data_type}"
        statistics:
          unique_values: ${unique_count}
          most_frequent: "${most_frequent_value}"
          frequency_distribution: ["${frequency_distribution}"]
          null_count: ${null_count}
          null_percentage: ${null_percentage}

    # Date/Time Fields
    datetime_fields:
      - field_name: "${field_name}"
        data_type: "${data_type}"
        statistics:
          earliest_date: "${earliest_date}"
          latest_date: "${latest_date}"
          date_range: "${date_range}"
          null_count: ${null_count}

  # Data Quality Assessment
  data_quality:
    completeness:
      overall_completeness: ${overall_completeness_percentage}
      critical_fields_completeness: ${critical_fields_completeness}
    accuracy:
      data_validation_results: ["${validation_results}"]
      business_rule_violations: ${rule_violations_count}
    consistency:
      cross_field_consistency: ${cross_field_consistency_score}
      referential_integrity: ${referential_integrity_score}
    timeliness:
      data_freshness: "${data_freshness}"
      lag_indicators: ["${lag_indicators}"]
# Analysis Methods
# Lists the analytical methods applied, grouped into four lists: descriptive,
# diagnostic, predictive and advanced analytics. Each list entry carries its
# own configuration and result/metric sub-mappings.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been lost; the sub-mappings under each list entry are assumed to belong to
# that entry - confirm against upstream.
analysis_methods:
  # Descriptive Analysis
  descriptive_analysis:
    - method_name: "${descriptive_method}"
      method_type: "${method_type}"  # summary_statistics, frequency_analysis, cross_tabulation
      variables: ["${analysis_variables}"]
      purpose: "${analysis_purpose}"

      # Method Configuration
      configuration:
        grouping_variables: ["${grouping_variables}"]
        aggregation_functions: ["${aggregation_functions}"]
        filters_applied: ["${filters_applied}"]

      # Results
      results:
        summary_statistics: ["${summary_statistics}"]
        key_findings: ["${key_findings}"]
        visualizations: ["${visualizations}"]

  # Diagnostic Analysis
  diagnostic_analysis:
    - analysis_type: "${diagnostic_type}"  # correlation, regression, chi_square, anova
      research_question: "${research_question}"
      variables: ["${diagnostic_variables}"]

      # Statistical Tests
      statistical_tests:
        test_name: "${statistical_test}"
        significance_level: ${significance_level}
        test_statistic: ${test_statistic}
        p_value: ${p_value}
        confidence_interval: ["${ci_lower}", "${ci_upper}"]

      # Assumptions
      assumptions:
        assumption_checks: ["${assumption_checks}"]
        violations_detected: ["${assumption_violations}"]
        remedial_actions: ["${remedial_actions}"]

  # Predictive Analysis
  predictive_analysis:
    - model_type: "${predictive_model_type}"
      target_variable: "${target_variable}"
      predictor_variables: ["${predictor_variables}"]

      # Model Configuration
      model_config:
        algorithm: "${algorithm}"
        parameters: ["${model_parameters}"]
        training_data_split: ${training_split_percentage}
        validation_method: "${validation_method}"

      # Model Performance
      performance_metrics:
        accuracy: ${model_accuracy}
        precision: ${model_precision}
        recall: ${model_recall}
        f1_score: ${model_f1_score}
        rmse: ${model_rmse}
        r_squared: ${model_r_squared}

  # Advanced Analytics
  advanced_analytics:
    - technique: "${advanced_technique}"  # clustering, association_rules, time_series, text_analysis
      purpose: "${technique_purpose}"
      data_requirements: ["${data_requirements}"]

      # Technique Parameters
      parameters:
        algorithm_parameters: ["${algorithm_parameters}"]
        optimization_criteria: "${optimization_criteria}"
        convergence_criteria: "${convergence_criteria}"

      # Results Interpretation
      interpretation:
        pattern_identification: ["${identified_patterns}"]
        cluster_characteristics: ["${cluster_characteristics}"]
        association_rules: ["${association_rules}"]
# Results and Findings
# Records what the analysis found: key findings with their evidence and
# significance, trends/patterns, outliers/anomalies, and pairwise correlations.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been lost; `evidence` and `significance` are assumed to belong to each
# key finding entry - confirm against upstream.
results_findings:
  # Key Findings
  key_findings:
    - finding_id: "${finding_id}"
      finding_title: "${finding_title}"
      finding_description: "${finding_description}"

      # Supporting Evidence
      evidence:
        statistical_evidence: ["${statistical_evidence}"]
        visual_evidence: ["${visual_evidence}"]
        data_samples: ["${data_samples}"]

      # Significance
      significance:
        business_impact: "${business_impact}"
        statistical_significance: ${statistical_significance}
        confidence_level: ${confidence_level}
        effect_size: "${effect_size}"

  # Trends and Patterns
  trends_patterns:
    - pattern_type: "${pattern_type}"  # trend, seasonal, cyclical, irregular
      pattern_description: "${pattern_description}"
      time_period: "${pattern_time_period}"
      strength: "${pattern_strength}"  # weak, moderate, strong

  # Outliers and Anomalies
  outliers_anomalies:
    - anomaly_type: "${anomaly_type}"
      anomaly_description: "${anomaly_description}"
      detection_method: "${detection_method}"
      potential_causes: ["${potential_causes}"]

  # Correlations and Relationships
  correlations:
    - variable_pair: ["${variable_1}", "${variable_2}"]
      correlation_coefficient: ${correlation_coefficient}
      correlation_type: "${correlation_type}"  # positive, negative, none
      significance: ${correlation_significance}
# Insights and Recommendations
# Captures business insights (with their value and supporting analysis),
# actionable recommendations (with implementation details and expected
# outcomes), and the next steps that follow from the analysis.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been lost; `next_steps` is assumed to be a sibling of `recommendations`
# rather than part of each recommendation - confirm against upstream.
insights_recommendations:
  # Business Insights
  business_insights:
    - insight_id: "${insight_id}"
      insight_title: "${insight_title}"
      insight_description: "${insight_description}"

      # Business Value
      business_value:
        value_category: "${value_category}"  # cost_reduction, revenue_increase, risk_mitigation, efficiency_improvement
        quantified_impact: "${quantified_impact}"
        confidence_level: "${insight_confidence}"

      # Supporting Analysis
      supporting_analysis:
        analysis_methods: ["${supporting_methods}"]
        data_sources: ["${supporting_data}"]
        validation_checks: ["${validation_checks}"]

  # Actionable Recommendations
  recommendations:
    - recommendation_id: "${recommendation_id}"
      recommendation_title: "${recommendation_title}"
      recommendation_description: "${recommendation_description}"

      # Implementation Details
      implementation:
        priority: "${recommendation_priority}"  # high, medium, low
        effort_level: "${effort_level}"  # low, medium, high
        timeline: "${implementation_timeline}"
        resources_required: ["${required_resources}"]

      # Expected Outcomes
      expected_outcomes:
        success_metrics: ["${success_metrics}"]
        target_values: ["${target_values}"]
        measurement_approach: "${measurement_approach}"

  # Next Steps
  next_steps:
    immediate_actions: ["${immediate_actions}"]
    follow_up_analysis: ["${follow_up_analysis}"]
    monitoring_requirements: ["${monitoring_requirements}"]
# Quality Validation
# Records the quality checks on the analysis itself: a set of check flags,
# the peer review outcome, and a list of individual validation tests.
# The `analysis_quality` placeholders are unquoted; presumably they are
# substituted with booleans - confirm.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been lost - confirm against upstream.
quality_validation:
  # Analysis Quality Checks
  analysis_quality:
    data_quality_validation: ${data_quality_validated}
    methodology_appropriateness: ${methodology_appropriate}
    statistical_assumptions_met: ${assumptions_validated}
    results_reproducible: ${results_reproducible}

  # Peer Review
  peer_review:
    reviewer_name: "${reviewer_name}"
    review_date: "${review_date}"
    review_status: "${review_status}"  # pending, approved, needs_revision
    review_comments: ["${review_comments}"]

  # Validation Tests
  validation_tests:
    - test_name: "${validation_test_name}"
      test_type: "${test_type}"  # data_validation, methodology_check, result_verification
      test_result: "${test_result}"  # pass, fail, warning
      test_details: "${test_details}"
# Documentation and Reporting
# Points to the artifacts that accompany the analysis: working documentation
# (methodology, code, data dictionary, notebook) and reporting deliverables
# (summary, report, dashboard, slides). Values are path or URL placeholders.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been lost - confirm against upstream.
documentation:
  # Analysis Documentation
  analysis_documentation:
    methodology_document: "${methodology_document_path}"
    code_repository: "${code_repository_url}"
    data_dictionary: "${data_dictionary_path}"
    analysis_notebook: "${analysis_notebook_path}"

  # Reporting
  reporting:
    executive_summary: "${executive_summary_path}"
    detailed_report: "${detailed_report_path}"
    visualization_dashboard: "${dashboard_url}"
    presentation_slides: "${presentation_path}"
# Validation Rules
# Declares what a filled-in template must satisfy: the field names that must
# be present, descriptive quality standards, and documentation requirements.
# `quality_standards` and `documentation_requirements` are lists of
# single-key mappings, kept in that shape as found in the source.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been lost - confirm against upstream.
validation_rules:
  required_fields:
    - analysis_id
    - analysis_name
    - primary_objective
    - data_sources
    - analysis_methods
    - key_findings

  # Human-readable thresholds; stored as strings, not machine-checkable values.
  quality_standards:
    - statistical_significance: "p < 0.05 for hypothesis tests"
    - confidence_level: "Minimum 95% for confidence intervals"
    - sample_size: "Adequate for statistical power"
    - data_quality: "Minimum 90% completeness for critical fields"

  documentation_requirements:
    - methodology_documented: true
    - assumptions_stated: true
    - limitations_disclosed: true
    - reproducible_code: true
# Template Metadata
# Authorship and maintenance record for this template, with a changelog that
# holds one entry per released version.
# NOTE(review): nesting was reconstructed from a copy whose indentation had
# been lost; `date` and `changes` are assumed to belong to the changelog
# entry - confirm against upstream.
template_metadata:
  author: "AI Agentic Data Stack Framework"
  maintainer: "Data Analyst"
  last_updated: "2025-01-23"
  changelog:
    - version: "1.0.0"
      date: "2025-01-23"
      changes: "Initial template creation with comprehensive data analysis configuration"