UNPKG

agentic-data-stack-community

Version:

AI Agentic Data Stack Framework - Community Edition. Open-source data engineering framework with 4 core agents, essential templates, and 3-dimensional quality validation.

389 lines (338 loc) 13 kB
---
# Data Analysis Template
# Standardized template for conducting comprehensive data analysis across the
# AI Agentic Data Stack Framework
#
# Layout of this file:
#   metadata / template      - identity of the template and its section list
#   analysis_overview ...    - one top-level mapping per section declared in
#     validation_rules         template.sections
#   documentation            - paths/URLs of supporting artefacts
#   template_metadata        - authorship and changelog
#
# Placeholders use the ${name} form. Most are wrapped in double quotes; a
# number of them (counts, scores, metrics, flags) are deliberately left
# unquoted, as in the original template.
# NOTE(review): presumably the unquoted placeholders are substituted textually
# before the YAML is parsed, so that the rendered value becomes a number or a
# boolean rather than a string - confirm against the template renderer before
# quoting them.

metadata:
  template_id: "data-analysis-tmpl"
  name: "Data Analysis Template"
  version: "1.0.0"
  description: "Comprehensive template for structured data analysis with statistical methods and insights generation"
  category: "analytics"
  tags: ["analysis", "statistics", "insights", "exploration", "reporting"]
  created_by: "AI Agentic Data Stack Framework"
  created_date: "2025-01-23"

template:
  name: "Data Analysis Template"
  description: "Template for conducting systematic data analysis"
  version: "1.0.0"

  # Each entry names a top-level key defined further down in this file.
  # NOTE(review): the top-level `documentation` and `template_metadata` keys
  # are not listed here - confirm whether that omission is intentional.
  sections:
    - name: "analysis_overview"
      description: "Analysis objectives and scope definition"
      required: true

    - name: "data_exploration"
      description: "Initial data exploration and profiling"
      required: true

    - name: "analysis_methods"
      description: "Statistical methods and analytical techniques"
      required: true

    - name: "results_findings"
      description: "Analysis results and key findings"
      required: true

    - name: "insights_recommendations"
      description: "Business insights and actionable recommendations"
      required: true

    - name: "quality_validation"
      description: "Analysis quality checks and validation"
      required: true

    - name: "validation_rules"
      description: "Template validation requirements"
      required: true

# Analysis Overview
analysis_overview:
  # Basic Information
  analysis_id: "${analysis_id}"
  analysis_name: "${analysis_name}"
  description: "${analysis_description}"
  version: "${analysis_version}"

  # Objectives and Scope
  objectives:
    primary_objective: "${primary_objective}"
    secondary_objectives: ["${secondary_objectives}"]
    business_questions: ["${business_questions}"]
    hypotheses: ["${hypotheses}"]

  # Analysis Context
  context:
    business_context: "${business_context}"
    problem_statement: "${problem_statement}"
    expected_outcomes: ["${expected_outcomes}"]
    success_criteria: ["${success_criteria}"]

  # Scope Definition
  scope:
    time_period: "${analysis_time_period}"
    data_scope: "${data_scope}"
    geographical_scope: "${geographical_scope}"
    exclusions: ["${scope_exclusions}"]

  # Stakeholders
  stakeholders:
    analysis_owner: "${analysis_owner}"
    business_sponsor: "${business_sponsor}"
    data_consumers: ["${data_consumers}"]
    reviewers: ["${analysis_reviewers}"]

# Data Exploration
data_exploration:
  # Data Sources
  data_sources:
    - source_id: "${source_id}"
      source_name: "${source_name}"
      source_type: "${source_type}"  # database, file, api, stream
      data_location: "${data_location}"
      access_method: "${access_method}"

      # Data Characteristics
      characteristics:
        record_count: ${record_count}
        field_count: ${field_count}
        data_size: "${data_size}"
        update_frequency: "${update_frequency}"

  # Data Profiling
  data_profiling:
    # Numeric Fields
    numeric_fields:
      - field_name: "${field_name}"
        data_type: "${data_type}"
        statistics:
          mean: ${field_mean}
          median: ${field_median}
          mode: ${field_mode}
          std_deviation: ${field_std_dev}
          min_value: ${field_min}
          max_value: ${field_max}
          quartiles: ["${q1}", "${q2}", "${q3}"]
          outliers_count: ${outliers_count}
          null_count: ${null_count}
          null_percentage: ${null_percentage}

    # Categorical Fields
    categorical_fields:
      - field_name: "${field_name}"
        data_type: "${data_type}"
        statistics:
          unique_values: ${unique_count}
          most_frequent: "${most_frequent_value}"
          frequency_distribution: ["${frequency_distribution}"]
          null_count: ${null_count}
          null_percentage: ${null_percentage}

    # Date/Time Fields
    datetime_fields:
      - field_name: "${field_name}"
        data_type: "${data_type}"
        statistics:
          earliest_date: "${earliest_date}"
          latest_date: "${latest_date}"
          date_range: "${date_range}"
          null_count: ${null_count}

  # Data Quality Assessment
  data_quality:
    completeness:
      overall_completeness: ${overall_completeness_percentage}
      critical_fields_completeness: ${critical_fields_completeness}

    accuracy:
      data_validation_results: ["${validation_results}"]
      business_rule_violations: ${rule_violations_count}

    consistency:
      cross_field_consistency: ${cross_field_consistency_score}
      referential_integrity: ${referential_integrity_score}

    timeliness:
      data_freshness: "${data_freshness}"
      lag_indicators: ["${lag_indicators}"]

# Analysis Methods
analysis_methods:
  # Descriptive Analysis
  descriptive_analysis:
    - method_name: "${descriptive_method}"
      method_type: "${method_type}"  # summary_statistics, frequency_analysis, cross_tabulation
      variables: ["${analysis_variables}"]
      purpose: "${analysis_purpose}"

      # Method Configuration
      configuration:
        grouping_variables: ["${grouping_variables}"]
        aggregation_functions: ["${aggregation_functions}"]
        filters_applied: ["${filters_applied}"]

      # Results
      results:
        summary_statistics: ["${summary_statistics}"]
        key_findings: ["${key_findings}"]
        visualizations: ["${visualizations}"]

  # Diagnostic Analysis
  diagnostic_analysis:
    - analysis_type: "${diagnostic_type}"  # correlation, regression, chi_square, anova
      research_question: "${research_question}"
      variables: ["${diagnostic_variables}"]

      # Statistical Tests
      statistical_tests:
        test_name: "${statistical_test}"
        significance_level: ${significance_level}
        test_statistic: ${test_statistic}
        p_value: ${p_value}
        confidence_interval: ["${ci_lower}", "${ci_upper}"]

      # Assumptions
      assumptions:
        assumption_checks: ["${assumption_checks}"]
        violations_detected: ["${assumption_violations}"]
        remedial_actions: ["${remedial_actions}"]

  # Predictive Analysis
  predictive_analysis:
    - model_type: "${predictive_model_type}"
      target_variable: "${target_variable}"
      predictor_variables: ["${predictor_variables}"]

      # Model Configuration
      model_config:
        algorithm: "${algorithm}"
        parameters: ["${model_parameters}"]
        training_data_split: ${training_split_percentage}
        validation_method: "${validation_method}"

      # Model Performance
      performance_metrics:
        accuracy: ${model_accuracy}
        precision: ${model_precision}
        recall: ${model_recall}
        f1_score: ${model_f1_score}
        rmse: ${model_rmse}
        r_squared: ${model_r_squared}

  # Advanced Analytics
  advanced_analytics:
    - technique: "${advanced_technique}"  # clustering, association_rules, time_series, text_analysis
      purpose: "${technique_purpose}"
      data_requirements: ["${data_requirements}"]

      # Technique Parameters
      parameters:
        algorithm_parameters: ["${algorithm_parameters}"]
        optimization_criteria: "${optimization_criteria}"
        convergence_criteria: "${convergence_criteria}"

      # Results Interpretation
      interpretation:
        pattern_identification: ["${identified_patterns}"]
        cluster_characteristics: ["${cluster_characteristics}"]
        association_rules: ["${association_rules}"]

# Results and Findings
results_findings:
  # Key Findings
  key_findings:
    - finding_id: "${finding_id}"
      finding_title: "${finding_title}"
      finding_description: "${finding_description}"

      # Supporting Evidence
      evidence:
        statistical_evidence: ["${statistical_evidence}"]
        visual_evidence: ["${visual_evidence}"]
        data_samples: ["${data_samples}"]

      # Significance
      significance:
        business_impact: "${business_impact}"
        statistical_significance: ${statistical_significance}
        confidence_level: ${confidence_level}
        effect_size: "${effect_size}"

  # Trends and Patterns
  trends_patterns:
    - pattern_type: "${pattern_type}"  # trend, seasonal, cyclical, irregular
      pattern_description: "${pattern_description}"
      time_period: "${pattern_time_period}"
      strength: "${pattern_strength}"  # weak, moderate, strong

  # Outliers and Anomalies
  outliers_anomalies:
    - anomaly_type: "${anomaly_type}"
      anomaly_description: "${anomaly_description}"
      detection_method: "${detection_method}"
      potential_causes: ["${potential_causes}"]

  # Correlations and Relationships
  correlations:
    - variable_pair: ["${variable_1}", "${variable_2}"]
      correlation_coefficient: ${correlation_coefficient}
      correlation_type: "${correlation_type}"  # positive, negative, none
      significance: ${correlation_significance}

# Insights and Recommendations
insights_recommendations:
  # Business Insights
  business_insights:
    - insight_id: "${insight_id}"
      insight_title: "${insight_title}"
      insight_description: "${insight_description}"

      # Business Value
      business_value:
        value_category: "${value_category}"  # cost_reduction, revenue_increase, risk_mitigation, efficiency_improvement
        quantified_impact: "${quantified_impact}"
        confidence_level: "${insight_confidence}"

      # Supporting Analysis
      supporting_analysis:
        analysis_methods: ["${supporting_methods}"]
        data_sources: ["${supporting_data}"]
        validation_checks: ["${validation_checks}"]

  # Actionable Recommendations
  recommendations:
    - recommendation_id: "${recommendation_id}"
      recommendation_title: "${recommendation_title}"
      recommendation_description: "${recommendation_description}"

      # Implementation Details
      implementation:
        priority: "${recommendation_priority}"  # high, medium, low
        effort_level: "${effort_level}"  # low, medium, high
        timeline: "${implementation_timeline}"
        resources_required: ["${required_resources}"]

      # Expected Outcomes
      expected_outcomes:
        success_metrics: ["${success_metrics}"]
        target_values: ["${target_values}"]
        measurement_approach: "${measurement_approach}"

  # Next Steps
  next_steps:
    immediate_actions: ["${immediate_actions}"]
    follow_up_analysis: ["${follow_up_analysis}"]
    monitoring_requirements: ["${monitoring_requirements}"]

# Quality Validation
quality_validation:
  # Analysis Quality Checks
  analysis_quality:
    data_quality_validation: ${data_quality_validated}
    methodology_appropriateness: ${methodology_appropriate}
    statistical_assumptions_met: ${assumptions_validated}
    results_reproducible: ${results_reproducible}

  # Peer Review
  peer_review:
    reviewer_name: "${reviewer_name}"
    review_date: "${review_date}"
    review_status: "${review_status}"  # pending, approved, needs_revision
    review_comments: ["${review_comments}"]

  # Validation Tests
  validation_tests:
    - test_name: "${validation_test_name}"
      test_type: "${test_type}"  # data_validation, methodology_check, result_verification
      test_result: "${test_result}"  # pass, fail, warning
      test_details: "${test_details}"

# Documentation and Reporting
documentation:
  # Analysis Documentation
  analysis_documentation:
    methodology_document: "${methodology_document_path}"
    code_repository: "${code_repository_url}"
    data_dictionary: "${data_dictionary_path}"
    analysis_notebook: "${analysis_notebook_path}"

  # Reporting
  reporting:
    executive_summary: "${executive_summary_path}"
    detailed_report: "${detailed_report_path}"
    visualization_dashboard: "${dashboard_url}"
    presentation_slides: "${presentation_path}"

# Validation Rules
validation_rules:
  # Field names that a filled-in analysis is expected to provide.
  required_fields:
    - analysis_id
    - analysis_name
    - primary_objective
    - data_sources
    - analysis_methods
    - key_findings

  # Each entry is a single-key mapping: standard name -> human-readable rule.
  quality_standards:
    - statistical_significance: "p < 0.05 for hypothesis tests"
    - confidence_level: "Minimum 95% for confidence intervals"
    - sample_size: "Adequate for statistical power"
    - data_quality: "Minimum 90% completeness for critical fields"

  # Each entry is a single-key mapping: requirement name -> boolean flag.
  documentation_requirements:
    - methodology_documented: true
    - assumptions_stated: true
    - limitations_disclosed: true
    - reproducible_code: true

# Template Metadata
template_metadata:
  author: "AI Agentic Data Stack Framework"
  maintainer: "Data Analyst"
  last_updated: "2025-01-23"
  changelog:
    - version: "1.0.0"
      date: "2025-01-23"
      changes: "Initial template creation with comprehensive data analysis configuration"