UNPKG

datapilot-cli

Version:

Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform

456 lines 15 kB
/** * Advanced Dataset Characterization Engine - Type Definitions * Provides sophisticated dataset analysis beyond basic statistics */ export interface DatasetComplexityProfile { intrinsicDimensionality: IntrinsicDimensionalityAnalysis; featureInteractionDensity: FeatureInteractionAnalysis; nonLinearityScore: NonLinearityAnalysis; separabilityIndex: SeparabilityAnalysis; noiseLevel: NoiseAnalysis; sparsityCharacteristics: SparsityProfile; temporalComplexity?: TimeSeriesComplexity; overallComplexityScore: number; confidenceLevel: 'very_high' | 'high' | 'medium' | 'low'; analysisMetadata: AnalysisMetadata; } export interface IntrinsicDimensionalityAnalysis { estimatedDimension: number; actualFeatureCount: number; dimensionalityReduction: number; method: 'pca_eigenvalue' | 'mle' | 'correlation_dimension'; confidence: number; redundantFeatures: string[]; criticalFeatures: string[]; } export interface FeatureInteractionAnalysis { overallInteractionStrength: number; pairwiseInteractions: PairwiseInteraction[]; higherOrderInteractions: HigherOrderInteraction[]; interactionDensity: number; dominantInteractionTypes: InteractionType[]; featureInteractionNetwork: InteractionNetwork; } export interface PairwiseInteraction { feature1: string; feature2: string; interactionStrength: number; interactionType: InteractionType; mutualInformation: number; statisticalSignificance: number; businessRelevance?: number; } export type InteractionType = 'linear_correlation' | 'non_linear_dependency' | 'conditional_dependency' | 'mutual_exclusion' | 'synergistic' | 'redundant' | 'complementary'; export interface HigherOrderInteraction { features: string[]; interactionOrder: number; interactionStrength: number; conditionalDependencies: ConditionalDependency[]; } export interface ConditionalDependency { dependentFeature: string; conditioningFeatures: string[]; dependencyStrength: number; conditioningContext: string; } export interface InteractionNetwork { nodes: NetworkNode[]; edges: NetworkEdge[]; centralityScores: CentralityScore[]; communityStructure: FeatureCommunity[]; } export interface NetworkNode { featureName: string; importance: number; connectivity: number; role: 'hub' | 'bridge' | 'peripheral' | 'specialist'; } export interface NetworkEdge { source: string; target: string; weight: number; interactionType: InteractionType; } export interface CentralityScore { feature: string; betweennessCentrality: number; closenessCentrality: number; degreeCentrality: number; eigenvectorCentrality: number; } export interface FeatureCommunity { communityId: string; features: string[]; cohesionScore: number; functionalDescription?: string; } export interface NonLinearityAnalysis { overallNonLinearityScore: number; featureNonLinearity: FeatureNonLinearity[]; targetNonLinearity?: TargetNonLinearity; nonLinearPatterns: NonLinearPattern[]; complexityIndicators: ComplexityIndicator[]; } export interface FeatureNonLinearity { featureName: string; linearityScore: number; nonLinearPatterns: string[]; transformationSuggestions: TransformationSuggestion[]; } export interface TargetNonLinearity { targetVariable: string; linearityWithFeatures: number; nonLinearRelationships: NonLinearRelationship[]; modelingImplications: string[]; } export interface NonLinearRelationship { feature: string; relationshipType: 'polynomial' | 'exponential' | 'logarithmic' | 'sinusoidal' | 'step' | 'complex'; strength: number; description: string; } export interface NonLinearPattern { patternType: string; affectedFeatures: string[]; patternStrength: number; modelingImpact: 'low' | 'medium' | 'high'; recommendedApproach: string; } export interface ComplexityIndicator { indicator: string; value: number; interpretation: string; modelingImplication: string; } export interface TransformationSuggestion { transformation: 'log' | 'sqrt' | 'square' | 'reciprocal' | 'box_cox' | 'yeo_johnson' | 'polynomial'; expectedImprovement: number; preservesInterpretability: boolean; computationalCost: 'low' | 'medium' | 'high'; } export interface SeparabilityAnalysis { overallSeparability: number; classSeparability: ClassSeparability[]; separabilityMethods: SeparabilityMethod[]; visualSeparability: VisualSeparability[]; geometricProperties: GeometricProperties; } export interface ClassSeparability { className: string; separabilityFromOthers: number; confusionLikelihood: ConfusionLikelihood[]; distinctiveFeatures: string[]; problematicRegions: ProblematicRegion[]; } export interface ConfusionLikelihood { confusedWith: string; confusionProbability: number; confusionCauses: string[]; mitigation: string[]; } export interface ProblematicRegion { description: string; features: string[]; severity: 'low' | 'medium' | 'high'; recommendedAction: string; } export interface SeparabilityMethod { method: 'distance_based' | 'density_based' | 'linear_discriminant' | 'manifold_based'; separabilityScore: number; confidence: number; methodSpecificMetrics: Record<string, number>; } export interface VisualSeparability { dimensions: string[]; separabilityScore: number; visualizationRecommendation: string; plotType: 'scatter' | 'parallel_coordinates' | 'radar' | 'projection'; } export interface GeometricProperties { dataManifoldDimension: number; manifoldComplexity: number; clusteringTendency: number; boundaryComplexity: number; volumeRatio: number; } export interface NoiseAnalysis { overallNoiseLevel: number; signalToNoiseRatio: number; noiseCharacteristics: NoiseCharacteristic[]; noiseDistribution: NoiseDistribution; outlierAnalysis: OutlierAnalysis; dataQualityImpact: DataQualityImpact; } export interface NoiseCharacteristic { feature: string; noiseLevel: number; noiseType: 'gaussian' | 'uniform' | 'systematic' | 'sporadic' | 'mixed'; noiseSources: string[]; filteringRecommendations: FilteringRecommendation[]; } export interface FilteringRecommendation { method: string; expectedImprovement: number; preservesSignal: boolean; implementationComplexity: 'low' | 'medium' | 'high'; } export interface NoiseDistribution { globalNoise: number; localNoise: LocalNoiseRegion[]; systematicNoise: SystematicNoise[]; temporalNoisePattern?: TemporalNoisePattern; } export interface LocalNoiseRegion { region: string; noiseLevel: number; affectedSamples: number; characteristics: string[]; } export interface SystematicNoise { pattern: string; affectedFeatures: string[]; noiseStrength: number; correctionStrategy: string; } export interface TemporalNoisePattern { pattern: 'increasing' | 'decreasing' | 'cyclical' | 'sporadic'; temporalFeature: string; noiseEvolution: number[]; seasonality?: number; } export interface OutlierAnalysis { outlierPercentage: number; outlierTypes: OutlierType[]; outlierImpact: OutlierImpact; treatmentRecommendations: OutlierTreatment[]; } export interface OutlierType { type: 'global' | 'local' | 'contextual' | 'collective'; count: number; severity: 'mild' | 'moderate' | 'severe'; features: string[]; characteristics: string[]; } export interface OutlierImpact { modelingSensitivity: 'low' | 'medium' | 'high'; statisticalImpact: number; businessRelevance: 'noise' | 'important' | 'critical'; interpretationImpact: string; } export interface OutlierTreatment { treatment: 'remove' | 'cap' | 'transform' | 'separate_model' | 'robust_method'; applicability: number; tradeoffs: string[]; implementation: string; } export interface DataQualityImpact { reliabilityScore: number; uncertaintyMeasures: UncertaintyMeasure[]; modelingRecommendations: string[]; dataCollectionSuggestions: string[]; } export interface UncertaintyMeasure { source: string; uncertaintyLevel: number; propagationImpact: number; mitigation: string[]; } export interface SparsityProfile { overallSparsity: number; featureSparsity: FeatureSparsity[]; sparsityPatterns: SparsityPattern[]; sparsityImpact: SparsityImpact; handlingRecommendations: SparsityHandling[]; } export interface FeatureSparsity { feature: string; sparsityLevel: number; missingPatterns: MissingPattern[]; informationContent: number; criticalityAssessment: 'essential' | 'important' | 'optional' | 'redundant'; } export interface MissingPattern { pattern: 'random' | 'systematic' | 'informative' | 'clustered'; description: string; implicationForModeling: string; treatmentPriority: 'high' | 'medium' | 'low'; } export interface SparsityPattern { patternType: string; affectedFeatures: string[]; patternStrength: number; businessExplanation?: string; modelingStrategy: string; } export interface SparsityImpact { algorithmSensitivity: AlgorithmSensitivity[]; performanceImpact: number; interpretabilityImpact: string; computationalImpact: string; } export interface AlgorithmSensitivity { algorithmCategory: string; sensitivityLevel: 'low' | 'medium' | 'high'; specificConcerns: string[]; adaptations: string[]; } export interface SparsityHandling { method: 'imputation' | 'sparse_algorithms' | 'feature_selection' | 'regularization'; applicability: number; expectedImpact: number; implementationComplexity: 'low' | 'medium' | 'high'; preservesInterpretability: boolean; } export interface TimeSeriesComplexity { temporalFeature: string; trendComplexity: TrendComplexity; seasonalityComplexity: SeasonalityComplexity; cyclicalComplexity: CyclicalComplexity; irregularityAnalysis: IrregularityAnalysis; forecastabilityAssessment: ForecastabilityAssessment; } export interface TrendComplexity { trendPresence: boolean; trendType: 'linear' | 'polynomial' | 'exponential' | 'complex'; trendStrength: number; changePoints: ChangePoint[]; trendStability: number; } export interface ChangePoint { timestamp: number; changeType: 'level' | 'trend' | 'variance'; magnitude: number; confidence: number; businessContext?: string; } export interface SeasonalityComplexity { seasonalPresence: boolean; seasonalPeriods: SeasonalPeriod[]; seasonalStrength: number; seasonalStability: number; harmonic: number; } export interface SeasonalPeriod { period: number; strength: number; description: string; businessRelevance: string; } export interface CyclicalComplexity { cyclicalPresence: boolean; cyclicalPeriods: number[]; cyclicalStrength: number; cyclicalRegularity: number; } export interface IrregularityAnalysis { irregularityLevel: number; irregularityType: 'random' | 'systematic' | 'episodic'; volatilityClustering: boolean; extremeEvents: ExtremeEvent[]; } export interface ExtremeEvent { timestamp: number; magnitude: number; type: 'outlier' | 'structural_break' | 'regime_change'; description: string; modelingImplication: string; } export interface ForecastabilityAssessment { shortTermForecastability: number; longTermForecastability: number; optimalForecastHorizon: number; forecastingChallenges: string[]; recommendedApproaches: string[]; } export interface AnalysisMetadata { analysisTimestamp: Date; analysisVersion: string; computationTime: number; sampleSize: number; samplingStrategy?: string; confidenceBounds: ConfidenceBounds; limitationsAndCaveats: string[]; reproducibilityInfo: ReproducibilityInfo; } export interface ConfidenceBounds { overallConfidence: number; componentConfidences: ComponentConfidence[]; uncertaintySources: string[]; confidenceInterpretation: string; } export interface ComponentConfidence { component: string; confidence: number; limitingFactors: string[]; } export interface ReproducibilityInfo { randomSeed?: number; softwareVersions: SoftwareVersion[]; configurationParameters: Record<string, any>; deterministicAnalysis: boolean; } export interface SoftwareVersion { package: string; version: string; purpose: string; } export interface DatasetCharacterizationConfig { analysisDepth: 'basic' | 'standard' | 'comprehensive' | 'exhaustive'; focusAreas: CharacterizationFocus[]; computationalBudget: ComputationalBudget; confidenceRequirements: ConfidenceRequirements; businessContext?: BusinessContext; temporalAnalysis: boolean; interactionAnalysisDepth: number; samplingStrategy: SamplingStrategy; } export type CharacterizationFocus = 'complexity' | 'interactions' | 'non_linearity' | 'separability' | 'noise' | 'sparsity' | 'temporal' | 'all'; export interface ComputationalBudget { maxComputationTime: number; maxMemoryUsage: number; parallelizationLevel: number; approximationTolerance?: number; } export interface ConfidenceRequirements { minimumConfidence: number; criticalComponents: string[]; uncertaintyTolerance: number; } export interface BusinessContext { domain: string; businessObjective: string; stakeholderRequirements: string[]; interpretabilityNeeds: 'low' | 'medium' | 'high'; regulatoryConstraints: string[]; } export interface SamplingStrategy { strategy: 'full' | 'stratified' | 'random' | 'systematic' | 'adaptive'; sampleSize?: number; stratificationColumns?: string[]; preserveDistributions: boolean; } export interface CharacterizationProgress { phase: 'initialization' | 'complexity_analysis' | 'interaction_analysis' | 'noise_analysis' | 'finalization'; progress: number; currentOperation: string; estimatedTimeRemaining: number; completedComponents: string[]; errorCount: number; warningCount: number; } export interface CharacterizationWarning { category: 'data_quality' | 'computational' | 'statistical' | 'configuration'; severity: 'low' | 'medium' | 'high'; message: string; component: string; impact: string; recommendation: string; } export declare class CharacterizationError extends Error { readonly category: 'data_error' | 'computational_error' | 'configuration_error' | 'resource_error'; readonly component: string; readonly recoverable: boolean; readonly fallbackStrategy?: string; constructor(message: string, category: CharacterizationError['category'], component: string, recoverable: boolean, fallbackStrategy?: string); } //# sourceMappingURL=types.d.ts.map