UNPKG

datapilot-cli

Version:

Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform

160 lines 5.28 kB
/** * Streaming Data Analysis Engine * Memory-efficient analysis using online algorithms and chunk processing */ import type { CLIOptions } from '../../cli/types'; import type { Section3Result, Section3Config, Section3Progress } from '../eda/types'; interface StreamingAnalyzerConfig extends Section3Config { chunkSize: number; memoryThresholdMB: number; maxRowsAnalyzed: number; adaptiveChunkSizing: boolean; enableMemoryOptimization: boolean; enableAdaptiveStreaming: boolean; enableParallelProcessing: boolean; samplingOptions?: Pick<CLIOptions, 'autoSample' | 'samplePercentage' | 'sampleRows' | 'sampleSizeBytes' | 'sampleMethod' | 'stratifyBy' | 'seed'>; } /** * Main Streaming Analysis Engine * Processes any size dataset with constant memory usage */ export declare class StreamingAnalyzer { private config; private state; private progressCallback?; private columnAnalyzers; private bivariateAnalyzer; private smartSampler?; private headers; private detectedTypes; private semanticTypes; private warnings; private typeDetectionResults; private hasHeaders; private collectedData; private maxCollectedRows; /** * Create a StreamingAnalyzer with smart sampling options from CLI */ static withSamplingOptions(config?: Partial<StreamingAnalyzerConfig>, samplingOptions?: Pick<CLIOptions, 'autoSample' | 'samplePercentage' | 'sampleRows' | 'sampleSizeBytes' | 'sampleMethod' | 'stratifyBy' | 'seed'>): StreamingAnalyzer; constructor(config?: Partial<StreamingAnalyzerConfig>); setProgressCallback(callback: (progress: Section3Progress) => void): void; /** * Initialize smart sampling if conditions are met */ private initializeSmartSampling; /** * Analyze a CSV file using streaming processing */ analyzeFile(filePath: string): Promise<Section3Result>; /** * Internal file analysis with memory optimization wrapper */ private analyzeFileWithMemoryOptimization; private initializeMemoryOptimization; /** * Core file analysis implementation */ private analyzeFileInternal; /** * Traditional streaming analysis (backward compatibility) */ private analyzeFileTraditional; /** * Enhanced streaming analysis with adaptive chunk sizing */ private analyzeFileWithAdaptiveStreaming; /** * First pass: Quick scan for headers, types, and basic metadata */ private firstPass; /** * Collect sample data using smart sampling */ private collectSmartSample; /** * Collect sample data using traditional fixed sampling */ private collectFixedSample; /** * Parse chunk text into rows */ private parseChunkText; /** * Handle memory pressure by adapting chunk size */ private handleMemoryPressure; /** * Format bytes for human readable display */ private formatBytes; /** * Main streaming pass: Process data in chunks */ private streamingPass; /** * Process a single chunk of data */ private processChunk; /** * Perform aggressive memory cleanup */ private performMemoryCleanup; /** * Adaptive memory management with aggressive cleanup */ private manageMemory; /** * Finalize analysis and generate results */ private finalizeResults; private extractHeaders; private detectColumnTypes; private inferSemanticTypes; private initializeColumnAnalyzers; private initializeBivariateAnalysis; private generateStreamingInsights; private reportProgress; /** * Handle analysis errors with graceful degradation */ private handleAnalysisError; /** * Create a degraded result when full analysis fails */ private createDegradedResult; /** * Check if multivariate data should be collected */ private shouldCollectMultivariateData; /** * Collect data for multivariate analysis with memory-efficient approach */ private collectMultivariateData; /** * Handle memory pressure for multivariate data collection */ private handleMultivariateMemoryPressure; /** * Perform multivariate analysis with enhanced type safety */ private performMultivariateAnalysis; /** * Create minimal multivariate analysis when disabled or insufficient data */ private createMinimalMultivariateAnalysis; /** * Validate analyzer state before operations */ private validateAnalyzerState; } /** * Convenience function to analyze a file using streaming approach */ export declare function analyzeFileStreaming(filePath: string, config?: Partial<StreamingAnalyzerConfig>): Promise<Section3Result>; /** * Convenience function to analyze a file using streaming approach with smart sampling */ export declare function analyzeFileStreamingWithSampling(filePath: string, samplingOptions: Pick<CLIOptions, 'autoSample' | 'samplePercentage' | 'sampleRows' | 'sampleSizeBytes' | 'sampleMethod' | 'stratifyBy' | 'seed'>, config?: Partial<StreamingAnalyzerConfig>): Promise<Section3Result>; export {}; //# sourceMappingURL=streaming-analyzer.d.ts.map