datapilot-cli
Version:
Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform
315 lines • 12.5 kB
JavaScript
"use strict";
/**
* Example Usage of DataPilot Configuration System
* Demonstrates how to use the new configurable thresholds and performance monitoring
*/
// Flag the exports object with `__esModule` (the interop marker emitted when
// ES-module source is compiled to CommonJS).
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder only; the real `configExamples` map is assigned at the bottom of this file.
exports.configExamples = void 0;
// The functions below are declared later in the file. Function declarations
// are hoisted, so they can be attached to `exports` before their definitions.
exports.exampleBasicConfig = exampleBasicConfig;
exports.exampleEnvironmentConfig = exampleEnvironmentConfig;
exports.examplePresetConfigs = examplePresetConfigs;
exports.exampleAdaptiveConfig = exampleAdaptiveConfig;
exports.examplePerformanceMonitoring = examplePerformanceMonitoring;
exports.exampleConfiguredCSVParser = exampleConfiguredCSVParser;
exports.exampleConfiguredStreamingAnalyzer = exampleConfiguredStreamingAnalyzer;
exports.exampleCustomConfiguration = exampleCustomConfiguration;
exports.exampleConfigValidation = exampleConfigValidation;
exports.exampleConfigurationMonitoring = exampleConfigurationMonitoring;
const config_1 = require("./config");
const performance_monitor_1 = require("./performance-monitor");
const csv_parser_1 = require("../parsers/csv-parser");
const streaming_analyzer_1 = require("../analyzers/streaming/streaming-analyzer");
/**
 * Example 1: Basic configuration usage.
 *
 * Logs the default row limit, overrides selected performance and statistical
 * settings on the manager returned by `getConfig()`, then validates the
 * result and logs any validation errors.
 */
function exampleBasicConfig() {
    const { getConfig } = config_1;
    const manager = getConfig();
    // Show the row limit before any override is applied
    const snapshot = manager.getConfig();
    console.log('Default max rows:', snapshot.performance.maxRows);
    // Override selected performance settings
    const performanceOverrides = {
        maxRows: 2000000,
        chunkSize: 128 * 1024,
    };
    manager.updatePerformanceConfig(performanceOverrides);
    // Override selected statistical settings
    const statisticalOverrides = {
        significanceLevel: 0.01, // More stringent
        correlationThresholds: {
            weak: 0.2,
            moderate: 0.5,
            strong: 0.7,
            veryStrong: 0.9,
        },
    };
    manager.updateStatisticalConfig(statisticalOverrides);
    // Validate; nothing more to do when the configuration is valid
    const outcome = manager.validateConfig();
    if (outcome.isValid) {
        return;
    }
    console.error('Configuration errors:', outcome.errors);
}
/**
 * Example 2: Environment-based configuration.
 *
 * Temporarily sets three DATAPILOT_* environment variables, loads a
 * configuration from the environment, applies it to the configuration manager
 * and logs the resulting row limit.
 *
 * The variables are restored to their previous state (or removed if they were
 * not set) before returning, so the example does not leak settings into the
 * rest of the process, even if loading or applying the configuration throws.
 */
function exampleEnvironmentConfig() {
    // Values this example wants the loader to see
    const exampleEnv = {
        DATAPILOT_MAX_ROWS: '500000',
        DATAPILOT_MEMORY_THRESHOLD_MB: '150',
        DATAPILOT_SIGNIFICANCE_LEVEL: '0.01',
    };
    // Remember what was there before (undefined means "was not set")
    const previousEnv = {};
    for (const [name, value] of Object.entries(exampleEnv)) {
        previousEnv[name] = process.env[name];
        process.env[name] = value;
    }
    try {
        // Load from environment
        const envConfig = (0, config_1.loadConfigFromEnvironment)();
        const configManager = (0, config_1.getConfig)();
        configManager.updateConfig(envConfig);
        console.log('Environment-configured max rows:', configManager.getPerformanceConfig().maxRows);
    }
    finally {
        for (const [name, value] of Object.entries(previousEnv)) {
            if (value === undefined) {
                // Assigning undefined would store the string "undefined"; delete instead
                delete process.env[name];
            }
            else {
                process.env[name] = value;
            }
        }
    }
}
/**
 * Example 3: Preset configurations for different dataset sizes.
 *
 * Applies the 'small' preset and then the 'large' preset to the configuration
 * manager, logging the resulting row limit after each one.
 */
function examplePresetConfigs() {
    const { getConfig, getPresetConfig } = config_1;
    const manager = getConfig();
    // Preset name paired with the label logged after it is applied
    const presets = [
        ['small', 'Small dataset config - max rows:'],
        ['large', 'Large dataset config - max rows:'],
    ];
    for (const [presetName, logLabel] of presets) {
        manager.updateConfig(getPresetConfig(presetName));
        console.log(logLabel, manager.getPerformanceConfig().maxRows);
    }
}
/**
 * Example 4: Adaptive configuration based on system resources.
 *
 * Asks `ResourceDetector` for the detected resources, logs and applies its
 * recommended configuration, then requests adaptive thresholds for a sample
 * dataset size and the available memory (converted to bytes) and applies
 * those as well.
 */
function exampleAdaptiveConfig() {
    const { getConfig } = config_1;
    const manager = getConfig();
    const { ResourceDetector } = performance_monitor_1;
    // Detect system resources and report them
    const { availableMemoryMB, recommendedConfig } = ResourceDetector.detectSystemResources();
    console.log('Available memory:', availableMemoryMB, 'MB');
    console.log('Recommended config:', recommendedConfig);
    // Start from the recommended configuration
    manager.updateConfig(recommendedConfig);
    // Refine with thresholds adapted to an example dataset size
    const exampleDatasetSize = 100000;
    const availableBytes = availableMemoryMB * 1024 * 1024; // MB -> bytes
    const tunedConfig = manager.getAdaptiveThresholds(exampleDatasetSize, availableBytes);
    manager.updateConfig(tunedConfig);
}
/**
 * Example 5: Performance monitoring with automatic adaptation.
 *
 * Starts the performance monitor, enables automatic adaptation, records 1000
 * simulated operations (pausing 10 ms after each), logs a summary and stops
 * the monitor.
 *
 * Monitoring is stopped in a `finally` block so that it does not keep running
 * when any step in between throws; the original error still propagates.
 *
 * @returns {Promise<void>} Settles after monitoring has been stopped.
 */
async function examplePerformanceMonitoring() {
    const perfMonitor = (0, performance_monitor_1.getPerformanceMonitor)();
    // Start monitoring with 2-second intervals
    perfMonitor.startMonitoring(2000);
    try {
        // Enable automatic threshold adaptation
        perfMonitor.setAutoAdaptation(true);
        // Simulate some work
        for (let i = 0; i < 1000; i++) {
            perfMonitor.recordOperation('operation');
            if (i % 100 === 0) {
                // Every 100th iteration also records a batch of 100 rows
                perfMonitor.recordOperation('row', 100);
            }
            // Simulate some processing time
            await new Promise((resolve) => setTimeout(resolve, 10));
        }
        // Get performance summary
        const summary = perfMonitor.getPerformanceSummary();
        console.log('Performance summary:', {
            rowsProcessed: summary.operationalMetrics.rowsProcessed,
            errorRate: summary.operationalMetrics.errorRate,
            adaptiveThresholds: summary.adaptiveThresholds.length,
            alerts: summary.recentAlerts.length,
        });
    }
    finally {
        // Always stop monitoring, even when the work above failed
        perfMonitor.stopMonitoring();
    }
}
/**
 * Example 6: Using configuration with CSV parser.
 *
 * Builds a `CSVParser` whose `maxRows`, `chunkSize` and `maxFieldSize` options
 * are taken from the current performance configuration.
 *
 * @returns The newly constructed parser instance.
 */
function exampleConfiguredCSVParser() {
    const { getConfig } = config_1;
    const performanceSettings = getConfig().getPerformanceConfig();
    const { CSVParser } = csv_parser_1;
    // Forward only the limits the parser needs
    const { maxRows, chunkSize, maxFieldSize } = performanceSettings;
    const configuredParser = new CSVParser({ maxRows, chunkSize, maxFieldSize });
    console.log('Parser configured with max rows:', maxRows);
    return configuredParser;
}
/**
 * Example 7: Using configuration with streaming analyzer.
 *
 * Constructs a `StreamingAnalyzer`, passing only an `enableMultivariate`
 * override. Per the original author's note, the remaining settings are picked
 * up from the global configuration inside the analyzer's constructor (that
 * code is not part of this file).
 *
 * @returns The newly constructed analyzer instance.
 */
function exampleConfiguredStreamingAnalyzer() {
    const { getConfig } = config_1;
    // The returned manager is not used here; the call itself is kept unchanged
    getConfig();
    const { StreamingAnalyzer } = streaming_analyzer_1;
    // Only the settings that should differ from the global configuration
    const overrides = {
        enableMultivariate: true,
    };
    const configuredAnalyzer = new StreamingAnalyzer(overrides);
    console.log('Streaming analyzer configured from global config');
    return configuredAnalyzer;
}
/**
 * Example 8: Custom configuration for specific use cases.
 *
 * Applies two hand-written configurations in sequence via `updateConfig`:
 * first one with very strict statistical thresholds, then one sized for
 * memory-constrained environments. A log line follows each application.
 */
function exampleCustomConfiguration() {
    const { getConfig } = config_1;
    const manager = getConfig();
    manager.updateConfig(buildHighPrecisionConfig());
    console.log('Applied high-precision configuration');
    manager.updateConfig(buildMemoryConstrainedConfig());
    console.log('Applied memory-constrained configuration');
}
/**
 * Builds the configuration for high-precision statistical analysis
 * (`statistical` and `analysis` sections only).
 */
function buildHighPrecisionConfig() {
    // Every significance-style threshold in this profile uses the same value
    const strictAlpha = 0.001; // Very stringent
    const statistical = {
        significanceLevel: strictAlpha,
        alternativeSignificanceLevels: {
            normalityTests: strictAlpha,
            correlationTests: strictAlpha,
            hypothesisTests: strictAlpha,
            outlierDetection: strictAlpha,
        },
        confidenceLevel: 0.999,
        correlationThresholds: { weak: 0.3, moderate: 0.5, strong: 0.7, veryStrong: 0.9 },
        outlierThresholds: {
            zScoreThreshold: 3.0,
            modifiedZScoreThreshold: 3.5,
            iqrMultiplier: 1.5,
        },
        normalityThresholds: {
            shapiroWilkMinSample: 3,
            shapiroWilkMaxSample: 5000,
            jarqueBeraThreshold: strictAlpha,
            ksTestThreshold: strictAlpha,
        },
    };
    const analysis = {
        maxCategoricalLevels: 50,
        maxCorrelationPairs: 200, // More comprehensive analysis
        samplingThreshold: 10000,
        outlierMethods: ['iqr', 'zscore', 'modified_zscore'],
        normalityTests: ['shapiro', 'jarque_bera', 'ks_test'],
        enableMultivariate: true,
        enabledAnalyses: ['univariate', 'bivariate', 'correlations'],
        highCardinalityThreshold: 80,
        missingValueQualityThreshold: 20,
        multivariateThreshold: 1000,
        maxDimensionsForPCA: 10,
        clusteringMethods: ['kmeans', 'hierarchical'],
    };
    return { statistical, analysis };
}
/**
 * Builds the configuration for memory-constrained environments
 * (`performance`, `streaming` and `analysis` sections).
 */
function buildMemoryConstrainedConfig() {
    const KIB = 1024;
    const MIB = 1024 * 1024;
    const performance = {
        maxRows: 50000,
        maxFieldSize: MIB,
        memoryThresholdBytes: 128 * MIB,
        chunkSize: 8 * KIB,
        sampleSize: MIB,
        adaptiveChunkSizing: true,
        maxCollectedRowsMultivariate: 100,
        batchSize: 100,
        performanceMonitoringInterval: 10,
        memoryCleanupInterval: 20,
        emergencyMemoryThresholdMultiplier: 1.5,
    };
    const streaming = {
        memoryThresholdMB: 50,
        maxRowsAnalyzed: 50000,
        adaptiveChunkSizing: {
            enabled: true,
            minChunkSize: 50,
            maxChunkSize: 2000,
            reductionFactor: 0.6,
            expansionFactor: 1.1,
            targetMemoryUtilization: 0.8,
        },
        memoryManagement: {
            cleanupInterval: 20,
            emergencyThresholdMultiplier: 1.5,
            forceGarbageCollection: true,
            gcFrequency: 1000,
            memoryLeakDetection: false,
            autoGarbageCollect: false,
        },
    };
    const analysis = {
        maxCategoricalLevels: 20,
        maxCorrelationPairs: 20,
        samplingThreshold: 2000,
        outlierMethods: ['iqr'],
        normalityTests: ['shapiro'],
        enableMultivariate: false, // Disable memory-intensive analysis
        enabledAnalyses: ['univariate'],
        highCardinalityThreshold: 80,
        missingValueQualityThreshold: 20,
        multivariateThreshold: 500,
        maxDimensionsForPCA: 3,
        clusteringMethods: ['kmeans'],
    };
    return { performance, streaming, analysis };
}
/**
 * Example 9: Configuration validation and error handling.
 *
 * Deliberately applies an out-of-range significance level, validates the
 * configuration and, when validation fails, logs the errors and resets the
 * manager. Any exception thrown along the way is caught and logged.
 */
function exampleConfigValidation() {
    const { getConfig } = config_1;
    const manager = getConfig();
    // Intentionally invalid: the significance level must be within 0-1
    const invalidOverride = { significanceLevel: 1.5 };
    try {
        manager.updateStatisticalConfig(invalidOverride);
        const report = manager.validateConfig();
        if (report.isValid) {
            return;
        }
        console.error('Configuration validation failed:', report.errors);
        // Fall back to the defaults
        manager.reset();
        console.log('Configuration reset to defaults');
    }
    catch (failure) {
        console.error('Configuration error:', failure);
    }
}
/**
 * Example 10: Runtime configuration monitoring.
 *
 * Logs the current chunk size, starts performance monitoring, and schedules
 * two timers: after 3 seconds a `chunkSize` threshold is set manually and
 * logged; after 10 seconds monitoring is stopped and the adaptation history
 * from the performance summary is logged. The function itself returns
 * immediately; the timers are not awaited.
 */
function exampleConfigurationMonitoring() {
    const { getConfig } = config_1;
    const manager = getConfig();
    const { getPerformanceMonitor } = performance_monitor_1;
    const monitor = getPerformanceMonitor();
    const pressureDelayMs = 3000;
    const shutdownDelayMs = 10000;
    // Manually set a smaller chunk size to simulate memory pressure
    const simulateMemoryPressure = () => {
        monitor.setThreshold('chunkSize', 8192, 'Simulated memory pressure');
        console.log('Adapted chunk size:', monitor.getAdaptiveThreshold('chunkSize'));
    };
    // Stop monitoring and report what was adapted
    const stopAndReport = () => {
        monitor.stopMonitoring();
        const { adaptationHistory } = monitor.getPerformanceSummary();
        console.log('Adaptation history:', adaptationHistory);
    };
    // Report the starting point, then begin monitoring
    console.log('Initial chunk size:', manager.getPerformanceConfig().chunkSize);
    monitor.startMonitoring(1000);
    setTimeout(simulateMemoryPressure, pressureDelayMs);
    setTimeout(stopAndReport, shutdownDelayMs);
}
// Export all examples for easy testing.
// Maps a short key to each example function declared in this file; nothing is
// invoked here. This replaces the `void 0` placeholder assigned to
// `exports.configExamples` near the top of the file.
exports.configExamples = {
basicConfig: exampleBasicConfig,
environmentConfig: exampleEnvironmentConfig,
presetConfigs: examplePresetConfigs,
adaptiveConfig: exampleAdaptiveConfig,
performanceMonitoring: examplePerformanceMonitoring,
configuredCSVParser: exampleConfiguredCSVParser,
configuredStreamingAnalyzer: exampleConfiguredStreamingAnalyzer,
customConfiguration: exampleCustomConfiguration,
configValidation: exampleConfigValidation,
configurationMonitoring: exampleConfigurationMonitoring,
};
//# sourceMappingURL=example-config-usage.js.map