UNPKG

cleanifix

Version:

Intelligent data cleaning CLI with natural language support - Docker-powered Python engine

215 lines 8.55 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.quickAnalyzeCommand = exports.analyzeCommand = void 0; const commander_1 = require("commander"); const python_bridge_1 = require("../services/python-bridge"); const formatters_1 = require("../utils/formatters"); const spinner_1 = require("../utils/spinner"); const logger_1 = require("../utils/logger"); const fs = __importStar(require("fs/promises")); const path = __importStar(require("path")); exports.analyzeCommand = new commander_1.Command('analyze') .description('Analyze data quality issues in a file') .argument('<file>', 'Path to the file to analyze') .option('-m, --missing', 'Analyze missing values') .option('-d, --duplicates', 'Analyze duplicate rows and values') .option('-f, --format', 'Analyze format inconsistencies') .option('-q, --quality', 'Comprehensive quality analysis') .option('-a, --all', 'Run all available analyses') .option('-c, --columns <columns>', 'Specific columns to analyze (comma-separated)') .option('-o, --output <file>', 'Save analysis results to file') .option('--json', 'Output results as JSON') .option('-v, --verbose', 'Show detailed output') .action(async (file, options) => { try { // Validate input file exists try { await fs.access(file); } catch { logger_1.logger.error(`File not found: ${file}`); throw new Error(`File not found: ${file}`); } // Determine which analyses to run const analysesToRun = determineAnalyses(options); if (analysesToRun.length === 0) { logger_1.logger.warn('No analysis type specified. Use --all or specific flags.'); logger_1.logger.info('Available analyses: --missing, --duplicates, --format, --quality'); process.exit(1); } // Parse columns if specified const columns = options.columns ? options.columns.split(',').map(c => c.trim()) : null; // Start the analysis (suppress spinner in JSON mode) const spin = (0, spinner_1.spinner)(); if (!options.json) { spin.start(`Analyzing ${path.basename(file)}...`); } try { // Initialize Python bridge const pythonBridge = new python_bridge_1.PythonBridge({ debug: false, quiet: options.json }); await pythonBridge.initialize(); // Prepare the analysis configuration const analysisConfig = { operations: analysesToRun.map(type => ({ type: 'analyze', subtype: type, options: { columns: columns, deep_scan: options.verbose } })), config: { input_file: file, verbose: options.verbose } }; try { // Execute analysis const response = await pythonBridge.analyze(file, { verbose: options.verbose, columns: columns, operations: analysesToRun }); const results = response.result; if (!options.json) { spin.stop(); } // Format and display results if (options.json) { console.log(JSON.stringify(results, null, 2)); } else { const formatted = formatAnalysisResults(results, options); console.log(formatted); } // Save to file if requested if (options.output) { await saveResults(results, options.output, options.json || false); if (!options.json) { logger_1.logger.success(`Analysis results saved to ${options.output}`); } } // Show summary statistics if (!options.json && results.summary) { showSummaryStatistics(results.summary); } } finally { // Cleanup Python bridge await pythonBridge.shutdown(); } } catch (error) { if (!options.json) { spin.stop(); } throw error; } } catch (error) { logger_1.logger.error('Analysis failed:', error); if (options.verbose) { console.error(error); } process.exit(1); } }); /** * Determine which analyses to run based on options */ function determineAnalyses(options) { const analyses = []; if (options.all) { return ['missing_values', 'duplicates', 'format_issues', 'quality']; } if (options.missing) analyses.push('missing_values'); if (options.duplicates) analyses.push('duplicates'); if (options.format) analyses.push('format_issues'); if (options.quality) analyses.push('quality'); return analyses; } /** * Format analysis results for display */ function formatAnalysisResults(results, options) { // Use the formatters utility to create beautiful output return formatters_1.formatters.formatAnalysisResults(results, { color: true, verbose: options.verbose, json: false }); } /** * Save analysis results to file */ async function saveResults(results, outputPath, asJson) { const content = asJson ? JSON.stringify(results, null, 2) : formatters_1.formatters.formatAnalysisResults(results, { color: false, json: false }); await fs.writeFile(outputPath, content, 'utf-8'); } /** * Show summary statistics */ function showSummaryStatistics(summary) { console.log('\n' + formatters_1.formatters.formatInfo('Quick Statistics:')); if (summary.data_quality_score !== undefined) { const score = summary.data_quality_score; const scoreColor = score >= 80 ? 'green' : score >= 60 ? 'yellow' : 'red'; console.log(` Data Quality Score: ${formatters_1.formatters.formatPercentage(score, { color: true })}`); } if (summary.total_issues !== undefined) { console.log(` Total Issues Found: ${formatters_1.formatters.formatNumber(summary.total_issues)}`); } if (summary.critical_issues !== undefined && summary.critical_issues > 0) { console.log(formatters_1.formatters.formatWarning(` Critical Issues: ${summary.critical_issues}`)); } console.log(''); } // Additional helper command for quick analysis exports.quickAnalyzeCommand = new commander_1.Command('qa') .description('Quick analysis of a file (alias for analyze --all)') .argument('<file>', 'Path to the file to analyze') .option('--json', 'Output results as JSON') .action(async (file, options) => { // Delegate to main analyze command with --all flag await exports.analyzeCommand.parseAsync([file, '--all', ...(options.json ? ['--json'] : [])]); }); //# sourceMappingURL=analyze.js.map