UNPKG

cleanifix

Version:

Intelligent data cleaning CLI with natural language support - Docker-powered Python engine

383 lines 16.6 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.formatters = void 0; const chalk_1 = __importDefault(require("chalk")); const cli_table3_1 = __importDefault(require("cli-table3")); exports.formatters = { formatCleaningSummary(summary) { const lines = []; if (summary.missing_handled !== undefined) { lines.push(` • Missing values handled: ${chalk_1.default.yellow(summary.missing_handled)}`); if (summary.strategy_used) { lines.push(` Strategy: ${chalk_1.default.gray(summary.strategy_used)}`); } } if (summary.rows_removed !== undefined && summary.rows_removed > 0) { lines.push(` • Rows removed: ${chalk_1.default.red(summary.rows_removed)}`); } if (summary.columns_removed !== undefined && summary.columns_removed > 0) { lines.push(` • Columns removed: ${chalk_1.default.red(summary.columns_removed)}`); } if (summary.duplicates_removed !== undefined && summary.duplicates_removed > 0) { lines.push(` • Duplicates removed: ${chalk_1.default.yellow(summary.duplicates_removed)}`); } if (summary.values_standardized !== undefined && summary.values_standardized > 0) { lines.push(` • Values standardized: ${chalk_1.default.green(summary.values_standardized)}`); } return lines.join('\n'); }, formatCleaningComplete(data) { const lines = []; if (data.dry_run) { lines.push(chalk_1.default.yellow.bold('=== DRY RUN COMPLETE ===')); lines.push(chalk_1.default.yellow('No files were modified\n')); } else { lines.push(chalk_1.default.green.bold('=== CLEANING COMPLETE ===\n')); } lines.push(chalk_1.default.bold('Summary:')); lines.push(` Input: ${chalk_1.default.cyan(data.input_file)}`); lines.push(` Output: ${chalk_1.default.cyan(data.output_file)}`); lines.push(` Rules applied: ${chalk_1.default.yellow(data.rules_applied)}\n`); if (data.applied_rules.length > 0) { lines.push(chalk_1.default.bold('Applied Rules:')); for (const rule of data.applied_rules) { lines.push(`\n ${chalk_1.default.green('✓')} ${chalk_1.default.bold(rule.rule)}`); if (rule.summary) { const summaryText = this.formatCleaningSummary(rule.summary); if (summaryText) { lines.push(summaryText); } } } } // Show before/after stats if available if (data.initial_stats?.shape) { lines.push(`\n${chalk_1.default.bold('Data Shape:')}`); lines.push(` Before: ${chalk_1.default.gray(`${data.initial_stats.shape.rows} rows × ${data.initial_stats.shape.columns} columns`)}`); // Calculate after shape from applied rules let finalRows = data.initial_stats.shape.rows; let finalCols = data.initial_stats.shape.columns; for (const rule of data.applied_rules) { if (rule.summary?.rows_after !== undefined) { finalRows = rule.summary.rows_after; } if (rule.summary?.columns_after !== undefined) { finalCols = rule.summary.columns_after; } } lines.push(` After: ${chalk_1.default.green(`${finalRows} rows × ${finalCols} columns`)}`); } return lines.join('\n'); }, formatError(error) { if (error.code === 'ENOENT') { return chalk_1.default.red(`File not found: ${error.path}`); } return chalk_1.default.red(error.message || String(error)); }, formatProgress(current, total, message) { const percentage = Math.round((current / total) * 100); const filled = Math.round((current / total) * 20); const bar = '█'.repeat(filled) + '░'.repeat(20 - filled); let text = `[${bar}] ${percentage}%`; if (message) { text += ` - ${message}`; } return text; }, /** * Format analysis results for display */ formatAnalysisResults(results, options = {}) { if (options.json) { return JSON.stringify(results, null, 2); } const output = []; const { color = true } = options; // Format header output.push(''); output.push(color ? chalk_1.default.bold.blue('📊 Analysis Results') : '📊 Analysis Results'); output.push(color ? chalk_1.default.gray('═'.repeat(50)) : '═'.repeat(50)); // Format summary if (results.summary) { output.push(''); output.push(color ? chalk_1.default.bold('Summary:') : 'Summary:'); output.push(this.formatAnalysisSummary(results.summary, options)); } // Format missing values analysis if (results.missing_values) { output.push(''); output.push(color ? chalk_1.default.bold.yellow('Missing Values Analysis') : 'Missing Values Analysis'); output.push(this.formatMissingValues(results.missing_values, options)); } // Format duplicates analysis if (results.duplicates || results.duplicate_analysis) { output.push(''); output.push(color ? chalk_1.default.bold.yellow('Duplicates Analysis') : 'Duplicates Analysis'); output.push(this.formatDuplicates(results.duplicates || results.duplicate_analysis, options)); } // Format format issues if (results.format_issues || results.format_analysis) { output.push(''); output.push(color ? chalk_1.default.bold.yellow('Format Issues Analysis') : 'Format Issues Analysis'); output.push(this.formatFormatIssues(results.format_issues || results.format_analysis, options)); } // Format quality score if (results.quality) { output.push(''); output.push(color ? chalk_1.default.bold.green('Data Quality Score') : 'Data Quality Score'); const score = results.quality.score || 0; const scoreColor = score >= 80 ? chalk_1.default.green : score >= 60 ? chalk_1.default.yellow : chalk_1.default.red; output.push(` Score: ${color ? scoreColor(`${score}/100`) : `${score}/100`}`); } // Format recommendations if (results.recommendations && results.recommendations.length > 0) { output.push(''); output.push(color ? chalk_1.default.bold.green('💡 Recommendations:') : '💡 Recommendations:'); results.recommendations.forEach((rec, index) => { output.push(` ${index + 1}. ${rec}`); }); } output.push(''); return output.join('\n'); }, /** * Format success messages */ formatSuccess(message, options = {}) { const { color = true } = options; return color ? chalk_1.default.green(`✅ ${message}`) : `✅ ${message}`; }, /** * Format warning messages */ formatWarning(message, options = {}) { const { color = true } = options; return color ? chalk_1.default.yellow(`⚠️ ${message}`) : `⚠️ ${message}`; }, /** * Format info messages */ formatInfo(message, options = {}) { const { color = true } = options; return color ? chalk_1.default.blue(`ℹ️ ${message}`) : `ℹ️ ${message}`; }, /** * Format a data table */ formatTable(headers, rows, options = {}) { const { color = true, maxWidth = 120 } = options; const table = new cli_table3_1.default({ head: color ? headers.map(h => chalk_1.default.bold(h)) : headers, style: { head: color ? ['cyan'] : [], border: color ? ['gray'] : [] }, wordWrap: true, colWidths: this.calculateColumnWidths(headers, rows, maxWidth) }); rows.forEach(row => { table.push(row.map(cell => this.formatCell(cell, options))); }); return table.toString(); }, /** * Format file size in human-readable format */ formatFileSize(bytes) { const units = ['B', 'KB', 'MB', 'GB', 'TB']; let size = bytes; let unitIndex = 0; while (size >= 1024 && unitIndex < units.length - 1) { size /= 1024; unitIndex++; } return `${size.toFixed(2)} ${units[unitIndex]}`; }, /** * Format duration in human-readable format */ formatDuration(milliseconds) { if (milliseconds < 1000) { return `${milliseconds}ms`; } else if (milliseconds < 60000) { return `${(milliseconds / 1000).toFixed(1)}s`; } else { const minutes = Math.floor(milliseconds / 60000); const seconds = Math.round((milliseconds % 60000) / 1000); return `${minutes}m ${seconds}s`; } }, /** * Format percentage with color coding */ formatPercentage(value, options = {}) { const { color = true } = options; const percentage = `${value.toFixed(1)}%`; if (!color) return percentage; if (value >= 80) return chalk_1.default.red(percentage); if (value >= 50) return chalk_1.default.yellow(percentage); if (value >= 20) return chalk_1.default.blue(percentage); return chalk_1.default.green(percentage); }, /** * Format number with locale formatting */ formatNumber(num) { return num.toLocaleString(); }, // Helper functions formatAnalysisSummary(summary, options) { const lines = []; if (summary.total_rows !== undefined) { lines.push(` Total rows: ${this.formatNumber(summary.total_rows)}`); } if (summary.total_columns !== undefined) { lines.push(` Total columns: ${this.formatNumber(summary.total_columns)}`); } if (summary.file_size !== undefined) { lines.push(` File size: ${this.formatFileSize(summary.file_size)}`); } return lines.join('\n'); }, formatMissingValues(analysis, options) { const { color = true } = options; // Handle simple structure from Python if (analysis.total_missing_cells !== undefined) { if (analysis.total_missing_cells === 0) { return ' No missing values detected ✨'; } const lines = []; lines.push(` Total missing cells: ${this.formatNumber(analysis.total_missing_cells)}`); if (analysis.missing_by_column && Object.keys(analysis.missing_by_column).length > 0) { lines.push(''); lines.push(' Missing by column:'); Object.entries(analysis.missing_by_column).forEach(([col, count]) => { if (count > 0) { lines.push(` ${col}: ${this.formatNumber(count)}`); } }); } return lines.join('\n'); } if (!analysis.column_details || analysis.column_details.length === 0) { return ' No missing values detected ✨'; } const headers = ['Column', 'Missing Count', 'Missing %', 'Pattern']; const rows = analysis.column_details.slice(0, 10).map((col) => [ col.column_name, this.formatNumber(col.missing_count), this.formatPercentage(col.missing_percentage, options), col.missing_patterns?.has_consecutive_missing ? 'Consecutive' : 'Random' ]); return this.formatTable(headers, rows, options); }, formatDuplicates(analysis, options) { const { color = true } = options; // Handle simple structure from Python if (typeof analysis === 'object' && 'count' in analysis) { if (analysis.count === 0) { return ' No duplicates detected ✨'; } const dupText = analysis.count === 1 ? 'duplicate' : 'duplicates'; return ` ${this.formatNumber(analysis.count)} exact ${dupText} found (${analysis.percentage || 0}% of rows)`; } if (!analysis.exact_duplicates && !analysis.column_details) { return ' No duplicates detected ✨'; } const lines = []; if (analysis.summary) { lines.push(` Exact duplicate rows: ${this.formatNumber(analysis.summary.exact_duplicate_rows || 0)}`); lines.push(` Duplicate percentage: ${this.formatPercentage(analysis.summary.exact_duplicate_percentage || 0, options)}`); } if (analysis.column_details && Object.keys(analysis.column_details).length > 0) { lines.push(''); lines.push(' Column-level duplicates:'); const headers = ['Column', 'Unique Values', 'Duplicate Ratio']; const rows = Object.entries(analysis.column_details).slice(0, 5).map(([col, info]) => [ col, this.formatNumber(info.unique_values), this.formatPercentage(info.duplicate_ratio * 100, options) ]); lines.push(this.formatTable(headers, rows, { ...options, maxWidth: 80 })); } return lines.join('\n'); }, formatFormatIssues(analysis, options) { const { color = true } = options; // Handle simple structure from Python if (typeof analysis === 'object' && 'count' in analysis) { if (analysis.count === 0) { return ' No format issues detected ✨'; } return ` Format issues count: ${this.formatNumber(analysis.count)}`; } if (!analysis.column_details || analysis.column_details.length === 0) { return ' No format issues detected ✨'; } const headers = ['Column', 'Type', 'Consistency', 'Issues']; const rows = analysis.column_details.slice(0, 10).map((col) => [ col.column_name, col.detected_type, this.formatPercentage(col.consistency_score * 100, options), this.formatNumber(col.total_inconsistent || 0) ]); return this.formatTable(headers, rows, options); }, formatCell(value, options) { if (value === null || value === undefined) { return options.color ? chalk_1.default.gray('null') : 'null'; } if (typeof value === 'number') { return this.formatNumber(value); } if (typeof value === 'boolean') { return value ? '✓' : '✗'; } return String(value); }, calculateColumnWidths(headers, rows, maxWidth) { const columnCount = headers.length; const minWidth = 10; const maxColumnWidth = Math.floor(maxWidth / columnCount); const widths = headers.map((header, index) => { let maxLength = header.length; rows.forEach(row => { const cellLength = String(row[index] || '').length; maxLength = Math.max(maxLength, cellLength); }); return Math.min(Math.max(maxLength + 2, minWidth), maxColumnWidth); }); return widths; }, /** * Format command examples for help display */ formatCommandExamples(examples, options = {}) { const { color = true } = options; const output = []; output.push(''); output.push(color ? chalk_1.default.bold.blue('📚 Command Examples') : '📚 Command Examples'); output.push(color ? chalk_1.default.gray('═'.repeat(50)) : '═'.repeat(50)); Object.entries(examples).forEach(([command, exampleList]) => { output.push(''); output.push(color ? chalk_1.default.bold(command) : command); exampleList.forEach(example => { output.push(` ${color ? chalk_1.default.gray('$') : '$'} ${example}`); }); }); output.push(''); return output.join('\n'); } }; //# sourceMappingURL=formatters.js.map