/*
 * cleanifix
 * Version:
 * Intelligent data cleaning CLI with natural language support - Docker-powered Python engine
 * 383 lines • 16.6 kB
 * JavaScript
 */
;
/**
 * CommonJS/ES-module interop shim.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise installs a function that returns the module untouched when it is
 * flagged with `__esModule`, and wraps anything else as `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Transpiled ES modules already expose their own `default` binding.
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag the exports object with `__esModule` so interop helpers such as
// `__importDefault` above hand it back as-is instead of wrapping it.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder: the real `formatters` object is assigned further down.
exports.formatters = void 0;
// Dependencies are routed through `__importDefault`, so the usable export is
// always reached via `.default`.
const chalk_1 = __importDefault(require("chalk"));
const cli_table3_1 = __importDefault(require("cli-table3"));
exports.formatters = {
formatCleaningSummary(summary) {
const lines = [];
if (summary.missing_handled !== undefined) {
lines.push(` • Missing values handled: ${chalk_1.default.yellow(summary.missing_handled)}`);
if (summary.strategy_used) {
lines.push(` Strategy: ${chalk_1.default.gray(summary.strategy_used)}`);
}
}
if (summary.rows_removed !== undefined && summary.rows_removed > 0) {
lines.push(` • Rows removed: ${chalk_1.default.red(summary.rows_removed)}`);
}
if (summary.columns_removed !== undefined && summary.columns_removed > 0) {
lines.push(` • Columns removed: ${chalk_1.default.red(summary.columns_removed)}`);
}
if (summary.duplicates_removed !== undefined && summary.duplicates_removed > 0) {
lines.push(` • Duplicates removed: ${chalk_1.default.yellow(summary.duplicates_removed)}`);
}
if (summary.values_standardized !== undefined && summary.values_standardized > 0) {
lines.push(` • Values standardized: ${chalk_1.default.green(summary.values_standardized)}`);
}
return lines.join('\n');
},
formatCleaningComplete(data) {
const lines = [];
if (data.dry_run) {
lines.push(chalk_1.default.yellow.bold('=== DRY RUN COMPLETE ==='));
lines.push(chalk_1.default.yellow('No files were modified\n'));
}
else {
lines.push(chalk_1.default.green.bold('=== CLEANING COMPLETE ===\n'));
}
lines.push(chalk_1.default.bold('Summary:'));
lines.push(` Input: ${chalk_1.default.cyan(data.input_file)}`);
lines.push(` Output: ${chalk_1.default.cyan(data.output_file)}`);
lines.push(` Rules applied: ${chalk_1.default.yellow(data.rules_applied)}\n`);
if (data.applied_rules.length > 0) {
lines.push(chalk_1.default.bold('Applied Rules:'));
for (const rule of data.applied_rules) {
lines.push(`\n ${chalk_1.default.green('✓')} ${chalk_1.default.bold(rule.rule)}`);
if (rule.summary) {
const summaryText = this.formatCleaningSummary(rule.summary);
if (summaryText) {
lines.push(summaryText);
}
}
}
}
// Show before/after stats if available
if (data.initial_stats?.shape) {
lines.push(`\n${chalk_1.default.bold('Data Shape:')}`);
lines.push(` Before: ${chalk_1.default.gray(`${data.initial_stats.shape.rows} rows × ${data.initial_stats.shape.columns} columns`)}`);
// Calculate after shape from applied rules
let finalRows = data.initial_stats.shape.rows;
let finalCols = data.initial_stats.shape.columns;
for (const rule of data.applied_rules) {
if (rule.summary?.rows_after !== undefined) {
finalRows = rule.summary.rows_after;
}
if (rule.summary?.columns_after !== undefined) {
finalCols = rule.summary.columns_after;
}
}
lines.push(` After: ${chalk_1.default.green(`${finalRows} rows × ${finalCols} columns`)}`);
}
return lines.join('\n');
},
formatError(error) {
if (error.code === 'ENOENT') {
return chalk_1.default.red(`File not found: ${error.path}`);
}
return chalk_1.default.red(error.message || String(error));
},
formatProgress(current, total, message) {
const percentage = Math.round((current / total) * 100);
const filled = Math.round((current / total) * 20);
const bar = '█'.repeat(filled) + '░'.repeat(20 - filled);
let text = `[${bar}] ${percentage}%`;
if (message) {
text += ` - ${message}`;
}
return text;
},
/**
* Format analysis results for display
*/
formatAnalysisResults(results, options = {}) {
if (options.json) {
return JSON.stringify(results, null, 2);
}
const output = [];
const { color = true } = options;
// Format header
output.push('');
output.push(color ? chalk_1.default.bold.blue('📊 Analysis Results') : '📊 Analysis Results');
output.push(color ? chalk_1.default.gray('═'.repeat(50)) : '═'.repeat(50));
// Format summary
if (results.summary) {
output.push('');
output.push(color ? chalk_1.default.bold('Summary:') : 'Summary:');
output.push(this.formatAnalysisSummary(results.summary, options));
}
// Format missing values analysis
if (results.missing_values) {
output.push('');
output.push(color ? chalk_1.default.bold.yellow('Missing Values Analysis') : 'Missing Values Analysis');
output.push(this.formatMissingValues(results.missing_values, options));
}
// Format duplicates analysis
if (results.duplicates || results.duplicate_analysis) {
output.push('');
output.push(color ? chalk_1.default.bold.yellow('Duplicates Analysis') : 'Duplicates Analysis');
output.push(this.formatDuplicates(results.duplicates || results.duplicate_analysis, options));
}
// Format format issues
if (results.format_issues || results.format_analysis) {
output.push('');
output.push(color ? chalk_1.default.bold.yellow('Format Issues Analysis') : 'Format Issues Analysis');
output.push(this.formatFormatIssues(results.format_issues || results.format_analysis, options));
}
// Format quality score
if (results.quality) {
output.push('');
output.push(color ? chalk_1.default.bold.green('Data Quality Score') : 'Data Quality Score');
const score = results.quality.score || 0;
const scoreColor = score >= 80 ? chalk_1.default.green : score >= 60 ? chalk_1.default.yellow : chalk_1.default.red;
output.push(` Score: ${color ? scoreColor(`${score}/100`) : `${score}/100`}`);
}
// Format recommendations
if (results.recommendations && results.recommendations.length > 0) {
output.push('');
output.push(color ? chalk_1.default.bold.green('💡 Recommendations:') : '💡 Recommendations:');
results.recommendations.forEach((rec, index) => {
output.push(` ${index + 1}. ${rec}`);
});
}
output.push('');
return output.join('\n');
},
/**
* Format success messages
*/
formatSuccess(message, options = {}) {
const { color = true } = options;
return color ? chalk_1.default.green(`✅ ${message}`) : `✅ ${message}`;
},
/**
* Format warning messages
*/
formatWarning(message, options = {}) {
const { color = true } = options;
return color ? chalk_1.default.yellow(`⚠️ ${message}`) : `⚠️ ${message}`;
},
/**
* Format info messages
*/
formatInfo(message, options = {}) {
const { color = true } = options;
return color ? chalk_1.default.blue(`ℹ️ ${message}`) : `ℹ️ ${message}`;
},
/**
* Format a data table
*/
formatTable(headers, rows, options = {}) {
const { color = true, maxWidth = 120 } = options;
const table = new cli_table3_1.default({
head: color ? headers.map(h => chalk_1.default.bold(h)) : headers,
style: {
head: color ? ['cyan'] : [],
border: color ? ['gray'] : []
},
wordWrap: true,
colWidths: this.calculateColumnWidths(headers, rows, maxWidth)
});
rows.forEach(row => {
table.push(row.map(cell => this.formatCell(cell, options)));
});
return table.toString();
},
/**
* Format file size in human-readable format
*/
formatFileSize(bytes) {
const units = ['B', 'KB', 'MB', 'GB', 'TB'];
let size = bytes;
let unitIndex = 0;
while (size >= 1024 && unitIndex < units.length - 1) {
size /= 1024;
unitIndex++;
}
return `${size.toFixed(2)} ${units[unitIndex]}`;
},
/**
* Format duration in human-readable format
*/
formatDuration(milliseconds) {
if (milliseconds < 1000) {
return `${milliseconds}ms`;
}
else if (milliseconds < 60000) {
return `${(milliseconds / 1000).toFixed(1)}s`;
}
else {
const minutes = Math.floor(milliseconds / 60000);
const seconds = Math.round((milliseconds % 60000) / 1000);
return `${minutes}m ${seconds}s`;
}
},
/**
* Format percentage with color coding
*/
formatPercentage(value, options = {}) {
const { color = true } = options;
const percentage = `${value.toFixed(1)}%`;
if (!color)
return percentage;
if (value >= 80)
return chalk_1.default.red(percentage);
if (value >= 50)
return chalk_1.default.yellow(percentage);
if (value >= 20)
return chalk_1.default.blue(percentage);
return chalk_1.default.green(percentage);
},
/**
* Format number with locale formatting
*/
formatNumber(num) {
return num.toLocaleString();
},
// Helper functions
formatAnalysisSummary(summary, options) {
const lines = [];
if (summary.total_rows !== undefined) {
lines.push(` Total rows: ${this.formatNumber(summary.total_rows)}`);
}
if (summary.total_columns !== undefined) {
lines.push(` Total columns: ${this.formatNumber(summary.total_columns)}`);
}
if (summary.file_size !== undefined) {
lines.push(` File size: ${this.formatFileSize(summary.file_size)}`);
}
return lines.join('\n');
},
formatMissingValues(analysis, options) {
const { color = true } = options;
// Handle simple structure from Python
if (analysis.total_missing_cells !== undefined) {
if (analysis.total_missing_cells === 0) {
return ' No missing values detected ✨';
}
const lines = [];
lines.push(` Total missing cells: ${this.formatNumber(analysis.total_missing_cells)}`);
if (analysis.missing_by_column && Object.keys(analysis.missing_by_column).length > 0) {
lines.push('');
lines.push(' Missing by column:');
Object.entries(analysis.missing_by_column).forEach(([col, count]) => {
if (count > 0) {
lines.push(` ${col}: ${this.formatNumber(count)}`);
}
});
}
return lines.join('\n');
}
if (!analysis.column_details || analysis.column_details.length === 0) {
return ' No missing values detected ✨';
}
const headers = ['Column', 'Missing Count', 'Missing %', 'Pattern'];
const rows = analysis.column_details.slice(0, 10).map((col) => [
col.column_name,
this.formatNumber(col.missing_count),
this.formatPercentage(col.missing_percentage, options),
col.missing_patterns?.has_consecutive_missing ? 'Consecutive' : 'Random'
]);
return this.formatTable(headers, rows, options);
},
formatDuplicates(analysis, options) {
const { color = true } = options;
// Handle simple structure from Python
if (typeof analysis === 'object' && 'count' in analysis) {
if (analysis.count === 0) {
return ' No duplicates detected ✨';
}
const dupText = analysis.count === 1 ? 'duplicate' : 'duplicates';
return ` ${this.formatNumber(analysis.count)} exact ${dupText} found (${analysis.percentage || 0}% of rows)`;
}
if (!analysis.exact_duplicates && !analysis.column_details) {
return ' No duplicates detected ✨';
}
const lines = [];
if (analysis.summary) {
lines.push(` Exact duplicate rows: ${this.formatNumber(analysis.summary.exact_duplicate_rows || 0)}`);
lines.push(` Duplicate percentage: ${this.formatPercentage(analysis.summary.exact_duplicate_percentage || 0, options)}`);
}
if (analysis.column_details && Object.keys(analysis.column_details).length > 0) {
lines.push('');
lines.push(' Column-level duplicates:');
const headers = ['Column', 'Unique Values', 'Duplicate Ratio'];
const rows = Object.entries(analysis.column_details).slice(0, 5).map(([col, info]) => [
col,
this.formatNumber(info.unique_values),
this.formatPercentage(info.duplicate_ratio * 100, options)
]);
lines.push(this.formatTable(headers, rows, { ...options, maxWidth: 80 }));
}
return lines.join('\n');
},
formatFormatIssues(analysis, options) {
const { color = true } = options;
// Handle simple structure from Python
if (typeof analysis === 'object' && 'count' in analysis) {
if (analysis.count === 0) {
return ' No format issues detected ✨';
}
return ` Format issues count: ${this.formatNumber(analysis.count)}`;
}
if (!analysis.column_details || analysis.column_details.length === 0) {
return ' No format issues detected ✨';
}
const headers = ['Column', 'Type', 'Consistency', 'Issues'];
const rows = analysis.column_details.slice(0, 10).map((col) => [
col.column_name,
col.detected_type,
this.formatPercentage(col.consistency_score * 100, options),
this.formatNumber(col.total_inconsistent || 0)
]);
return this.formatTable(headers, rows, options);
},
formatCell(value, options) {
if (value === null || value === undefined) {
return options.color ? chalk_1.default.gray('null') : 'null';
}
if (typeof value === 'number') {
return this.formatNumber(value);
}
if (typeof value === 'boolean') {
return value ? '✓' : '✗';
}
return String(value);
},
calculateColumnWidths(headers, rows, maxWidth) {
const columnCount = headers.length;
const minWidth = 10;
const maxColumnWidth = Math.floor(maxWidth / columnCount);
const widths = headers.map((header, index) => {
let maxLength = header.length;
rows.forEach(row => {
const cellLength = String(row[index] || '').length;
maxLength = Math.max(maxLength, cellLength);
});
return Math.min(Math.max(maxLength + 2, minWidth), maxColumnWidth);
});
return widths;
},
/**
* Format command examples for help display
*/
formatCommandExamples(examples, options = {}) {
const { color = true } = options;
const output = [];
output.push('');
output.push(color ? chalk_1.default.bold.blue('📚 Command Examples') : '📚 Command Examples');
output.push(color ? chalk_1.default.gray('═'.repeat(50)) : '═'.repeat(50));
Object.entries(examples).forEach(([command, exampleList]) => {
output.push('');
output.push(color ? chalk_1.default.bold(command) : command);
exampleList.forEach(example => {
output.push(` ${color ? chalk_1.default.gray('$') : '$'} ${example}`);
});
});
output.push('');
return output.join('\n');
}
};
//# sourceMappingURL=formatters.js.map