/*
 * cleanifix
 * Version:
 * Intelligent data cleaning CLI with natural language support - Docker-powered Python engine
 * 215 lines • 8.55 kB
 * JavaScript
 */
;
/**
 * Compiler-emitted interop helper: exposes property `k` of module object `m`
 * on target `o` under the name `k2` (falls back to `k` when `k2` is omitted).
 *
 * Where `Object.create` exists the property is installed with
 * `Object.defineProperty`. A forwarding getter is used when `m` has no own
 * descriptor for `k`, when the descriptor is an accessor and `m` is not
 * flagged `__esModule`, or when it is a writable/configurable data property;
 * otherwise the source descriptor is reused unchanged. Without
 * `Object.create` the value is simply copied.
 *
 * An already-present `this.__createBinding` takes precedence.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy engines: plain value copy, no live binding.
        return function (o, m, k, k2) {
            const alias = k2 === undefined ? k : k2;
            o[alias] = m[k];
        };
    }
    return function (o, m, k, k2) {
        const alias = k2 === undefined ? k : k2;
        let descriptor = Object.getOwnPropertyDescriptor(m, k);
        let needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            // Read through to the source on every access so later changes to m[k] stay visible.
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, alias, descriptor);
    };
})();
/**
 * Compiler-emitted interop helper: stores `v` as the `default` property of
 * `o`. With `Object.create` available the property is defined as enumerable
 * via `Object.defineProperty`; otherwise it is a plain assignment.
 *
 * An already-present `this.__setModuleDefault` takes precedence.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { enumerable: true, value: v });
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
/**
 * Compiler-emitted interop helper backing `import * as x from '...'`.
 *
 * - A module already flagged `__esModule` is returned as-is.
 * - Otherwise a fresh object is built: every own key of `mod` except
 *   "default" is bound through `__createBinding`, and `mod` itself is
 *   attached as `default` through `__setModuleDefault`.
 * - `null`/`undefined` yields an object carrying only `default`.
 *
 * An already-present `this.__importStar` takes precedence.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Self-replacing function: the first call picks the key-listing strategy
    // (native getOwnPropertyNames or a for-in fallback) and reuses it afterwards.
    var listOwnKeys = function (target) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var key in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, key)) {
                    names[names.length] = key;
                }
            }
            return names;
        };
        return listOwnKeys(target);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                var key = keys[idx];
                if (key !== "default") {
                    __createBinding(namespace, mod, key);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
// Flag this CommonJS module with `__esModule`; interop helpers such as
// `__importStar` check that flag and return flagged modules unchanged.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare both exported names as undefined; the Command instances are
// assigned to them further down in this file.
exports.quickAnalyzeCommand = exports.analyzeCommand = void 0;
const commander_1 = require("commander");
const python_bridge_1 = require("../services/python-bridge");
const formatters_1 = require("../utils/formatters");
const spinner_1 = require("../utils/spinner");
const logger_1 = require("../utils/logger");
const fs = __importStar(require("fs/promises"));
const path = __importStar(require("path"));
exports.analyzeCommand = new commander_1.Command('analyze')
.description('Analyze data quality issues in a file')
.argument('<file>', 'Path to the file to analyze')
.option('-m, --missing', 'Analyze missing values')
.option('-d, --duplicates', 'Analyze duplicate rows and values')
.option('-f, --format', 'Analyze format inconsistencies')
.option('-q, --quality', 'Comprehensive quality analysis')
.option('-a, --all', 'Run all available analyses')
.option('-c, --columns <columns>', 'Specific columns to analyze (comma-separated)')
.option('-o, --output <file>', 'Save analysis results to file')
.option('--json', 'Output results as JSON')
.option('-v, --verbose', 'Show detailed output')
.action(async (file, options) => {
try {
// Validate input file exists
try {
await fs.access(file);
}
catch {
logger_1.logger.error(`File not found: ${file}`);
throw new Error(`File not found: ${file}`);
}
// Determine which analyses to run
const analysesToRun = determineAnalyses(options);
if (analysesToRun.length === 0) {
logger_1.logger.warn('No analysis type specified. Use --all or specific flags.');
logger_1.logger.info('Available analyses: --missing, --duplicates, --format, --quality');
process.exit(1);
}
// Parse columns if specified
const columns = options.columns ? options.columns.split(',').map(c => c.trim()) : null;
// Start the analysis (suppress spinner in JSON mode)
const spin = (0, spinner_1.spinner)();
if (!options.json) {
spin.start(`Analyzing ${path.basename(file)}...`);
}
try {
// Initialize Python bridge
const pythonBridge = new python_bridge_1.PythonBridge({ debug: false, quiet: options.json });
await pythonBridge.initialize();
// Prepare the analysis configuration
const analysisConfig = {
operations: analysesToRun.map(type => ({
type: 'analyze',
subtype: type,
options: {
columns: columns,
deep_scan: options.verbose
}
})),
config: {
input_file: file,
verbose: options.verbose
}
};
try {
// Execute analysis
const response = await pythonBridge.analyze(file, {
verbose: options.verbose,
columns: columns,
operations: analysesToRun
});
const results = response.result;
if (!options.json) {
spin.stop();
}
// Format and display results
if (options.json) {
console.log(JSON.stringify(results, null, 2));
}
else {
const formatted = formatAnalysisResults(results, options);
console.log(formatted);
}
// Save to file if requested
if (options.output) {
await saveResults(results, options.output, options.json || false);
if (!options.json) {
logger_1.logger.success(`Analysis results saved to ${options.output}`);
}
}
// Show summary statistics
if (!options.json && results.summary) {
showSummaryStatistics(results.summary);
}
}
finally {
// Cleanup Python bridge
await pythonBridge.shutdown();
}
}
catch (error) {
if (!options.json) {
spin.stop();
}
throw error;
}
}
catch (error) {
logger_1.logger.error('Analysis failed:', error);
if (options.verbose) {
console.error(error);
}
process.exit(1);
}
});
/**
 * Translate CLI flags into the ordered list of analysis identifiers.
 *
 * `options.all` selects every analysis; otherwise each truthy flag
 * (`missing`, `duplicates`, `format`, `quality`) contributes its identifier,
 * always in that fixed order. Returns an empty array when no flag is set.
 */
function determineAnalyses(options) {
    const flagToAnalysis = [
        ['missing', 'missing_values'],
        ['duplicates', 'duplicates'],
        ['format', 'format_issues'],
        ['quality', 'quality'],
    ];
    const selectAll = Boolean(options.all);
    return flagToAnalysis
        .filter(([flag]) => selectAll || options[flag])
        .map(([, analysis]) => analysis);
}
/**
 * Render analysis results as coloured terminal text.
 *
 * Delegates to the shared formatters utility with colour enabled, JSON
 * output disabled, and verbosity taken from `options.verbose`.
 */
function formatAnalysisResults(results, options) {
    const { formatters } = formatters_1;
    const displayOptions = {
        color: true,
        verbose: options.verbose,
        json: false
    };
    return formatters.formatAnalysisResults(results, displayOptions);
}
/**
 * Write analysis results to `outputPath` as UTF-8 text.
 *
 * With `asJson` truthy the results are serialised as 2-space-indented JSON;
 * otherwise they are rendered by the formatters utility with colour
 * disabled, so the file does not receive coloured output.
 */
async function saveResults(results, outputPath, asJson) {
    let content;
    if (asJson) {
        content = JSON.stringify(results, null, 2);
    }
    else {
        content = formatters_1.formatters.formatAnalysisResults(results, { color: false, json: false });
    }
    await fs.writeFile(outputPath, content, 'utf-8');
}
/**
 * Print a short "Quick Statistics" section for an analysis summary.
 *
 * Each line is printed only when its field is present:
 *  - `data_quality_score`  -> formatted as a percentage
 *  - `total_issues`        -> formatted as a number
 *  - `critical_issues`     -> printed as a warning, only when greater than 0
 *
 * Fields that are `null` are treated the same as missing ones, so a `null`
 * value is never handed to the formatters. The section always ends with a
 * blank line.
 *
 * @param {object} summary - Summary object taken from the analysis result.
 */
function showSummaryStatistics(summary) {
    console.log('\n' + formatters_1.formatters.formatInfo('Quick Statistics:'));
    // `!= null` deliberately matches both undefined and null.
    if (summary.data_quality_score != null) {
        console.log(` Data Quality Score: ${formatters_1.formatters.formatPercentage(summary.data_quality_score, { color: true })}`);
    }
    if (summary.total_issues != null) {
        console.log(` Total Issues Found: ${formatters_1.formatters.formatNumber(summary.total_issues)}`);
    }
    if (summary.critical_issues != null && summary.critical_issues > 0) {
        console.log(formatters_1.formatters.formatWarning(` Critical Issues: ${summary.critical_issues}`));
    }
    console.log('');
}
// Additional helper command for quick analysis
exports.quickAnalyzeCommand = new commander_1.Command('qa')
.description('Quick analysis of a file (alias for analyze --all)')
.argument('<file>', 'Path to the file to analyze')
.option('--json', 'Output results as JSON')
.action(async (file, options) => {
// Delegate to main analyze command with --all flag
await exports.analyzeCommand.parseAsync([file, '--all', ...(options.json ? ['--json'] : [])]);
});
//# sourceMappingURL=analyze.js.map