UNPKG

cleanifix

Version:

Intelligent data cleaning CLI with natural language support - Docker-powered Python engine

248 lines 10.2 kB
#!/usr/bin/env node
"use strict";
/*
 * Cleanifix CLI entry point (Docker-backed variant).
 *
 * Declares the `cleanifix` command tree with commander and forwards every data
 * command (init / validate / transform / analyze / clean / ask) to
 * DockerManager.runCommand. The `docker:*` commands call DockerManager
 * helpers directly.
 */

// ---------------------------------------------------------------------------
// TypeScript-emitted CommonJS interop helpers (kept functionally unchanged).
// ---------------------------------------------------------------------------
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });

const commander_1 = require("commander");
const packageJson = __importStar(require("../package.json"));
const docker_manager_1 = require("./utils/docker-manager");
const logger_1 = require("./utils/logger");
const chalk_1 = __importDefault(require("chalk"));
const ora_1 = __importDefault(require("ora"));

const program = new commander_1.Command();

/**
 * Check that Docker can be used before running an engine command.
 *
 * Shows a spinner while awaiting DockerManager.checkDocker(). When that
 * resolves falsy, the spinner is failed and installation hints are printed to
 * stderr.
 *
 * @returns {Promise<boolean>} true when the Docker check passed, false otherwise.
 */
async function checkPrerequisites() {
    const spinner = (0, ora_1.default)('Checking prerequisites...').start();
    // Check if Docker is installed
    if (!await docker_manager_1.DockerManager.checkDocker()) {
        spinner.fail('Docker is not installed or not running');
        console.error(chalk_1.default.red('\nCleanifix requires Docker to run the Python engine.'));
        console.error(chalk_1.default.yellow('Please install Docker from: https://docs.docker.com/get-docker/'));
        console.error(chalk_1.default.yellow('\nAfter installation, make sure Docker is running and try again.'));
        return false;
    }
    spinner.succeed('Docker is available');
    return true;
}

/**
 * Run one engine command through Docker.
 *
 * Builds the engine argv as `[command, ...args, ...flags]`, where each entry of
 * `options` whose value is neither `undefined` nor `false` becomes `--<key>`,
 * followed by the stringified value unless the value is exactly `true`.
 * The current working directory is passed as the mount path.
 *
 * Any failure is logged and terminates the process with exit code 1.
 *
 * @param {string} command - Engine sub-command name (e.g. 'validate').
 * @param {string[]} args - Positional arguments forwarded after the command.
 * @param {Object} options - Parsed commander options for the sub-command.
 * @returns {Promise<void>}
 */
async function runWithDocker(command, args, options) {
    try {
        // Check prerequisites
        if (!await checkPrerequisites()) {
            process.exit(1);
        }
        // Ensure Docker image is available
        await docker_manager_1.DockerManager.ensureImage();
        // Prepare command arguments for the Python engine
        const engineArgs = [command, ...args];
        // Add options as flags.
        // NOTE(review): commander exposes multi-word options under camelCase keys
        // (`--dry-run` -> `dryRun`), so they are forwarded as `--dryRun` here.
        // Confirm the engine accepts that spelling.
        Object.entries(options).forEach(([key, value]) => {
            if (value !== undefined && value !== false) {
                engineArgs.push(`--${key}`);
                if (value !== true) {
                    engineArgs.push(String(value));
                }
            }
        });
        // Determine mount path (current directory by default)
        const mountPath = process.cwd();
        // Run the command in Docker
        await docker_manager_1.DockerManager.runCommand(engineArgs, {
            mountPath,
            interactive: process.stdin.isTTY,
            env: {
                // LOG_LEVEL is set by the global --verbose / --quiet preAction hook below.
                CLEANIFIX_LOG_LEVEL: process.env.LOG_LEVEL || 'info',
            },
        });
    }
    catch (error) {
        logger_1.logger.error('Command failed:', error.message);
        process.exit(1);
    }
}

// Global options. The preAction hook maps --verbose / --quiet onto LOG_LEVEL.
// NOTE(review): `--no-docker` is declared but never consulted in this file.
program
    .name('cleanifix')
    .description('Intelligent data cleaning CLI with natural language support')
    .version(packageJson.version)
    .option('-v, --verbose', 'Enable verbose logging')
    .option('-q, --quiet', 'Suppress non-error output')
    .option('--no-docker', 'Run without Docker (requires local Python installation)')
    .hook('preAction', (thisCommand) => {
        const options = thisCommand.opts();
        if (options.verbose) {
            process.env.LOG_LEVEL = 'debug';
        }
        else if (options.quiet) {
            process.env.LOG_LEVEL = 'error';
        }
    });

// Initialize project
program
    .command('init')
    .description('Initialize a new Cleanifix project')
    .option('-t, --template <template>', 'Project template to use', 'default')
    .option('-f, --force', 'Overwrite existing configuration')
    .action(async (options) => {
        await runWithDocker('init', [], options);
    });

// Validate data
program
    .command('validate')
    .description('Validate data against defined rules')
    .argument('<input>', 'Input file or directory path')
    .option('-c, --config <path>', 'Path to validation config file')
    .option('-r, --rules <path>', 'Path to custom rules file')
    .option('-o, --output <path>', 'Output directory for validation reports')
    .option('-f, --format <format>', 'Output format (json|html|csv)', 'json')
    .option('--fail-fast', 'Stop on first validation error')
    .option('--parallel <number>', 'Number of parallel workers', '4')
    .action(async (input, options) => {
        await runWithDocker('validate', [input], options);
    });

// Transform data
program
    .command('transform')
    .description('Transform data using defined pipelines')
    .argument('<input>', 'Input file or directory path')
    .argument('<output>', 'Output file or directory path')
    .option('-p, --pipeline <path>', 'Path to transformation pipeline config')
    .option('-m, --mappings <path>', 'Path to field mappings file')
    .option('-f, --format <format>', 'Output format (csv|json|parquet|excel)')
    .option('--streaming', 'Enable streaming mode for large files')
    .option('--chunk-size <size>', 'Chunk size for processing', '10000')
    .action(async (input, output, options) => {
        await runWithDocker('transform', [input, output], options);
    });

// Analyze data
// NOTE(review): `-q` and `-v` are also declared as global short flags above;
// confirm which option commander resolves them to for this sub-command.
program
    .command('analyze')
    .description('Analyze data quality and patterns')
    .argument('<input>', 'Input file or directory path')
    .option('-m, --missing', 'Analyze missing values')
    .option('-d, --duplicates', 'Analyze duplicate rows and values')
    .option('-f, --format', 'Analyze format inconsistencies')
    .option('-q, --quality', 'Comprehensive quality analysis')
    .option('-a, --all', 'Run all available analyses')
    .option('-c, --columns <columns>', 'Specific columns to analyze (comma-separated)')
    .option('-o, --output <file>', 'Save analysis results to file')
    .option('--json', 'Output results as JSON')
    .option('-v, --verbose', 'Show detailed output')
    .action(async (input, options) => {
        await runWithDocker('analyze', [input], options);
    });

// Clean data
program
    .command('clean')
    .description('Clean data using predefined or custom rules')
    .argument('<input>', 'Input file or directory path')
    .option('-o, --output <path>', 'Output file or directory path')
    .option('-r, --rules <rules>', 'Comma-separated list of cleaning rules')
    .option('-c, --config <path>', 'Path to cleaning configuration file')
    .option('--dry-run', 'Preview changes without applying them')
    .option('--backup', 'Create backup of original files')
    .option('--report', 'Generate cleaning report')
    .option('-m, --missing', 'Clean missing values')
    .option('-d, --duplicates', 'Clean duplicate rows')
    .option('-s, --strategy <strategy>', 'Cleaning strategy')
    .option('-f, --fill-value <value>', 'Fill value for missing data')
    .option('--columns <columns>', 'Comma-separated list of columns to clean')
    .option('--subset <subset>', 'Columns for duplicate detection')
    .option('-t, --threshold <threshold>', 'Threshold for dropping columns (0-1)')
    .action(async (input, options) => {
        await runWithDocker('clean', [input], options);
    });

// Natural language interface
program
    .command('ask')
    .description('Use natural language to describe what you want to do')
    .argument('<query>', 'Natural language query')
    .option('-i, --input <path>', 'Input file to work with')
    .option('-o, --output <path>', 'Output path for results')
    .option('--interactive', 'Enter interactive mode')
    .action(async (query, options) => {
        await runWithDocker('ask', [query], options);
    });

// Docker management commands
program
    .command('docker:info')
    .description('Show Docker and Cleanifix engine information')
    .action(async () => {
        const info = await docker_manager_1.DockerManager.getSystemInfo();
        console.log(chalk_1.default.cyan('System Information:'));
        Object.entries(info).forEach(([key, value]) => {
            console.log(` ${key}: ${value}`);
        });
    });

program
    .command('docker:build')
    .description('Build the Docker image locally (for development)')
    .action(async () => {
        try {
            await docker_manager_1.DockerManager.ensureImage();
            logger_1.logger.info('Docker image is ready');
        }
        catch (error) {
            logger_1.logger.error('Failed to build Docker image:', error.message);
            process.exit(1);
        }
    });

program
    .command('docker:cleanup')
    .description('Clean up Docker resources')
    .action(async () => {
        await docker_manager_1.DockerManager.cleanup();
        logger_1.logger.info('Docker resources cleaned up');
    });

// Error handling: make commander throw CommanderError instead of calling
// process.exit itself, so exit codes are decided in one place below.
program.exitOverride();

/**
 * Parse argv and run the selected command.
 *
 * - With no arguments at all, prints the help text and returns (exit code 0).
 *   This is checked BEFORE parsing because, with exitOverride() active,
 *   commander's own "missing sub-command" help path throws.
 * - Uses parseAsync() so rejections from the async action handlers are caught
 *   here rather than surfacing as unhandled promise rejections.
 *
 * @returns {Promise<void>}
 */
async function main() {
    // Show help if no command provided
    if (!process.argv.slice(2).length) {
        program.outputHelp();
        return;
    }
    try {
        await program.parseAsync(process.argv);
    }
    catch (error) {
        if (error.code === 'commander.helpDisplayed' || error.code === 'commander.version') {
            process.exit(0);
        }
        else if (error.code === 'commander.help') {
            // Help was already printed by commander (e.g. only global flags and no
            // sub-command were given); exit with its code and log no extra error.
            process.exit(typeof error.exitCode === 'number' ? error.exitCode : 1);
        }
        else {
            logger_1.logger.error('Command error:', error.message);
            process.exit(1);
        }
    }
}

main();
//# sourceMappingURL=index-docker.js.map