UNPKG

cleanifix

Version:

Intelligent data cleaning CLI with natural language support - Docker-powered Python engine

343 lines 14.9 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.DockerManager = void 0; const child_process_1 = require("child_process"); const path = __importStar(require("path")); const fs = __importStar(require("fs")); const os = __importStar(require("os")); const chalk_1 = __importDefault(require("chalk")); const ora_1 = __importDefault(require("ora")); const output_formatter_1 = require("./output-formatter"); class DockerManager { static IMAGE_NAME = 'rickyapp/cleanifix-engine'; static CONTAINER_NAME = 'cleanifix-engine'; static IMAGE_TAG = 'latest'; /** * Check if Docker is installed and running */ static async checkDocker() { try { (0, child_process_1.execSync)('docker --version', { stdio: 'ignore' }); (0, child_process_1.execSync)('docker ps', { stdio: 'ignore' }); return true; } catch { return false; } } /** * Check if the Cleanifix Docker image exists locally */ static async imageExists() { try { const output = (0, child_process_1.execSync)(`docker images -q ${this.IMAGE_NAME}:${this.IMAGE_TAG}`, { encoding: 'utf8' }); return output.trim().length > 0; } catch { return false; } } /** * Pull or build the Docker image */ static async ensureImage() { const spinner = (0, ora_1.default)('Checking Cleanifix Docker image...').start(); if (await this.imageExists()) { spinner.succeed('Docker image found'); return; } spinner.text = 'Docker image not found. Pulling from registry...'; try { // First try to pull from Docker Hub (0, child_process_1.execSync)(`docker pull ${this.IMAGE_NAME}:${this.IMAGE_TAG}`, { stdio: 'inherit' }); spinner.succeed('Docker image pulled successfully'); } catch { // If pull fails, build locally (for development) spinner.text = 'Building Docker image locally...'; const enginePath = path.join(__dirname, '..', '..', '..', 'engine'); if (!fs.existsSync(path.join(enginePath, 'Dockerfile'))) { spinner.fail('Dockerfile not found. Please ensure you have the complete Cleanifix installation.'); throw new Error('Missing Dockerfile'); } try { (0, child_process_1.execSync)(`docker build -t ${this.IMAGE_NAME}:${this.IMAGE_TAG} ${enginePath}`, { stdio: 'inherit' }); spinner.succeed('Docker image built successfully'); } catch (error) { spinner.fail('Failed to build Docker image'); throw error; } } } /** * Parse command options from CLI arguments */ static parseCommandOptions(args) { const options = {}; for (let i = 0; i < args.length; i++) { const arg = args[i]; if (arg.startsWith('--')) { const key = arg.slice(2); // Check if next arg is a value or another flag if (i + 1 < args.length && !args[i + 1].startsWith('--')) { options[key] = args[i + 1]; i++; // Skip the value } else { options[key] = true; // Boolean flag } } } return options; } /** * Run a command in the Docker container */ static async runCommand(command, options = {}) { await this.ensureImage(); const dockerArgs = [ 'run', '--rm', // Remove container after exit '--name', `${this.CONTAINER_NAME}-${Date.now()}`, ]; // Add volume mount if specified if (options.mountPath) { const absolutePath = path.resolve(options.mountPath); dockerArgs.push('-v', `${absolutePath}:/data`); dockerArgs.push('-w', '/data'); // Set working directory } // Add environment variables if (options.env) { Object.entries(options.env).forEach(([key, value]) => { dockerArgs.push('-e', `${key}=${value}`); }); } // Add interactive flags if needed if (options.interactive) { dockerArgs.push('-it'); } // Add image name dockerArgs.push(`${this.IMAGE_NAME}:${this.IMAGE_TAG}`); // Build the command object based on the command type const commandType = command[0]; const commandObj = { type: commandType, payload: {} }; // Handle different command types switch (commandType) { case 'analyze': commandObj.payload.input = command[1]; const analyzeOptions = this.parseCommandOptions(command.slice(2)); // Convert boolean flags to operations array const operations = []; if (analyzeOptions.missing) operations.push('missing_values'); if (analyzeOptions.duplicates) operations.push('duplicates'); if (analyzeOptions.format) operations.push('format_issues'); if (analyzeOptions.quality) operations.push('quality'); if (operations.length === 0) { // Default to all operations if none specified operations.push('missing_values', 'duplicates', 'format_issues', 'quality'); } commandObj.payload.operations = operations; break; case 'clean': commandObj.payload.input = command[1]; commandObj.payload.output = command[2]; const cleanOptions = this.parseCommandOptions(command.slice(3)); // Map CLI options to payload if (cleanOptions.missing) { commandObj.payload.missing = true; commandObj.payload.strategy = cleanOptions.strategy || 'drop'; if (cleanOptions['fill-value']) commandObj.payload.fill_value = cleanOptions['fill-value']; if (cleanOptions.columns) commandObj.payload.columns = cleanOptions.columns.split(','); if (cleanOptions.threshold) commandObj.payload.threshold = parseFloat(cleanOptions.threshold); } if (cleanOptions.duplicates) { commandObj.payload.duplicates = true; commandObj.payload.strategy = cleanOptions.strategy || 'first'; if (cleanOptions.subset) commandObj.payload.subset = cleanOptions.subset.split(','); } if (cleanOptions['dry-run']) commandObj.payload.dry_run = true; break; case 'ask': // For 'ask' command, we need to determine the actual command type from the natural language query const query = command[1].toLowerCase(); const askOptions = this.parseCommandOptions(command.slice(2)); const inputFile = askOptions.input || askOptions.i || 'test.csv'; if (query.includes('missing') || query.includes('null')) { commandObj.type = 'analyze'; commandObj.payload.input = inputFile; commandObj.payload.operations = ['missing_values']; } else if (query.includes('duplicate')) { commandObj.type = 'analyze'; commandObj.payload.input = inputFile; commandObj.payload.operations = ['duplicates']; } else if (query.includes('clean') || query.includes('remove')) { commandObj.type = 'clean'; commandObj.payload.input = inputFile; commandObj.payload.output = askOptions.output || askOptions.o || 'cleaned_' + inputFile; if (query.includes('missing')) { commandObj.payload.missing = true; commandObj.payload.strategy = 'drop'; } else if (query.includes('duplicate')) { commandObj.payload.duplicates = true; commandObj.payload.strategy = 'first'; } } else if (query.includes('quality') || query.includes('analyze')) { // Quality or general analysis commandObj.type = 'analyze'; commandObj.payload.input = inputFile; commandObj.payload.operations = ['missing_values', 'duplicates', 'format_issues', 'quality']; } else { // Default to analyze all commandObj.type = 'analyze'; commandObj.payload.input = inputFile; commandObj.payload.operations = ['missing_values', 'duplicates', 'format_issues', 'quality']; } // Apply any additional options from the ask command if (askOptions.output || askOptions.o) { commandObj.payload.output = askOptions.output || askOptions.o; } break; default: // For other commands, pass through the arguments commandObj.payload.input = command[1]; Object.assign(commandObj.payload, this.parseCommandOptions(command.slice(2))); } dockerArgs.push('python', '-m', 'src.main', '--mode', 'single', '--command', JSON.stringify(commandObj)); // Run the Docker command and capture output for formatting const spawnOptions = { stdio: ['inherit', 'pipe', 'pipe'], // Capture stdout and stderr shell: false, // Don't use shell to avoid JSON escaping issues }; return new Promise((resolve, reject) => { const proc = (0, child_process_1.spawn)('docker', dockerArgs, spawnOptions); let stdout = ''; let stderr = ''; let jsonResult = null; proc.stdout?.on('data', (data) => { const output = data.toString(); stdout += output; // Try to parse JSON output from Python engine const lines = output.split('\n'); for (const line of lines) { if (line.trim().startsWith('{') && line.includes('"success"')) { try { jsonResult = JSON.parse(line.trim()); } catch (e) { // Not valid JSON, continue } } } }); proc.stderr?.on('data', (data) => { stderr += data.toString(); // Only show Docker errors, not Python logging if (!data.toString().includes('INFO:') && !data.toString().includes('DEBUG:')) { console.error(chalk_1.default.red(data.toString())); } }); proc.on('error', (error) => { reject(new Error(`Failed to run Docker container: ${error.message}`)); }); proc.on('exit', (code) => { if (code === 0) { // Format and display the result if we captured JSON if (jsonResult) { output_formatter_1.OutputFormatter.format(jsonResult); } else if (stdout.trim()) { // If no JSON was captured, show raw output console.log(stdout); } resolve(); } else { reject(new Error(`Docker container exited with code ${code}`)); } }); }); } /** * Clean up Docker resources */ static async cleanup() { try { // Stop any running containers (0, child_process_1.execSync)(`docker ps -q --filter "name=${this.CONTAINER_NAME}" | xargs -r docker stop`, { stdio: 'ignore' }); // Remove stopped containers (0, child_process_1.execSync)(`docker ps -aq --filter "name=${this.CONTAINER_NAME}" | xargs -r docker rm`, { stdio: 'ignore' }); } catch { // Ignore cleanup errors } } /** * Get system information for debugging */ static async getSystemInfo() { const dockerInstalled = await this.checkDocker(); const imageExists = await this.imageExists(); return { platform: os.platform(), arch: os.arch(), dockerInstalled: dockerInstalled ? 'Yes' : 'No', imageExists: imageExists ? 'Yes' : 'No', }; } } exports.DockerManager = DockerManager; //# sourceMappingURL=docker-manager.js.map