// cleanifix
// Version:
// Intelligent data cleaning CLI with natural language support - Docker-powered Python engine
// 343 lines • 14.9 kB
// JavaScript
;
// TypeScript interop helper: exposes property `key` of `source` on `target`
// under the name `alias` (defaults to `key`). Reuses a helper already present
// on `this` when one exists.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    if (alias === undefined) {
        alias = key;
    }
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    // A forwarding getter is installed when the property has no own descriptor,
    // when it is an accessor on a non-ES module, or when it is a data property
    // that is writable or configurable; otherwise the descriptor is reused as-is.
    var needsGetter = !descriptor || ("get" in descriptor
        ? !source.__esModule
        : descriptor.writable || descriptor.configurable);
    if (needsGetter) {
        descriptor = {
            enumerable: true,
            get: function () {
                return source[key];
            }
        };
    }
    Object.defineProperty(target, alias, descriptor);
}) : (function (target, source, key, alias) {
    // Fallback when Object.create is unavailable: copy the current value.
    if (alias === undefined) {
        alias = key;
    }
    target[alias] = source[key];
}));
// TypeScript interop helper: attaches `value` to `target` as its `default`
// property. Reuses a helper already present on `this` when one exists.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        var defaultDescriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", defaultDescriptor);
    }
    : function (target, value) {
        // Fallback when Object.create is unavailable: plain assignment.
        target["default"] = value;
    });
// TypeScript interop helper for `import * as ns`: returns ES modules untouched,
// otherwise builds a namespace object that re-exposes every own key except
// "default" and stores the module itself under `default`.
var __importStar = (this && this.__importStar) || (function () {
    // On first use, picks the key-listing strategy and replaces itself with it.
    var listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                if (keys[idx] !== "default") {
                    __createBinding(namespace, mod, keys[idx]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
// TypeScript interop helper for default imports: ES modules are returned
// as-is, anything else is wrapped so it can be read through `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag the exports object with `__esModule`, the marker the interop helpers
// above check before wrapping a module.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export up front; the real value is assigned after the class body.
exports.DockerManager = void 0;
// Process execution: execSync for one-shot docker commands, spawn for the engine run.
const child_process_1 = require("child_process");
// Path handling: join/resolve for the engine directory and volume mounts.
const path = __importStar(require("path"));
// Filesystem: existsSync to look for the engine Dockerfile.
const fs = __importStar(require("fs"));
// Host details: platform()/arch() reported by getSystemInfo.
const os = __importStar(require("os"));
// Terminal colouring for stderr output forwarded from the container.
const chalk_1 = __importDefault(require("chalk"));
// Spinner shown while the image is checked, pulled, or built.
const ora_1 = __importDefault(require("ora"));
// Project-local module; OutputFormatter.format renders the engine's JSON result.
const output_formatter_1 = require("./output-formatter");
/**
 * Runs the Cleanifix Python engine through the `docker` CLI.
 *
 * Responsibilities: verifying Docker is usable, making sure the engine image
 * is available (pulling it, or building it from a local checkout), translating
 * CLI-style argument arrays into the JSON command the engine receives via
 * `--command`, running the container, and presenting its output.
 *
 * All docker invocations pass arguments as an array (no shell), so paths with
 * spaces or shell metacharacters are handled safely and the code does not
 * depend on POSIX shell utilities.
 */
class DockerManager {
    static IMAGE_NAME = 'rickyapp/cleanifix-engine';
    static CONTAINER_NAME = 'cleanifix-engine';
    static IMAGE_TAG = 'latest';
    /** Operations requested when an analysis does not name a specific one. */
    static ALL_ANALYZE_OPERATIONS = Object.freeze(['missing_values', 'duplicates', 'format_issues', 'quality']);
    /**
     * Full image reference (`name:tag`) used for every docker command.
     * @returns {string}
     */
    static get imageRef() {
        return `${this.IMAGE_NAME}:${this.IMAGE_TAG}`;
    }
    /**
     * Check if Docker is installed and running.
     * @returns {Promise<boolean>} true when both `docker --version` and `docker ps` succeed.
     */
    static async checkDocker() {
        try {
            child_process_1.execFileSync('docker', ['--version'], { stdio: 'ignore' });
            // `docker ps` talks to the daemon, so it fails when Docker is installed but not running.
            child_process_1.execFileSync('docker', ['ps'], { stdio: 'ignore' });
            return true;
        }
        catch {
            return false;
        }
    }
    /**
     * Check if the Cleanifix Docker image exists locally.
     * @returns {Promise<boolean>} true when `docker images -q` prints an image id.
     */
    static async imageExists() {
        try {
            const output = child_process_1.execFileSync('docker', ['images', '-q', this.imageRef], { encoding: 'utf8' });
            return output.trim().length > 0;
        }
        catch {
            return false;
        }
    }
    /**
     * Make sure the engine image is available locally: reuse it if present,
     * otherwise pull it, and if the pull fails build it from the local
     * `engine` directory (development setups).
     * @returns {Promise<void>}
     * @throws {Error} 'Missing Dockerfile' when the pull fails and no local
     *   Dockerfile exists; the build error when `docker build` fails.
     */
    static async ensureImage() {
        const spinner = (0, ora_1.default)('Checking Cleanifix Docker image...').start();
        if (await this.imageExists()) {
            spinner.succeed('Docker image found');
            return;
        }
        spinner.text = 'Docker image not found. Pulling from registry...';
        try {
            // First try to pull from Docker Hub
            child_process_1.execFileSync('docker', ['pull', this.imageRef], { stdio: 'inherit' });
            spinner.succeed('Docker image pulled successfully');
            return;
        }
        catch {
            // Pull failed: fall through to a local build (for development).
        }
        spinner.text = 'Building Docker image locally...';
        const enginePath = path.join(__dirname, '..', '..', '..', 'engine');
        if (!fs.existsSync(path.join(enginePath, 'Dockerfile'))) {
            spinner.fail('Dockerfile not found. Please ensure you have the complete Cleanifix installation.');
            throw new Error('Missing Dockerfile');
        }
        try {
            // Argument array instead of a shell string: the install path may contain spaces.
            child_process_1.execFileSync('docker', ['build', '-t', this.imageRef, enginePath], { stdio: 'inherit' });
            spinner.succeed('Docker image built successfully');
        }
        catch (error) {
            spinner.fail('Failed to build Docker image');
            throw error;
        }
    }
    /**
     * Parse command options from CLI arguments.
     *
     * Recognises `--key value`, `--key=value`, and bare `--flag` (stored as
     * `true`). Arguments that do not start with `--` and are not consumed as a
     * value are ignored.
     * @param {string[]} args
     * @returns {Object<string, string|boolean>}
     */
    static parseCommandOptions(args) {
        const options = {};
        for (let i = 0; i < args.length; i++) {
            const arg = args[i];
            if (!arg.startsWith('--')) {
                continue;
            }
            const body = arg.slice(2);
            const equalsAt = body.indexOf('=');
            if (equalsAt > 0) {
                // Inline form: --key=value
                options[body.slice(0, equalsAt)] = body.slice(equalsAt + 1);
                continue;
            }
            // Check if next arg is a value or another flag
            if (i + 1 < args.length && !args[i + 1].startsWith('--')) {
                options[body] = args[i + 1];
                i++; // Skip the value
            }
            else {
                options[body] = true; // Boolean flag
            }
        }
        return options;
    }
    /**
     * Return `value` only when it is a string. Options given without a value
     * are parsed as `true`, which must not be treated as a file name or list.
     * @param {*} value
     * @returns {string|undefined}
     */
    static stringOption(value) {
        return typeof value === 'string' ? value : undefined;
    }
    /**
     * Default output name for a clean run: the input's file name prefixed with
     * `cleaned_`, kept in the same directory as the input.
     * @param {string} inputFile
     * @returns {string}
     */
    static defaultCleanOutput(inputFile) {
        const lastSeparator = Math.max(inputFile.lastIndexOf('/'), inputFile.lastIndexOf('\\'));
        return `${inputFile.slice(0, lastSeparator + 1)}cleaned_${inputFile.slice(lastSeparator + 1)}`;
    }
    /**
     * Translate a CLI-style argument array into the `{ type, payload }` object
     * that is serialised and handed to the engine.
     *
     * - `analyze <input> [--missing] [--duplicates] [--format] [--quality]`
     * - `clean <input> <output> [--missing|--duplicates] [--strategy s] ...`
     * - `ask <query> [--input f] [--output f]` is mapped onto analyze/clean by
     *   keyword matching on the query.
     * - anything else: `command[1]` becomes `payload.input` and the remaining
     *   options are copied into the payload unchanged.
     * @param {string[]} command
     * @returns {{type: string, payload: Object}}
     * @throws {Error} when `ask` has no query or `--threshold` is not numeric.
     */
    static buildCommandObject(command) {
        const commandType = command[0];
        const commandObj = { type: commandType, payload: {} };
        const payload = commandObj.payload;
        switch (commandType) {
            case 'analyze': {
                payload.input = command[1];
                const analyzeOptions = this.parseCommandOptions(command.slice(2));
                // Convert boolean flags to operations array
                const operations = [];
                if (analyzeOptions.missing) {
                    operations.push('missing_values');
                }
                if (analyzeOptions.duplicates) {
                    operations.push('duplicates');
                }
                if (analyzeOptions.format) {
                    operations.push('format_issues');
                }
                if (analyzeOptions.quality) {
                    operations.push('quality');
                }
                // Default to all operations if none specified
                payload.operations = operations.length > 0 ? operations : [...this.ALL_ANALYZE_OPERATIONS];
                break;
            }
            case 'clean': {
                payload.input = command[1];
                payload.output = command[2];
                const cleanOptions = this.parseCommandOptions(command.slice(3));
                const strategy = this.stringOption(cleanOptions.strategy);
                if (cleanOptions.missing) {
                    payload.missing = true;
                    payload.strategy = strategy || 'drop';
                    const fillValue = this.stringOption(cleanOptions['fill-value']);
                    if (fillValue !== undefined) {
                        payload.fill_value = fillValue;
                    }
                    const columns = this.stringOption(cleanOptions.columns);
                    if (columns) {
                        payload.columns = columns.split(',');
                    }
                    if (cleanOptions.threshold) {
                        const threshold = Number.parseFloat(String(cleanOptions.threshold));
                        // NaN would be serialised as null and silently change the engine's behaviour.
                        if (!Number.isFinite(threshold)) {
                            throw new Error(`Invalid --threshold value: ${cleanOptions.threshold}`);
                        }
                        payload.threshold = threshold;
                    }
                }
                if (cleanOptions.duplicates) {
                    payload.duplicates = true;
                    // NOTE: the payload has a single `strategy` field, so when both
                    // --missing and --duplicates are given this value wins.
                    payload.strategy = strategy || 'first';
                    const subset = this.stringOption(cleanOptions.subset);
                    if (subset) {
                        payload.subset = subset.split(',');
                    }
                }
                if (cleanOptions['dry-run']) {
                    payload.dry_run = true;
                }
                break;
            }
            case 'ask': {
                if (typeof command[1] !== 'string') {
                    throw new Error('The "ask" command requires a natural language query');
                }
                const query = command[1].toLowerCase();
                const askOptions = this.parseCommandOptions(command.slice(2));
                const inputFile = this.stringOption(askOptions.input) || this.stringOption(askOptions.i) || 'test.csv';
                const explicitOutput = this.stringOption(askOptions.output) || this.stringOption(askOptions.o);
                const wantsClean = query.includes('clean') || query.includes('remove');
                const mentionsMissing = query.includes('missing') || query.includes('null');
                const mentionsDuplicates = query.includes('duplicate');
                payload.input = inputFile;
                // The clean intent must be tested before the topic keywords:
                // otherwise "remove duplicates" would match "duplicate" first
                // and only ever be analysed.
                if (wantsClean) {
                    commandObj.type = 'clean';
                    payload.output = explicitOutput || this.defaultCleanOutput(inputFile);
                    if (mentionsMissing) {
                        payload.missing = true;
                        payload.strategy = 'drop';
                    }
                    else if (mentionsDuplicates) {
                        payload.duplicates = true;
                        payload.strategy = 'first';
                    }
                }
                else {
                    commandObj.type = 'analyze';
                    if (mentionsMissing) {
                        payload.operations = ['missing_values'];
                    }
                    else if (mentionsDuplicates) {
                        payload.operations = ['duplicates'];
                    }
                    else {
                        // Quality questions and anything unrecognised: analyze everything.
                        payload.operations = [...this.ALL_ANALYZE_OPERATIONS];
                    }
                }
                // An explicit output applies to every derived command type.
                if (explicitOutput) {
                    payload.output = explicitOutput;
                }
                break;
            }
            default:
                // For other commands, pass through the arguments
                payload.input = command[1];
                Object.assign(payload, this.parseCommandOptions(command.slice(2)));
        }
        return commandObj;
    }
    /**
     * Find the engine's JSON result in captured stdout: the last line that
     * starts with `{`, mentions `"success"`, and parses as JSON.
     * @param {string} stdout complete stdout of the container
     * @returns {Object|null} the parsed result, or null when none is present
     */
    static extractJsonResult(stdout) {
        let result = null;
        for (const rawLine of stdout.split('\n')) {
            const line = rawLine.trim();
            if (!line.startsWith('{') || !line.includes('"success"')) {
                continue;
            }
            try {
                result = JSON.parse(line);
            }
            catch {
                // Not valid JSON (e.g. a log line that merely looks like it); keep scanning.
            }
        }
        return result;
    }
    /**
     * Run a command in the Docker container.
     *
     * @param {string[]} command CLI-style arguments, see buildCommandObject.
     * @param {Object} [options]
     * @param {string} [options.mountPath] host directory mounted at `/data`
     *   (also used as the container's working directory).
     * @param {Object<string, string>} [options.env] environment variables for the container.
     * @param {boolean} [options.interactive] keep stdin open (and allocate a
     *   TTY when this process has one).
     * @returns {Promise<void>} resolves when the container exits with code 0.
     * @throws {Error} when the command is malformed, the image cannot be
     *   obtained, docker cannot be started, or the container exits non-zero.
     */
    static async runCommand(command, options = {}) {
        // Build the request first so malformed input fails before any Docker work.
        const commandObj = this.buildCommandObject(command);
        await this.ensureImage();
        const dockerArgs = [
            'run',
            '--rm', // Remove container after exit
            '--name', `${this.CONTAINER_NAME}-${Date.now()}`,
        ];
        // Add volume mount if specified
        if (options.mountPath) {
            const absolutePath = path.resolve(options.mountPath);
            dockerArgs.push('-v', `${absolutePath}:/data`);
            dockerArgs.push('-w', '/data'); // Set working directory
        }
        // Add environment variables
        if (options.env) {
            for (const [key, value] of Object.entries(options.env)) {
                dockerArgs.push('-e', `${key}=${value}`);
            }
        }
        // Add interactive flags if needed. `-t` makes docker fail when stdin is
        // not a terminal (pipes, CI), so only request a TTY when we have one.
        if (options.interactive) {
            dockerArgs.push(process.stdin.isTTY ? '-it' : '-i');
        }
        dockerArgs.push(this.imageRef);
        dockerArgs.push('python', '-m', 'src.main', '--mode', 'single', '--command', JSON.stringify(commandObj));
        // Run the Docker command and capture output for formatting
        const spawnOptions = {
            stdio: ['inherit', 'pipe', 'pipe'], // Capture stdout and stderr
            shell: false, // Don't use shell to avoid JSON escaping issues
        };
        return new Promise((resolve, reject) => {
            const proc = child_process_1.spawn('docker', dockerArgs, spawnOptions);
            let stdout = '';
            // Decode as UTF-8 at the stream level so multi-byte characters split
            // across chunks are not corrupted.
            proc.stdout?.setEncoding('utf8');
            proc.stderr?.setEncoding('utf8');
            proc.stdout?.on('data', (chunk) => {
                // Only accumulate here; a JSON line may be split across chunks,
                // so parsing happens once the stream is complete.
                stdout += chunk;
            });
            proc.stderr?.on('data', (chunk) => {
                // Only show Docker errors, not Python logging. Filter line by
                // line so a log line does not hide a real error in the same chunk.
                const visible = String(chunk)
                    .split('\n')
                    .filter((line) => !line.includes('INFO:') && !line.includes('DEBUG:'))
                    .join('\n');
                if (visible.trim() !== '') {
                    console.error(chalk_1.default.red(visible.trimEnd()));
                }
            });
            proc.on('error', (error) => {
                reject(new Error(`Failed to run Docker container: ${error.message}`));
            });
            // 'close' (unlike 'exit') fires only after stdout/stderr are fully
            // drained, so no trailing output is lost.
            proc.on('close', (code, signal) => {
                if (code !== 0) {
                    const reason = code === null && signal
                        ? `Docker container was terminated by signal ${signal}`
                        : `Docker container exited with code ${code}`;
                    reject(new Error(reason));
                    return;
                }
                // Format and display the result if we captured JSON
                const jsonResult = this.extractJsonResult(stdout);
                if (jsonResult) {
                    output_formatter_1.OutputFormatter.format(jsonResult);
                }
                else if (stdout.trim()) {
                    // If no JSON was captured, show raw output
                    console.log(stdout);
                }
                resolve();
            });
        });
    }
    /**
     * List container ids for the given `docker ps` arguments.
     * @param {string[]} psArgs arguments placed after `docker`
     * @returns {string[]} ids printed by docker, one per entry
     */
    static listContainerIds(psArgs) {
        const output = child_process_1.execFileSync('docker', psArgs, {
            encoding: 'utf8',
            stdio: ['ignore', 'pipe', 'ignore'],
        });
        return output.split(/\s+/).filter((id) => id.length > 0);
    }
    /**
     * Clean up Docker resources: stop running engine containers, then remove
     * any that remain. Best effort; failures are deliberately ignored.
     * @returns {Promise<void>}
     */
    static async cleanup() {
        const nameFilter = ['--filter', `name=${this.CONTAINER_NAME}`];
        try {
            // Stop any running containers
            const running = this.listContainerIds(['ps', '-q', ...nameFilter]);
            if (running.length > 0) {
                child_process_1.execFileSync('docker', ['stop', ...running], { stdio: 'ignore' });
            }
            // Remove stopped containers
            const remaining = this.listContainerIds(['ps', '-aq', ...nameFilter]);
            if (remaining.length > 0) {
                child_process_1.execFileSync('docker', ['rm', ...remaining], { stdio: 'ignore' });
            }
        }
        catch {
            // Ignore cleanup errors: this runs on shutdown paths where there is
            // nothing useful the caller could do about them.
        }
    }
    /**
     * Get system information for debugging.
     * @returns {Promise<{platform: string, arch: string, dockerInstalled: string, imageExists: string}>}
     *   `dockerInstalled` and `imageExists` are the strings 'Yes' or 'No'.
     */
    static async getSystemInfo() {
        const [dockerInstalled, imageExists] = await Promise.all([this.checkDocker(), this.imageExists()]);
        return {
            platform: os.platform(),
            arch: os.arch(),
            dockerInstalled: dockerInstalled ? 'Yes' : 'No',
            imageExists: imageExists ? 'Yes' : 'No',
        };
    }
}
// Publish the class as this module's named export.
exports.DockerManager = DockerManager;
//# sourceMappingURL=docker-manager.js.map