/*
 * cleanifix
 * Version:
 * Intelligent data cleaning CLI with natural language support - Docker-powered Python engine
 * 248 lines • 10.2 kB
 * JavaScript
 */
;
// Module-interop helper (the shape matches what a TypeScript-style compiler emits).
// Re-exposes property `k` of module object `m` on object `o` under the name `k2`
// (defaults to `k`). An already-defined `this.__createBinding` is reused if present.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Fall back to a forwarding getter when there is no own descriptor, when `m` is not
// flagged __esModule but exposes an accessor, or when the data property is
// writable/configurable — the getter reads m[k] on every access, so later changes
// to m[k] stay visible through o[k2].
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback when Object.create is unavailable: copy the current value once.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Module-interop helper: stores `v` as the "default" property of `o`.
// Uses an enumerable defined property when Object.create exists, otherwise a plain
// assignment. An already-defined `this.__setModuleDefault` is reused if present.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// Module-interop helper for namespace-style imports of a require()'d module.
// Returns `mod` untouched when it is flagged __esModule; otherwise builds a new
// object that exposes every own property of `mod` except "default" (through
// __createBinding) and sets "default" to `mod` itself.
var __importStar = (this && this.__importStar) || (function () {
// Self-replacing function: on first call it swaps itself for
// Object.getOwnPropertyNames (or a for-in/hasOwnProperty fallback), then delegates.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
// `mod != null` skips both null and undefined; "default" is set separately below.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
// Module-interop helper for default imports: a module flagged __esModule is returned
// as-is; anything else is wrapped so the value is reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const commander_1 = require("commander");
const packageJson = __importStar(require("../package.json"));
const docker_manager_1 = require("./utils/docker-manager");
const logger_1 = require("./utils/logger");
const chalk_1 = __importDefault(require("chalk"));
const ora_1 = __importDefault(require("ora"));
const program = new commander_1.Command();
/**
 * Checks that Docker can be used before a command is run.
 *
 * Shows a spinner while `DockerManager.checkDocker()` is awaited. On a falsy
 * result the spinner is marked failed and installation guidance is written
 * to stderr.
 *
 * @returns {Promise<boolean>} `true` when the Docker check passed, `false` otherwise.
 */
async function checkPrerequisites() {
  const createSpinner = ora_1.default;
  const progress = createSpinner('Checking prerequisites...').start();

  const dockerAvailable = await docker_manager_1.DockerManager.checkDocker();
  if (dockerAvailable) {
    progress.succeed('Docker is available');
    return true;
  }

  progress.fail('Docker is not installed or not running');
  // [chalk colour, message] pairs, printed in this order.
  const guidance = [
    ['red', '\nCleanifix requires Docker to run the Python engine.'],
    ['yellow', 'Please install Docker from: https://docs.docker.com/get-docker/'],
    ['yellow', '\nAfter installation, make sure Docker is running and try again.'],
  ];
  for (const [colour, message] of guidance) {
    console.error(chalk_1.default[colour](message));
  }
  return false;
}
/**
 * Converts a Commander option key back to its command-line spelling.
 * Commander stores multi-word options under camelCase keys (`--dry-run` becomes
 * `dryRun`), so each upper-case letter is turned back into `-` plus its lower-case form.
 *
 * @param {string} optionKey - Key from a Commander options object, e.g. `chunkSize`.
 * @returns {string} The long flag, e.g. `--chunk-size`.
 */
function toEngineFlag(optionKey) {
  const kebab = optionKey.replace(/[A-Z]/g, (upper) => `-${upper.toLowerCase()}`);
  return `--${kebab}`;
}

/**
 * Runs an engine command inside Docker.
 *
 * Steps: check prerequisites (exits with code 1 if they fail), make sure the
 * Docker image is available, translate the parsed options into flags, then call
 * `DockerManager.runCommand` with the current working directory as mount path.
 * Any thrown error is logged and the process exits with code 1.
 *
 * @param {string} command - Engine sub-command name (first engine argument).
 * @param {string[]} args - Positional arguments appended after the command.
 * @param {Object} options - Parsed Commander options. Entries that are `undefined`
 *   or `false` are skipped; `true` becomes a bare flag; anything else becomes a
 *   flag followed by `String(value)`.
 * @returns {Promise<void>}
 */
async function runWithDocker(command, args, options) {
  try {
    if (!(await checkPrerequisites())) {
      process.exit(1);
    }
    await docker_manager_1.DockerManager.ensureImage();

    const engineArgs = [command, ...args];
    // `options || {}` keeps a missing options object from throwing in Object.entries.
    for (const [key, value] of Object.entries(options || {})) {
      if (value === undefined || value === false) {
        continue;
      }
      // Forward the flag under its declared kebab-case name, not Commander's camelCase key.
      engineArgs.push(toEngineFlag(key));
      if (value !== true) {
        engineArgs.push(String(value));
      }
    }

    await docker_manager_1.DockerManager.runCommand(engineArgs, {
      // The directory the CLI was started from is what gets mounted.
      mountPath: process.cwd(),
      interactive: process.stdin.isTTY,
      env: {
        CLEANIFIX_LOG_LEVEL: process.env.LOG_LEVEL || 'info',
      },
    });
  } catch (error) {
    // Non-Error throwables have no `.message`; stringify them instead of logging `undefined`.
    const reason = error instanceof Error ? error.message : String(error);
    logger_1.logger.error('Command failed:', reason);
    process.exit(1);
  }
}
// Root command: identity, version (taken from package.json) and global flags.
program.name('cleanifix');
program.description('Intelligent data cleaning CLI with natural language support');
program.version(packageJson.version);
program.option('-v, --verbose', 'Enable verbose logging');
program.option('-q, --quiet', 'Suppress non-error output');
// NOTE(review): nothing in the visible source reads the option this flag produces.
program.option('--no-docker', 'Run without Docker (requires local Python installation)');
// Before an action runs, map the verbosity flags of the command passed to the hook
// onto LOG_LEVEL. --verbose wins over --quiet; with neither, LOG_LEVEL is untouched.
program.hook('preAction', (hookCommand) => {
  const { verbose, quiet } = hookCommand.opts();
  if (verbose) {
    process.env.LOG_LEVEL = 'debug';
    return;
  }
  if (quiet) {
    process.env.LOG_LEVEL = 'error';
  }
});
// `init` — initialize a new Cleanifix project.
const initCommand = program.command('init');
initCommand.description('Initialize a new Cleanifix project');
initCommand.option('-t, --template <template>', 'Project template to use', 'default');
initCommand.option('-f, --force', 'Overwrite existing configuration');
// No positional arguments: only the parsed options are forwarded to the engine.
initCommand.action(async (options) => {
  await runWithDocker('init', [], options);
});
// `validate <input>` — validate data against defined rules.
const validateCommand = program
  .command('validate')
  .description('Validate data against defined rules')
  .argument('<input>', 'Input file or directory path');
// Each entry is [flags, help text, optional default], registered in this order.
const validateOptionSpecs = [
  ['-c, --config <path>', 'Path to validation config file'],
  ['-r, --rules <path>', 'Path to custom rules file'],
  ['-o, --output <path>', 'Output directory for validation reports'],
  ['-f, --format <format>', 'Output format (json|html|csv)', 'json'],
  ['--fail-fast', 'Stop on first validation error'],
  ['--parallel <number>', 'Number of parallel workers', '4'],
];
for (const spec of validateOptionSpecs) {
  validateCommand.option(...spec);
}
// The input path is the only positional argument handed to the engine.
validateCommand.action(async (input, options) => {
  await runWithDocker('validate', [input], options);
});
// `transform <input> <output>` — transform data using defined pipelines.
const transformCommand = program
  .command('transform')
  .description('Transform data using defined pipelines')
  .argument('<input>', 'Input file or directory path')
  .argument('<output>', 'Output file or directory path');
// Each entry is [flags, help text, optional default], registered in this order.
const transformOptionSpecs = [
  ['-p, --pipeline <path>', 'Path to transformation pipeline config'],
  ['-m, --mappings <path>', 'Path to field mappings file'],
  ['-f, --format <format>', 'Output format (csv|json|parquet|excel)'],
  ['--streaming', 'Enable streaming mode for large files'],
  ['--chunk-size <size>', 'Chunk size for processing', '10000'],
];
for (const spec of transformOptionSpecs) {
  transformCommand.option(...spec);
}
// Both paths are forwarded positionally, input first.
transformCommand.action(async (input, output, options) => {
  await runWithDocker('transform', [input, output], options);
});
// `analyze <input>` — analyze data quality and patterns.
const analyzeCommand = program
  .command('analyze')
  .description('Analyze data quality and patterns')
  .argument('<input>', 'Input file or directory path');
// [flags, help text] pairs, registered in this order. `--format` is a boolean
// switch on this command.
// NOTE(review): `-v, --verbose` and the `-q` short flag are also declared on the
// root program — confirm the overlap is intended.
[
  ['-m, --missing', 'Analyze missing values'],
  ['-d, --duplicates', 'Analyze duplicate rows and values'],
  ['-f, --format', 'Analyze format inconsistencies'],
  ['-q, --quality', 'Comprehensive quality analysis'],
  ['-a, --all', 'Run all available analyses'],
  ['-c, --columns <columns>', 'Specific columns to analyze (comma-separated)'],
  ['-o, --output <file>', 'Save analysis results to file'],
  ['--json', 'Output results as JSON'],
  ['-v, --verbose', 'Show detailed output'],
].forEach(([flags, help]) => {
  analyzeCommand.option(flags, help);
});
analyzeCommand.action(async (input, options) => {
  await runWithDocker('analyze', [input], options);
});
// `clean <input>` — clean data using predefined or custom rules.
const cleanCommand = program
  .command('clean')
  .description('Clean data using predefined or custom rules')
  .argument('<input>', 'Input file or directory path');
// [flags, help text] pairs, registered in this order; none has a default value.
[
  ['-o, --output <path>', 'Output file or directory path'],
  ['-r, --rules <rules>', 'Comma-separated list of cleaning rules'],
  ['-c, --config <path>', 'Path to cleaning configuration file'],
  ['--dry-run', 'Preview changes without applying them'],
  ['--backup', 'Create backup of original files'],
  ['--report', 'Generate cleaning report'],
  ['-m, --missing', 'Clean missing values'],
  ['-d, --duplicates', 'Clean duplicate rows'],
  ['-s, --strategy <strategy>', 'Cleaning strategy'],
  ['-f, --fill-value <value>', 'Fill value for missing data'],
  ['--columns <columns>', 'Comma-separated list of columns to clean'],
  ['--subset <subset>', 'Columns for duplicate detection'],
  ['-t, --threshold <threshold>', 'Threshold for dropping columns (0-1)'],
].forEach(([flags, help]) => {
  cleanCommand.option(flags, help);
});
cleanCommand.action(async (input, options) => {
  await runWithDocker('clean', [input], options);
});
// `ask <query>` — natural language interface.
const askCommand = program.command('ask');
askCommand.description('Use natural language to describe what you want to do');
askCommand.argument('<query>', 'Natural language query');
askCommand.option('-i, --input <path>', 'Input file to work with');
askCommand.option('-o, --output <path>', 'Output path for results');
askCommand.option('--interactive', 'Enter interactive mode');
// The query is forwarded as a single positional argument.
askCommand.action(async (query, options) => {
  await runWithDocker('ask', [query], options);
});
// Docker management commands
// `docker:info` — prints each key/value pair returned by DockerManager.getSystemInfo().
program
  .command('docker:info')
  .description('Show Docker and Cleanifix engine information')
  .action(async () => {
    try {
      const info = await docker_manager_1.DockerManager.getSystemInfo();
      console.log(chalk_1.default.cyan('System Information:'));
      for (const [key, value] of Object.entries(info)) {
        console.log(` ${key}: ${value}`);
      }
    } catch (error) {
      // Same failure handling as docker:build: log the reason and exit non-zero
      // instead of letting the rejection escape the action handler.
      const reason = error instanceof Error ? error.message : String(error);
      logger_1.logger.error('Failed to get system information:', reason);
      process.exit(1);
    }
  });
// `docker:build` — makes sure the engine image is present via DockerManager.ensureImage().
// On failure the error message is logged and the process exits with code 1.
const dockerBuildCommand = program.command('docker:build');
dockerBuildCommand.description('Build the Docker image locally (for development)');
dockerBuildCommand.action(async () => {
  const { DockerManager } = docker_manager_1;
  try {
    await DockerManager.ensureImage();
    logger_1.logger.info('Docker image is ready');
  } catch (buildError) {
    logger_1.logger.error('Failed to build Docker image:', buildError.message);
    process.exit(1);
  }
});
// `docker:cleanup` — calls DockerManager.cleanup() and reports success.
program
  .command('docker:cleanup')
  .description('Clean up Docker resources')
  .action(async () => {
    try {
      await docker_manager_1.DockerManager.cleanup();
      logger_1.logger.info('Docker resources cleaned up');
    } catch (error) {
      // Same failure handling as docker:build: log the reason and exit non-zero
      // instead of letting the rejection escape the action handler.
      const reason = error instanceof Error ? error.message : String(error);
      logger_1.logger.error('Failed to clean up Docker resources:', reason);
      process.exit(1);
    }
  });
// Error handling
// Have Commander throw instead of exiting on its own, so exit codes are decided here.
program.exitOverride();
// Show help if no command provided. This runs before parsing and uses outputHelp()
// plus an explicit exit: with exitOverride() active, program.help() would throw.
if (process.argv.slice(2).length === 0) {
  program.outputHelp();
  process.exit(0);
}
// parseAsync (rather than parse) waits for the async action handlers, so a rejection
// from any of them reaches the handler below instead of becoming an unhandled rejection.
program.parseAsync(process.argv).catch((error) => {
  // Error codes Commander uses after it has printed help or the version.
  const informationalCodes = ['commander.helpDisplayed', 'commander.help', 'commander.version'];
  const code = error ? error.code : undefined;
  if (informationalCodes.includes(code)) {
    // Keep the exit code Commander chose when it provides one; default to success.
    process.exit(typeof error.exitCode === 'number' ? error.exitCode : 0);
  }
  // Non-Error throwables have no `.message`; stringify them instead of logging `undefined`.
  const reason = error instanceof Error ? error.message : String(error);
  logger_1.logger.error('Command error:', reason);
  process.exit(1);
});
//# sourceMappingURL=index-docker.js.map