UNPKG

dataweave

Version:

AI-assisted CLI for modern data pipelines with DBT, Dagster, and Supabase integration

304 lines (282 loc) 12 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.DagsterManager = void 0;
const promises_1 = require("fs/promises");
const path_1 = require("path");
const fs_1 = require("fs");
const child_process_1 = require("child_process");
const chalk_1 = __importDefault(require("chalk"));

/** One level of indentation inside generated Python code. */
const PY_INDENT = '    ';

/**
 * Render a value as a double-quoted Python string literal.
 *
 * JSON.stringify is used for the escaping: the escapes it emits
 * (\" \\ \b \f \n \r \t \uXXXX) are all valid inside a Python string literal,
 * so embedded quotes or backslashes can no longer break the generated code.
 * Values without special characters render exactly as `"value"`.
 *
 * @param {*} value - Value to embed; coerced with String().
 * @returns {string} Quoted, escaped literal.
 */
function pyString(value) {
    return JSON.stringify(String(value));
}

/**
 * Render a list of tag names as a Python dict literal with empty values.
 *
 * A bare `{"a", "b"}` is a Python set literal, not a mapping, so each tag is
 * emitted as a key with an empty-string value instead.
 *
 * @param {string[]} tags - Tag names.
 * @returns {string} Python dict literal, e.g. `{"a": "", "b": ""}`.
 */
function pyTagsDict(tags) {
    return `{${tags.map((tag) => `${pyString(tag)}: ""`).join(', ')}}`;
}

/**
 * Indent every non-empty line by one Python indentation level.
 * Empty lines are left empty so no trailing whitespace is generated.
 *
 * @param {string[]} lines - Lines to indent.
 * @returns {string[]} Indented copies of the lines.
 */
function indentLines(lines) {
    return lines.map((line) => (line === '' ? '' : `${PY_INDENT}${line}`));
}

/**
 * Upper-case the first character of a name (used for generated class names).
 *
 * @param {string} name - Source name.
 * @returns {string} Name with its first character upper-cased.
 */
function capitalize(name) {
    return name.charAt(0).toUpperCase() + name.slice(1);
}

/**
 * Generates Dagster Python source files (assets, jobs, schedules) under the
 * configured project directories and runs the `dagster` CLI.
 *
 * The config object is read for `assetsPath`, `jobsPath`, `dagsterPath` and
 * `projectPath`.
 */
class DagsterManager {
    /**
     * @param {object} config - Paths used by the manager (`assetsPath`,
     *   `jobsPath`, `dagsterPath`, `projectPath`).
     */
    constructor(config) {
        this.config = config;
    }

    /**
     * Write `<name>.py` into the assets directory, register it in the assets
     * `__init__.py`, and optionally generate a schedule for it.
     *
     * @param {object} options - Asset options: `name`, `description`,
     *   `dependencies`, `code` (used verbatim instead of the default template
     *   when truthy), `partitions`, `schedule`, `tags`, `compute_kind`,
     *   `io_manager`.
     * @returns {Promise<void>}
     */
    async generateAsset(options) {
        const { name, description, dependencies = [], code, partitions, schedule, tags = [], compute_kind, io_manager } = options;
        console.log(chalk_1.default.blue(`⚡ Generating Dagster asset: ${name}`));
        // mkdir with `recursive` is a no-op when the directory already exists.
        await (0, promises_1.mkdir)(this.config.assetsPath, { recursive: true });
        const assetCode = code || this.generateDefaultAssetCode(name, description, dependencies, partitions, tags, compute_kind, io_manager);
        const assetPath = (0, path_1.join)(this.config.assetsPath, `${name}.py`);
        await (0, promises_1.writeFile)(assetPath, assetCode);
        await this.updateAssetsInit(name);
        if (schedule) {
            await this.generateSchedule(name, schedule);
        }
        console.log(chalk_1.default.green(`✓ Created Dagster asset: ${name}`));
    }

    /**
     * Write `<name>.py` into the jobs directory, register it in the jobs
     * `__init__.py`, and optionally generate a schedule for it.
     *
     * @param {object} options - Job options: `name`, `description`, `assets`,
     *   `schedule`, `tags`.
     * @returns {Promise<void>}
     */
    async generateJob(options) {
        const { name, description, assets = [], schedule, tags = [] } = options;
        console.log(chalk_1.default.blue(`🔧 Generating Dagster job: ${name}`));
        await (0, promises_1.mkdir)(this.config.jobsPath, { recursive: true });
        const jobCode = this.generateJobCode(name, description, assets, tags);
        const jobPath = (0, path_1.join)(this.config.jobsPath, `${name}.py`);
        await (0, promises_1.writeFile)(jobPath, jobCode);
        await this.updateJobsInit(name);
        if (schedule) {
            await this.generateSchedule(name, schedule, true);
        }
        console.log(chalk_1.default.green(`✓ Created Dagster job: ${name}`));
    }

    /**
     * Write `dbt_<modelName>.py` into the assets directory and register it in
     * the assets `__init__.py`.
     *
     * @param {string} modelName - DBT model name.
     * @returns {Promise<void>}
     */
    async generateDbtAsset(modelName) {
        console.log(chalk_1.default.blue(`📊 Generating Dagster asset for DBT model: ${modelName}`));
        // Create the target directory first, as generateAsset does; otherwise
        // writeFile fails when this is the first asset in a fresh project.
        await (0, promises_1.mkdir)(this.config.assetsPath, { recursive: true });
        const assetCode = this.generateDbtAssetCode(modelName);
        const assetPath = (0, path_1.join)(this.config.assetsPath, `dbt_${modelName}.py`);
        await (0, promises_1.writeFile)(assetPath, assetCode);
        await this.updateAssetsInit(`dbt_${modelName}`);
        console.log(chalk_1.default.green(`✓ Created Dagster asset for DBT model: ${modelName}`));
    }

    /**
     * Run `dagster asset materialize --select <assetName>`.
     *
     * @param {string} assetName - Asset selection passed to the CLI.
     * @returns {Promise<void>}
     */
    async runAsset(assetName) {
        console.log(chalk_1.default.blue(`🚀 Running Dagster asset: ${assetName}`));
        await this.executeDagsterCommand(['asset', 'materialize', '--select', assetName]);
    }

    /**
     * Run `dagster job execute --job <jobName>`.
     *
     * @param {string} jobName - Job name passed to the CLI.
     * @returns {Promise<void>}
     */
    async runJob(jobName) {
        console.log(chalk_1.default.blue(`🚀 Running Dagster job: ${jobName}`));
        await this.executeDagsterCommand(['job', 'execute', '--job', jobName]);
    }

    /**
     * Run `dagster dev --port <port>`.
     *
     * @param {number} [port=3000] - Port passed to the CLI.
     * @returns {Promise<void>}
     */
    async startDagster(port = 3000) {
        console.log(chalk_1.default.blue(`🌐 Starting Dagster web server on port ${port}...`));
        await this.executeDagsterCommand(['dev', '--port', String(port)]);
    }

    /**
     * Run `dagster pipeline validate`.
     *
     * NOTE(review): confirm this subcommand exists in the targeted Dagster
     * version; the other commands in this class use the asset/job/dev CLI.
     *
     * @returns {Promise<void>}
     */
    async validatePipeline() {
        console.log(chalk_1.default.blue('🔍 Validating Dagster pipeline...'));
        await this.executeDagsterCommand(['pipeline', 'validate']);
    }

    /**
     * Build the Python source for a pandas-returning `@asset` function.
     *
     * @param {string} name - Python function name of the asset.
     * @param {string} [description] - Asset description (decorator + docstring).
     * @param {string[]} [dependencies=[]] - Upstream asset names; each becomes
     *   an `AssetIn()` entry and a `pd.DataFrame` function parameter.
     * @param {string} [partitions] - Start date for `DailyPartitionsDefinition`.
     * @param {string[]} [tags=[]] - Tag names, emitted as dict keys.
     * @param {string} [compute_kind] - Value for `compute_kind`.
     * @param {string} [io_manager] - Value for `io_manager_key`.
     * @returns {string} Python source, terminated by a newline.
     */
    generateDefaultAssetCode(name, description, dependencies = [], partitions, tags = [], compute_kind, io_manager) {
        const hasDependencies = dependencies.length > 0;

        const imports = ['from dagster import asset', 'import pandas as pd'];
        if (partitions) {
            imports.push('from dagster import DailyPartitionsDefinition');
        }
        if (hasDependencies) {
            imports.push('from dagster import AssetIn');
        }

        const decoratorOptions = [];
        if (hasDependencies) {
            const ins = dependencies.map((dep) => `${pyString(dep)}: AssetIn()`).join(', ');
            decoratorOptions.push(`ins={${ins}}`);
        }
        if (description) {
            decoratorOptions.push(`description=${pyString(description)}`);
        }
        if (tags.length > 0) {
            decoratorOptions.push(`tags=${pyTagsDict(tags)}`);
        }
        if (compute_kind) {
            decoratorOptions.push(`compute_kind=${pyString(compute_kind)}`);
        }
        if (io_manager) {
            decoratorOptions.push(`io_manager_key=${pyString(io_manager)}`);
        }
        if (partitions) {
            decoratorOptions.push(`partitions_def=DailyPartitionsDefinition(start_date=${pyString(partitions)})`);
        }
        const decorator = decoratorOptions.length > 0
            ? `@asset(${decoratorOptions.join(', ')})`
            : '@asset';

        const functionParams = dependencies.map((dep) => `${dep}: pd.DataFrame`).join(', ');

        const docstring = [
            '"""',
            // Split so that every line of a multi-line description is indented.
            ...String(description || `Asset: ${name}`).split(/\r?\n/),
            '',
            'Returns:',
            `${PY_INDENT}pd.DataFrame: Processed data`,
            '"""',
        ];

        const body = hasDependencies
            ? [
                '# Process input data',
                'processed_data = pd.DataFrame()',
                '',
                '# TODO: Add your asset logic here',
                `# Example: processed_data = ${dependencies[0]}.copy()`,
                '',
                'return processed_data',
            ]
            : [
                '# TODO: Add your asset logic here',
                '# Example: Load data from external source',
                'data = pd.DataFrame({',
                `${PY_INDENT}'id': [1, 2, 3],`,
                `${PY_INDENT}'value': [100, 200, 300]`,
                '})',
                '',
                'return data',
            ];

        return [
            ...imports,
            '',
            '',
            decorator,
            `def ${name}(${functionParams}):`,
            ...indentLines(docstring),
            ...indentLines(body),
            '',
        ].join('\n');
    }

    /**
     * Build the Python source for a `@job`, together with a `Config` subclass
     * and a placeholder `@op` named after the job.
     *
     * NOTE(review): the job body calls `<asset>_op()` for each entry of
     * `assets`, but no such ops are defined or imported by the generated
     * file — the user is presumably expected to supply them; confirm.
     *
     * @param {string} name - Python function name of the job.
     * @param {string} [description] - Job description (decorator + docstring).
     * @param {string[]} [assets=[]] - Names used to emit `<asset>_op()` calls.
     * @param {string[]} [tags=[]] - Tag names, emitted as dict keys.
     * @returns {string} Python source, terminated by a newline.
     */
    generateJobCode(name, description, assets = [], tags = []) {
        const imports = ['from dagster import job, op', 'from dagster import Config'];
        if (assets.length > 0) {
            imports.push('from dagster import AssetMaterialization');
        }

        const jobOptions = [];
        if (description) {
            jobOptions.push(`description=${pyString(description)}`);
        }
        if (tags.length > 0) {
            jobOptions.push(`tags=${pyTagsDict(tags)}`);
        }
        const decorator = jobOptions.length > 0 ? `@job(${jobOptions.join(', ')})` : '@job';

        const configClass = `${capitalize(name)}Config`;
        const jobBody = assets.length > 0
            ? assets.map((asset) => `${asset}_op()`)
            : ['# TODO: Add your ops here', 'pass'];

        return [
            ...imports,
            '',
            '',
            `class ${configClass}(Config):`,
            ...indentLines([
                `"""Configuration for ${name} job"""`,
                '# TODO: Add configuration parameters',
                'pass',
            ]),
            '',
            '',
            '@op',
            `def ${name}_op(config: ${configClass}):`,
            ...indentLines([
                `"""Main operation for ${name} job"""`,
                '# TODO: Add your operation logic here',
                'pass',
            ]),
            '',
            '',
            decorator,
            `def ${name}():`,
            ...indentLines([
                '"""',
                ...String(description || `Job: ${name}`).split(/\r?\n/),
                '"""',
                ...jobBody,
            ]),
            '',
        ].join('\n');
    }

    /**
     * Build the Python source for a placeholder asset wrapping a DBT model.
     * The generated asset returns an empty DataFrame; a commented-out
     * `@dbt_assets` alternative is included below it.
     *
     * @param {string} modelName - DBT model name; the asset is `dbt_<modelName>`.
     * @returns {string} Python source, terminated by a newline.
     */
    generateDbtAssetCode(modelName) {
        return [
            'from dagster import asset',
            'from dagster_dbt import dbt_assets, DbtCliResource',
            'from dagster import Config',
            'import pandas as pd',
            '',
            '',
            'class DbtConfig(Config):',
            ...indentLines([
                '"""Configuration for DBT operations"""',
                'profiles_dir: str = "config"',
                'project_dir: str = "data/dbt"',
            ]),
            '',
            '',
            '@asset(compute_kind="dbt")',
            `def dbt_${modelName}(config: DbtConfig) -> pd.DataFrame:`,
            ...indentLines([
                '"""',
                `Dagster asset for DBT model: ${modelName}`,
                '',
                'This asset runs the DBT model and returns the results.',
                '"""',
                '# TODO: Implement DBT model execution',
                "# This is a placeholder - in production you'd use dagster-dbt integration",
                '',
                '# For now, return empty DataFrame',
                'return pd.DataFrame()',
            ]),
            '',
            '',
            '# Alternative: Use dagster-dbt integration (recommended for production)',
            '# @dbt_assets(',
            '#     manifest=dbt_manifest_path,',
            `#     select="${modelName}",`,
            '#     compute_kind="dbt"',
            '# )',
            `# def dbt_${modelName}_assets(context: AssetExecutionContext, dbt: DbtCliResource):`,
            '#     yield from dbt.cli(["build"], context=context).stream()',
            '',
        ].join('\n');
    }

    /**
     * Write `<name>_schedule.py` into `<dagsterPath>/schedules` and register
     * it in that directory's `__init__.py`.
     *
     * @param {string} assetOrJobName - Name of the asset or job to schedule.
     * @param {string} schedule - Cron expression.
     * @param {boolean} [isJob=false] - True when scheduling a job.
     * @returns {Promise<void>}
     */
    async generateSchedule(assetOrJobName, schedule, isJob = false) {
        const schedulesPath = (0, path_1.join)(this.config.dagsterPath, 'schedules');
        await (0, promises_1.mkdir)(schedulesPath, { recursive: true });
        const scheduleCode = this.generateScheduleCode(assetOrJobName, schedule, isJob);
        const schedulePath = (0, path_1.join)(schedulesPath, `${assetOrJobName}_schedule.py`);
        await (0, promises_1.writeFile)(schedulePath, scheduleCode);
        await this.updateSchedulesInit(`${assetOrJobName}_schedule`);
    }

    /**
     * Build the Python source for a `@schedule` definition.
     *
     * NOTE(review): the asset variant emits `@schedule(...)` without a `job`
     * argument and passes the asset object in `asset_selection`; verify both
     * against the Dagster schedule API before relying on the output.
     *
     * @param {string} assetOrJobName - Name of the asset or job to schedule.
     * @param {string} schedule - Cron expression.
     * @param {boolean} isJob - True to target a job, false to target an asset.
     * @returns {string} Python source, terminated by a newline.
     */
    generateScheduleCode(assetOrJobName, schedule, isJob) {
        const cron = pyString(schedule);
        const lines = isJob
            ? [
                'from dagster import schedule',
                `from ..jobs.${assetOrJobName} import ${assetOrJobName}`,
                '',
                '',
                `@schedule(cron_schedule=${cron}, job=${assetOrJobName})`,
                `def ${assetOrJobName}_schedule():`,
                ...indentLines([
                    `"""Schedule for ${assetOrJobName} job"""`,
                    'return {}',
                ]),
            ]
            : [
                'from dagster import schedule, RunRequest',
                `from ..assets.${assetOrJobName} import ${assetOrJobName}`,
                '',
                '',
                `@schedule(cron_schedule=${cron})`,
                `def ${assetOrJobName}_schedule():`,
                ...indentLines([
                    `"""Schedule for ${assetOrJobName} asset"""`,
                    `return RunRequest(asset_selection=[${assetOrJobName}])`,
                ]),
            ];
        return [...lines, ''].join('\n');
    }

    /**
     * Register an asset module in `<assetsPath>/__init__.py`.
     *
     * @param {string} assetName - Module/symbol name to import and export.
     * @returns {Promise<void>}
     */
    async updateAssetsInit(assetName) {
        const initPath = (0, path_1.join)(this.config.assetsPath, '__init__.py');
        await this.updateInitFile(initPath, assetName, 'assets');
    }

    /**
     * Register a job module in `<jobsPath>/__init__.py`.
     *
     * @param {string} jobName - Module/symbol name to import and export.
     * @returns {Promise<void>}
     */
    async updateJobsInit(jobName) {
        const initPath = (0, path_1.join)(this.config.jobsPath, '__init__.py');
        await this.updateInitFile(initPath, jobName, 'jobs');
    }

    /**
     * Register a schedule module in `<dagsterPath>/schedules/__init__.py`.
     *
     * @param {string} scheduleName - Module/symbol name to import and export.
     * @returns {Promise<void>}
     */
    async updateSchedulesInit(scheduleName) {
        const initPath = (0, path_1.join)(this.config.dagsterPath, 'schedules', '__init__.py');
        await this.updateInitFile(initPath, scheduleName, 'schedules');
    }

    /**
     * Add `from .<itemName> import <itemName>` to a package `__init__.py` and
     * make sure `<itemName>` is listed in its `__all__`. The file is created
     * when missing and left untouched when the import line is already there.
     *
     * @param {string} initPath - Path of the `__init__.py` to update.
     * @param {string} itemName - Module name, also used as the imported symbol.
     * @param {string} _itemType - Unused; kept for interface compatibility.
     * @returns {Promise<void>}
     */
    async updateInitFile(initPath, itemName, _itemType) {
        let content = '';
        if ((0, fs_1.existsSync)(initPath)) {
            content = await (0, promises_1.readFile)(initPath, 'utf-8');
        }

        const importLine = `from .${itemName} import ${itemName}`;
        // Compare whole lines: a substring test would treat a longer import
        // that merely starts with this text as "already present".
        const alreadyImported = content
            .split(/\r?\n/)
            .some((line) => line.trim() === importLine);
        if (alreadyImported) {
            return;
        }

        // Never glue the import onto an unterminated last line.
        if (content.length > 0 && !content.endsWith('\n')) {
            content += '\n';
        }
        content += `${importLine}\n`;

        const allPattern = /__all__\s*=\s*\[(.*?)\]/s;
        const allMatch = content.match(allPattern);
        if (allMatch) {
            const exportedNames = allMatch[1]
                .split(',')
                .map((item) => item.trim().replace(/['"]/g, ''))
                .filter((item) => item);
            if (!exportedNames.includes(itemName)) {
                exportedNames.push(itemName);
                // Quotes were stripped while parsing, so every entry (not only
                // the new one) must be re-quoted to keep __all__ a list of strings.
                const allStatement = `__all__ = [${exportedNames.map((item) => pyString(item)).join(', ')}]`;
                // Function replacer: avoids `$` sequences being interpreted.
                content = content.replace(allPattern, () => allStatement);
            }
        } else {
            content += `\n__all__ = [${pyString(itemName)}]\n`;
        }

        await (0, promises_1.writeFile)(initPath, content);
    }

    /**
     * Spawn the `dagster` CLI in the project directory with inherited stdio.
     *
     * @param {string[]} args - Arguments passed to `dagster`.
     * @returns {Promise<void>} Resolves on exit code 0; rejects with an Error
     *   on a non-zero exit, on termination by signal, or when the process
     *   cannot be spawned.
     */
    async executeDagsterCommand(args) {
        return new Promise((resolve, reject) => {
            const dagster = (0, child_process_1.spawn)('dagster', args, {
                cwd: this.config.projectPath,
                stdio: 'inherit',
            });
            dagster.on('close', (code, signal) => {
                if (code === 0) {
                    resolve();
                    return;
                }
                // `code` is null when the child was terminated by a signal.
                const reason = code === null
                    ? `Dagster command was terminated by signal ${signal}`
                    : `Dagster command failed with exit code ${code}`;
                reject(new Error(reason));
            });
            dagster.on('error', (error) => {
                reject(new Error(`Failed to execute Dagster command: ${error.message}`));
            });
        });
    }
}
exports.DagsterManager = DagsterManager;
//# sourceMappingURL=index.js.map