/*
 * dataweave
 * Version:
 * AI-assisted CLI for modern data pipelines with DBT, Dagster, and Supabase integration
 * 304 lines (282 loc) • 12 kB
 * JavaScript
 */
/**
 * Interop helper for default imports of CommonJS modules.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise falls back to the local implementation below.
 *
 * @param {*} mod - The value returned by `require(...)`.
 * @returns {*} `mod` itself when it is flagged `__esModule`, otherwise an
 *   object exposing `mod` under the `default` key.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Modules flagged as ES modules already carry their own `default` export.
    if (mod && mod.__esModule) {
        return mod;
    }
    // Anything else is wrapped so that `.default` resolves to the module itself.
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.DagsterManager = void 0;
const promises_1 = require("fs/promises");
const path_1 = require("path");
const fs_1 = require("fs");
const child_process_1 = require("child_process");
const chalk_1 = __importDefault(require("chalk"));
/**
 * Render a value as a double-quoted Python string literal.
 *
 * JSON string escaping (`\"`, `\\`, `\n`, `\uXXXX`) is also valid inside a
 * Python `str` literal, so quotes, backslashes and control characters in user
 * supplied text can no longer break the generated source. For plain text the
 * result is identical to wrapping the value in double quotes.
 *
 * @param {*} value - Value to render; coerced with `String()`.
 * @returns {string} The quoted, escaped literal (including the quotes).
 */
function toPythonString(value) {
    return JSON.stringify(String(value));
}

/**
 * Escape free text so it can be placed inside a Python triple-quoted docstring.
 *
 * @param {*} text - Docstring text; coerced with `String()`.
 * @returns {string} Text with backslashes doubled and `"""` sequences escaped.
 */
function escapeDocstring(text) {
    return String(text).replace(/\\/g, '\\\\').replace(/"""/g, '\\"\\"\\"');
}

/**
 * Upper-case the first character of a string, leaving the rest untouched.
 *
 * @param {string} text
 * @returns {string}
 */
function capitalizeFirst(text) {
    return text.charAt(0).toUpperCase() + text.slice(1);
}

/**
 * Scaffolds Dagster assets, jobs and schedules as Python files and runs the
 * `dagster` command-line tool for a project.
 *
 * The `config` object is read for the following properties:
 * `assetsPath`, `jobsPath`, `dagsterPath` (schedules are written to
 * `<dagsterPath>/schedules`) and `projectPath` (working directory for the
 * `dagster` CLI).
 */
class DagsterManager {
    /**
     * @param {object} config - Path configuration (see class description).
     */
    constructor(config) {
        this.config = config;
    }

    /**
     * Write `<assetsPath>/<name>.py`, register it in the assets `__init__.py`
     * and, when `options.schedule` is set, also generate a schedule file.
     *
     * @param {object} options
     * @param {string} options.name - Asset (and Python function/file) name.
     * @param {string} [options.description]
     * @param {string[]} [options.dependencies=[]] - Upstream asset names.
     * @param {string} [options.code] - Full file content; when given it is
     *   written verbatim instead of the generated template.
     * @param {string} [options.partitions] - Start date for a daily partition definition.
     * @param {string} [options.schedule] - Cron expression.
     * @param {string[]} [options.tags=[]]
     * @param {string} [options.compute_kind]
     * @param {string} [options.io_manager] - Emitted as `io_manager_key`.
     * @returns {Promise<void>}
     */
    async generateAsset(options) {
        const { name, description, dependencies = [], code, partitions, schedule, tags = [], compute_kind, io_manager } = options;
        console.log(chalk_1.default.blue(`⚡ Generating Dagster asset: ${name}`));
        await this.ensureDirectory(this.config.assetsPath);
        const assetCode = code || this.generateDefaultAssetCode(name, description, dependencies, partitions, tags, compute_kind, io_manager);
        const assetPath = path_1.join(this.config.assetsPath, `${name}.py`);
        await promises_1.writeFile(assetPath, assetCode);
        await this.updateAssetsInit(name);
        if (schedule) {
            await this.generateSchedule(name, schedule);
        }
        console.log(chalk_1.default.green(`✓ Created Dagster asset: ${name}`));
    }

    /**
     * Write `<jobsPath>/<name>.py`, register it in the jobs `__init__.py` and,
     * when `options.schedule` is set, also generate a job schedule file.
     *
     * @param {object} options
     * @param {string} options.name - Job (and Python function/file) name.
     * @param {string} [options.description]
     * @param {string[]} [options.assets=[]]
     * @param {string} [options.schedule] - Cron expression.
     * @param {string[]} [options.tags=[]]
     * @returns {Promise<void>}
     */
    async generateJob(options) {
        const { name, description, assets = [], schedule, tags = [] } = options;
        console.log(chalk_1.default.blue(`🔧 Generating Dagster job: ${name}`));
        await this.ensureDirectory(this.config.jobsPath);
        const jobCode = this.generateJobCode(name, description, assets, tags);
        const jobPath = path_1.join(this.config.jobsPath, `${name}.py`);
        await promises_1.writeFile(jobPath, jobCode);
        await this.updateJobsInit(name);
        if (schedule) {
            await this.generateSchedule(name, schedule, true);
        }
        console.log(chalk_1.default.green(`✓ Created Dagster job: ${name}`));
    }

    /**
     * Write `<assetsPath>/dbt_<modelName>.py` and register it in the assets
     * `__init__.py`.
     *
     * @param {string} modelName - DBT model name.
     * @returns {Promise<void>}
     */
    async generateDbtAsset(modelName) {
        console.log(chalk_1.default.blue(`📊 Generating Dagster asset for DBT model: ${modelName}`));
        // The assets directory may not exist yet when this is the first asset
        // generated for a project, so create it like generateAsset does.
        await this.ensureDirectory(this.config.assetsPath);
        const assetCode = this.generateDbtAssetCode(modelName);
        const assetPath = path_1.join(this.config.assetsPath, `dbt_${modelName}.py`);
        await promises_1.writeFile(assetPath, assetCode);
        await this.updateAssetsInit(`dbt_${modelName}`);
        console.log(chalk_1.default.green(`✓ Created Dagster asset for DBT model: ${modelName}`));
    }

    /**
     * Run `dagster asset materialize --select <assetName>`.
     *
     * @param {string} assetName
     * @returns {Promise<void>} Rejects when the command fails.
     */
    async runAsset(assetName) {
        console.log(chalk_1.default.blue(`🚀 Running Dagster asset: ${assetName}`));
        await this.executeDagsterCommand(['asset', 'materialize', '--select', assetName]);
    }

    /**
     * Run `dagster job execute --job <jobName>`.
     *
     * @param {string} jobName
     * @returns {Promise<void>} Rejects when the command fails.
     */
    async runJob(jobName) {
        console.log(chalk_1.default.blue(`🚀 Running Dagster job: ${jobName}`));
        await this.executeDagsterCommand(['job', 'execute', '--job', jobName]);
    }

    /**
     * Run `dagster dev --port <port>`; resolves once that process exits.
     *
     * @param {number} [port=3000]
     * @returns {Promise<void>}
     */
    async startDagster(port = 3000) {
        console.log(chalk_1.default.blue(`🌐 Starting Dagster web server on port ${port}...`));
        await this.executeDagsterCommand(['dev', '--port', String(port)]);
    }

    /**
     * Run `dagster pipeline validate`.
     *
     * NOTE(review): the `pipeline` CLI group may not exist in current Dagster
     * releases — confirm against the installed version.
     *
     * @returns {Promise<void>}
     */
    async validatePipeline() {
        console.log(chalk_1.default.blue('🔍 Validating Dagster pipeline...'));
        await this.executeDagsterCommand(['pipeline', 'validate']);
    }

    /**
     * Build the Python source of a template `@asset` function.
     *
     * All free-text values are emitted as escaped Python string literals and
     * the function body is indented with four spaces.
     *
     * @param {string} name - Python function name of the asset.
     * @param {string} [description] - Used for `description=` and the docstring.
     * @param {string[]} [dependencies=[]] - Each becomes an `AssetIn()` entry
     *   and a `pd.DataFrame` parameter.
     * @param {string} [partitions] - `start_date` of a `DailyPartitionsDefinition`.
     * @param {string[]} [tags=[]]
     * @param {string} [compute_kind]
     * @param {string} [io_manager] - Emitted as `io_manager_key`.
     * @returns {string} Python source, terminated by a newline.
     */
    generateDefaultAssetCode(name, description, dependencies = [], partitions, tags = [], compute_kind, io_manager) {
        const hasDependencies = dependencies.length > 0;

        const imports = ['from dagster import asset', 'import pandas as pd'];
        if (partitions) {
            imports.push('from dagster import DailyPartitionsDefinition');
        }
        if (hasDependencies) {
            imports.push('from dagster import AssetIn');
        }

        const decoratorOptions = [];
        if (hasDependencies) {
            const ins = dependencies.map((dep) => `${toPythonString(dep)}: AssetIn()`).join(', ');
            decoratorOptions.push(`ins={${ins}}`);
        }
        if (description) {
            decoratorOptions.push(`description=${toPythonString(description)}`);
        }
        if (tags.length > 0) {
            // NOTE(review): this renders a Python set literal; Dagster's `tags`
            // argument presumably expects a mapping — confirm the intended shape.
            decoratorOptions.push(`tags={${tags.map((tag) => toPythonString(tag)).join(', ')}}`);
        }
        if (compute_kind) {
            decoratorOptions.push(`compute_kind=${toPythonString(compute_kind)}`);
        }
        if (io_manager) {
            decoratorOptions.push(`io_manager_key=${toPythonString(io_manager)}`);
        }
        if (partitions) {
            decoratorOptions.push(`partitions_def=DailyPartitionsDefinition(start_date=${toPythonString(partitions)})`);
        }
        const decorator = decoratorOptions.length > 0
            ? `@asset(${decoratorOptions.join(', ')})`
            : '@asset';

        const functionParams = dependencies.map((dep) => `${dep}: pd.DataFrame`).join(', ');

        const bodyLines = hasDependencies
            ? [
                '    # Process input data',
                '    processed_data = pd.DataFrame()',
                '',
                '    # TODO: Add your asset logic here',
                `    # Example: processed_data = ${dependencies[0]}.copy()`,
                '',
                '    return processed_data',
            ]
            : [
                '    # TODO: Add your asset logic here',
                '    # Example: Load data from external source',
                '    data = pd.DataFrame({',
                "        'id': [1, 2, 3],",
                "        'value': [100, 200, 300]",
                '    })',
                '',
                '    return data',
            ];

        const lines = [
            ...imports,
            '',
            '',
            decorator,
            `def ${name}(${functionParams}):`,
            '    """',
            `    ${escapeDocstring(description || `Asset: ${name}`)}`,
            '',
            '    Returns:',
            '        pd.DataFrame: Processed data',
            '    """',
            ...bodyLines,
        ];
        return `${lines.join('\n')}\n`;
    }

    /**
     * Build the Python source of a template job: a `Config` subclass, one
     * `@op` named `<name>_op` and the `@job` function `<name>`.
     *
     * NOTE(review): when `assets` is non-empty the job body calls
     * `<asset>_op()` for each entry, but only `<name>_op` is defined in the
     * generated file and `AssetMaterialization` is imported without being
     * used — confirm how asset ops are meant to be provided.
     *
     * @param {string} name - Job name.
     * @param {string} [description]
     * @param {string[]} [assets=[]]
     * @param {string[]} [tags=[]]
     * @returns {string} Python source, terminated by a newline.
     */
    generateJobCode(name, description, assets = [], tags = []) {
        const imports = ['from dagster import job, op', 'from dagster import Config'];
        if (assets.length > 0) {
            imports.push('from dagster import AssetMaterialization');
        }

        const jobOptions = [];
        if (description) {
            jobOptions.push(`description=${toPythonString(description)}`);
        }
        if (tags.length > 0) {
            // NOTE(review): rendered as a Python set literal; Dagster presumably
            // expects a mapping for `tags` — confirm.
            jobOptions.push(`tags={${tags.map((tag) => toPythonString(tag)).join(', ')}}`);
        }
        const decorator = jobOptions.length > 0
            ? `@job(${jobOptions.join(', ')})`
            : '@job';

        const jobBody = assets.length > 0
            ? assets.map((assetName) => `    ${assetName}_op()`)
            : ['    # TODO: Add your ops here', '    pass'];

        const configClass = `${capitalizeFirst(name)}Config`;
        const lines = [
            ...imports,
            '',
            '',
            `class ${configClass}(Config):`,
            `    """Configuration for ${name} job"""`,
            '    # TODO: Add configuration parameters',
            '    pass',
            '',
            '',
            '@op',
            `def ${name}_op(config: ${configClass}):`,
            `    """Main operation for ${name} job"""`,
            '    # TODO: Add your operation logic here',
            '    pass',
            '',
            '',
            decorator,
            `def ${name}():`,
            '    """',
            `    ${escapeDocstring(description || `Job: ${name}`)}`,
            '    """',
            ...jobBody,
        ];
        return `${lines.join('\n')}\n`;
    }

    /**
     * Build the Python source of a placeholder asset `dbt_<modelName>` that
     * returns an empty DataFrame, followed by a commented-out `@dbt_assets`
     * alternative.
     *
     * @param {string} modelName - DBT model name.
     * @returns {string} Python source, terminated by a newline.
     */
    generateDbtAssetCode(modelName) {
        const lines = [
            'from dagster import asset',
            'from dagster_dbt import dbt_assets, DbtCliResource',
            'from dagster import Config',
            'import pandas as pd',
            '',
            '',
            'class DbtConfig(Config):',
            '    """Configuration for DBT operations"""',
            '    profiles_dir: str = "config"',
            '    project_dir: str = "data/dbt"',
            '',
            '',
            '@asset(compute_kind="dbt")',
            `def dbt_${modelName}(config: DbtConfig) -> pd.DataFrame:`,
            '    """',
            `    Dagster asset for DBT model: ${modelName}`,
            '',
            '    This asset runs the DBT model and returns the results.',
            '    """',
            '    # TODO: Implement DBT model execution',
            "    # This is a placeholder - in production you'd use dagster-dbt integration",
            '',
            '    # For now, return empty DataFrame',
            '    return pd.DataFrame()',
            '',
            '',
            '# Alternative: Use dagster-dbt integration (recommended for production)',
            '# @dbt_assets(',
            '#     manifest=dbt_manifest_path,',
            `#     select=${toPythonString(modelName)},`,
            '#     compute_kind="dbt"',
            '# )',
            `# def dbt_${modelName}_assets(context: AssetExecutionContext, dbt: DbtCliResource):`,
            '#     yield from dbt.cli(["build"], context=context).stream()',
        ];
        return `${lines.join('\n')}\n`;
    }

    /**
     * Write `<dagsterPath>/schedules/<name>_schedule.py` and register it in
     * the schedules `__init__.py`.
     *
     * @param {string} assetOrJobName
     * @param {string} schedule - Cron expression.
     * @param {boolean} [isJob=false] - Whether the target is a job rather than an asset.
     * @returns {Promise<void>}
     */
    async generateSchedule(assetOrJobName, schedule, isJob = false) {
        const schedulesPath = path_1.join(this.config.dagsterPath, 'schedules');
        await this.ensureDirectory(schedulesPath);
        const scheduleCode = this.generateScheduleCode(assetOrJobName, schedule, isJob);
        const schedulePath = path_1.join(schedulesPath, `${assetOrJobName}_schedule.py`);
        await promises_1.writeFile(schedulePath, scheduleCode);
        await this.updateSchedulesInit(`${assetOrJobName}_schedule`);
    }

    /**
     * Build the Python source of a `@schedule` function named
     * `<assetOrJobName>_schedule`.
     *
     * NOTE(review): the asset variant emits `@schedule(cron_schedule=...)`
     * without a job or target; Dagster presumably requires one — confirm.
     *
     * @param {string} assetOrJobName
     * @param {string} schedule - Cron expression.
     * @param {boolean} isJob - Selects the job or the asset template.
     * @returns {string} Python source, terminated by a newline.
     */
    generateScheduleCode(assetOrJobName, schedule, isJob) {
        const cron = toPythonString(schedule);
        const lines = isJob
            ? [
                'from dagster import schedule',
                `from ..jobs.${assetOrJobName} import ${assetOrJobName}`,
                '',
                '',
                `@schedule(cron_schedule=${cron}, job=${assetOrJobName})`,
                `def ${assetOrJobName}_schedule():`,
                `    """Schedule for ${assetOrJobName} job"""`,
                '    return {}',
            ]
            : [
                'from dagster import schedule, RunRequest',
                `from ..assets.${assetOrJobName} import ${assetOrJobName}`,
                '',
                '',
                `@schedule(cron_schedule=${cron})`,
                `def ${assetOrJobName}_schedule():`,
                `    """Schedule for ${assetOrJobName} asset"""`,
                `    return RunRequest(asset_selection=[${assetOrJobName}])`,
            ];
        return `${lines.join('\n')}\n`;
    }

    /**
     * Register `assetName` in `<assetsPath>/__init__.py`.
     *
     * @param {string} assetName
     * @returns {Promise<void>}
     */
    async updateAssetsInit(assetName) {
        const initPath = path_1.join(this.config.assetsPath, '__init__.py');
        await this.updateInitFile(initPath, assetName, 'assets');
    }

    /**
     * Register `jobName` in `<jobsPath>/__init__.py`.
     *
     * @param {string} jobName
     * @returns {Promise<void>}
     */
    async updateJobsInit(jobName) {
        const initPath = path_1.join(this.config.jobsPath, '__init__.py');
        await this.updateInitFile(initPath, jobName, 'jobs');
    }

    /**
     * Register `scheduleName` in `<dagsterPath>/schedules/__init__.py`.
     *
     * @param {string} scheduleName
     * @returns {Promise<void>}
     */
    async updateSchedulesInit(scheduleName) {
        const initPath = path_1.join(this.config.dagsterPath, 'schedules', '__init__.py');
        await this.updateInitFile(initPath, scheduleName, 'schedules');
    }

    /**
     * Add `from .<itemName> import <itemName>` to a package `__init__.py` and
     * make sure `itemName` is listed in its `__all__`.
     *
     * Does nothing when the import line is already present. The `__all__`
     * list is re-rendered with every entry double-quoted, so existing entries
     * keep (or regain) their quotes when a new item is appended.
     *
     * @param {string} initPath - Path of the `__init__.py` file; created when missing.
     * @param {string} itemName - Module and symbol name to export.
     * @param {string} _itemType - Unused.
     * @returns {Promise<void>}
     */
    async updateInitFile(initPath, itemName, _itemType) {
        let content = '';
        try {
            content = await promises_1.readFile(initPath, 'utf-8');
        }
        catch (error) {
            // A missing file simply means this is the first registration.
            if (!error || error.code !== 'ENOENT') {
                throw error;
            }
        }

        const importLine = `from .${itemName} import ${itemName}`;
        if (content.includes(importLine)) {
            return;
        }
        content += `${importLine}\n`;

        const allPattern = /__all__\s*=\s*\[(.*?)\]/s;
        const allMatch = content.match(allPattern);
        if (allMatch) {
            // Entries are parsed with their quotes removed, so every entry must
            // be quoted again on output; otherwise `__all__` would end up
            // holding bare names instead of strings.
            const exportedNames = allMatch[1]
                .split(',')
                .map((entry) => entry.trim().replace(/['"]/g, ''))
                .filter((entry) => entry);
            if (!exportedNames.includes(itemName)) {
                exportedNames.push(itemName);
            }
            const rendered = exportedNames.map((entry) => `"${entry}"`).join(', ');
            // Replacer function: keeps `$` sequences in names from being
            // interpreted as replacement patterns.
            content = content.replace(allPattern, () => `__all__ = [${rendered}]`);
        }
        else {
            content += `\n__all__ = ["${itemName}"]\n`;
        }
        await promises_1.writeFile(initPath, content);
    }

    /**
     * Create `dirPath` (and any missing parents). A directory that already
     * exists is not an error.
     *
     * @param {string} dirPath
     * @returns {Promise<void>}
     */
    async ensureDirectory(dirPath) {
        await promises_1.mkdir(dirPath, { recursive: true });
    }

    /**
     * Spawn `dagster <args>` in `config.projectPath` with inherited stdio.
     *
     * @param {string[]} args - Arguments passed to the `dagster` executable.
     * @returns {Promise<void>} Resolves on exit code 0; rejects with an
     *   `Error` when the process cannot be started, exits with another code,
     *   or is terminated by a signal.
     */
    async executeDagsterCommand(args) {
        return new Promise((resolve, reject) => {
            const child = child_process_1.spawn('dagster', args, {
                cwd: this.config.projectPath,
                stdio: 'inherit',
            });
            child.on('error', (error) => {
                reject(new Error(`Failed to execute Dagster command: ${error.message}`, { cause: error }));
            });
            child.on('close', (code, signal) => {
                if (code === 0) {
                    resolve();
                    return;
                }
                // `code` is null when the child was killed by a signal.
                const reason = code === null && signal
                    ? `was terminated by signal ${signal}`
                    : `failed with exit code ${code}`;
                reject(new Error(`Dagster command ${reason}`));
            });
        });
    }
}
exports.DagsterManager = DagsterManager;
//# sourceMappingURL=index.js.map
;