UNPKG

dataweave

Version:

AI-assisted CLI for modern data pipelines with DBT, Dagster, and Supabase integration

450 lines (394 loc) • 12.5 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ProjectScaffolder = void 0;
const promises_1 = require("fs/promises");
const path_1 = require("path");
const chalk_1 = __importDefault(require("chalk"));

/**
 * Directories (relative to the project root) created for every new project.
 * Parents are listed explicitly for readability even though recursive mkdir
 * would create them implicitly.
 */
const PROJECT_DIRECTORIES = Object.freeze([
    'data',
    'data/dbt',
    'data/dbt/models',
    'data/dbt/models/staging',
    'data/dbt/models/intermediate',
    'data/dbt/models/marts',
    'data/dbt/macros',
    'data/dbt/tests',
    'data/dagster',
    'data/dagster/assets',
    'data/dagster/jobs',
    'data/dagster/sensors',
    'data/dagster/schedules',
    'data/assets',
    'supabase',
    'supabase/migrations',
    'supabase/functions',
    '.dataweave',
    'config',
    'scripts',
]);

/**
 * Write `contents` to `relativePath` below `targetDir`, creating the parent
 * directory first so each generator also works when it is called without a
 * prior `createDirectoryStructure()`.
 *
 * @param {string} targetDir - Project root directory.
 * @param {string} relativePath - File path relative to `targetDir`.
 * @param {string} contents - Text to write.
 * @returns {Promise<void>}
 */
async function writeProjectFile(targetDir, relativePath, contents) {
    const filePath = path_1.join(targetDir, relativePath);
    await promises_1.mkdir(path_1.dirname(filePath), { recursive: true });
    await promises_1.writeFile(filePath, contents);
}

/**
 * Convert a project name into an identifier dbt accepts as a project name
 * (letters, digits and underscores, not starting with a digit). Names that
 * are already valid are returned unchanged.
 *
 * @param {string} name - Project name as supplied by the caller.
 * @returns {string} Name usable in dbt_project.yml.
 */
function toDbtProjectName(name) {
    const sanitized = String(name).replace(/\W/g, '_');
    return /^\d/.test(sanitized) ? `_${sanitized}` : sanitized;
}

/**
 * Build the contents of `.env.example`.
 *
 * @param {string} projectRoot - Absolute path of the project root.
 * @returns {string}
 */
function buildEnvTemplate(projectRoot) {
    return `# Dataweave Environment Configuration
# Copy this to .env and fill in your actual values

# Database Configuration
DATABASE_URL=postgresql://username:password@localhost:5432/database_name

# Supabase Configuration (if using Supabase)
SUPABASE_URL=https://your-project.supabase.co
SUPABASE_ANON_KEY=your-anon-key
SUPABASE_SERVICE_ROLE_KEY=your-service-role-key

# AI/LLM Configuration
OPENAI_API_KEY=your-openai-api-key

# Dagster Configuration
DAGSTER_HOME=${path_1.join(projectRoot, 'data', 'dagster')}

# DBT Configuration
DBT_PROFILES_DIR=${path_1.join(projectRoot, 'config')}
`;
}

/**
 * Build the contents of `data/dbt/dbt_project.yml`.
 *
 * @param {string} projectName - dbt-safe project name (see toDbtProjectName).
 * @returns {string}
 */
function buildDbtProject(projectName) {
    return `name: '${projectName}'
version: '1.0.0'
config-version: 2

profile: 'dataweave'

model-paths: ["models"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target"
clean-targets:
  - "target"
  - "dbt_packages"

models:
  ${projectName}:
    staging:
      +materialized: view
    intermediate:
      +materialized: view
    marts:
      +materialized: table
`;
}

/** Contents of `config/profiles.yml` (dev uses local Postgres, prod reads env vars). */
const DBT_PROFILES = `dataweave:
  outputs:
    dev:
      type: postgres
      host: localhost
      user: postgres
      password: postgres
      port: 5432
      dbname: dataweave_dev
      schema: public
      threads: 4
      keepalives_idle: 0
    prod:
      type: postgres
      host: "{{ env_var('DATABASE_HOST') }}"
      user: "{{ env_var('DATABASE_USER') }}"
      password: "{{ env_var('DATABASE_PASSWORD') }}"
      port: "{{ env_var('DATABASE_PORT') | as_number }}"
      dbname: "{{ env_var('DATABASE_NAME') }}"
      schema: public
      threads: 4
      keepalives_idle: 0
  target: dev
`;

/** Contents of `data/dbt/models/staging/stg_users.sql`. */
const DBT_STAGING_MODEL = `{{ config(materialized='view') }}

-- Sample staging model
-- This is where you would clean and standardize raw data

select
    id,
    name,
    email,
    created_at,
    updated_at
from {{ source('raw', 'users') }}
where created_at is not null
`;

/** Contents of `data/dbt/models/staging/schema.yml`. */
const DBT_STAGING_SCHEMA = `version: 2

sources:
  - name: raw
    description: Raw data from various sources
    tables:
      - name: users
        description: Raw user data
        columns:
          - name: id
            description: Primary key
            tests:
              - unique
              - not_null
          - name: email
            description: User email address
            tests:
              - unique
              - not_null

models:
  - name: stg_users
    description: Staged user data with basic cleaning
    columns:
      - name: id
        description: Primary key
        tests:
          - unique
          - not_null
      - name: email
        description: User email address
        tests:
          - unique
          - not_null
`;

/** Contents of `data/dagster/definitions.py`. */
const DAGSTER_DEFINITIONS = `from dagster import Definitions, load_assets_from_modules

from . import assets

defs = Definitions(
    assets=load_assets_from_modules([assets]),
)
`;

/**
 * Contents of `data/dagster/assets/__init__.py`.
 *
 * `processed_data` declares its upstream only through the `sample_data`
 * parameter; listing the same asset in `deps=[...]` as well is rejected by
 * Dagster as a duplicate declaration.
 */
const DAGSTER_ASSETS = `from dagster import asset
import pandas as pd


@asset
def sample_data():
    """Sample data asset to demonstrate Dagster functionality."""
    return pd.DataFrame({
        'id': [1, 2, 3],
        'name': ['Alice', 'Bob', 'Charlie'],
        'value': [100, 200, 300]
    })


@asset
def processed_data(sample_data: pd.DataFrame):
    """Process the sample data."""
    processed = sample_data.copy()
    processed['doubled_value'] = processed['value'] * 2
    return processed
`;

/**
 * Build the contents of `pyproject.toml`.
 *
 * @param {string} name - Project name; used as the Python distribution prefix.
 * @returns {string}
 */
function buildPyproject(name) {
    // `module_name` is the key Dagster reads from [tool.dagster] to locate the
    // code location's Definitions module.
    return `[build-system]
requires = ["setuptools", "wheel"]

[project]
name = "${name}-dagster"
version = "0.1.0"
dependencies = [
    "dagster",
    "dagster-webserver",
    "dagster-postgres",
    "pandas",
    "psycopg2-binary",
]

[tool.dagster]
module_name = "data.dagster.definitions"
`;
}

/** Contents of `supabase/config.toml`. */
const SUPABASE_CONFIG = `[api]
enabled = true
port = 54321

[db]
port = 54322

[studio]
enabled = true
port = 54323

[inbucket]
enabled = true
port = 54324

[storage]
enabled = true
file_size_limit = "50MiB"

[auth]
enabled = true
external_url = "http://localhost:3000"

[edge_functions]
enabled = true
`;

/** Contents of `supabase/migrations/001_initial_schema.sql`. */
const SUPABASE_MIGRATION = `-- Sample migration
-- This creates a basic users table

create table if not exists users (
    id uuid default gen_random_uuid() primary key,
    email text unique not null,
    name text,
    created_at timestamp with time zone default now(),
    updated_at timestamp with time zone default now()
);

-- Enable Row Level Security
alter table users enable row level security;

-- Create updated_at trigger
create or replace function update_updated_at_column()
returns trigger as $$
begin
    new.updated_at = now();
    return new;
end;
$$ language plpgsql;

create trigger update_users_updated_at
    before update on users
    for each row
    execute function update_updated_at_column();
`;

/**
 * Build the contents of the project `README.md`.
 *
 * @param {string} name - Project name shown in the title and directory tree.
 * @returns {string}
 */
function buildReadme(name) {
    // `createdb dataweave_dev` matches the dbname of the dev target in
    // config/profiles.yml; `dagster dev -m ...` is used because definitions.py
    // relies on a package-relative import.
    return `# ${name}

AI-assisted data pipeline project generated with [dataweave](https://github.com/yourusername/dataweave).

## Getting Started

### Prerequisites

- Python 3.8+
- Node.js 16+
- PostgreSQL (or Supabase account)

### Installation

1. Install dependencies:
   \`\`\`bash
   pip install -e .
   npm install
   \`\`\`

2. Set up environment variables:
   \`\`\`bash
   cp .env.example .env
   # Edit .env with your configuration
   \`\`\`

3. Initialize the database:
   \`\`\`bash
   # If using Supabase
   npx supabase start

   # If using local PostgreSQL
   createdb dataweave_dev
   \`\`\`

### Development

#### DBT

Run DBT models:
\`\`\`bash
cd data/dbt
dbt run
dbt test
\`\`\`

#### Dagster

Start Dagster web server:
\`\`\`bash
dagster dev -m data.dagster.definitions
\`\`\`

#### Dataweave Commands

\`\`\`bash
# Generate new DBT model
dataweave dbt model new --name my_model

# Generate new Dagster asset
dataweave dagster asset new --name my_asset

# AI-powered generation
dataweave ai generate "Create a model that calculates monthly revenue"

# Analyze existing code
dataweave analyze
\`\`\`

## Project Structure

\`\`\`
${name}/
├── data/                # Data pipeline code
│   ├── dbt/             # DBT models and configuration
│   ├── dagster/         # Dagster assets and jobs
│   └── assets/          # Shared data assets
├── supabase/            # Supabase configuration and migrations
├── config/              # Configuration files
├── scripts/             # Utility scripts
└── .dataweave/          # Dataweave configuration
\`\`\`

## Next Steps

1. Configure your database connection in \`.env\`
2. Update the DBT profile in \`config/profiles.yml\`
3. Run your first DBT models: \`cd data/dbt && dbt run\`
4. Start the Dagster web server: \`dagster dev\`
5. Explore AI-powered features: \`dataweave ai --help\`

## Documentation

- [DBT Documentation](https://docs.getdbt.com/)
- [Dagster Documentation](https://docs.dagster.io/)
- [Supabase Documentation](https://supabase.com/docs)
- [Dataweave Documentation](https://github.com/yourusername/dataweave)
`;
}

/**
 * Creates the on-disk skeleton of a dataweave project: directory tree,
 * dataweave/env configuration, and optional DBT, Dagster and Supabase
 * starter files plus a README.
 */
class ProjectScaffolder {
    /**
     * @param {object} options - Scaffolding options.
     * @param {string} options.targetDir - Directory the project is written to.
     * @param {string} options.name - Project name used in generated files.
     * @param {boolean} [options.includeDbt=true] - Generate DBT starter files.
     * @param {boolean} [options.includeDagster=true] - Generate Dagster starter files.
     * @param {boolean} [options.includeSupabase=true] - Generate Supabase starter files.
     */
    constructor(options) {
        this.options = {
            includeDbt: true,
            includeDagster: true,
            includeSupabase: true,
            ...options,
        };
    }

    /**
     * Run the full scaffold: directories, configuration files, then template
     * files, logging progress and next steps to the console.
     *
     * @returns {Promise<void>} Rejects if any directory or file cannot be written.
     */
    async scaffold() {
        const { targetDir, name } = this.options;
        console.log(chalk_1.default.blue(`🚀 Scaffolding dataweave project: ${name}`));
        await this.createDirectoryStructure();
        await this.generateConfigFiles();
        await this.generateTemplateFiles();
        console.log(chalk_1.default.green(`✅ Successfully scaffolded project in ${targetDir}`));
        console.log(chalk_1.default.gray('\nNext steps:'));
        console.log(chalk_1.default.gray(`  cd ${name}`));
        console.log(chalk_1.default.gray('  npm install'));
        console.log(chalk_1.default.gray('  dataweave --help'));
    }

    /**
     * Create every directory in PROJECT_DIRECTORIES below `targetDir`.
     *
     * @returns {Promise<void>}
     */
    async createDirectoryStructure() {
        const { targetDir } = this.options;
        for (const dir of PROJECT_DIRECTORIES) {
            // Recursive mkdir succeeds when the directory already exists, so
            // no separate existence check is needed.
            await promises_1.mkdir(path_1.join(targetDir, dir), { recursive: true });
        }
        console.log(chalk_1.default.green('✓ Created directory structure'));
    }

    /**
     * Write `.dataweave/config.json` and `.env.example`.
     *
     * @returns {Promise<void>}
     */
    async generateConfigFiles() {
        const { targetDir, name } = this.options;
        const dataweaveConfig = {
            name,
            version: '1.0.0',
            dbt: {
                enabled: this.options.includeDbt,
                profile: 'dataweave',
                target: 'dev',
            },
            dagster: {
                enabled: this.options.includeDagster,
                workspace: './data/dagster',
            },
            supabase: {
                enabled: this.options.includeSupabase,
                projectId: '',
                apiUrl: '',
                anonKey: '',
            },
            ai: {
                enabled: true,
                provider: 'openai',
                model: 'gpt-4',
            },
        };
        await writeProjectFile(targetDir, path_1.join('.dataweave', 'config.json'), JSON.stringify(dataweaveConfig, null, 2));
        // Resolve so DAGSTER_HOME / DBT_PROFILES_DIR are absolute even when
        // targetDir was given as a relative path.
        await writeProjectFile(targetDir, '.env.example', buildEnvTemplate(path_1.resolve(targetDir)));
        console.log(chalk_1.default.green('✓ Generated configuration files'));
    }

    /**
     * Generate the starter files for each enabled integration, then the README.
     *
     * @returns {Promise<void>}
     */
    async generateTemplateFiles() {
        if (this.options.includeDbt) {
            await this.generateDbtFiles();
        }
        if (this.options.includeDagster) {
            await this.generateDagsterFiles();
        }
        if (this.options.includeSupabase) {
            await this.generateSupabaseFiles();
        }
        await this.generateReadme();
        console.log(chalk_1.default.green('✓ Generated template files'));
    }

    /**
     * Write the DBT project file, connection profiles, and a sample staging
     * model with its schema.
     *
     * @returns {Promise<void>}
     */
    async generateDbtFiles() {
        const { targetDir, name } = this.options;
        await writeProjectFile(targetDir, 'data/dbt/dbt_project.yml', buildDbtProject(toDbtProjectName(name)));
        await writeProjectFile(targetDir, 'config/profiles.yml', DBT_PROFILES);
        await writeProjectFile(targetDir, 'data/dbt/models/staging/stg_users.sql', DBT_STAGING_MODEL);
        await writeProjectFile(targetDir, 'data/dbt/models/staging/schema.yml', DBT_STAGING_SCHEMA);
    }

    /**
     * Write the Dagster package (`__init__.py`, `definitions.py`, sample
     * assets) and the project's `pyproject.toml`.
     *
     * @returns {Promise<void>}
     */
    async generateDagsterFiles() {
        const { targetDir, name } = this.options;
        await writeProjectFile(targetDir, 'data/dagster/__init__.py', '');
        await writeProjectFile(targetDir, 'data/dagster/definitions.py', DAGSTER_DEFINITIONS);
        await writeProjectFile(targetDir, 'data/dagster/assets/__init__.py', DAGSTER_ASSETS);
        await writeProjectFile(targetDir, 'pyproject.toml', buildPyproject(name));
    }

    /**
     * Write the Supabase local config and the initial SQL migration.
     *
     * @returns {Promise<void>}
     */
    async generateSupabaseFiles() {
        const { targetDir } = this.options;
        await writeProjectFile(targetDir, 'supabase/config.toml', SUPABASE_CONFIG);
        await writeProjectFile(targetDir, 'supabase/migrations/001_initial_schema.sql', SUPABASE_MIGRATION);
    }

    /**
     * Write the project `README.md`.
     *
     * @returns {Promise<void>}
     */
    async generateReadme() {
        const { targetDir, name } = this.options;
        await writeProjectFile(targetDir, 'README.md', buildReadme(name));
    }
}
exports.ProjectScaffolder = ProjectScaffolder;
//# sourceMappingURL=index.js.map