// dataweave: AI-assisted CLI for modern data pipelines with DBT, Dagster, and Supabase integration
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ProjectScaffolder = void 0;
const promises_1 = require("fs/promises");
const path_1 = require("path");
const fs_1 = require("fs");
const chalk_1 = __importDefault(require("chalk"));
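// Scaffolds a new dataweave project: creates the directory tree, writes config
// files, and generates DBT, Dagster, and Supabase starter templates.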
class ProjectScaffolder {
constructor(options) {
this.options = {
includeDbt: true,
includeDagster: true,
includeSupabase: true,
...options,
};
}
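// Runs the full scaffolding flow and prints the follow-up commands for the user.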
async scaffold() {
const { targetDir, name } = this.options;
console.log(chalk_1.default.blue(`🚀 Scaffolding dataweave project: ${name}`));
await this.createDirectoryStructure();
await this.generateConfigFiles();
await this.generateTemplateFiles();
console.log(chalk_1.default.green(`✅ Successfully scaffolded project in ${targetDir}`));
console.log(chalk_1.default.gray('\nNext steps:'));
console.log(chalk_1.default.gray(` cd ${name}`));
console.log(chalk_1.default.gray(' npm install'));
console.log(chalk_1.default.gray(' dataweave --help'));
}
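// Creates the nested project directories, skipping any that already exist.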
async createDirectoryStructure() {
const { targetDir } = this.options;
const dirs = [
'data',
'data/dbt',
'data/dbt/models',
'data/dbt/models/staging',
'data/dbt/models/intermediate',
'data/dbt/models/marts',
'data/dbt/macros',
'data/dbt/tests',
'data/dagster',
'data/dagster/assets',
'data/dagster/jobs',
'data/dagster/sensors',
'data/dagster/schedules',
'data/assets',
'supabase',
'supabase/migrations',
'supabase/functions',
'.dataweave',
'config',
'scripts',
];
for (const dir of dirs) {
const fullPath = (0, path_1.join)(targetDir, dir);
if (!(0, fs_1.existsSync)(fullPath)) {
await (0, promises_1.mkdir)(fullPath, { recursive: true });
}
}
console.log(chalk_1.default.green('✓ Created directory structure'));
}
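// Writes .dataweave/config.json and a .env.example with placeholder credentials.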
async generateConfigFiles() {
const { targetDir, name } = this.options;
const dataweaveConfig = {
name,
version: '1.0.0',
dbt: {
enabled: this.options.includeDbt,
profile: 'dataweave',
target: 'dev',
},
dagster: {
enabled: this.options.includeDagster,
workspace: './data/dagster',
},
supabase: {
enabled: this.options.includeSupabase,
projectId: '',
apiUrl: '',
anonKey: '',
},
ai: {
enabled: true,
provider: 'openai',
model: 'gpt-4',
},
};
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, '.dataweave', 'config.json'), JSON.stringify(dataweaveConfig, null, 2));
const envTemplate = `
DATABASE_URL=postgresql://username:password@localhost:5432/database_name
SUPABASE_URL=https://your-project.supabase.co
SUPABASE_ANON_KEY=your-anon-key
SUPABASE_SERVICE_ROLE_KEY=your-service-role-key
OPENAI_API_KEY=your-openai-api-key
DAGSTER_HOME=${targetDir}/data/dagster
DBT_PROFILES_DIR=${targetDir}/config
`;
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, '.env.example'), envTemplate);
console.log(chalk_1.default.green('✓ Generated configuration files'));
}
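// Generates starter files for each enabled integration, then the project README.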
async generateTemplateFiles() {
if (this.options.includeDbt) {
await this.generateDbtFiles();
}
if (this.options.includeDagster) {
await this.generateDagsterFiles();
}
if (this.options.includeSupabase) {
await this.generateSupabaseFiles();
}
await this.generateReadme();
console.log(chalk_1.default.green('✓ Generated template files'));
}
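// Writes dbt_project.yml, profiles.yml, a sample staging model, and its schema tests.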
async generateDbtFiles() {
const { targetDir, name } = this.options;
const dbtProject = `name: '${name}'
version: '1.0.0'
config-version: 2

profile: 'dataweave'

model-paths: ["models"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["seeds"]
macro-paths: ["macros"]
snapshot-paths: ["snapshots"]

target-path: "target"
clean-targets:
  - "target"
  - "dbt_packages"

models:
  ${name}:
    staging:
      +materialized: view
    intermediate:
      +materialized: view
    marts:
      +materialized: table
`;
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, 'data/dbt/dbt_project.yml'), dbtProject);
const profiles = `dataweave:
  outputs:
    dev:
      type: postgres
      host: localhost
      user: postgres
      password: postgres
      port: 5432
      dbname: dataweave_dev
      schema: public
      threads: 4
      keepalives_idle: 0
    prod:
      type: postgres
      host: "{{ env_var('DATABASE_HOST') }}"
      user: "{{ env_var('DATABASE_USER') }}"
      password: "{{ env_var('DATABASE_PASSWORD') }}"
      port: "{{ env_var('DATABASE_PORT') | as_number }}"
      dbname: "{{ env_var('DATABASE_NAME') }}"
      schema: public
      threads: 4
      keepalives_idle: 0
  target: dev
`;
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, 'config/profiles.yml'), profiles);
const stagingModel = `{{ config(materialized='view') }}

-- Sample staging model
-- This is where you would clean and standardize raw data

select
    id,
    name,
    email,
    created_at,
    updated_at
from {{ source('raw', 'users') }}
where created_at is not null
`;
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, 'data/dbt/models/staging/stg_users.sql'), stagingModel);
const schema = `version: 2

sources:
  - name: raw
    description: Raw data from various sources
    tables:
      - name: users
        description: Raw user data
        columns:
          - name: id
            description: Primary key
            tests:
              - unique
              - not_null
          - name: email
            description: User email address
            tests:
              - unique
              - not_null

models:
  - name: stg_users
    description: Staged user data with basic cleaning
    columns:
      - name: id
        description: Primary key
        tests:
          - unique
          - not_null
      - name: email
        description: User email address
        tests:
          - unique
          - not_null
`;
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, 'data/dbt/models/staging/schema.yml'), schema);
}
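// Writes the Dagster package: definitions.py, a sample assets module, and pyproject.toml.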
async generateDagsterFiles() {
const { targetDir, name } = this.options;
const definitions = `from dagster import Definitions, load_assets_from_modules

from . import assets

defs = Definitions(
    assets=load_assets_from_modules([assets]),
)
`;
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, 'data/dagster/__init__.py'), '');
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, 'data/dagster/definitions.py'), definitions);
const assetsFile = `from dagster import asset
import pandas as pd


@asset
def sample_data():
    """Sample data asset to demonstrate Dagster functionality."""
    return pd.DataFrame({
        'id': [1, 2, 3],
        'name': ['Alice', 'Bob', 'Charlie'],
        'value': [100, 200, 300]
    })


@asset
def processed_data(sample_data: pd.DataFrame):
    """Process the sample data."""
    # The matching argument name already declares the dependency on sample_data,
    # so no explicit deps= argument is needed on the decorator.
    processed = sample_data.copy()
    processed['doubled_value'] = processed['value'] * 2
    return processed
`;
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, 'data/dagster/assets/__init__.py'), assetsFile);
const pyproject = `[build-system]
requires = ["setuptools", "wheel"]
[project]
name = "${name}-dagster"
version = "0.1.0"
dependencies = [
"dagster",
"dagster-webserver",
"dagster-postgres",
"pandas",
"psycopg2-binary",
]
[tool.dagster]
code_location = "data.dagster.definitions"
`;
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, 'pyproject.toml'), pyproject);
}
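// Writes supabase/config.toml for local development plus an initial SQL migration.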
async generateSupabaseFiles() {
const { targetDir } = this.options;
const supabaseConfig = `[api]
enabled = true
port = 54321
[db]
port = 54322
[studio]
enabled = true
port = 54323
[inbucket]
enabled = true
port = 54324
[storage]
enabled = true
file_size_limit = "50MiB"
[auth]
enabled = true
external_url = "http://localhost:3000"
[ ]
enabled = true
`;
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, 'supabase/config.toml'), supabaseConfig);
const migration = `-- Sample migration
-- This creates a basic users table

create table if not exists users (
  id uuid default gen_random_uuid() primary key,
  email text unique not null,
  name text,
  created_at timestamp with time zone default now(),
  updated_at timestamp with time zone default now()
);

-- Enable Row Level Security
alter table users enable row level security;

-- Create updated_at trigger
create or replace function update_updated_at_column()
returns trigger as $$
begin
  new.updated_at = now();
  return new;
end;
$$ language plpgsql;

create trigger update_users_updated_at
  before update on users
  for each row
  execute function update_updated_at_column();
`;
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, 'supabase/migrations/001_initial_schema.sql'), migration);
}
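// Writes a README.md covering prerequisites, setup, common commands, and project layout.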
async generateReadme() {
const { targetDir, name } = this.options;
const readme = `# ${name}

AI-assisted data pipeline project generated with [dataweave](https://github.com/yourusername/dataweave).

## Prerequisites

- Python 3.8+
- Node.js 16+
- PostgreSQL (or a Supabase account)

## Setup

1. Install dependencies:

   \`\`\`bash
   pip install -e .
   npm install
   \`\`\`

2. Set up environment variables:

   \`\`\`bash
   cp .env.example .env
   \`\`\`

3. Initialize the database:

   \`\`\`bash
   npx supabase start
   createdb ${name}_dev
   \`\`\`

## DBT

Run DBT models:

\`\`\`bash
cd data/dbt
dbt run
dbt test
\`\`\`

## Dagster

Start the Dagster web server:

\`\`\`bash
dagster dev -f data/dagster/definitions.py
\`\`\`

## CLI commands

\`\`\`bash
dataweave dbt model new --name my_model
dataweave dagster asset new --name my_asset
dataweave ai generate "Create a model that calculates monthly revenue"
dataweave analyze
\`\`\`

## Project structure

\`\`\`
${name}/
├── data/
│   ├── dbt/
│   ├── dagster/
│   └── assets/
├── supabase/
├── config/
├── scripts/
└── .dataweave/
\`\`\`

## Next steps

1. Configure your database connection in \`.env\`
2. Update the DBT profile in \`config/profiles.yml\`
3. Run your first DBT models: \`cd data/dbt && dbt run\`
4. Start the Dagster web server: \`dagster dev\`
5. Explore AI-powered features: \`dataweave ai --help\`

## Resources

- [DBT Documentation](https://docs.getdbt.com/)
- [Dagster Documentation](https://docs.dagster.io/)
- [Supabase Documentation](https://supabase.com/docs)
- [Dataweave Documentation](https://github.com/yourusername/dataweave)
`;
await (0, promises_1.writeFile)((0, path_1.join)(targetDir, 'README.md'), readme);
}
}
exports.ProjectScaffolder = ProjectScaffolder;
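// Illustrative usage sketch (an assumption, not part of the compiled module: the
// dataweave CLI is what normally constructs the scaffolder from command options):
//
//   const { ProjectScaffolder } = require('./');
//   const scaffolder = new ProjectScaffolder({
//     targetDir: './my-project', // hypothetical output path
//     name: 'my-project',
//   });
//   scaffolder.scaffold().catch(console.error);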
//# sourceMappingURL=index.js.map