cdk-serverless-airflow
Version:
[](https://github.com/readybuilderone/serverless-airflow/actions/workflows/build.yml) [
466 lines • 63.3 kB
JavaScript
"use strict";
// Temporary holder: near the end of this file it is assigned JSII_RTTI_SYMBOL_1
// and then used as a computed property key on the Airflow class.
var _a;
// Mark this CommonJS exports object with the __esModule flag.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export name up front with the value undefined; the class itself
// is assigned to exports.Airflow after its definition.
exports.Airflow = void 0;
// Symbol taken from the global symbol registry under the key "jsii.rtti".
const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
const path = require("path");
const ec2 = require("@aws-cdk/aws-ec2");
const assets = require("@aws-cdk/aws-ecr-assets");
const ecs = require("@aws-cdk/aws-ecs");
const patterns = require("@aws-cdk/aws-ecs-patterns");
const elasticache = require("@aws-cdk/aws-elasticache");
const iam = require("@aws-cdk/aws-iam");
const logs = require("@aws-cdk/aws-logs");
const rds = require("@aws-cdk/aws-rds");
const s3 = require("@aws-cdk/aws-s3");
const secretsmanager = require("@aws-cdk/aws-secretsmanager");
const servicediscovery = require("@aws-cdk/aws-servicediscovery");
const cdk = require("@aws-cdk/core");
const core_1 = require("@aws-cdk/core");
/**
* @stability stable
*/
class Airflow extends cdk.Construct {
/**
* @stability stable
*/
constructor(scope, id, props = {}) {
var _b, _c;
super(scope, id);
this.fernetKey = (_b = process.env.AIRFLOW__CORE__FERNET_KEY) !== null && _b !== void 0 ? _b : '';
const airflowBucket = this._getAirflowBucket(props);
const vpc = this._getAirflowVPC(props);
//Initial Security Group Property
this.vpcendpointSG = this._createSecurityGroup(vpc, 'vpcendpoint-sg');
this.airflowECSServiceSG = this._createSecurityGroup(vpc, 'airflow-ecsservice-sg');
this.redisSG = this._createSecurityGroup(vpc, 'airflow-redis-sg');
this.databaseSG = this._createSecurityGroup(vpc, 'airflow-database-sg');
this._configSecurityGroup();
//Create VPC Endpoints
this._createVPCEndpoints(vpc);
//Create Database
const airflowDBSecret = this._getAirflowDBSecret();
const dbName = (_c = props.dbName) !== null && _c !== void 0 ? _c : 'airflowdb';
const airflowDB = this._getAirflowDB(vpc, airflowDBSecret, dbName);
//Create Redis
const airflowRedis = this._getAirflowRedis(props, vpc);
//Create AirflowCluster
this._getAirflowECSCluster(props, vpc, airflowBucket, airflowDBSecret, airflowDB, dbName, airflowRedis);
}
/**
* Create Security Group
* @param vpc
* @param securityGroupName
* @returns
*/
_createSecurityGroup(vpc, securityGroupName) {
return new ec2.SecurityGroup(this, securityGroupName, {
vpc,
securityGroupName,
});
}
/**
* Setting rules for security groups
*/
_configSecurityGroup() {
this.airflowECSServiceSG.connections.allowFrom(this.airflowECSServiceSG, ec2.Port.tcp(8080), 'Allow airflow scheduler/worker can connect to webserver');
this.vpcendpointSG.connections.allowFrom(ec2.Peer.ipv4('10.0.0.0/16'), ec2.Port.tcp(443), 'Allow ECS Cluster to access VPC Endpoints');
this.redisSG.connections.allowFrom(this.airflowECSServiceSG, ec2.Port.tcp(6379), 'Allow ECS Cluster to access Redis');
this.databaseSG.connections.allowFrom(this.airflowECSServiceSG, ec2.Port.tcp(5432), 'Allow ECS Cluster to access Database');
}
/**
* Create a S3 bucket for airflow to synch the DAG.
* If the bucket name is provided in the props, it will use
* @param props
* @returns
*/
_getAirflowBucket(props) {
var _b;
const bucketName = (_b = props.bucketName) !== null && _b !== void 0 ? _b : `airflow-bucket-${Math.floor(Math.random() * 1000001)}`;
const airflowBucket = new s3.Bucket(this, 'AirflowBucket', {
bucketName,
removalPolicy: cdk.RemovalPolicy.DESTROY,
blockPublicAccess: new s3.BlockPublicAccess({
blockPublicAcls: true,
blockPublicPolicy: true,
ignorePublicAcls: true,
restrictPublicBuckets: true,
}),
autoDeleteObjects: true,
});
new core_1.CfnOutput(this, 'airflow-bucket', {
value: airflowBucket.bucketName,
exportName: 'AirflowBucket',
description: 'Buckent Name',
});
return airflowBucket;
}
/**
* Get the VPC for airflow.
* This endpoints will be created for following services:
* - S3
* - ECS
* - CloudWatch
* - Secrets Manager
* @param props
* @returns
*/
_getAirflowVPC(props) {
var _b;
const vpcName = (_b = props.vpcName) !== null && _b !== void 0 ? _b : 'airflow-vpc';
const airflowVPC = new ec2.Vpc(this, vpcName, {
cidr: '10.0.0.0/16',
enableDnsHostnames: true,
enableDnsSupport: true,
maxAzs: 2,
subnetConfiguration: [
{
cidrMask: 24,
name: 'airflow-public',
subnetType: ec2.SubnetType.PUBLIC,
},
{
cidrMask: 24,
name: 'airflow-isolated',
subnetType: ec2.SubnetType.ISOLATED,
},
],
});
//TagSubnets
airflowVPC.publicSubnets.forEach(subnet => {
cdk.Tags.of(subnet).add('Name', `public-subnet-${subnet.availabilityZone}-airflow`);
});
airflowVPC.isolatedSubnets.forEach(subnet => {
cdk.Tags.of(subnet).add('Name', `isolated-subnet-${subnet.availabilityZone}-airflow`);
});
return airflowVPC;
}
/**
* Create VPC Endpoints
* @param vpc
*/
_createVPCEndpoints(vpc) {
//Create S3 Gateway VPC Endpoints
vpc.addGatewayEndpoint('s3-endpoint', {
service: ec2.GatewayVpcEndpointAwsService.S3,
subnets: [
{ subnetType: ec2.SubnetType.ISOLATED },
],
});
//Create Interface VPC Endpoints for ECR/ECS/CloudWatch/SecretsManager
vpc.addInterfaceEndpoint('ecr-endpoint', {
service: ec2.InterfaceVpcEndpointAwsService.ECR,
privateDnsEnabled: true,
securityGroups: [this.vpcendpointSG],
});
vpc.addInterfaceEndpoint('ecr-docker-endpoint', {
service: ec2.InterfaceVpcEndpointAwsService.ECR_DOCKER,
privateDnsEnabled: true,
securityGroups: [this.vpcendpointSG],
});
// vpc.addInterfaceEndpoint('ecs-endpoint', {
// service: ec2.InterfaceVpcEndpointAwsService.ECS,
// privateDnsEnabled: true,
// securityGroups: [this.vpcendpointSG],
// });
vpc.addInterfaceEndpoint('cloudwatchlogs-endpoint', {
service: ec2.InterfaceVpcEndpointAwsService.CLOUDWATCH_LOGS,
privateDnsEnabled: true,
securityGroups: [this.vpcendpointSG],
});
vpc.addInterfaceEndpoint('secrets-manager-endpoint', {
service: ec2.InterfaceVpcEndpointAwsService.SECRETS_MANAGER,
privateDnsEnabled: true,
securityGroups: [this.vpcendpointSG],
});
}
_getAirflowDBSecret() {
const databaseSceret = new secretsmanager.Secret(this, 'airflow-db-credentials', {
secretName: 'airflow-db-credentials',
generateSecretString: {
secretStringTemplate: '{"username":"airfflow"}',
generateStringKey: 'password',
passwordLength: 16,
excludeCharacters: '\"@/',
excludePunctuation: true,
},
});
return databaseSceret;
}
/**
* Get Database for Airflow
* @param props
* @param vpc
* @returns
*/
_getAirflowDB(vpc, databaseSceret, dbName) {
const credentials = rds.Credentials.fromSecret(databaseSceret);
const dbInstance = new rds.DatabaseInstance(this, 'airflow-db', {
vpc,
vpcSubnets: {
subnetType: ec2.SubnetType.ISOLATED,
},
engine: rds.DatabaseInstanceEngine.postgres({
version: rds.PostgresEngineVersion.VER_9_6_18,
}),
credentials,
instanceIdentifier: 'airflow-db',
databaseName: dbName,
port: 5432,
instanceType: ec2.InstanceType.of(ec2.InstanceClass.BURSTABLE3, ec2.InstanceSize.MICRO),
allocatedStorage: 20,
removalPolicy: cdk.RemovalPolicy.DESTROY,
parameterGroup: rds.ParameterGroup.fromParameterGroupName(this, 'airflow-db-parametergroup', 'default.postgres9.6'),
deletionProtection: false,
securityGroups: [this.databaseSG],
});
return dbInstance;
}
_getAirflowRedis(props, vpc) {
var _b;
const redisName = (_b = props.redisName) !== null && _b !== void 0 ? _b : 'airflowredis';
const redisCluster = new elasticache.CfnCacheCluster(this, 'airflowredis', {
engine: 'redis',
cacheNodeType: 'cache.t2.small',
numCacheNodes: 1,
port: 6379,
clusterName: redisName,
cacheSubnetGroupName: new elasticache.CfnSubnetGroup(this, 'redissubnets', {
description: 'Airflow Redis isolated subnet group',
subnetIds: vpc.isolatedSubnets.map((subnet) => subnet.subnetId),
}).ref,
vpcSecurityGroupIds: [this.redisSG.securityGroupId],
});
return redisCluster;
}
/**
* Create the Ariflow ECS Cluster
* @param props
* @returns
*/
_getAirflowECSCluster(props, vpc, bucket, databaseSceret, database, dbName, redis) {
var _b;
//Create ECS Cluster
const clusterName = (_b = props.ecsclusterName) !== null && _b !== void 0 ? _b : 'AirflowECSCluster';
const airflowCluster = new ecs.Cluster(this, 'airflow-ecs-cluster', {
vpc,
clusterName,
containerInsights: true,
});
//Create Roles
const executionRole = this._createTaskExecutionRole();
const taskRole = this._createTaskRole(bucket);
//Create Log Group
const webserverLogGroup = this._createAirflowLogGroup('airflow-webserver-lg', '/ecs/airflow-webserver');
const schedulerLogGroup = this._createAirflowLogGroup('airflow-scheduler-lg', '/ecs/airflow-scheduler');
const workerLogGroup = this._createAirflowLogGroup('airflow-worker-lg', '/ecs/airflow-worker');
webserverLogGroup.grantWrite(taskRole);
schedulerLogGroup.grantWrite(taskRole);
workerLogGroup.grantWrite(taskRole);
//Create Airflow ECS Service
this._createAirflowWebserverService(executionRole, taskRole, bucket, databaseSceret, database, dbName, airflowCluster, webserverLogGroup);
this._createAirflowSchedulerService(executionRole, taskRole, schedulerLogGroup, bucket, databaseSceret, database, dbName, redis, airflowCluster);
this._createAirflowWorkerService(executionRole, taskRole, workerLogGroup, bucket, databaseSceret, database, dbName, redis, airflowCluster);
return airflowCluster;
}
/**
* Create log group for Airflow ECS Cluster
*/
_createAirflowLogGroup(logGroupId, logGroupName) {
return new logs.LogGroup(this, logGroupId, {
logGroupName,
retention: logs.RetentionDays.ONE_MONTH,
removalPolicy: cdk.RemovalPolicy.DESTROY,
});
}
_createTaskExecutionRole() {
const executionRole = new iam.Role(this, 'AirflowTaskExecutionRole', {
assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'),
});
executionRole.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonECSTaskExecutionRolePolicy'));
// executionRole.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName('AdministratorAccess'));
return executionRole;
}
_createTaskRole(bucket) {
const taskRole = new iam.Role(this, 'AirflowTaskRole', {
assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'),
});
// taskRole.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName('AdministratorAccess'));
//S3 Policy
taskRole.addToPolicy(new iam.PolicyStatement({
effect: iam.Effect.ALLOW,
actions: [
's3:ListBucket',
's3:GetObject',
's3:GetBucketLocation',
],
resources: [`${bucket.bucketArn}`, `${bucket.bucketArn}/*`],
}));
//Secrets Manager
taskRole.addToPolicy(new iam.PolicyStatement({
effect: iam.Effect.ALLOW,
actions: ['secretsmanager:GetSecretValue'],
resources: ['*'],
}));
return taskRole;
}
/**
* Create Airflow Webserver ECS Service
*/
_createAirflowWebserverService(executionRole, taskRole, bucket, databaseSceret, database, dbName, airflowCluster, webserverLogGroup) {
const loadBalancedFargateService = new patterns.ApplicationLoadBalancedFargateService(this, 'airflow-webserver-pattners', {
cluster: airflowCluster,
cpu: 512,
memoryLimitMiB: 1024,
taskImageOptions: {
image: ecs.AssetImage.fromDockerImageAsset(this._createAirflowWebServiceDockerImage()),
taskRole,
executionRole,
family: 'airflow-webserver-pattners',
environment: {
AIRFLOW_FERNET_KEY: this.fernetKey,
AIRFLOW_DATABASE_NAME: dbName,
AIRFLOW_DATABASE_PORT_NUMBER: '5432',
AIRFLOW_DATABASE_HOST: database.dbInstanceEndpointAddress,
AIRFLOW_EXECUTOR: 'CeleryExecutor',
AIRFLOW_LOAD_EXAMPLES: 'no',
AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: '30',
BUCKET_NAME: bucket.bucketName,
},
secrets: {
AIRFLOW_DATABASE_USERNAME: ecs.Secret.fromSecretsManager(databaseSceret, 'username'),
AIRFLOW_DATABASE_PASSWORD: ecs.Secret.fromSecretsManager(databaseSceret, 'password'),
},
containerPort: 8080,
logDriver: ecs.LogDriver.awsLogs({
streamPrefix: 'ecs',
logGroup: webserverLogGroup,
}),
},
securityGroups: [this.airflowECSServiceSG],
serviceName: 'AirflowWebserverServiceName',
desiredCount: 1,
loadBalancerName: 'Airflow-Webserver-LB',
cloudMapOptions: {
name: 'webserver',
dnsRecordType: servicediscovery.DnsRecordType.A,
dnsTtl: cdk.Duration.seconds(30),
cloudMapNamespace: new servicediscovery.PrivateDnsNamespace(this, 'webserver-dns-namespace', {
name: 'airflow',
vpc: airflowCluster.vpc,
}),
},
});
loadBalancedFargateService.targetGroup.configureHealthCheck({
path: '/health',
interval: cdk.Duration.seconds(60),
timeout: cdk.Duration.seconds(20),
});
}
/**
* Create Airflow Scheduler ECS Service
*/
_createAirflowSchedulerService(executionRole, taskRole, schedulerLogGroup, bucket, databaseSceret, database, dbName, redis, airflowCluster) {
//Create Task Definition
const schedulerTask = new ecs.FargateTaskDefinition(this, 'AriflowSchedulerTask', {
executionRole,
taskRole,
cpu: 512,
memoryLimitMiB: 2048,
family: 'airflow-scheduler',
});
schedulerTask.addContainer('airflow-scheduler-container', {
image: ecs.AssetImage.fromDockerImageAsset(this._createAirflowSchedulerDockerImage()),
logging: new ecs.AwsLogDriver({
streamPrefix: 'ecs',
logGroup: schedulerLogGroup,
}),
environment: {
AIRFLOW_FERNET_KEY: this.fernetKey,
AIRFLOW_DATABASE_NAME: dbName,
AIRFLOW_DATABASE_PORT_NUMBER: '5432',
AIRFLOW_DATABASE_HOST: database.dbInstanceEndpointAddress,
AIRFLOW_EXECUTOR: 'CeleryExecutor',
AIRFLOW_WEBSERVER_HOST: 'webserver.airflow',
AIRFLOW_LOAD_EXAMPLES: 'no',
AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: '30',
REDIS_HOST: redis.attrRedisEndpointAddress,
BUCKET_NAME: bucket.bucketName,
},
secrets: {
AIRFLOW_DATABASE_USERNAME: ecs.Secret.fromSecretsManager(databaseSceret, 'username'),
AIRFLOW_DATABASE_PASSWORD: ecs.Secret.fromSecretsManager(databaseSceret, 'password'),
},
});
//Create AirflowSchedulerService
new ecs.FargateService(this, 'AirflowSchedulerService', {
cluster: airflowCluster,
taskDefinition: schedulerTask,
serviceName: 'AirflowSchedulerServiceName',
securityGroups: [this.airflowECSServiceSG],
});
}
/**
* Create Airflow Worker ECS Service
*/
_createAirflowWorkerService(executionRole, taskRole, workerLogGroup, bucket, databaseSceret, database, dbName, redis, airflowCluster) {
//Create Task Definition
const workerTask = new ecs.FargateTaskDefinition(this, 'AriflowworkerTask', {
executionRole,
taskRole,
cpu: 1024,
memoryLimitMiB: 3072,
family: 'airflow-worker',
});
workerTask.addContainer('airflow-worker-container', {
image: ecs.AssetImage.fromDockerImageAsset(this._createAirflowWorkerDockerImage()),
logging: new ecs.AwsLogDriver({
streamPrefix: 'ecs',
logGroup: workerLogGroup,
}),
environment: {
AIRFLOW_FERNET_KEY: this.fernetKey,
AIRFLOW_DATABASE_NAME: dbName,
AIRFLOW_DATABASE_PORT_NUMBER: '5432',
AIRFLOW_DATABASE_HOST: database.dbInstanceEndpointAddress,
AIRFLOW_EXECUTOR: 'CeleryExecutor',
AIRFLOW_WEBSERVER_HOST: 'webserver.airflow',
AIRFLOW_LOAD_EXAMPLES: 'no',
AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: '30',
REDIS_HOST: redis.attrRedisEndpointAddress,
BUCKET_NAME: bucket.bucketName,
},
secrets: {
AIRFLOW_DATABASE_USERNAME: ecs.Secret.fromSecretsManager(databaseSceret, 'username'),
AIRFLOW_DATABASE_PASSWORD: ecs.Secret.fromSecretsManager(databaseSceret, 'password'),
},
portMappings: [{ containerPort: 8793 }],
});
//Create AirflowWorkerService
new ecs.FargateService(this, 'AirflowWorkerService', {
cluster: airflowCluster,
taskDefinition: workerTask,
serviceName: 'AirflowWorkerServiceName',
securityGroups: [this.airflowECSServiceSG],
});
}
_createAirflowWebServiceDockerImage() {
return new assets.DockerImageAsset(this, 'airflow-webserver', {
directory: path.join(__dirname, '/../docker-images/airflow-webserver'),
});
}
_createAirflowSchedulerDockerImage() {
return new assets.DockerImageAsset(this, 'airflow-scheduler', {
directory: path.join(__dirname, '/../docker-images/airflow-scheduler'),
});
}
_createAirflowWorkerDockerImage() {
return new assets.DockerImageAsset(this, 'airflow-worker', {
directory: path.join(__dirname, '/../docker-images/airflow-worker'),
});
}
}
// Publish the class as the module's `Airflow` export.
exports.Airflow = Airflow;
// Store a static record on the class, keyed by the "jsii.rtti" symbol, that
// carries the `fqn` and `version` strings below.
_a = JSII_RTTI_SYMBOL_1;
Airflow[_a] = { fqn: "cdk-serverless-airflow.Airflow", version: "0.7.6" };
//# sourceMappingURL=data:application/json;base64,