UNPKG

cdk-serverless-airflow

Version:

[![Build](https://github.com/readybuilderone/serverless-airflow/actions/workflows/build.yml/badge.svg)](https://github.com/readybuilderone/serverless-airflow/actions/workflows/build.yml) [![NPM version](https://badge.fury.io/js/cdk-serverless-airflow.svg)](https://badge.fury.io/js/cdk-serverless-airflow)

466 lines 63.3 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Airflow = void 0;
const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
const path = require("path");
const ec2 = require("@aws-cdk/aws-ec2");
const assets = require("@aws-cdk/aws-ecr-assets");
const ecs = require("@aws-cdk/aws-ecs");
const patterns = require("@aws-cdk/aws-ecs-patterns");
const elasticache = require("@aws-cdk/aws-elasticache");
const iam = require("@aws-cdk/aws-iam");
const logs = require("@aws-cdk/aws-logs");
const rds = require("@aws-cdk/aws-rds");
const s3 = require("@aws-cdk/aws-s3");
const secretsmanager = require("@aws-cdk/aws-secretsmanager");
const servicediscovery = require("@aws-cdk/aws-servicediscovery");
const cdk = require("@aws-cdk/core");
const core_1 = require("@aws-cdk/core");

/**
 * CIDR block of the Airflow VPC. The same range is used as the allowed source
 * of HTTPS traffic to the interface VPC endpoints, so both must stay in sync.
 */
const AIRFLOW_VPC_CIDR = '10.0.0.0/16';

/**
 * Return `value` unless it is `null` or `undefined`, in which case return `fallback`.
 * @param {*} value candidate value
 * @param {*} fallback value used when `value` is nullish
 * @returns {*} `value` or `fallback`
 */
const valueOr = (value, fallback) => (value != null ? value : fallback);

/**
 * CDK construct that provisions an Airflow deployment: an S3 bucket, a VPC with
 * VPC endpoints, a PostgreSQL RDS instance, a Redis ElastiCache cluster and an
 * ECS cluster running Fargate services for the webserver, scheduler and worker.
 *
 * Props read by this construct (all optional): `bucketName`, `vpcName`,
 * `dbName`, `redisName`, `ecsclusterName`.
 *
 * @stability stable
 */
class Airflow extends cdk.Construct {
  /**
   * @param scope parent construct
   * @param id construct id
   * @param props optional settings, see the class description
   * @stability stable
   */
  constructor(scope, id, props = {}) {
    super(scope, id);
    // Fernet key is taken from the synthesizing process' environment; empty string when unset.
    this.fernetKey = valueOr(process.env.AIRFLOW__CORE__FERNET_KEY, '');

    const airflowBucket = this._getAirflowBucket(props);
    const vpc = this._getAirflowVPC(props);

    // Security groups are stored on the instance because several helpers reference them.
    this.vpcendpointSG = this._createSecurityGroup(vpc, 'vpcendpoint-sg');
    this.airflowECSServiceSG = this._createSecurityGroup(vpc, 'airflow-ecsservice-sg');
    this.redisSG = this._createSecurityGroup(vpc, 'airflow-redis-sg');
    this.databaseSG = this._createSecurityGroup(vpc, 'airflow-database-sg');
    this._configSecurityGroup();

    this._createVPCEndpoints(vpc);

    // Database
    const airflowDBSecret = this._getAirflowDBSecret();
    const dbName = valueOr(props.dbName, 'airflowdb');
    const airflowDB = this._getAirflowDB(vpc, airflowDBSecret, dbName);

    // Redis
    const airflowRedis = this._getAirflowRedis(props, vpc);

    // ECS cluster and the three Airflow services
    this._getAirflowECSCluster(props, vpc, airflowBucket, airflowDBSecret, airflowDB, dbName, airflowRedis);
  }

  /**
   * Create a security group in the given VPC.
   * @param vpc VPC the group belongs to
   * @param securityGroupName used both as construct id and as the physical group name
   * @returns the new `ec2.SecurityGroup`
   */
  _createSecurityGroup(vpc, securityGroupName) {
    return new ec2.SecurityGroup(this, securityGroupName, {
      vpc,
      securityGroupName,
    });
  }

  /**
   * Add the ingress rules between the security groups created in the constructor:
   * ECS services to themselves on 8080, the VPC CIDR to the VPC endpoints on 443,
   * ECS services to Redis on 6379 and ECS services to the database on 5432.
   */
  _configSecurityGroup() {
    const ecsServiceSG = this.airflowECSServiceSG;
    ecsServiceSG.connections.allowFrom(ecsServiceSG, ec2.Port.tcp(8080), 'Allow airflow scheduler/worker can connect to webserver');
    this.vpcendpointSG.connections.allowFrom(ec2.Peer.ipv4(AIRFLOW_VPC_CIDR), ec2.Port.tcp(443), 'Allow ECS Cluster to access VPC Endpoints');
    this.redisSG.connections.allowFrom(ecsServiceSG, ec2.Port.tcp(6379), 'Allow ECS Cluster to access Redis');
    this.databaseSG.connections.allowFrom(ecsServiceSG, ec2.Port.tcp(5432), 'Allow ECS Cluster to access Database');
  }

  /**
   * Create the S3 bucket used by Airflow and export its name as a stack output.
   *
   * When `props.bucketName` is provided it is used as the physical bucket name.
   * Otherwise no name is passed and CloudFormation assigns one. A random default
   * must not be computed here: it would change on every synthesis and force the
   * bucket to be replaced on each deployment.
   *
   * @param props construct props; only `bucketName` is read
   * @returns the new `s3.Bucket`
   */
  _getAirflowBucket(props) {
    const bucketName = props.bucketName != null ? props.bucketName : undefined;
    const airflowBucket = new s3.Bucket(this, 'AirflowBucket', {
      bucketName,
      removalPolicy: cdk.RemovalPolicy.DESTROY,
      blockPublicAccess: new s3.BlockPublicAccess({
        blockPublicAcls: true,
        blockPublicPolicy: true,
        ignorePublicAcls: true,
        restrictPublicBuckets: true,
      }),
      autoDeleteObjects: true,
    });
    new core_1.CfnOutput(this, 'airflow-bucket', {
      value: airflowBucket.bucketName,
      exportName: 'AirflowBucket',
      description: 'Buckent Name',
    });
    return airflowBucket;
  }

  /**
   * Create the VPC for Airflow: two AZs at most, one public and one isolated
   * /24 subnet group, with `Name` tags on every subnet.
   *
   * @param props construct props; `vpcName` is used as the construct id of the
   *   VPC (default `'airflow-vpc'`)
   * @returns the new `ec2.Vpc`
   */
  _getAirflowVPC(props) {
    const vpcName = valueOr(props.vpcName, 'airflow-vpc');
    const airflowVPC = new ec2.Vpc(this, vpcName, {
      cidr: AIRFLOW_VPC_CIDR,
      enableDnsHostnames: true,
      enableDnsSupport: true,
      maxAzs: 2,
      subnetConfiguration: [
        {
          cidrMask: 24,
          name: 'airflow-public',
          subnetType: ec2.SubnetType.PUBLIC,
        },
        {
          cidrMask: 24,
          name: 'airflow-isolated',
          subnetType: ec2.SubnetType.ISOLATED,
        },
      ],
    });
    // Tag subnets so they are recognisable by name.
    for (const subnet of airflowVPC.publicSubnets) {
      cdk.Tags.of(subnet).add('Name', `public-subnet-${subnet.availabilityZone}-airflow`);
    }
    for (const subnet of airflowVPC.isolatedSubnets) {
      cdk.Tags.of(subnet).add('Name', `isolated-subnet-${subnet.availabilityZone}-airflow`);
    }
    return airflowVPC;
  }

  /**
   * Create the VPC endpoints: an S3 gateway endpoint on the isolated subnets and
   * interface endpoints for ECR, ECR Docker, CloudWatch Logs and Secrets Manager,
   * all attached to the VPC-endpoint security group.
   *
   * NOTE(review): no ECS interface endpoint is created; the corresponding call
   * was commented out in the original source.
   *
   * @param vpc VPC that receives the endpoints
   */
  _createVPCEndpoints(vpc) {
    vpc.addGatewayEndpoint('s3-endpoint', {
      service: ec2.GatewayVpcEndpointAwsService.S3,
      subnets: [
        { subnetType: ec2.SubnetType.ISOLATED },
      ],
    });

    const interfaceEndpoints = [
      ['ecr-endpoint', ec2.InterfaceVpcEndpointAwsService.ECR],
      ['ecr-docker-endpoint', ec2.InterfaceVpcEndpointAwsService.ECR_DOCKER],
      ['cloudwatchlogs-endpoint', ec2.InterfaceVpcEndpointAwsService.CLOUDWATCH_LOGS],
      ['secrets-manager-endpoint', ec2.InterfaceVpcEndpointAwsService.SECRETS_MANAGER],
    ];
    for (const [endpointId, service] of interfaceEndpoints) {
      vpc.addInterfaceEndpoint(endpointId, {
        service,
        privateDnsEnabled: true,
        securityGroups: [this.vpcendpointSG],
      });
    }
  }

  /**
   * Create the Secrets Manager secret holding the database credentials. The
   * username comes from the template; a 16-character password without
   * punctuation is generated under the `password` key.
   * @returns the new `secretsmanager.Secret`
   */
  _getAirflowDBSecret() {
    return new secretsmanager.Secret(this, 'airflow-db-credentials', {
      secretName: 'airflow-db-credentials',
      generateSecretString: {
        secretStringTemplate: '{"username":"airfflow"}',
        generateStringKey: 'password',
        passwordLength: 16,
        excludeCharacters: '\"@/',
        excludePunctuation: true,
      },
    });
  }

  /**
   * Create the PostgreSQL RDS instance for Airflow in the isolated subnets.
   * @param vpc VPC hosting the instance
   * @param databaseSceret secret providing the instance credentials
   * @param dbName name of the initial database
   * @returns the new `rds.DatabaseInstance`
   */
  _getAirflowDB(vpc, databaseSceret, dbName) {
    const credentials = rds.Credentials.fromSecret(databaseSceret);
    return new rds.DatabaseInstance(this, 'airflow-db', {
      vpc,
      vpcSubnets: {
        subnetType: ec2.SubnetType.ISOLATED,
      },
      engine: rds.DatabaseInstanceEngine.postgres({
        version: rds.PostgresEngineVersion.VER_9_6_18,
      }),
      credentials,
      instanceIdentifier: 'airflow-db',
      databaseName: dbName,
      port: 5432,
      instanceType: ec2.InstanceType.of(ec2.InstanceClass.BURSTABLE3, ec2.InstanceSize.MICRO),
      allocatedStorage: 20,
      removalPolicy: cdk.RemovalPolicy.DESTROY,
      parameterGroup: rds.ParameterGroup.fromParameterGroupName(this, 'airflow-db-parametergroup', 'default.postgres9.6'),
      deletionProtection: false,
      securityGroups: [this.databaseSG],
    });
  }

  /**
   * Create a single-node Redis cache cluster in the isolated subnets.
   * @param props construct props; `redisName` is the cluster name (default `'airflowredis'`)
   * @param vpc VPC whose isolated subnets form the cache subnet group
   * @returns the new `elasticache.CfnCacheCluster`
   */
  _getAirflowRedis(props, vpc) {
    const redisName = valueOr(props.redisName, 'airflowredis');
    const subnetGroup = new elasticache.CfnSubnetGroup(this, 'redissubnets', {
      description: 'Airflow Redis isolated subnet group',
      subnetIds: vpc.isolatedSubnets.map((subnet) => subnet.subnetId),
    });
    return new elasticache.CfnCacheCluster(this, 'airflowredis', {
      engine: 'redis',
      cacheNodeType: 'cache.t2.small',
      numCacheNodes: 1,
      port: 6379,
      clusterName: redisName,
      cacheSubnetGroupName: subnetGroup.ref,
      vpcSecurityGroupIds: [this.redisSG.securityGroupId],
    });
  }

  /**
   * Create the Airflow ECS cluster together with its IAM roles, log groups and
   * the webserver, scheduler and worker services.
   * @param props construct props; `ecsclusterName` is the cluster name (default `'AirflowECSCluster'`)
   * @param vpc VPC hosting the cluster
   * @param bucket Airflow S3 bucket
   * @param databaseSceret database credentials secret
   * @param database RDS instance
   * @param dbName database name passed to the containers
   * @param redis Redis cache cluster
   * @returns the new `ecs.Cluster`
   */
  _getAirflowECSCluster(props, vpc, bucket, databaseSceret, database, dbName, redis) {
    const clusterName = valueOr(props.ecsclusterName, 'AirflowECSCluster');
    const airflowCluster = new ecs.Cluster(this, 'airflow-ecs-cluster', {
      vpc,
      clusterName,
      containerInsights: true,
    });

    // Roles shared by all three services
    const executionRole = this._createTaskExecutionRole();
    const taskRole = this._createTaskRole(bucket);

    // One log group per service; the task role may write to each of them.
    const webserverLogGroup = this._createAirflowLogGroup('airflow-webserver-lg', '/ecs/airflow-webserver');
    const schedulerLogGroup = this._createAirflowLogGroup('airflow-scheduler-lg', '/ecs/airflow-scheduler');
    const workerLogGroup = this._createAirflowLogGroup('airflow-worker-lg', '/ecs/airflow-worker');
    webserverLogGroup.grantWrite(taskRole);
    schedulerLogGroup.grantWrite(taskRole);
    workerLogGroup.grantWrite(taskRole);

    // Services
    this._createAirflowWebserverService(executionRole, taskRole, bucket, databaseSceret, database, dbName, airflowCluster, webserverLogGroup);
    this._createAirflowSchedulerService(executionRole, taskRole, schedulerLogGroup, bucket, databaseSceret, database, dbName, redis, airflowCluster);
    this._createAirflowWorkerService(executionRole, taskRole, workerLogGroup, bucket, databaseSceret, database, dbName, redis, airflowCluster);
    return airflowCluster;
  }

  /**
   * Create a CloudWatch log group with one-month retention that is destroyed with the stack.
   * @param logGroupId construct id
   * @param logGroupName physical log group name
   * @returns the new `logs.LogGroup`
   */
  _createAirflowLogGroup(logGroupId, logGroupName) {
    return new logs.LogGroup(this, logGroupId, {
      logGroupName,
      retention: logs.RetentionDays.ONE_MONTH,
      removalPolicy: cdk.RemovalPolicy.DESTROY,
    });
  }

  /**
   * Create the ECS task execution role with the AWS-managed
   * `AmazonECSTaskExecutionRolePolicy` attached.
   * @returns the new `iam.Role`
   */
  _createTaskExecutionRole() {
    const executionRole = new iam.Role(this, 'AirflowTaskExecutionRole', {
      assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'),
    });
    executionRole.addManagedPolicy(iam.ManagedPolicy.fromAwsManagedPolicyName('service-role/AmazonECSTaskExecutionRolePolicy'));
    return executionRole;
  }

  /**
   * Create the ECS task role: read access to the Airflow bucket and
   * `secretsmanager:GetSecretValue`.
   * @param bucket Airflow S3 bucket the tasks may list and read
   * @returns the new `iam.Role`
   */
  _createTaskRole(bucket) {
    const taskRole = new iam.Role(this, 'AirflowTaskRole', {
      assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'),
    });
    // S3: list the bucket and read its objects
    taskRole.addToPolicy(new iam.PolicyStatement({
      effect: iam.Effect.ALLOW,
      actions: [
        's3:ListBucket',
        's3:GetObject',
        's3:GetBucketLocation',
      ],
      resources: [bucket.bucketArn, `${bucket.bucketArn}/*`],
    }));
    // Secrets Manager
    // NOTE(review): this grants read access to every secret in the account;
    // consider scoping it to the database secret.
    taskRole.addToPolicy(new iam.PolicyStatement({
      effect: iam.Effect.ALLOW,
      actions: ['secretsmanager:GetSecretValue'],
      resources: ['*'],
    }));
    return taskRole;
  }

  /**
   * Create the load-balanced Fargate service for the Airflow webserver. The
   * service is registered in Cloud Map as `webserver` in the private namespace
   * `airflow`, and the target group health check polls `/health`.
   */
  _createAirflowWebserverService(executionRole, taskRole, bucket, databaseSceret, database, dbName, airflowCluster, webserverLogGroup) {
    const webserverImage = ecs.AssetImage.fromDockerImageAsset(this._createAirflowWebServiceDockerImage());
    const loadBalancedFargateService = new patterns.ApplicationLoadBalancedFargateService(this, 'airflow-webserver-pattners', {
      cluster: airflowCluster,
      cpu: 512,
      memoryLimitMiB: 1024,
      taskImageOptions: {
        image: webserverImage,
        taskRole,
        executionRole,
        family: 'airflow-webserver-pattners',
        environment: {
          AIRFLOW_FERNET_KEY: this.fernetKey,
          AIRFLOW_DATABASE_NAME: dbName,
          AIRFLOW_DATABASE_PORT_NUMBER: '5432',
          AIRFLOW_DATABASE_HOST: database.dbInstanceEndpointAddress,
          AIRFLOW_EXECUTOR: 'CeleryExecutor',
          AIRFLOW_LOAD_EXAMPLES: 'no',
          AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: '30',
          BUCKET_NAME: bucket.bucketName,
        },
        secrets: {
          AIRFLOW_DATABASE_USERNAME: ecs.Secret.fromSecretsManager(databaseSceret, 'username'),
          AIRFLOW_DATABASE_PASSWORD: ecs.Secret.fromSecretsManager(databaseSceret, 'password'),
        },
        containerPort: 8080,
        logDriver: ecs.LogDriver.awsLogs({
          streamPrefix: 'ecs',
          logGroup: webserverLogGroup,
        }),
      },
      securityGroups: [this.airflowECSServiceSG],
      serviceName: 'AirflowWebserverServiceName',
      desiredCount: 1,
      loadBalancerName: 'Airflow-Webserver-LB',
      cloudMapOptions: {
        name: 'webserver',
        dnsRecordType: servicediscovery.DnsRecordType.A,
        dnsTtl: cdk.Duration.seconds(30),
        cloudMapNamespace: new servicediscovery.PrivateDnsNamespace(this, 'webserver-dns-namespace', {
          name: 'airflow',
          vpc: airflowCluster.vpc,
        }),
      },
    });
    loadBalancedFargateService.targetGroup.configureHealthCheck({
      path: '/health',
      interval: cdk.Duration.seconds(60),
      timeout: cdk.Duration.seconds(20),
    });
  }

  /**
   * Create the Fargate task definition and service for the Airflow scheduler.
   * The container reaches the webserver through its Cloud Map name `webserver.airflow`.
   */
  _createAirflowSchedulerService(executionRole, taskRole, schedulerLogGroup, bucket, databaseSceret, database, dbName, redis, airflowCluster) {
    const schedulerTask = new ecs.FargateTaskDefinition(this, 'AriflowSchedulerTask', {
      executionRole,
      taskRole,
      cpu: 512,
      memoryLimitMiB: 2048,
      family: 'airflow-scheduler',
    });
    schedulerTask.addContainer('airflow-scheduler-container', {
      image: ecs.AssetImage.fromDockerImageAsset(this._createAirflowSchedulerDockerImage()),
      logging: new ecs.AwsLogDriver({
        streamPrefix: 'ecs',
        logGroup: schedulerLogGroup,
      }),
      environment: {
        AIRFLOW_FERNET_KEY: this.fernetKey,
        AIRFLOW_DATABASE_NAME: dbName,
        AIRFLOW_DATABASE_PORT_NUMBER: '5432',
        AIRFLOW_DATABASE_HOST: database.dbInstanceEndpointAddress,
        AIRFLOW_EXECUTOR: 'CeleryExecutor',
        AIRFLOW_WEBSERVER_HOST: 'webserver.airflow',
        AIRFLOW_LOAD_EXAMPLES: 'no',
        AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: '30',
        REDIS_HOST: redis.attrRedisEndpointAddress,
        BUCKET_NAME: bucket.bucketName,
      },
      secrets: {
        AIRFLOW_DATABASE_USERNAME: ecs.Secret.fromSecretsManager(databaseSceret, 'username'),
        AIRFLOW_DATABASE_PASSWORD: ecs.Secret.fromSecretsManager(databaseSceret, 'password'),
      },
    });
    new ecs.FargateService(this, 'AirflowSchedulerService', {
      cluster: airflowCluster,
      taskDefinition: schedulerTask,
      serviceName: 'AirflowSchedulerServiceName',
      securityGroups: [this.airflowECSServiceSG],
    });
  }

  /**
   * Create the Fargate task definition and service for the Airflow worker.
   * The worker container additionally exposes container port 8793.
   */
  _createAirflowWorkerService(executionRole, taskRole, workerLogGroup, bucket, databaseSceret, database, dbName, redis, airflowCluster) {
    const workerTask = new ecs.FargateTaskDefinition(this, 'AriflowworkerTask', {
      executionRole,
      taskRole,
      cpu: 1024,
      memoryLimitMiB: 3072,
      family: 'airflow-worker',
    });
    workerTask.addContainer('airflow-worker-container', {
      image: ecs.AssetImage.fromDockerImageAsset(this._createAirflowWorkerDockerImage()),
      logging: new ecs.AwsLogDriver({
        streamPrefix: 'ecs',
        logGroup: workerLogGroup,
      }),
      environment: {
        AIRFLOW_FERNET_KEY: this.fernetKey,
        AIRFLOW_DATABASE_NAME: dbName,
        AIRFLOW_DATABASE_PORT_NUMBER: '5432',
        AIRFLOW_DATABASE_HOST: database.dbInstanceEndpointAddress,
        AIRFLOW_EXECUTOR: 'CeleryExecutor',
        AIRFLOW_WEBSERVER_HOST: 'webserver.airflow',
        AIRFLOW_LOAD_EXAMPLES: 'no',
        AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: '30',
        REDIS_HOST: redis.attrRedisEndpointAddress,
        BUCKET_NAME: bucket.bucketName,
      },
      secrets: {
        AIRFLOW_DATABASE_USERNAME: ecs.Secret.fromSecretsManager(databaseSceret, 'username'),
        AIRFLOW_DATABASE_PASSWORD: ecs.Secret.fromSecretsManager(databaseSceret, 'password'),
      },
      portMappings: [{ containerPort: 8793 }],
    });
    new ecs.FargateService(this, 'AirflowWorkerService', {
      cluster: airflowCluster,
      taskDefinition: workerTask,
      serviceName: 'AirflowWorkerServiceName',
      securityGroups: [this.airflowECSServiceSG],
    });
  }

  /**
   * Build a Docker image asset from `../docker-images/<imageName>` relative to
   * this file, using `imageName` as the construct id.
   * @param imageName directory name under `docker-images` and construct id
   * @returns the new `assets.DockerImageAsset`
   */
  _createDockerImage(imageName) {
    return new assets.DockerImageAsset(this, imageName, {
      directory: path.join(__dirname, `/../docker-images/${imageName}`),
    });
  }

  /** @returns Docker image asset for the Airflow webserver */
  _createAirflowWebServiceDockerImage() {
    return this._createDockerImage('airflow-webserver');
  }

  /** @returns Docker image asset for the Airflow scheduler */
  _createAirflowSchedulerDockerImage() {
    return this._createDockerImage('airflow-scheduler');
  }

  /** @returns Docker image asset for the Airflow worker */
  _createAirflowWorkerDockerImage() {
    return this._createDockerImage('airflow-worker');
  }
}
exports.Airflow = Airflow;
// jsii runtime type information for this class.
Airflow[JSII_RTTI_SYMBOL_1] = { fqn: "cdk-serverless-airflow.Airflow", version: "0.7.6" };