@aws-cdk/aws-glue-alpha
Version:
The CDK Construct Library for AWS::Glue
394 lines (393 loc) • 14 kB
TypeScript
import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch';
import * as events from 'aws-cdk-lib/aws-events';
import * as iam from 'aws-cdk-lib/aws-iam';
import * as logs from 'aws-cdk-lib/aws-logs';
import * as cdk from 'aws-cdk-lib/core';
import * as constructs from 'constructs';
import { Code } from '../code';
import { MetricType, JobState, WorkerType, GlueVersion } from '../constants';
import { IConnection } from '../connection';
import { ISecurityConfiguration } from '../security-configuration';
/**
* Interface representing a new or an imported Glue Job
*/
export interface IJob extends cdk.IResource, iam.IGrantable {
/**
* The name of the job.
* @attribute
*/
readonly jobName: string;
/**
* The ARN of the job.
* @attribute
*/
readonly jobArn: string;
/**
* Defines a CloudWatch event rule triggered when something happens with this job.
*
* @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types
*/
onEvent(id: string, options?: events.OnEventOptions): events.Rule;
/**
* Defines a CloudWatch event rule triggered when this job moves to the SUCCEEDED state.
*
* @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types
*/
onSuccess(id: string, options?: events.OnEventOptions): events.Rule;
/**
* Defines a CloudWatch event rule triggered when this job moves to the FAILED state.
*
* @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types
*/
onFailure(id: string, options?: events.OnEventOptions): events.Rule;
/**
* Defines a CloudWatch event rule triggered when this job moves to the TIMEOUT state.
*
* @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types
*/
onTimeout(id: string, options?: events.OnEventOptions): events.Rule;
/**
* Create a CloudWatch metric.
*
* @param metricName name of the metric typically prefixed with `glue.driver.`, `glue.<executorId>.` or `glue.ALL.`.
* @param type the metric type.
* @param props metric options.
*
* @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html
*/
metric(metricName: string, type: MetricType, props?: cloudwatch.MetricOptions): cloudwatch.Metric;
/**
* Create a CloudWatch Metric indicating job success.
*/
metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric;
/**
* Create a CloudWatch Metric indicating job failure.
*/
metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric;
/**
* Create a CloudWatch Metric indicating job timeout.
*/
metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric;
}
/**
* Properties for enabling Continuous Logging for Glue Jobs.
*
* @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
*/
export interface ContinuousLoggingProps {
/**
* Enable continuous logging.
*/
readonly enabled: boolean;
/**
* Specify a custom CloudWatch log group name.
*
* @default - a log group is created with name `/aws-glue/jobs/logs-v2/`.
*/
readonly logGroup?: logs.ILogGroup;
/**
* Specify a custom CloudWatch log stream prefix.
*
* @default - the job run ID.
*/
readonly logStreamPrefix?: string;
/**
* Filter out non-useful Apache Spark driver/executor and Apache Hadoop YARN heartbeat log messages.
*
* @default true
*/
readonly quiet?: boolean;
/**
* Apply the provided conversion pattern.
*
* This is a Log4j Conversion Pattern to customize driver and executor logs.
*
* @default `%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n`
*/
readonly conversionPattern?: string;
}
/**
* A base class is needed to be able to import existing Jobs into a CDK app to
* reference as part of a larger stack or construct. JobBase has the subset
* of attributes required to identify and reference an existing Glue Job,
* as well as some CloudWatch metric convenience functions to configure an
* event-driven flow using the job.
*/
export declare abstract class JobBase extends cdk.Resource implements IJob {
abstract readonly jobArn: string;
abstract readonly jobName: string;
abstract readonly grantPrincipal: iam.IPrincipal;
/**
* Create a CloudWatch Event Rule for this Glue Job when it's in a given state
*
* @param id construct id
* @param options event options. Note that some values are overridden if provided, these are
* - eventPattern.source = ['aws.glue']
* - eventPattern.detailType = ['Glue Job State Change', 'Glue Job Run Status']
* - eventPattern.detail.jobName = [this.jobName]
*
* @see https://docs.aws.amazon.com/AmazonCloudWatch/latest/events/EventTypes.html#glue-event-types
*/
onEvent(id: string, options?: events.OnEventOptions): events.Rule;
/**
* Create a CloudWatch Event Rule for the transition into the input jobState.
*
* @param id construct id.
* @param jobState the job state.
* @param options optional event options.
*/
protected onStateChange(id: string, jobState: JobState, options?: events.OnEventOptions): events.Rule;
/**
* Create a CloudWatch Event Rule matching JobState.SUCCEEDED.
*
* @param id construct id.
* @param options optional event options. default is {}.
*/
onSuccess(id: string, options?: events.OnEventOptions): events.Rule;
/**
* Return a CloudWatch Event Rule matching FAILED state.
*
* @param id construct id.
* @param options optional event options. default is {}.
*/
onFailure(id: string, options?: events.OnEventOptions): events.Rule;
/**
* Return a CloudWatch Event Rule matching TIMEOUT state.
*
* @param id construct id.
* @param options optional event options. default is {}.
*/
onTimeout(id: string, options?: events.OnEventOptions): events.Rule;
/**
* Create a CloudWatch metric.
*
* @param metricName name of the metric typically prefixed with `glue.driver.`, `glue.<executorId>.` or `glue.ALL.`.
* @param type the metric type.
* @param props metric options.
*
* @see https://docs.aws.amazon.com/glue/latest/dg/monitoring-awsglue-with-cloudwatch-metrics.html
*/
metric(metricName: string, type: MetricType, props?: cloudwatch.MetricOptions): cloudwatch.Metric;
/**
* Return a CloudWatch Metric indicating job success.
*
* This metric is based on the Rule returned by no-args onSuccess() call.
*/
metricSuccess(props?: cloudwatch.MetricOptions): cloudwatch.Metric;
/**
* Return a CloudWatch Metric indicating job failure.
*
* This metric is based on the Rule returned by no-args onFailure() call.
*/
metricFailure(props?: cloudwatch.MetricOptions): cloudwatch.Metric;
/**
* Return a CloudWatch Metric indicating job timeout.
*
* This metric is based on the Rule returned by no-args onTimeout() call.
*/
metricTimeout(props?: cloudwatch.MetricOptions): cloudwatch.Metric;
/**
* Creates or retrieves a singleton event rule for the input job state for use with the metric JobState methods.
*
* @param id construct id.
* @param jobState the job state.
*/
private metricJobStateRule;
/**
* Returns the job arn
*/
protected buildJobArn(scope: constructs.Construct, jobName: string): string;
}
/**
* A subset of Job attributes are required for importing an existing job
* into a CDK project. This is only used when using fromJobAttributes
* to identify and reference the existing job.
*/
export interface JobAttributes {
/**
* The name of the job.
*/
readonly jobName: string;
/**
* The IAM role assumed by Glue to run this job.
*
* @default - undefined
*/
readonly role?: iam.IRole;
}
/**
* JobProps will be used to create new Glue Jobs using this L2 Construct.
*/
export interface JobProps {
/**
* Script Code Location (required)
* Script to run when the Glue job executes. Can be uploaded
* from the local directory structure using fromAsset
* or referenced via S3 location using fromBucket
*/
readonly script: Code;
/**
* IAM Role (required)
* IAM Role to use for Glue job execution
* Must be specified by the developer because the L2 doesn't have visibility
* into the actions the script(s) takes during the job execution
* The role must trust the Glue service principal (glue.amazonaws.com)
* and be granted sufficient permissions.
*
* @see https://docs.aws.amazon.com/glue/latest/dg/getting-started-access.html
*/
readonly role: iam.IRole;
/**
* Name of the Glue job (optional)
* Developer-specified name of the Glue job
*
* @default - a name is automatically generated
*/
readonly jobName?: string;
/**
* Description (optional)
* Developer-specified description of the Glue job
*
* @default - no value
*/
readonly description?: string;
/**
* Number of Workers (optional)
* Number of workers for Glue to use during job execution
*
* @default 10
*/
readonly numberOfWorkers?: number;
/**
* Worker Type (optional)
* Type of Worker for Glue to use during job execution
* Enum options: Standard, G_1X, G_2X, G_025X. G_4X, G_8X, Z_2X
*
* @default WorkerType.G_1X
*/
readonly workerType?: WorkerType;
/**
* Max Concurrent Runs (optional)
* The maximum number of runs this Glue job can concurrently run
*
* An error is returned when this threshold is reached. The maximum value
* you can specify is controlled by a service limit.
*
* @default 1
*/
readonly maxConcurrentRuns?: number;
/**
* Default Arguments (optional)
* The default arguments for every run of this Glue job,
* specified as name-value pairs.
*
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
* for a list of reserved parameters
* @default - no arguments
*/
readonly defaultArguments?: {
[key: string]: string;
};
/**
* Connections (optional)
* List of connections to use for this Glue job
* Connections are used to connect to other AWS Service or resources within a VPC.
*
* @default [] - no connections are added to the job
*/
readonly connections?: IConnection[];
/**
* Max Retries (optional)
* Maximum number of retry attempts Glue performs if the job fails
*
* @default 0
*/
readonly maxRetries?: number;
/**
* Timeout (optional)
* The maximum time that a job run can consume resources before it is
* terminated and enters TIMEOUT status. Specified in minutes.
*
* @default 2880 (2 days for non-streaming)
*
*/
readonly timeout?: cdk.Duration;
/**
* Security Configuration (optional)
* Defines the encryption options for the Glue job
*
* @default - no security configuration.
*/
readonly securityConfiguration?: ISecurityConfiguration;
/**
* Tags (optional)
* A list of key:value pairs of tags to apply to this Glue job resources
*
* @default {} - no tags
*/
readonly tags?: {
[key: string]: string;
};
/**
* Glue Version
* The version of Glue to use to execute this job
*
* @default 3.0 for ETL
*/
readonly glueVersion?: GlueVersion;
/**
* Enables the collection of metrics for job profiling.
*
* @default - no profiling metrics emitted.
*
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
*/
readonly enableProfilingMetrics?: boolean;
/**
* Enables continuous logging with the specified props.
*
* @default - continuous logging is enabled.
*
* @see https://docs.aws.amazon.com/glue/latest/dg/monitor-continuous-logging-enable.html
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
**/
readonly continuousLogging?: ContinuousLoggingProps;
}
/**
* A Glue Job.
* @resource AWS::Glue::Job
*/
export declare abstract class Job extends JobBase {
/**
* Identifies an existing Glue Job from a subset of attributes that can
* be referenced from within another Stack or Construct.
*
* @param scope The scope creating construct (usually `this`)
* @param id The construct's id.
* @param attrs Attributes for the Glue Job we want to import
*/
static fromJobAttributes(scope: constructs.Construct, id: string, attrs: JobAttributes): IJob;
/**
* The IAM role Glue assumes to run this job.
*/
abstract readonly role: iam.IRole;
/**
* Check no usage of reserved arguments.
*
* @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
*/
protected checkNoReservedArgs(defaultArguments?: {
[key: string]: string;
}): {
[key: string]: string;
} | undefined;
/**
* Setup Continuous Logging Properties
* @param role The IAM role to use for continuous logging
* @param props The properties for continuous logging configuration
* @returns String containing the args for the continuous logging command
*/
protected setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps | undefined): any;
protected codeS3ObjectUrl(code: Code): string;
}