aws-cdk-lib
Version:
Version 2 of the AWS Cloud Development Kit library
378 lines (377 loc) • 13.5 kB
TypeScript
import { Construct } from 'constructs';
import * as iam from '../../../aws-iam';
import * as logs from '../../../aws-logs';
import * as s3 from '../../../aws-s3';
import * as sfn from '../../../aws-stepfunctions';
interface EmrContainersStartJobRunOptions {
/**
* The ID of the virtual cluster where the job will be run
*/
readonly virtualCluster: VirtualClusterInput;
/**
* The name of the job run.
*
* @default - No job run name
*/
readonly jobName?: string;
/**
* The execution role for the job run.
*
* If `virtualClusterId` is from a JSON input path, an execution role must be provided.
* If an execution role is provided, follow the documentation to update the role trust policy.
* @see https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/setting-up-trust-policy.html
*
* @default - Automatically generated only when the provided `virtualClusterId` is not an encoded JSON path
*/
readonly executionRole?: iam.IRole;
/**
* The Amazon EMR release version to use for the job run.
*/
readonly releaseLabel: ReleaseLabel;
/**
* The configurations for the application running in the job run.
*
* Maximum of 100 items
*
* @see https://docs.aws.amazon.com/emr-on-eks/latest/APIReference/API_Configuration.html
*
* @default - No application config
*/
readonly applicationConfig?: ApplicationConfiguration[];
/**
* The job driver for the job run.
*
* @see https://docs.aws.amazon.com/emr-on-eks/latest/APIReference/API_JobDriver.html
*/
readonly jobDriver: JobDriver;
/**
* Configuration for monitoring the job run
*
* @see https://docs.aws.amazon.com/emr-on-eks/latest/APIReference/API_MonitoringConfiguration.html
*
* @default - logging enabled and resources automatically generated if `monitoring.logging` is set to `true`
*/
readonly monitoring?: Monitoring;
/**
* The tags assigned to job runs.
*
* @default - None
*/
readonly tags?: {
[key: string]: string;
};
}
/**
* Properties for calling EMR Containers StartJobRun using JSONPath.
*/
export interface EmrContainersStartJobRunJsonPathProps extends sfn.TaskStateJsonPathBaseProps, EmrContainersStartJobRunOptions {
}
/**
* Properties for calling EMR Containers StartJobRun using JSONata.
*/
export interface EmrContainersStartJobRunJsonataProps extends sfn.TaskStateJsonataBaseProps, EmrContainersStartJobRunOptions {
}
/**
* The props for a EMR Containers StartJobRun Task.
*/
export interface EmrContainersStartJobRunProps extends sfn.TaskStateBaseProps, EmrContainersStartJobRunOptions {
}
/**
* Starts a job run.
*
* A job is a unit of work that you submit to Amazon EMR on EKS for execution.
* The work performed by the job can be defined by a Spark jar, PySpark script, or SparkSQL query.
* A job run is an execution of the job on the virtual cluster.
*
* @see https://docs.aws.amazon.com/step-functions/latest/dg/connect-emr-eks.html
*/
export declare class EmrContainersStartJobRun extends sfn.TaskStateBase implements iam.IGrantable {
private readonly props;
/**
* Starts a job run Task using JSONPath.
*
* A job is a unit of work that you submit to Amazon EMR on EKS for execution.
* The work performed by the job can be defined by a Spark jar, PySpark script, or SparkSQL query.
* A job run is an execution of the job on the virtual cluster.
*
* @see https://docs.aws.amazon.com/step-functions/latest/dg/connect-emr-eks.html
*/
static jsonPath(scope: Construct, id: string, props: EmrContainersStartJobRunJsonPathProps): EmrContainersStartJobRun;
/**
* Starts a job run Task using JSONata.
*
* A job is a unit of work that you submit to Amazon EMR on EKS for execution.
* The work performed by the job can be defined by a Spark jar, PySpark script, or SparkSQL query.
* A job run is an execution of the job on the virtual cluster.
*
* @see https://docs.aws.amazon.com/step-functions/latest/dg/connect-emr-eks.html
*/
static jsonata(scope: Construct, id: string, props: EmrContainersStartJobRunJsonataProps): EmrContainersStartJobRun;
private static readonly SUPPORTED_INTEGRATION_PATTERNS;
protected readonly taskMetrics?: sfn.TaskMetricsConfig;
protected readonly taskPolicies?: iam.PolicyStatement[];
readonly grantPrincipal: iam.IPrincipal;
private role;
private readonly logGroup?;
private readonly logBucket?;
private readonly integrationPattern;
constructor(scope: Construct, id: string, props: EmrContainersStartJobRunProps);
/**
* @internal
*/
protected _renderTask(topLevelQueryLanguage?: sfn.QueryLanguage): any;
/**
* Render the EMR Containers ConfigurationProperty as JSON
*/
private applicationConfigPropertyToJson;
private validateAppConfigPropertiesLength;
private validatePropertiesNestedAppConfigBothNotUndefined;
private validateAppConfig;
private isArrayOfStrings;
private validateEntryPointArguments;
private validateEntryPointArgumentsLength;
private validateSparkSubmitParametersLength;
private validateEntryPoint;
private validateSparkSubmitJobDriver;
private assignLogGroup;
private assignLogBucket;
private createJobExecutionRole;
private grantMonitoringPolicies;
/**
* If an execution role is not provided by user, the automatically generated job execution role must create a trust relationship
* between itself and the identity of the EMR managed service account in order to run jobs on the Kubernetes namespace.
*
* This cannot occur if the user provided virtualClusterId is within an encoded JSON path.
*
* The trust relationship can be created by updating the trust policy of the job execution role.
*
* @param role the automatically generated job execution role
*/
private updateRoleTrustPolicy;
private createPolicyStatements;
}
/**
* The information about job driver for Spark submit.
*/
export interface SparkSubmitJobDriver {
/**
* The entry point of job application.
*
* Length Constraints: Minimum length of 1. Maximum length of 256.
*/
readonly entryPoint: sfn.TaskInput;
/**
* The arguments for a job application in a task input object containing an array of strings
*
* Length Constraints: Minimum length of 1. Maximum length of 10280.
* @type sfn.TaskInput which expects payload as an array of strings
*
* @default - No arguments defined
*/
readonly entryPointArguments?: sfn.TaskInput;
/**
* The Spark submit parameters that are used for job runs.
*
* Length Constraints: Minimum length of 1. Maximum length of 102400.
*
* @default - No spark submit parameters
*/
readonly sparkSubmitParameters?: string;
}
/**
* Specify the driver that the EMR Containers job runs on.
* The job driver is used to provide an input for the job that will be run.
*/
export interface JobDriver {
/**
* The job driver parameters specified for spark submit.
*
* @see https://docs.aws.amazon.com/emr-on-eks/latest/APIReference/API_SparkSubmitJobDriver.html
*
*/
readonly sparkSubmitJobDriver: SparkSubmitJobDriver;
}
/**
* The classification within a EMR Containers application configuration.
* Class can be extended to add other classifications.
* For example, new Classification('xxx-yyy');
*/
export declare class Classification {
readonly classificationStatement: string;
/**
* Sets the maximizeResourceAllocation property to true or false.
* When true, Amazon EMR automatically configures spark-defaults properties based on cluster hardware configuration.
*
* For more info:
* @see https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-configure.html#emr-spark-maximizeresourceallocation
*/
static readonly SPARK: Classification;
/**
* Sets values in the spark-defaults.conf file.
*
* For more info:
* @see https://spark.apache.org/docs/latest/configuration.html
*/
static readonly SPARK_DEFAULTS: Classification;
/**
* Sets values in the spark-env.sh file.
*
* For more info:
* @see https://spark.apache.org/docs/latest/configuration.html#environment-variables
*/
static readonly SPARK_ENV: Classification;
/**
* Sets values in the hive-site.xml for Spark.
*/
static readonly SPARK_HIVE_SITE: Classification;
/**
* Sets values in the log4j.properties file.
*
* For more settings and info:
* @see https://github.com/apache/spark/blob/master/conf/log4j.properties.template
*/
static readonly SPARK_LOG4J: Classification;
/**
* Sets values in the metrics.properties file.
*
* For more settings and info:
* @see https://github.com/apache/spark/blob/master/conf/metrics.properties.template
*/
static readonly SPARK_METRICS: Classification;
/**
* Creates a new Classification
*
* @param classificationStatement A literal string in case a new EMR classification is released, if not already defined.
*/
constructor(classificationStatement: string);
}
/**
* A configuration specification to be used when provisioning virtual clusters,
* which can include configurations for applications and software bundled with Amazon EMR on EKS.
*
* A configuration consists of a classification, properties, and optional nested configurations.
* A classification refers to an application-specific configuration file.
* Properties are the settings you want to change in that file.
* @see https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html
*/
export interface ApplicationConfiguration {
/**
* The classification within a configuration.
*
* Length Constraints: Minimum length of 1. Maximum length of 1024.
*/
readonly classification: Classification;
/**
* A list of additional configurations to apply within a configuration object.
*
* Array Members: Maximum number of 100 items.
*
* @default - No other configurations
*/
readonly nestedConfig?: ApplicationConfiguration[];
/**
* A set of properties specified within a configuration classification.
*
* Map Entries: Maximum number of 100 items.
*
* @default - No properties
*/
readonly properties?: {
[key: string]: string;
};
}
/**
* Configuration setting for monitoring.
*/
export interface Monitoring {
/**
* Enable logging for this job.
*
* If set to true, will automatically create a Cloudwatch Log Group and S3 bucket.
* This will be set to `true` implicitly if values are provided for `logGroup` or `logBucket`.
*
* @default true - true if values are provided for `logGroup` or `logBucket`, false otherwise
*/
readonly logging?: boolean;
/**
* A log group for CloudWatch monitoring.
*
* You can configure your jobs to send log information to CloudWatch Logs.
*
* @default - if `logging` is manually set to `true` and a `logGroup` is not provided, a `logGroup` will be automatically generated`.
*/
readonly logGroup?: logs.ILogGroup;
/**
* A log stream name prefix for Cloudwatch monitoring.
*
* @default - Log streams created in this log group have no default prefix
*/
readonly logStreamNamePrefix?: string;
/**
* Amazon S3 Bucket for monitoring log publishing.
*
* You can configure your jobs to send log information to Amazon S3.
*
* @default - if `logging` is manually set to `true` and a `logBucket` is not provided, a `logBucket` will be automatically generated`.
*/
readonly logBucket?: s3.IBucket;
/**
* Monitoring configurations for the persistent application UI.
*
* @default true
*/
readonly persistentAppUI?: boolean;
}
/**
* The Amazon EMR release version to use for the job run.
*
* Can be extended to include new EMR releases
*
* For example, `new ReleaseLabel('emr-x.xx.x-latest');`
*/
export declare class ReleaseLabel {
readonly label: string;
/**
* EMR Release version 5.32.0
*/
static readonly EMR_5_32_0: ReleaseLabel;
/**
* EMR Release version 5.33.0
*/
static readonly EMR_5_33_0: ReleaseLabel;
/**
* EMR Release version 6.2.0
*/
static readonly EMR_6_2_0: ReleaseLabel;
/**
* EMR Release version 6.3.0
*/
static readonly EMR_6_3_0: ReleaseLabel;
/**
* Initializes the label string.
*
* @param label A literal string that contains the release-version ex. 'emr-x.x.x-latest'
*/
constructor(label: string);
}
/**
* Class that returns a virtual cluster's id depending on input type
*/
export declare class VirtualClusterInput {
readonly id: string;
/**
* Input for a virtualClusterId from a Task Input
*/
static fromTaskInput(taskInput: sfn.TaskInput): VirtualClusterInput;
/**
* Input for virtualClusterId from a literal string
*/
static fromVirtualClusterId(virtualClusterId: string): VirtualClusterInput;
/**
* Initializes the virtual cluster ID.
*
* @param id The VirtualCluster Id
*/
private constructor();
}
export {};