@aws-cdk/aws-glue-alpha
Version: 2.223.0-alpha.0
The CDK Construct Library for AWS::Glue
128 lines • 20.7 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.SparkJob = void 0;
const jsiiDeprecationWarnings = require("../../.warnings.jsii.js");
const JSII_RTTI_SYMBOL_1 = Symbol.for("jsii.rtti");
const s3 = require("aws-cdk-lib/aws-s3");
const metadata_resource_1 = require("aws-cdk-lib/core/lib/metadata-resource");
const job_1 = require("./job");
const aws_cdk_lib_1 = require("aws-cdk-lib");
const os_1 = require("os");
/**
 * Base class for different types of Spark Jobs.
 *
 * Declared `abstract` in the TypeScript source (spark-job.ts). It wires up the
 * job role and, when requested, the Spark UI logging location, and offers
 * helpers that subclasses use to assemble Glue job arguments.
 */
class SparkJob extends job_1.Job {
    static [JSII_RTTI_SYMBOL_1] = { fqn: "@aws-cdk/aws-glue-alpha.SparkJob", version: "2.223.0-alpha.0" };
    /**
     * The role taken from `props.role`.
     */
    role;
    /**
     * The principal used for grants; this is the same object as `role`.
     */
    grantPrincipal;
    /**
     * The Spark UI logs location if Spark UI monitoring and debugging is enabled,
     * otherwise `undefined`.
     *
     * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html
     * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html
     */
    sparkUILoggingLocation;
    /**
     * @param scope parent construct, forwarded to the base class
     * @param id construct id, forwarded to the base class
     * @param props Spark job properties; `jobName`, `role` and `sparkUI` are read here
     */
    constructor(scope, id, props) {
        super(scope, id, { physicalName: props.jobName });
        // Deprecation check on the props (this guard is absent from spark-job.ts,
        // so it is presumably injected by the jsii compiler).
        try {
            jsiiDeprecationWarnings._aws_cdk_aws_glue_alpha_SparkJobProps(props);
        }
        catch (err) {
            if (process.env.JSII_DEBUG !== "1" && err.name === "DeprecationError") {
                // Trim the stack so it starts at the caller of this constructor.
                Error.captureStackTrace(err, SparkJob);
            }
            throw err;
        }
        // Enhanced CDK Analytics Telemetry
        (0, metadata_resource_1.addConstructMetadata)(this, props);
        this.role = props.role;
        this.grantPrincipal = this.role;
        // The role/grantPrincipal assignments above must happen first: setting up
        // the logging location issues a bucket grant with this job as grantee.
        const sparkUI = props.sparkUI;
        let loggingLocation;
        if (sparkUI) {
            loggingLocation = this.setupSparkUILoggingLocation(sparkUI);
        }
        this.sparkUILoggingLocation = loggingLocation;
    }
    /**
     * Builds the job arguments that are common to Spark jobs and unrelated to the
     * executable itself (`protected` in the TypeScript source).
     *
     * Later groups win on key collisions: continuous logging, then metrics, then
     * Spark UI, then the user's `defaultArguments`.
     *
     * @param props Spark job properties
     * @returns a string-to-string map of Glue job arguments
     */
    nonExecutableCommonArguments(props) {
        try {
            jsiiDeprecationWarnings._aws_cdk_aws_glue_alpha_SparkJobProps(props);
        }
        catch (err) {
            if (process.env.JSII_DEBUG !== "1" && err.name === "DeprecationError") {
                Error.captureStackTrace(err, this.nonExecutableCommonArguments);
            }
            throw err;
        }
        // Enable CloudWatch metrics and continuous logging by default as a best practice
        const loggingArgs = this.setupContinuousLogging(this.role, props.continuousLogging);
        // Both metrics flags are opt-out: an unset prop counts as enabled
        // (kept that way for backward compatibility).
        const metricsArgs = {};
        if (props.enableMetrics ?? true) {
            metricsArgs['--enable-metrics'] = '';
        }
        if (props.enableObservabilityMetrics ?? true) {
            metricsArgs['--enable-observability-metrics'] = 'true';
        }
        // Spark UI arguments are only emitted when a logging location was set up
        // by the constructor.
        let sparkUIArgs = {};
        const loggingLocation = this.sparkUILoggingLocation;
        if (loggingLocation) {
            const eventLogsUrl = loggingLocation.bucket.s3UrlForObject(loggingLocation.prefix);
            sparkUIArgs = {
                '--enable-spark-ui': 'true',
                // path will always end with a slash
                '--spark-event-logs-path': eventLogsUrl.replace(/\/?$/, '/'),
            };
        }
        return {
            ...loggingArgs,
            ...metricsArgs,
            ...sparkUIArgs,
            ...this.checkNoReservedArgs(props.defaultArguments),
        };
    }
    /**
     * Set the arguments for extra {@link Code}-related properties
     *
     * Mutates `args` in place; nothing is returned.
     *
     * @param args the argument map to add entries to
     * @param props the extra code properties (jars, Python files, other files)
     */
    setupExtraCodeArguments(args, props) {
        try {
            jsiiDeprecationWarnings._aws_cdk_aws_glue_alpha_SparkExtraCodeProps(props);
        }
        catch (err) {
            if (process.env.JSII_DEBUG !== "1" && err.name === "DeprecationError") {
                Error.captureStackTrace(err, this.setupExtraCodeArguments);
            }
            throw err;
        }
        // Writes a comma-separated list of object URLs under `flag`, but only
        // when the list is present and non-empty.
        const addCodeList = (flag, codes) => {
            if (codes && codes.length > 0) {
                args[flag] = codes.map(code => this.codeS3ObjectUrl(code)).join(',');
            }
        };
        addCodeList('--extra-jars', props.extraJars);
        if (props.extraJarsFirst) {
            args['--user-jars-first'] = 'true';
        }
        addCodeList('--extra-py-files', props.extraPythonFiles);
        addCodeList('--extra-files', props.extraFiles);
    }
    /**
     * Validates the Spark UI prefix, picks (or creates) the log bucket and grants
     * this job read/write access to it (`private` in the TypeScript source).
     *
     * @param props Spark UI properties (`bucket` and `prefix`, both optional)
     * @returns the logging location: the original prefix and the bucket in use
     */
    setupSparkUILoggingLocation(props) {
        const { prefix } = props;
        validateSparkUiPrefix(prefix);
        let bucket = props.bucket;
        if (bucket === undefined || bucket === null) {
            // No bucket supplied: create one with SSL enforced and S3-managed encryption.
            bucket = new s3.Bucket(this, 'SparkUIBucket', { enforceSSL: true, encryption: s3.BucketEncryption.S3_MANAGED });
        }
        bucket.grantReadWrite(this, cleanSparkUiPrefixForGrant(prefix));
        return { prefix, bucket };
    }
}
exports.SparkJob = SparkJob;
function validateSparkUiPrefix(prefix) {
if (!prefix || aws_cdk_lib_1.Token.isUnresolved(prefix)) {
// skip validation if prefix is not specified or is a token
return;
}
const errors = [];
if (!prefix.startsWith('/')) {
errors.push('Prefix must begin with \'/\'');
}
if (prefix.endsWith('/')) {
errors.push('Prefix must not end with \'/\'');
}
if (errors.length > 0) {
throw new aws_cdk_lib_1.UnscopedValidationError(`Invalid prefix format (value: ${prefix})${os_1.EOL}${errors.join(os_1.EOL)}`);
}
}
/**
 * Turns a Spark UI log prefix into the object key pattern used for the bucket
 * grant, e.g. `'/foo/bar'` becomes `'foo/bar/*'`.
 *
 * - A missing or empty prefix yields `undefined`, i.e. no key pattern, which
 *   matches "logs are written at the root of the bucket".
 * - The leading '/' is removed only when it is actually there. A prefix that
 *   skipped validation because it is an unresolved token does not start with
 *   '/', and slicing it would cut the first character off the token marker.
 *   NOTE(review): a token that later resolves to a value with a leading '/'
 *   still keeps that slash in the pattern; that cannot be fixed on the string
 *   at this point.
 *
 * @param prefix the (already validated) Spark UI prefix, if any
 * @returns the key pattern for the grant, or `undefined` when there is no prefix
 */
function cleanSparkUiPrefixForGrant(prefix) {
    if (!prefix) {
        return undefined;
    }
    const keyPrefix = prefix.startsWith('/') ? prefix.slice(1) : prefix;
    return keyPrefix + '/*';
}
//# sourceMappingURL=data:application/json;base64,{"version":3,"file":"spark-job.js","sourceRoot":"","sources":["spark-job.ts"],"names":[],"mappings":";;;;;AACA,yCAAyC;AACzC,8EAA8E;AAG9E,+BAAsC;AACtC,6CAA6D;AAC7D,2BAAyB;AAoHzB;;GAEG;AACH,MAAsB,QAAS,SAAQ,SAAG;;IACxB,IAAI,CAAY;IAChB,cAAc,CAAiB;IAE/C;;;;;OAKG;IACa,sBAAsB,CAA0B;IAEhE,YAAY,KAA2B,EAAE,EAAU,EAAE,KAAoB;QACvE,KAAK,CAAC,KAAK,EAAE,EAAE,EAAE;YACf,YAAY,EAAE,KAAK,CAAC,OAAO;SAC5B,CAAC,CAAC;;;;;;+CAfe,QAAQ;;;;QAgB1B,mCAAmC;QACnC,IAAA,wCAAoB,EAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QAElC,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;QACvB,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC;QAEhC,IAAI,CAAC,sBAAsB,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;KAC3G;IAES,4BAA4B,CAAC,KAAoB;;;;;;;;;;QACzD,iFAAiF;QACjF,MAAM,qBAAqB,GAAG,IAAI,CAAC,sBAAsB,CAAC,IAAI,CAAC,IAAI,EAAE,KAAK,CAAC,iBAAiB,CAAC,CAAC;QAE9F,0FAA0F;QAC1F,MAAM,oBAAoB,GAAG,CAAC,KAAK,CAAC,aAAa,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,kBAAkB,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7F,MAAM,wBAAwB,GAAG,CAAC,KAAK,CAAC,0BAA0B,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE,gCAAgC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAEhI,wDAAwD;QACxD,MAAM,WAAW,GAAG,IAAI,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;YACjD,mBAAmB,EAAE,MAAM;YAC3B,yBAAyB,EAAE,IAAI,CAAC,sBAAsB,CAAC,MAAM,CAAC,cAAc,CAAC,IAAI,CAAC,sBAAsB,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,EAAE,oCAAoC;SAC5K,CAAC,CAAA,CAAC,CAAC,EAAE,CAAC;QAEP,OAAO;YACL,GAAG,qBAAqB;YACxB,GAAG,oBAAoB;YACvB,GAAG,wBAAwB;YAC3B,GAAG,WAAW;YACd,GAAG,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC,gBAAgB,CAAC;SACpD,CAAC;KACH;IAED;;OAEG;IACO,uBAAuB,CAAC,IAA+B,EAAE,KAA0B;;;;;;;;;;QAC3F,IAAI,KAAK,CAAC,SAAS,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClD,IAAI,CAAC,cAAc,CAAC,GAAG,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC3F,CAAC;QACD,IAAI,KAAK,CAAC,cAAc,EAAE,CAAC;YACzB,IAAI,CAAC,mBAAmB,CAAC,GAAG,MAAM,CAAC;QACrC,CAAC;QACD,IAAI,KAAK,CAAC,gBAAgB,IAAI,KAAK,CAAC,gBAAgB,CAAC,
MAAM,GAAG,CAAC,EAAE,CAAC;YAChE,IAAI,CAAC,kBAAkB,CAAC,GAAG,KAAK,CAAC,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACtG,CAAC;QACD,IAAI,KAAK,CAAC,UAAU,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpD,IAAI,CAAC,eAAe,CAAC,GAAG,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7F,CAAC;KACF;IAEO,2BAA2B,CAAC,KAAmB;QACrD,qBAAqB,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;QACpC,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC,MAAM,CAAC,IAAI,EAAE,eAAe,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,CAAC,gBAAgB,CAAC,UAAU,EAAE,CAAC,CAAC;QACtI,MAAM,CAAC,cAAc,CAAC,IAAI,EAAE,0BAA0B,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;QACtE,OAAO;YACL,MAAM,EAAE,KAAK,CAAC,MAAM;YACpB,MAAM;SACP,CAAC;KACH;;AA1EH,4BA2EC;AAED,SAAS,qBAAqB,CAAC,MAAe;IAC5C,IAAI,CAAC,MAAM,IAAI,mBAAK,CAAC,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;QAC1C,2DAA2D;QAC3D,OAAO;IACT,CAAC;IAED,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;QAC5B,MAAM,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;IAC9C,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QACzB,MAAM,CAAC,IAAI,CAAC,gCAAgC,CAAC,CAAC;IAChD,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,qCAAuB,CAAC,iCAAiC,MAAM,IAAI,QAAG,GAAG,MAAM,CAAC,IAAI,CAAC,QAAG,CAAC,EAAE,CAAC,CAAC;IACzG,CAAC;AACH,CAAC;AAED,SAAS,0BAA0B,CAAC,MAAe;IACjD,OAAO,MAAM,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;AACnE,CAAC","sourcesContent":["import * as iam from 'aws-cdk-lib/aws-iam';\nimport * as s3 from 'aws-cdk-lib/aws-s3';\nimport { addConstructMetadata } from 'aws-cdk-lib/core/lib/metadata-resource';\nimport * as constructs from 'constructs';\nimport { Code } from '../code';\nimport { Job, JobProps } from './job';\nimport { Token, UnscopedValidationError } from 'aws-cdk-lib';\nimport { EOL } from 'os';\n\n/**\n * Code props for different {@link Code} assets used by different types of Spark jobs.\n */\nexport 
interface SparkExtraCodeProps {\n  /**\n   * Extra Python Files S3 URL (optional)\n   * S3 URL where additional python dependencies are located\n   *\n   * @default - no extra files\n   */\n  readonly extraPythonFiles?: Code[];\n\n  /**\n   * Additional files, such as configuration files that AWS Glue copies to the working directory of your script before executing it.\n   *\n   * @default - no extra files specified.\n   *\n   * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html\n   */\n  readonly extraFiles?: Code[];\n\n  /**\n   * Extra Jars S3 URL (optional)\n   * S3 URL where additional jar dependencies are located\n   * @default - no extra jar files\n   */\n  readonly extraJars?: Code[];\n\n  /**\n   * Setting this value to true prioritizes the customer's extra JAR files in the classpath.\n   *\n   * @default false - priority is not given to user-provided jars\n   *\n   * @see `--user-jars-first` in https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html\n   */\n  readonly extraJarsFirst?: boolean;\n}\n\n/**\n * Properties for enabling Spark UI monitoring feature for Spark-based Glue jobs.\n *\n * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html\n * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html\n */\nexport interface SparkUIProps {\n  /**\n   * The bucket where the Glue job stores the logs.\n   *\n   * @default a new bucket will be created.\n   */\n  readonly bucket?: s3.IBucket;\n\n  /**\n   * The path inside the bucket (objects prefix) where the Glue job stores the logs.\n   * Use format `'/foo/bar'`\n   *\n   * @default - the logs will be written at the root of the bucket\n   */\n  readonly prefix?: string;\n}\n\n/**\n * The Spark UI logging location.\n *\n * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html\n * @see 
https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html\n */\nexport interface SparkUILoggingLocation {\n  /**\n   * The bucket where the Glue job stores the logs.\n   */\n  readonly bucket: s3.IBucket;\n\n  /**\n   * The path inside the bucket (objects prefix) where the Glue job stores the logs.\n   *\n   * @default '/' - the logs will be written at the root of the bucket\n   */\n  readonly prefix?: string;\n}\n\n/**\n * Common properties for different types of Spark jobs.\n */\nexport interface SparkJobProps extends JobProps {\n  /**\n   * Enables the Spark UI debugging and monitoring with the specified props.\n   *\n   * @default - Spark UI debugging and monitoring is disabled.\n   *\n   * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html\n   * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html\n   */\n  readonly sparkUI?: SparkUIProps;\n\n  /**\n   * Enable profiling metrics for the Glue job.\n   *\n   * When enabled, adds '--enable-metrics' to job arguments.\n   *\n   * @default true\n   */\n  readonly enableMetrics?: boolean;\n\n  /**\n   * Enable observability metrics for the Glue job.\n   *\n   * When enabled, adds '--enable-observability-metrics': 'true' to job arguments.\n   *\n   * @default true\n   */\n  readonly enableObservabilityMetrics?: boolean;\n}\n\n/**\n * Base class for different types of Spark Jobs.\n */\nexport abstract class SparkJob extends Job {\n  public readonly role: iam.IRole;\n  public readonly grantPrincipal: iam.IPrincipal;\n\n  /**\n   * The Spark UI logs location if Spark UI monitoring and debugging is enabled.\n   *\n   * @see https://docs.aws.amazon.com/glue/latest/dg/monitor-spark-ui-jobs.html\n   * @see https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html\n   */\n  public readonly sparkUILoggingLocation?: SparkUILoggingLocation;\n\n  constructor(scope: constructs.Construct, id: string, props: 
SparkJobProps) {\n    super(scope, id, {\n      physicalName: props.jobName,\n    });\n    // Enhanced CDK Analytics Telemetry\n    addConstructMetadata(this, props);\n\n    this.role = props.role;\n    this.grantPrincipal = this.role;\n\n    this.sparkUILoggingLocation = props.sparkUI ? this.setupSparkUILoggingLocation(props.sparkUI) : undefined;\n  }\n\n  protected nonExecutableCommonArguments(props: SparkJobProps): {[key: string]: string} {\n    // Enable CloudWatch metrics and continuous logging by default as a best practice\n    const continuousLoggingArgs = this.setupContinuousLogging(this.role, props.continuousLogging);\n\n    // Conditionally include metrics arguments (default to enabled for backward compatibility)\n    const profilingMetricsArgs = (props.enableMetrics ?? true) ? { '--enable-metrics': '' } : {};\n    const observabilityMetricsArgs = (props.enableObservabilityMetrics ?? true) ? { '--enable-observability-metrics': 'true' } : {};\n\n    // Set spark ui args, if spark ui logging had been setup\n    const sparkUIArgs = this.sparkUILoggingLocation ? 
({\n      '--enable-spark-ui': 'true',\n      '--spark-event-logs-path': this.sparkUILoggingLocation.bucket.s3UrlForObject(this.sparkUILoggingLocation.prefix).replace(/\\/?$/, '/'), // path will always end with a slash\n    }): {};\n\n    return {\n      ...continuousLoggingArgs,\n      ...profilingMetricsArgs,\n      ...observabilityMetricsArgs,\n      ...sparkUIArgs,\n      ...this.checkNoReservedArgs(props.defaultArguments),\n    };\n  }\n\n  /**\n   * Set the arguments for extra {@link Code}-related properties\n   */\n  protected setupExtraCodeArguments(args: { [key: string]: string }, props: SparkExtraCodeProps) {\n    if (props.extraJars && props.extraJars.length > 0) {\n      args['--extra-jars'] = props.extraJars.map(code => this.codeS3ObjectUrl(code)).join(',');\n    }\n    if (props.extraJarsFirst) {\n      args['--user-jars-first'] = 'true';\n    }\n    if (props.extraPythonFiles && props.extraPythonFiles.length > 0) {\n      args['--extra-py-files'] = props.extraPythonFiles.map(code => this.codeS3ObjectUrl(code)).join(',');\n    }\n    if (props.extraFiles && props.extraFiles.length > 0) {\n      args['--extra-files'] = props.extraFiles.map(code => this.codeS3ObjectUrl(code)).join(',');\n    }\n  }\n\n  private setupSparkUILoggingLocation(props: SparkUIProps): SparkUILoggingLocation {\n    validateSparkUiPrefix(props.prefix);\n    const bucket = props.bucket ?? 
new s3.Bucket(this, 'SparkUIBucket', { enforceSSL: true, encryption: s3.BucketEncryption.S3_MANAGED });\n    bucket.grantReadWrite(this, cleanSparkUiPrefixForGrant(props.prefix));\n    return {\n      prefix: props.prefix,\n      bucket,\n    };\n  }\n}\n\nfunction validateSparkUiPrefix(prefix?: string): void {\n  if (!prefix || Token.isUnresolved(prefix)) {\n    // skip validation if prefix is not specified or is a token\n    return;\n  }\n\n  const errors: string[] = [];\n\n  if (!prefix.startsWith('/')) {\n    errors.push('Prefix must begin with \\'/\\'');\n  }\n\n  if (prefix.endsWith('/')) {\n    errors.push('Prefix must not end with \\'/\\'');\n  }\n\n  if (errors.length > 0) {\n    throw new UnscopedValidationError(`Invalid prefix format (value: ${prefix})${EOL}${errors.join(EOL)}`);\n  }\n}\n\nfunction cleanSparkUiPrefixForGrant(prefix?: string): string | undefined {\n  return prefix !== undefined ? prefix.slice(1) + '/*' : undefined;\n}\n"]}