/**
* @pulumi/databricks
*
* A Pulumi package for creating and managing Databricks cloud resources.
*/
import * as pulumi from "@pulumi/pulumi";
import * as inputs from "./types/input";
import * as outputs from "./types/output";
/**
* ## Import
*
* The resource cluster can be imported using its cluster ID.
*
* ```sh
* $ pulumi import databricks:index/cluster:Cluster this <cluster-id>
* ```
*/
export declare class Cluster extends pulumi.CustomResource {
/**
* Get an existing Cluster resource's state with the given name, ID, and optional extra
* properties used to qualify the lookup.
*
* @param name The _unique_ name of the resulting resource.
* @param id The _unique_ provider ID of the resource to lookup.
* @param state Any extra arguments used during the lookup.
* @param opts Optional settings to control the behavior of the CustomResource.
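*
* A minimal lookup sketch (the cluster ID below is a placeholder):
*
* ```typescript
* import * as databricks from "@pulumi/databricks";
*
* // Adopt the state of an existing cluster under the logical name "existing".
* const existing = databricks.Cluster.get("existing", "0123-456789-abcdef12");
* ```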
*/
static get(name: string, id: pulumi.Input<pulumi.ID>, state?: ClusterState, opts?: pulumi.CustomResourceOptions): Cluster;
/**
* Returns true if the given object is an instance of Cluster. This is designed to work even
* when multiple copies of the Pulumi SDK have been loaded into the same process.
*/
static isInstance(obj: any): obj is Cluster;
/**
* Whether to use policy default values for missing cluster attributes.
*/
readonly applyPolicyDefaultValues: pulumi.Output<boolean | undefined>;
readonly autoscale: pulumi.Output<outputs.ClusterAutoscale | undefined>;
/**
* Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to `60`. *We highly recommend having this setting present for Interactive/BI clusters.*
*/
readonly autoterminationMinutes: pulumi.Output<number | undefined>;
readonly awsAttributes: pulumi.Output<outputs.ClusterAwsAttributes | undefined>;
readonly azureAttributes: pulumi.Output<outputs.ClusterAzureAttributes | undefined>;
readonly clusterId: pulumi.Output<string>;
readonly clusterLogConf: pulumi.Output<outputs.ClusterClusterLogConf | undefined>;
readonly clusterMountInfos: pulumi.Output<outputs.ClusterClusterMountInfo[] | undefined>;
/**
* Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
*/
readonly clusterName: pulumi.Output<string | undefined>;
/**
* Additional tags for cluster resources. Databricks tags all cluster resources (such as cloud instances and disk volumes) with these tags in addition to `defaultTags`. For a High-Concurrency cluster with Table Access Control, the tag `ResourceClass` should be set to the value `Serverless`.
*
* For example:
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const smallest = databricks.getNodeType({
* localDisk: true,
* });
* const latestLts = databricks.getSparkVersion({
* longTermSupport: true,
* });
* const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", {
* clusterName: "Shared High-Concurrency",
* sparkVersion: latestLts.then(latestLts => latestLts.id),
* nodeTypeId: smallest.then(smallest => smallest.id),
* autoterminationMinutes: 20,
* sparkConf: {
* "spark.databricks.repl.allowedLanguages": "python,sql",
* "spark.databricks.cluster.profile": "serverless",
* },
* customTags: {
* ResourceClass: "Serverless",
* },
* });
* ```
*/
readonly customTags: pulumi.Output<{
[key: string]: string;
} | undefined>;
/**
* Select the security features of the cluster (see [API docs](https://docs.databricks.com/api/workspace/clusters/create#data_security_mode) for full list of values). [Unity Catalog requires](https://docs.databricks.com/data-governance/unity-catalog/compute.html#create-clusters--sql-warehouses-with-unity-catalog-access) `SINGLE_USER` or `USER_ISOLATION` mode. `LEGACY_PASSTHROUGH` for passthrough cluster and `LEGACY_TABLE_ACL` for Table ACL cluster. If omitted, default security features are enabled. To disable security features use `NONE` or legacy mode `NO_ISOLATION`. If `kind` is specified, then the following options are available:
* * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.
* * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.
* * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.
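*
* For example, a minimal sketch of a Unity Catalog shared cluster (the cluster name and sizing below are illustrative):
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const smallest = databricks.getNodeType({
* localDisk: true,
* });
* const latestLts = databricks.getSparkVersion({
* longTermSupport: true,
* });
* const sharedUnityCatalog = new databricks.Cluster("shared_unity_catalog", {
* clusterName: "Shared Unity Catalog",
* sparkVersion: latestLts.then(latestLts => latestLts.id),
* nodeTypeId: smallest.then(smallest => smallest.id),
* autoterminationMinutes: 20,
* numWorkers: 1,
* dataSecurityMode: "USER_ISOLATION",
* });
* ```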
*/
readonly dataSecurityMode: pulumi.Output<string | undefined>;
/**
* (map) Tags that are added by Databricks by default, regardless of any `customTags` that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: <Databricks internal use>, and any workspace and pool tags.
*/
readonly defaultTags: pulumi.Output<{
[key: string]: string;
}>;
readonly dockerImage: pulumi.Output<outputs.ClusterDockerImage | undefined>;
/**
* similar to `instancePoolId`, but for driver node. If omitted, and `instancePoolId` is specified, then the driver will be allocated from that pool.
*/
readonly driverInstancePoolId: pulumi.Output<string>;
/**
* The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as `nodeTypeId` defined above.
*/
readonly driverNodeTypeId: pulumi.Output<string>;
/**
* If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have `autoterminationMinutes` and `autoscale` attributes set. More documentation available at [cluster configuration page](https://docs.databricks.com/clusters/configure.html#autoscaling-local-storage-1).
*/
readonly enableElasticDisk: pulumi.Output<boolean>;
/**
* Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. *Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.*
*/
readonly enableLocalDiskEncryption: pulumi.Output<boolean>;
readonly gcpAttributes: pulumi.Output<outputs.ClusterGcpAttributes | undefined>;
/**
* An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
*/
readonly idempotencyToken: pulumi.Output<string | undefined>;
readonly initScripts: pulumi.Output<outputs.ClusterInitScript[] | undefined>;
/**
* To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to `TERMINATED`, the instances it used are returned to the pool and reused by a different cluster.
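*
* A sketch of creating a `databricks.InstancePool` and attaching a cluster to it (the pool name and sizing values are illustrative):
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const smallest = databricks.getNodeType({
* localDisk: true,
* });
* const latestLts = databricks.getSparkVersion({
* longTermSupport: true,
* });
* const sharedPool = new databricks.InstancePool("shared_pool", {
* instancePoolName: "Shared Pool",
* minIdleInstances: 0,
* maxCapacity: 10,
* nodeTypeId: smallest.then(smallest => smallest.id),
* idleInstanceAutoterminationMinutes: 10,
* });
* const pooledCluster = new databricks.Cluster("pooled_cluster", {
* clusterName: "Pooled",
* sparkVersion: latestLts.then(latestLts => latestLts.id),
* instancePoolId: sharedPool.id,
* autoterminationMinutes: 20,
* numWorkers: 1,
* });
* ```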
*/
readonly instancePoolId: pulumi.Output<string | undefined>;
/**
* Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The maximum number of pinned clusters is [limited to 100](https://docs.databricks.com/clusters/clusters-manage.html#pin-a-cluster), so `pulumi up` may fail if you have more than that (this number may change over time, so check the Databricks documentation for the current limit).
*/
readonly isPinned: pulumi.Output<boolean | undefined>;
/**
* When set to true, Databricks will automatically set single node related `customTags`, `sparkConf`, and `numWorkers`.
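*
* A sketch of a single-node cluster using this flag (assumes the `CLASSIC_PREVIEW` compute kind described under `kind` is enabled in the workspace):
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const smallest = databricks.getNodeType({
* localDisk: true,
* });
* const latestLts = databricks.getSparkVersion({
* longTermSupport: true,
* });
* const singleNode = new databricks.Cluster("single_node", {
* clusterName: "Single Node",
* sparkVersion: latestLts.then(latestLts => latestLts.id),
* nodeTypeId: smallest.then(smallest => smallest.id),
* autoterminationMinutes: 20,
* kind: "CLASSIC_PREVIEW",
* isSingleNode: true,
* });
* ```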
*/
readonly isSingleNode: pulumi.Output<boolean | undefined>;
/**
* The kind of compute described by this compute specification. Possible values (see [API docs](https://docs.databricks.com/api/workspace/clusters/create#kind) for full list): `CLASSIC_PREVIEW` (if corresponding public preview is enabled).
*/
readonly kind: pulumi.Output<string | undefined>;
readonly libraries: pulumi.Output<outputs.ClusterLibrary[] | undefined>;
/**
* If true, the provider will not wait for the cluster to reach `RUNNING` state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed).
*
* The following example demonstrates how to create an autoscaling cluster with [Delta Cache](https://docs.databricks.com/delta/optimizations/delta-cache.html) enabled:
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const smallest = databricks.getNodeType({
* localDisk: true,
* });
* const latestLts = databricks.getSparkVersion({
* longTermSupport: true,
* });
* const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", {
* clusterName: "Shared Autoscaling",
* sparkVersion: latestLts.then(latestLts => latestLts.id),
* nodeTypeId: smallest.then(smallest => smallest.id),
* autoterminationMinutes: 20,
* autoscale: {
* minWorkers: 1,
* maxWorkers: 50,
* },
* sparkConf: {
* "spark.databricks.io.cache.enabled": "true",
* "spark.databricks.io.cache.maxDiskUsage": "50g",
* "spark.databricks.io.cache.maxMetaDataCache": "1g",
* },
* });
* ```
*/
readonly noWait: pulumi.Output<boolean | undefined>;
/**
* Any supported databricks.getNodeType id. If `instancePoolId` is specified, this field is not needed.
*/
readonly nodeTypeId: pulumi.Output<string>;
/**
* Number of worker nodes that this cluster should have. A cluster has one Spark driver and `numWorkers` executors for a total of `numWorkers` + 1 Spark nodes.
*/
readonly numWorkers: pulumi.Output<number | undefined>;
/**
* Identifier of a Cluster Policy to validate the cluster against and to pre-set certain defaults. *The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters.* For example, when you specify the `policyId` of an [external metastore](https://docs.databricks.com/administration-guide/clusters/policies.html#external-metastore-policy) policy, you still have to fill in the relevant keys for `sparkConf`. If the relevant fields aren't filled in, configuration drift will be detected on each plan/apply, and Pulumi will try to apply the detected changes.
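*
* A sketch pairing a cluster with a policy created in the same program (the policy name and definition below are illustrative):
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const smallest = databricks.getNodeType({
* localDisk: true,
* });
* const latestLts = databricks.getSparkVersion({
* longTermSupport: true,
* });
* const autoTerminatePolicy = new databricks.ClusterPolicy("auto_terminate_policy", {
* name: "Enforce Autotermination",
* definition: JSON.stringify({
* autotermination_minutes: {
* type: "fixed",
* value: 20,
* },
* }),
* });
* const policedCluster = new databricks.Cluster("policed_cluster", {
* clusterName: "Policy Scoped",
* sparkVersion: latestLts.then(latestLts => latestLts.id),
* nodeTypeId: smallest.then(smallest => smallest.id),
* policyId: autoTerminatePolicy.id,
* applyPolicyDefaultValues: true,
* numWorkers: 1,
* });
* ```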
*/
readonly policyId: pulumi.Output<string | undefined>;
/**
* The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the sparkVersion value. Allowed values include: `PHOTON`, `STANDARD`.
*/
readonly runtimeEngine: pulumi.Output<string | undefined>;
/**
* The optional user name of the user (or group name if `kind` is specified) to assign to an interactive cluster. This field is required when using `dataSecurityMode` set to `SINGLE_USER` or AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
*/
readonly singleUserName: pulumi.Output<string | undefined>;
/**
* Map with key-value pairs to fine-tune Spark clusters. For a High-Concurrency cluster with Table Access Control, it should have the following items:
* * `spark.databricks.repl.allowedLanguages` set to a list of supported languages, for example `python,sql` or `python,sql,r`. Scala is not supported!
* * `spark.databricks.cluster.profile` set to `serverless`
*/
readonly sparkConf: pulumi.Output<{
[key: string]: string;
} | undefined>;
/**
* Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
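*
* A sketch of passing environment variables to the driver and workers (the variable names are hypothetical):
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const smallest = databricks.getNodeType({
* localDisk: true,
* });
* const latestLts = databricks.getSparkVersion({
* longTermSupport: true,
* });
* const withEnvVars = new databricks.Cluster("with_env_vars", {
* clusterName: "With Env Vars",
* sparkVersion: latestLts.then(latestLts => latestLts.id),
* nodeTypeId: smallest.then(smallest => smallest.id),
* autoterminationMinutes: 20,
* numWorkers: 1,
* sparkEnvVars: {
* LOG_LEVEL: "INFO",
* PIPELINE_ENV: "dev",
* },
* });
* ```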
*/
readonly sparkEnvVars: pulumi.Output<{
[key: string]: string;
} | undefined>;
/**
* [Runtime version](https://docs.databricks.com/runtime/index.html) of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
*/
readonly sparkVersion: pulumi.Output<string>;
/**
* SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name `ubuntu` on port `2200`. You can specify up to 10 keys.
*/
readonly sshPublicKeys: pulumi.Output<string[] | undefined>;
/**
* (string) State of the cluster.
*/
readonly state: pulumi.Output<string>;
readonly url: pulumi.Output<string>;
/**
* Whether the ML runtime should be selected or not. The actual runtime is determined by `sparkVersion` (DBR release), this field `useMlRuntime`, and whether `nodeTypeId` is a GPU node or not.
*/
readonly useMlRuntime: pulumi.Output<boolean | undefined>;
readonly workloadType: pulumi.Output<outputs.ClusterWorkloadType | undefined>;
/**
* Create a Cluster resource with the given unique name, arguments, and options.
*
* @param name The _unique_ name of the resource.
* @param args The arguments to use to populate this resource's properties.
* @param opts A bag of options that control this resource's behavior.
*/
constructor(name: string, args: ClusterArgs, opts?: pulumi.CustomResourceOptions);
}
/**
* Input properties used for looking up and filtering Cluster resources.
*/
export interface ClusterState {
/**
* Whether to use policy default values for missing cluster attributes.
*/
applyPolicyDefaultValues?: pulumi.Input<boolean>;
autoscale?: pulumi.Input<inputs.ClusterAutoscale>;
/**
* Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to `60`. *We highly recommend having this setting present for Interactive/BI clusters.*
*/
autoterminationMinutes?: pulumi.Input<number>;
awsAttributes?: pulumi.Input<inputs.ClusterAwsAttributes>;
azureAttributes?: pulumi.Input<inputs.ClusterAzureAttributes>;
clusterId?: pulumi.Input<string>;
clusterLogConf?: pulumi.Input<inputs.ClusterClusterLogConf>;
clusterMountInfos?: pulumi.Input<pulumi.Input<inputs.ClusterClusterMountInfo>[]>;
/**
* Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
*/
clusterName?: pulumi.Input<string>;
/**
* Additional tags for cluster resources. Databricks tags all cluster resources (such as cloud instances and disk volumes) with these tags in addition to `defaultTags`. For a High-Concurrency cluster with Table Access Control, the tag `ResourceClass` should be set to the value `Serverless`.
*
* For example:
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const smallest = databricks.getNodeType({
* localDisk: true,
* });
* const latestLts = databricks.getSparkVersion({
* longTermSupport: true,
* });
* const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", {
* clusterName: "Shared High-Concurrency",
* sparkVersion: latestLts.then(latestLts => latestLts.id),
* nodeTypeId: smallest.then(smallest => smallest.id),
* autoterminationMinutes: 20,
* sparkConf: {
* "spark.databricks.repl.allowedLanguages": "python,sql",
* "spark.databricks.cluster.profile": "serverless",
* },
* customTags: {
* ResourceClass: "Serverless",
* },
* });
* ```
*/
customTags?: pulumi.Input<{
[key: string]: pulumi.Input<string>;
}>;
/**
* Select the security features of the cluster (see [API docs](https://docs.databricks.com/api/workspace/clusters/create#data_security_mode) for full list of values). [Unity Catalog requires](https://docs.databricks.com/data-governance/unity-catalog/compute.html#create-clusters--sql-warehouses-with-unity-catalog-access) `SINGLE_USER` or `USER_ISOLATION` mode. `LEGACY_PASSTHROUGH` for passthrough cluster and `LEGACY_TABLE_ACL` for Table ACL cluster. If omitted, default security features are enabled. To disable security features use `NONE` or legacy mode `NO_ISOLATION`. If `kind` is specified, then the following options are available:
* * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.
* * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.
* * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.
*/
dataSecurityMode?: pulumi.Input<string>;
/**
* (map) Tags that are added by Databricks by default, regardless of any `customTags` that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: <Databricks internal use>, and any workspace and pool tags.
*/
defaultTags?: pulumi.Input<{
[key: string]: pulumi.Input<string>;
}>;
dockerImage?: pulumi.Input<inputs.ClusterDockerImage>;
/**
* similar to `instancePoolId`, but for driver node. If omitted, and `instancePoolId` is specified, then the driver will be allocated from that pool.
*/
driverInstancePoolId?: pulumi.Input<string>;
/**
* The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as `nodeTypeId` defined above.
*/
driverNodeTypeId?: pulumi.Input<string>;
/**
* If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have `autoterminationMinutes` and `autoscale` attributes set. More documentation available at [cluster configuration page](https://docs.databricks.com/clusters/configure.html#autoscaling-local-storage-1).
*/
enableElasticDisk?: pulumi.Input<boolean>;
/**
* Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. *Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.*
*/
enableLocalDiskEncryption?: pulumi.Input<boolean>;
gcpAttributes?: pulumi.Input<inputs.ClusterGcpAttributes>;
/**
* An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
*/
idempotencyToken?: pulumi.Input<string>;
initScripts?: pulumi.Input<pulumi.Input<inputs.ClusterInitScript>[]>;
/**
* To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to `TERMINATED`, the instances it used are returned to the pool and reused by a different cluster.
*/
instancePoolId?: pulumi.Input<string>;
/**
* Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The maximum number of pinned clusters is [limited to 100](https://docs.databricks.com/clusters/clusters-manage.html#pin-a-cluster), so `pulumi up` may fail if you have more than that (this number may change over time, so check the Databricks documentation for the current limit).
*/
isPinned?: pulumi.Input<boolean>;
/**
* When set to true, Databricks will automatically set single node related `customTags`, `sparkConf`, and `numWorkers`.
*/
isSingleNode?: pulumi.Input<boolean>;
/**
* The kind of compute described by this compute specification. Possible values (see [API docs](https://docs.databricks.com/api/workspace/clusters/create#kind) for full list): `CLASSIC_PREVIEW` (if corresponding public preview is enabled).
*/
kind?: pulumi.Input<string>;
libraries?: pulumi.Input<pulumi.Input<inputs.ClusterLibrary>[]>;
/**
* If true, the provider will not wait for the cluster to reach `RUNNING` state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed).
*
* The following example demonstrates how to create an autoscaling cluster with [Delta Cache](https://docs.databricks.com/delta/optimizations/delta-cache.html) enabled:
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const smallest = databricks.getNodeType({
* localDisk: true,
* });
* const latestLts = databricks.getSparkVersion({
* longTermSupport: true,
* });
* const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", {
* clusterName: "Shared Autoscaling",
* sparkVersion: latestLts.then(latestLts => latestLts.id),
* nodeTypeId: smallest.then(smallest => smallest.id),
* autoterminationMinutes: 20,
* autoscale: {
* minWorkers: 1,
* maxWorkers: 50,
* },
* sparkConf: {
* "spark.databricks.io.cache.enabled": "true",
* "spark.databricks.io.cache.maxDiskUsage": "50g",
* "spark.databricks.io.cache.maxMetaDataCache": "1g",
* },
* });
* ```
*/
noWait?: pulumi.Input<boolean>;
/**
* Any supported databricks.getNodeType id. If `instancePoolId` is specified, this field is not needed.
*/
nodeTypeId?: pulumi.Input<string>;
/**
* Number of worker nodes that this cluster should have. A cluster has one Spark driver and `numWorkers` executors for a total of `numWorkers` + 1 Spark nodes.
*/
numWorkers?: pulumi.Input<number>;
/**
* Identifier of a Cluster Policy to validate the cluster against and to pre-set certain defaults. *The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters.* For example, when you specify the `policyId` of an [external metastore](https://docs.databricks.com/administration-guide/clusters/policies.html#external-metastore-policy) policy, you still have to fill in the relevant keys for `sparkConf`. If the relevant fields aren't filled in, configuration drift will be detected on each plan/apply, and Pulumi will try to apply the detected changes.
*/
policyId?: pulumi.Input<string>;
/**
* The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the sparkVersion value. Allowed values include: `PHOTON`, `STANDARD`.
*/
runtimeEngine?: pulumi.Input<string>;
/**
* The optional user name of the user (or group name if `kind` is specified) to assign to an interactive cluster. This field is required when using `dataSecurityMode` set to `SINGLE_USER` or AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
*/
singleUserName?: pulumi.Input<string>;
/**
* Map with key-value pairs to fine-tune Spark clusters. For a High-Concurrency cluster with Table Access Control, it should have the following items:
* * `spark.databricks.repl.allowedLanguages` set to a list of supported languages, for example `python,sql` or `python,sql,r`. Scala is not supported!
* * `spark.databricks.cluster.profile` set to `serverless`
*/
sparkConf?: pulumi.Input<{
[key: string]: pulumi.Input<string>;
}>;
/**
* Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
*/
sparkEnvVars?: pulumi.Input<{
[key: string]: pulumi.Input<string>;
}>;
/**
* [Runtime version](https://docs.databricks.com/runtime/index.html) of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
*/
sparkVersion?: pulumi.Input<string>;
/**
* SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name `ubuntu` on port `2200`. You can specify up to 10 keys.
*/
sshPublicKeys?: pulumi.Input<pulumi.Input<string>[]>;
/**
* (string) State of the cluster.
*/
state?: pulumi.Input<string>;
url?: pulumi.Input<string>;
/**
* Whether the ML runtime should be selected or not. The actual runtime is determined by `sparkVersion` (DBR release), this field `useMlRuntime`, and whether `nodeTypeId` is a GPU node or not.
*/
useMlRuntime?: pulumi.Input<boolean>;
workloadType?: pulumi.Input<inputs.ClusterWorkloadType>;
}
/**
* The set of arguments for constructing a Cluster resource.
*/
export interface ClusterArgs {
/**
* Whether to use policy default values for missing cluster attributes.
*/
applyPolicyDefaultValues?: pulumi.Input<boolean>;
autoscale?: pulumi.Input<inputs.ClusterAutoscale>;
/**
* Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to `60`. *We highly recommend having this setting present for Interactive/BI clusters.*
*/
autoterminationMinutes?: pulumi.Input<number>;
awsAttributes?: pulumi.Input<inputs.ClusterAwsAttributes>;
azureAttributes?: pulumi.Input<inputs.ClusterAzureAttributes>;
clusterLogConf?: pulumi.Input<inputs.ClusterClusterLogConf>;
clusterMountInfos?: pulumi.Input<pulumi.Input<inputs.ClusterClusterMountInfo>[]>;
/**
* Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
*/
clusterName?: pulumi.Input<string>;
/**
* Additional tags for cluster resources. Databricks tags all cluster resources (such as cloud instances and disk volumes) with these tags in addition to `defaultTags`. For a High-Concurrency cluster with Table Access Control, the tag `ResourceClass` should be set to the value `Serverless`.
*
* For example:
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const smallest = databricks.getNodeType({
* localDisk: true,
* });
* const latestLts = databricks.getSparkVersion({
* longTermSupport: true,
* });
* const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", {
* clusterName: "Shared High-Concurrency",
* sparkVersion: latestLts.then(latestLts => latestLts.id),
* nodeTypeId: smallest.then(smallest => smallest.id),
* autoterminationMinutes: 20,
* sparkConf: {
* "spark.databricks.repl.allowedLanguages": "python,sql",
* "spark.databricks.cluster.profile": "serverless",
* },
* customTags: {
* ResourceClass: "Serverless",
* },
* });
* ```
*/
customTags?: pulumi.Input<{
[key: string]: pulumi.Input<string>;
}>;
/**
* Select the security features of the cluster (see [API docs](https://docs.databricks.com/api/workspace/clusters/create#data_security_mode) for full list of values). [Unity Catalog requires](https://docs.databricks.com/data-governance/unity-catalog/compute.html#create-clusters--sql-warehouses-with-unity-catalog-access) `SINGLE_USER` or `USER_ISOLATION` mode. `LEGACY_PASSTHROUGH` for passthrough cluster and `LEGACY_TABLE_ACL` for Table ACL cluster. If omitted, default security features are enabled. To disable security features use `NONE` or legacy mode `NO_ISOLATION`. If `kind` is specified, then the following options are available:
* * `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.
* * `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.
* * `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.
*/
dataSecurityMode?: pulumi.Input<string>;
dockerImage?: pulumi.Input<inputs.ClusterDockerImage>;
/**
* similar to `instancePoolId`, but for driver node. If omitted, and `instancePoolId` is specified, then the driver will be allocated from that pool.
*/
driverInstancePoolId?: pulumi.Input<string>;
/**
* The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as `nodeTypeId` defined above.
*/
driverNodeTypeId?: pulumi.Input<string>;
/**
* If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have `autoterminationMinutes` and `autoscale` attributes set. More documentation available at [cluster configuration page](https://docs.databricks.com/clusters/configure.html#autoscaling-local-storage-1).
*/
enableElasticDisk?: pulumi.Input<boolean>;
/**
* Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. *Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.*
*/
enableLocalDiskEncryption?: pulumi.Input<boolean>;
gcpAttributes?: pulumi.Input<inputs.ClusterGcpAttributes>;
/**
* An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
*/
idempotencyToken?: pulumi.Input<string>;
initScripts?: pulumi.Input<pulumi.Input<inputs.ClusterInitScript>[]>;
/**
* To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to `TERMINATED`, the instances it used are returned to the pool and reused by a different cluster.
*/
instancePoolId?: pulumi.Input<string>;
/**
* Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The maximum number of pinned clusters is [limited to 100](https://docs.databricks.com/clusters/clusters-manage.html#pin-a-cluster), so `pulumi up` may fail if you have more than that (this number may change over time, so check the Databricks documentation for the current limit).
*/
isPinned?: pulumi.Input<boolean>;
/**
* When set to true, Databricks will automatically set single node related `customTags`, `sparkConf`, and `numWorkers`.
*/
isSingleNode?: pulumi.Input<boolean>;
/**
* The kind of compute described by this compute specification. Possible values (see [API docs](https://docs.databricks.com/api/workspace/clusters/create#kind) for full list): `CLASSIC_PREVIEW` (if corresponding public preview is enabled).
*/
kind?: pulumi.Input<string>;
libraries?: pulumi.Input<pulumi.Input<inputs.ClusterLibrary>[]>;
/**
* If true, the provider will not wait for the cluster to reach `RUNNING` state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed).
*
* The following example demonstrates how to create an autoscaling cluster with [Delta Cache](https://docs.databricks.com/delta/optimizations/delta-cache.html) enabled:
*
* ```typescript
* import * as pulumi from "@pulumi/pulumi";
* import * as databricks from "@pulumi/databricks";
*
* const smallest = databricks.getNodeType({
* localDisk: true,
* });
* const latestLts = databricks.getSparkVersion({
* longTermSupport: true,
* });
* const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", {
* clusterName: "Shared Autoscaling",
* sparkVersion: latestLts.then(latestLts => latestLts.id),
* nodeTypeId: smallest.then(smallest => smallest.id),
* autoterminationMinutes: 20,
* autoscale: {
* minWorkers: 1,
* maxWorkers: 50,
* },
* sparkConf: {
* "spark.databricks.io.cache.enabled": "true",
* "spark.databricks.io.cache.maxDiskUsage": "50g",
* "spark.databricks.io.cache.maxMetaDataCache": "1g",
* },
* });
* ```
*/
noWait?: pulumi.Input<boolean>;
/**
* Any supported databricks.getNodeType id. If `instancePoolId` is specified, this field is not needed.
*/
nodeTypeId?: pulumi.Input<string>;
/**
* Number of worker nodes that this cluster should have. A cluster has one Spark driver and `numWorkers` executors for a total of `numWorkers` + 1 Spark nodes.
*/
numWorkers?: pulumi.Input<number>;
/**
* Identifier of a Cluster Policy to validate the cluster against and to pre-set certain defaults. *The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters.* For example, when you specify the `policyId` of an [external metastore](https://docs.databricks.com/administration-guide/clusters/policies.html#external-metastore-policy) policy, you still have to fill in the relevant keys for `sparkConf`. If the relevant fields aren't filled in, configuration drift will be detected on each plan/apply, and Pulumi will try to apply the detected changes.
*/
policyId?: pulumi.Input<string>;
/**
* The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the sparkVersion value. Allowed values include: `PHOTON`, `STANDARD`.
*/
runtimeEngine?: pulumi.Input<string>;
/**
* The optional user name of the user (or group name if `kind` is specified) to assign to an interactive cluster. This field is required when using `dataSecurityMode` set to `SINGLE_USER` or AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
*/
singleUserName?: pulumi.Input<string>;
/**
* Map with key-value pairs to fine-tune Spark clusters. For a High-Concurrency cluster with Table Access Control, it should have the following items:
* * `spark.databricks.repl.allowedLanguages` set to a list of supported languages, for example `python,sql` or `python,sql,r`. Scala is not supported!
* * `spark.databricks.cluster.profile` set to `serverless`
*/
sparkConf?: pulumi.Input<{
[key: string]: pulumi.Input<string>;
}>;
/**
* Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
*/
sparkEnvVars?: pulumi.Input<{
[key: string]: pulumi.Input<string>;
}>;
/**
* [Runtime version](https://docs.databricks.com/runtime/index.html) of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
*/
sparkVersion: pulumi.Input<string>;
/**
* SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name `ubuntu` on port `2200`. You can specify up to 10 keys.
*/
sshPublicKeys?: pulumi.Input<pulumi.Input<string>[]>;
/**
* Whether the ML runtime should be selected or not. The actual runtime is determined by `sparkVersion` (DBR release), this field `useMlRuntime`, and whether `nodeTypeId` is a GPU node or not.
*/
useMlRuntime?: pulumi.Input<boolean>;
workloadType?: pulumi.Input<inputs.ClusterWorkloadType>;
}