UNPKG

googleapis

Version:
1,047 lines 849 kB
import { OAuth2Client, JWT, Compute, UserRefreshClient, BaseExternalAccountClient, GaxiosResponseWithHTTP2, GoogleConfigurable, MethodOptions, StreamMethodOptions, GlobalOptions, GoogleAuth, BodyResponseCallback, APIRequestContext } from 'googleapis-common';
import { Readable } from 'stream';
export declare namespace dataproc_v1 {
    /**
     * `GlobalOptions` extended with a required `version` field fixed to the
     * literal `'v1'`.
     */
    export interface Options extends GlobalOptions {
        version: 'v1';
    }
    /**
     * Parameters common to requests: the `auth` credential plus the standard
     * fields declared below (`alt`, `fields`, `key`, `prettyPrint`, ...).
     * Every member is optional.
     */
    interface StandardParameters {
        /**
         * Auth client or API Key for the request
         */
        auth?: string | OAuth2Client | JWT | Compute | UserRefreshClient | BaseExternalAccountClient | GoogleAuth;
        /**
         * V1 error format.
         */
        '$.xgafv'?: string;
        /**
         * OAuth access token.
         */
        access_token?: string;
        /**
         * Data format for response.
         */
        alt?: string;
        /**
         * JSONP
         */
        callback?: string;
        /**
         * Selector specifying which fields to include in a partial response.
         */
        fields?: string;
        /**
         * API key. Your API key identifies your project and provides you with API access, quota, and reports. Required unless you provide an OAuth 2.0 token.
         */
        key?: string;
        /**
         * OAuth 2.0 token for the current user.
         */
        oauth_token?: string;
        /**
         * Returns response with indentations and line breaks.
         */
        prettyPrint?: boolean;
        /**
         * Available to use for quota purposes for server-side applications. Can be any arbitrary string assigned to a user, but should not exceed 40 characters.
         */
        quotaUser?: string;
        /**
         * Legacy upload protocol for media (e.g. "media", "multipart").
         */
        uploadType?: string;
        /**
         * Upload protocol for media (e.g. "raw", "multipart").
         */
        upload_protocol?: string;
    }
    /**
     * Cloud Dataproc API
     *
     * Manages Hadoop-based clusters and jobs on Google Cloud Platform.
     *
     * @example
     * ```js
     * const {google} = require('googleapis');
     * const dataproc = google.dataproc('v1');
     * ```
     */
    export class Dataproc {
        /** Request context associated with this API instance. */
        context: APIRequestContext;
        /** Handle for the `projects` resource. */
        projects: Resource$Projects;
        /**
         * @param options Global options for the API instance.
         * @param google Optional configurable parent object.
         */
        constructor(options: GlobalOptions, google?: GoogleConfigurable);
    }
    /**
     * Specifies the type and number of accelerator cards attached to the instances of an instance. See GPUs on Compute Engine (https://cloud.google.com/compute/docs/gpus/).
     */
    export interface Schema$AcceleratorConfig {
        /**
         * The number of the accelerator cards of this type exposed to this instance.
         */
        acceleratorCount?: number | null;
        /**
         * Full URL, partial URI, or short name of the accelerator type resource to expose to this instance. See Compute Engine AcceleratorTypes (https://cloud.google.com/compute/docs/reference/v1/acceleratorTypes).
         *
         * Examples:
         * - https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-t4
         * - projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-t4
         * - nvidia-tesla-t4
         *
         * Auto Zone Exception: If you are using the Dataproc Auto Zone Placement (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) feature, you must use the short name of the accelerator type resource, for example, nvidia-tesla-t4.
         */
        acceleratorTypeUri?: string | null;
    }
    /**
     * Environment details of a Spark Application.
     */
    export interface Schema$AccessSessionSparkApplicationEnvironmentInfoResponse {
        /**
         * Details about the Environment that the application is running in.
         */
        applicationEnvironmentInfo?: Schema$ApplicationEnvironmentInfo;
    }
    /**
     * Details of a particular job associated with a Spark Application.
     */
    export interface Schema$AccessSessionSparkApplicationJobResponse {
        /**
         * Output only. Data corresponding to a spark job.
         */
        jobData?: Schema$JobData;
    }
    /**
     * A summary of a Spark Application.
     */
    export interface Schema$AccessSessionSparkApplicationResponse {
        /**
         * Output only. High level information corresponding to an application.
         */
        application?: Schema$ApplicationInfo;
    }
    /**
     * Details of a query for a Spark Application.
     */
    export interface Schema$AccessSessionSparkApplicationSqlQueryResponse {
        /**
         * SQL Execution Data
         */
        executionData?: Schema$SqlExecutionUiData;
    }
    /**
     * SparkPlanGraph for a Spark Application execution limited to maximum 10000 clusters.
     */
    export interface Schema$AccessSessionSparkApplicationSqlSparkPlanGraphResponse {
        /**
         * SparkPlanGraph for a Spark Application execution.
         */
        sparkPlanGraph?: Schema$SparkPlanGraph;
    }
    /**
     * Stage Attempt for a Stage of a Spark Application.
     */
    export interface Schema$AccessSessionSparkApplicationStageAttemptResponse {
        /**
         * Output only. Data corresponding to a stage.
         */
        stageData?: Schema$StageData;
    }
    /**
     * RDD operation graph for a Spark Application Stage limited to maximum 10000 clusters.
     */
    export interface Schema$AccessSessionSparkApplicationStageRddOperationGraphResponse {
        /**
         * RDD operation graph for a Spark Application Stage.
         */
        rddOperationGraph?: Schema$RddOperationGraph;
    }
    /**
     * Environment details of a Spark Application.
     */
    export interface Schema$AccessSparkApplicationEnvironmentInfoResponse {
        /**
         * Details about the Environment that the application is running in.
         */
        applicationEnvironmentInfo?: Schema$ApplicationEnvironmentInfo;
    }
    /**
     * Details of a particular job associated with a Spark Application.
     */
    export interface Schema$AccessSparkApplicationJobResponse {
        /**
         * Output only. Data corresponding to a spark job.
         */
        jobData?: Schema$JobData;
    }
    /**
     * A summary of a Spark Application.
     */
    export interface Schema$AccessSparkApplicationResponse {
        /**
         * Output only. High level information corresponding to an application.
         */
        application?: Schema$ApplicationInfo;
    }
    /**
     * Details of a query for a Spark Application.
     */
    export interface Schema$AccessSparkApplicationSqlQueryResponse {
        /**
         * SQL Execution Data
         */
        executionData?: Schema$SqlExecutionUiData;
    }
    /**
     * SparkPlanGraph for a Spark Application execution limited to maximum 10000 clusters.
     */
    export interface Schema$AccessSparkApplicationSqlSparkPlanGraphResponse {
        /**
         * SparkPlanGraph for a Spark Application execution.
         */
        sparkPlanGraph?: Schema$SparkPlanGraph;
    }
    /**
     * Stage Attempt for a Stage of a Spark Application.
     */
    export interface Schema$AccessSparkApplicationStageAttemptResponse {
        /**
         * Output only. Data corresponding to a stage.
         */
        stageData?: Schema$StageData;
    }
    /**
     * RDD operation graph for a Spark Application Stage limited to maximum 10000 clusters.
     */
    export interface Schema$AccessSparkApplicationStageRddOperationGraphResponse {
        /**
         * RDD operation graph for a Spark Application Stage.
         */
        rddOperationGraph?: Schema$RddOperationGraph;
    }
    /**
     * Record with an id, a name, an update and a value, all declared as
     * optional nullable strings.
     * NOTE(review): this type carries no description in the declarations;
     * presumably it describes a Spark accumulator - confirm against the
     * Dataproc API reference.
     */
    export interface Schema$AccumulableInfo {
        /**
         * The spelling (double "l") is the declared field name; callers must
         * use it exactly as written.
         */
        accumullableInfoId?: string | null;
        name?: string | null;
        update?: string | null;
        value?: string | null;
    }
    /**
     * A request to analyze a batch workload.
     */
    export interface Schema$AnalyzeBatchRequest {
        /**
         * Optional. A unique ID used to identify the request. If the service receives two AnalyzeBatchRequests (http://cloud/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.AnalyzeBatchRequest) with the same request_id, the second request is ignored and the Operation that corresponds to the first request created and stored in the backend is returned.
         *
         * Recommendation: Set this value to a UUID (https://en.wikipedia.org/wiki/Universally_unique_identifier).
         *
         * The value must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). The maximum length is 40 characters.
         */
        requestId?: string | null;
        /**
         * Optional. The requestor ID is used to identify if the request comes from a GCA investigation or the old Ask Gemini Experience.
         */
        requestorId?: string | null;
    }
    /**
     * Metadata describing the Analyze operation.
     */
    export interface Schema$AnalyzeOperationMetadata {
        /**
         * Output only. Name of the workload being analyzed.
         */
        analyzedWorkloadName?: string | null;
        /**
         * Output only. Type of the workload being analyzed.
         */
        analyzedWorkloadType?: string | null;
        /**
         * Output only. Unique identifier of the workload, typically generated by the control plane, e.g. the batch UUID.
         */
        analyzedWorkloadUuid?: string | null;
        /**
         * Output only. The time when the operation was created.
         */
        createTime?: string | null;
        /**
         * Output only. Short description of the operation.
         */
        description?: string | null;
        /**
         * Output only. The time when the operation finished.
         */
        doneTime?: string | null;
        /**
         * Output only. Labels associated with the operation.
         */
        labels?: {
            [key: string]: string;
        } | null;
        /**
         * Output only. Warnings encountered during operation execution.
         */
        warnings?: string[] | null;
    }
    /**
     * Specific attempt of an application.
     *
     * NOTE(review): `durationMillis`, `endTime`, `lastUpdated` and `startTime`
     * are declared as strings here; their exact format is not stated in these
     * declarations - confirm against the Dataproc API reference before parsing.
     */
    export interface Schema$ApplicationAttemptInfo {
        appSparkVersion?: string | null;
        attemptId?: string | null;
        completed?: boolean | null;
        durationMillis?: string | null;
        endTime?: string | null;
        lastUpdated?: string | null;
        sparkUser?: string | null;
        startTime?: string | null;
    }
    /**
     * Details about the Environment that the application is running in.
     *
     * The `*Properties` members and `classpathEntries` are string-to-string
     * maps; `resourceProfiles` and `runtime` use their own schema types.
     */
    export interface Schema$ApplicationEnvironmentInfo {
        classpathEntries?: {
            [key: string]: string;
        } | null;
        hadoopProperties?: {
            [key: string]: string;
        } | null;
        metricsProperties?: {
            [key: string]: string;
        } | null;
        resourceProfiles?: Schema$ResourceProfileInfo[];
        runtime?: Schema$SparkRuntimeInfo;
        sparkProperties?: {
            [key: string]: string;
        } | null;
        systemProperties?: {
            [key: string]: string;
        } | null;
    }
    /**
     * High level information corresponding to an application.
*/
    export interface Schema$ApplicationInfo {
        applicationContextIngestionStatus?: string | null;
        applicationId?: string | null;
        /** Attempts recorded for this application, one entry per attempt. */
        attempts?: Schema$ApplicationAttemptInfo[];
        coresGranted?: number | null;
        coresPerExecutor?: number | null;
        maxCores?: number | null;
        /** NOTE(review): unit is presumably megabytes, per the `Mb` suffix - confirm. */
        memoryPerExecutorMb?: number | null;
        name?: string | null;
        quantileDataStatus?: string | null;
    }
    /**
     * Pair of numeric counters: completed jobs and completed stages.
     * NOTE(review): this type carries no description in the declarations;
     * presumably it summarizes a Spark application - confirm against the
     * Dataproc API reference.
     */
    export interface Schema$AppSummary {
        numCompletedJobs?: number | null;
        numCompletedStages?: number | null;
    }
    /**
     * Authentication configuration for a workload is used to set the default identity for the workload execution. The config specifies the type of identity (service account or user) that will be used by workloads to access resources on the project(s).
     */
    export interface Schema$AuthenticationConfig {
        /**
         * Optional. Authentication type for the user workload running in containers.
         */
        userWorkloadAuthenticationType?: string | null;
    }
    /**
     * Autoscaling Policy config associated with the cluster.
     */
    export interface Schema$AutoscalingConfig {
        /**
         * Optional. The autoscaling policy used by the cluster.
         *
         * Only resource names including projectid and location (region) are valid. Examples:
         * - https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]
         * - projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]
         *
         * Note that the policy must be in the same project and Dataproc region.
         */
        policyUri?: string | null;
    }
    /**
     * Describes an autoscaling policy for Dataproc cluster autoscaler.
     */
    export interface Schema$AutoscalingPolicy {
        /**
         * Settings of type `Schema$BasicAutoscalingAlgorithm` (the basic
         * autoscaling algorithm).
         */
        basicAlgorithm?: Schema$BasicAutoscalingAlgorithm;
        /**
         * Optional. The type of the clusters for which this autoscaling policy is to be configured.
         */
        clusterType?: string | null;
        /**
         * Required. The policy id.
         *
         * The id must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of between 3 and 50 characters.
         */
        id?: string | null;
        /**
         * Optional. The labels to associate with this autoscaling policy. Label keys must contain 1 to 63 characters, and must conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be associated with an autoscaling policy.
         */
        labels?: {
            [key: string]: string;
        } | null;
        /**
         * Output only. The "resource name" of the autoscaling policy, as described in https://cloud.google.com/apis/design/resource_names.
         *
         * For projects.regions.autoscalingPolicies, the resource name of the policy has the following format: projects/{project_id\}/regions/{region\}/autoscalingPolicies/{policy_id\}
         *
         * For projects.locations.autoscalingPolicies, the resource name of the policy has the following format: projects/{project_id\}/locations/{location\}/autoscalingPolicies/{policy_id\}
         */
        name?: string | null;
        /**
         * Optional. Describes how the autoscaler will operate for secondary workers.
         */
        secondaryWorkerConfig?: Schema$InstanceGroupAutoscalingPolicyConfig;
        /**
         * Required. Describes how the autoscaler will operate for primary workers.
         */
        workerConfig?: Schema$InstanceGroupAutoscalingPolicyConfig;
    }
    /**
     * Autotuning configuration of the workload.
     */
    export interface Schema$AutotuningConfig {
        /**
         * Optional. Scenarios for which tunings are applied.
         */
        scenarios?: string[] | null;
    }
    /**
     * Node group identification and configuration information.
     */
    export interface Schema$AuxiliaryNodeGroup {
        /**
         * Required. Node group configuration.
         */
        nodeGroup?: Schema$NodeGroup;
        /**
         * Optional. A node group ID. Generated if not specified.
         *
         * The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), and hyphens (-). Cannot begin or end with underscore or hyphen. Must consist of from 3 to 33 characters.
         */
        nodeGroupId?: string | null;
    }
    /**
     * Auxiliary services configuration for a Cluster.
*/ export interface Schema$AuxiliaryServicesConfig { /** * Optional. The Hive Metastore configuration for this workload. */ metastoreConfig?: Schema$MetastoreConfig; /** * Optional. The Spark History Server configuration for the workload. */ sparkHistoryServerConfig?: Schema$SparkHistoryServerConfig; } /** * Basic algorithm for autoscaling. */ export interface Schema$BasicAutoscalingAlgorithm { /** * Optional. Duration between scaling events. A scaling period starts after the update operation from the previous event has completed.Bounds: 2m, 1d. Default: 2m. */ cooldownPeriod?: string | null; /** * Optional. Spark Standalone autoscaling configuration */ sparkStandaloneConfig?: Schema$SparkStandaloneAutoscalingConfig; /** * Optional. YARN autoscaling configuration. */ yarnConfig?: Schema$BasicYarnAutoscalingConfig; } /** * Basic autoscaling configurations for YARN. */ export interface Schema$BasicYarnAutoscalingConfig { /** * Required. Timeout for YARN graceful decommissioning of Node Managers. Specifies the duration to wait for jobs to complete before forcefully removing workers (and potentially interrupting jobs). Only applicable to downscaling operations.Bounds: 0s, 1d. */ gracefulDecommissionTimeout?: string | null; /** * Required. Fraction of average YARN pending memory in the last cooldown period for which to remove workers. A scale-down factor of 1 will result in scaling down so that there is no available memory remaining after the update (more aggressive scaling). A scale-down factor of 0 disables removing workers, which can be beneficial for autoscaling a single job. See How autoscaling works (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works) for more information.Bounds: 0.0, 1.0. */ scaleDownFactor?: number | null; /** * Optional. Minimum scale-down threshold as a fraction of total cluster size before scaling occurs. 
For example, in a 20-worker cluster, a threshold of 0.1 means the autoscaler must recommend at least a 2 worker scale-down for the cluster to scale. A threshold of 0 means the autoscaler will scale down on any recommended change.Bounds: 0.0, 1.0. Default: 0.0. */ scaleDownMinWorkerFraction?: number | null; /** * Required. Fraction of average YARN pending memory in the last cooldown period for which to add workers. A scale-up factor of 1.0 will result in scaling up so that there is no pending memory remaining after the update (more aggressive scaling). A scale-up factor closer to 0 will result in a smaller magnitude of scaling up (less aggressive scaling). See How autoscaling works (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works) for more information.Bounds: 0.0, 1.0. */ scaleUpFactor?: number | null; /** * Optional. Minimum scale-up threshold as a fraction of total cluster size before scaling occurs. For example, in a 20-worker cluster, a threshold of 0.1 means the autoscaler must recommend at least a 2-worker scale-up for the cluster to scale. A threshold of 0 means the autoscaler will scale up on any recommended change.Bounds: 0.0, 1.0. Default: 0.0. */ scaleUpMinWorkerFraction?: number | null; } /** * A representation of a batch workload in the service. */ export interface Schema$Batch { /** * Output only. The time when the batch was created. */ createTime?: string | null; /** * Output only. The email address of the user who created the batch. */ creator?: string | null; /** * Optional. Environment configuration for the batch execution. */ environmentConfig?: Schema$EnvironmentConfig; /** * Optional. The labels to associate with this batch. Label keys must contain 1 to 63 characters, and must conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). 
No more than 32 labels can be associated with a batch. */ labels?: { [key: string]: string; } | null; /** * Output only. The resource name of the batch. */ name?: string | null; /** * Output only. The resource name of the operation associated with this batch. */ operation?: string | null; /** * Optional. PySpark batch config. */ pysparkBatch?: Schema$PySparkBatch; /** * Optional. Runtime configuration for the batch execution. */ runtimeConfig?: Schema$RuntimeConfig; /** * Output only. Runtime information about batch execution. */ runtimeInfo?: Schema$RuntimeInfo; /** * Optional. Spark batch config. */ sparkBatch?: Schema$SparkBatch; /** * Optional. SparkR batch config. */ sparkRBatch?: Schema$SparkRBatch; /** * Optional. SparkSql batch config. */ sparkSqlBatch?: Schema$SparkSqlBatch; /** * Output only. The state of the batch. */ state?: string | null; /** * Output only. Historical state information for the batch. */ stateHistory?: Schema$StateHistory[]; /** * Output only. Batch state details, such as a failure description if the state is FAILED. */ stateMessage?: string | null; /** * Output only. The time when the batch entered a current state. */ stateTime?: string | null; /** * Output only. A batch UUID (Unique Universal Identifier). The service generates this value when it creates the batch. */ uuid?: string | null; } /** * Metadata describing the Batch operation. */ export interface Schema$BatchOperationMetadata { /** * Name of the batch for the operation. */ batch?: string | null; /** * Batch UUID for the operation. */ batchUuid?: string | null; /** * The time when the operation was created. */ createTime?: string | null; /** * Short description of the operation. */ description?: string | null; /** * The time when the operation finished. */ doneTime?: string | null; /** * Labels associated with the operation. */ labels?: { [key: string]: string; } | null; /** * The operation type. 
*/ operationType?: string | null; /** * Warnings encountered during operation execution. */ warnings?: string[] | null; } /** * Associates members, or principals, with a role. */ export interface Schema$Binding { /** * The condition that is associated with this binding.If the condition evaluates to true, then this binding applies to the current request.If the condition evaluates to false, then this binding does not apply to the current request. However, a different role binding might grant the same role to one or more of the principals in this binding.To learn which resources support conditions in their IAM policies, see the IAM documentation (https://cloud.google.com/iam/help/conditions/resource-policies). */ condition?: Schema$Expr; /** * Specifies the principals requesting access for a Google Cloud resource. members can have the following values: allUsers: A special identifier that represents anyone who is on the internet; with or without a Google account. allAuthenticatedUsers: A special identifier that represents anyone who is authenticated with a Google account or a service account. Does not include identities that come from external identity providers (IdPs) through identity federation. user:{emailid\}: An email address that represents a specific Google account. For example, alice@example.com . serviceAccount:{emailid\}: An email address that represents a Google service account. For example, my-other-app@appspot.gserviceaccount.com. serviceAccount:{projectid\}.svc.id.goog[{namespace\}/{kubernetes-sa\}]: An identifier for a Kubernetes service account (https://cloud.google.com/kubernetes-engine/docs/how-to/kubernetes-service-accounts). For example, my-project.svc.id.goog[my-namespace/my-kubernetes-sa]. group:{emailid\}: An email address that represents a Google group. For example, admins@example.com. domain:{domain\}: The G Suite domain (primary) that represents all the users of that domain. For example, google.com or example.com. 
principal://iam.googleapis.com/locations/global/workforcePools/{pool_id\}/subject/{subject_attribute_value\}: A single identity in a workforce identity pool. principalSet://iam.googleapis.com/locations/global/workforcePools/{pool_id\}/group/{group_id\}: All workforce identities in a group. principalSet://iam.googleapis.com/locations/global/workforcePools/{pool_id\}/attribute.{attribute_name\}/{attribute_value\}: All workforce identities with a specific attribute value. principalSet://iam.googleapis.com/locations/global/workforcePools/{pool_id\}/x: All identities in a workforce identity pool. principal://iam.googleapis.com/projects/{project_number\}/locations/global/workloadIdentityPools/{pool_id\}/subject/{subject_attribute_value\}: A single identity in a workload identity pool. principalSet://iam.googleapis.com/projects/{project_number\}/locations/global/workloadIdentityPools/{pool_id\}/group/{group_id\}: A workload identity pool group. principalSet://iam.googleapis.com/projects/{project_number\}/locations/global/workloadIdentityPools/{pool_id\}/attribute.{attribute_name\}/{attribute_value\}: All identities in a workload identity pool with a certain attribute. principalSet://iam.googleapis.com/projects/{project_number\}/locations/global/workloadIdentityPools/{pool_id\}/x: All identities in a workload identity pool. deleted:user:{emailid\}?uid={uniqueid\}: An email address (plus unique identifier) representing a user that has been recently deleted. For example, alice@example.com?uid=123456789012345678901. If the user is recovered, this value reverts to user:{emailid\} and the recovered user retains the role in the binding. deleted:serviceAccount:{emailid\}?uid={uniqueid\}: An email address (plus unique identifier) representing a service account that has been recently deleted. For example, my-other-app@appspot.gserviceaccount.com?uid=123456789012345678901. 
If the service account is undeleted, this value reverts to serviceAccount:{emailid\} and the undeleted service account retains the role in the binding. deleted:group:{emailid\}?uid={uniqueid\}: An email address (plus unique identifier) representing a Google group that has been recently deleted. For example, admins@example.com?uid=123456789012345678901. If the group is recovered, this value reverts to group:{emailid\} and the recovered group retains the role in the binding. deleted:principal://iam.googleapis.com/locations/global/workforcePools/{pool_id\}/subject/{subject_attribute_value\}: Deleted single identity in a workforce identity pool. For example, deleted:principal://iam.googleapis.com/locations/global/workforcePools/my-pool-id/subject/my-subject-attribute-value. */ members?: string[] | null; /** * Role that is assigned to the list of members, or principals. For example, roles/viewer, roles/editor, or roles/owner.For an overview of the IAM roles and permissions, see the IAM documentation (https://cloud.google.com/iam/docs/roles-overview). For a list of the available pre-defined roles, see here (https://cloud.google.com/iam/docs/understanding-roles). */ role?: string | null; } /** * Native Build Info */ export interface Schema$BuildInfo { /** * Optional. Build key. */ buildKey?: string | null; /** * Optional. Build value. */ buildValue?: string | null; } /** * A request to cancel a job. */ export interface Schema$CancelJobRequest { } /** * Describes the identifying information, config, and status of a Dataproc cluster */ export interface Schema$Cluster { /** * Required. The cluster name, which must be unique within a project. The name must start with a lowercase letter, and can contain up to 51 lowercase letters, numbers, and hyphens. It cannot end with a hyphen. The name of a deleted cluster can be reused. */ clusterName?: string | null; /** * Output only. A cluster UUID (Unique Universal Identifier). 
Dataproc generates this value when it creates the cluster. */ clusterUuid?: string | null; /** * Optional. The cluster config for a cluster of Compute Engine Instances. Note that Dataproc may set default values, and values may change when clusters are updated.Exactly one of ClusterConfig or VirtualClusterConfig must be specified. */ config?: Schema$ClusterConfig; /** * Optional. The labels to associate with this cluster. Label keys must contain 1 to 63 characters, and must conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). Label values may be empty, but, if present, must contain 1 to 63 characters, and must conform to RFC 1035 (https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be associated with a cluster. */ labels?: { [key: string]: string; } | null; /** * Output only. Contains cluster daemon metrics such as HDFS and YARN stats.Beta Feature: This report is available for testing purposes only. It may be changed before final release. */ metrics?: Schema$ClusterMetrics; /** * Required. The Google Cloud Platform project ID that the cluster belongs to. */ projectId?: string | null; /** * Output only. Cluster status. */ status?: Schema$ClusterStatus; /** * Output only. The previous cluster status. */ statusHistory?: Schema$ClusterStatus[]; /** * Optional. The virtual cluster config is used when creating a Dataproc cluster that does not directly control the underlying compute resources, for example, when creating a Dataproc-on-GKE cluster (https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview). Dataproc may set default values, and values may change when clusters are updated. Exactly one of config or virtual_cluster_config must be specified. */ virtualClusterConfig?: Schema$VirtualClusterConfig; } /** * The cluster config. */ export interface Schema$ClusterConfig { /** * Optional. Autoscaling config for the policy associated with the cluster. Cluster does not autoscale if this field is unset. 
*/ autoscalingConfig?: Schema$AutoscalingConfig; /** * Optional. The node group settings. */ auxiliaryNodeGroups?: Schema$AuxiliaryNodeGroup[]; /** * Optional. The cluster tier. */ clusterTier?: string | null; /** * Optional. The type of the cluster. */ clusterType?: string | null; /** * Optional. A Cloud Storage bucket used to stage job dependencies, config files, and job driver console output. If you do not specify a staging bucket, Cloud Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's staging bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. */ configBucket?: string | null; /** * Optional. The config for Dataproc metrics. */ dataprocMetricConfig?: Schema$DataprocMetricConfig; /** * Optional. A Cloud Storage bucket used to collect checkpoint diagnostic data (https://cloud.google.com/dataproc/docs/support/diagnose-clusters#checkpoint_diagnostic_data). If you do not specify a diagnostic bucket, Cloud Dataproc will use the Dataproc temp bucket to collect the checkpoint diagnostic data. This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. */ diagnosticBucket?: string | null; /** * Optional. Encryption settings for the cluster. */ encryptionConfig?: Schema$EncryptionConfig; /** * Optional. Port/endpoint configuration for this cluster */ endpointConfig?: Schema$EndpointConfig; /** * Optional. The shared Compute Engine config settings for all instances in a cluster. */ gceClusterConfig?: Schema$GceClusterConfig; /** * Optional. BETA. The Kubernetes Engine config for Dataproc clusters deployed to The Kubernetes Engine config for Dataproc clusters deployed to Kubernetes. 
These config settings are mutually exclusive with Compute Engine-based options, such as gce_cluster_config, master_config, worker_config, secondary_worker_config, and autoscaling_config. */ gkeClusterConfig?: Schema$GkeClusterConfig; /** * Optional. Commands to execute on each node after config is completed. By default, executables are run on master and all worker nodes. You can test a node's role metadata to run an executable on a master or worker node, as shown below using curl (you can also use wget): ROLE=$(curl -H Metadata-Flavor:Google http://metadata/computeMetadata/v1/instance/attributes/dataproc-role) if [[ "${ROLE\}" == 'Master' ]]; then ... master specific actions ... else ... worker specific actions ... fi */ initializationActions?: Schema$NodeInitializationAction[]; /** * Optional. Lifecycle setting for the cluster. */ lifecycleConfig?: Schema$LifecycleConfig; /** * Optional. The Compute Engine config settings for the cluster's master instance. */ masterConfig?: Schema$InstanceGroupConfig; /** * Optional. Metastore configuration. */ metastoreConfig?: Schema$MetastoreConfig; /** * Optional. The Compute Engine config settings for a cluster's secondary worker instances */ secondaryWorkerConfig?: Schema$InstanceGroupConfig; /** * Optional. Security settings for the cluster. */ securityConfig?: Schema$SecurityConfig; /** * Optional. The config settings for cluster software. */ softwareConfig?: Schema$SoftwareConfig; /** * Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data, such as Spark and MapReduce history files. If you do not specify a temp bucket, Dataproc will determine a Cloud Storage location (US, ASIA, or EU) for your cluster's temp bucket according to the Compute Engine zone where your cluster is deployed, and then create and manage this project-level, per-location bucket. 
The default bucket has a TTL of 90 days, but you can use any TTL (or none) if you specify a bucket (see Dataproc staging and temp buckets (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). This field requires a Cloud Storage bucket name, not a gs://... URI to a Cloud Storage bucket. */ tempBucket?: string | null; /** * Optional. The Compute Engine config settings for the cluster's worker instances. */ workerConfig?: Schema$InstanceGroupConfig; } /** * Contains cluster daemon metrics, such as HDFS and YARN stats.Beta Feature: This report is available for testing purposes only. It may be changed before final release. */ export interface Schema$ClusterMetrics { /** * The HDFS metrics. */ hdfsMetrics?: { [key: string]: string; } | null; /** * YARN metrics. */ yarnMetrics?: { [key: string]: string; } | null; } /** * The cluster operation triggered by a workflow. */ export interface Schema$ClusterOperation { /** * Output only. Indicates the operation is done. */ done?: boolean | null; /** * Output only. Error, if operation failed. */ error?: string | null; /** * Output only. The id of the cluster operation. */ operationId?: string | null; } /** * Metadata describing the operation. */ export interface Schema$ClusterOperationMetadata { /** * Output only. Child operation ids */ childOperationIds?: string[] | null; /** * Output only. Name of the cluster for the operation. */ clusterName?: string | null; /** * Output only. Cluster UUID for the operation. */ clusterUuid?: string | null; /** * Output only. Short description of operation. */ description?: string | null; /** * Output only. Labels associated with the operation */ labels?: { [key: string]: string; } | null; /** * Output only. The operation type. */ operationType?: string | null; /** * Output only. Current operation status. */ status?: Schema$ClusterOperationStatus; /** * Output only. The previous operation status. 
*/ statusHistory?: Schema$ClusterOperationStatus[]; /** * Output only. Errors encountered during operation execution. */ warnings?: string[] | null; } /** * The status of the operation. */ export interface Schema$ClusterOperationStatus { /** * Output only. A message containing any operation metadata details. */ details?: string | null; /** * Output only. A message containing the detailed operation state. */ innerState?: string | null; /** * Output only. A message containing the operation state. */ state?: string | null; /** * Output only. The time this state was entered. */ stateStartTime?: string | null; } /** * A selector that chooses the target cluster for jobs based on metadata. */ export interface Schema$ClusterSelector { /** * Required. The cluster labels. Cluster must have all labels to match. */ clusterLabels?: { [key: string]: string; } | null; /** * Optional. The zone where the workflow process executes. This parameter does not affect the selection of the cluster. If unspecified, the zone of the first cluster matching the selector is used. */ zone?: string | null; } /** * The status of a cluster and its instances. */ export interface Schema$ClusterStatus { /** * Optional. Output only. Details of the cluster's state. */ detail?: string | null; /** * Output only. The cluster's state. */ state?: string | null; /** * Output only. Time when this state was entered (see JSON representation of Timestamp (https://developers.google.com/protocol-buffers/docs/proto3#json)). */ stateStartTime?: string | null; /** * Output only. Additional state information that includes status reported by the agent. */ substate?: string | null; } /** * Cluster to be repaired. */ export interface Schema$ClusterToRepair { /** * Required. Repair action to take on the cluster resource. 
*/ clusterRepairAction?: string | null; } /** * Confidential Instance Config for clusters using Confidential VMs (https://cloud.google.com/compute/confidential-vm/docs). */ export interface Schema$ConfidentialInstanceConfig { /** * Optional. Defines whether the instance should have confidential compute enabled. */ enableConfidentialCompute?: boolean | null; } /** * Consolidated summary about executors used by the application. The individual fields carry no descriptions in this declaration. NOTE(review): several totals (for example diskUsed, maxMemory, memoryUsed, totalDurationMillis, totalGcTimeMillis, totalInputBytes, totalShuffleRead, totalShuffleWrite) are declared as strings rather than numbers - presumably 64-bit integers serialized as strings; confirm before parsing. NOTE(review): isExcluded is declared as a number, not a boolean - confirm its meaning against the API reference. */ export interface Schema$ConsolidatedExecutorSummary { activeTasks?: number | null; completedTasks?: number | null; count?: number | null; diskUsed?: string | null; failedTasks?: number | null; isExcluded?: number | null; maxMemory?: string | null; memoryMetrics?: Schema$MemoryMetrics; memoryUsed?: string | null; rddBlocks?: number | null; totalCores?: number | null; totalDurationMillis?: string | null; totalGcTimeMillis?: string | null; totalInputBytes?: string | null; totalShuffleRead?: string | null; totalShuffleWrite?: string | null; totalTasks?: number | null; } /** * Dataproc metric config. */ export interface Schema$DataprocMetricConfig { /** * Required. Metrics sources to enable. */ metrics?: Schema$Metric[]; } /** * A request to collect cluster diagnostic information. */ export interface Schema$DiagnoseClusterRequest { /** * Optional. Time interval in which diagnosis should be carried out on the cluster. */ diagnosisInterval?: Schema$Interval; /** * Optional. DEPRECATED Specifies the job on which diagnosis is to be performed. Format: projects/{project\}/regions/{region\}/jobs/{job\} */ job?: string | null; /** * Optional. Specifies a list of jobs on which diagnosis is to be performed. Format: projects/{project\}/regions/{region\}/jobs/{job\} */ jobs?: string[] | null; /** * Optional. The access type to the diagnostic tarball. If not specified, falls back to default access of the bucket. */ tarballAccess?: string | null; /** * Optional. (Optional) The output Cloud Storage directory for the diagnostic tarball. 
If not specified, a task-specific directory in the cluster's staging bucket will be used. */ tarballGcsDir?: string | null; /** * Optional. DEPRECATED Specifies the YARN application on which diagnosis is to be performed. */ yarnApplicationId?: string | null; /** * Optional. Specifies a list of YARN applications on which diagnosis is to be performed. */ yarnApplicationIds?: string[] | null; } /** * The location of diagnostic output. */ export interface Schema$DiagnoseClusterResults { /** * Output only. The Cloud Storage URI of the diagnostic output. The output report is a plain text file with a summary of collected diagnostics. */ outputUri?: string | null; } /** * Specifies the config of boot disk and attached disk options for a group of VM instances. NOTE(review): bootDiskProvisionedIops and bootDiskProvisionedThroughput are declared as strings while the other numeric settings here are numbers - presumably 64-bit integers serialized as strings; confirm before parsing. */ export interface Schema$DiskConfig { /** * Optional. Indicates how many IOPS to provision for the disk. This sets the number of I/O operations per second that the disk can handle. This field is supported only if boot_disk_type is hyperdisk-balanced. */ bootDiskProvisionedIops?: string | null; /** * Optional. Indicates how much throughput to provision for the disk. This sets the number of throughput mb per second that the disk can handle. Values must be greater than or equal to 1. This field is supported only if boot_disk_type is hyperdisk-balanced. */ bootDiskProvisionedThroughput?: string | null; /** * Optional. Size in GB of the boot disk (default is 500GB). */ bootDiskSizeGb?: number | null; /** * Optional. Type of the boot disk (default is "pd-standard"). Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive), "pd-ssd" (Persistent Disk Solid State Drive), or "pd-standard" (Persistent Disk Hard Disk Drive). See Disk types (https://cloud.google.com/compute/docs/disks#disk-types). */ bootDiskType?: string | null; /** * Optional. Interface type of local SSDs (default is "scsi"). Valid values: "scsi" (Small Computer System Interface), "nvme" (Non-Volatile Memory Express). 
See local SSD performance (https://cloud.google.com/compute/docs/disks/local-ssd#performance). */ localSsdInterface?: string | null; /** * Optional. Number of attached SSDs, from 0 to 8 (default is 0). If SSDs are not attached, the boot disk is used to store runtime logs and HDFS (https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data. If one or more SSDs are attached, this runtime bulk data is spread across them, and the boot disk contains only basic config and installed binaries. Note: Local SSD options may vary by machine type and number of vCPUs selected. */ numLocalSsds?: number | null; } /** * Driver scheduling configuration. */ export interface Schema$DriverSchedulingConfig { /** * Required. The amount of memory in MB the driver is requesting. */ memoryMb?: number | null; /** * Required. The number of vCPUs the driver is requesting. */ vcores?: number | null; } /** * A generic empty message that you can re-use to avoid defining duplicated empty messages in your APIs. A typical example is to use it as the request or the response type of an API method. For instance: service Foo { rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty); \} */ export interface Schema$Empty { } /** * Encryption settings for the cluster. */ export interface Schema$EncryptionConfig { /** * Optional. The Cloud KMS key resource name to use for persistent disk encryption for all instances in the cluster. See Use CMEK with cluster data (https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/customer-managed-encryption#use_cmek_with_cluster_data) for more information. */ gcePdKmsKeyName?: string | null; /** * Optional. The Cloud KMS key resource name to use for cluster persistent disk and job argument encryption. 
See Use CMEK with cluster data (https://cloud.google.com//dataproc/docs/concepts/configuring-clusters/customer-managed-encryption#use_cmek_with_cluster_data) for more information.When this key resource name is provided, the following job arguments of the following job types submitted to the cluster are encrypted using CMEK: FlinkJob args (https://cloud.google.com/dataproc/docs/reference/rest/v1/FlinkJob) HadoopJob args (https://cloud.google.com/dataproc/docs/reference/rest/v1/HadoopJob) SparkJob args (https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkJob) SparkRJob args (https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkRJob) PySparkJob args (https://cloud.google.com/dataproc/docs/reference/rest/v1/PySparkJob) SparkSqlJob (https://cloud.google.com/dataproc/docs/reference/rest/v1/SparkSqlJob) scriptVariables and queryList.queries HiveJob (https://cloud.google.com/dataproc/docs/reference/rest/v1/HiveJob) scriptVariables and queryList.queries PigJob (https://cloud.google.com/dataproc/docs/reference/rest/v1/PigJob) scriptVariables and queryList.queries PrestoJob (https://cloud.google.com/dataproc/docs/reference/rest/v1/PrestoJob) scriptVariables and queryList.queries */ kmsKey?: string | null; } /** * Endpoint config for this cluster. */ export interface Schema$EndpointConfig { /** * Optional. If true, enable HTTP access to specific ports on the cluster from external sources. Defaults to false. */ enableHttpPortAccess?: boolean | null; /** * Output only. The map of port descriptions to URLs. Will only be populated if enable_http_port_access (declared here as enableHttpPortAccess) is true. */ httpPorts?: { [key: string]: string; } | null; } /** * Environment configuration for a workload. */ export interface Schema$EnvironmentConfig { /** * Optional. Execution configuration for a workload. */ executionConfig?: Schema$ExecutionConfig; /** * Optional. Peripherals configuration that workload has access to. 
*/ peripheralsConfig?: Schema$PeripheralsConfig; } /** * Execution configuration for a workload. */ export interface Schema$ExecutionConfig { /** * Optional. Authentication configuration used to set the default identity for the workload execution. The config specifies the type of identity (service account or user) that will be used by workloads to access resources on the project(s). */ authenticationConfig?: Schema$AuthenticationConf