/**
* Copyright 2015 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { AxiosPromise } from 'axios';
import { GoogleApis } from '../..';
import { BodyResponseCallback, GlobalOptions, MethodOptions } from '../../lib/api';
/**
* Dataflow API
*
* Manages Google Cloud Dataflow projects on Google Cloud Platform.
*
* @example
* const google = require('googleapis');
* const dataflow = google.dataflow('v1b3');
*
* @namespace dataflow
* @type {Function}
* @version v1b3
* @variation v1b3
* @param {object=} options Options for Dataflow
*/
export declare class Dataflow {
_options: GlobalOptions;
google: GoogleApis;
root: this;
projects: Resource$Projects;
constructor(options: GlobalOptions, google: GoogleApis);
getRoot(): this;
}
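/*
 * Hedged usage sketch (not part of the declarations): listing Dataflow jobs
 * through the generated client. The `projects.jobs.list` call and its
 * parameters are not shown in this excerpt, so treat them as assumptions;
 * authentication setup is omitted.
 *
 *   const google = require('googleapis');
 *   const dataflow = google.dataflow('v1b3');
 *   dataflow.projects.jobs.list(
 *       {projectId: 'my-project'},           // hypothetical project ID
 *       (err, res) => {
 *         if (err) throw err;
 *         console.log(res.data);             // a Schema$ListJobsResponse
 *       });
 */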
/**
* Obsolete in favor of ApproximateReportedProgress and ApproximateSplitRequest.
*/
export interface Schema$ApproximateProgress {
/**
* Obsolete.
*/
percentComplete: number;
/**
* Obsolete.
*/
position: Schema$Position;
/**
* Obsolete.
*/
remainingTime: string;
}
/**
* A progress measurement of a WorkItem by a worker.
*/
export interface Schema$ApproximateReportedProgress {
/**
* Total amount of parallelism in the portion of input of this task that has
* already been consumed and is no longer active. In the first two examples
* above (see remaining_parallelism), the value should be 29 or 2
* respectively. The sum of remaining_parallelism and consumed_parallelism
* should equal the total amount of parallelism in this work item. If
* specified, must be finite.
*/
consumedParallelism: Schema$ReportedParallelism;
/**
* Completion as fraction of the input consumed, from 0.0 (beginning, nothing
* consumed), to 1.0 (end of the input, entire input consumed).
*/
fractionConsumed: number;
/**
* A Position within the work to represent a progress.
*/
position: Schema$Position;
/**
* Total amount of parallelism in the input of this task that remains, (i.e.
* can be delegated to this task and any new tasks via dynamic splitting).
* Always at least 1 for non-finished work items and 0 for finished.
* "Amount of parallelism" refers to how many non-empty parts of the
 * input can be read in parallel. This does not necessarily equal the number of
* records. An input that can be read in parallel down to the individual
* records is called "perfectly splittable". An example of
* non-perfectly parallelizable input is a block-compressed file format where
* a block of records has to be read as a whole, but different blocks can be
* read in parallel. Examples: * If we are processing record #30 (starting at
* 1) out of 50 in a perfectly splittable 50-record input, this value should
* be 21 (20 remaining + 1 current). * If we are reading through block 3 in
* a block-compressed file consisting of 5 blocks, this value should be 3
* (since blocks 4 and 5 can be processed in parallel by new tasks via
* dynamic splitting and the current task remains processing block 3). * If
* we are reading through the last block in a block-compressed file, or
* reading or processing the last record in a perfectly splittable input,
* this value should be 1, because apart from the current task, no additional
* remainder can be split off.
*/
remainingParallelism: Schema$ReportedParallelism;
}
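/*
 * Illustrative sketch (assumptions noted inline): the "record #30 of 50"
 * example from the remainingParallelism comment above, expressed as a
 * Partial object. Schema$ReportedParallelism is not part of this excerpt,
 * so the parallelism values are only noted in comments.
 *
 *   // Processing record #30 (1-based) of 50 in a perfectly splittable input:
 *   const progress: Partial<Schema$ApproximateReportedProgress> = {
 *     fractionConsumed: 29 / 50,  // 29 records fully consumed so far (assumed reading)
 *     // remainingParallelism -> 21 (20 remaining + 1 current, per the comment above)
 *     // consumedParallelism  -> 29
 *   };
 */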
/**
* A suggestion by the service to the worker to dynamically split the WorkItem.
*/
export interface Schema$ApproximateSplitRequest {
/**
* A fraction at which to split the work item, from 0.0 (beginning of the
* input) to 1.0 (end of the input).
*/
fractionConsumed: number;
/**
* A Position at which to split the work item.
*/
position: Schema$Position;
}
/**
* A structured message reporting an autoscaling decision made by the Dataflow
* service.
*/
export interface Schema$AutoscalingEvent {
/**
* The current number of workers the job has.
*/
currentNumWorkers: string;
/**
* A message describing why the system decided to adjust the current number of
* workers, why it failed, or why the system decided to not make any changes
* to the number of workers.
*/
description: Schema$StructuredMessage;
/**
* The type of autoscaling event to report.
*/
eventType: string;
/**
* The target number of workers the worker pool wants to resize to use.
*/
targetNumWorkers: string;
/**
* The time this event was emitted to indicate a new target or current
* num_workers value.
*/
time: string;
}
/**
* Settings for WorkerPool autoscaling.
*/
export interface Schema$AutoscalingSettings {
/**
* The algorithm to use for autoscaling.
*/
algorithm: string;
/**
* The maximum number of workers to cap scaling at.
*/
maxNumWorkers: number;
}
/**
* Description of an interstitial value between transforms in an execution
* stage.
*/
export interface Schema$ComponentSource {
/**
* Dataflow service generated name for this source.
*/
name: string;
/**
* User name for the original user transform or collection with which this
* source is most closely associated.
*/
originalTransformOrCollection: string;
/**
* Human-readable name for this transform; may be user or system generated.
*/
userName: string;
}
/**
* Description of a transform executed as part of an execution stage.
*/
export interface Schema$ComponentTransform {
/**
* Dataflow service generated name for this source.
*/
name: string;
/**
* User name for the original user transform with which this transform is most
* closely associated.
*/
originalTransform: string;
/**
* Human-readable name for this transform; may be user or system generated.
*/
userName: string;
}
/**
* All configuration data for a particular Computation.
*/
export interface Schema$ComputationTopology {
/**
* The ID of the computation.
*/
computationId: string;
/**
* The inputs to the computation.
*/
inputs: Schema$StreamLocation[];
/**
* The key ranges processed by the computation.
*/
keyRanges: Schema$KeyRangeLocation[];
/**
* The outputs from the computation.
*/
outputs: Schema$StreamLocation[];
/**
* The state family values.
*/
stateFamilies: Schema$StateFamilyConfig[];
/**
* The system stage name.
*/
systemStageName: string;
}
/**
* A position that encapsulates an inner position and an index for the inner
* position. A ConcatPosition can be used by a reader of a source that
* encapsulates a set of other sources.
*/
export interface Schema$ConcatPosition {
/**
* Index of the inner source.
*/
index: number;
/**
* Position within the inner source.
*/
position: Schema$Position;
}
/**
* CounterMetadata includes all static non-name non-value counter attributes.
*/
export interface Schema$CounterMetadata {
/**
* Human-readable description of the counter semantics.
*/
description: string;
/**
* Counter aggregation kind.
*/
kind: string;
/**
* A string referring to the unit type.
*/
otherUnits: string;
/**
* System defined Units, see above enum.
*/
standardUnits: string;
}
/**
* Identifies a counter within a per-job namespace. Counters whose structured
* names are the same get merged into a single value for the job.
*/
export interface Schema$CounterStructuredName {
/**
* Name of the optimized step being executed by the workers.
*/
componentStepName: string;
/**
* Name of the stage. An execution step contains multiple component steps.
*/
executionStepName: string;
/**
* Index of an input collection that's being read from/written to as a
* side input. The index identifies a step's side inputs starting by 1
* (e.g. the first side input has input_index 1, the third has input_index 3).
* Side inputs are identified by a pair of (original_step_name, input_index).
* This field helps uniquely identify them.
*/
inputIndex: number;
/**
* Counter name. Not necessarily globally-unique, but unique within the
* context of the other fields. Required.
*/
name: string;
/**
* One of the standard Origins defined above.
*/
origin: string;
/**
* The step name requesting an operation, such as GBK. I.e. the ParDo causing
* a read/write from shuffle to occur, or a read from side inputs.
*/
originalRequestingStepName: string;
/**
* System generated name of the original step in the user's graph, before
* optimization.
*/
originalStepName: string;
/**
* A string containing a more specific namespace of the counter's origin.
*/
originNamespace: string;
/**
* Portion of this counter, either key or value.
*/
portion: string;
/**
* ID of a particular worker.
*/
workerId: string;
}
/**
* A single message which encapsulates structured name and metadata for a given
* counter.
*/
export interface Schema$CounterStructuredNameAndMetadata {
/**
* Metadata associated with a counter
*/
metadata: Schema$CounterMetadata;
/**
* Structured name of the counter.
*/
name: Schema$CounterStructuredName;
}
/**
* An update to a Counter sent from a worker.
*/
export interface Schema$CounterUpdate {
/**
* Boolean value for And, Or.
*/
boolean: boolean;
/**
* True if this counter is reported as the total cumulative aggregate value
* accumulated since the worker started working on this WorkItem. By default
* this is false, indicating that this counter is reported as a delta.
*/
cumulative: boolean;
/**
* Distribution data
*/
distribution: Schema$DistributionUpdate;
/**
* Floating point value for Sum, Max, Min.
*/
floatingPoint: number;
/**
* List of floating point numbers, for Set.
*/
floatingPointList: Schema$FloatingPointList;
/**
* Floating point mean aggregation value for Mean.
*/
floatingPointMean: Schema$FloatingPointMean;
/**
* Integer value for Sum, Max, Min.
*/
integer: Schema$SplitInt64;
/**
* Gauge data
*/
integerGauge: Schema$IntegerGauge;
/**
* List of integers, for Set.
*/
integerList: Schema$IntegerList;
/**
* Integer mean aggregation value for Mean.
*/
integerMean: Schema$IntegerMean;
/**
* Value for internally-defined counters used by the Dataflow service.
*/
internal: any;
/**
* Counter name and aggregation type.
*/
nameAndKind: Schema$NameAndKind;
/**
* The service-generated short identifier for this counter. The short_id ->
* (name, metadata) mapping is constant for the lifetime of a job.
*/
shortId: string;
/**
* List of strings, for Set.
*/
stringList: Schema$StringList;
/**
* Counter structured name and metadata.
*/
structuredNameAndMetadata: Schema$CounterStructuredNameAndMetadata;
}
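/*
 * Illustrative sketch: a delta "Sum" counter update using the unstructured
 * nameAndKind form. The counter name is hypothetical; only fields declared
 * in this file are used.
 *
 *   const update: Partial<Schema$CounterUpdate> = {
 *     nameAndKind: {name: 'bytes_read', kind: 'Sum'},  // hypothetical counter
 *     cumulative: false,       // reported as a delta (the default)
 *     floatingPoint: 2048,     // Sum/Max/Min carry a floatingPoint or integer value
 *   };
 */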
/**
* Modeled after information exposed by /proc/stat.
*/
export interface Schema$CPUTime {
/**
* Average CPU utilization rate (% non-idle cpu / second) since previous
* sample.
*/
rate: number;
/**
* Timestamp of the measurement.
*/
timestamp: string;
/**
 * Total active CPU time across all cores (i.e., non-idle) in milliseconds
* since start-up.
*/
totalMs: string;
}
/**
* A request to create a Cloud Dataflow job from a template.
*/
export interface Schema$CreateJobFromTemplateRequest {
/**
* The runtime environment for the job.
*/
environment: Schema$RuntimeEnvironment;
/**
* Required. A Cloud Storage path to the template from which to create the
* job. Must be a valid Cloud Storage URL, beginning with `gs://`.
*/
gcsPath: string;
/**
* Required. The job name to use for the created job.
*/
jobName: string;
/**
* The location to which to direct the request.
*/
location: string;
/**
* The runtime parameters to pass to the job.
*/
parameters: any;
}
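/*
 * Illustrative sketch: a minimal request body for creating a job from a
 * template. The Cloud Storage path and parameter names are hypothetical;
 * environment (Schema$RuntimeEnvironment) is omitted because its shape is
 * not part of this excerpt.
 *
 *   const request: Partial<Schema$CreateJobFromTemplateRequest> = {
 *     gcsPath: 'gs://my-bucket/templates/wordcount',        // must begin with gs://
 *     jobName: 'wordcount-example',
 *     parameters: {inputFile: 'gs://my-bucket/input.txt'},  // template-specific
 *   };
 */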
/**
 * Identifies the location of a custom source.
*/
export interface Schema$CustomSourceLocation {
/**
* Whether this source is stateful.
*/
stateful: boolean;
}
/**
* Data disk assignment for a given VM instance.
*/
export interface Schema$DataDiskAssignment {
/**
 * Mounted data disks. The order is important: a data disk's 0-based index
 * in this list defines which persistent directory the disk is mounted to,
 * for example the list { "myproject-1014-104817-4c2-harness-0-disk-0" },
 * { "myproject-1014-104817-4c2-harness-0-disk-1" }.
*/
dataDisks: string[];
/**
* VM instance name the data disks mounted to, for example
* "myproject-1014-104817-4c2-harness-0".
*/
vmInstance: string;
}
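/*
 * Illustrative sketch, reusing the disk names from the comments above: the
 * 0-based position of each disk in dataDisks determines the persistent
 * directory it is mounted to.
 *
 *   const assignment: Partial<Schema$DataDiskAssignment> = {
 *     vmInstance: 'myproject-1014-104817-4c2-harness-0',
 *     dataDisks: [
 *       'myproject-1014-104817-4c2-harness-0-disk-0',  // index 0
 *       'myproject-1014-104817-4c2-harness-0-disk-1',  // index 1
 *     ],
 *   };
 */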
/**
* Specification of one of the bundles produced as a result of splitting a
* Source (e.g. when executing a SourceSplitRequest, or when splitting an active
* task using WorkItemStatus.dynamic_source_split), relative to the source being
* split.
*/
export interface Schema$DerivedSource {
/**
* What source to base the produced source on (if any).
*/
derivationMode: string;
/**
* Specification of the source.
*/
source: Schema$Source;
}
/**
* Describes the data disk used by a workflow job.
*/
export interface Schema$Disk {
/**
* Disk storage type, as defined by Google Compute Engine. This must be a
* disk type appropriate to the project and zone in which the workers will
* run. If unknown or unspecified, the service will attempt to choose a
* reasonable default. For example, the standard persistent disk type is a
* resource name typically ending in "pd-standard". If SSD
* persistent disks are available, the resource name typically ends with
* "pd-ssd". The actual valid values are defined the Google Compute
* Engine API, not by the Cloud Dataflow API; consult the Google Compute
* Engine documentation for more information about determining the set of
* available disk types for a particular project and zone. Google Compute
* Engine Disk types are local to a particular project in a particular zone,
* and so the resource name will typically look something like this:
* compute.googleapis.com/projects/project-id/zones/zone/diskTypes/pd-standard
*/
diskType: string;
/**
* Directory in a VM where disk is mounted.
*/
mountPoint: string;
/**
* Size of disk in GB. If zero or unspecified, the service will attempt to
* choose a reasonable default.
*/
sizeGb: number;
}
/**
* Data provided with a pipeline or transform to provide descriptive info.
*/
export interface Schema$DisplayData {
/**
* Contains value if the data is of a boolean type.
*/
boolValue: boolean;
/**
* Contains value if the data is of duration type.
*/
durationValue: string;
/**
* Contains value if the data is of float type.
*/
floatValue: number;
/**
* Contains value if the data is of int64 type.
*/
int64Value: string;
/**
* Contains value if the data is of java class type.
*/
javaClassValue: string;
/**
* The key identifying the display data. This is intended to be used as a
* label for the display data when viewed in a dax monitoring system.
*/
key: string;
/**
* An optional label to display in a dax UI for the element.
*/
label: string;
/**
* The namespace for the key. This is usually a class name or programming
* language namespace (i.e. python module) which defines the display data.
* This allows a dax monitoring system to specially handle the data and
* perform custom rendering.
*/
namespace: string;
/**
* A possible additional shorter value to display. For example a
* java_class_name_value of com.mypackage.MyDoFn will be stored with MyDoFn as
* the short_str_value and com.mypackage.MyDoFn as the java_class_name value.
* short_str_value can be displayed and java_class_name_value will be
* displayed as a tooltip.
*/
shortStrValue: string;
/**
* Contains value if the data is of string type.
*/
strValue: string;
/**
* Contains value if the data is of timestamp type.
*/
timestampValue: string;
/**
* An optional full URL.
*/
url: string;
}
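/*
 * Illustrative sketch based on the shortStrValue example above: a display
 * data entry for a Java DoFn. The key is hypothetical.
 *
 *   const displayData: Partial<Schema$DisplayData> = {
 *     namespace: 'com.mypackage',              // where the display data is defined
 *     key: 'fn',                               // hypothetical key
 *     javaClassValue: 'com.mypackage.MyDoFn',
 *     shortStrValue: 'MyDoFn',                 // shown inline; full class name as tooltip
 *   };
 */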
/**
* A metric value representing a distribution.
*/
export interface Schema$DistributionUpdate {
/**
* The count of the number of elements present in the distribution.
*/
count: Schema$SplitInt64;
/**
* (Optional) Histogram of value counts for the distribution.
*/
histogram: Schema$Histogram;
/**
* The maximum value present in the distribution.
*/
max: Schema$SplitInt64;
/**
* The minimum value present in the distribution.
*/
min: Schema$SplitInt64;
/**
* Use an int64 since we'd prefer the added precision. If overflow is a
* common problem we can detect it and use an additional int64 or a double.
*/
sum: Schema$SplitInt64;
/**
* Use a double since the sum of squares is likely to overflow int64.
*/
sumOfSquares: number;
}
/**
* When a task splits using WorkItemStatus.dynamic_source_split, this message
* describes the two parts of the split relative to the description of the
* current task's input.
*/
export interface Schema$DynamicSourceSplit {
/**
* Primary part (continued to be processed by worker). Specified relative to
* the previously-current source. Becomes current.
*/
primary: Schema$DerivedSource;
/**
* Residual part (returned to the pool of work). Specified relative to the
* previously-current source.
*/
residual: Schema$DerivedSource;
}
/**
* Describes the environment in which a Dataflow Job runs.
*/
export interface Schema$Environment {
/**
* The type of cluster manager API to use. If unknown or unspecified, the
* service will attempt to choose a reasonable default. This should be in the
* form of the API service name, e.g. "compute.googleapis.com".
*/
clusterManagerApiService: string;
/**
* The dataset for the current project where various workflow related tables
* are stored. The supported resource type is: Google BigQuery:
* bigquery.googleapis.com/{dataset}
*/
dataset: string;
/**
* The list of experiments to enable.
*/
experiments: string[];
/**
* Experimental settings.
*/
internalExperiments: any;
/**
* The Cloud Dataflow SDK pipeline options specified by the user. These
* options are passed through the service and are used to recreate the SDK
* pipeline options on the worker in a language agnostic and platform
* independent way.
*/
sdkPipelineOptions: any;
/**
* Identity to run virtual machines as. Defaults to the default account.
*/
serviceAccountEmail: string;
/**
* The prefix of the resources the system should use for temporary storage.
 * The system will append the suffix "/temp-{JOBNAME}" to this resource
* prefix, where {JOBNAME} is the value of the job_name field. The resulting
* bucket and object prefix is used as the prefix of the resources used to
* store temporary data needed during the job execution. NOTE: This will
* override the value in taskrunner_settings. The supported resource type is:
* Google Cloud Storage: storage.googleapis.com/{bucket}/{object}
* bucket.storage.googleapis.com/{object}
*/
tempStoragePrefix: string;
/**
* A description of the process that generated the request.
*/
userAgent: any;
/**
* A structure describing which components and their versions of the service
* are required in order to run the job.
*/
version: any;
/**
* The worker pools. At least one "harness" worker pool must be
* specified in order for the job to have workers.
*/
workerPools: Schema$WorkerPool[];
}
/**
* A message describing the state of a particular execution stage.
*/
export interface Schema$ExecutionStageState {
/**
* The time at which the stage transitioned to this state.
*/
currentStateTime: string;
/**
* The name of the execution stage.
*/
executionStageName: string;
/**
* Executions stage states allow the same set of values as JobState.
*/
executionStageState: string;
}
/**
* Description of the composing transforms, names/ids, and input/outputs of a
* stage of execution. Some composing transforms and sources may have been
* generated by the Dataflow service during execution planning.
*/
export interface Schema$ExecutionStageSummary {
/**
* Collections produced and consumed by component transforms of this stage.
*/
componentSource: Schema$ComponentSource[];
/**
* Transforms that comprise this execution stage.
*/
componentTransform: Schema$ComponentTransform[];
/**
* Dataflow service generated id for this stage.
*/
id: string;
/**
* Input sources for this stage.
*/
inputSource: Schema$StageSource[];
/**
 * Type of transform this stage is executing.
*/
kind: string;
/**
* Dataflow service generated name for this stage.
*/
name: string;
/**
* Output sources for this stage.
*/
outputSource: Schema$StageSource[];
}
/**
* Indicates which location failed to respond to a request for data.
*/
export interface Schema$FailedLocation {
/**
* The name of the failed location.
*/
name: string;
}
/**
* An instruction that copies its inputs (zero or more) to its (single) output.
*/
export interface Schema$FlattenInstruction {
/**
* Describes the inputs to the flatten instruction.
*/
inputs: Schema$InstructionInput[];
}
/**
* A metric value representing a list of floating point numbers.
*/
export interface Schema$FloatingPointList {
/**
* Elements of the list.
*/
elements: number[];
}
/**
* A representation of a floating point mean metric contribution.
*/
export interface Schema$FloatingPointMean {
/**
* The number of values being aggregated.
*/
count: Schema$SplitInt64;
/**
* The sum of all values being aggregated.
*/
sum: number;
}
/**
* Request to get updated debug configuration for component.
*/
export interface Schema$GetDebugConfigRequest {
/**
* The internal component id for which debug configuration is requested.
*/
componentId: string;
/**
* The location which contains the job specified by job_id.
*/
location: string;
/**
* The worker id, i.e., VM hostname.
*/
workerId: string;
}
/**
* Response to a get debug configuration request.
*/
export interface Schema$GetDebugConfigResponse {
/**
* The encoded debug configuration for the requested component.
*/
config: string;
}
/**
* The response to a GetTemplate request.
*/
export interface Schema$GetTemplateResponse {
/**
* The template metadata describing the template name, available parameters,
* etc.
*/
metadata: Schema$TemplateMetadata;
/**
* The status of the get template request. Any problems with the request will
* be indicated in the error_details.
*/
status: Schema$Status;
}
/**
* Histogram of value counts for a distribution. Buckets have an inclusive
* lower bound and exclusive upper bound and use "1,2,5 bucketing":
* The first bucket range is from [0,1) and all subsequent bucket boundaries are
* powers of ten multiplied by 1, 2, or 5. Thus, bucket boundaries are 0, 1, 2,
* 5, 10, 20, 50, 100, 200, 500, 1000, ... Negative values are not supported.
*/
export interface Schema$Histogram {
/**
* Counts of values in each bucket. For efficiency, prefix and trailing
* buckets with count = 0 are elided. Buckets can store the full range of
* values of an unsigned long, with ULLONG_MAX falling into the 59th bucket
* with range [1e19, 2e19).
*/
bucketCounts: string[];
/**
* Starting index of first stored bucket. The non-inclusive upper-bound of the
* ith bucket is given by: pow(10,(i-first_bucket_offset)/3) *
* (1,2,5)[(i-first_bucket_offset)%3]
*/
firstBucketOffset: number;
}
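/*
 * Illustrative sketch of the "1,2,5 bucketing" described above: the
 * non-inclusive upper bound of the k-th bucket (k = 0 being the [0, 1)
 * bucket), yielding boundaries 1, 2, 5, 10, 20, 50, 100, ...
 *
 *   function bucketUpperBound(k: number): number {
 *     return Math.pow(10, Math.floor(k / 3)) * [1, 2, 5][k % 3];
 *   }
 *   // bucketUpperBound(0) === 1, bucketUpperBound(3) === 10, bucketUpperBound(5) === 50
 */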
/**
* An input of an instruction, as a reference to an output of a producer
* instruction.
*/
export interface Schema$InstructionInput {
/**
* The output index (origin zero) within the producer.
*/
outputNum: number;
/**
* The index (origin zero) of the parallel instruction that produces the
* output to be consumed by this input. This index is relative to the list of
* instructions in this input's instruction's containing MapTask.
*/
producerInstructionIndex: number;
}
/**
* An output of an instruction.
*/
export interface Schema$InstructionOutput {
/**
* The codec to use to encode data being written via this output.
*/
codec: any;
/**
* The user-provided name of this output.
*/
name: string;
/**
* For system-generated byte and mean byte metrics, certain instructions
* should only report the key size.
*/
onlyCountKeyBytes: boolean;
/**
* For system-generated byte and mean byte metrics, certain instructions
* should only report the value size.
*/
onlyCountValueBytes: boolean;
/**
* System-defined name for this output in the original workflow graph. Outputs
* that do not contribute to an original instruction do not set this.
*/
originalName: string;
/**
* System-defined name of this output. Unique across the workflow.
*/
systemName: string;
}
/**
* A metric value representing temporal values of a variable.
*/
export interface Schema$IntegerGauge {
/**
* The time at which this value was measured. Measured as msecs from epoch.
*/
timestamp: string;
/**
* The value of the variable represented by this gauge.
*/
value: Schema$SplitInt64;
}
/**
* A metric value representing a list of integers.
*/
export interface Schema$IntegerList {
/**
* Elements of the list.
*/
elements: Schema$SplitInt64[];
}
/**
* A representation of an integer mean metric contribution.
*/
export interface Schema$IntegerMean {
/**
* The number of values being aggregated.
*/
count: Schema$SplitInt64;
/**
* The sum of all values being aggregated.
*/
sum: Schema$SplitInt64;
}
/**
* Defines a job to be run by the Cloud Dataflow service.
*/
export interface Schema$Job {
/**
* The client's unique identifier of the job, re-used across retried
* attempts. If this field is set, the service will ensure its uniqueness. The
* request to create a job will fail if the service has knowledge of a
* previously submitted job with the same client's ID and job name. The
* caller may use this field to ensure idempotence of job creation across
* retried attempts to create a job. By default, the field is empty and, in
* that case, the service ignores it.
*/
clientRequestId: string;
/**
* The timestamp when the job was initially created. Immutable and set by the
* Cloud Dataflow service.
*/
createTime: string;
/**
* The current state of the job. Jobs are created in the `JOB_STATE_STOPPED`
* state unless otherwise specified. A job in the `JOB_STATE_RUNNING` state
* may asynchronously enter a terminal state. After a job has reached a
* terminal state, no further state updates may be made. This field may be
* mutated by the Cloud Dataflow service; callers cannot mutate it.
*/
currentState: string;
/**
* The timestamp associated with the current state.
*/
currentStateTime: string;
/**
* The environment for the job.
*/
environment: Schema$Environment;
/**
* Deprecated.
*/
executionInfo: Schema$JobExecutionInfo;
/**
* The unique ID of this job. This field is set by the Cloud Dataflow service
* when the Job is created, and is immutable for the life of the job.
*/
id: string;
/**
* User-defined labels for this job. The labels map can contain no more than
* 64 entries. Entries of the labels map are UTF8 strings that comply with
* the following restrictions: * Keys must conform to regexp:
* \p{Ll}\p{Lo}{0,62} * Values must conform to regexp:
* [\p{Ll}\p{Lo}\p{N}_-]{0,63} * Both keys and values are additionally
* constrained to be <= 128 bytes in size.
*/
labels: any;
/**
* The location that contains this job.
*/
location: string;
/**
* The user-specified Cloud Dataflow job name. Only one Job with a given name
* may exist in a project at any given time. If a caller attempts to create a
* Job with the same name as an already-existing Job, the attempt returns the
* existing Job. The name must match the regular expression
* `[a-z]([-a-z0-9]{0,38}[a-z0-9])?`
*/
name: string;
/**
* Preliminary field: The format of this data may change at any time. A
* description of the user pipeline and stages through which it is executed.
* Created by Cloud Dataflow service. Only retrieved with
* JOB_VIEW_DESCRIPTION or JOB_VIEW_ALL.
*/
pipelineDescription: Schema$PipelineDescription;
/**
* The ID of the Cloud Platform project that the job belongs to.
*/
projectId: string;
/**
* If another job is an update of this job (and thus, this job is in
* `JOB_STATE_UPDATED`), this field contains the ID of that job.
*/
replacedByJobId: string;
/**
* If this job is an update of an existing job, this field is the job ID of
* the job it replaced. When sending a `CreateJobRequest`, you can update a
* job by specifying it here. The job named here is stopped, and its
* intermediate state is transferred to this job.
*/
replaceJobId: string;
/**
* The job's requested state. `UpdateJob` may be used to switch between
* the `JOB_STATE_STOPPED` and `JOB_STATE_RUNNING` states, by setting
* requested_state. `UpdateJob` may also be used to directly set a job's
* requested state to `JOB_STATE_CANCELLED` or `JOB_STATE_DONE`, irrevocably
* terminating the job if it has not already reached a terminal state.
*/
requestedState: string;
/**
* This field may be mutated by the Cloud Dataflow service; callers cannot
* mutate it.
*/
stageStates: Schema$ExecutionStageState[];
/**
* The top-level steps that constitute the entire job.
*/
steps: Schema$Step[];
/**
* A set of files the system should be aware of that are used for temporary
* storage. These temporary files will be removed on job completion. No
* duplicates are allowed. No file patterns are supported. The supported
* files are: Google Cloud Storage: storage.googleapis.com/{bucket}/{object}
* bucket.storage.googleapis.com/{object}
*/
tempFiles: string[];
/**
* The map of transform name prefixes of the job to be replaced to the
* corresponding name prefixes of the new job.
*/
transformNameMapping: any;
/**
* The type of Cloud Dataflow job.
*/
type: string;
}
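/*
 * Illustrative sketch: the job name constraint quoted above, checked with a
 * regular expression.
 *
 *   const jobNameRe = /^[a-z]([-a-z0-9]{0,38}[a-z0-9])?$/;
 *   jobNameRe.test('wordcount-example');  // true
 *   jobNameRe.test('WordCount');          // false: uppercase is not allowed
 */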
/**
* Additional information about how a Cloud Dataflow job will be executed that
* isn't contained in the submitted job.
*/
export interface Schema$JobExecutionInfo {
/**
* A mapping from each stage to the information about that stage.
*/
stages: any;
}
/**
* Contains information about how a particular google.dataflow.v1beta3.Step will
* be executed.
*/
export interface Schema$JobExecutionStageInfo {
/**
* The steps associated with the execution stage. Note that stages may have
* several steps, and that a given step might be run by more than one stage.
*/
stepName: string[];
}
/**
* A particular message pertaining to a Dataflow job.
*/
export interface Schema$JobMessage {
/**
* Deprecated.
*/
id: string;
/**
* Importance level of the message.
*/
messageImportance: string;
/**
* The text of the message.
*/
messageText: string;
/**
* The timestamp of the message.
*/
time: string;
}
/**
 * JobMetrics contains a collection of metrics describing the detailed progress
* of a Dataflow job. Metrics correspond to user-defined and system-defined
* metrics in the job. This resource captures only the most recent values of
* each metric; time-series data can be queried for them (under the same metric
* names) from Cloud Monitoring.
*/
export interface Schema$JobMetrics {
/**
* All metrics for this job.
*/
metrics: Schema$MetricUpdate[];
/**
* Timestamp as of which metric values are current.
*/
metricTime: string;
}
/**
* Data disk assignment information for a specific key-range of a sharded
* computation. Currently we only support UTF-8 character splits to simplify
* encoding into JSON.
*/
export interface Schema$KeyRangeDataDiskAssignment {
/**
* The name of the data disk where data for this range is stored. This name is
* local to the Google Cloud Platform project and uniquely identifies the disk
* within that project, for example
* "myproject-1014-104817-4c2-harness-0-disk-1".
*/
dataDisk: string;
/**
* The end (exclusive) of the key range.
*/
end: string;
/**
* The start (inclusive) of the key range.
*/
start: string;
}
/**
* Location information for a specific key-range of a sharded computation.
* Currently we only support UTF-8 character splits to simplify encoding into
* JSON.
*/
export interface Schema$KeyRangeLocation {
/**
* The name of the data disk where data for this range is stored. This name is
* local to the Google Cloud Platform project and uniquely identifies the disk
* within that project, for example
* "myproject-1014-104817-4c2-harness-0-disk-1".
*/
dataDisk: string;
/**
* The physical location of this range assignment to be used for streaming
* computation cross-worker message delivery.
*/
deliveryEndpoint: string;
/**
* DEPRECATED. The location of the persistent state for this range, as a
* persistent directory in the worker local filesystem.
*/
deprecatedPersistentDirectory: string;
/**
* The end (exclusive) of the key range.
*/
end: string;
/**
* The start (inclusive) of the key range.
*/
start: string;
}
/**
* Parameters to provide to the template being launched.
*/
export interface Schema$LaunchTemplateParameters {
/**
* The runtime environment for the job.
*/
environment: Schema$RuntimeEnvironment;
/**
* Required. The job name to use for the created job.
*/
jobName: string;
/**
* The runtime parameters to pass to the job.
*/
parameters: any;
}
/**
* Response to the request to launch a template.
*/
export interface Schema$LaunchTemplateResponse {
/**
* The job that was launched, if the request was not a dry run and the job was
* successfully launched.
*/
job: Schema$Job;
}
/**
* Request to lease WorkItems.
*/
export interface Schema$LeaseWorkItemRequest {
/**
* The current timestamp at the worker.
*/
currentWorkerTime: string;
/**
* The location which contains the WorkItem's job.
*/
location: string;
/**
* The initial lease period.
*/
requestedLeaseDuration: string;
/**
* Worker capabilities. WorkItems might be limited to workers with specific
* capabilities.
*/
workerCapabilities: string[];
/**
* Identifies the worker leasing work -- typically the ID of the virtual
* machine running the worker.
*/
workerId: string;
/**
* Filter for WorkItem type.
*/
workItemTypes: string[];
}
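/*
 * Illustrative sketch: a lease request as a worker might send it. The
 * timestamp, duration, and work item type values are assumptions, not
 * documented in this excerpt.
 *
 *   const lease: Partial<Schema$LeaseWorkItemRequest> = {
 *     workerId: 'myproject-1014-104817-4c2-harness-0',  // VM name (see disk examples above)
 *     currentWorkerTime: '2018-01-01T00:00:00Z',        // hypothetical timestamp
 *     requestedLeaseDuration: '300s',                   // hypothetical duration format
 *     workItemTypes: ['map_task'],                      // hypothetical type filter
 *   };
 */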
/**
* Response to a request to lease WorkItems.
*/
export interface Schema$LeaseWorkItemResponse {
/**
* A list of the leased WorkItems.
*/
workItems: Schema$WorkItem[];
}
/**
* Response to a request to list job messages.
*/
export interface Schema$ListJobMessagesResponse {
/**
* Autoscaling events in ascending timestamp order.
*/
autoscalingEvents: Schema$AutoscalingEvent[];
/**
* Messages in ascending timestamp order.
*/
jobMessages: Schema$JobMessage[];
/**
* The token to obtain the next page of results if there are more.
*/
nextPageToken: string;
}
/**
* Response to a request to list Cloud Dataflow jobs. This may be a partial
* response, depending on the page size in the ListJobsRequest.
*/
export interface Schema$ListJobsResponse {
/**
* Zero or more messages describing locations that failed to respond.
*/
failedLocation: Schema$FailedLocation[];
/**
* A subset of the requested job information.
*/
jobs: Schema$Job[];
/**
* Set if there may be more results than fit in this response.
*/
nextPageToken: string;
}
/**
* MapTask consists of an ordered set of instructions, each of which describes
* one particular low-level operation for the worker to perform in order to
* accomplish the MapTask's WorkItem. Each instruction must appear in the
 * list before any instructions which depend on its output.
*/
export interface Schema$MapTask {
/**
* The instructions in the MapTask.
*/
instructions: Schema$ParallelInstruction[];
/**
* System-defined name of the stage containing this MapTask. Unique across the
* workflow.
*/
stageName: string;
/**
* System-defined name of this MapTask. Unique across the workflow.
*/
systemName: string;
}
/**
* The metric short id is returned to the user alongside an offset into
* ReportWorkItemStatusRequest
*/
export interface Schema$MetricShortId {
/**
* The index of the corresponding metric in the ReportWorkItemStatusRequest.
* Required.
*/
metricIndex: number;
/**
* The service-generated short identifier for the metric.
*/
shortId: string;
}
/**
* Identifies a metric, by describing the source which generated the metric.
*/
export interface Schema$MetricStructuredName {
/**
* Zero or more labeled fields which identify the part of the job this metric
* is associated with, such as the name of a step or collection. For example,
* built-in counters associated with steps will have context['step'] =
* <step-name>. Counters associated with PCollections in the SDK will
* have context['pcollection'] = <pcollection-name>.
*/
context: any;
/**
* Worker-defined metric name.
*/
name: string;
/**
 * Origin (namespace) of metric name. May be blank for user-defined metrics;
* will be "dataflow" for metrics defined by the Dataflow service or
* SDK.
*/
origin: string;
}
/**
* Describes the state of a metric.
*/
export interface Schema$MetricUpdate {
/**
* True if this metric is reported as the total cumulative aggregate value
* accumulated since the worker started working on this WorkItem. By default
* this is false, indicating that this metric is reported as a delta that is
* not associated with any WorkItem.
*/
cumulative: boolean;
/**
* A struct value describing properties of a distribution of numeric values.
*/
distribution: any;
/**
* Worker-computed aggregate value for internal use by the Dataflow service.
*/
internal: any;
/**
* Metric aggregation kind. The possible metric aggregation kinds are
* "Sum", "Max", "Min", "Mean",
* "Set", "And", "Or", and
* "Distribution". The specified aggregation kind is
* case-insensitive. If omitted, this is not an aggregated value but instead
* a single metric sample value.
*/
kind: string;
/**
* Worker-computed aggregate value for the "Mean" aggregation kind.
* This holds the count of the aggregated values and is used in combination
* with mean_sum above to obtain the actual mean aggregate value. The only
* possible value type is Long.
*/
meanCount: any;
/**
* Worker-computed aggregate value for the "Mean" aggregation kind.
* This holds the sum of the aggregated values and is used in combination with
* mean_count below to obtain the actual mean aggregate value. The only
* possible value types are Long and Double.
*/
meanSum: any;
/**
* Name of the metric.
*/
name: Schema$MetricStructuredName;
/**
* Worker-computed aggregate value for aggregation kinds "Sum",
* "Max", "Min", "And", and "Or". The
* possible value types are Long, Double, and Boolean.
*/
scalar: any;
/**
* Worker-computed aggregate value for the "Set" aggregation kind.
* The only possible value type is a list of Values whose type can be Long,
* Double, or String, according to the metric's type. All Values in the
* list must be of the same type.
*/
set: any;
/**
* Timestamp associated with the metric value. Optional when workers are
* reporting work progress; it will be filled in responses from the metrics
* API.
*/
updateTime: string;
}
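/*
 * Illustrative sketch: recovering the actual mean from a "Mean" metric, which
 * is reported as the (meanSum, meanCount) pair described above. Both fields
 * are declared as `any` here, so they are coerced to numbers for the sketch.
 *
 *   function meanOf(m: Schema$MetricUpdate): number {
 *     return Number(m.meanSum) / Number(m.meanCount);
 *   }
 */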
/**
* Describes mounted data disk.
*/
export interface Schema$MountedDataDisk {
/**
* The name of the data disk. This name is local to the Google Cloud Platform
* project and uniquely identifies the disk within that project, for example
* "myproject-1014-104817-4c2-harness-0-disk-1".
*/
dataDisk: string;
}
/**
* Information about an output of a multi-output DoFn.
*/
export interface Schema$MultiOutputInfo {
/**
 * The id of the tag by which the user code will emit to this output; this
 * should correspond to the tag of some SideInputInfo.
*/
tag: string;
}
/**
* Basic metadata about a counter.
*/
export interface Schema$NameAndKind {
/**
* Counter aggregation kind.
*/
kind: string;
/**
* Name of the counter.
*/
name: string;
}
/**
* The packages that must be installed in order for a worker to run the steps of
* the Cloud Dataflow job that will be assigned to its worker pool. This is the
* mechanism by which the Cloud Dataflow SDK causes code to be loaded onto the
* workers. For example, the Cloud Dataflow Java SDK might use this to install
* jars containing the user's code and all of the various dependencies
* (libraries, data files, etc.) required in order for that code to run.
*/
export interface Schema$Package {
/**
* The resource to read the package from. The supported resource type is:
* Google Cloud Storage: storage.googleapis.com/{bucket}
* bucket.storage.googleapis.com/
*/
location: string;
/**
* The name of the package.
*/
name: string;
}
/**
* Describes a particular operation comprising a MapTask.
*/
export interface Schema$ParallelInstruction {
/**
* Additional information for Flatten instructions.
*/
flatten: Schema$FlattenInstruction;
/**
* User-provided name of this operation.
*/
name: string;
/**
* System-defined name for the operation in the original workflow graph.
*/
originalName: string;
/**
* Describes the outputs of the instruction.
*/
outputs: Schema$InstructionOutput[];
/**
* Additional information for ParDo instructions.
*/
parDo: Schema$ParDoInstruction;
/**
* Additional information for PartialGroupByKey instructions.
*/
partialGroupByKey: Schema$PartialGroupByKeyInstruction;
/**
* Additional information for Read instructions.
*/
read: Schema$ReadInstruction;
/**
* System-defined name of this operation. Unique across the workflow.
*/
systemName: string;
/**
* Additional information for Write instructions.
*/
write: Schema$WriteInstruction;
}
/**
* Structured data associated with this message.
*/
export interface Schema$Parameter {
/**
* Key or name for this parameter.
*/
key: string;
/**
* Value for this parameter.
*/
value: any;
}
/**
* Metadata for a specific parameter.
*/
export interface Schema$ParameterMetadata {
/**
* Required. The help text to display for the parameter.
*/
helpText: string;
/**
* Optional. Whether the parameter is optional. Defaults to false.
*/
isOptional: boolean;
/**
* Required. The label to display for the parameter.
*/
label: string;
/**
* Required. The name of the parameter.
*/
name: string;
/**
* Optional. Regexes that the parameter must match.
*/
regexes: string[];
}
/**
* An instruction that does a ParDo operation. Takes one main input and zero or
* more side inputs, and produces zero or more outputs. Runs user code.
*/
export interface Schema$ParDoInstruction {
/**
* The input.
*/
input: Schema$InstructionInput;
/**
* Information about each of the outputs, if user_fn is a MultiDoFn.
*/
multiOutputInfos: Schema$MultiOutputInfo[];
/**
* The number of outputs.
*/
numOutputs: number;
/**
* Zero or more side inputs.
*/
sideInputs: Schema$SideInputInfo[];
/**
* The user function to invoke.
*/
userFn: any;
}
/**
* An instruction that does a partial group-by-key. One input and one output.
*/
export interface Schema$PartialGroupByKeyInstruction {
/**
* Describes the input to the partial group-by-key instruction.
*/
input: Schema$InstructionInput;
/**
* The codec to use for interpreting an element in the input PTable.
*/
inputElementCodec: any;
/**
* If this instruction includes a combining function this is the name of the
* intermediate store between the GBK and the CombineValues.
*/
originalCombineValuesInputStoreName: string;
/**
* If this instruction includes a combining function, this is the name of the
* CombineValues instruction lifted into this instruction.
*/
originalCombineValuesStepName: string;
/**
* Zero or more side inputs.
*/
sideInputs: Schema$SideInputInfo[];
/**
* The value combining function to invoke.
*/
valueCombiningFn: any;
}
/**
 * A descriptive representation of the submitted pipeline as well as the
 * executed form. This data is provided by the Dataflow service for ease of
 * visualizing the pipeline and interpreting Dataflow-provided metrics.
*/
export interface Schema$PipelineDescription {
/**
* Pipeline level display data.
*/
displayData: Schema$DisplayData[];
/**
* Description of each stage of execution of the pipeline.
*/
executionPipelineStage: Schema$ExecutionStageSummary[];
/**
* Description of each transform in the pipeline and collections between them.
*/
originalPipelineTransform: Schema$TransformSummary[];
}
/**
* Position defines a position within a collection of data. The value can be
* either the end position, a key (used with ordered collections), a byte
* offset, or a record index.
*/
export interface Schema$Position {
/**
* Position is a byte offset.
*/
byteOffset: string;
/**
* CloudPosition is a concat position.
*/
concatPosition: Schema$ConcatPosition;
/**
* Position is past all other positions. Also useful for the end position of
* an unbounded range.
*/
end: boolean;
/**
* Position is a string key, ordered lexicographically.
*/
key: string;
/**
* Position is a record index.
*/
recordIndex: string;
/**
* CloudPosition is a base64 encoded BatchShufflePosition