Skip to content
This repository has been archived by the owner on Nov 29, 2023. It is now read-only.

Commit

Permalink
feat: add support for temp_bucket, endpoint_config in clusters; add preemptibility for instance group configs (#60)
Browse files Browse the repository at this point in the history

* feat: Additional fields for the `ClusterConfig` and `InstanceGroupConfig` messages.

This change includes the following updates:
1. There is a new `temp_bucket` field for clusters.
2. There is a new `endpoint_config` field for clusters.
3. There is a new `preemptibility` field for instance group configs.
4. There are various updates to the doc comments.

PiperOrigin-RevId: 323829608

Source-Author: Google APIs <noreply@google.com>
Source-Date: Wed Jul 29 11:26:43 2020 -0700
Source-Repo: googleapis/googleapis
Source-Sha: d8a3dfb82f5cae3f1bcdcec7c5726581532da7d5
Source-Link: googleapis/googleapis@d8a3dfb
  • Loading branch information
yoshi-automation authored Jul 31, 2020
1 parent 380e122 commit a80fc72
Show file tree
Hide file tree
Showing 11 changed files with 597 additions and 203 deletions.
26 changes: 25 additions & 1 deletion google/cloud/dataproc_v1/gapic/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class Component(enum.IntEnum):
Cluster components that can be activated.
Attributes:
COMPONENT_UNSPECIFIED (int): Unspecified component.
COMPONENT_UNSPECIFIED (int): Unspecified component. Specifying this will cause Cluster creation to fail.
ANACONDA (int): The Anaconda python distribution.
HIVE_WEBHCAT (int): The Hive Web HCatalog (the REST service for accessing HCatalog).
JUPYTER (int): The Jupyter Notebook.
Expand Down Expand Up @@ -103,6 +103,30 @@ class Substate(enum.IntEnum):
STALE_STATUS = 2


# Container for the enums nested under the `InstanceGroupConfig` message.
class InstanceGroupConfig(object):
    class Preemptibility(enum.IntEnum):
        """
        Controls the use of
        [preemptible instances](https://cloud.google.com/compute/docs/instances/preemptible)
        within the group.

        Attributes:
          PREEMPTIBILITY_UNSPECIFIED (int): Preemptibility is unspecified; the
            system will choose the appropriate setting for each instance group.
          NON_PREEMPTIBLE (int): Instances are non-preemptible.
            This option is allowed for all instance groups and is the only
            valid value for Master and Worker instance groups.
          PREEMPTIBLE (int): Instances are preemptible.
            This option is allowed only for secondary worker groups.
        """

        PREEMPTIBILITY_UNSPECIFIED = 0
        NON_PREEMPTIBLE = 1
        PREEMPTIBLE = 2


class JobStatus(object):
class State(enum.IntEnum):
"""
Expand Down
10 changes: 8 additions & 2 deletions google/cloud/dataproc_v1/proto/autoscaling_policies.proto
Original file line number Diff line number Diff line change
Expand Up @@ -164,20 +164,26 @@ message BasicYarnAutoscalingConfig {
// Bounds: [0s, 1d].
google.protobuf.Duration graceful_decommission_timeout = 5 [(google.api.field_behavior) = REQUIRED];

// Required. Fraction of average pending memory in the last cooldown period
// Required. Fraction of average YARN pending memory in the last cooldown period
// for which to add workers. A scale-up factor of 1.0 will result in scaling
// up so that there is no pending memory remaining after the update (more
// aggressive scaling). A scale-up factor closer to 0 will result in a smaller
// magnitude of scaling up (less aggressive scaling).
// See [How autoscaling
// works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
// for more information.
//
// Bounds: [0.0, 1.0].
double scale_up_factor = 1 [(google.api.field_behavior) = REQUIRED];

// Required. Fraction of average pending memory in the last cooldown period
// Required. Fraction of average YARN pending memory in the last cooldown period
// for which to remove workers. A scale-down factor of 1 will result in
// scaling down so that there is no available memory remaining after the
// update (more aggressive scaling). A scale-down factor of 0 disables
// removing workers, which can be beneficial for autoscaling a single job.
// See [How autoscaling
// works](/dataproc/docs/concepts/configuring-clusters/autoscaling#how_autoscaling_works)
// for more information.
//
// Bounds: [0.0, 1.0].
double scale_down_factor = 2 [(google.api.field_behavior) = REQUIRED];
Expand Down
15 changes: 10 additions & 5 deletions google/cloud/dataproc_v1/proto/autoscaling_policies_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

59 changes: 57 additions & 2 deletions google/cloud/dataproc_v1/proto/clusters.proto
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,17 @@ message ClusterConfig {
// bucket](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

// Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs data,
// such as Spark and MapReduce history files.
// If you do not specify a temp bucket,
// Dataproc will determine a Cloud Storage location (US,
// ASIA, or EU) for your cluster's temp bucket according to the
// Compute Engine zone where your cluster is deployed, and then create
// and manage this project-level, per-location bucket. The default bucket has
// a TTL of 90 days, but you can use any TTL (or none) if you specify a
// bucket.
string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];

// Optional. The shared Compute Engine config settings for
// all instances in a cluster.
GceClusterConfig gce_cluster_config = 8 [(google.api.field_behavior) = OPTIONAL];
Expand Down Expand Up @@ -216,6 +227,20 @@ message ClusterConfig {

// Optional. Lifecycle setting for the cluster.
LifecycleConfig lifecycle_config = 17 [(google.api.field_behavior) = OPTIONAL];

// Optional. Port/endpoint configuration for this cluster
EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL];
}

// Endpoint configuration for this cluster.
message EndpointConfig {
  // Output only. Map from port description to URL. Populated only when
  // `enable_http_port_access` is true.
  map<string, string> http_ports = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. When true, HTTP access to specific ports on the cluster is
  // enabled from external sources. Defaults to false.
  bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Autoscaling Policy config associated with the cluster.
Expand Down Expand Up @@ -288,7 +313,7 @@ message GceClusterConfig {
bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

// Optional. The [Dataproc service
// account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_cloud_dataproc)
// account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
// (also see [VM Data Plane
// identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
// used by Dataproc cluster VM instances to access Google Cloud Platform
Expand Down Expand Up @@ -332,6 +357,27 @@ message GceClusterConfig {
// The config settings for Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
// Controls the use of
// [preemptible instances](https://cloud.google.com/compute/docs/instances/preemptible)
// within the group.
enum Preemptibility {
  // Preemptibility is unspecified; the system will choose the
  // appropriate setting for each instance group.
  PREEMPTIBILITY_UNSPECIFIED = 0;

  // Instances are non-preemptible.
  //
  // Allowed for all instance groups, and the only valid value for
  // Master and Worker instance groups.
  NON_PREEMPTIBLE = 1;

  // Instances are preemptible.
  //
  // Allowed only for secondary worker groups.
  PREEMPTIBLE = 2;
}

// Optional. The number of VM instances in the instance group.
// For master instance groups, must be set to 1.
int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
Expand Down Expand Up @@ -382,6 +428,15 @@ message InstanceGroupConfig {
// instances.
bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

// Optional. Specifies the preemptibility of the instance group.
//
// The default value for master and worker groups is
// `NON_PREEMPTIBLE`. This default cannot be changed.
//
// The default value for secondary instances is
// `PREEMPTIBLE`.
Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];

// Output only. The config for Compute Engine Instance Group
// Manager that manages this group.
// This is only used for preemptible instance groups.
Expand Down Expand Up @@ -608,7 +663,7 @@ message KerberosConfig {
message SoftwareConfig {
// Optional. The version of software inside the cluster. It must be one of the
// supported [Dataproc
// Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_cloud_dataproc_versions),
// Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
// such as "1.2" (including a subminor version, such as "1.2.29"), or the
// ["preview"
// version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
Expand Down
Loading

0 comments on commit a80fc72

Please sign in to comment.