Add support for accelerators to dataproc (hashicorp#90)
modular-magician authored and rileykarson committed Nov 8, 2018
1 parent 072d86e commit 69737d2
Showing 3 changed files with 200 additions and 5 deletions.
60 changes: 60 additions & 0 deletions google-beta/resource_dataproc_cluster.go
@@ -362,6 +362,14 @@ func instanceConfigSchema() *schema.Schema {
},
},

// Note: preemptible workers don't support accelerators
"accelerators": {
Type: schema.TypeSet,
Optional: true,
ForceNew: true,
Elem: acceleratorsSchema(),
},

"instance_names": {
Type: schema.TypeList,
Computed: true,
@@ -372,6 +380,25 @@
}
}

// We need to pull accelerators' schema out so we can use it to make a set hash func
func acceleratorsSchema() *schema.Resource {
return &schema.Resource{
Schema: map[string]*schema.Schema{
"accelerator_type": {
Type: schema.TypeString,
Required: true,
ForceNew: true,
},

"accelerator_count": {
Type: schema.TypeInt,
Required: true,
ForceNew: true,
},
},
}
}
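
Factoring the schema out is what lets `flattenAccelerators` (further down in this file) build its set with `schema.HashResource(acceleratorsSchema())`, so accelerator blocks declared in config and blocks read back from the API hash identically. A minimal sketch of that behavior — assuming the 2018-era SDK import path and that `acceleratorsSchema` is in scope; the `main` scaffolding is illustrative only:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/terraform/helper/schema"
)

func main() {
	// HashResource derives a deterministic set-hash function from the
	// shared *schema.Resource definition above.
	hashFn := schema.HashResource(acceleratorsSchema())
	h := hashFn(map[string]interface{}{
		"accelerator_type":  "nvidia-tesla-k80",
		"accelerator_count": 1,
	})
	fmt.Println(h) // identical inputs always produce the same hash
}
```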

func resourceDataprocClusterCreate(d *schema.ResourceData, meta interface{}) error {
config := meta.(*Config)

@@ -607,9 +634,26 @@ func expandInstanceGroupConfig(cfg map[string]interface{}) *dataproc.InstanceGro
}
}
}

icg.Accelerators = expandAccelerators(cfg["accelerators"].(*schema.Set).List())
return icg
}

func expandAccelerators(configured []interface{}) []*dataproc.AcceleratorConfig {
accelerators := make([]*dataproc.AcceleratorConfig, 0, len(configured))
for _, raw := range configured {
data := raw.(map[string]interface{})
accelerator := dataproc.AcceleratorConfig{
AcceleratorTypeUri: data["accelerator_type"].(string),
AcceleratorCount: int64(data["accelerator_count"].(int)),
}

accelerators = append(accelerators, &accelerator)
}

return accelerators
}
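
A quick worked example (a sketch, not part of the commit) of what the expander yields for one configured block. Note that the short type name passes through unchanged: the Dataproc API accepts short names on create, but — as the acceptance test below demonstrates — it reports full self-links back, which is why the flatten path has to normalize:

```go
// Hypothetical input, shaped like the (*schema.Set).List() result
// that expandInstanceGroupConfig passes in.
configured := []interface{}{
	map[string]interface{}{
		"accelerator_type":  "nvidia-tesla-k80",
		"accelerator_count": 1,
	},
}
accs := expandAccelerators(configured)
// accs[0].AcceleratorTypeUri == "nvidia-tesla-k80"
// accs[0].AcceleratorCount  == 1
```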

func resourceDataprocClusterUpdate(d *schema.ResourceData, meta interface{}) error {
config := meta.(*Config)

@@ -746,6 +790,20 @@ func flattenSoftwareConfig(d *schema.ResourceData, sc *dataproc.SoftwareConfig)
return []map[string]interface{}{data}
}

func flattenAccelerators(accelerators []*dataproc.AcceleratorConfig) interface{} {
acceleratorsTypeSet := schema.NewSet(schema.HashResource(acceleratorsSchema()), []interface{}{})
for _, accelerator := range accelerators {
data := map[string]interface{}{
"accelerator_type": GetResourceNameFromSelfLink(accelerator.AcceleratorTypeUri),
"accelerator_count": int(accelerator.AcceleratorCount),
}

acceleratorsTypeSet.Add(data)
}

return acceleratorsTypeSet
}
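
`GetResourceNameFromSelfLink` is the provider's existing helper for reducing a full self-link to its trailing resource name. Without it, the short name stored in state would never hash-match the full URI the API returns, and every plan would show a spurious diff on `accelerators`. A simplified, hypothetical version of the idea (not the provider's actual implementation):

```go
import "strings"

// getResourceNameFromSelfLink reduces a full self-link such as
// ".../zones/us-central1-a/acceleratorTypes/nvidia-tesla-k80" to the
// short name "nvidia-tesla-k80" that users write in their config.
func getResourceNameFromSelfLink(link string) string {
	parts := strings.Split(link, "/")
	return parts[len(parts)-1]
}
```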

func flattenInitializationActions(nia []*dataproc.NodeInitializationAction) ([]map[string]interface{}, error) {

actions := []map[string]interface{}{}
@@ -819,6 +877,8 @@ func flattenInstanceGroupConfig(d *schema.ResourceData, icg *dataproc.InstanceGr
disk["num_local_ssds"] = icg.DiskConfig.NumLocalSsds
disk["boot_disk_type"] = icg.DiskConfig.BootDiskType
}

data["accelerators"] = flattenAccelerators(icg.Accelerators)
}

data["disk_config"] = []map[string]interface{}{disk}
89 changes: 89 additions & 0 deletions google-beta/resource_dataproc_cluster_test.go
@@ -134,6 +134,66 @@ func TestAccDataprocCluster_basic(t *testing.T) {
})
}

func TestAccDataprocCluster_withAccelerators(t *testing.T) {
t.Parallel()

rnd := acctest.RandString(10)
var cluster dataproc.Cluster

project := getTestProjectFromEnv()
zone := "us-central1-a"
acceleratorType := "nvidia-tesla-k80"
acceleratorLink := fmt.Sprintf("https://www.googleapis.com/compute/beta/projects/%s/zones/%s/acceleratorTypes/%s", project, zone, acceleratorType)

resource.Test(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
CheckDestroy: testAccCheckDataprocClusterDestroy(),
Steps: []resource.TestStep{
{
Config: testAccDataprocCluster_withAccelerators(rnd, zone, acceleratorType),
Check: resource.ComposeTestCheckFunc(
testAccCheckDataprocClusterExists("google_dataproc_cluster.accelerated_cluster", &cluster),
testAccCheckDataprocClusterAccelerator(&cluster, 1, acceleratorLink, 1, acceleratorLink),
),
},
},
})
}

func testAccCheckDataprocClusterAccelerator(cluster *dataproc.Cluster, masterCount int, masterAccelerator string, workerCount int, workerAccelerator string) resource.TestCheckFunc {
return func(s *terraform.State) error {

master := cluster.Config.MasterConfig.Accelerators
if len(master) != 1 {
return fmt.Errorf("Saw %d master accelerator types instead of 1", len(master))
}

if int(master[0].AcceleratorCount) != masterCount {
return fmt.Errorf("Saw %d master accelerators instead of %d", int(master[0].AcceleratorCount), masterCount)
}

if master[0].AcceleratorTypeUri != masterAccelerator {
return fmt.Errorf("Saw %s master accelerator type instead of %s", master[0].AcceleratorTypeUri, masterAccelerator)
}

worker := cluster.Config.WorkerConfig.Accelerators
if len(worker) != 1 {
return fmt.Errorf("Saw %d worker accelerator types instead of 1", len(worker))
}

if int(worker[0].AcceleratorCount) != workerCount {
return fmt.Errorf("Saw %d worker accelerators instead of %d", int(worker[0].AcceleratorCount), workerCount)
}

if worker[0].AcceleratorTypeUri != workerAccelerator {
return fmt.Errorf("Saw %s worker accelerator type instead of %s", worker[0].AcceleratorTypeUri, workerAccelerator)
}

return nil
}
}

func TestAccDataprocCluster_withInternalIpOnlyTrue(t *testing.T) {
t.Parallel()

@@ -630,6 +690,35 @@ resource "google_dataproc_cluster" "basic" {
`, rnd)
}

func testAccDataprocCluster_withAccelerators(rnd, zone, acceleratorType string) string {
return fmt.Sprintf(`
resource "google_dataproc_cluster" "accelerated_cluster" {
name = "dproc-cluster-test-%s"
region = "us-central1"
cluster_config {
gce_cluster_config {
zone = "%s"
}
master_config {
accelerators {
accelerator_type = "%s"
accelerator_count = "1"
}
}
worker_config {
accelerators {
accelerator_type = "%s"
accelerator_count = "1"
}
}
}
}
`, rnd, zone, acceleratorType, acceleratorType)
}

func testAccDataprocCluster_withInternalIpOnlyTrue(rnd string) string {
return fmt.Sprintf(`
variable subnetwork_cidr {
56 changes: 51 additions & 5 deletions website/docs/r/dataproc_cluster.html.markdown
@@ -16,14 +16,18 @@ Manages a Cloud Dataproc cluster resource within GCP. For more information see
`labels`, `cluster_config.worker_config.num_instances` and `cluster_config.preemptible_worker_config.num_instances` are non-updateable. Changing others will cause recreation of the
whole cluster!

## Example usage
## Example Usage - Basic

```hcl
resource "google_dataproc_cluster" "simplecluster" {
name = "simplecluster"
region = "us-central1"
}
```

## Example Usage - Advanced

```hcl
resource "google_dataproc_cluster" "mycluster" {
name = "mycluster"
region = "us-central1"
@@ -79,6 +83,28 @@ resource "google_dataproc_cluster" "mycluster" {
}
```

## Example Usage - Using a GPU accelerator

```hcl
resource "google_dataproc_cluster" "accelerated_cluster" {
name = "my-cluster-with-gpu"
region = "us-central1"
cluster_config {
gce_cluster_config {
zone = "us-central1-a"
}
master_config {
accelerators {
accelerator_type = "nvidia-tesla-k80"
accelerator_count = "1"
}
}
}
}
```
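
Note the explicit `zone` in `gce_cluster_config`: as described under `accelerators` below, Auto Zone Placement does not restrict itself to zones where the requested accelerator is available, so pinning the zone avoids surprise 400 errors.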

## Argument Reference

* `name` - (Required) The name of the cluster, unique within the project and
@@ -227,18 +253,28 @@ The `cluster_config.master_config` block supports:

* `disk_config` (Optional) Disk Config

* `disk_config.boot_disk_type` - (Optional) The disk type of the primary disk attached to each node.
* `boot_disk_type` - (Optional) The disk type of the primary disk attached to each node.
One of `"pd-ssd"` or `"pd-standard"`. Defaults to `"pd-standard"`.

* `disk_config.boot_disk_size_gb` - (Optional, Computed) Size of the primary disk attached to each node, specified
* `boot_disk_size_gb` - (Optional, Computed) Size of the primary disk attached to each node, specified
in GB. The primary disk contains the boot volume and system libraries, and the
smallest allowed disk size is 10GB. GCP will default to a predetermined
computed value if not set (currently 500GB). Note: If SSDs are not
attached, it also contains the HDFS data blocks and Hadoop working directories.

* `disk_config.num_local_ssds` - (Optional) The amount of local SSD disks that will be
* `num_local_ssds` - (Optional) The amount of local SSD disks that will be
attached to each master cluster node. Defaults to 0.

* `accelerators` (Optional) The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

* `accelerator_type` - (Required) The short name of the accelerator type to expose to this instance. For example, `nvidia-tesla-k80`.

* `accelerator_count` - (Required) The number of accelerator cards of this type exposed to this instance. Often restricted to one of `1`, `2`, `4`, or `8`.

~> The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select
zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check [accelerator availability by zone](https://cloud.google.com/compute/docs/reference/rest/v1/acceleratorTypes/list)
if you are trying to use accelerators in a given zone.

- - -

The `cluster_config.worker_config` block supports:
@@ -271,7 +307,7 @@ The `cluster_config.worker_config` block supports:

* `disk_config` (Optional) Disk Config

* `disk_config.boot_disk_type` - (Optional) The disk type of the primary disk attached to each node.
* `boot_disk_type` - (Optional) The disk type of the primary disk attached to each node.
One of `"pd-ssd"` or `"pd-standard"`. Defaults to `"pd-standard"`.

* `boot_disk_size_gb` - (Optional, Computed) Size of the primary disk attached to each worker node, specified
@@ -282,6 +318,16 @@ The `cluster_config.worker_config` block supports:
* `num_local_ssds` - (Optional) The amount of local SSD disks that will be
attached to each worker cluster node. Defaults to 0.

* `accelerators` (Optional) The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

* `accelerator_type` - (Required) The short name of the accelerator type to expose to this instance. For example, `nvidia-tesla-k80`.

* `accelerator_count` - (Required) The number of accelerator cards of this type exposed to this instance. Often restricted to one of `1`, `2`, `4`, or `8`.

~> The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select
zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check [accelerator availability by zone](https://cloud.google.com/compute/docs/reference/rest/v1/acceleratorTypes/list)
if you are trying to use accelerators in a given zone.

- - -

The `cluster_config.preemptible_worker_config` block supports: