Add support for accelerators to dataproc (hashicorp#90)
modular-magician authored and rileykarson committed Nov 8, 2018
1 parent 072d86e commit 69737d2
Showing 3 changed files with 200 additions and 5 deletions.
60 changes: 60 additions & 0 deletions google-beta/resource_dataproc_cluster.go
@@ -362,6 +362,14 @@ func instanceConfigSchema() *schema.Schema {
},
},

// Note: preemptible workers don't support accelerators
"accelerators": {
Type: schema.TypeSet,
Optional: true,
ForceNew: true,
Elem: acceleratorsSchema(),
},

"instance_names": {
Type: schema.TypeList,
Computed: true,
@@ -372,6 +380,25 @@
}
}

// We need to pull accelerators' schema out so we can use it to make a set hash func
func acceleratorsSchema() *schema.Resource {
return &schema.Resource{
Schema: map[string]*schema.Schema{
"accelerator_type": {
Type: schema.TypeString,
Required: true,
ForceNew: true,
},

"accelerator_count": {
Type: schema.TypeInt,
Required: true,
ForceNew: true,
},
},
}
}
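
Factoring the schema out is what lets `flattenAccelerators` (further down in this file) build its set with `schema.HashResource(acceleratorsSchema())`, so accelerator blocks declared in config and blocks read back from the API hash identically. A minimal sketch of that behavior — assuming the 2018-era SDK import path and that `acceleratorsSchema` is in scope; the `main` scaffolding is illustrative only:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/terraform/helper/schema"
)

func main() {
	// HashResource derives a deterministic set-hash function from the
	// shared *schema.Resource definition above.
	hashFn := schema.HashResource(acceleratorsSchema())
	h := hashFn(map[string]interface{}{
		"accelerator_type":  "nvidia-tesla-k80",
		"accelerator_count": 1,
	})
	fmt.Println(h) // identical inputs always produce the same hash
}
```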

func resourceDataprocClusterCreate(d *schema.ResourceData, meta interface{}) error {
config := meta.(*Config)

@@ -607,9 +634,26 @@ func expandInstanceGroupConfig(cfg map[string]interface{}) *dataproc.InstanceGro
}
}
}

icg.Accelerators = expandAccelerators(cfg["accelerators"].(*schema.Set).List())
return icg
}

func expandAccelerators(configured []interface{}) []*dataproc.AcceleratorConfig {
accelerators := make([]*dataproc.AcceleratorConfig, 0, len(configured))
for _, raw := range configured {
data := raw.(map[string]interface{})
accelerator := dataproc.AcceleratorConfig{
AcceleratorTypeUri: data["accelerator_type"].(string),
AcceleratorCount: int64(data["accelerator_count"].(int)),
}

accelerators = append(accelerators, &accelerator)
}

return accelerators
}
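
A quick worked example (a sketch, not part of the commit) of what the expander yields for one configured block. Note that the short type name passes through unchanged: the Dataproc API accepts short names on create, but — as the acceptance test below demonstrates — it reports full self-links back, which is why the flatten path has to normalize:

```go
// Hypothetical input, shaped like the (*schema.Set).List() result
// that expandInstanceGroupConfig passes in.
configured := []interface{}{
	map[string]interface{}{
		"accelerator_type":  "nvidia-tesla-k80",
		"accelerator_count": 1,
	},
}
accs := expandAccelerators(configured)
// accs[0].AcceleratorTypeUri == "nvidia-tesla-k80"
// accs[0].AcceleratorCount  == 1
```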

func resourceDataprocClusterUpdate(d *schema.ResourceData, meta interface{}) error {
config := meta.(*Config)

@@ -746,6 +790,20 @@ func flattenSoftwareConfig(d *schema.ResourceData, sc *dataproc.SoftwareConfig)
return []map[string]interface{}{data}
}

func flattenAccelerators(accelerators []*dataproc.AcceleratorConfig) interface{} {
acceleratorsTypeSet := schema.NewSet(schema.HashResource(acceleratorsSchema()), []interface{}{})
for _, accelerator := range accelerators {
data := map[string]interface{}{
"accelerator_type": GetResourceNameFromSelfLink(accelerator.AcceleratorTypeUri),
"accelerator_count": int(accelerator.AcceleratorCount),
}

acceleratorsTypeSet.Add(data)
}

return acceleratorsTypeSet
}
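
`GetResourceNameFromSelfLink` is the provider's existing helper for reducing a full self-link to its trailing resource name. Without it, the short name stored in state would never hash-match the full URI the API returns, and every plan would show a spurious diff on `accelerators`. A simplified, hypothetical version of the idea (not the provider's actual implementation):

```go
import "strings"

// getResourceNameFromSelfLink reduces a full self-link such as
// ".../zones/us-central1-a/acceleratorTypes/nvidia-tesla-k80" to the
// short name "nvidia-tesla-k80" that users write in their config.
func getResourceNameFromSelfLink(link string) string {
	parts := strings.Split(link, "/")
	return parts[len(parts)-1]
}
```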

func flattenInitializationActions(nia []*dataproc.NodeInitializationAction) ([]map[string]interface{}, error) {

actions := []map[string]interface{}{}
@@ -819,6 +877,8 @@ func flattenInstanceGroupConfig(d *schema.ResourceData, icg *dataproc.InstanceGr
disk["num_local_ssds"] = icg.DiskConfig.NumLocalSsds
disk["boot_disk_type"] = icg.DiskConfig.BootDiskType
}

data["accelerators"] = flattenAccelerators(icg.Accelerators)
}

data["disk_config"] = []map[string]interface{}{disk}
89 changes: 89 additions & 0 deletions google-beta/resource_dataproc_cluster_test.go
@@ -134,6 +134,66 @@ func TestAccDataprocCluster_basic(t *testing.T) {
})
}

func TestAccDataprocCluster_withAccelerators(t *testing.T) {
t.Parallel()

rnd := acctest.RandString(10)
var cluster dataproc.Cluster

project := getTestProjectFromEnv()
zone := "us-central1-a"
acceleratorType := "nvidia-tesla-k80"
acceleratorLink := fmt.Sprintf("https://www.googleapis.com/compute/beta/projects/%s/zones/%s/acceleratorTypes/%s", project, zone, acceleratorType)

resource.Test(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
CheckDestroy: testAccCheckDataprocClusterDestroy(),
Steps: []resource.TestStep{
{
Config: testAccDataprocCluster_withAccelerators(rnd, zone, acceleratorType),
Check: resource.ComposeTestCheckFunc(
testAccCheckDataprocClusterExists("google_dataproc_cluster.accelerated_cluster", &cluster),
testAccCheckDataprocClusterAccelerator(&cluster, 1, acceleratorLink, 1, acceleratorLink),
),
},
},
})
}

func testAccCheckDataprocClusterAccelerator(cluster *dataproc.Cluster, masterCount int, masterAccelerator string, workerCount int, workerAccelerator string) resource.TestCheckFunc {
return func(s *terraform.State) error {

master := cluster.Config.MasterConfig.Accelerators
if len(master) != 1 {
return fmt.Errorf("Saw %d master accelerator types instead of 1", len(master))
}

if int(master[0].AcceleratorCount) != masterCount {
return fmt.Errorf("Saw %d master accelerators instead of %d", int(master[0].AcceleratorCount), masterCount)
}

if master[0].AcceleratorTypeUri != masterAccelerator {
return fmt.Errorf("Saw %s master accelerator type instead of %s", master[0].AcceleratorTypeUri, masterAccelerator)
}

worker := cluster.Config.WorkerConfig.Accelerators
if len(worker) != 1 {
return fmt.Errorf("Saw %d worker accelerator types instead of 1", len(worker))
}

if int(worker[0].AcceleratorCount) != workerCount {
return fmt.Errorf("Saw %d worker accelerators instead of %d", int(worker[0].AcceleratorCount), workerCount)
}

if worker[0].AcceleratorTypeUri != workerAccelerator {
return fmt.Errorf("Saw %s worker accelerator type instead of %s", worker[0].AcceleratorTypeUri, workerAccelerator)
}

return nil
}
}

func TestAccDataprocCluster_withInternalIpOnlyTrue(t *testing.T) {
t.Parallel()

@@ -630,6 +690,35 @@ resource "google_dataproc_cluster" "basic" {
`, rnd)
}

func testAccDataprocCluster_withAccelerators(rnd, zone, acceleratorType string) string {
return fmt.Sprintf(`
resource "google_dataproc_cluster" "accelerated_cluster" {
name = "dproc-cluster-test-%s"
region = "us-central1"
cluster_config {
gce_cluster_config {
zone = "%s"
}
master_config {
accelerators {
accelerator_type = "%s"
accelerator_count = "1"
}
}
worker_config {
accelerators {
accelerator_type = "%s"
accelerator_count = "1"
}
}
}
}
`, rnd, zone, acceleratorType, acceleratorType)
}

func testAccDataprocCluster_withInternalIpOnlyTrue(rnd string) string {
return fmt.Sprintf(`
variable subnetwork_cidr {
56 changes: 51 additions & 5 deletions website/docs/r/dataproc_cluster.html.markdown
@@ -16,14 +16,18 @@ Manages a Cloud Dataproc cluster resource within GCP. For more information see
`labels`, `cluster_config.worker_config.num_instances` and `cluster_config.preemptible_worker_config.num_instances` are non-updateable. Changing others will cause recreation of the
whole cluster!

## Example usage
## Example Usage - Basic

```hcl
resource "google_dataproc_cluster" "simplecluster" {
name = "simplecluster"
region = "us-central1"
}
```

## Example Usage - Advanced

```hcl
resource "google_dataproc_cluster" "mycluster" {
name = "mycluster"
region = "us-central1"
@@ -79,6 +83,28 @@ resource "google_dataproc_cluster" "mycluster" {
}
```

## Example Usage - Using a GPU accelerator

```hcl
resource "google_dataproc_cluster" "accelerated_cluster" {
name = "my-cluster-with-gpu"
region = "us-central1"
cluster_config {
gce_cluster_config {
zone = "us-central1-a"
}
master_config {
accelerators {
accelerator_type = "nvidia-tesla-k80"
accelerator_count = "1"
}
}
}
}
```
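
Note the explicit `zone` in `gce_cluster_config`: as described under `accelerators` below, Auto Zone Placement does not restrict itself to zones where the requested accelerator is available, so pinning the zone avoids surprise 400 errors.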

## Argument Reference

* `name` - (Required) The name of the cluster, unique within the project and
@@ -227,18 +253,28 @@ The `cluster_config.master_config` block supports:

* `disk_config` (Optional) Disk Config

* `disk_config.boot_disk_type` - (Optional) The disk type of the primary disk attached to each node.
* `boot_disk_type` - (Optional) The disk type of the primary disk attached to each node.
One of `"pd-ssd"` or `"pd-standard"`. Defaults to `"pd-standard"`.

* `disk_config.boot_disk_size_gb` - (Optional, Computed) Size of the primary disk attached to each node, specified
* `boot_disk_size_gb` - (Optional, Computed) Size of the primary disk attached to each node, specified
in GB. The primary disk contains the boot volume and system libraries, and the
smallest allowed disk size is 10GB. GCP will default to a predetermined
computed value if not set (currently 500GB). Note: If SSDs are not
attached, it also contains the HDFS data blocks and Hadoop working directories.

* `disk_config.num_local_ssds` - (Optional) The amount of local SSD disks that will be
* `num_local_ssds` - (Optional) The amount of local SSD disks that will be
attached to each master cluster node. Defaults to 0.

* `accelerators` (Optional) The Compute Engine accelerator (GPU) configuration for these instances. Can be specified multiple times.

* `accelerator_type` - (Required) The short name of the accelerator type to expose to this instance. For example, `nvidia-tesla-k80`.

* `accelerator_count` - (Required) The number of accelerator cards of this type exposed to this instance. Often restricted to one of `1`, `2`, `4`, or `8`.

~> The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select
zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check [accelerator availability by zone](https://cloud.google.com/compute/docs/reference/rest/v1/acceleratorTypes/list)
if you are trying to use accelerators in a given zone.

- - -

The `cluster_config.worker_config` block supports:
@@ -271,7 +307,7 @@ The `cluster_config.worker_config` block supports:

* `disk_config` (Optional) Disk Config

* `disk_config.boot_disk_type` - (Optional) The disk type of the primary disk attached to each node.
* `boot_disk_type` - (Optional) The disk type of the primary disk attached to each node.
One of `"pd-ssd"` or `"pd-standard"`. Defaults to `"pd-standard"`.

* `boot_disk_size_gb` - (Optional, Computed) Size of the primary disk attached to each worker node, specified
@@ -282,6 +318,16 @@ The `cluster_config.worker_config` block supports:
* `num_local_ssds` - (Optional) The amount of local SSD disks that will be
attached to each worker cluster node. Defaults to 0.

* `accelerators` (Optional) The Compute Engine accelerator configuration for these instances. Can be specified multiple times.

* `accelerator_type` - (Required) The short name of the accelerator type to expose to this instance. For example, `nvidia-tesla-k80`.

* `accelerator_count` - (Required) The number of accelerator cards of this type exposed to this instance. Often restricted to one of `1`, `2`, `4`, or `8`.

~> The Cloud Dataproc API can return unintuitive error messages when using accelerators; even when you have defined an accelerator, Auto Zone Placement does not exclusively select
zones that have that accelerator available. If you get a 400 error that the accelerator can't be found, this is a likely cause. Make sure you check [accelerator availability by zone](https://cloud.google.com/compute/docs/reference/rest/v1/acceleratorTypes/list)
if you are trying to use accelerators in a given zone.

- - -

The `cluster_config.preemptible_worker_config` block supports: