diff --git a/aws/resource_aws_emr_cluster.go b/aws/resource_aws_emr_cluster.go index 9521377804de..12f72528b4ad 100644 --- a/aws/resource_aws_emr_cluster.go +++ b/aws/resource_aws_emr_cluster.go @@ -382,6 +382,13 @@ func resourceAwsEMRCluster() *schema.Resource { Type: schema.TypeString, Computed: true, }, + "instance_count": { + Type: schema.TypeInt, + Optional: true, + ForceNew: true, + Default: 1, + ValidateFunc: validation.IntInSlice([]int{1, 3}), + }, "instance_type": { Type: schema.TypeString, Required: true, @@ -633,6 +640,11 @@ func resourceAwsEMRClusterCreate(d *schema.ResourceData, meta interface{}) error keepJobFlowAliveWhenNoSteps = v.(bool) } + // For multiple master nodes, EMR automatically enables + // termination protection and ignores this configuration at launch. + // There is additional handling after the job flow is running + // to potentially disable termination protection to match the + // desired Terraform configuration. terminationProtection := false if v, ok := d.GetOk("termination_protection"); ok { terminationProtection = v.(bool) @@ -646,7 +658,7 @@ func resourceAwsEMRClusterCreate(d *schema.ResourceData, meta interface{}) error m := l[0].(map[string]interface{}) instanceGroup := &emr.InstanceGroupConfig{ - InstanceCount: aws.Int64(1), + InstanceCount: aws.Int64(int64(m["instance_count"].(int))), InstanceRole: aws.String(emr.InstanceRoleTypeMaster), InstanceType: aws.String(m["instance_type"].(string)), Market: aws.String(emr.MarketTypeOnDemand), @@ -907,11 +919,28 @@ func resourceAwsEMRClusterCreate(d *schema.ResourceData, meta interface{}) error Delay: 30 * time.Second, // Wait 30 secs before starting } - _, err = stateConf.WaitForState() + clusterRaw, err := stateConf.WaitForState() if err != nil { return fmt.Errorf("Error waiting for EMR Cluster state to be \"WAITING\" or \"RUNNING\": %s", err) } + // For multiple master nodes, EMR automatically enables + // termination protection and ignores the configuration at launch. 
+ // This additional handling is to potentially disable termination + // protection to match the desired Terraform configuration. + cluster := clusterRaw.(*emr.Cluster) + + if aws.BoolValue(cluster.TerminationProtected) != terminationProtection { + input := &emr.SetTerminationProtectionInput{ + JobFlowIds: []*string{aws.String(d.Id())}, + TerminationProtected: aws.Bool(terminationProtection), + } + + if _, err := conn.SetTerminationProtection(input); err != nil { + return fmt.Errorf("error setting EMR Cluster (%s) termination protection to match configuration: %s", d.Id(), err) + } + } + return resourceAwsEMRClusterRead(d, meta) } @@ -1533,11 +1562,12 @@ func flattenEmrMasterInstanceGroup(instanceGroup *emr.InstanceGroup) []interface } m := map[string]interface{}{ - "bid_price": aws.StringValue(instanceGroup.BidPrice), - "ebs_config": flattenEBSConfig(instanceGroup.EbsBlockDevices), - "id": aws.StringValue(instanceGroup.Id), - "instance_type": aws.StringValue(instanceGroup.InstanceType), - "name": aws.StringValue(instanceGroup.Name), + "bid_price": aws.StringValue(instanceGroup.BidPrice), + "ebs_config": flattenEBSConfig(instanceGroup.EbsBlockDevices), + "id": aws.StringValue(instanceGroup.Id), + "instance_count": aws.Int64Value(instanceGroup.RequestedInstanceCount), + "instance_type": aws.StringValue(instanceGroup.InstanceType), + "name": aws.StringValue(instanceGroup.Name), } return []interface{}{m} diff --git a/aws/resource_aws_emr_cluster_test.go b/aws/resource_aws_emr_cluster_test.go index 2ad3661a9c93..e8434d25f5c6 100644 --- a/aws/resource_aws_emr_cluster_test.go +++ b/aws/resource_aws_emr_cluster_test.go @@ -727,6 +727,46 @@ func TestAccAWSEMRCluster_MasterInstanceGroup_BidPrice(t *testing.T) { }) } +func TestAccAWSEMRCluster_MasterInstanceGroup_InstanceCount(t *testing.T) { + var cluster1, cluster2 emr.Cluster + rName := acctest.RandomWithPrefix("tf-acc-test") + resourceName := "aws_emr_cluster.test" + + resource.ParallelTest(t, resource.TestCase{ + 
PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProviders, + CheckDestroy: testAccCheckAWSEmrDestroy, + Steps: []resource.TestStep{ + { + Config: testAccAWSEmrClusterConfigMasterInstanceGroupInstanceCount(rName, 3), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSEmrClusterExists(resourceName, &cluster1), + resource.TestCheckResourceAttr(resourceName, "master_instance_group.#", "1"), + resource.TestCheckResourceAttr(resourceName, "master_instance_group.0.instance_count", "3"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: []string{ + "configurations", + "keep_job_flow_alive_when_no_steps", + }, + }, + { + Config: testAccAWSEmrClusterConfigMasterInstanceGroupInstanceCount(rName, 1), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSEmrClusterExists(resourceName, &cluster2), + testAccCheckAWSEmrClusterRecreated(&cluster1, &cluster2), + resource.TestCheckResourceAttr(resourceName, "master_instance_group.#", "1"), + resource.TestCheckResourceAttr(resourceName, "master_instance_group.0.instance_count", "1"), + ), + }, + }, + }) +} + func TestAccAWSEMRCluster_MasterInstanceGroup_InstanceType(t *testing.T) { var cluster1, cluster2 emr.Cluster rName := acctest.RandomWithPrefix("tf-acc-test") @@ -1451,7 +1491,7 @@ func testAccCheckAWSEmrClusterRecreated(i, j *emr.Cluster) resource.TestCheckFun } } -func testAccAWSEmrClusterConfigBaseVpc() string { +func testAccAWSEmrClusterConfigBaseVpc(mapPublicIpOnLaunch bool) string { return fmt.Sprintf(` data "aws_availability_zones" "current" {} @@ -1500,9 +1540,10 @@ resource "aws_security_group" "test" { } resource "aws_subnet" "test" { - availability_zone = "${data.aws_availability_zones.current.names[0]}" - cidr_block = "10.0.0.0/24" - vpc_id = "${aws_vpc.test.id}" + availability_zone = "${data.aws_availability_zones.current.names[0]}" + cidr_block = "10.0.0.0/24" + map_public_ip_on_launch = %[1]t + vpc_id = "${aws_vpc.test.id}" 
tags = { Name = "tf-acc-test-emr-cluster" @@ -1522,7 +1563,7 @@ resource "aws_route_table_association" "test" { route_table_id = "${aws_route_table.test.id}" subnet_id = "${aws_subnet.test.id}" } -`) +`, mapPublicIpOnLaunch) } func testAccAWSEmrClusterConfig_bootstrap(r string) string { @@ -3362,7 +3403,7 @@ resource "aws_main_route_table_association" "a" { } func testAccAWSEmrClusterConfigCoreInstanceGroupAutoscalingPolicy(rName, autoscalingPolicy string) string { - return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` data "aws_iam_policy_document" "test" { statement { actions = ["sts:AssumeRole"] @@ -3420,7 +3461,7 @@ resource "aws_emr_cluster" "test" { } func testAccAWSEmrClusterConfigCoreInstanceGroupAutoscalingPolicyRemoved(rName string) string { - return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` data "aws_iam_policy_document" "test" { statement { actions = ["sts:AssumeRole"] @@ -3477,7 +3518,7 @@ resource "aws_emr_cluster" "test" { } func testAccAWSEmrClusterConfigCoreInstanceGroupBidPrice(rName, bidPrice string) string { - return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` resource "aws_emr_cluster" "test" { applications = ["Spark"] keep_job_flow_alive_when_no_steps = true @@ -3507,7 +3548,7 @@ resource "aws_emr_cluster" "test" { } func testAccAWSEmrClusterConfigCoreInstanceGroupInstanceCount(rName string, instanceCount int) string { - return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` resource "aws_emr_cluster" "test" { applications = ["Spark"] keep_job_flow_alive_when_no_steps = true @@ -3537,7 +3578,7 @@ resource "aws_emr_cluster" "test" { } func testAccAWSEmrClusterConfigCoreInstanceGroupInstanceType(rName, instanceType string) string { - return 
testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` resource "aws_emr_cluster" "test" { applications = ["Spark"] keep_job_flow_alive_when_no_steps = true @@ -3566,7 +3607,7 @@ resource "aws_emr_cluster" "test" { } func testAccAWSEmrClusterConfigCoreInstanceGroupName(rName, instanceGroupName string) string { - return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` resource "aws_emr_cluster" "test" { applications = ["Spark"] keep_job_flow_alive_when_no_steps = true @@ -3596,7 +3637,7 @@ resource "aws_emr_cluster" "test" { } func testAccAWSEmrClusterConfigCoreInstanceType(rName, coreInstanceType string) string { - return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` resource "aws_emr_cluster" "test" { applications = ["Spark"] keep_job_flow_alive_when_no_steps = true @@ -3623,7 +3664,7 @@ resource "aws_emr_cluster" "test" { } func testAccAWSEmrClusterConfigInstanceGroupCoreInstanceType(rName, coreInstanceType string) string { - return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` resource "aws_emr_cluster" "test" { applications = ["Spark"] keep_job_flow_alive_when_no_steps = true @@ -3656,7 +3697,7 @@ resource "aws_emr_cluster" "test" { } func testAccAWSEmrClusterConfigInstanceGroupMasterInstanceType(rName, masterInstanceType string) string { - return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` resource "aws_emr_cluster" "test" { applications = ["Spark"] keep_job_flow_alive_when_no_steps = true @@ -5425,7 +5466,7 @@ resource "aws_iam_role_policy_attachment" "emr-autoscaling-role" { } func testAccAWSEmrClusterConfigMasterInstanceGroupBidPrice(rName, bidPrice string) string { - return testAccAWSEmrClusterConfigBaseVpc() + 
fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` resource "aws_emr_cluster" "test" { applications = ["Spark"] keep_job_flow_alive_when_no_steps = true @@ -5450,8 +5491,42 @@ resource "aws_emr_cluster" "test" { `, rName, bidPrice) } +func testAccAWSEmrClusterConfigMasterInstanceGroupInstanceCount(rName string, instanceCount int) string { + return testAccAWSEmrClusterConfigBaseVpc(true) + fmt.Sprintf(` +resource "aws_emr_cluster" "test" { + applications = ["Spark"] + keep_job_flow_alive_when_no_steps = true + name = %[1]q + release_label = "emr-5.24.1" + service_role = "EMR_DefaultRole" + + # Termination protection is automatically enabled for multiple master clusters + termination_protection = false + + ec2_attributes { + emr_managed_master_security_group = "${aws_security_group.test.id}" + emr_managed_slave_security_group = "${aws_security_group.test.id}" + instance_profile = "EMR_EC2_DefaultRole" + subnet_id = "${aws_subnet.test.id}" + } + + master_instance_group { + instance_count = %[2]d + instance_type = "m4.large" + } + + # core_instance_group is required with multiple masters + core_instance_group { + instance_type = "m4.large" + } + + depends_on = ["aws_route_table_association.test"] +} +`, rName, instanceCount) +} + func testAccAWSEmrClusterConfigMasterInstanceGroupInstanceType(rName, instanceType string) string { - return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` resource "aws_emr_cluster" "test" { applications = ["Spark"] keep_job_flow_alive_when_no_steps = true @@ -5476,7 +5551,7 @@ resource "aws_emr_cluster" "test" { } func testAccAWSEmrClusterConfigMasterInstanceGroupName(rName, instanceGroupName string) string { - return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` resource "aws_emr_cluster" "test" { applications = ["Spark"] keep_job_flow_alive_when_no_steps = true @@ 
-5502,7 +5577,7 @@ resource "aws_emr_cluster" "test" { } func testAccAWSEmrClusterConfigMasterInstanceType(rName, masterInstanceType string) string { - return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(` + return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(` resource "aws_emr_cluster" "test" { applications = ["Spark"] keep_job_flow_alive_when_no_steps = true diff --git a/website/docs/r/emr_cluster.html.markdown b/website/docs/r/emr_cluster.html.markdown index a647c014ffc2..4b161b5c640e 100644 --- a/website/docs/r/emr_cluster.html.markdown +++ b/website/docs/r/emr_cluster.html.markdown @@ -176,6 +176,51 @@ resource "aws_emr_cluster" "example" { } ``` +### Multiple Node Master Instance Group + +Available in EMR version 5.23.0 and later, an EMR Cluster can be launched with three master nodes for high availability. Additional information about this functionality and its requirements can be found in the [EMR Management Guide](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-plan-ha.html). + +```hcl +# This configuration is for illustrative purposes and highlights +# only relevant configurations for working with this functionality. + +# Map public IP on launch must be enabled for public (Internet accessible) subnets +resource "aws_subnet" "example" { + # ... other configuration ... + + map_public_ip_on_launch = true +} + +resource "aws_emr_cluster" "example" { + # ... other configuration ... + + # EMR version must be 5.23.0 or later + release_label = "emr-5.24.1" + + # Termination protection is automatically enabled for multiple masters + # To destroy the cluster, this must be configured to false and applied first + termination_protection = true + + ec2_attributes { + # ... other configuration ... + + subnet_id = "${aws_subnet.example.id}" + } + + master_instance_group { + # ... other configuration ... 
+ + # Master instance count must be set to 3 + instance_count = 3 + } + + # core_instance_group must be configured + core_instance_group { + # ... other configuration ... + } +} +``` + ## Argument Reference The following arguments are supported: @@ -194,7 +239,7 @@ The following arguments are supported: * `instance_group` - (Optional, **DEPRECATED**) Use the `master_instance_group` configuration block, `core_instance_group` configuration block and [`aws_emr_instance_group` resource(s)](/docs/providers/aws/r/emr_instance_group.html) instead. A list of `instance_group` objects for each instance group in the cluster. Exactly one of `master_instance_type` and `instance_group` must be specified. If `instance_group` is set, then it must contain a configuration block for at least the `MASTER` instance group type (as well as any additional instance groups). Cannot be specified if `master_instance_group` or `core_instance_group` configuration blocks are set. Defined below * `log_uri` - (Optional) S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created * `applications` - (Optional) A list of applications for the cluster. Valid values are: `Flink`, `Hadoop`, `Hive`, `Mahout`, `Pig`, `Spark`, and `JupyterHub` (as of EMR 5.14.0). Case insensitive -* `termination_protection` - (Optional) Switch on/off termination protection (default is off) +* `termination_protection` - (Optional) Switch on/off termination protection (default is `false`, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to `false`. * `keep_job_flow_alive_when_no_steps` - (Optional) Switch on/off run cluster with no steps or when all steps are complete (default is on) * `ec2_attributes` - (Optional) Attributes for the EC2 instances running the job flow. Defined below * `kerberos_attributes` - (Optional) Kerberos configuration for the cluster. 
Defined below @@ -299,6 +344,7 @@ Supported nested arguments for the `master_instance_group` configuration block: * `instance_type` - (Required) EC2 instance type for all instances in the instance group. * `bid_price` - (Optional) Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances. * `ebs_config` - (Optional) Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below. +* `instance_count` - (Optional) Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource's `core_instance_group` to be configured. Public (Internet accessible) instances must be created in VPC subnets that have [map public IP on launch](/docs/providers/aws/r/subnet.html#map_public_ip_on_launch) enabled. Termination protection is automatically enabled when launched with multiple master nodes and Terraform must have the `termination_protection = false` configuration applied before destroying this resource. * `name` - (Optional) Friendly name given to the instance group. ## ebs_config