Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

resource/aws_emr_cluster: Add master_instance_group configuration block instance_count argument (support multiple master nodes) #9235

Merged
merged 3 commits into from
Jul 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 37 additions & 7 deletions aws/resource_aws_emr_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,13 @@ func resourceAwsEMRCluster() *schema.Resource {
Type: schema.TypeString,
Computed: true,
},
"instance_count": {
Type: schema.TypeInt,
Optional: true,
ForceNew: true,
Default: 1,
ValidateFunc: validation.IntInSlice([]int{1, 3}),
},
"instance_type": {
Type: schema.TypeString,
Required: true,
Expand Down Expand Up @@ -633,6 +640,11 @@ func resourceAwsEMRClusterCreate(d *schema.ResourceData, meta interface{}) error
keepJobFlowAliveWhenNoSteps = v.(bool)
}

// For multiple master nodes, EMR automatically enables
// termination protection and ignores this configuration at launch.
// There is additional handling after the job flow is running
// to potentially disable termination protection to match the
// desired Terraform configuration.
terminationProtection := false
if v, ok := d.GetOk("termination_protection"); ok {
terminationProtection = v.(bool)
Expand All @@ -646,7 +658,7 @@ func resourceAwsEMRClusterCreate(d *schema.ResourceData, meta interface{}) error
m := l[0].(map[string]interface{})

instanceGroup := &emr.InstanceGroupConfig{
InstanceCount: aws.Int64(1),
InstanceCount: aws.Int64(int64(m["instance_count"].(int))),
InstanceRole: aws.String(emr.InstanceRoleTypeMaster),
InstanceType: aws.String(m["instance_type"].(string)),
Market: aws.String(emr.MarketTypeOnDemand),
Expand Down Expand Up @@ -907,11 +919,28 @@ func resourceAwsEMRClusterCreate(d *schema.ResourceData, meta interface{}) error
Delay: 30 * time.Second, // Wait 30 secs before starting
}

_, err = stateConf.WaitForState()
clusterRaw, err := stateConf.WaitForState()
if err != nil {
return fmt.Errorf("Error waiting for EMR Cluster state to be \"WAITING\" or \"RUNNING\": %s", err)
}

// For multiple master nodes, EMR automatically enables
// termination protection and ignores the configuration at launch.
// This additional handling is to potentially disable termination
// protection to match the desired Terraform configuration.
cluster := clusterRaw.(*emr.Cluster)

if aws.BoolValue(cluster.TerminationProtected) != terminationProtection {
input := &emr.SetTerminationProtectionInput{
JobFlowIds: []*string{aws.String(d.Id())},
TerminationProtected: aws.Bool(terminationProtection),
}

if _, err := conn.SetTerminationProtection(input); err != nil {
return fmt.Errorf("error setting EMR Cluster (%s) termination protection to match configuration: %s", d.Id(), err)
}
}

return resourceAwsEMRClusterRead(d, meta)
}

Expand Down Expand Up @@ -1533,11 +1562,12 @@ func flattenEmrMasterInstanceGroup(instanceGroup *emr.InstanceGroup) []interface
}

m := map[string]interface{}{
"bid_price": aws.StringValue(instanceGroup.BidPrice),
"ebs_config": flattenEBSConfig(instanceGroup.EbsBlockDevices),
"id": aws.StringValue(instanceGroup.Id),
"instance_type": aws.StringValue(instanceGroup.InstanceType),
"name": aws.StringValue(instanceGroup.Name),
"bid_price": aws.StringValue(instanceGroup.BidPrice),
"ebs_config": flattenEBSConfig(instanceGroup.EbsBlockDevices),
"id": aws.StringValue(instanceGroup.Id),
"instance_count": aws.Int64Value(instanceGroup.RequestedInstanceCount),
"instance_type": aws.StringValue(instanceGroup.InstanceType),
"name": aws.StringValue(instanceGroup.Name),
}

return []interface{}{m}
Expand Down
111 changes: 93 additions & 18 deletions aws/resource_aws_emr_cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,46 @@ func TestAccAWSEMRCluster_MasterInstanceGroup_BidPrice(t *testing.T) {
})
}

// TestAccAWSEMRCluster_MasterInstanceGroup_InstanceCount is an acceptance test
// for the master_instance_group instance_count argument. It runs three steps:
// apply a configuration with three master nodes, verify import of the
// resulting resource, then apply a configuration with one master node and
// check the cluster against testAccCheckAWSEmrClusterRecreated.
func TestAccAWSEMRCluster_MasterInstanceGroup_InstanceCount(t *testing.T) {
	var before, after emr.Cluster

	rName := acctest.RandomWithPrefix("tf-acc-test")
	resourceName := "aws_emr_cluster.test"

	// Step 1: launch with three master nodes.
	multiMasterStep := resource.TestStep{
		Config: testAccAWSEmrClusterConfigMasterInstanceGroupInstanceCount(rName, 3),
		Check: resource.ComposeTestCheckFunc(
			testAccCheckAWSEmrClusterExists(resourceName, &before),
			resource.TestCheckResourceAttr(resourceName, "master_instance_group.#", "1"),
			resource.TestCheckResourceAttr(resourceName, "master_instance_group.0.instance_count", "3"),
		),
	}

	// Step 2: import the resource and compare state; the listed attributes
	// are excluded from the comparison.
	importStep := resource.TestStep{
		ResourceName:      resourceName,
		ImportState:       true,
		ImportStateVerify: true,
		ImportStateVerifyIgnore: []string{
			"configurations",
			"keep_job_flow_alive_when_no_steps",
		},
	}

	// Step 3: switch to a single master node and confirm a new cluster
	// replaced the one captured in step 1.
	singleMasterStep := resource.TestStep{
		Config: testAccAWSEmrClusterConfigMasterInstanceGroupInstanceCount(rName, 1),
		Check: resource.ComposeTestCheckFunc(
			testAccCheckAWSEmrClusterExists(resourceName, &after),
			testAccCheckAWSEmrClusterRecreated(&before, &after),
			resource.TestCheckResourceAttr(resourceName, "master_instance_group.#", "1"),
			resource.TestCheckResourceAttr(resourceName, "master_instance_group.0.instance_count", "1"),
		),
	}

	resource.ParallelTest(t, resource.TestCase{
		PreCheck:     func() { testAccPreCheck(t) },
		Providers:    testAccProviders,
		CheckDestroy: testAccCheckAWSEmrDestroy,
		Steps: []resource.TestStep{
			multiMasterStep,
			importStep,
			singleMasterStep,
		},
	})
}

func TestAccAWSEMRCluster_MasterInstanceGroup_InstanceType(t *testing.T) {
var cluster1, cluster2 emr.Cluster
rName := acctest.RandomWithPrefix("tf-acc-test")
Expand Down Expand Up @@ -1451,7 +1491,7 @@ func testAccCheckAWSEmrClusterRecreated(i, j *emr.Cluster) resource.TestCheckFun
}
}

func testAccAWSEmrClusterConfigBaseVpc() string {
func testAccAWSEmrClusterConfigBaseVpc(mapPublicIpOnLaunch bool) string {
return fmt.Sprintf(`
data "aws_availability_zones" "current" {}

Expand Down Expand Up @@ -1500,9 +1540,10 @@ resource "aws_security_group" "test" {
}

resource "aws_subnet" "test" {
availability_zone = "${data.aws_availability_zones.current.names[0]}"
cidr_block = "10.0.0.0/24"
vpc_id = "${aws_vpc.test.id}"
availability_zone = "${data.aws_availability_zones.current.names[0]}"
cidr_block = "10.0.0.0/24"
map_public_ip_on_launch = %[1]t
vpc_id = "${aws_vpc.test.id}"

tags = {
Name = "tf-acc-test-emr-cluster"
Expand All @@ -1522,7 +1563,7 @@ resource "aws_route_table_association" "test" {
route_table_id = "${aws_route_table.test.id}"
subnet_id = "${aws_subnet.test.id}"
}
`)
`, mapPublicIpOnLaunch)
}

func testAccAWSEmrClusterConfig_bootstrap(r string) string {
Expand Down Expand Up @@ -3362,7 +3403,7 @@ resource "aws_main_route_table_association" "a" {
}

func testAccAWSEmrClusterConfigCoreInstanceGroupAutoscalingPolicy(rName, autoscalingPolicy string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
data "aws_iam_policy_document" "test" {
statement {
actions = ["sts:AssumeRole"]
Expand Down Expand Up @@ -3420,7 +3461,7 @@ resource "aws_emr_cluster" "test" {
}

func testAccAWSEmrClusterConfigCoreInstanceGroupAutoscalingPolicyRemoved(rName string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
data "aws_iam_policy_document" "test" {
statement {
actions = ["sts:AssumeRole"]
Expand Down Expand Up @@ -3477,7 +3518,7 @@ resource "aws_emr_cluster" "test" {
}

func testAccAWSEmrClusterConfigCoreInstanceGroupBidPrice(rName, bidPrice string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
Expand Down Expand Up @@ -3507,7 +3548,7 @@ resource "aws_emr_cluster" "test" {
}

func testAccAWSEmrClusterConfigCoreInstanceGroupInstanceCount(rName string, instanceCount int) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
Expand Down Expand Up @@ -3537,7 +3578,7 @@ resource "aws_emr_cluster" "test" {
}

func testAccAWSEmrClusterConfigCoreInstanceGroupInstanceType(rName, instanceType string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
Expand Down Expand Up @@ -3566,7 +3607,7 @@ resource "aws_emr_cluster" "test" {
}

func testAccAWSEmrClusterConfigCoreInstanceGroupName(rName, instanceGroupName string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
Expand Down Expand Up @@ -3596,7 +3637,7 @@ resource "aws_emr_cluster" "test" {
}

func testAccAWSEmrClusterConfigCoreInstanceType(rName, coreInstanceType string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
Expand All @@ -3623,7 +3664,7 @@ resource "aws_emr_cluster" "test" {
}

func testAccAWSEmrClusterConfigInstanceGroupCoreInstanceType(rName, coreInstanceType string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
Expand Down Expand Up @@ -3656,7 +3697,7 @@ resource "aws_emr_cluster" "test" {
}

func testAccAWSEmrClusterConfigInstanceGroupMasterInstanceType(rName, masterInstanceType string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
Expand Down Expand Up @@ -5425,7 +5466,7 @@ resource "aws_iam_role_policy_attachment" "emr-autoscaling-role" {
}

func testAccAWSEmrClusterConfigMasterInstanceGroupBidPrice(rName, bidPrice string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
Expand All @@ -5450,8 +5491,42 @@ resource "aws_emr_cluster" "test" {
`, rName, bidPrice)
}

// testAccAWSEmrClusterConfigMasterInstanceGroupInstanceCount returns a
// Terraform configuration for an aws_emr_cluster named rName whose
// master_instance_group has instance_count set to instanceCount. The base VPC
// configuration is requested with map-public-IP-on-launch enabled, and
// termination_protection is explicitly set to false.
func testAccAWSEmrClusterConfigMasterInstanceGroupInstanceCount(rName string, instanceCount int) string {
	// Shared networking configuration, with public IPs mapped on launch.
	vpcConfig := testAccAWSEmrClusterConfigBaseVpc(true)

	// %[1]q is the cluster name, %[2]d the master instance count.
	clusterConfig := fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
name = %[1]q
release_label = "emr-5.24.1"
service_role = "EMR_DefaultRole"

# Termination protection is automatically enabled for multiple master clusters
termination_protection = false

ec2_attributes {
emr_managed_master_security_group = "${aws_security_group.test.id}"
emr_managed_slave_security_group = "${aws_security_group.test.id}"
instance_profile = "EMR_EC2_DefaultRole"
subnet_id = "${aws_subnet.test.id}"
}

master_instance_group {
instance_count = %[2]d
instance_type = "m4.large"
}

# core_instance_group is required with multiple masters
core_instance_group {
instance_type = "m4.large"
}

depends_on = ["aws_route_table_association.test"]
}
`, rName, instanceCount)

	return vpcConfig + clusterConfig
}

func testAccAWSEmrClusterConfigMasterInstanceGroupInstanceType(rName, instanceType string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
Expand All @@ -5476,7 +5551,7 @@ resource "aws_emr_cluster" "test" {
}

func testAccAWSEmrClusterConfigMasterInstanceGroupName(rName, instanceGroupName string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
Expand All @@ -5502,7 +5577,7 @@ resource "aws_emr_cluster" "test" {
}

func testAccAWSEmrClusterConfigMasterInstanceType(rName, masterInstanceType string) string {
return testAccAWSEmrClusterConfigBaseVpc() + fmt.Sprintf(`
return testAccAWSEmrClusterConfigBaseVpc(false) + fmt.Sprintf(`
resource "aws_emr_cluster" "test" {
applications = ["Spark"]
keep_job_flow_alive_when_no_steps = true
Expand Down
48 changes: 47 additions & 1 deletion website/docs/r/emr_cluster.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,51 @@ resource "aws_emr_cluster" "example" {
}
```

### Multiple Node Master Instance Group

In EMR version 5.23.0 and later, an EMR Cluster can be launched with three master nodes for high availability. Additional information about this functionality and its requirements can be found in the [EMR Management Guide](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-plan-ha.html).

```hcl
# This configuration is for illustrative purposes and highlights
# only relevant configurations for working with this functionality.

# Map public IP on launch must be enabled for public (Internet accessible) subnets
resource "aws_subnet" "example" {
# ... other configuration ...

map_public_ip_on_launch = true
}

resource "aws_emr_cluster" "example" {
# ... other configuration ...

# EMR version must be 5.23.0 or later
release_label = "emr-5.24.1"

# Termination protection is automatically enabled for multiple masters
# To destroy the cluster, this must be configured to false and applied first
termination_protection = true

ec2_attributes {
# ... other configuration ...

subnet_id = "${aws_subnet.example.id}"
}

master_instance_group {
# ... other configuration ...

# Master instance count must be set to 3
instance_count = 3
}

# core_instance_group must be configured
core_instance_group {
# ... other configuration ...
}
}
```

## Argument Reference

The following arguments are supported:
Expand All @@ -194,7 +239,7 @@ The following arguments are supported:
* `instance_group` - (Optional, **DEPRECATED**) Use the `master_instance_group` configuration block, `core_instance_group` configuration block and [`aws_emr_instance_group` resource(s)](/docs/providers/aws/r/emr_instance_group.html) instead. A list of `instance_group` objects for each instance group in the cluster. Exactly one of `master_instance_type` and `instance_group` must be specified. If `instance_group` is set, then it must contain a configuration block for at least the `MASTER` instance group type (as well as any additional instance groups). Cannot be specified if `master_instance_group` or `core_instance_group` configuration blocks are set. Defined below
* `log_uri` - (Optional) S3 bucket to write the log files of the job flow. If a value is not provided, logs are not created
* `applications` - (Optional) A list of applications for the cluster. Valid values are: `Flink`, `Hadoop`, `Hive`, `Mahout`, `Pig`, `Spark`, and `JupyterHub` (as of EMR 5.14.0). Case insensitive
* `termination_protection` - (Optional) Switch on/off termination protection (default is off)
* `termination_protection` - (Optional) Switch on/off termination protection (default is `false`, except when using multiple master nodes). Before attempting to destroy the resource when termination protection is enabled, this configuration must be applied with its value set to `false`.
* `keep_job_flow_alive_when_no_steps` - (Optional) Switch on/off run cluster with no steps or when all steps are complete (default is on)
* `ec2_attributes` - (Optional) Attributes for the EC2 instances running the job flow. Defined below
* `kerberos_attributes` - (Optional) Kerberos configuration for the cluster. Defined below
Expand Down Expand Up @@ -299,6 +344,7 @@ Supported nested arguments for the `master_instance_group` configuration block:
* `instance_type` - (Required) EC2 instance type for all instances in the instance group.
* `bid_price` - (Optional) Bid price for each EC2 instance in the instance group, expressed in USD. By setting this attribute, the instance group is being declared as a Spot Instance, and will implicitly create a Spot request. Leave this blank to use On-Demand Instances.
* `ebs_config` - (Optional) Configuration block(s) for EBS volumes attached to each instance in the instance group. Detailed below.
* `instance_count` - (Optional) Target number of instances for the instance group. Must be 1 or 3. Defaults to 1. Launching with multiple master nodes is only supported in EMR version 5.23.0+, and requires this resource's `core_instance_group` to be configured. Public (Internet accessible) instances must be created in VPC subnets that have [map public IP on launch](/docs/providers/aws/r/subnet.html#map_public_ip_on_launch) enabled. Termination protection is automatically enabled when launched with multiple master nodes and Terraform must have the `termination_protection = false` configuration applied before destroying this resource.
* `name` - (Optional) Friendly name given to the instance group.

## ebs_config
Expand Down