Skip to content

Commit

Permalink
[QT-525] enos: use spot instances for Vault targets
Browse files Browse the repository at this point in the history
The previous strategy for provisioning infrastructure targets was to use
the cheapest instances that could reliably perform as Vault cluster
nodes. With this change we introduce a new model of bidding for spot
fleet instances, with the goal of cost savings and, often, more powerful
instances. The spot fleet instance bidding has only been implemented for
Vault clusters. Updating our Consul backend bidding will be handled in
another PR.

* Create a new `vault_cluster` module that handles installing,
  configuring, initializing, and unsealing Vault clusters.
* Create a `target_ec2_instances` module that can provision a group of
  instances on-demand.
* Create a `target_ec2_spot_fleet` module that can bid on a fleet of
  spot instances.
* Extend every Enos scenario to utilize the spot fleet target acquisition
  strategy and the `vault_cluster` module.
* Update our Enos CI modules to grant both the `aws-nuke` permissions
  and the privileges required to provision spot fleets.

Signed-off-by: Ryan Cragun <me@ryan.ec>
  • Loading branch information
ryancragun committed Apr 7, 2023
1 parent 05bd025 commit 23f5b16
Show file tree
Hide file tree
Showing 21 changed files with 2,177 additions and 495 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,10 @@ enos/.terraform/*
enos/.terraform.lock.hcl
enos/*.tfstate
enos/*.tfstate.*
enos/**/.terraform/*
enos/**/.terraform.lock.hcl
enos/**/*.tfstate
enos/**/*.tfstate.*

.DS_Store
.idea
Expand Down Expand Up @@ -127,4 +131,4 @@ website/components/node_modules
.releaser/
*.log

tools/godoctests/.bin
tools/godoctests/.bin
71 changes: 65 additions & 6 deletions enos/ci/service-user-iam/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ resource "aws_iam_role" "role" {

data "aws_iam_policy_document" "assume_role_policy_document" {
provider = aws.us_east_1

statement {
effect = "Allow"
actions = ["sts:AssumeRole"]
Expand All @@ -46,31 +47,75 @@ resource "aws_iam_role_policy" "role_policy" {
provider = aws.us_east_1
role = aws_iam_role.role.name
name = "${local.service_user}_policy"
policy = data.aws_iam_policy_document.iam_policy_document.json
policy = data.aws_iam_policy_document.role_policy.json
}

# Combined policy document for the service user role. It merges the statements
# of the `enos_scenario` and `aws_nuke` policy documents into a single JSON
# document.
data "aws_iam_policy_document" "role_policy" {
  # Pinned to the same aliased provider as the documents being merged, so this
  # data source does not fall back to the default (unaliased) provider
  # configuration.
  provider = aws.us_east_1

  source_policy_documents = [
    data.aws_iam_policy_document.enos_scenario.json,
    data.aws_iam_policy_document.aws_nuke.json,
  ]
}

# Permissions granted to the service user for aws-nuke. A single Allow
# statement covering every resource ("*").
data "aws_iam_policy_document" "aws_nuke" {
  provider = aws.us_east_1

  statement {
    effect    = "Allow"
    resources = ["*"]

    actions = [
      # EC2: read-only describe calls.
      "ec2:DescribeInternetGateways",
      "ec2:DescribeNatGateways",
      "ec2:DescribeRegions",
      "ec2:DescribeVpnGateways",

      # IAM: listing and deleting users, their access keys, policies and tags.
      "iam:DeleteAccessKey",
      "iam:DeleteUser",
      "iam:DeleteUserPolicy",
      "iam:GetUser",
      "iam:ListAccessKeys",
      "iam:ListAccountAliases",
      "iam:ListGroupsForUser",
      "iam:ListUserPolicies",
      "iam:ListUserTags",
      "iam:ListUsers",
      "iam:UntagUser",

      # Service Quotas: read-only.
      "servicequotas:ListServiceQuotas"
    ]
  }
}

data "aws_iam_policy_document" "iam_policy_document" {
data "aws_iam_policy_document" "enos_scenario" {
provider = aws.us_east_1

statement {
effect = "Allow"
actions = [
"ec2:AssociateRouteTable",
"ec2:AttachInternetGateway",
"ec2:AuthorizeSecurityGroupEgress",
"ec2:AuthorizeSecurityGroupIngress",
"ec2:CancelSpotFleetRequests",
"ec2:CancelSpotInstanceRequests",
"ec2:CreateInternetGateway",
"ec2:CreateKeyPair",
"ec2:CreateLaunchTemplate",
"ec2:CreateLaunchTemplateVersion",
"ec2:CreateRoute",
"ec2:CreateRouteTable",
"ec2:CreateSecurityGroup",
"ec2:CreateSpotDatafeedSubscription",
"ec2:CreateSubnet",
"ec2:CreateTags",
"ec2:CreateVolume",
"ec2:CreateVPC",
"ec2:DeleteInternetGateway",
"ec2:DeleteLaunchTemplate",
"ec2:DeleteLaunchTemplateVersions",
"ec2:DeleteKeyPair",
"ec2:DeleteRouteTable",
"ec2:DeleteSecurityGroup",
"ec2:DeleteSpotDatafeedSubscription",
"ec2:DeleteSubnet",
"ec2:DeleteTags",
"ec2:DeleteVolume",
Expand All @@ -84,14 +129,22 @@ data "aws_iam_policy_document" "iam_policy_document" {
"ec2:DescribeInstanceTypeOfferings",
"ec2:DescribeInstanceTypes",
"ec2:DescribeInternetGateways",
"ec2:DescribeInternetGateways",
"ec2:DescribeKeyPairs",
"ec2:DescribeLaunchTemplates",
"ec2:DescribeLaunchTemplateVersions",
"ec2:DescribeNatGateways",
"ec2:DescribeNetworkAcls",
"ec2:DescribeNetworkInterfaces",
"ec2:DescribeRegions",
"ec2:DescribeRouteTables",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSpotDatafeedSubscription",
"ec2:DescribeSpotFleetInstances",
"ec2:DescribeSpotFleetInstanceRequests",
"ec2:DescribeSpotFleetRequests",
"ec2:DescribeSpotFleetRequestHistory",
"ec2:DescribeSpotInstanceRequests",
"ec2:DescribeSpotPriceHistory",
"ec2:DescribeSubnets",
"ec2:DescribeTags",
"ec2:DescribeVolumes",
Expand All @@ -102,14 +155,21 @@ data "aws_iam_policy_document" "iam_policy_document" {
"ec2:DescribeVpnGateways",
"ec2:DetachInternetGateway",
"ec2:DisassociateRouteTable",
"ec2:GetLaunchTemplateData",
"ec2:GetSpotPlacementScores",
"ec2:ImportKeyPair",
"ec2:ModifyInstanceAttribute",
"ec2:ModifyLaunchTemplate",
"ec2:ModifySpotFleetRequest",
"ec2:ModifySubnetAttribute",
"ec2:ModifyVPCAttribute",
"ec2:RequestSpotInstances",
"ec2:RequestSpotFleet",
"ec2:ResetInstanceAttribute",
"ec2:RevokeSecurityGroupEgress",
"ec2:RevokeSecurityGroupIngress",
"ec2:RunInstances",
"ec2:SendSpotInstanceInterruptions",
"ec2:TerminateInstances",
"elasticloadbalancing:DescribeLoadBalancers",
"elasticloadbalancing:DescribeTargetGroups",
Expand All @@ -118,11 +178,10 @@ data "aws_iam_policy_document" "iam_policy_document" {
"iam:CreateInstanceProfile",
"iam:CreatePolicy",
"iam:CreateRole",
"iam:CreateRole",
"iam:CreateServiceLinkedRole",
"iam:DeleteInstanceProfile",
"iam:DeletePolicy",
"iam:DeleteRole",
"iam:DeleteRole",
"iam:DeleteRolePolicy",
"iam:DetachRolePolicy",
"iam:GetInstanceProfile",
Expand All @@ -135,7 +194,6 @@ data "aws_iam_policy_document" "iam_policy_document" {
"iam:ListPolicies",
"iam:ListRolePolicies",
"iam:ListRoles",
"iam:ListRoles",
"iam:PassRole",
"iam:PutRolePolicy",
"iam:RemoveRoleFromInstanceProfile",
Expand All @@ -153,6 +211,7 @@ data "aws_iam_policy_document" "iam_policy_document" {
"kms:ScheduleKeyDeletion",
"servicequotas:ListServiceQuotas"
]

resources = ["*"]
}
}
39 changes: 34 additions & 5 deletions enos/ci/service-user-iam/service-quotas.tf
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,62 @@
locals {
// This is the code of the service quota to request a change for. Each adjustable limit has a
// unique code. See, https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/servicequotas_service_quota#quota_code
subnets_per_vps_quota = "L-F678F1CE"
subnets_per_vpcs_quota = "L-F678F1CE"
standard_spot_instance_requests_quota = "L-34B43A08"
}

// Requests a value of 50 for the "vpc" service quota identified by
// local.subnets_per_vpcs_quota in us-east-1.
resource "aws_servicequotas_service_quota" "vpcs_per_region_us_east_1" {
  // This resource is for us-east-1, so it must use the us-east-1 provider.
  // It previously pointed at aws.us_east_2, which duplicated the
  // vpcs_per_region_us_east_2 resource and left us-east-1 unmanaged.
  provider     = aws.us_east_1
  quota_code   = local.subnets_per_vpcs_quota
  service_code = "vpc"
  value        = 50
}

resource "aws_servicequotas_service_quota" "vpcs_per_region_us_east_2" {
provider = aws.us_east_2
quota_code = local.subnets_per_vps_quota
quota_code = local.subnets_per_vpcs_quota
service_code = "vpc"
value = 50
}

resource "aws_servicequotas_service_quota" "vpcs_per_region_us_west_1" {
provider = aws.us_west_1
quota_code = local.subnets_per_vps_quota
quota_code = local.subnets_per_vpcs_quota
service_code = "vpc"
value = 50
}

resource "aws_servicequotas_service_quota" "vpcs_per_region_us_west_2" {
provider = aws.us_west_2
quota_code = local.subnets_per_vps_quota
quota_code = local.subnets_per_vpcs_quota
service_code = "vpc"
value = 50
}

// Requests a value of 640 for the "ec2" service quota identified by
// local.standard_spot_instance_requests_quota in us-east-1.
resource "aws_servicequotas_service_quota" "spot_requests_per_region_us_east_1" {
  // This resource is for us-east-1, so it must use the us-east-1 provider.
  // It previously pointed at aws.us_east_2, which duplicated the
  // spot_requests_per_region_us_east_2 resource and left us-east-1 unmanaged.
  provider     = aws.us_east_1
  quota_code   = local.standard_spot_instance_requests_quota
  service_code = "ec2"
  value        = 640
}

// Requests a value of 640 for the "ec2" service quota identified by
// local.standard_spot_instance_requests_quota, through the aws.us_east_2
// provider.
resource "aws_servicequotas_service_quota" "spot_requests_per_region_us_east_2" {
  provider = aws.us_east_2

  service_code = "ec2"
  quota_code   = local.standard_spot_instance_requests_quota
  value        = 640
}

// Requests a value of 640 for the "ec2" service quota identified by
// local.standard_spot_instance_requests_quota, through the aws.us_west_1
// provider.
resource "aws_servicequotas_service_quota" "spot_requests_per_region_us_west_1" {
  provider = aws.us_west_1

  service_code = "ec2"
  quota_code   = local.standard_spot_instance_requests_quota
  value        = 640
}

// Requests a value of 640 for the "ec2" service quota identified by
// local.standard_spot_instance_requests_quota, through the aws.us_west_2
// provider.
resource "aws_servicequotas_service_quota" "spot_requests_per_region_us_west_2" {
  provider = aws.us_west_2

  service_code = "ec2"
  quota_code   = local.standard_spot_instance_requests_quota
  value        = 640
}
32 changes: 23 additions & 9 deletions enos/enos-modules.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -68,30 +68,44 @@ module "shutdown_multiple_nodes" {
source = "./modules/shutdown_multiple_nodes"
}

// Target module sourced from ./modules/target_ec2_instances. Every input is
// forwarded from a scenario-level variable.
module "target_ec2_instances" {
  source = "./modules/target_ec2_instances"

  project_name   = var.project_name
  common_tags    = var.tags
  ssh_keypair    = var.aws_ssh_keypair_name
  instance_count = var.vault_instance_count
}

// Target module sourced from ./modules/target_ec2_spot_fleet. Sets minimum
// instance requirements and a maximum spot price alongside the shared project
// settings.
module "target_ec2_spot_fleet" {
  source = "./modules/target_ec2_spot_fleet"

  project_name = var.project_name
  common_tags  = var.tags
  ssh_keypair  = var.aws_ssh_keypair_name

  // Minimum instance requirements.
  // NOTE(review): units are defined by the module; presumably MiB of memory
  // and vCPU count. Confirm against the module's variable definitions.
  instance_cpu_min = 2
  instance_mem_min = 4096

  // Current on-demand cost of t3.medium in us-east.
  spot_price_max = "0.0416"
}

// Module sourced from ./modules/vault_agent. Receives the Vault install
// directory and the Vault instance count from the scenario variables.
module "vault_agent" {
  source = "./modules/vault_agent"

  vault_instance_count = var.vault_instance_count
  vault_install_dir    = var.vault_install_dir
}


// Module sourced from ./modules/vault_verify_agent_output. Receives the Vault
// instance count from the scenario variables.
module "vault_verify_agent_output" {
  vault_instance_count = var.vault_instance_count

  source = "./modules/vault_verify_agent_output"
}

module "vault_cluster" {
source = "app.terraform.io/hashicorp-qti/aws-vault/enos"
# source = "../../terraform-enos-aws-vault"
source = "./modules/vault_cluster"

common_tags = var.tags
environment = "ci"
instance_count = var.vault_instance_count
project_name = var.project_name
ssh_aws_keypair = var.aws_ssh_keypair_name
vault_install_dir = var.vault_install_dir
install_dir = var.vault_install_dir
}

module "vault_get_cluster_ips" {
Expand Down
Loading

0 comments on commit 23f5b16

Please sign in to comment.