Skip to content

Commit

Permalink
fix(fis): move cwagent install script to userdata and recover missing…
Browse files Browse the repository at this point in the history
… alarms (#67)

* fix(fis): recovered missing alarms

* feat(ssm): install cwagent

* fix(ssm): rename aws partition module

* fix(fis): ssm association dependency update
  • Loading branch information
Young-ook authored Apr 18, 2022
1 parent 1c6fb72 commit 66382e2
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 60 deletions.
56 changes: 26 additions & 30 deletions examples/fis/awscw.tf
Original file line number Diff line number Diff line change
@@ -1,38 +1,17 @@
resource "aws_ssm_association" "install-cwagent" {
depends_on = [module.ec2]
name = "AWS-ConfigureAWSPackage"

targets {
key = "tag:release"
values = ["baseline,canary"]
}

### monitoring/agent
resource "aws_ssm_association" "cwagent" {
association_name = "SSM-StartCWAgent"
name = "AmazonCloudWatch-ManageAgent"
parameters = {
action = "Install"
name = "AmazonCloudWatchAgent"
action = "start"
}
}

resource "time_sleep" "wait" {
depends_on = [aws_ssm_association.install-cwagent]
create_duration = "30s"
}

resource "aws_ssm_association" "start-cwagent" {
depends_on = [time_sleep.wait]
name = "AmazonCloudWatch-ManageAgent"

targets {
key = "tag:release"
values = ["baseline,canary"]
}

parameters = {
action = "start"
values = ["baseline", "canary"]
}
}

### application/monitoring
### monitoring/alarm
resource "aws_cloudwatch_metric_alarm" "cpu" {
alarm_name = local.cw_cpu_alarm_name
alarm_description = "This metric monitors ec2 cpu utilization"
Expand All @@ -45,7 +24,6 @@ resource "aws_cloudwatch_metric_alarm" "cpu" {
statistic = "Average"
threshold = 60
insufficient_data_actions = []

dimensions = {
AutoScalingGroupName = module.ec2.cluster.data_plane.node_groups.baseline.name
}
Expand All @@ -63,7 +41,6 @@ resource "aws_cloudwatch_metric_alarm" "api-p90" {
unit = "Seconds"
threshold = 0.1
extended_statistic = "p90"

dimensions = {
LoadBalancer = aws_lb.alb.arn_suffix
}
Expand All @@ -81,4 +58,23 @@ resource "aws_cloudwatch_metric_alarm" "api-avg" {
unit = "Seconds"
statistic = "Average"
threshold = 0.1
dimensions = {
LoadBalancer = aws_lb.alb.arn_suffix
}
}

# Alarm on HTTP 502 responses counted for the application load balancer.
# Enters ALARM when the Sum of HTTPCode_ELB_502_Count is greater than 3
# within a single 60-second evaluation period.
resource "aws_cloudwatch_metric_alarm" "api-http502" {
  alarm_name          = local.cw_api_http502_alarm_name
  alarm_description   = "This metric monitors HTTP 502 response from backend ec2 instances"
  tags                = merge(local.default-tags, var.tags)
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = 1
  metric_name         = "HTTPCode_ELB_502_Count"
  namespace           = "AWS/ApplicationELB"
  period              = 60
  statistic           = "Sum"
  threshold           = 3

  # Scope the metric to the load balancer declared as aws_lb.alb.
  dimensions = {
    LoadBalancer = aws_lb.alb.arn_suffix
  }
}
50 changes: 36 additions & 14 deletions examples/fis/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ provider "aws" {
region = var.aws_region
}

### foundation/network
### network/vpc
module "vpc" {
source = "Young-ook/spinnaker/aws//modules/spinnaker-aware-aws-vpc"
name = var.name
Expand All @@ -21,16 +21,6 @@ module "vpc" {
vpc_endpoint_config = []
}

# Internal application load balancer placed in the private subnets of module.vpc.
resource "aws_lb" "alb" {
  # identity
  name = local.alb_name
  tags = merge(local.default-tags, var.tags)

  # type and exposure
  load_balancer_type = "application"
  internal           = true

  # placement and firewall
  subnets         = values(module.vpc.subnets["private"])
  security_groups = [aws_security_group.alb.id]

  # deletion protection is switched off for this load balancer
  enable_deletion_protection = false
}

# security/firewall
resource "aws_security_group" "alb" {
name = local.alb_sg_name
Expand Down Expand Up @@ -81,6 +71,17 @@ resource "aws_security_group" "alb_aware" {
}
}

### network/loadbalancer
# Internal application load balancer placed in the private subnets of module.vpc.
resource "aws_lb" "alb" {
  # identity
  name = local.alb_name
  tags = merge(local.default-tags, var.tags)

  # type and exposure
  load_balancer_type = "application"
  internal           = true

  # placement and firewall
  subnets         = values(module.vpc.subnets["private"])
  security_groups = [aws_security_group.alb.id]

  # deletion protection is switched off for this load balancer
  enable_deletion_protection = false
}

resource "aws_lb_listener" "http" {
load_balancer_arn = aws_lb.alb.arn
port = 80
Expand Down Expand Up @@ -112,8 +113,29 @@ resource "aws_lb_target_group" "http" {
}
}

### application/script
locals {
  # Virtual client: loops forever, sending a HEAD request (curl -I) to the
  # load balancer's DNS name once per second.
  vclient = join("\n", [
    "#!/bin/bash",
    "while true; do",
    "  curl -I http://${aws_lb.alb.dns_name}",
    "  echo",
    "  sleep 1",
    "done",
  ])

  # Virtual server: updates packages, installs httpd, removes the default
  # welcome page configuration and starts the web server.
  # The shebang must be the first line: this text is handed to the node groups
  # as user_data, and the inline script it replaces also began with
  # "#!/bin/bash".
  # NOTE(review): presumably the ec2 module wraps user_data in a
  # text/x-shellscript cloud-init part, which needs the shebang to run it -
  # confirm against the module source.
  vserver = join("\n", [
    "#!/bin/bash",
    "sudo yum update -y",
    "sudo yum install -y httpd",
    "sudo rm /etc/httpd/conf.d/welcome.conf",
    "sudo systemctl start httpd",
  ])
}

### application/ec2
module "ec2" {
depends_on = [aws_ssm_association.cwagent]
source = "Young-ook/ssm/aws"
name = var.name
tags = var.tags
Expand All @@ -128,7 +150,7 @@ module "ec2" {
security_groups = [aws_security_group.alb_aware.id]
target_group_arns = [aws_lb_target_group.http.arn]
tags = { release = "baseline" }
user_data = "#!/bin/bash\nsudo yum update -y\nsudo yum install -y httpd\nsudo rm /etc/httpd/conf.d/welcome.conf\nsudo systemctl start httpd"
user_data = local.vserver
},
{
name = "canary"
Expand All @@ -139,7 +161,7 @@ module "ec2" {
security_groups = [aws_security_group.alb_aware.id]
target_group_arns = [aws_lb_target_group.http.arn]
tags = { release = "canary" }
user_data = "#!/bin/bash\namazon-linux-extras install nginx1\nsystemctl start nginx"
user_data = local.vserver
},
{
name = "loadgen"
Expand All @@ -156,7 +178,7 @@ module "ec2" {
### Initially, this module places all ec2 instances in a specific Availability Zone (AZ).
### This configuration is not fault tolerant when Single AZ goes down.
### After our first attempt at experimenting with 'terminate ec2 instances'
### we will scale the autoscaling-group cross-AZ for high availability.
### We will scale the autoscaling-group cross-AZ for high availability.
###
### Switch the 'subnets' variable to the list of whole private subnets created in the example.

Expand Down
12 changes: 1 addition & 11 deletions examples/fis/outputs.tf
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
locals {
  # Virtual client script: loops forever, sending a HEAD request (curl -I) to
  # the load balancer's DNS name once per second.
  # The list starts directly with the shebang so that "#!/bin/bash" is the
  # very first line of the joined text (a leading empty element would put a
  # blank line in front of it).
  vclient = join("\n", [
    "#!/bin/bash",
    "while true; do",
    "  curl -I http://${aws_lb.alb.dns_name}",
    "  echo",
    "  sleep 1",
    "done",
  ])
}
### output variables

output "vclient" {
description = "Script to call APIs as a virtual client"
Expand Down
29 changes: 24 additions & 5 deletions main.tf
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
## ec2 autoscaling groups with systems manager/session manager

module "current" {
module "aws" {
source = "Young-ook/spinnaker/aws//modules/aws-partitions"
version = ">= 2.0"
}

## features
Expand All @@ -21,15 +20,15 @@ resource "aws_iam_role" "asg" {
Action = "sts:AssumeRole"
Effect = "Allow"
Principal = {
Service = [format("ec2.%s", module.current.partition.dns_suffix)]
Service = [format("ec2.%s", module.aws.partition.dns_suffix)]
}
}]
Version = "2012-10-17"
})
}

resource "aws_iam_role_policy_attachment" "ssm-managed" {
policy_arn = format("arn:%s:iam::aws:policy/AmazonSSMManagedInstanceCore", module.current.partition.partition)
policy_arn = format("arn:%s:iam::aws:policy/AmazonSSMManagedInstanceCore", module.aws.partition.partition)
role = aws_iam_role.asg.id
}

Expand Down Expand Up @@ -63,12 +62,32 @@ data "aws_ami" "al2" {
}
}

# Renders one multipart cloud-init document per node group (keyed by the
# node group's name). The result is base64-encoded and not gzip-compressed.
data "cloudinit_config" "ng" {
  for_each      = { for ng in var.node_groups : ng.name => ng }
  base64_encode = true
  gzip          = false

  # Part 1 (always present): update packages and install the CloudWatch agent.
  part {
    content_type = "text/x-shellscript"
    content      = <<-EOT
    #!/bin/bash
    sudo yum update -y
    yum install -y amazon-cloudwatch-agent
    EOT
  }

  # Part 2 (optional): the node group's own user_data script.
  # compact() drops a missing/empty value, so no empty shell-script part is
  # emitted for node groups that do not define user_data.
  dynamic "part" {
    for_each = compact([lookup(each.value, "user_data", "")])
    content {
      content_type = "text/x-shellscript"
      content      = part.value
    }
  }
}

resource "aws_launch_template" "ng" {
for_each = { for ng in var.node_groups : ng.name => ng }
name = join("-", [local.name, each.key])
tags = merge(local.default-tags, var.tags, lookup(each.value, "tags", {}))
image_id = lookup(each.value, "image_id", data.aws_ami.al2[each.key].id)
user_data = base64encode(lookup(each.value, "user_data", ""))
user_data = data.cloudinit_config.ng[each.key].rendered
instance_type = lookup(each.value, "instance_type", "t3.medium")
key_name = lookup(each.value, "key_name", null)

Expand Down

0 comments on commit 66382e2

Please sign in to comment.