From 1c6fb72ffa432fc5142e3325159cfde4d0edbf43 Mon Sep 17 00:00:00 2001 From: youngookkim Date: Mon, 18 Apr 2022 18:03:12 +0900 Subject: [PATCH] fix(fis): apply awsfis module (#66) * chore(fis): rename files of fis example * fix(fis): apply awsfis module * chore(fis): grooming awscw resources --- NOTE(review): examples/fis/awscw.tf previously stopped at line 84, part-way through aws_cloudwatch_metric_alarm.api-avg (no dimensions map, no closing brace), and the api-http502 alarm that this patch removes from main.tf was not re-added. The awscw.tf hunk now carries all four alarms exactly as they are removed from main.tf (105 lines); the hunk header and diffstat are updated to match. The awscw.tf index blob id below has not been recomputed. examples/fis/aws-cw.tf | 33 ---- examples/fis/aws-fis.tf | 150 ------------------ examples/fis/awscw.tf | 105 ++++++++++++ examples/fis/awsfis.tf | 84 ++++++++++ .../fis/{tc1.tfvars => fixture.tc1.tfvars} | 0 examples/fis/main.tf | 72 --------- .../fis/templates/create-fis-templates.tpl | 6 - .../fis/templates/delete-fis-templates.tpl | 6 - 8 files changed, 189 insertions(+), 267 deletions(-) delete mode 100644 examples/fis/aws-cw.tf delete mode 100644 examples/fis/aws-fis.tf create mode 100644 examples/fis/awscw.tf create mode 100644 examples/fis/awsfis.tf rename examples/fis/{tc1.tfvars => fixture.tc1.tfvars} (100%) delete mode 100644 examples/fis/templates/create-fis-templates.tpl delete mode 100644 examples/fis/templates/delete-fis-templates.tpl diff --git a/examples/fis/aws-cw.tf b/examples/fis/aws-cw.tf deleted file mode 100644 index 1ae78ac..0000000 --- a/examples/fis/aws-cw.tf +++ /dev/null @@ -1,33 +0,0 @@ -resource "aws_ssm_association" "install-cwagent" { - depends_on = [module.ec2] - name = "AWS-ConfigureAWSPackage" - - targets { - key = "tag:release" - values = ["baseline,canary"] - } - - parameters = { - action = "Install" - name = "AmazonCloudWatchAgent" - } -} - -resource "time_sleep" "wait" { - depends_on = [aws_ssm_association.install-cwagent] - create_duration = "30s" -} - -resource "aws_ssm_association" "start-cwagent" { - depends_on = [time_sleep.wait] - name = "AmazonCloudWatch-ManageAgent" - - targets { - key = "tag:release" - values = ["baseline,canary"] - } - - parameters = { - action = "start" - } -} diff --git a/examples/fis/aws-fis.tf b/examples/fis/aws-fis.tf deleted file mode 100644 index a71fd2f..0000000 --- a/examples/fis/aws-fis.tf +++ 
/dev/null @@ -1,150 +0,0 @@ -module "current" { - source = "Young-ook/spinnaker/aws//modules/aws-partitions" - version = ">= 2.0" -} - -resource "aws_iam_role" "fis-run" { - name = local.fis_role_name - tags = merge(local.default-tags, var.tags) - assume_role_policy = jsonencode({ - Statement = [{ - Action = "sts:AssumeRole" - Effect = "Allow" - Principal = { - Service = [format("fis.%s", module.current.partition.dns_suffix)] - } - }] - Version = "2012-10-17" - }) -} - -resource "aws_iam_role_policy_attachment" "fis-run" { - policy_arn = format("arn:%s:iam::aws:policy/PowerUserAccess", module.current.partition.partition) - role = aws_iam_role.fis-run.id -} - -### systems manager document for fault injection simulator experiment - -resource "aws_ssm_document" "disk-stress" { - name = "FIS-Run-Disk-Stress" - tags = merge(local.default-tags, var.tags) - document_format = "YAML" - document_type = "Command" - content = file("${path.module}/templates/disk-stress.yaml") -} - -### fault injection simulator experiment templates - -locals { - target_vpc = module.vpc.vpc.id - target_role = module.ec2.role.arn - target_ec2 = module.ec2.cluster.data_plane.node_groups.baseline.name - stop_condition_alarm = aws_cloudwatch_metric_alarm.cpu.arn -} - -resource "local_file" "cpu-stress" { - content = templatefile("${path.module}/templates/cpu-stress.tpl", { - asg = local.target_ec2 - region = var.aws_region - alarm = local.stop_condition_alarm - role = aws_iam_role.fis-run.arn - }) - filename = "${path.module}/.fis/cpu-stress.json" - file_permission = "0600" -} - -resource "local_file" "network-latency" { - content = templatefile("${path.module}/templates/network-latency.tpl", { - asg = local.target_ec2 - region = var.aws_region - alarm = local.stop_condition_alarm - role = aws_iam_role.fis-run.arn - }) - filename = "${path.module}/.fis/network-latency.json" - file_permission = "0600" -} - -# drawing lots for choosing a subnet -resource "random_integer" "az" { - min = 0 - max = 
length(var.azs) - 1 -} - -resource "local_file" "terminate-instances" { - content = templatefile("${path.module}/templates/terminate-instances.tpl", { - asg = local.target_ec2 - az = var.azs[random_integer.az.result] - vpc = local.target_vpc - alarm = local.stop_condition_alarm - role = aws_iam_role.fis-run.arn - }) - filename = "${path.module}/.fis/terminate-instances.json" - file_permission = "0600" -} - -resource "local_file" "throttle-ec2-api" { - content = templatefile("${path.module}/templates/throttle-ec2-api.tpl", { - asg_role = local.target_role - alarm = local.stop_condition_alarm - role = aws_iam_role.fis-run.arn - }) - filename = "${path.module}/.fis/throttle-ec2-api.json" - file_permission = "0600" -} - -resource "local_file" "disk-stress" { - content = templatefile("${path.module}/templates/disk-stress.tpl", { - doc_arn = aws_ssm_document.disk-stress.arn - region = var.aws_region - alarm = local.stop_condition_alarm - role = aws_iam_role.fis-run.arn - }) - filename = "${path.module}/.fis/disk-stress.json" - file_permission = "0600" -} - -resource "local_file" "create-fis-templates" { - content = templatefile("${path.module}/templates/create-fis-templates.tpl", { - region = var.aws_region - }) - filename = "${path.module}/.fis/create-fis-templates.sh" - file_permission = "0600" -} - -resource "null_resource" "create-fis-templates" { - depends_on = [ - local_file.cpu-stress, - local_file.network-latency, - local_file.throttle-ec2-api, - local_file.terminate-instances, - local_file.disk-stress, - local_file.create-fis-templates, - ] - provisioner "local-exec" { - when = create - command = "cd ${path.module}/.fis && bash create-fis-templates.sh" - } -} - -resource "local_file" "delete-fis-templates" { - content = templatefile("${path.module}/templates/delete-fis-templates.tpl", { - region = var.aws_region - }) - filename = "${path.module}/.fis/delete-fis-templates.sh" - file_permission = "0600" -} - -resource "null_resource" "delete-fis-templates" { - 
depends_on = [ - local_file.cpu-stress, - local_file.network-latency, - local_file.throttle-ec2-api, - local_file.terminate-instances, - local_file.disk-stress, - local_file.delete-fis-templates, - ] - provisioner "local-exec" { - when = destroy - command = "cd ${path.module}/.fis && bash delete-fis-templates.sh" - } -} diff --git a/examples/fis/awscw.tf b/examples/fis/awscw.tf new file mode 100644 index 0000000..6484dff --- /dev/null +++ b/examples/fis/awscw.tf @@ -0,0 +1,105 @@ +resource "aws_ssm_association" "install-cwagent" { + depends_on = [module.ec2] + name = "AWS-ConfigureAWSPackage" + + targets { + key = "tag:release" + values = ["baseline,canary"] + } + + parameters = { + action = "Install" + name = "AmazonCloudWatchAgent" + } +} + +resource "time_sleep" "wait" { + depends_on = [aws_ssm_association.install-cwagent] + create_duration = "30s" +} + +resource "aws_ssm_association" "start-cwagent" { + depends_on = [time_sleep.wait] + name = "AmazonCloudWatch-ManageAgent" + + targets { + key = "tag:release" + values = ["baseline,canary"] + } + + parameters = { + action = "start" + } +} + +### application/monitoring +resource "aws_cloudwatch_metric_alarm" "cpu" { + alarm_name = local.cw_cpu_alarm_name + alarm_description = "This metric monitors ec2 cpu utilization" + tags = merge(local.default-tags, var.tags) + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = 3 + metric_name = "CPUUtilization" + namespace = "AWS/EC2" + period = 60 + statistic = "Average" + threshold = 60 + insufficient_data_actions = [] + + dimensions = { + AutoScalingGroupName = module.ec2.cluster.data_plane.node_groups.baseline.name + } +} + +resource "aws_cloudwatch_metric_alarm" "api-p90" { + alarm_name = local.cw_api_p90_alarm_name + alarm_description = "This metric monitors percentile of response latency" + tags = merge(local.default-tags, var.tags) + comparison_operator = "GreaterThanThreshold" + evaluation_periods = 1 + metric_name = "TargetResponseTime" + 
namespace = "AWS/ApplicationELB" + period = 60 + unit = "Seconds" + threshold = 0.1 + extended_statistic = "p90" + + dimensions = { + LoadBalancer = aws_lb.alb.arn_suffix + } +} + +resource "aws_cloudwatch_metric_alarm" "api-avg" { + alarm_name = local.cw_api_avg_alarm_name + alarm_description = "This metric monitors average time of response latency" + tags = merge(local.default-tags, var.tags) + comparison_operator = "GreaterThanThreshold" + evaluation_periods = 1 + metric_name = "TargetResponseTime" + namespace = "AWS/ApplicationELB" + period = 60 + unit = "Seconds" + statistic = "Average" + threshold = 0.1 + + dimensions = { + LoadBalancer = aws_lb.alb.arn_suffix + } +} + +resource "aws_cloudwatch_metric_alarm" "api-http502" { + alarm_name = local.cw_api_http502_alarm_name + alarm_description = "This metric monitors HTTP 502 response from backed ec2 instances" + tags = merge(local.default-tags, var.tags) + comparison_operator = "GreaterThanThreshold" + evaluation_periods = 1 + metric_name = "HTTPCode_ELB_502_Count" + namespace = "AWS/ApplicationELB" + period = 60 + statistic = "Sum" + threshold = 3 + + dimensions = { + LoadBalancer = aws_lb.alb.arn_suffix + } +} diff --git a/examples/fis/awsfis.tf b/examples/fis/awsfis.tf new file mode 100644 index 0000000..c877188 --- /dev/null +++ b/examples/fis/awsfis.tf @@ -0,0 +1,84 @@ +### systems manager document for fault injection simulator experiment + +resource "aws_ssm_document" "disk-stress" { + name = "FIS-Run-Disk-Stress" + tags = merge(local.default-tags, var.tags) + document_format = "YAML" + document_type = "Command" + content = file("${path.module}/templates/disk-stress.yaml") +} + +### fault injection simulator experiment templates + +# drawing lots for choosing a subnet +resource "random_integer" "az" { + min = 0 + max = length(var.azs) - 1 +} + +locals { + target_vpc = module.vpc.vpc.id + target_role = module.ec2.role.arn + target_asg = module.ec2.cluster.data_plane.node_groups.baseline.name + fis_role = 
module.awsfis.role.arn + + experiments = [ + { + name = "cpu-stress" + template = "${path.cwd}/templates/cpu-stress.tpl" + params = { + asg = local.target_asg + region = var.aws_region + alarm = aws_cloudwatch_metric_alarm.cpu.arn + role = local.fis_role + } + }, + { + name = "network-latency" + template = "${path.cwd}/templates/network-latency.tpl" + params = { + asg = local.target_asg + region = var.aws_region + alarm = aws_cloudwatch_metric_alarm.cpu.arn + role = local.fis_role + } + }, + { + name = "terminate-instances" + template = 
"${path.cwd}/templates/terminate-instances.tpl" + params = { + asg = local.target_asg + az = var.azs[random_integer.az.result] + vpc = local.target_vpc + alarm = aws_cloudwatch_metric_alarm.cpu.arn + role = local.fis_role + } + }, + { + name = "throttle-ec2-api" + template = "${path.cwd}/templates/throttle-ec2-api.tpl" + params = { + asg_role = local.target_role + alarm = aws_cloudwatch_metric_alarm.cpu.arn + role = local.fis_role + } + }, + { + name = "disk-stress" + template = "${path.cwd}/templates/disk-stress.tpl" + params = { + doc_arn = aws_ssm_document.disk-stress.arn + region = var.aws_region + alarm = aws_cloudwatch_metric_alarm.cpu.arn + role = local.fis_role + } + }, + ] +} + +module "awsfis" { + source = "Young-ook/fis/aws" + name = var.name + tags = var.tags + experiments = local.experiments +} diff --git a/examples/fis/tc1.tfvars b/examples/fis/fixture.tc1.tfvars similarity index 100% rename from examples/fis/tc1.tfvars rename to examples/fis/fixture.tc1.tfvars diff --git a/examples/fis/main.tf b/examples/fis/main.tf index 893652a..73742b8 100644 --- a/examples/fis/main.tf +++ b/examples/fis/main.tf @@ -177,75 +177,3 @@ resource "aws_autoscaling_policy" "target-tracking" { target_value = 10.0 } } - -### application/monitoring -resource "aws_cloudwatch_metric_alarm" "cpu" { - alarm_name = local.cw_cpu_alarm_name - alarm_description = "This metric monitors ec2 cpu utilization" - tags = merge(local.default-tags, var.tags) - comparison_operator = "GreaterThanOrEqualToThreshold" - evaluation_periods = 3 - metric_name = "CPUUtilization" - namespace = "AWS/EC2" - period = 60 - statistic = "Average" - threshold = 60 - insufficient_data_actions = [] - - dimensions = { - AutoScalingGroupName = module.ec2.cluster.data_plane.node_groups.baseline.name - } -} - -resource "aws_cloudwatch_metric_alarm" "api-p90" { - alarm_name = local.cw_api_p90_alarm_name - alarm_description = "This metric monitors percentile of response latency" - tags = merge(local.default-tags, 
var.tags) - comparison_operator = "GreaterThanThreshold" - evaluation_periods = 1 - metric_name = "TargetResponseTime" - namespace = "AWS/ApplicationELB" - period = 60 - unit = "Seconds" - threshold = 0.1 - extended_statistic = "p90" - - dimensions = { - LoadBalancer = aws_lb.alb.arn_suffix - } -} - -resource "aws_cloudwatch_metric_alarm" "api-avg" { - alarm_name = local.cw_api_avg_alarm_name - alarm_description = "This metric monitors average time of response latency" - tags = merge(local.default-tags, var.tags) - comparison_operator = "GreaterThanThreshold" - evaluation_periods = 1 - metric_name = "TargetResponseTime" - namespace = "AWS/ApplicationELB" - period = 60 - unit = "Seconds" - statistic = "Average" - threshold = 0.1 - - dimensions = { - LoadBalancer = aws_lb.alb.arn_suffix - } -} - -resource "aws_cloudwatch_metric_alarm" "api-http502" { - alarm_name = local.cw_api_http502_alarm_name - alarm_description = "This metric monitors HTTP 502 response from backed ec2 instances" - tags = merge(local.default-tags, var.tags) - comparison_operator = "GreaterThanThreshold" - evaluation_periods = 1 - metric_name = "HTTPCode_ELB_502_Count" - namespace = "AWS/ApplicationELB" - period = 60 - statistic = "Sum" - threshold = 3 - - dimensions = { - LoadBalancer = aws_lb.alb.arn_suffix - } -} diff --git a/examples/fis/templates/create-fis-templates.tpl b/examples/fis/templates/create-fis-templates.tpl deleted file mode 100644 index c640ae7..0000000 --- a/examples/fis/templates/create-fis-templates.tpl +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -OUTPUT='.fis_cli_result' -TEMPLATES=('cpu-stress.json' 'network-latency.json' 'terminate-instances.json' 'throttle-ec2-api.json' 'disk-stress.json') -for template in $${TEMPLATES[@]}; do - aws fis create-experiment-template --region ${region} --output text --cli-input-json file://$${template} --query 'experimentTemplate.id' 2>&1 | tee -a $${OUTPUT} -done diff --git a/examples/fis/templates/delete-fis-templates.tpl 
b/examples/fis/templates/delete-fis-templates.tpl deleted file mode 100644 index b9aee6a..0000000 --- a/examples/fis/templates/delete-fis-templates.tpl +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -OUTPUT='.fis_cli_result' -while read id; do - aws fis delete-experiment-template --region ${region} --output text --id $${id} --query 'experimentTemplate.id' 2>&1 > /dev/null -done < $${OUTPUT} -rm $${OUTPUT}