Skip to content

Commit

Permalink
feat(fis): disk stress experiment (#54)
Browse files Browse the repository at this point in the history
* feat(fis): disk stress experiment

* feat(fis): cloudwatch agent install automation

* fix(fis): changed disk stress target

* fix(fis): adjusted waiting time between ssm commands
  • Loading branch information
Young-ook authored Aug 23, 2021
1 parent 02ef59f commit 903a4d6
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 7 deletions.
25 changes: 24 additions & 1 deletion examples/fis/aws-fis.tf
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ resource "aws_iam_role_policy_attachment" "fis-run" {
role = aws_iam_role.fis-run.id
}

### systems manager document for fault injection simulator experiment

# Systems Manager "Command" document holding the disk stress script.
# Its ARN is handed to the disk-stress experiment template so the
# experiment can run it through the ssm send-command action.
resource "aws_ssm_document" "disk-stress" {
  name            = "FIS-Run-Disk-Stress"
  document_type   = "Command"
  document_format = "YAML"

  # The document body lives next to the other experiment templates.
  content = file("${path.module}/templates/disk-stress.yaml")

  tags = merge(local.default-tags, var.tags)
}

### fault injection simulator experiment templates

locals {
Expand Down Expand Up @@ -81,11 +91,22 @@ resource "local_file" "throttle-ec2-api" {
file_permission = "0600"
}

# Renders templates/disk-stress.tpl into disk-stress.json, the CLI input
# file for the disk stress experiment template.
resource "local_file" "disk-stress" {
  filename        = "${path.module}/disk-stress.json"
  file_permission = "0600"

  content = templatefile("${path.module}/templates/disk-stress.tpl", {
    alarm   = local.stop_condition_alarm
    doc_arn = aws_ssm_document.disk-stress.arn
    region  = var.aws_region
    role    = aws_iam_role.fis-run.arn
  })
}

resource "local_file" "create-templates" {
content = join("\n", [
"#!/bin/bash -ex",
"OUTPUT='.fis_cli_result'",
"TEMPLATES=('cpu-stress.json' 'network-latency.json' 'terminate-instances.json' 'throttle-ec2-api.json')",
"TEMPLATES=('cpu-stress.json' 'network-latency.json' 'terminate-instances.json' 'throttle-ec2-api.json' 'disk-stress.json')",
"for template in $${TEMPLATES[@]}; do",
" aws fis create-experiment-template --cli-input-json file://$${template} --output text --query 'experimentTemplate.id' 2>&1 | tee -a $${OUTPUT}",
"done",
Expand All @@ -101,6 +122,7 @@ resource "null_resource" "create-templates" {
local_file.network-latency,
local_file.throttle-ec2-api,
local_file.terminate-instances,
local_file.disk-stress,
local_file.create-templates,
]
provisioner "local-exec" {
Expand Down Expand Up @@ -129,6 +151,7 @@ resource "null_resource" "delete-templates" {
local_file.network-latency,
local_file.throttle-ec2-api,
local_file.terminate-instances,
local_file.disk-stress,
local_file.delete-templates,
]

Expand Down
9 changes: 5 additions & 4 deletions examples/fis/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,11 @@ resource "aws_lb_target_group" "http" {

### application/ec2
module "ec2" {
source = "../../"
name = var.name
tags = var.tags
subnets = values(module.vpc.subnets["private"])
source = "../../"
name = var.name
tags = var.tags
subnets = values(module.vpc.subnets["private"])
policy_arns = ["arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy"]
node_groups = [
{
name = "baseline"
Expand Down
10 changes: 9 additions & 1 deletion examples/fis/outputs.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,12 @@ locals {
output "vclient" {
description = "Script to call APIs as a virtual client"
value = local.vclient
}
}

# Renders templates/cwagent.tpl into cwagent.sh, a helper script that
# creates the SSM associations for the CloudWatch agent in the given region.
resource "local_file" "cwagent" {
  content = templatefile("${path.module}/templates/cwagent.tpl", {
    region = var.aws_region
  })
  filename = "${path.module}/cwagent.sh"

  # The rendered file is a shell script with a shebang, so it needs the
  # execute bit to be runnable as ./cwagent.sh; access stays owner-only.
  file_permission = "0700"
}
12 changes: 12 additions & 0 deletions examples/fis/templates/cwagent.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
# Creates two SSM State Manager associations for instances tagged
# release=baseline or release=canary:
#   1. AWS-ConfigureAWSPackage      -> installs the AmazonCloudWatchAgent package
#   2. AmazonCloudWatch-ManageAgent -> starts the agent
# This file is a Terraform template; the region placeholder is filled in
# when the script is rendered.

# Stop at the first failing command so the agent is never asked to start
# when the install association could not be created.
set -euo pipefail

aws ssm create-association --name AWS-ConfigureAWSPackage \
  --parameters 'action=Install,name=AmazonCloudWatchAgent' \
  --targets 'Key=tag:release,Values=baseline,canary' \
  --region ${region} --output text

# NOTE(review): fixed delay between the two associations; presumably it
# gives the package install time to finish before the start command runs.
# Confirm 30 seconds is enough for the target instances.
sleep 30

aws ssm create-association --name AmazonCloudWatch-ManageAgent \
  --parameters 'action=start' \
  --targets 'Key=tag:release,Values=baseline,canary' \
  --region ${region} --output text
43 changes: 43 additions & 0 deletions examples/fis/templates/disk-stress.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"tags": {
"Name": "DiskStress"
},
"description": "Run a Disk fault injection on the specified instance",
"targets": {
"ec2-instances": {
"resourceType": "aws:ec2:instance",
"resourceTags": {
"env": "prod",
"release": "canary"
},
"filters": [
{
"path": "State.Name",
"values": ["running"]
}
],
"selectionMode": "COUNT(1)"
}
},
"actions": {
"DiskStress": {
"actionId": "aws:ssm:send-command",
"description": "run disk stress using ssm",
"parameters": {
"duration": "PT1M",
"documentArn": "${doc_arn}",
"documentParameters": "{\"DurationSeconds\": \"60\", \"Workers\": \"4\", \"Percent\": \"70\", \"InstallDependencies\": \"True\"}"
},
"targets": {
"Instances": "ec2-instances"
}
}
},
"stopConditions": [
{
"source": "aws:cloudwatch:alarm",
"value": "${alarm}"
}
],
"roleArn": "${role}"
}
91 changes: 91 additions & 0 deletions examples/fis/templates/disk-stress.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
---
description: |
### Document name - FIS-Run-Disk-Stress
## What does this document do?
It runs disk stress on an instance via stress-ng tool.
## Input Parameters
* DurationSeconds: (Required) The duration - in seconds - of the disk stress.
* Workers: The number of disk stressors (default: 1).
* Percent: (Required) The percentage of disk to use.
* InstallDependencies: If set to True, Systems Manager installs the required dependencies on the target instances. (default: True).
## Output Parameters
None.
schemaVersion: '2.2'
# Document inputs. Every value is passed as a string; the numeric ones are
# restricted to digits by allowedPattern. Parameters without a default
# (DurationSeconds, Percent) must be supplied by the caller.
parameters:
  DurationSeconds:
    description: "(Required) The duration - in seconds - of the disk stress."
    type: String
    allowedPattern: "^[0-9]+$"
  Workers:
    description: "The number of disk stressors (default: 1)."
    type: String
    allowedPattern: "^[0-9]+$"
    default: "1"
  Percent:
    description: "The percentage of disk to use (required)."
    type: String
    allowedPattern: "^[0-9]+$"
  InstallDependencies:
    description: "If set to True, Systems Manager installs the required dependencies on the target instances. (default: True)."
    type: String
    allowedValues:
    - 'True'
    - 'False'
    default: 'True'
mainSteps:
# Step 1: install stress-ng on Linux targets when InstallDependencies is True.
- action: aws:runShellScript
  name: InstallDependencies
  precondition:
    StringEquals:
    - platformType
    - Linux
  description: |
    ## Parameter: InstallDependencies
    If set to True, this step installs the required dependency (stress-ng) from the operating system's repository.
    It supports Amazon Linux (yum) and Ubuntu (apt); any other distribution fails the step.
  inputs:
    runCommand:
    - |
      #!/bin/bash
      if [[ "{{ InstallDependencies }}" == True ]] ; then
        # Nothing to do when stress-ng is already on the PATH.
        if [[ "$( which stress-ng 2>/dev/null )" ]] ; then echo Dependency is already installed. ; exit ; fi
        echo "Installing required dependencies"
        if [ -f "/etc/system-release" ] ; then
          if cat /etc/system-release | grep -i 'Amazon Linux' ; then
            # -y keeps the install non-interactive; there is no terminal to answer a prompt.
            sudo amazon-linux-extras install -y testing
            sudo yum -y install stress-ng
          else
            echo "There was a problem installing dependencies."
            exit 1
          fi
        elif cat /etc/issue | grep -i Ubuntu ; then
          sudo apt-get update -y
          # A single sudo: a nested sudo may reset the environment and drop DEBIAN_FRONTEND.
          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y stress-ng
        else
          echo "There was a problem installing dependencies."
          exit 1
        fi
      fi
# Step 2: run the disk stress with stress-ng's fallocate stressor.
- action: aws:runShellScript
  name: ExecuteStressNg
  precondition:
    StringEquals:
    - platformType
    - Linux
  description: |
    ## Parameters: DurationSeconds, Workers and Percent
    This step will run a disk stress test on the instance for the specified DurationSeconds time.
    It will start `Workers` number of workers, using `Percent` of the total available disk.
  inputs:
    maxAttempts: 1
    runCommand:
    - |
      # Reject out-of-range input with a non-zero status so the command is reported as failed
      # instead of succeeding without injecting any fault.
      if [ {{ DurationSeconds }} -lt 1 ] || [ {{ DurationSeconds }} -gt 43200 ] ; then echo DurationSeconds parameter value must be between 1 and 43200 ; exit 1 ; fi
      if [ {{ Percent }} -lt 1 ] || [ {{ Percent }} -gt 100 ] ; then echo Percent parameter value must be between 1 and 100 ; exit 1 ; fi
      # Refuse to stack on top of a stress-ng run that is already in progress.
      if pgrep stress-ng ; then echo Another stress-ng command is running, exiting... ; exit 1 ; fi
      echo Initiating disk stress for {{ DurationSeconds }} seconds, {{ Workers }} workers, using {{ Percent }} percent of total available disk...
      stress-ng --fallocate {{ Workers }} --fallocate-bytes {{ Percent }}% -t {{ DurationSeconds }}s --metrics
      echo Finished disk stress.
2 changes: 1 addition & 1 deletion examples/fis/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ variable "azs" {

variable "vpc_endpoint_config" {
description = "A list of vpc endpoint configurations"
type = list
type = list(any)
default = null
}

Expand Down

0 comments on commit 903a4d6

Please sign in to comment.