Skip to content

Commit

Permalink
Merging main into this branch
Browse files Browse the repository at this point in the history
  • Loading branch information
HusseinYasser committed May 18, 2024
2 parents d21607f + 9fa0f03 commit 7a15e1f
Show file tree
Hide file tree
Showing 107 changed files with 4,582 additions and 1,608 deletions.
12 changes: 11 additions & 1 deletion .github/workflows/maven-publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,14 @@ jobs:
run: |
docker build ./webserver --tag ahmad45123/workup:webserver
docker push ahmad45123/workup:webserver
# Builds the autoscaler image from ./autoscaler/docker-swarm-autoscaler and
# pushes it under the tag ahmad45123/workup:autoscaler.
- name: Build and push Autoscaler Image
run: |
docker build ./autoscaler/docker-swarm-autoscaler --tag ahmad45123/workup:autoscaler
docker push ahmad45123/workup:autoscaler
# Builds the media server image from ./mediaserver and pushes it under the
# tag ahmad45123/workup:mediaserver.
- name: Build and push Mediaserver Image
run: |
docker build ./mediaserver --tag ahmad45123/workup:mediaserver
docker push ahmad45123/workup:mediaserver
4 changes: 2 additions & 2 deletions .idea/compiler.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion .idea/encodings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ build:
docker build ./services/users --tag workup:service_users
docker build ./services/contracts --tag workup:service_contracts
docker build ./webserver --tag workup:webserver
docker build ./mediaserver --tag workup:mediaserver

up:
docker stack deploy -c compose.yaml -c compose.override.yaml workup
Expand Down
43 changes: 43 additions & 0 deletions autoscaler/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# docker-swarm-autoscaler

## Current Release: 0.1.0

This project is intended to bring automatic service scaling to Docker Swarm. The script uses Prometheus paired with cAdvisor metrics to determine CPU usage. It then uses a manager node to determine whether a service wants to be autoscaled, and uses that manager node to scale the service.

Currently the project only uses CPU usage to autoscale. If CPU usage rises above 60% the service will scale up; if it drops below 25% it will scale down.

## Usage
1. You can deploy prometheus, cadvisor, and docker-swarm-autoscaler by running `docker stack deploy -c swarm-autoscaler-stack.yml autoscaler` from the root of this repo.
* You can also use an already-deployed Prometheus and cAdvisor by specifying `PROMETHEUS_URL` in the docker-swarm-autoscaler environment. `swarm-autoscaler-stack.yml` shows an example of this.
* docker-swarm-autoscaler needs a placement constraint so that it is deployed to a manager node. `swarm-autoscaler-stack.yml` shows an example of this.
2. For services you want to autoscale you will need a deploy label `swarm.autoscaler=true`.

```
deploy:
labels:
- "swarm.autoscaler=true"
```

This is best paired with resource reservations and limits, which are also set under the `deploy` key.

```
deploy:
resources:
reservations:
cpus: '0.25'
memory: 512M
limits:
cpus: '0.50'
```

## Configuration
| Setting | Value | Description |
| --- | --- | --- |
| `swarm.autoscaler` | `true` | Required. This enables autoscaling for a service. Anything other than `true` will not enable it |
| `swarm.autoscaler.minimum` | Integer | Optional. This is the minimum number of replicas wanted for a service. The autoscaler will not downscale below this number |
| `swarm.autoscaler.maximum` | Integer | Optional. This is the maximum number of replicas wanted for a service. The autoscaler will not scale up past this number |

## Test
You can deploy a test app with the following commands below. Helloworld is initially only 1 replica. The autoscaler will scale to the minimum 3 replicas.
1. `docker stack deploy -c swarm-autoscaler-stack.yml autoscaler`
2. `docker stack deploy -c helloworld.yml hello`
26 changes: 26 additions & 0 deletions autoscaler/docker-swarm-autoscaler/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Image for the swarm autoscaler: a shell script that needs curl, jq and the
# docker command available at runtime.
# NOTE(review): ubuntu:xenial (16.04) and the pinned docker-ce 19.03.5 package
# look dated - confirm whether a newer base image/package should be used.
FROM ubuntu:xenial

# Single layer: install the tools, add Docker's apt repository, install the
# pinned docker-ce package, then remove apt caches and temp files.
RUN apt-get update -qq \
&& apt-get install -y -qq \
jq \
apt-transport-https \
ca-certificates \
curl \
software-properties-common \
dnsutils \
&& curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \
&& add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu xenial stable" \
&& apt-get update -qq \
&& apt-get install -y -qq \
docker-ce=5:19.03.5* \
&& apt-get -qq clean \
&& apt-get autoremove -y \
&& rm -rf \
/var/lib/apt/lists/* \
/tmp/* \
/var/tmp/*

# Copy the autoscaler script into the image root and make it executable for
# all users.
COPY auto-scale.sh /auto-scale.sh
RUN chmod a+x /auto-scale.sh

# Run the autoscaler script as the container's default command.
CMD ["/auto-scale.sh"]
114 changes: 114 additions & 0 deletions autoscaler/docker-swarm-autoscaler/auto-scale.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#!/bin/bash
#
# Autoscaler configuration. Each setting below can be overridden from the
# environment; the values shown are the defaults.

# Set LOOP to anything other than 'yes' to run a single pass and exit.
LOOP=${LOOP:='yes'}

# CPU thresholds compared against the value returned by PROMETHEUS_QUERY.
# A sample above the upper limit marks a service for scale-up; a sample
# below the lower limit marks it for scale-down.
CPU_PERCENTAGE_UPPER_LIMIT=${CPU_PERCENTAGE_UPPER_LIMIT:-60}
CPU_PERCENTAGE_LOWER_LIMIT=${CPU_PERCENTAGE_LOWER_LIMIT:-25}

# Path of the Prometheus instant-query endpoint, appended to PROMETHEUS_URL.
PROMETHEUS_API="api/v1/query?query="

# URL-encoded PromQL. Decoded it reads:
#   sum(rate(container_cpu_usage_seconds_total{
#       container_label_com_docker_swarm_task_name=~'.+'}[5m]))
#     BY(container_label_com_docker_swarm_service_name,instance)*100
# i.e. the 5-minute CPU usage rate of swarm task containers, summed per
# (service name, instance) pair and multiplied by 100.
PROMETHEUS_QUERY="sum(rate(container_cpu_usage_seconds_total%7Bcontainer_label_com_docker_swarm_task_name%3D~%27.%2B%27%7D%5B5m%5D))BY(container_label_com_docker_swarm_service_name%2Cinstance)*100"

# Print the services that have a CPU sample above CPU_PERCENTAGE_UPPER_LIMIT.
#
# Arguments:
#   $1 - JSON body of a Prometheus instant-query response.
# Outputs:
#   One line on stdout holding the unique, sorted service names separated by
#   single spaces (an empty line when nothing matches).
get_high_cpu_services () {
  local prometheus_results="${1}"
  local services

  # The payload is passed as an argument to a fixed '%s' format. Using it as
  # the format string itself would make printf interpret any '%' or
  # backslash sequence inside the JSON and corrupt it.
  # The limit is handed to jq with --argjson instead of being spliced into
  # the filter text; -r prints the names without surrounding quotes.
  services=$(printf '%s' "$prometheus_results" \
    | jq -r --argjson limit "$CPU_PERCENTAGE_UPPER_LIMIT" \
        '.data.result[]
         | select((.value[1] | tonumber) > $limit)
         | .metric.container_label_com_docker_swarm_service_name' \
    | sort -u)

  # Unquoted on purpose: word splitting joins the names onto a single line.
  echo $services
}

# Print every service name that appears in a Prometheus query response.
#
# Arguments:
#   $1 - JSON body of a Prometheus instant-query response.
# Outputs:
#   One line on stdout holding the unique, sorted service names separated by
#   single spaces (an empty line when the result set is empty).
get_all_services () {
  local prometheus_results="${1}"
  local services

  # The payload is passed as an argument to a fixed '%s' format so that '%'
  # or backslash sequences inside the JSON are not interpreted by printf.
  services=$(printf '%s' "$prometheus_results" \
    | jq -r '.data.result[].metric.container_label_com_docker_swarm_service_name' \
    | sort -u)

  # Emit the collected names. Without this line the function produces no
  # output and callers iterating over its result never see any service.
  # Unquoted on purpose: word splitting joins the names onto a single line.
  echo $services
}

# Print the services that have a CPU sample below CPU_PERCENTAGE_LOWER_LIMIT.
#
# Arguments:
#   $1 - JSON body of a Prometheus instant-query response.
# Outputs:
#   One line on stdout holding the unique, sorted service names separated by
#   single spaces (an empty line when nothing matches).
get_low_cpu_services () {
  local prometheus_results="${1}"
  local services

  # The payload is passed as an argument to a fixed '%s' format. Using it as
  # the format string itself would make printf interpret any '%' or
  # backslash sequence inside the JSON and corrupt it.
  # The limit is handed to jq with --argjson instead of being spliced into
  # the filter text; -r prints the names without surrounding quotes.
  services=$(printf '%s' "$prometheus_results" \
    | jq -r --argjson limit "$CPU_PERCENTAGE_LOWER_LIMIT" \
        '.data.result[]
         | select((.value[1] | tonumber) < $limit)
         | .metric.container_label_com_docker_swarm_service_name' \
    | sort -u)

  # Unquoted on purpose: word splitting joins the names onto a single line.
  echo $services
}

# Bring a service back inside its configured replica bounds.
#
# Arguments:
#   $1 - name of the swarm service.
# Behaviour:
#   Does nothing beyond logging unless the service carries the label
#   swarm.autoscaler=true. If swarm.autoscaler.minimum is set and above the
#   current replica count, scales up to it; otherwise, if
#   swarm.autoscaler.maximum is set and below the current count, scales down
#   to it. A bound whose label is missing or not a whole number is ignored.
default_scale () {
  local service_name=$1
  local spec auto_scale_label replica_minimum replica_maximum current_replicas

  # Inspect once and reuse the JSON for every field read below.
  spec=$(docker service inspect "$service_name")

  # '// empty' turns a missing label into an empty string instead of "null".
  auto_scale_label=$(jq -r '.[0].Spec.Labels["swarm.autoscaler"] // empty' <<<"$spec")
  if [[ "$auto_scale_label" != "true" ]]; then
    echo "Service $service_name does not have an autoscale label."
    return 0
  fi
  echo "Service $service_name has an autoscale label."

  current_replicas=$(jq -r '.[0].Spec.Mode.Replicated.Replicas // empty' <<<"$spec")
  if [[ ! "$current_replicas" =~ ^[0-9]+$ ]]; then
    # No numeric replica count in the spec (presumably not a replicated
    # service), so there is nothing to compare against.
    echo "Service $service_name has no replica count. Skipping."
    return 0
  fi

  replica_minimum=$(jq -r '.[0].Spec.Labels["swarm.autoscaler.minimum"] // empty' <<<"$spec")
  replica_maximum=$(jq -r '.[0].Spec.Labels["swarm.autoscaler.maximum"] // empty' <<<"$spec")

  # Each bound is applied only when its label is a whole number; otherwise a
  # missing maximum would be compared as 0 and produce "scale name=null".
  if [[ "$replica_minimum" =~ ^[0-9]+$ && "$replica_minimum" -gt "$current_replicas" ]]; then
    echo "Service $service_name is below the minimum. Scaling to the minimum of $replica_minimum"
    docker service scale "$service_name=$replica_minimum"
  elif [[ "$replica_maximum" =~ ^[0-9]+$ && "$current_replicas" -gt "$replica_maximum" ]]; then
    echo "Service $service_name is above the maximum. Scaling to the maximum of $replica_maximum"
    docker service scale "$service_name=$replica_maximum"
  fi
}

# Remove one replica from a service, respecting its minimum.
#
# Arguments:
#   $1 - name of the swarm service.
# Behaviour:
#   Acts only on services labelled swarm.autoscaler=true. The floor is the
#   swarm.autoscaler.minimum label; when that label is missing or not a
#   whole number the floor defaults to 1, so a service is never scaled to
#   zero replicas implicitly.
scale_down () {
  local service_name=$1
  local spec auto_scale_label replica_minimum current_replicas new_replicas

  # Inspect once and reuse the JSON for every field read below.
  spec=$(docker service inspect "$service_name")

  auto_scale_label=$(jq -r '.[0].Spec.Labels["swarm.autoscaler"] // empty' <<<"$spec")
  [[ "$auto_scale_label" == "true" ]] || return 0

  current_replicas=$(jq -r '.[0].Spec.Mode.Replicated.Replicas // empty' <<<"$spec")
  # Without a numeric replica count there is nothing to decrement.
  [[ "$current_replicas" =~ ^[0-9]+$ ]] || return 0

  replica_minimum=$(jq -r '.[0].Spec.Labels["swarm.autoscaler.minimum"] // empty' <<<"$spec")
  if [[ ! "$replica_minimum" =~ ^[0-9]+$ ]]; then
    # An absent label would otherwise compare as 0 and let the last replica
    # be removed.
    replica_minimum=1
  fi

  new_replicas=$((current_replicas - 1))
  if [[ "$replica_minimum" -le "$new_replicas" ]]; then
    echo "Scaling down the service $service_name to $new_replicas"
    docker service scale "$service_name=$new_replicas"
  elif [[ "$current_replicas" -eq "$replica_minimum" ]]; then
    echo "Service $service_name has the minumum number of replicas."
  fi
}

# Add one replica to a service, respecting its maximum.
#
# Arguments:
#   $1 - name of the swarm service.
# Behaviour:
#   Acts only on services labelled swarm.autoscaler=true. The ceiling is the
#   swarm.autoscaler.maximum label. When that label is missing or not a
#   whole number the service is not scaled up (this was already the
#   effective behaviour) and the reason is now logged.
scale_up () {
  local service_name=$1
  local spec auto_scale_label replica_maximum current_replicas new_replicas

  # Inspect once and reuse the JSON for every field read below.
  spec=$(docker service inspect "$service_name")

  auto_scale_label=$(jq -r '.[0].Spec.Labels["swarm.autoscaler"] // empty' <<<"$spec")
  [[ "$auto_scale_label" == "true" ]] || return 0

  current_replicas=$(jq -r '.[0].Spec.Mode.Replicated.Replicas // empty' <<<"$spec")
  # Without a numeric replica count there is nothing to increment.
  [[ "$current_replicas" =~ ^[0-9]+$ ]] || return 0

  replica_maximum=$(jq -r '.[0].Spec.Labels["swarm.autoscaler.maximum"] // empty' <<<"$spec")
  if [[ ! "$replica_maximum" =~ ^[0-9]+$ ]]; then
    echo "Service $service_name has no swarm.autoscaler.maximum label. Not scaling up."
    return 0
  fi

  new_replicas=$((current_replicas + 1))
  if [[ "$current_replicas" -eq "$replica_maximum" ]]; then
    # Log the function's own argument, not a variable set by the caller.
    echo "Service $service_name already has the maximum of $replica_maximum replicas"
  elif [[ "$replica_maximum" -ge "$new_replicas" ]]; then
    echo "Scaling up the service $service_name to $new_replicas"
    docker service scale "$service_name=$new_replicas"
  fi
}

# Run one autoscaling pass: query Prometheus, enforce replica bounds on every
# reported service, then scale up the high-CPU services and scale down the
# low-CPU ones.
main () {
  local query_url="${PROMETHEUS_URL}/${PROMETHEUS_API}${PROMETHEUS_QUERY}"

  prometheus_initial_results=$(curl --silent "$query_url" | jq .)
  echo "Prometheus results"
  # Left unquoted, as before, so the pretty-printed JSON is logged on one line.
  echo $prometheus_initial_results

  # The loop variable stays a non-local variable named `service`, as in the
  # original, because the helper functions called below may reference it.
  for service in $(get_all_services "${prometheus_initial_results}"); do
    default_scale $service
  done

  echo "Checking for high cpu services"
  for service in $(get_high_cpu_services "${prometheus_initial_results}"); do
    echo "Service $service is above $CPU_PERCENTAGE_UPPER_LIMIT percent cpu usage."
    scale_up $service
  done

  echo "Checking for low cpu services"
  for service in $(get_low_cpu_services "${prometheus_initial_results}"); do
    echo "Service $service is below $CPU_PERCENTAGE_LOWER_LIMIT percent cpu usage."
    scale_down $service
  done
}

# Always run one pass; keep repeating with a 5 second pause for as long as
# LOOP is 'yes'.
main
until [[ $LOOP != 'yes' ]]; do
  echo "Waiting 5 seconds for the next test"
  sleep 5s
  main
done
18 changes: 18 additions & 0 deletions autoscaler/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
# Prometheus configuration for the autoscaler stack.
global:
  scrape_interval: 5s
  evaluation_interval: 5s

scrape_configs:
  # Prometheus itself, found through DNS A records for tasks.prometheus.
  - job_name: "prometheus"
    dns_sd_configs:
      - names:
          - "tasks.prometheus"
        type: "A"
        port: 9090

  # cAdvisor, found through DNS A records for tasks.cadvisor.
  - job_name: "cadvisor"
    dns_sd_configs:
      - names:
          - "tasks.cadvisor"
        type: "A"
        port: 8080
73 changes: 73 additions & 0 deletions autoscaler/swarm-autoscaler-stack.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
---
# Swarm stack with the autoscaler, cAdvisor and Prometheus, all attached to
# the `autoscale` network.
version: "3.7"

networks:
  autoscale: {}

configs:
  prometheus_config:
    file: ./prometheus.yml

services:
  # Autoscaler script; gets the host Docker socket and the Prometheus URL.
  docker-swarm-autoscaler:
    image: ahmad45123/workup:autoscaler
    volumes:
      - "/var/run/docker.sock:/var/run/docker.sock:ro"
    environment:
      - "PROMETHEUS_URL=http://prometheus:9090"
    networks:
      - autoscale
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - "node.role == manager"
      resources:
        limits:
          cpus: "0.10"
          memory: 128M
        reservations:
          cpus: "0.10"
          memory: 64M

  # cAdvisor in global mode, with read-only mounts of host paths.
  cadvisor:
    image: gcr.io/cadvisor/cadvisor
    networks:
      - autoscale
    volumes:
      - "/var/run/docker.sock:/var/run/docker.sock:ro"
      - "/:/rootfs:ro"
      - "/var/run:/var/run:ro"
      - "/sys:/sys:ro"
      - "/var/lib/docker/:/var/lib/docker:ro"
      - "/dev/disk/:/dev/disk:ro"
    deploy:
      mode: global
      resources:
        limits:
          cpus: "0.10"
          memory: 128M
        reservations:
          cpus: "0.10"
          memory: 64M

  # Prometheus, configured from the prometheus_config config defined above.
  prometheus:
    image: prom/prometheus:v2.12.0
    networks:
      - autoscale
    command:
      - "--storage.tsdb.retention.size=1GB"
      - "--config.file=/etc/prometheus/prometheus.yml"
      - "--web.console.libraries=/etc/prometheus/console_libraries"
      - "--web.console.templates=/etc/prometheus/consoles"
      - "--web.enable-lifecycle"
    configs:
      - source: prometheus_config
        target: /etc/prometheus/prometheus.yml
    deploy:
      mode: replicated
      replicas: 1
      placement:
        constraints:
          - "node.role == manager"
      resources:
        limits:
          cpus: "0.50"
          memory: 1024M
        reservations:
          cpus: "0.50"
          memory: 128M
4 changes: 3 additions & 1 deletion compose.override.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

version: '3.7'
services:
service_jobs:
Expand All @@ -15,3 +14,6 @@ services:

service_webserver:
image: workup:webserver

service_mediaserver:
image: workup:mediaserver
Loading

0 comments on commit 7a15e1f

Please sign in to comment.