Merging main into this branch

Ahmad45123 · May 18, 2024 · 7a15e1f · 7a15e1f
2 parents d21607f + 9fa0f03
commit 7a15e1f
Show file tree

Hide file tree

Showing 107 changed files with 4,582 additions and 1,608 deletions.
diff --git a/.github/workflows/maven-publish.yaml b/.github/workflows/maven-publish.yaml
@@ -62,4 +62,14 @@ jobs:
       run: |
         docker build ./webserver --tag ahmad45123/workup:webserver
         docker push ahmad45123/workup:webserver
-  
+
+    # Build the autoscaler image from ./autoscaler/docker-swarm-autoscaler and
+    # push it under the ahmad45123/workup:autoscaler tag.
+    - name: Build and push Autoscaler Image
+      run: |
+        docker build ./autoscaler/docker-swarm-autoscaler --tag ahmad45123/workup:autoscaler
+        docker push ahmad45123/workup:autoscaler
+
+    # Build the mediaserver image from ./mediaserver and push it under the
+    # ahmad45123/workup:mediaserver tag.
+    - name: Build and push Mediaserver Image
+      run: |
+        docker build ./mediaserver --tag ahmad45123/workup:mediaserver
+        docker push ahmad45123/workup:mediaserver
+  
diff --git a/.idea/compiler.xml b/.idea/compiler.xml
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
diff --git a/Makefile b/Makefile
@@ -7,6 +7,7 @@ build:
 	docker build ./services/users --tag workup:service_users
 	docker build ./services/contracts --tag workup:service_contracts
 	docker build ./webserver --tag workup:webserver
+	docker build ./mediaserver --tag workup:mediaserver
 
 up: 
 	docker stack deploy -c compose.yaml -c compose.override.yaml workup

diff --git a/autoscaler/README.md b/autoscaler/README.md
@@ -0,0 +1,43 @@
+# docker-swarm-autoscaler
+
+## Current Release: 0.1.0
+
+This project brings automatic service scaling to Docker Swarm. The script uses Prometheus paired with cAdvisor metrics to determine CPU usage. It runs on a manager node, where it checks whether a service has opted in to autoscaling and then scales that service.
+
+Currently the project only uses CPU to autoscale. If CPU usage rises above 60% the service will scale up; if it falls below 25% it will scale down.
+
+## Usage
+1. You can deploy prometheus, cadvisor, and docker-swarm-autoscaler by running `docker stack deploy -c swarm-autoscaler-stack.yml autoscaler` from the `autoscaler/` directory of this repo.  
+  * You can also use an already deployed prometheus and cadvisor by specifying the `PROMETHEUS_URL` in the docker-swarm-autoscaler environment. `swarm-autoscaler-stack.yml` shows an example of this.  
+  * docker-swarm-autoscaler needs a placement constraint to deploy to a manager. `swarm-autoscaler-stack.yml` shows an example of this.  
+2. For services you want to autoscale you will need a deploy label `swarm.autoscaler=true`. 
+
+```
+deploy:
+  labels:
+    - "swarm.autoscaler=true"
+```
+
+This is best paired with resource reservations and limits, which are also set under the `deploy` key.
+
+```
+deploy:
+  resources:
+    reservations:
+      cpus: '0.25'
+      memory: 512M
+    limits:
+      cpus: '0.50'
+```
+
+## Configuration
+| Setting | Value | Description |
+| --- | --- | --- |
+| `swarm.autoscaler` | `true` | Required. This enables autoscaling for a service. Anything other than `true` will not enable it |
+| `swarm.autoscaler.minimum` | Integer | Optional. This is the minimum number of replicas wanted for a service. The autoscaler will not downscale below this number |
+| `swarm.autoscaler.maximum` | Integer | Optional. This is the maximum number of replicas wanted for a service. The autoscaler will not scale up past this number | 
+
+## Test
+You can deploy a test app with the commands below. Helloworld initially has only 1 replica; the autoscaler will scale it up to its configured minimum of 3 replicas.
+1. `docker stack deploy -c swarm-autoscaler-stack.yml autoscaler`
+2. `docker stack deploy -c helloworld.yml hello`
diff --git a/autoscaler/docker-swarm-autoscaler/Dockerfile b/autoscaler/docker-swarm-autoscaler/Dockerfile
@@ -0,0 +1,26 @@
+# Image for the swarm autoscaler: packages auto-scale.sh together with the
+# command-line tools installed below (jq, curl, dnsutils and docker-ce).
+# NOTE(review): ubuntu:xenial and the docker-ce 19.03.5 pin are old releases -
+# confirm they are still the intended base before relying on this image.
+FROM ubuntu:xenial
+
+# Single layer: install prerequisites, register Docker's apt repository and
+# signing key, install the pinned docker-ce package, then remove apt caches
+# and temporary files.
+RUN apt-get update -qq \
+  && apt-get install -y -qq \
+    jq \
+    apt-transport-https \
+    ca-certificates \
+    curl \
+    software-properties-common \
+    dnsutils \
+  && curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \
+  && add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu xenial stable" \
+  && apt-get update -qq \
+  && apt-get install -y -qq \
+    docker-ce=5:19.03.5* \
+  && apt-get -qq clean \
+  && apt-get autoremove -y \
+  && rm -rf \
+    /var/lib/apt/lists/* \
+    /tmp/* \
+    /var/tmp/*
+
+# Copy the autoscaling script to the image root and make it executable for
+# every user.
+COPY auto-scale.sh /auto-scale.sh
+RUN chmod a+x /auto-scale.sh
+
+# The script is the container's default command.
+CMD ["/auto-scale.sh"]
diff --git a/autoscaler/docker-swarm-autoscaler/auto-scale.sh b/autoscaler/docker-swarm-autoscaler/auto-scale.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+LOOP=${LOOP:='yes'}
+CPU_PERCENTAGE_UPPER_LIMIT=60
+CPU_PERCENTAGE_LOWER_LIMIT=25
+PROMETHEUS_API="api/v1/query?query="
+PROMETHEUS_QUERY="sum(rate(container_cpu_usage_seconds_total%7Bcontainer_label_com_docker_swarm_task_name%3D~%27.%2B%27%7D%5B5m%5D))BY(container_label_com_docker_swarm_service_name%2Cinstance)*100"
+
+get_high_cpu_services () {
+  local prometheus_results="${1}"
+  local services=""
+  for service in $(printf "%s$prometheus_results" | jq ".data.result[] | select( all(.value[1]|tonumber; . > $CPU_PERCENTAGE_UPPER_LIMIT) ) | .metric.container_label_com_docker_swarm_service_name" | sed 's/"//g' | sort | uniq); do
+    services="$services $service"
+  done
+  echo $services
+}
+
+get_all_services () {
+  local prometheus_results="${1}"
+  local services=""
+  for service in $(printf "%s$prometheus_results" | jq ".data.result[].metric.container_label_com_docker_swarm_service_name" | sed 's/"//g' | sort | uniq); do
+    services="$services $service"
+  done
+}
+
+get_low_cpu_services () {
+  local prometheus_results="${1}"
+  local services=""
+  for service in $(printf "%s$prometheus_results" | jq ".data.result[] | select( all(.value[1]|tonumber; . < $CPU_PERCENTAGE_LOWER_LIMIT) ) | .metric.container_label_com_docker_swarm_service_name" | sed 's/"//g' | sort | uniq); do
+    services="$services $service"
+  done
+
+  echo $services
+}
+
+default_scale () {
+  service_name=$1
+  auto_scale_label=$(docker service inspect $service_name | jq '.[].Spec.Labels["swarm.autoscaler"]')
+  replica_minimum=$(docker service inspect $service_name | jq '.[].Spec.Labels["swarm.autoscaler.minimum"]' | sed 's/\"//g')
+  replica_maximum=$(docker service inspect $service_name | jq '.[].Spec.Labels["swarm.autoscaler.maximum"]' | sed 's/\"//g')
+  if [[ "${auto_scale_label}" == "\"true\"" ]]; then
+    echo Service $service has an autoscale label.
+    current_replicas=$(docker service inspect $service_name | jq ".[].Spec.Mode.Replicated | .Replicas")
+    if [[ $replica_minimum -gt $current_replicas ]]; then
+      echo Service $service_name is below the minimum. Scaling to the minimum of $replica_minimum
+      docker service scale $service_name=$replica_minimum
+    elif [[ $current_replicas -gt $replica_maximum ]]; then
+      echo Service $service_name is above the maximum. Scaling to the maximum of $replica_maximum
+      docker service scale $service_name=$replica_maximum
+    fi
+  else
+    echo Service $service does not have an autoscale label.
+  fi
+
+}
+
+scale_down () {
+  service_name=$1
+  auto_scale_label=$(docker service inspect $service_name | jq '.[].Spec.Labels["swarm.autoscaler"]')
+  replica_minimum=$(docker service inspect $service_name | jq '.[].Spec.Labels["swarm.autoscaler.minimum"]' | sed 's/\"//g')
+  if [[ "${auto_scale_label}" == "\"true\"" ]]; then
+    current_replicas=$(docker service inspect $service_name | jq ".[].Spec.Mode.Replicated | .Replicas")
+    new_replicas=$(expr $current_replicas - 1)
+    if [[ $replica_minimum -le $new_replicas ]]; then
+      echo Scaling down the service $service_name to $new_replicas
+      docker service scale $service_name=$new_replicas
+    elif [[ $current_replicas -eq $replica_minimum ]]; then
+      echo Service $service_name has the minumum number of replicas.
+    fi
+  fi
+
+}
+
+# Add one replica to a service, respecting swarm.autoscaler.maximum.
+#
+# Only services labelled swarm.autoscaler=true are touched. A service whose
+# maximum label is missing or not a whole number is not scaled up; this is
+# now reported in the log instead of happening silently.
+#
+# Arguments:
+#   $1 - name of the swarm service to scale up.
+scale_up () {
+  local service_name=$1
+  local service_spec auto_scale_label replica_maximum current_replicas new_replicas
+  # Inspect once and reuse the JSON so every value comes from one snapshot.
+  service_spec=$(docker service inspect "$service_name")
+  auto_scale_label=$(printf '%s' "$service_spec" | jq '.[].Spec.Labels["swarm.autoscaler"]')
+  if [[ "${auto_scale_label}" != "\"true\"" ]]; then
+    return 0
+  fi
+
+  replica_maximum=$(printf '%s' "$service_spec" | jq -r '.[].Spec.Labels["swarm.autoscaler.maximum"]')
+  current_replicas=$(printf '%s' "$service_spec" | jq ".[].Spec.Mode.Replicated | .Replicas")
+  # jq prints "null" for anything absent from the spec; without a numeric
+  # replica count there is nothing to increment.
+  if [[ ! $current_replicas =~ ^[0-9]+$ ]]; then
+    return 0
+  fi
+  # A literal "null" would evaluate as 0 inside [[ ]], which silently blocked
+  # every scale-up; keep that outcome but make it visible.
+  if [[ ! $replica_maximum =~ ^[0-9]+$ ]]; then
+    echo Service $service_name has no numeric swarm.autoscaler.maximum label. Not scaling up.
+    return 0
+  fi
+
+  new_replicas=$(( current_replicas + 1 ))
+  if [[ $current_replicas -eq $replica_maximum ]]; then
+    echo Service $service_name already has the maximum of $replica_maximum replicas
+  elif [[ $replica_maximum -ge $new_replicas ]]; then
+    echo Scaling up the service $service_name to $new_replicas
+    docker service scale "$service_name=$new_replicas"
+  fi
+}
+
+# Run one autoscaling pass: query Prometheus once, then
+#   1. apply default_scale to every service returned by get_all_services,
+#   2. call scale_up for every service above CPU_PERCENTAGE_UPPER_LIMIT,
+#   3. call scale_down for every service below CPU_PERCENTAGE_LOWER_LIMIT.
+# Reads PROMETHEUS_URL from the environment.
+main () {
+    # The response is normalised through jq and reused for all three passes.
+    prometheus_initial_results=$(curl --silent "${PROMETHEUS_URL}/${PROMETHEUS_API}${PROMETHEUS_QUERY}" | jq .)
+    echo Prometheus results
+    echo $prometheus_initial_results
+    # NOTE: `service` is a global here, not a local; keep the name stable in
+    # case other functions in this script read it.
+    for service in $(get_all_services "${prometheus_initial_results}"); do
+      default_scale $service
+    done
+    echo Checking for high cpu services
+    for service in $(get_high_cpu_services "${prometheus_initial_results}"); do
+      echo Service $service is above $CPU_PERCENTAGE_UPPER_LIMIT percent cpu usage.
+      scale_up $service
+    done
+    echo Checking for low cpu services
+    for service in $(get_low_cpu_services "${prometheus_initial_results}"); do
+      echo Service $service is below $CPU_PERCENTAGE_LOWER_LIMIT percent cpu usage.
+      scale_down $service  
+    done
+}
+
+# Always run one scaling pass; keep repeating every 5 seconds for as long as
+# LOOP is 'yes'.
+while true; do
+  main
+  if [[ $LOOP != 'yes' ]]; then
+    break
+  fi
+  echo Waiting 5 seconds for the next test
+  sleep 5s
+done
diff --git a/autoscaler/prometheus.yml b/autoscaler/prometheus.yml
@@ -0,0 +1,18 @@
+# Prometheus configuration for the autoscaler stack: scrape and evaluate
+# every 5 seconds.
+global:
+  scrape_interval:     5s
+  evaluation_interval: 5s
+
+# Both jobs discover their targets through DNS A-record lookups.
+# NOTE(review): the tasks.<service> names presumably resolve to the tasks of
+# the prometheus and cadvisor services in the swarm stack - confirm.
+scrape_configs:
+  # Prometheus instances, scraped on port 9090.
+  - job_name: 'prometheus'
+    dns_sd_configs:
+    - names:
+      - 'tasks.prometheus'
+      type: 'A'
+      port: 9090
+
+  # cAdvisor instances, scraped on port 8080.
+  - job_name: 'cadvisor'
+    dns_sd_configs:
+    - names:
+      - 'tasks.cadvisor'
+      type: 'A'
+      port: 8080
diff --git a/autoscaler/swarm-autoscaler-stack.yml b/autoscaler/swarm-autoscaler-stack.yml
@@ -0,0 +1,73 @@
+# Stack definition for the autoscaler together with cadvisor and prometheus.
+version: "3.7"
+
+# Network that all three services below attach to.
+networks:
+  autoscale:
+
+# Prometheus configuration, read from the file next to this stack file.
+configs:
+  prometheus_config:
+    file: ./prometheus.yml
+
+services:
+  # The autoscaler itself: gets the host's Docker socket mounted in, is told
+  # where Prometheus lives through PROMETHEUS_URL, and is constrained to
+  # manager nodes as a single replica.
+  docker-swarm-autoscaler:
+    image: ahmad45123/workup:autoscaler
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+    environment:
+      - PROMETHEUS_URL=http://prometheus:9090
+    networks:
+      - autoscale
+    deploy:
+      mode: replicated
+      replicas: 1
+      placement:
+        constraints:
+          - node.role == manager
+      resources:
+        limits:
+          cpus: '0.10'
+          memory: 128M
+        reservations:
+          cpus: '0.10'
+          memory: 64M
+  # cAdvisor, deployed in global mode with read-only access to the host paths
+  # listed under volumes.
+  # NOTE(review): the image has no tag, so the registry's default tag is
+  # pulled - consider pinning a version.
+  cadvisor:
+    image: gcr.io/cadvisor/cadvisor
+    networks:
+      - autoscale
+    volumes:
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - /:/rootfs:ro
+      - /var/run:/var/run:ro
+      - /sys:/sys:ro
+      - /var/lib/docker/:/var/lib/docker:ro
+      - /dev/disk/:/dev/disk:ro
+    deploy:
+      mode: global
+      resources:
+        limits:
+          cpus: '0.10'
+          memory: 128M
+        reservations:
+          cpus: '0.10'
+          memory: 64M
+
+  # Prometheus, a single replica constrained to manager nodes. Its
+  # configuration comes from the prometheus_config config, mounted at the
+  # path passed to --config.file; TSDB retention is capped at 1GB.
+  prometheus:
+    image: prom/prometheus:v2.12.0
+    networks:
+      - autoscale
+    command: ["--storage.tsdb.retention.size=1GB", "--config.file=/etc/prometheus/prometheus.yml", "--web.console.libraries=/etc/prometheus/console_libraries", "--web.console.templates=/etc/prometheus/consoles", "--web.enable-lifecycle"]
+    configs:
+       - source: prometheus_config
+         target: /etc/prometheus/prometheus.yml
+    deploy:
+      mode: replicated
+      replicas: 1
+      placement:
+        constraints:
+          - node.role == manager
+      resources:
+        limits:
+          cpus: '0.50'
+          memory: 1024M
+        reservations:
+          cpus: '0.50'
+          memory: 128M
diff --git a/compose.override.yaml b/compose.override.yaml
@@ -1,4 +1,3 @@
-
 version: '3.7'
 services:
   service_jobs:
@@ -15,3 +14,6 @@ services:
 
   service_webserver:
     image: workup:webserver
+
+  # Override: run the mediaserver from the workup:mediaserver image, the tag
+  # produced by the Makefile's build target.
+  service_mediaserver:
+    image: workup:mediaserver