From 5a8b292086d1bf2aec6e8a428f75b690430b4aa8 Mon Sep 17 00:00:00 2001 From: Donal McBreen Date: Wed, 18 Sep 2024 14:42:01 +0100 Subject: [PATCH] Simplified deploy/drain timeouts Remove `stop_wait_time` and `readiness_timeout` from the root config and remove `deploy_timeout` and `drain_timeout` from the proxy config. Instead we'll just have `deploy_timeout` and `drain_timeout` in the root config. For roles that run the proxy, they are passed to the kamal-proxy deploy command. Once that returns we can assume the container is ready to shut down. For other roles, we'll use the `deploy_timeout` when polling the container to see if it is ready and the `drain_timeout` when stopping the container. --- lib/kamal/cli/healthcheck/poller.rb | 4 +-- lib/kamal/commands/app.rb | 2 +- lib/kamal/configuration.rb | 10 +++++--- .../configuration/docs/configuration.yml | 15 ++++++----- lib/kamal/configuration/docs/proxy.yml | 5 ---- lib/kamal/configuration/proxy.rb | 14 +++++++---- lib/kamal/configuration/role.rb | 6 +++++ test/cli/app_test.rb | 8 +++--- test/cli/proxy_test.rb | 6 ++--- test/commands/app_test.rb | 12 ++++++--- test/commands/proxy_test.rb | 2 +- test/configuration/role_test.rb | 8 ++++++ test/configuration/validation_test.rb | 2 +- test/fixtures/deploy_with_proxy.yml | 4 +-- test/fixtures/deploy_with_roles.yml | 2 +- .../docker/deployer/app/config/deploy.yml | 5 ++-- .../deployer/app_with_roles/config/deploy.yml | 25 +++++++++++++++++-- 17 files changed, 84 insertions(+), 46 deletions(-) diff --git a/lib/kamal/cli/healthcheck/poller.rb b/lib/kamal/cli/healthcheck/poller.rb index fad518454..fc3d0dec9 100644 --- a/lib/kamal/cli/healthcheck/poller.rb +++ b/lib/kamal/cli/healthcheck/poller.rb @@ -3,7 +3,7 @@ module Kamal::Cli::Healthcheck::Poller def wait_for_healthy(role, &block) attempt = 1 - timeout_at = Time.now + KAMAL.config.readiness_timeout + timeout_at = Time.now + KAMAL.config.deploy_timeout readiness_delay = KAMAL.config.readiness_delay begin @@ -19,7 +19,7 @@ def wait_for_healthy(role, &block) end unless %w[ running healthy ].include?(status) - raise Kamal::Cli::Healthcheck::Error, "container not ready after #{KAMAL.config.readiness_timeout} seconds (#{status})" + raise Kamal::Cli::Healthcheck::Error, "container not ready after #{KAMAL.config.deploy_timeout} seconds (#{status})" end rescue Kamal::Cli::Healthcheck::Error => e time_left = timeout_at - Time.now diff --git a/lib/kamal/commands/app.rb b/lib/kamal/commands/app.rb index 417ad4637..b6e35d666 100644 --- a/lib/kamal/commands/app.rb +++ b/lib/kamal/commands/app.rb @@ -43,7 +43,7 @@ def status(version:) def stop(version: nil) pipe \ version ? container_id_for_version(version) : current_running_container_id, - xargs(config.stop_wait_time ? docker(:stop, "-t", config.stop_wait_time) : docker(:stop)) + xargs(docker(:stop, *role.stop_args)) end def info diff --git a/lib/kamal/configuration.rb b/lib/kamal/configuration.rb index 1d6dfd129..f6051ca7d 100644 --- a/lib/kamal/configuration.rb +++ b/lib/kamal/configuration.rb @@ -6,7 +6,7 @@ require "net/ssh/proxy/jump" class Kamal::Configuration - delegate :service, :image, :labels, :stop_wait_time, :hooks_path, to: :raw_config, allow_nil: true + delegate :service, :image, :labels, :hooks_path, to: :raw_config, allow_nil: true delegate :argumentize, :optionize, to: Kamal::Utils attr_reader :destination, :raw_config, :secrets @@ -189,8 +189,12 @@ def readiness_delay raw_config.readiness_delay || 7 end - def readiness_timeout - raw_config.readiness_timeout || 30 + def deploy_timeout + raw_config.deploy_timeout || 30 + end + + def drain_timeout + raw_config.drain_timeout || 30 end diff --git a/lib/kamal/configuration/docs/configuration.yml b/lib/kamal/configuration/docs/configuration.yml index 2e7b618d1..d3c1874de 100644 --- a/lib/kamal/configuration/docs/configuration.yml +++ b/lib/kamal/configuration/docs/configuration.yml @@ -93,11 +93,6 @@ primary_role: workers # Whether roles with no servers are allowed. Defaults to `false`. allow_empty_roles: false -# Stop wait time -# -# How long we wait for a container to stop before killing it, defaults to 30 seconds -stop_wait_time: 60 - # Retain containers # # How many old containers and images we retain, defaults to 5 @@ -114,11 +109,15 @@ minimum_version: 1.3.0 # This only applies to containers that do not run a proxy or specify a healthcheck readiness_delay: 4 -# Readiness timeout +# Deploy timeout # # How long to wait for a container to become ready, default 30 -# This only applies to containers that do not run a proxy -readiness_timeout: 4 +deploy_timeout: 10 + +# Drain timeout +# +# How long to wait for a containers to drain, default 30 +drain_timeout: 10 # Run directory # diff --git a/lib/kamal/configuration/docs/proxy.yml b/lib/kamal/configuration/docs/proxy.yml index 6840a75cb..85870d14c 100644 --- a/lib/kamal/configuration/docs/proxy.yml +++ b/lib/kamal/configuration/docs/proxy.yml @@ -38,11 +38,6 @@ proxy: # Defaults to false ssl: true - # Deploy timeout - # - # How long to wait for the app to boot when deploying, defaults to 30 seconds - deploy_timeout: 10s - # Response timeout # # How long to wait for requests to complete before timing out, defaults to 30 seconds diff --git a/lib/kamal/configuration/proxy.rb b/lib/kamal/configuration/proxy.rb index 8cce49b06..973442abe 100644 --- a/lib/kamal/configuration/proxy.rb +++ b/lib/kamal/configuration/proxy.rb @@ -39,12 +39,12 @@ def deploy_options { host: proxy_config["host"], tls: proxy_config["ssl"], - "deploy-timeout": proxy_config["deploy_timeout"], - "drain-timeout": proxy_config["drain_timeout"], - "health-check-interval": proxy_config.dig("healthcheck", "interval"), - "health-check-timeout": proxy_config.dig("healthcheck", "timeout"), + "deploy-timeout": seconds_duration(config.deploy_timeout), + "drain-timeout": seconds_duration(config.drain_timeout), + "health-check-interval": seconds_duration(proxy_config.dig("healthcheck", "interval")), + "health-check-timeout": seconds_duration(proxy_config.dig("healthcheck", "timeout")), "health-check-path": proxy_config.dig("healthcheck", "path"), - "target-timeout": proxy_config["response_timeout"], + "target-timeout": seconds_duration(proxy_config["response_timeout"]), "buffer-requests": proxy_config.fetch("buffering", { "requests": true }).fetch("requests", true), "buffer-responses": proxy_config.fetch("buffering", { "responses": true }).fetch("responses", true), "buffer-memory": proxy_config.dig("buffering", "memory"), @@ -68,4 +68,8 @@ def config_volume private attr_reader :config, :proxy_config + + def seconds_duration(value) + value ? "#{value}s" : nil + end end diff --git a/lib/kamal/configuration/role.rb b/lib/kamal/configuration/role.rb index 3f1e6d74b..9d5593172 100644 --- a/lib/kamal/configuration/role.rb +++ b/lib/kamal/configuration/role.rb @@ -65,6 +65,12 @@ def logging @logging ||= config.logging.merge(specialized_logging) end + def stop_args + # When deploying with the proxy, kamal-proxy will drain request before returning so we don't need to wait. + timeout = running_proxy? ? nil : config.drain_timeout + + [ *argumentize("-t", timeout) ] + end def env(host) @envs ||= {} diff --git a/test/cli/app_test.rb b/test/cli/app_test.rb index f3bd9fe5f..5fdf61c5d 100644 --- a/test/cli/app_test.rb +++ b/test/cli/app_test.rb @@ -128,9 +128,9 @@ class CliAppTest < CliTestCase SSHKit::Backend::Abstract.any_instance.stubs(:execute).returns("") SSHKit::Backend::Abstract.any_instance.stubs(:execute) - .with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false).twice - SSHKit::Backend::Abstract.any_instance.stubs(:execute) - .with(:docker, :exec, "kamal-proxy", "kamal-proxy", :deploy, "app-web", "--target", "\"123:80\"", "--buffer-requests", "--buffer-responses", "--log-request-header", "\"Cache-Control\"", "--log-request-header", "\"Last-Modified\"", "--log-request-header", "\"User-Agent\"").raises(SSHKit::Command::Failed.new("Failed to deploy")) + .with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false) + SSHKit::Backend::Abstract.any_instance.expects(:execute) + .with(:docker, :exec, "kamal-proxy", "kamal-proxy", :deploy, "app-web", "--target", "\"123:80\"", "--deploy-timeout", "\"1s\"", "--drain-timeout", "\"30s\"", "--buffer-requests", "--buffer-responses", "--log-request-header", "\"Cache-Control\"", "--log-request-header", "\"Last-Modified\"", "--log-request-header", "\"User-Agent\"").raises(SSHKit::Command::Failed.new("Failed to deploy")) stderred do run_command("boot", config: :with_roles, host: nil, allow_execute_error: true).tap do |output| @@ -190,7 +190,7 @@ class CliAppTest < CliTestCase run_command("start").tap do |output| assert_match "docker start app-web-999", output - assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"999:80\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\"", output + assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"999:80\" --deploy-timeout \"30s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\"", output end end diff --git a/test/cli/proxy_test.rb b/test/cli/proxy_test.rb index 9466ec41b..0296723bc 100644 --- a/test/cli/proxy_test.rb +++ b/test/cli/proxy_test.rb @@ -58,13 +58,13 @@ class CliProxyTest < CliTestCase assert_match "Running docker container stop traefik ; docker container prune --force --filter label=org.opencontainers.image.title=Traefik && docker image prune --all --force --filter label=org.opencontainers.image.title=Traefik on 1.1.1.1", output assert_match "docker container prune --force --filter label=org.opencontainers.image.title=kamal-proxy on 1.1.1.1", output assert_match "docker run --name kamal-proxy --network kamal --detach --restart unless-stopped --publish 80:80 --publish 443:443 --volume /var/run/docker.sock:/var/run/docker.sock --volume $(pwd)/.kamal/proxy/config:/home/kamal-proxy/.config/kamal-proxy --log-opt max-size=\"10m\" #{Kamal::Configuration::Proxy::DEFAULT_IMAGE} on 1.1.1.1", output - assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.1", output + assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.1", output assert_match "docker container stop kamal-proxy on 1.1.1.2", output assert_match "Running docker container stop traefik ; docker container prune --force --filter label=org.opencontainers.image.title=Traefik && docker image prune --all --force --filter label=org.opencontainers.image.title=Traefik on 1.1.1.2", output assert_match "docker container prune --force --filter label=org.opencontainers.image.title=kamal-proxy on 1.1.1.2", output assert_match "docker run --name kamal-proxy --network kamal --detach --restart unless-stopped --publish 80:80 --publish 443:443 --volume /var/run/docker.sock:/var/run/docker.sock --volume $(pwd)/.kamal/proxy/config:/home/kamal-proxy/.config/kamal-proxy --log-opt max-size=\"10m\" #{Kamal::Configuration::Proxy::DEFAULT_IMAGE} on 1.1.1.2", output - assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.2", output + assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"abcdefabcdef:80\" --deploy-timeout \"6s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\" on 1.1.1.2", output end end @@ -211,7 +211,7 @@ class CliProxyTest < CliTestCase assert_match "/usr/bin/env mkdir -p .kamal/apps/app/env/roles", output assert_match %r{/usr/bin/env .* .kamal/apps/app/env/roles/web.env}, output assert_match %r{docker run --detach --restart unless-stopped --name app-web-latest --network kamal --hostname 1.1.1.1-.* -e KAMAL_CONTAINER_NAME="app-web-latest" -e KAMAL_VERSION="latest" --env-file .kamal/apps/app/env/roles/web.env --log-opt max-size="10m" --label service="app" --label role="web" --label destination dhh/app:latest}, output - assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"12345678:80\" --deploy-timeout \"6s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"", output + assert_match "docker exec kamal-proxy kamal-proxy deploy app-web --target \"12345678:80\" --deploy-timeout \"6s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"", output assert_match "docker container ls --all --filter name=^app-web-12345678$ --quiet | xargs docker stop", output assert_match "docker tag dhh/app:latest dhh/app:latest", output assert_match "/usr/bin/env mkdir -p .kamal", output diff --git a/test/commands/app_test.rb b/test/commands/app_test.rb index 440626243..bb78d8fab 100644 --- a/test/commands/app_test.rb +++ b/test/commands/app_test.rb @@ -4,7 +4,7 @@ class CommandsAppTest < ActiveSupport::TestCase setup do setup_test_secrets("secrets" => "RAILS_MASTER_KEY=456") - @config = { service: "app", image: "dhh/app", registry: { "username" => "dhh", "password" => "secret" }, servers: [ "1.1.1.1" ], env: { "secret" => [ "RAILS_MASTER_KEY" ] }, builder: { "arch" => "amd64" } } + @config = { service: "app", image: "dhh/app", registry: { "username" => "dhh", "password" => "secret" }, servers: { "web" => [ "1.1.1.1" ], "workers" => [ "1.1.1.2" ] }, env: { "secret" => [ "RAILS_MASTER_KEY" ] }, builder: { "arch" => "amd64" } } end teardown do @@ -83,11 +83,15 @@ class CommandsAppTest < ActiveSupport::TestCase new_command.stop.join(" ") end - test "stop with custom stop wait time" do - @config[:stop_wait_time] = 30 + test "stop with custom drain timeout" do + @config[:drain_timeout] = 20 assert_equal \ - "sh -c 'docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting --filter ancestor=$(docker image ls --filter reference=dhh/app:latest --format '\\''{{.ID}}'\\'') ; docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting' | head -1 | xargs docker stop -t 30", + "sh -c 'docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting --filter ancestor=$(docker image ls --filter reference=dhh/app:latest --format '\\''{{.ID}}'\\'') ; docker ps --latest --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting' | head -1 | xargs docker stop", new_command.stop.join(" ") + + assert_equal \ + "sh -c 'docker ps --latest --quiet --filter label=service=app --filter label=role=workers --filter status=running --filter status=restarting --filter ancestor=$(docker image ls --filter reference=dhh/app:latest --format '\\''{{.ID}}'\\'') ; docker ps --latest --quiet --filter label=service=app --filter label=role=workers --filter status=running --filter status=restarting' | head -1 | xargs docker stop -t 20", + new_command(role: "workers").stop.join(" ") end test "stop with version" do diff --git a/test/commands/proxy_test.rb b/test/commands/proxy_test.rb index 349e1a1be..6f331719d 100644 --- a/test/commands/proxy_test.rb +++ b/test/commands/proxy_test.rb @@ -109,7 +109,7 @@ class CommandsProxyTest < ActiveSupport::TestCase test "deploy" do assert_equal \ - "docker exec kamal-proxy kamal-proxy deploy service --target \"172.1.0.2:80\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"", + "docker exec kamal-proxy kamal-proxy deploy service --target \"172.1.0.2:80\" --deploy-timeout \"30s\" --drain-timeout \"30s\" --buffer-requests --buffer-responses --log-request-header \"Cache-Control\" --log-request-header \"Last-Modified\" --log-request-header \"User-Agent\"", new_command.deploy("service", target: "172.1.0.2").join(" ") end diff --git a/test/configuration/role_test.rb b/test/configuration/role_test.rb index 577190775..a15fed7dd 100644 --- a/test/configuration/role_test.rb +++ b/test/configuration/role_test.rb @@ -242,6 +242,14 @@ class ConfigurationRoleTest < ActiveSupport::TestCase ENV.delete("VERSION") end + test "stop args with proxy" do + assert_equal [], config_with_roles.role(:web).stop_args + end + + test "stop args with no proxy" do + assert_equal [ "-t", 30 ], config_with_roles.role(:workers).stop_args + end + private def config Kamal::Configuration.new(@deploy) diff --git a/test/configuration/validation_test.rb b/test/configuration/validation_test.rb index d8ac3e3bf..179aacfe8 100644 --- a/test/configuration/validation_test.rb +++ b/test/configuration/validation_test.rb @@ -14,7 +14,7 @@ class ConfigurationValidationTest < ActiveSupport::TestCase assert_error "#{key}: should be a boolean", **{ key => "foo" } end - [ :stop_wait_time, :retain_containers, :readiness_delay ].each do |key| + [ :deploy_timeout, :drain_timeout, :retain_containers, :readiness_delay ].each do |key| assert_error "#{key}: should be an integer", **{ key => "foo" } end diff --git a/test/fixtures/deploy_with_proxy.yml b/test/fixtures/deploy_with_proxy.yml index a1f7a8537..77174ac07 100644 --- a/test/fixtures/deploy_with_proxy.yml +++ b/test/fixtures/deploy_with_proxy.yml @@ -13,8 +13,6 @@ registry: builder: arch: amd64 -proxy: - deploy_timeout: 6s accessories: mysql: @@ -39,4 +37,4 @@ accessories: - data:/data readiness_delay: 0 -readiness_timeout: 1 +deploy_timeout: 6 diff --git a/test/fixtures/deploy_with_roles.yml b/test/fixtures/deploy_with_roles.yml index d831a5b47..6f1656bde 100644 --- a/test/fixtures/deploy_with_roles.yml +++ b/test/fixtures/deploy_with_roles.yml @@ -16,4 +16,4 @@ registry: password: pw builder: arch: amd64 -readiness_timeout: 1 +deploy_timeout: 1 diff --git a/test/integration/docker/deployer/app/config/deploy.yml b/test/integration/docker/deployer/app/config/deploy.yml index 5e9994026..b7835ca2e 100644 --- a/test/integration/docker/deployer/app/config/deploy.yml +++ b/test/integration/docker/deployer/app/config/deploy.yml @@ -20,8 +20,7 @@ env: secret: - SECRET_TAG asset_path: /usr/share/nginx/html/versions -proxy: - deploy_timeout: 2s +deploy_timeout: 2 registry: server: registry:4443 @@ -39,5 +38,5 @@ accessories: cmd: sh -c 'echo "Starting busybox..."; trap exit term; while true; do sleep 1; done' roles: - web -stop_wait_time: 1 +drain_time: 1 readiness_delay: 0 diff --git a/test/integration/docker/deployer/app_with_roles/config/deploy.yml b/test/integration/docker/deployer/app_with_roles/config/deploy.yml index 835b4ffda..e19f5a53b 100644 --- a/test/integration/docker/deployer/app_with_roles/config/deploy.yml +++ b/test/integration/docker/deployer/app_with_roles/config/deploy.yml @@ -9,8 +9,29 @@ servers: hosts: - vm3 cmd: sleep infinity +deploy_timeout: 2 +drain_timeout: 2 + proxy: - deploy_timeout: 2s + healthcheck: + interval: 1 + timeout: 1 + path: "/up" + response_timeout: 2 + buffering: + requests: true + responses: true + memory: 400_000 + max_request_body: 40_000_000 + max_response_body: 40_000_000 + forward_headers: true + logging: + request_headers: + - Cache-Control + - X-Forwarded-Proto + response_headers: + - X-Request-ID + - X-Request-Start asset_path: /usr/share/nginx/html/versions @@ -30,7 +51,7 @@ accessories: cmd: sh -c 'echo "Starting busybox..."; trap exit term; while true; do sleep 1; done' roles: - web -stop_wait_time: 1 +drain_time: 1 readiness_delay: 0 aliases: whome: version