Skip to content

Commit

Permalink
add reproducer to hashicorp/nomad#14850
Browse files Browse the repository at this point in the history
  • Loading branch information
Pavel Valodzka committed Oct 17, 2022
1 parent 9cf8b0d commit 2e4a7ae
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 5 deletions.
4 changes: 2 additions & 2 deletions Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# vi: set ft=ruby :

Vagrant.configure(2) do |config|
config.vm.box = "bento/ubuntu-16.04" # 16.04 LTS
config.vm.box = "bento/ubuntu-20.04"
config.vm.provider "virtualbox" do |vb|
vb.memory = "1516"
end
Expand All @@ -19,4 +19,4 @@ Vagrant.configure(2) do |config|
n.vm.network "private_network", ip: "172.16.1.#{i+100}"
end
end
end
end
43 changes: 43 additions & 0 deletions detect.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/ruby

require 'open-uri'
require 'net/http'
require 'json'

ORIGIN = 'http://localhost:4646/'

# Performs a GET against the Nomad HTTP API and returns the decoded JSON body.
#
# @param cmd [String] path appended to "#{ORIGIN}v1/", e.g. "job/fail/allocations"
# @return [Object] the value produced by JSON.parse for the response body
def request(cmd)
  # Block form of URI.open closes the underlying handle once the body is read,
  # instead of leaving it open until garbage collection.
  body = URI.open("#{ORIGIN}v1/#{cmd}", &:read)
  JSON.parse(body)
end

# Scans job allocations for name collisions and terminates the process if any exist.
#
# A collision is two or more allocations whose 'ClientStatus' is 'running' and
# that share both the same 'JobVersion' and the same 'Name'. One line is printed
# per colliding (version, name) pair, listing the IDs of the allocations involved.
#
# @param allocs [Array<Hash>] allocation hashes; the keys read are 'ID',
#   'JobVersion', 'Name' and 'ClientStatus'
# @return [nil] when no collision is found
# @raise [SystemExit] with status 1 (via Kernel#exit) when at least one collision is found
def detect!(allocs)
  # Only running allocations can collide; every other status is ignored.
  running = allocs.select { |alloc| alloc['ClientStatus'] == 'running' }

  has_collisions = false
  # Group by version first, then by name, so lines are reported version by version.
  running.group_by { |alloc| alloc['JobVersion'] }.each do |version, same_version|
    same_version.group_by { |alloc| alloc['Name'] }.each do |name, same_name|
      next if same_name.size == 1

      ids = same_name.map { |alloc| alloc['ID'] }
      puts "Collision detected: #{version} #{name}: #{ids.join(', ')}"
      has_collisions = true
    end
  end

  exit 1 if has_collisions
end

# Script entry point: fetch the allocations of the "fail" job and check them for collisions.
job_name = 'fail'
allocations = request("job/#{job_name}/allocations")
detect!(allocations)

6 changes: 3 additions & 3 deletions node-install-a.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ sudo docker --version

echo "Nomad Install Beginning..."
# For now we use a static version. Set to the latest tested version you want here.
NOMAD_VERSION=0.9.5
NOMAD_VERSION=1.3.6
cd /tmp/
sudo curl -sSL https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip -o nomad.zip
if [ ! -d nomad ]; then
Expand All @@ -46,8 +46,8 @@ sudo cp /vagrant/nomad-config/nomad-server-east.hcl /etc/nomad.d/
echo "Consul Install Beginning..."
# Uncomment the first and comment out the second line to get the latest version
# Otherwise use the static number
CONSUL_VERSION=$(curl -s https://checkpoint-api.hashicorp.com/v1/check/consul | jq -r ".current_version")
#CONSUL_VERSION=1.4.0
#CONSUL_VERSION=$(curl -s https://checkpoint-api.hashicorp.com/v1/check/consul | jq -r ".current_version")
CONSUL_VERSION=1.12.5
sudo curl -sSL https://releases.hashicorp.com/consul/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_linux_amd64.zip > consul.zip
if [ ! -d consul ]; then
sudo unzip /tmp/consul.zip
Expand Down
11 changes: 11 additions & 0 deletions reproduce_14850.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash -x
# Reproducer loop for hashicorp/nomad#14850: re-submits test.nomad up to 500
# times and stops at the first iteration where detect.rb exits non-zero.

# I don't know if it matters, but the cluster exhibiting the issue runs with the spread scheduler.
curl -XPUT localhost:4646/v1/operator/scheduler/configuration --data '{"SchedulerAlgorithm":"spread"}'

for i in {1..500}
do
# Pass the iteration number as the "redeploy" variable so each submission differs.
nomad job run -var redeploy=$i test.nomad
# Abort the whole run as soon as detect.rb reports a failure.
ruby detect.rb || exit 1
# Wait 5 seconds before the next submission.
sleep 5
done
84 changes: 84 additions & 0 deletions test.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
variable "redeploy" {
type = string
}

job "fail" {
datacenters = ["toronto"]


# constraint {
# attribute = "${attr.unique.consul.name}"
# operator = "regexp"
# value = "^(nomad-a-3)$"
# }

update {
healthy_deadline = "30s"
progress_deadline = "40s"
min_healthy_time = "0s"
}

meta {
redeploy = "${var.redeploy}"
}

group "fail-failed" {
count = "50"

update {
max_parallel = 50
}

network {
port "http" {
to = 8080
}
port "test1" { }
port "test2" { }
port "test3" { }
port "test4" { }
port "test5" { }
port "test6" { }
port "test7" { }
port "test8" { }
port "test9" { }
port "test0" { }
}
service {
port = "http"
check {
port = "http"
type = "http"
path = "/health"
method = "GET"
interval = "10s"
timeout = "2s"
check_restart {
limit = 2
}
}
}

task "fail" {
driver = "docker"
config {
# here https://medium.com/@obenaus.thomas/a-good-default-nomad-job-template-ea448b8a8cdd
image = "thobe/fail_service:latest"
ports = ["http"]
}
resources {
#cores = 1
memory = 64
cpu = 64
}

env {
# unhealthy config
#HEALTHY_FOR = 60
#UNHEALTHY_FOR = -1
# healthy config
HEALTHY_FOR = -1
}
}
}
}

0 comments on commit 2e4a7ae

Please sign in to comment.