Skip to content

Commit

Permalink
e2e: added tests for check restart behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
tgross committed Jan 21, 2021
1 parent 0d149ec commit b05386e
Show file tree
Hide file tree
Showing 4 changed files with 234 additions and 0 deletions.
106 changes: 106 additions & 0 deletions e2e/consul/check_restart.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package consul

import (
"fmt"
"os"
"reflect"
"regexp"
"strings"
"time"

e2e "github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/e2e/framework"
"github.com/hashicorp/nomad/helper/uuid"
)

// ns is the namespace argument handed to the e2eutil helpers used by the
// tests in this file.
// NOTE(review): the empty string presumably selects Nomad's default
// namespace — confirm against e2eutil.
const ns = ""

// CheckRestartE2ETest is the e2e test case that exercises check_restart
// behavior for service checks, both for group-level and task-level services.
type CheckRestartE2ETest struct {
framework.TC
// jobIds records the IDs of the jobs registered by each test so that
// AfterEach can stop and purge them.
jobIds []string
}

// BeforeAll blocks until the cluster is usable: it waits for a Nomad leader
// and then for ready nodes (the node count passed to WaitForNodesReady is 1).
func (tc *CheckRestartE2ETest) BeforeAll(f *framework.F) {
e2e.WaitForLeader(f.T(), tc.Nomad())
e2e.WaitForNodesReady(f.T(), tc.Nomad(), 1)
}

// AfterEach tears down whatever the preceding test registered: every job ID
// recorded in tc.jobIds is stopped with -purge, the list is reset, and a
// system garbage collection is requested. Failures are reported through the
// non-fatal assertion helper so the remaining cleanup steps still run.
//
// Setting the NOMAD_TEST_SKIPCLEANUP environment variable to "1" skips all
// cleanup, leaving the jobs in place.
func (tc *CheckRestartE2ETest) AfterEach(f *framework.F) {
	if skip := os.Getenv("NOMAD_TEST_SKIPCLEANUP"); skip == "1" {
		return
	}

	for i := range tc.jobIds {
		_, stopErr := e2e.Command("nomad", "job", "stop", "-purge", tc.jobIds[i])
		f.Assert().NoError(stopErr)
	}
	tc.jobIds = []string{}

	_, gcErr := e2e.Command("nomad", "system", "gc")
	f.Assert().NoError(gcErr)
}

// TestGroupCheckRestart runs a job with a group service that will never
// become healthy. Both tasks should be restarted up to the 'restart' limit.
//
// The test registers the job, waits until the job's allocation statuses are
// exactly ["failed"], and then inspects `nomad alloc status` for that
// allocation: the output must contain the "Exceeded allowed attempts" event
// and report "Total Restarts = 2" for each of the two tasks.
func (tc *CheckRestartE2ETest) TestGroupCheckRestart(f *framework.F) {

	jobID := "test-group-check-restart-" + uuid.Generate()[0:8]
	f.NoError(e2e.Register(jobID, "consul/input/checks_group_restart.nomad"))
	tc.jobIds = append(tc.jobIds, jobID)

	f.NoError(
		e2e.WaitForAllocStatusComparison(
			func() ([]string, error) { return e2e.AllocStatuses(jobID, ns) },
			func(got []string) bool { return reflect.DeepEqual(got, []string{"failed"}) },
			&e2e.WaitConfig{Interval: time.Second * 10, Retries: 30},
		))

	// Resolve the ID of the allocation we just waited on. Previously the ID
	// was left as the empty string, so `nomad alloc status` was not asked
	// about this job's allocation at all.
	// NOTE(review): assumes e2eutil.AllocsForJob(jobID, ns) returns one map per
	// allocation keyed by column name, including "ID" — confirm against e2eutil.
	allocs, err := e2e.AllocsForJob(jobID, ns)
	f.NoError(err, "could not get allocations for job")
	f.Len(allocs, 1, "expected exactly one allocation for job")
	allocID := allocs[0]["ID"]
	f.NotEmpty(allocID, "allocation ID should not be empty")

	expected := "Exceeded allowed attempts 2 in interval 5m0s and mode is \"fail\""

	out, err := e2e.Command("nomad", "alloc", "status", allocID)
	f.NoError(err, "could not get allocation status")
	f.Contains(out, expected,
		fmt.Errorf("expected '%s', got\n%v", expected, out))

	re := regexp.MustCompile(`Total Restarts += (.*)\n`)
	match := re.FindAllStringSubmatch(out, -1)
	// Require one restart counter per task; without this the loop below would
	// pass vacuously if the output format changed and nothing matched.
	f.Len(match, 2,
		fmt.Errorf("expected restart counts for exactly 2 tasks, got:\n%v", out))
	for _, m := range match {
		f.Equal("2", strings.TrimSpace(m[1]),
			fmt.Errorf("expected exactly 2 restarts for both tasks, got:\n%v", out))
	}
}

// TestTaskCheckRestart runs a job with a task service that will never become
// healthy. Only the failed task should be restarted up to the 'restart'
// limit.
//
// The test registers the job, waits until the job's allocation statuses are
// exactly ["failed"], and then inspects `nomad alloc status` for that
// allocation: the output must contain the "Exceeded allowed attempts" event,
// the first reported task must show 2 restarts and the second must show 0.
func (tc *CheckRestartE2ETest) TestTaskCheckRestart(f *framework.F) {

	jobID := "test-task-check-restart-" + uuid.Generate()[0:8]
	f.NoError(e2e.Register(jobID, "consul/input/checks_task_restart.nomad"))
	tc.jobIds = append(tc.jobIds, jobID)

	f.NoError(
		e2e.WaitForAllocStatusComparison(
			func() ([]string, error) { return e2e.AllocStatuses(jobID, ns) },
			func(got []string) bool { return reflect.DeepEqual(got, []string{"failed"}) },
			&e2e.WaitConfig{Interval: time.Second * 10, Retries: 30},
		))

	// Resolve the ID of the allocation we just waited on. Previously the ID
	// was left as the empty string, so `nomad alloc status` was not asked
	// about this job's allocation at all.
	// NOTE(review): assumes e2eutil.AllocsForJob(jobID, ns) returns one map per
	// allocation keyed by column name, including "ID" — confirm against e2eutil.
	allocs, err := e2e.AllocsForJob(jobID, ns)
	f.NoError(err, "could not get allocations for job")
	f.Len(allocs, 1, "expected exactly one allocation for job")
	allocID := allocs[0]["ID"]
	f.NotEmpty(allocID, "allocation ID should not be empty")

	expected := "Exceeded allowed attempts 2 in interval 5m0s and mode is \"fail\""

	out, err := e2e.Command("nomad", "alloc", "status", allocID)
	f.NoError(err, "could not get allocation status")
	f.Contains(out, expected,
		fmt.Errorf("expected '%s', got\n%v", expected, out))

	re := regexp.MustCompile(`Total Restarts += (.*)\n`)
	match := re.FindAllStringSubmatch(out, -1)
	// Guard the indexing below: fail the test cleanly instead of panicking
	// when the output does not contain a restart counter for both tasks.
	f.Len(match, 2,
		fmt.Errorf("expected restart counts for exactly 2 tasks, got:\n%v", out))

	// The first counter is expected to belong to the failing task and the
	// second to the healthy one.
	f.Equal("2", strings.TrimSpace(match[0][1]),
		fmt.Errorf("expected exactly 2 restarts for failed task, got:\n%v", out))

	f.Equal("0", strings.TrimSpace(match[1][1]),
		fmt.Errorf("expected exactly no restarts for healthy task, got:\n%v", out))
}
1 change: 1 addition & 0 deletions e2e/consul/consul.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func init() {
Cases: []framework.TestCase{
new(ConsulE2ETest),
new(ScriptChecksE2ETest),
new(CheckRestartE2ETest),
},
})
}
Expand Down
64 changes: 64 additions & 0 deletions e2e/consul/input/checks_group_restart.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Service job with one group containing two long-sleeping raw_exec tasks
# ("fail" and "ok") and a group-level service whose script check can never
# pass. Used to observe check_restart behavior for a group service.
job "group_check_restart" {
datacenters = ["dc1"]
type = "service"

# only place this job on Linux clients
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

group "group_check_restart" {
network {
mode = "bridge"
}

# allow 2 restart attempts per 5m interval with a 1s delay between them;
# mode "fail" stops retrying once the attempts are used up
restart {
attempts = 2
delay = "1s"
interval = "5m"
mode = "fail"
}

service {
name = "group-service-1"
port = "9003"

# this check should always time out and so the service
# should not be marked healthy, resulting in the tasks
# getting restarted
check {
name = "always-dead"
type = "script"
# the script check is associated with the "fail" task
task = "fail"
interval = "2s"
timeout = "1s"
# "sleep 10" runs far longer than the 1s timeout above
command = "sleep"
args = ["10"]

# NOTE(review): presumably triggers a restart after 2 consecutive
# unhealthy check results, once the 5s grace period has passed — confirm
# against the check_restart documentation
check_restart {
limit = 2
grace = "5s"
ignore_warnings = false
}
}
}

# task the always-failing script check is attached to
task "fail" {
driver = "raw_exec"

config {
command = "bash"
args = ["-c", "sleep 15000"]
}
}

# second task in the same group, with no check of its own
task "ok" {
driver = "raw_exec"

config {
command = "bash"
args = ["-c", "sleep 15000"]
}
}
}
}
63 changes: 63 additions & 0 deletions e2e/consul/input/checks_task_restart.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Service job with one group containing two long-sleeping raw_exec tasks.
# Task "fail" carries a task-level service whose script check can never pass;
# task "ok" has no service. Used to observe check_restart behavior for a task
# service.
job "task_check" {
datacenters = ["dc1"]
type = "service"

# only place this job on Linux clients
constraint {
attribute = "${attr.kernel.name}"
value = "linux"
}

group "task_check" {
count = 1

# allow 2 restart attempts per 5m interval with a 1s delay between them;
# mode "fail" stops retrying once the attempts are used up
restart {
attempts = 2
delay = "1s"
interval = "5m"
mode = "fail"
}

# task that owns the always-failing service check
task "fail" {

service {
name = "task-service-1"

# this check should always time out and so the service
# should not be marked healthy
check {
name = "always-dead"
type = "script"
interval = "2s"
timeout = "1s"
# "sleep 10" runs far longer than the 1s timeout above
command = "sleep"
args = ["10"]

# NOTE(review): presumably triggers a restart after 2 consecutive
# unhealthy check results, once the 5s grace period has passed — confirm
# against the check_restart documentation
check_restart {
limit = 2
grace = "5s"
ignore_warnings = false
}

}
}

driver = "raw_exec"

config {
command = "bash"
args = ["-c", "sleep 15000"]
}
}


# second task in the same group, with no service or check of its own
task "ok" {
driver = "raw_exec"

config {
command = "bash"
args = ["-c", "sleep 15000"]
}
}

}
}

0 comments on commit b05386e

Please sign in to comment.