Skip to content

Commit

Permalink
e2e: add e2e test for alloc restart zombies
Browse files Browse the repository at this point in the history
  • Loading branch information
shoenig committed May 15, 2023
1 parent 4d86fbe commit 509a6bf
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 0 deletions.
61 changes: 61 additions & 0 deletions e2e/clientstate/allocs_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package clientstate

import (
"testing"
"time"

"github.com/hashicorp/nomad/e2e/e2eutil"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/shoenig/test/must"
"github.com/shoenig/test/wait"
)

func TestClientAllocs(t *testing.T) {
nomad := e2eutil.NomadClient(t)

e2eutil.WaitForLeader(t, nomad)
e2eutil.WaitForNodesReady(t, nomad, 1)

t.Run("testAllocZombie", testAllocZombie)
}

// testAllocZombie ensures that a restart of a dead allocation does not cause
// it to come back to life in a not-quite alive state.
//
// https://github.com/hashicorp/nomad/issues/17079
func testAllocZombie(t *testing.T) {
nomad := e2eutil.NomadClient(t)

jobID := "alloc-zombie-" + uuid.Short()
jobIDs := []string{jobID}
t.Cleanup(e2eutil.CleanupJobsAndGC(t, &jobIDs))

// start the job and wait for alloc to become failed
err := e2eutil.Register(jobID, "./input/alloc_zombie.nomad")
must.NoError(t, err)

allocID := e2eutil.SingleAllocID(t, jobID, "", 0)

// wait for alloc to be marked as failed
e2eutil.WaitForAllocStatus(t, nomad, allocID, "failed")

// wait for additional failures to know we got rescheduled
must.Wait(t, wait.InitialSuccess(
wait.BoolFunc(func() bool {
statuses, err := e2eutil.AllocStatusesRescheduled(jobID, "")
must.NoError(t, err)
return len(statuses) > 2
}),
wait.Timeout(1*time.Minute),
wait.Gap(1*time.Second),
))

// now attempt to restart our initial allocation
// which should do nothing but give us an error
output, err := e2eutil.Command("nomad", "alloc", "restart", allocID)
must.ErrorContains(t, err, "restart of an alloc that should not run")
must.StrContains(t, output, "Failed to restart allocation")
}
59 changes: 59 additions & 0 deletions e2e/clientstate/input/alloc_zombie.nomad
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) HashiCorp, Inc.
# SPDX-License-Identifier: MPL-2.0

job "alloc_zombie" {

group "group" {
network {
mode = "host"
port "http" {}
}

service {
name = "alloczombie"
port = "http"
provider = "nomad"

check {
name = "alloczombiecheck"
type = "http"
port = "http"
path = "/does/not/exist.txt"
interval = "2s"
timeout = "1s"
check_restart {
limit = 1
grace = "3s"
}
}
}

reschedule {
attempts = 3
interval = "1m"
delay = "5s"
delay_function = "constant"
unlimited = false
}

restart {
attempts = 0
delay = "5s"
mode = "fail"
}

task "python" {
driver = "raw_exec"

config {
command = "python3"
args = ["-m", "http.server", "${NOMAD_PORT_http}", "--directory", "/tmp"]
}

resources {
cpu = 10
memory = 64
}
}
}
}

0 comments on commit 509a6bf

Please sign in to comment.