E2E tests for node drain
* re-enable the previously disabled tests
* port off the old framework and onto the stdlib test runner (see the sketch below)
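The "stdlib runner" is Go's standard testing package driving named subtests. A minimal sketch of the pattern, assuming only the subtest names that appear later in this commit:

package nodedrain

import "testing"

// TestNodeDrain is the single stdlib entry point; each drain scenario is
// registered as a subtest with t.Run instead of with the old e2e framework.
func TestNodeDrain(t *testing.T) {
	t.Run("DeadlineFlag", testDeadlineFlag)
	t.Run("ForceFlag", testForceFlag)
}

func testDeadlineFlag(t *testing.T) { /* drain with -deadline and assert allocs complete */ }
func testForceFlag(t *testing.T)    { /* drain with -force and assert allocs complete */ }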
tgross committed Apr 10, 2023
1 parent 520eb93 commit 69e7387
Showing 4 changed files with 95 additions and 182 deletions.
2 changes: 1 addition & 1 deletion e2e/e2e_test.go
@@ -21,7 +21,6 @@ import (
_ "github.com/hashicorp/nomad/e2e/lifecycle"
_ "github.com/hashicorp/nomad/e2e/metrics"
_ "github.com/hashicorp/nomad/e2e/networking"
_ "github.com/hashicorp/nomad/e2e/nodedrain"
_ "github.com/hashicorp/nomad/e2e/nomadexec"
_ "github.com/hashicorp/nomad/e2e/oversubscription"
_ "github.com/hashicorp/nomad/e2e/parameterized"
@@ -42,6 +41,7 @@ import (
// we get a quick check that they compile on every commit
_ "github.com/hashicorp/nomad/e2e/disconnectedclients"
_ "github.com/hashicorp/nomad/e2e/namespaces"
_ "github.com/hashicorp/nomad/e2e/nodedrain"
_ "github.com/hashicorp/nomad/e2e/volumes"
)

1 change: 0 additions & 1 deletion e2e/nodedrain/input/drain_deadline.nomad
@@ -1,5 +1,4 @@
job "drain_deadline" {
datacenters = ["dc1", "dc2"]

constraint {
attribute = "${attr.kernel.name}"
94 changes: 94 additions & 0 deletions e2e/nodedrain/node_drain_test.go
@@ -27,6 +27,8 @@ func TestNodeDrain(t *testing.T) {
t.Run("IgnoreSystem", testIgnoreSystem)
t.Run("EphemeralMigrate", testEphemeralMigrate)
t.Run("KeepIneligible", testKeepIneligible)
t.Run("DeadlineFlag", testDeadlineFlag)
t.Run("ForceFlag", testForceFlag)
}

// testIgnoreSystem tests that system jobs are left behind when the
@@ -173,6 +175,98 @@ func testKeepIneligible(t *testing.T) {
}
}

// testDeadlineFlag tests the enforcement of the node drain deadline so
// that allocations are terminated even if they haven't gracefully exited.
func testDeadlineFlag(t *testing.T) {

jobIDs := []string{}
nodeIDs := []string{}
t.Cleanup(cleanupDrainState(t))

jobID := "test-node-drain-" + uuid.Generate()[0:8]
must.NoError(t, e2eutil.Register(jobID, "./input/drain_deadline.nomad"))
jobIDs = append(jobIDs, jobID)

expected := []string{"running"}
must.NoError(t, e2eutil.WaitForAllocStatusExpected(jobID, ns, expected), must.Sprint("job should be running"))

nodes, err := nodesForJob(jobID)
must.NoError(t, err, must.Sprint("could not get nodes for job"))
must.Len(t, 1, nodes, must.Sprint("expected exactly one node for job"))
nodeID := nodes[0]

t.Logf("draining node %v", nodeID)
out, err := e2eutil.Command(
"nomad", "node", "drain",
"-deadline", "5s",
"-enable", "-yes", "-detach", nodeID)
must.NoError(t, err, must.Sprintf("'nomad node drain %v' failed: %v\n%v", nodeID, err, out))
nodeIDs = append(nodeIDs, nodeID)

// the drain deadline is 5s, but we can't guarantee the alloc is terminated
// the instant it expires, so we allow up to 40s, which is still well under
// the 2m kill_timeout in the job. The wait also needs to account for
// scheduling and propagation delays.
must.NoError(t, waitForNodeDrain(nodeID,
func(got []map[string]string) bool {
// FIXME: check the drained job's alloc specifically; the test may
// pass if the client had another completed alloc
for _, alloc := range got {
if alloc["Status"] == "complete" {
return true
}
}
return false
}, &e2eutil.WaitConfig{Interval: time.Second, Retries: 40},
), must.Sprint("node did not drain immediately following deadline"))
}
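
// A hypothetical sketch of the waitForNodeDrain-style poller the subtests
// above rely on; the real helper is not shown in this diff, and the fetch
// callback shape here is an assumption rather than the repo's e2eutil API.
// Assumes the fmt and time imports at the top of the file.
func waitForDrainSketch(
	fetch func(nodeID string) ([]map[string]string, error), // assumed shape of an alloc-listing helper
	nodeID string,
	done func([]map[string]string) bool,
	interval time.Duration,
	retries int,
) error {
	var lastErr error
	for i := 0; i < retries; i++ {
		allocs, err := fetch(nodeID)
		if err != nil {
			lastErr = err
		} else if done(allocs) {
			// the caller's predicate is satisfied; the node has drained
			return nil
		}
		time.Sleep(interval)
	}
	return fmt.Errorf("node %s did not finish draining: %v", nodeID, lastErr)
}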

// testForceFlag tests the enforcement of the node drain -force flag so that
// allocations are terminated immediately.
func testForceFlag(t *testing.T) {

jobIDs := []string{}
nodeIDs := []string{}
t.Cleanup(cleanupDrainState(t))

jobID := "test-node-drain-" + uuid.Generate()[0:8]
must.NoError(t, e2eutil.Register(jobID, "./input/drain_deadline.nomad"))
jobIDs = append(jobIDs, jobID)

expected := []string{"running"}
must.NoError(t, e2eutil.WaitForAllocStatusExpected(jobID, ns, expected), must.Sprint("job should be running"))

nodes, err := nodesForJob(jobID)
must.NoError(t, err, must.Sprint("could not get nodes for job"))
must.Len(t, 1, nodes, must.Sprint("expected exactly one node for job"))
nodeID := nodes[0]

t.Logf("draining node %v", nodeID)
out, err := e2eutil.Command(
"nomad", "node", "drain",
"-force",
"-enable", "-yes", "-detach", nodeID)
must.NoError(t, err, must.Sprintf("'nomad node drain' failed: %v\n%v", err, out))
nodeIDs = append(nodeIDs, nodeID)

// we've passed -force, but we can't guarantee the alloc is terminated
// instantly, so we allow up to 40s, which is still well under the 2m
// kill_timeout in the job
must.NoError(t, waitForNodeDrain(nodeID,
func(got []map[string]string) bool {
// FIXME: check the drained job's alloc specifically; the test may
// pass if the client had another completed alloc
for _, alloc := range got {
if alloc["Status"] == "complete" {
return true
}
}
return false
}, &e2eutil.WaitConfig{Interval: time.Second, Retries: 40},
), must.Sprint("node did not drain immediately when forced"))

}
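
// A possible way to address the FIXME in the two predicates above: only
// count completed allocs that belong to the drained job. This is a sketch,
// and the "Job ID" key is an assumption about the parsed alloc rows rather
// than something this diff confirms.
func allocCompleteForJob(jobID string) func([]map[string]string) bool {
	return func(got []map[string]string) bool {
		for _, alloc := range got {
			// skip allocs from unrelated jobs so a stray completed alloc
			// on the same client cannot make the test pass spuriously
			if alloc["Job ID"] != jobID {
				continue
			}
			if alloc["Status"] == "complete" {
				return true
			}
		}
		return false
	}
}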

// registerAndWaitForRunning registers a job and waits for the expected number
// of allocations to be in a running state. Returns the allocations.
func registerAndWaitForRunning(t *testing.T, nomadClient *api.Client, jobID, jobSpec string, expectedCount int) []*api.AllocationListStub {
180 changes: 0 additions & 180 deletions e2e/nodedrain/nodedrain.go

This file was deleted.
