From 33929fa1f438af7a0e51d5a50c19947f2bee9b23 Mon Sep 17 00:00:00 2001 From: Daniel Bennett Date: Tue, 24 Oct 2023 11:22:24 -0500 Subject: [PATCH] scheduler: appropriately unblock evals with quotas (#18838) When an eval is blocked due to e.g. cpu exhausted on nodes, but there happens to also be a quota on the job's namespace, the eval would not get auto- unblocked when the node cpu got freed up. This change ensures, when considering quota during BlockedEvals.unblock(), that the block was due to quota in the first place, so unblocking does not get skipped due to the mere existence of a quota on the namespace. --- .changelog/18838.txt | 3 +++ nomad/blocked_evals.go | 7 +++++-- nomad/blocked_evals_test.go | 28 ++++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 .changelog/18838.txt diff --git a/.changelog/18838.txt b/.changelog/18838.txt new file mode 100644 index 000000000000..5d8f8bb828a0 --- /dev/null +++ b/.changelog/18838.txt @@ -0,0 +1,3 @@ +```release-note:bug +scheduler (Enterprise): auto-unblock evals with associated quotas when node resources are freed up +``` diff --git a/nomad/blocked_evals.go b/nomad/blocked_evals.go index 1ec1beee4a38..f227a3c7cfce 100644 --- a/nomad/blocked_evals.go +++ b/nomad/blocked_evals.go @@ -567,10 +567,13 @@ func (b *BlockedEvals) unblock(computedClass, quota string, index uint64) { // never saw a node with the given computed class and thus needs to be // unblocked for correctness. for id, wrapped := range b.captured { - if quota != "" && wrapped.eval.QuotaLimitReached != quota { + if quota != "" && + wrapped.eval.QuotaLimitReached != "" && + wrapped.eval.QuotaLimitReached != quota { // We are unblocking based on quota and this eval doesn't match continue - } else if elig, ok := wrapped.eval.ClassEligibility[computedClass]; ok && !elig { + } + if elig, ok := wrapped.eval.ClassEligibility[computedClass]; ok && !elig { // Can skip because the eval has explicitly marked the node class // as ineligible. continue diff --git a/nomad/blocked_evals_test.go b/nomad/blocked_evals_test.go index e0e090bd412f..04d872beb423 100644 --- a/nomad/blocked_evals_test.go +++ b/nomad/blocked_evals_test.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -322,6 +323,33 @@ func TestBlockedEvals_UnblockEligible_Quota(t *testing.T) { requireBlockedEvalsEnqueued(t, blocked, broker, 1) } +// The quota here is incidental. The eval is blocked due to something else, +// e.g. cpu exhausted, but there happens to also be a quota on the namespace. +func TestBlockedEvals_UnblockEligible_IncidentalQuota(t *testing.T) { + ci.Parallel(t) + + blocked, broker := testBlockedEvals(t) + + e := mock.BlockedEval() + e.Status = structs.EvalStatusBlocked + e.QuotaLimitReached = "" // explicitly not blocked due to quota limit + blocked.Block(e) + + // Verify block caused the eval to be tracked. + blockedStats := blocked.Stats() + must.Eq(t, 1, blockedStats.TotalBlocked) + must.MapLen(t, 1, blockedStats.BlockedResources.ByJob) + // but not due to quota. + must.Eq(t, 0, blockedStats.TotalQuotaLimit) + + // When unblocking, the quota name from the alloc is passed in, + // regardless of the cause of the initial blockage. + // Since the initial block in this test was due to something else, + // it should be unblocked without regard to quota. + blocked.UnblockQuota("foo", 1000) + requireBlockedEvalsEnqueued(t, blocked, broker, 1) +} + func TestBlockedEvals_UnblockIneligible_Quota(t *testing.T) { ci.Parallel(t) require := require.New(t)