diff --git a/.changelog/13274.txt b/.changelog/13274.txt
new file mode 100644
index 000000000000..dc5f84f6087d
--- /dev/null
+++ b/.changelog/13274.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+csi: Fixed a scheduler bug where failed feasibility checks would return early and prevent processing additional nodes
+```
diff --git a/scheduler/feasible.go b/scheduler/feasible.go
index 2fcf1a325a6d..18021d3edb68 100644
--- a/scheduler/feasible.go
+++ b/scheduler/feasible.go
@@ -1135,9 +1135,8 @@ OUTER:
 			if w.available(option) {
 				return option
 			}
-			// We match the class but are temporarily unavailable, the eval
-			// should be blocked
-			return nil
+			// We match the class but are temporarily unavailable
+			continue OUTER
 		case EvalComputedClassEscaped:
 			tgEscaped = true
 		case EvalComputedClassUnknown:
diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go
index f76e03162b71..b6c76f422db0 100644
--- a/scheduler/generic_sched_test.go
+++ b/scheduler/generic_sched_test.go
@@ -6397,6 +6397,93 @@ func TestServiceSched_CSIVolumesPerAlloc(t *testing.T) {
 }
 
+func TestServiceSched_CSITopology(t *testing.T) {
+	ci.Parallel(t)
+	h := NewHarness(t)
+
+	zones := []string{"zone-0", "zone-1", "zone-2", "zone-3"}
+
+	// Create some nodes, each running a CSI plugin with topology for
+	// a different "zone"
+	for i := 0; i < 12; i++ {
+		node := mock.Node()
+		node.Datacenter = zones[i%4]
+		node.CSINodePlugins = map[string]*structs.CSIInfo{
+			"test-plugin-" + zones[i%4]: {
+				PluginID: "test-plugin-" + zones[i%4],
+				Healthy:  true,
+				NodeInfo: &structs.CSINodeInfo{
+					MaxVolumes: 3,
+					AccessibleTopology: &structs.CSITopology{
+						Segments: map[string]string{"zone": zones[i%4]}},
+				},
+			},
+		}
+		require.NoError(t, h.State.UpsertNode(
+			structs.MsgTypeTestSetup, h.NextIndex(), node))
+	}
+
+	// create 2 per-alloc volumes for those zones
+	vol0 := structs.NewCSIVolume("myvolume[0]", 0)
+	vol0.PluginID = "test-plugin-zone-0"
+	vol0.Namespace = structs.DefaultNamespace
+	vol0.AccessMode = structs.CSIVolumeAccessModeSingleNodeWriter
+	vol0.AttachmentMode = structs.CSIVolumeAttachmentModeFilesystem
+	vol0.RequestedTopologies = &structs.CSITopologyRequest{
+		Required: []*structs.CSITopology{{
+			Segments: map[string]string{"zone": "zone-0"},
+		}},
+	}
+
+	vol1 := vol0.Copy()
+	vol1.ID = "myvolume[1]"
+	vol1.PluginID = "test-plugin-zone-1"
+	vol1.RequestedTopologies.Required[0].Segments["zone"] = "zone-1"
+
+	require.NoError(t, h.State.UpsertCSIVolume(
+		h.NextIndex(), []*structs.CSIVolume{vol0, vol1}))
+
+	// Create a job that uses those volumes
+	job := mock.Job()
+	job.Datacenters = zones
+	job.TaskGroups[0].Count = 2
+	job.TaskGroups[0].Volumes = map[string]*structs.VolumeRequest{
+		"myvolume": {
+			Type:     "csi",
+			Name:     "unique",
+			Source:   "myvolume",
+			PerAlloc: true,
+		},
+	}
+
+	require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), job))
+
+	// Create a mock evaluation to register the job
+	eval := &structs.Evaluation{
+		Namespace:   structs.DefaultNamespace,
+		ID:          uuid.Generate(),
+		Priority:    job.Priority,
+		TriggeredBy: structs.EvalTriggerJobRegister,
+		JobID:       job.ID,
+		Status:      structs.EvalStatusPending,
+	}
+
+	require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup,
+		h.NextIndex(), []*structs.Evaluation{eval}))
+
+	// Process the evaluation and expect a single plan without annotations
+	err := h.Process(NewServiceScheduler, eval)
+	require.NoError(t, err)
+	require.Len(t, h.Plans, 1, "expected one plan")
+	require.Nil(t, h.Plans[0].Annotations, "expected no annotations")
+
+	// Expect the eval has not spawned a blocked eval
+	require.Equal(t, len(h.CreateEvals), 0)
+	require.Equal(t, "", h.Evals[0].BlockedEval, "did not expect a blocked eval")
+	require.Equal(t, structs.EvalStatusComplete, h.Evals[0].Status)
+
+}
+
 // TestPropagateTaskState asserts that propagateTaskState only copies state
 // when the previous allocation is lost or draining.
 func TestPropagateTaskState(t *testing.T) {