Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scheduler: enhance reservation error message #2058

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/scheduler/plugins/deviceshare/plugin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2058,7 +2058,7 @@ func Test_Plugin_FilterReservation(t *testing.T) {
assert.True(t, status.IsSuccess())

status = pl.FilterReservation(context.TODO(), cycleState, pod, reservationInfo, "test-node-1")
assert.Equal(t, framework.NewStatus(framework.Unschedulable, "node(s) no reservation(s) to meet the device requirements"), status)
assert.Equal(t, framework.NewStatus(framework.Unschedulable, "Reservation(s) Insufficient gpu devices"), status)
}

func Test_Plugin_Reserve(t *testing.T) {
Expand Down
15 changes: 14 additions & 1 deletion pkg/scheduler/plugins/deviceshare/reservation.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ func (p *Plugin) tryAllocateFromReservation(

basicPreemptible = appendAllocated(nil, basicPreemptible, restoreState.mergedMatchedAllocated)

var reservationReasons []*framework.Status
for _, alloc := range matchedReservations {
rInfo := alloc.rInfo
preemptibleInRR := state.preemptibleInRRs[node.Name][rInfo.UID()]
Expand All @@ -222,6 +223,7 @@ func (p *Plugin) tryAllocateFromReservation(
hasSatisfiedReservation = true
break
}
reservationReasons = append(reservationReasons, status)
} else if allocatePolicy == schedulingv1alpha1.ReservationAllocatePolicyRestricted {
_, status := allocator.Allocate(preferred, preferred, nil, preemptible)
if status.IsSuccess() {
Expand All @@ -236,15 +238,26 @@ func (p *Plugin) tryAllocateFromReservation(
hasSatisfiedReservation = true
break
}
reservationReasons = append(reservationReasons, status)
}
}
}
if !hasSatisfiedReservation && requiredFromReservation {
return nil, framework.NewStatus(framework.Unschedulable, "node(s) no reservation(s) to meet the device requirements")
return nil, framework.NewStatus(framework.Unschedulable, p.makeReasonsByReservation(reservationReasons)...)
}
return result, nil
}

// makeReasonsByReservation flattens the failure statuses collected while trying
// each matched reservation into a single list of human-readable reasons, each
// prefixed with "Reservation(s)" so the origin of the failure is clear.
func (p *Plugin) makeReasonsByReservation(reservationReasons []*framework.Status) []string {
	var out []string
	for _, st := range reservationReasons {
		for _, reason := range st.Reasons() {
			out = append(out, "Reservation(s) "+reason)
		}
	}
	return out
}

// scoreWithReservation combine the reservation with the node's resource usage to calculate the reservation score.
func (p *Plugin) scoreWithReservation(
allocator *AutopilotAllocator,
Expand Down
6 changes: 3 additions & 3 deletions pkg/scheduler/plugins/deviceshare/reservation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ func Test_tryAllocateFromReservation(t *testing.T) {
},
requiredFromReservation: true,
wantResult: nil,
wantStatus: framework.NewStatus(framework.Unschedulable, "node(s) no reservation(s) to meet the device requirements"),
wantStatus: framework.NewStatus(framework.Unschedulable, "Reservation(s) Insufficient gpu devices"),
},
{
name: "failed to allocate from Aligned policy reservation that remaining little not fits request",
Expand Down Expand Up @@ -566,7 +566,7 @@ func Test_tryAllocateFromReservation(t *testing.T) {
},
requiredFromReservation: true,
wantResult: nil,
wantStatus: framework.NewStatus(framework.Unschedulable, "node(s) no reservation(s) to meet the device requirements"),
wantStatus: framework.NewStatus(framework.Unschedulable, "Reservation(s) Insufficient gpu devices"),
},
{
name: "allocate from Restricted policy reservation",
Expand Down Expand Up @@ -650,7 +650,7 @@ func Test_tryAllocateFromReservation(t *testing.T) {
},
requiredFromReservation: true,
wantResult: nil,
wantStatus: framework.NewStatus(framework.Unschedulable, "node(s) no reservation(s) to meet the device requirements"),
wantStatus: framework.NewStatus(framework.Unschedulable, "Reservation(s) Insufficient gpu devices"),
},
}

Expand Down
75 changes: 61 additions & 14 deletions pkg/scheduler/plugins/reservation/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package reservation
import (
"context"
"fmt"
"strconv"
"strings"

corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
Expand Down Expand Up @@ -152,29 +154,39 @@ type stateData struct {
preemptible map[string]corev1.ResourceList
preemptibleInRRs map[string]map[types.UID]corev1.ResourceList

nodeReservationStates map[string]nodeReservationState
preferredNode string
assumed *frameworkext.ReservationInfo
nodeReservationStates map[string]nodeReservationState
nodeReservationDiagnosis map[string]nodeDiagnosisState
preferredNode string
assumed *frameworkext.ReservationInfo
}

// nodeReservationState holds the per-node reservation bookkeeping computed
// during filtering: which reservations were matched on the node and the
// resource tallies needed to decide whether the pod fits.
type nodeReservationState struct {
	// nodeName is the name of the node this state describes.
	nodeName string
	// matched holds the reservation infos on this node considered usable by
	// the pod (presumably owner-matched — confirm against the producer).
	matched []*frameworkext.ReservationInfo

	// podRequested represents all Pods(including matched reservation) requested resources
	// but excluding the already allocated from unmatched reservations
	podRequested *framework.Resource
	// rAllocated represents the allocated resources of matched reservations
	rAllocated *framework.Resource
}

// nodeDiagnosisState records, for one node, why owner-matched reservations
// could not be used; the per-node counts are aggregated by
// makePostFilterReasons to build the PostFilter diagnosis messages.
type nodeDiagnosisState struct {
	// nodeName is the name of the diagnosed node.
	nodeName                 string
	ownerMatched             int // owner matched
	isUnschedulableUnmatched int // owner matched but unmatched due to unschedulable
	affinityUnmatched        int // owner matched but unmatched due to affinity
}

func (s *stateData) Clone() framework.StateData {
ns := &stateData{
hasAffinity: s.hasAffinity,
podRequests: s.podRequests,
podRequestsResources: s.podRequestsResources,
nodeReservationStates: s.nodeReservationStates,
preferredNode: s.preferredNode,
assumed: s.assumed,
hasAffinity: s.hasAffinity,
podRequests: s.podRequests,
podRequestsResources: s.podRequestsResources,
nodeReservationStates: s.nodeReservationStates,
nodeReservationDiagnosis: s.nodeReservationDiagnosis,
preferredNode: s.preferredNode,
assumed: s.assumed,
}
preemptible := map[string]corev1.ResourceList{}
for nodeName, returned := range s.preemptible {
Expand Down Expand Up @@ -382,7 +394,7 @@ func (pl *Plugin) filterWithReservations(ctx context.Context, cycleState *framew

var hasSatisfiedReservation bool
allInsufficientResourcesByNode := sets.NewString()
allInsufficientResourcesByReservation := sets.NewString()
var allInsufficientResourcesByReservation []string
for _, rInfo := range matchedReservations {
resourceNames := quotav1.Intersection(rInfo.ResourceNames, podRequestsResourceNames)
if len(resourceNames) == 0 {
Expand Down Expand Up @@ -417,7 +429,7 @@ func (pl *Plugin) filterWithReservations(ctx context.Context, cycleState *framew
}
allInsufficientResourcesByNode.Insert(insufficientResourcesByNode...)
for _, insufficientResourceByReservation := range insufficientResourcesByReservation {
allInsufficientResourcesByReservation.Insert(string(insufficientResourceByReservation))
allInsufficientResourcesByReservation = append(allInsufficientResourcesByReservation, string(insufficientResourceByReservation))
}
}
}
Expand All @@ -428,8 +440,8 @@ func (pl *Plugin) filterWithReservations(ctx context.Context, cycleState *framew
for insufficientResourceByNode := range allInsufficientResourcesByNode {
failureReasons = append(failureReasons, fmt.Sprintf("Insufficient %s by node", insufficientResourceByNode))
}
for insufficientResourceByReservation := range allInsufficientResourcesByReservation {
failureReasons = append(failureReasons, fmt.Sprintf("Insufficient %s by reservation", insufficientResourceByReservation))
for _, insufficientResourceByReservation := range allInsufficientResourcesByReservation {
failureReasons = append(failureReasons, fmt.Sprintf("Reservation(s) Insufficient %s", insufficientResourceByReservation))
}
if len(failureReasons) == 0 {
failureReasons = append(failureReasons, ErrReasonNoReservationsMeetRequirements)
Expand Down Expand Up @@ -497,7 +509,42 @@ func fitsNode(podRequest *framework.Resource, nodeInfo *framework.NodeInfo, node

func (pl *Plugin) PostFilter(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, _ framework.NodeToStatusMap) (*framework.PostFilterResult, *framework.Status) {
// Implement an empty function to be compatible with existing configurations
return nil, framework.NewStatus(framework.Unschedulable)
state := getStateData(cycleState)
saintube marked this conversation as resolved.
Show resolved Hide resolved
reasons := pl.makePostFilterReasons(state)
return nil, framework.NewStatus(framework.Unschedulable, reasons...)
}

// makePostFilterReasons aggregates the per-node reservation diagnosis recorded
// in the cycle state and renders it as PostFilter failure messages.
// The framework does not aggregate identical reasons for PostFilter, so the
// exact message strings are composed here.
func (pl *Plugin) makePostFilterReasons(state *stateData) []string {
	var ownerMatched, affinityUnmatched, unschedulableUnmatched int
	for _, diagnosis := range state.nodeReservationDiagnosis {
		ownerMatched += diagnosis.ownerMatched
		affinityUnmatched += diagnosis.affinityUnmatched
		unschedulableUnmatched += diagnosis.isUnschedulableUnmatched
	}
	// Stay silent when no reservation matched the pod's owner and the pod does
	// not require reservation affinity.
	if ownerMatched <= 0 && !state.hasAffinity {
		return nil
	}

	var reasons []string
	// appendReason renders "<count><message>" with a fresh builder per reason.
	appendReason := func(count int, message string) {
		var sb strings.Builder
		sb.WriteString(strconv.Itoa(count))
		sb.WriteString(message)
		reasons = append(reasons, sb.String())
	}
	if affinityUnmatched > 0 {
		appendReason(affinityUnmatched, " Reservation(s) didn't match affinity rules")
	}
	if unschedulableUnmatched > 0 {
		appendReason(unschedulableUnmatched, " Reservation(s) is unschedulable")
	}
	appendReason(ownerMatched, " Reservation(s) matched owner total")
	return reasons
}

func (pl *Plugin) FilterReservation(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, reservationInfo *frameworkext.ReservationInfo, nodeName string) *framework.Status {
Expand Down
161 changes: 158 additions & 3 deletions pkg/scheduler/plugins/reservation/plugin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -870,7 +870,7 @@ func Test_filterWithReservations(t *testing.T) {
},
},
},
wantStatus: framework.NewStatus(framework.Unschedulable, "Insufficient cpu by reservation"),
wantStatus: framework.NewStatus(framework.Unschedulable, "Reservation(s) Insufficient cpu"),
},
{
name: "filter default reservations with preemption",
Expand Down Expand Up @@ -1160,7 +1160,74 @@ func Test_filterWithReservations(t *testing.T) {
},
},
},
wantStatus: framework.NewStatus(framework.Unschedulable, "Insufficient cpu by reservation"),
wantStatus: framework.NewStatus(framework.Unschedulable, "Reservation(s) Insufficient cpu"),
},
{
name: "failed to filter multiple restricted reservations with preempt from node",
stateData: &stateData{
hasAffinity: true,
podRequests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("4"),
},
podRequestsResources: &framework.Resource{
MilliCPU: 4 * 1000,
},
preemptible: map[string]corev1.ResourceList{
node.Name: {
corev1.ResourceCPU: resource.MustParse("4"),
},
},
preemptibleInRRs: nil,
nodeReservationStates: map[string]nodeReservationState{
node.Name: {
podRequested: &framework.Resource{
MilliCPU: 38 * 1000,
},
rAllocated: &framework.Resource{
MilliCPU: 10000,
},
matched: []*frameworkext.ReservationInfo{
{
Reservation: &schedulingv1alpha1.Reservation{
ObjectMeta: metav1.ObjectMeta{
Name: "test-r",
UID: "123456",
},
Spec: schedulingv1alpha1.ReservationSpec{
AllocatePolicy: schedulingv1alpha1.ReservationAllocatePolicyRestricted,
},
},
ResourceNames: []corev1.ResourceName{corev1.ResourceCPU},
Allocatable: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("6"),
},
Allocated: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("6"),
},
},
{
Reservation: &schedulingv1alpha1.Reservation{
ObjectMeta: metav1.ObjectMeta{
Name: "test-r-1",
UID: "7891011",
},
Spec: schedulingv1alpha1.ReservationSpec{
AllocatePolicy: schedulingv1alpha1.ReservationAllocatePolicyRestricted,
},
},
ResourceNames: []corev1.ResourceName{corev1.ResourceCPU},
Allocatable: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("4"),
},
Allocated: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("1"),
},
},
},
},
},
},
wantStatus: framework.NewStatus(framework.Unschedulable, "Reservation(s) Insufficient cpu", "Reservation(s) Insufficient cpu"),
},
{
name: "failed to filter restricted reservations with preempt from reservation and node",
Expand Down Expand Up @@ -1217,7 +1284,7 @@ func Test_filterWithReservations(t *testing.T) {
},
},
},
wantStatus: framework.NewStatus(framework.Unschedulable, "Insufficient cpu by reservation"),
wantStatus: framework.NewStatus(framework.Unschedulable, "Reservation(s) Insufficient cpu"),
},
{
name: "filter restricted reservations with preempt from reservation and node",
Expand Down Expand Up @@ -1766,6 +1833,94 @@ func TestFilterReservation(t *testing.T) {
}
}

// TestPostFilter verifies that PostFilter surfaces the aggregated reservation
// diagnosis (affinity-unmatched, unschedulable-unmatched, and owner-matched
// totals) as reasons on the returned Unschedulable status.
func TestPostFilter(t *testing.T) {
	type args struct {
		hasStateData             bool
		hasAffinity              bool
		nodeReservationDiagnosis map[string]nodeDiagnosisState
	}
	tests := []struct {
		name  string
		args  args
		want  *framework.PostFilterResult
		want1 *framework.Status
	}{
		{
			// No state written to the cycle: PostFilter returns a bare
			// Unschedulable status with no diagnosis reasons.
			name: "no reservation filtering",
			args: args{
				hasStateData: false,
			},
			want:  nil,
			want1: framework.NewStatus(framework.Unschedulable),
		},
		{
			// Reservation affinity forces the owner-matched total to be
			// reported even when the diagnosis map is empty.
			name: "show reservation owner matched when reservation affinity specified",
			args: args{
				hasStateData:             true,
				hasAffinity:              true,
				nodeReservationDiagnosis: map[string]nodeDiagnosisState{},
			},
			want:  nil,
			want1: framework.NewStatus(framework.Unschedulable, "0 Reservation(s) matched owner total"),
		},
		{
			// Counts are summed across nodes: 3+1 unschedulable-unmatched.
			name: "show reservation owner matched, unschedulable unmatched",
			args: args{
				hasStateData: true,
				nodeReservationDiagnosis: map[string]nodeDiagnosisState{
					"test-node-0": {
						ownerMatched:             3,
						isUnschedulableUnmatched: 3,
						affinityUnmatched:        0,
					},
					"test-node-1": {
						ownerMatched:             1,
						isUnschedulableUnmatched: 1,
						affinityUnmatched:        0,
					},
				},
			},
			want:  nil,
			want1: framework.NewStatus(framework.Unschedulable, "4 Reservation(s) is unschedulable", "4 Reservation(s) matched owner total"),
		},
		{
			// Mixed causes: affinity-unmatched and unschedulable-unmatched
			// reasons are both emitted, ahead of the owner-matched total.
			name: "show reservation owner matched, unschedulable and affinity unmatched",
			args: args{
				hasStateData: true,
				nodeReservationDiagnosis: map[string]nodeDiagnosisState{
					"test-node-0": {
						ownerMatched:             3,
						isUnschedulableUnmatched: 0,
						affinityUnmatched:        3,
					},
					"test-node-1": {
						ownerMatched:             2,
						isUnschedulableUnmatched: 1,
						affinityUnmatched:        1,
					},
				},
			},
			want:  nil,
			want1: framework.NewStatus(framework.Unschedulable, "4 Reservation(s) didn't match affinity rules", "1 Reservation(s) is unschedulable", "5 Reservation(s) matched owner total"),
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			pl := &Plugin{}
			cycleState := framework.NewCycleState()
			if tt.args.hasStateData {
				cycleState.Write(stateKey, &stateData{
					hasAffinity:              tt.args.hasAffinity,
					nodeReservationDiagnosis: tt.args.nodeReservationDiagnosis,
				})
			}
			got, got1 := pl.PostFilter(context.TODO(), cycleState, nil, nil)
			assert.Equal(t, tt.want, got)
			assert.Equal(t, tt.want1, got1)
		})
	}
}

func TestReserve(t *testing.T) {
reservation2C4G := &schedulingv1alpha1.Reservation{
ObjectMeta: metav1.ObjectMeta{
Expand Down
Loading