Skip to content

Commit

Permalink
scheduler: make device topology alignment switchable (#2044)
Browse files Browse the repository at this point in the history
Signed-off-by: wangjianyu.wjy <wangjianyu.wjy@alibaba-inc.com>
Co-authored-by: wangjianyu.wjy <wangjianyu.wjy@alibaba-inc.com>
  • Loading branch information
ZiMengSheng and wangjianyu.wjy authored May 25, 2024
1 parent b8892e0 commit f1afba1
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 12 deletions.
2 changes: 2 additions & 0 deletions pkg/scheduler/apis/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,4 +219,6 @@ type DeviceShareArgs struct {
Allocator string
// ScoringStrategy selects the device resource scoring strategy.
ScoringStrategy *ScoringStrategy
// DisableDeviceNUMATopologyAlignment indicates that devices do not need to align with other resources' NUMA topology.
DisableDeviceNUMATopologyAlignment bool
}
2 changes: 2 additions & 0 deletions pkg/scheduler/apis/config/v1beta3/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,4 +214,6 @@ type DeviceShareArgs struct {
Allocator string `json:"allocator,omitempty"`
// ScoringStrategy selects the device resource scoring strategy.
ScoringStrategy *ScoringStrategy `json:"scoringStrategy,omitempty"`
// DisableDeviceNUMATopologyAlignment indicates that devices do not need to align with other resources' NUMA topology.
DisableDeviceNUMATopologyAlignment bool `json:"disableDeviceNUMATopologyAlignment,omitempty"`
}
2 changes: 2 additions & 0 deletions pkg/scheduler/apis/config/v1beta3/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 26 additions & 12 deletions pkg/scheduler/plugins/deviceshare/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,10 @@ var (
)

// Plugin is the deviceshare scheduler plugin. It holds the extended framework
// handle, the node device cache, and the scorer that New builds from the
// plugin arguments via scorePlugin(args).
type Plugin struct {
handle frameworkext.ExtendedHandle
nodeDeviceCache *nodeDeviceCache
scorer *resourceAllocationScorer
// disableDeviceNUMATopologyAlignment is set in New from
// args.DisableDeviceNUMATopologyAlignment. When true, Filter,
// FilterReservation and Reserve do not read the NUMA affinity from the
// topology manager store, and GetPodTopologyHints and Allocate return
// immediately without producing hints or allocating.
disableDeviceNUMATopologyAlignment bool
handle frameworkext.ExtendedHandle
nodeDeviceCache *nodeDeviceCache
scorer *resourceAllocationScorer
}

type preFilterState struct {
Expand Down Expand Up @@ -292,8 +293,11 @@ func (p *Plugin) Filter(ctx context.Context, cycleState *framework.CycleState, p
restoreState := reservationRestoreState.getNodeState(node.Name)
preemptible := appendAllocated(nil, restoreState.mergedUnmatchedUsed, state.preemptibleDevices[node.Name])

store := topologymanager.GetStore(cycleState)
affinity := store.GetAffinity(node.Name)
var affinity topologymanager.NUMATopologyHint
if !p.disableDeviceNUMATopologyAlignment {
store := topologymanager.GetStore(cycleState)
affinity = store.GetAffinity(nodeInfo.Node().Name)
}

allocator := &AutopilotAllocator{
state: state,
Expand All @@ -302,6 +306,9 @@ func (p *Plugin) Filter(ctx context.Context, cycleState *framework.CycleState, p
pod: pod,
numaNodes: affinity.NUMANodeAffinity,
}
// Clear the NUMA constraint only when alignment is disabled. With alignment
// enabled (the default), the affinity read from the topology manager store
// must be kept so that Filter restricts device allocation to the selected
// NUMA nodes, matching FilterReservation and Reserve.
if p.disableDeviceNUMATopologyAlignment {
	allocator.numaNodes = nil
}

nodeDeviceInfo.lock.RLock()
defer nodeDeviceInfo.lock.RUnlock()
Expand Down Expand Up @@ -354,8 +361,11 @@ func (p *Plugin) FilterReservation(ctx context.Context, cycleState *framework.Cy
return nil
}

store := topologymanager.GetStore(cycleState)
affinity := store.GetAffinity(nodeInfo.Node().Name)
var affinity topologymanager.NUMATopologyHint
if !p.disableDeviceNUMATopologyAlignment {
store := topologymanager.GetStore(cycleState)
affinity = store.GetAffinity(nodeInfo.Node().Name)
}

allocator := &AutopilotAllocator{
state: state,
Expand Down Expand Up @@ -393,8 +403,11 @@ func (p *Plugin) Reserve(ctx context.Context, cycleState *framework.CycleState,
return nil
}

store := topologymanager.GetStore(cycleState)
affinity := store.GetAffinity(nodeInfo.Node().Name)
var affinity topologymanager.NUMATopologyHint
if !p.disableDeviceNUMATopologyAlignment {
store := topologymanager.GetStore(cycleState)
affinity = store.GetAffinity(nodeInfo.Node().Name)
}

allocator := &AutopilotAllocator{
state: state,
Expand Down Expand Up @@ -559,8 +572,9 @@ func New(obj runtime.Object, handle framework.Handle) (framework.Plugin, error)
go deviceCache.gcNodeDevice(context.TODO(), handle.SharedInformerFactory(), defaultGCPeriod)

return &Plugin{
handle: extendedHandle,
nodeDeviceCache: deviceCache,
scorer: scorePlugin(args),
handle: extendedHandle,
nodeDeviceCache: deviceCache,
scorer: scorePlugin(args),
disableDeviceNUMATopologyAlignment: args.DisableDeviceNUMATopologyAlignment,
}, nil
}
6 changes: 6 additions & 0 deletions pkg/scheduler/plugins/deviceshare/topology_hint.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ import (
)

func (p *Plugin) GetPodTopologyHints(ctx context.Context, cycleState *framework.CycleState, pod *corev1.Pod, nodeName string) (map[string][]topologymanager.NUMATopologyHint, *framework.Status) {
if p.disableDeviceNUMATopologyAlignment {
return nil, nil
}
state, status := getPreFilterState(cycleState)
if !status.IsSuccess() {
return nil, status
Expand All @@ -55,6 +58,9 @@ func (p *Plugin) GetPodTopologyHints(ctx context.Context, cycleState *framework.
}

func (p *Plugin) Allocate(ctx context.Context, cycleState *framework.CycleState, affinity topologymanager.NUMATopologyHint, pod *corev1.Pod, nodeName string) *framework.Status {
if p.disableDeviceNUMATopologyAlignment {
return nil
}
state, status := getPreFilterState(cycleState)
if !status.IsSuccess() {
return status
Expand Down

0 comments on commit f1afba1

Please sign in to comment.