Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

koordlet: tc plugin for netqos #1920

Merged
merged 1 commit into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docker/koordlet.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ RUN go build -a -o koordlet cmd/koordlet/main.go
FROM --platform=$TARGETPLATFORM nvidia/cuda:11.8.0-base-ubuntu22.04
WORKDIR /
RUN apt-get update && apt-get install -y lvm2 && rm -rf /var/lib/apt/lists/*
# Install iptables and remove the apt package lists in the same layer so they
# are not kept in the final image (same pattern as the lvm2 install above).
RUN apt-get update && apt-get install -y iptables && rm -rf /var/lib/apt/lists/*
COPY --from=builder /go/src/github.com/koordinator-sh/koordinator/koordlet .
COPY --from=builder /usr/local/lib /usr/lib
ENTRYPOINT ["/koordlet"]
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
github.com/cakturk/go-netstat v0.0.0-20200220111822-e5b49efee7a5
github.com/containerd/nri v0.6.1
github.com/coreos/go-iptables v0.5.0
github.com/docker/docker v20.10.21+incompatible
github.com/evanphx/json-patch v5.6.0+incompatible
github.com/fsnotify/fsnotify v1.6.0
Expand Down Expand Up @@ -196,7 +197,7 @@ require (
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.9 // indirect
github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5 // indirect
github.com/vishvananda/netlink v1.1.1-0.20210330154013-f5de75959ad5
github.com/vishvananda/netns v0.0.4 // indirect
github.com/vmware/govmomi v0.30.6 // indirect
go.etcd.io/etcd/api/v3 v3.5.9 // indirect
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,7 @@ github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkE
github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE=
github.com/coreos/go-iptables v0.4.5/go.mod h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmekav8Dbxlm1MU=
github.com/coreos/go-iptables v0.5.0 h1:mw6SAibtHKZcNzAsOxjoHIG0gy5YFHhypWSSNc6EjbQ=
github.com/coreos/go-iptables v0.5.0/go.mod h1:/mVI274lEDI2ns62jHCDnCyBF9Iwsmekav8Dbxlm1MU=
github.com/coreos/go-oidc v2.2.1+incompatible/go.mod h1:CgnwVTmzoESiwO9qyAFEMiHoZ1nMCKZlZ9V6mm3/LKc=
github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
Expand Down
17 changes: 17 additions & 0 deletions pkg/koordlet/resourceexecutor/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type CgroupReader interface {
ReadCPUProcs(parentDir string) ([]uint32, error)
ReadPSI(parentDir string) (*sysutil.PSIByResource, error)
ReadMemoryColdPageUsage(parentDir string) (uint64, error)
ReadNetClsId(parentDir string) (uint64, error)
}

var _ CgroupReader = &CgroupV1Reader{}
Expand Down Expand Up @@ -230,6 +231,14 @@ func (r *CgroupV1Reader) ReadPSI(parentDir string) (*sysutil.PSIByResource, erro
return psi, nil
}

// ReadNetClsId looks up the NetClsClassIdName resource registered for cgroup v1
// and reads it for the cgroup at parentDir, parsing the content as a uint64.
// It returns ErrResourceNotRegistered when the resource is not present in the
// default registry.
func (r *CgroupV1Reader) ReadNetClsId(parentDir string) (uint64, error) {
	if res, found := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV1, sysutil.NetClsClassIdName); found {
		return readCgroupAndParseUint64(parentDir, res)
	}
	return 0, ErrResourceNotRegistered
}

var _ CgroupReader = &CgroupV2Reader{}

type CgroupV2Reader struct{}
Expand Down Expand Up @@ -436,6 +445,14 @@ func (r *CgroupV2Reader) ReadPSI(parentDir string) (*sysutil.PSIByResource, erro
return psi, nil
}

// ReadNetClsId looks up the NetClsClassIdName resource registered for cgroup v2
// and reads it for the cgroup at parentDir, parsing the content as a uint64.
// It returns ErrResourceNotRegistered when the resource is not present in the
// default registry.
func (r *CgroupV2Reader) ReadNetClsId(parentDir string) (uint64, error) {
	if res, found := sysutil.DefaultRegistry.Get(sysutil.CgroupVersionV2, sysutil.NetClsClassIdName); found {
		return readCgroupAndParseUint64(parentDir, res)
	}
	return 0, ErrResourceNotRegistered
}

func NewCgroupReader() CgroupReader {
if sysutil.GetCurrentCgroupVersion() == sysutil.CgroupVersionV2 {
return &CgroupV2Reader{}
Expand Down
1 change: 1 addition & 0 deletions pkg/koordlet/resourceexecutor/updater.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ func init() {
sysutil.MemoryPriorityName,
sysutil.MemoryUsePriorityOomName,
sysutil.MemoryOomGroupName,
sysutil.NetClsClassIdName,
)
// special cases
DefaultCgroupUpdaterFactory.Register(NewCgroupUpdaterWithUpdateFunc(CgroupUpdateCPUSharesFunc), sysutil.CPUSharesName)
Expand Down
8 changes: 8 additions & 0 deletions pkg/koordlet/runtimehooks/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import (
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/cpuset"
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/gpu"
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/groupidentity"
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/tc"
"github.com/koordinator-sh/koordinator/pkg/koordlet/runtimehooks/hooks/terwayqos"
"github.com/koordinator-sh/koordinator/pkg/koordlet/util/system"
)
Expand Down Expand Up @@ -81,6 +82,11 @@ const (
// owner: @l1b0k
// alpha: v1.5
TerwayQoS featuregate.Feature = "TerwayQoS"

// TCNetworkQoS indicates a network qos implementation based on tc.
// owner: @lucming
// alpha: v1.5
TCNetworkQoS featuregate.Feature = "TCNetworkQoS"
)

var (
Expand All @@ -92,6 +98,7 @@ var (
CPUNormalization: {Default: false, PreRelease: featuregate.Alpha},
CoreSched: {Default: false, PreRelease: featuregate.Alpha},
TerwayQoS: {Default: false, PreRelease: featuregate.Alpha},
TCNetworkQoS: {Default: false, PreRelease: featuregate.Alpha},
}

runtimeHookPlugins = map[featuregate.Feature]HookPlugin{
Expand All @@ -102,6 +109,7 @@ var (
CPUNormalization: cpunormalization.Object(),
CoreSched: coresched.Object(),
TerwayQoS: terwayqos.Object(),
TCNetworkQoS: tc.Object(),
}
)

Expand Down
127 changes: 127 additions & 0 deletions pkg/koordlet/runtimehooks/hooks/tc/helper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
Copyright 2022 The Koordinator Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tc

import (
"fmt"
"strconv"
"strings"

"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/intstr"

slov1alpha1 "github.com/koordinator-sh/koordinator/apis/slo/v1alpha1"
)

// loadConfigFromNodeSlo builds the tc network QoS configuration from a NodeSLO spec.
//
// HwRxBpsMax and HwTxBpsMax are both set to SystemStrategy.TotalNetworkBandwidth.
// The L1* fields are filled from the LSClass network QoS settings and the L2*
// fields from the BEClass settings; a class is only applied when its NetworkQOS
// is present and its Enable flag is set and true. Every field that is not
// filled stays at zero.
//
// A nil nodesloSpec yields an all-zero configuration.
func loadConfigFromNodeSlo(nodesloSpec *slov1alpha1.NodeSLOSpec) *NetQosGlobalConfig {
	res := NetQosGlobalConfig{}
	// Guard once up front: every access below dereferences nodesloSpec.
	if nodesloSpec == nil {
		return &res
	}

	var total uint64
	if nodesloSpec.SystemStrategy != nil {
		total = uint64(nodesloSpec.SystemStrategy.TotalNetworkBandwidth.Value())
		res.HwRxBpsMax = total
		res.HwTxBpsMax = total
	}

	strategy := nodesloSpec.ResourceQOSStrategy
	if strategy == nil {
		return &res
	}

	// Enable is a pointer; an unset flag is treated as "not enabled" rather
	// than being dereferenced.
	if strategy.LSClass != nil &&
		strategy.LSClass.NetworkQOS != nil &&
		strategy.LSClass.NetworkQOS.Enable != nil &&
		*strategy.LSClass.NetworkQOS.Enable {
		cur := strategy.LSClass.NetworkQOS
		res.L1RxBpsMin = getBandwidthVal(total, cur.IngressRequest)
		res.L1RxBpsMax = getBandwidthVal(total, cur.IngressLimit)
		res.L1TxBpsMin = getBandwidthVal(total, cur.EgressRequest)
		res.L1TxBpsMax = getBandwidthVal(total, cur.EgressLimit)
	}

	if strategy.BEClass != nil &&
		strategy.BEClass.NetworkQOS != nil &&
		strategy.BEClass.NetworkQOS.Enable != nil &&
		*strategy.BEClass.NetworkQOS.Enable {
		cur := strategy.BEClass.NetworkQOS
		res.L2RxBpsMin = getBandwidthVal(total, cur.IngressRequest)
		res.L2RxBpsMax = getBandwidthVal(total, cur.IngressLimit)
		res.L2TxBpsMin = getBandwidthVal(total, cur.EgressRequest)
		res.L2TxBpsMax = getBandwidthVal(total, cur.EgressLimit)
	}

	return &res
}

// getBandwidthVal resolves a bandwidth setting to an absolute value.
//
// A string-typed value is handed to getBandwidthByQuantityFormat; an int-typed
// value is handed to getBandwidthByPercentageFormat together with total.
// A nil pointer or any other type yields 0.
func getBandwidthVal(total uint64, intOrPercent *intstr.IntOrString) uint64 {
	if intOrPercent != nil {
		if intOrPercent.Type == intstr.String {
			return getBandwidthByQuantityFormat(intOrPercent.StrVal)
		}
		if intOrPercent.Type == intstr.Int {
			return getBandwidthByPercentageFormat(total, intOrPercent.IntValue())
		}
	}
	return 0
}

// getBandwidthByQuantityFormat parses quanityStr with resource.ParseQuantity
// and returns its integer value.
//
// It returns 0 when the string cannot be parsed, and also when the parsed
// quantity is negative: converting a negative int64 to uint64 would otherwise
// wrap around to a huge bandwidth value.
func getBandwidthByQuantityFormat(quanityStr string) uint64 {
	val, err := resource.ParseQuantity(quanityStr)
	if err != nil {
		return 0
	}

	bandwidth := val.Value()
	if bandwidth < 0 {
		return 0
	}

	return uint64(bandwidth)
}

// getBandwidthByPercentageFormat returns percentage percent of total, rounded
// down. A percentage outside [0, 100] yields 0.
func getBandwidthByPercentageFormat(total uint64, percentage int) uint64 {
	if percentage < 0 || percentage > 100 {
		return 0
	}

	// Computing total*percentage directly can overflow uint64 for large totals.
	// Split total into 100*q + r: floor(total*p/100) == q*p + floor(r*p/100),
	// and neither term can overflow because p <= 100 and r < 100.
	p := uint64(percentage)
	return total/100*p + total%100*p/100
}

// convertToClassId renders a class id as "<major>:<minor>" using decimal digits.
func convertToClassId(major, minor int) string {
	majorPart := fmt.Sprint(major)
	minorPart := fmt.Sprint(minor)
	return majorPart + ":" + minorPart
}

// convertToHexClassId builds a numeric class id from major and minor.
//
// The decimal digits of major are followed by minor zero-padded to at least
// four decimal digits, and the resulting digit string is then read as a
// 32-bit hexadecimal number, e.g. (1, 2) => "10002" => 0x10002.
//
// The parse error is discarded: a digit string that is not valid hex (for
// example when an argument is negative) yields 0, and one that does not fit
// in 32 bits yields the maximum uint32, as returned by strconv.ParseUint.
func convertToHexClassId(major, minor int) uint32 {
	digits := strconv.Itoa(major) + fmt.Sprintf("%04d", minor)
	classID, _ := strconv.ParseUint(digits, 16, 32)
	return uint32(classID)
}

// convertIpToHex converts a dotted-decimal IP string to its hex format.
// 10.211.248.149 => 0ad3f895
//
// The input is split on "." and every segment is rendered as lowercase hex,
// zero-padded to at least two digits. The input is not validated: the Atoi
// error is discarded, so a non-numeric (or empty) segment is rendered as "00".
func convertIpToHex(ip string) string {
	var result strings.Builder
	for _, elem := range strings.Split(ip, ".") {
		// Error intentionally not propagated to keep the string-only
		// signature; on failure cur holds whatever Atoi returned.
		cur, _ := strconv.Atoi(elem)
		// %02x pads each segment to two hex digits, replacing the manual
		// "prepend zeros" loop.
		fmt.Fprintf(&result, "%02x", cur)
	}

	return result.String()
}
Loading
Loading