Skip to content

Commit

Permalink
fix(region,host): guest reserve cpus compatible with numa allocate (#…
Browse files Browse the repository at this point in the history
…21803)

* fix(region,host): isolate guest reserved cpus

* fix(region,host): guest reserve cpus
  • Loading branch information
wanyaoqi authored Jan 7, 2025
1 parent c07c99e commit 2459378
Show file tree
Hide file tree
Showing 10 changed files with 230 additions and 63 deletions.
1 change: 1 addition & 0 deletions pkg/apis/compute/host.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,7 @@ type HostDetails struct {
// isolated device count
IsolatedDeviceCount int
IsolatedDeviceTypeCount map[string]int
GuestPinnedCpus []int

// host init warning
SysWarn string `json:"sys_warn"`
Expand Down
41 changes: 24 additions & 17 deletions pkg/compute/models/guest_actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -6313,27 +6313,36 @@ func (self *SGuest) PerformCpuset(ctx context.Context, userCred mcclient.TokenCr
return nil, httperrors.NewInputParameterError("Host cores %v not contains input %v", allCores, data.CPUS)
}

pinnedMap, err := host.GetPinnedCpusetCores(ctx, userCred)
hostReservedCpus, err := host.GetReservedCpus()
if err != nil {
return nil, errors.Wrap(err, "Get host pinned cpu cores")
return nil, errors.Wrap(err, "host get reserved cpus")
}

pinnedSets := sets.NewInt()
for key, pinned := range pinnedMap {
if key == self.GetId() {
continue
for i := range data.CPUS {
if hostReservedCpus.Contains(data.CPUS[i]) {
return nil, httperrors.NewBadRequestError("request cpu %d has been reserved", data.CPUS[i])
}
pinnedSets.Insert(pinned...)
}

if pinnedSets.HasAny(data.CPUS...) {
return nil, httperrors.NewInputParameterError("More than one of input cores %v already set in host %v", data.CPUS, pinnedSets.List())
pinnedMap, err := host.GetPinnedCpusetCores(ctx, userCred, []string{self.Id})
if err != nil {
return nil, errors.Wrap(err, "Get host pinned cpu cores")
}

if pinnedMap != nil {
for i := range data.CPUS {
if pinnedMap.Contains(data.CPUS[i]) {
return nil, httperrors.NewBadRequestError("request cpu %d has been set by other guests", data.CPUS[i])
}
}
}

if err := self.SetMetadata(ctx, api.VM_METADATA_CGROUP_CPUSET, data, userCred); err != nil {
return nil, errors.Wrap(err, "set metadata")
}

if err := host.updateHostReservedCpus(ctx, userCred); err != nil {
return nil, errors.Wrap(err, "updateHostReservedCpus")
}
return nil, self.StartGuestCPUSetTask(ctx, userCred, data)
}

Expand Down Expand Up @@ -6396,18 +6405,16 @@ func (self *SGuest) GetDetailsCpusetCores(ctx context.Context, userCred mcclient
return nil, err
}

usedMap, err := host.GetPinnedCpusetCores(ctx, userCred)
usedMap, err := host.GetPinnedCpusetCores(ctx, userCred, nil)
if err != nil {
return nil, err
}
usedSets := sets.NewInt()
for _, used := range usedMap {
usedSets.Insert(used...)
}

resp := &api.ServerGetCPUSetCoresResp{
HostCores: allCores,
HostUsedCores: usedSets.List(),
HostCores: allCores,
}
if usedMap != nil {
resp.HostUsedCores = usedMap.ToSlice()
}

// fetch cpuset pinned
Expand Down
92 changes: 83 additions & 9 deletions pkg/compute/models/hosts.go
Original file line number Diff line number Diff line change
Expand Up @@ -3835,6 +3835,12 @@ func (manager *SHostManager) FetchCustomizeColumns(
}
}
}
if !isList {
pinnedCpus, _ := hosts[i].GetPinnedCpusetCores(ctx, userCred, nil)
if pinnedCpus != nil {
rows[i].GuestPinnedCpus = pinnedCpus.ToSlice()
}
}

if usage, ok := guestResources[hostIds[i]]; ok {
rows[i].CpuCommit = usage.GuestVcpuCount
Expand Down Expand Up @@ -5070,6 +5076,17 @@ func (hh *SHost) PerformReserveCpus(
return nil, httperrors.NewInputParameterError("Can't reserve host all cpus")
}

pinnedCores, err := hh.GetPinnedCpusetCores(ctx, userCred, nil)
if err != nil {
return nil, err
}

if pinnedCores != nil {
if cs.Union(*pinnedCores).Size() != (cs.Size() + pinnedCores.Size()) {
return nil, httperrors.NewBadRequestError("request cpus confilct with guest pinned cpus")
}
}

if input.Mems != "" {
mems, err := cpuset.Parse(input.Mems)
if err != nil {
Expand Down Expand Up @@ -5102,11 +5119,8 @@ func (hh *SHost) PerformReserveCpus(
if err != nil {
return nil, err
}
if hh.CpuReserved != cs.Size() {
_, err = db.Update(hh, func() error {
hh.CpuReserved = cs.Size()
return nil
})
if err = hh.updateHostReservedCpus(ctx, userCred); err != nil {
return nil, errors.Wrap(err, "update host reserved cpus")
}
return nil, err
}
Expand Down Expand Up @@ -7328,20 +7342,80 @@ func (hh *SHost) PerformSyncIsolatedDevices(ctx context.Context, userCred mcclie
return res, nil
}

// GetPinnedCpusetCores gathers the pinned cpuset cores of the guests returned
// by hh.GetGuests().
//
// NOTE(review): this listing interleaves the previous and the updated lines of
// the function (two signatures, two initialisations of ret, two returns). The
// remarks below describe the variant that takes excludeGuestIds and returns
// *cpuset.CPUSet.
//
// Guests whose Id is listed in excludeGuestIds are skipped. The cores of the
// remaining guests are merged into a single set. When the merged set is empty
// the function returns (nil, nil), so callers must nil-check the result before
// dereferencing it. An error from GetGuests or from any guest's
// getPinnedCpusetCores aborts the call and is returned wrapped.
func (hh *SHost) GetPinnedCpusetCores(ctx context.Context, userCred mcclient.TokenCredential) (map[string][]int, error) {
func (hh *SHost) GetPinnedCpusetCores(ctx context.Context, userCred mcclient.TokenCredential, excludeGuestIds []string) (*cpuset.CPUSet, error) {
gsts, err := hh.GetGuests()
if err != nil {
return nil, errors.Wrap(err, "Get all guests")
}
ret := make(map[string][]int, 0)
ret := cpuset.NewBuilder()
for _, gst := range gsts {
// Skip guests the caller asked to leave out of the result.
if utils.IsInStringArray(gst.Id, excludeGuestIds) {
continue
}
pinned, err := gst.getPinnedCpusetCores(ctx, userCred)
if err != nil {
return nil, errors.Wrapf(err, "get guest %s pinned cpuset cores", gst.GetName())
}
ret[gst.GetId()] = pinned
ret.Add(pinned...)
}
return ret, nil
resCpuset := ret.Result()
// An empty result is reported as a nil pointer rather than an empty set.
if resCpuset.Size() == 0 {
return nil, nil
}
return &resCpuset, nil
}

// GetReservedCpus returns the cpuset stored in the host metadata entry
// api.HOSTMETA_RESERVED_CPUS_INFO.
//
// The metadata value is parsed as JSON into api.HostReserveCpusInput and its
// Cpus field is parsed as a cpuset string. The function returns (nil, nil)
// when the metadata entry is empty or when the Cpus field is empty, so callers
// must nil-check the result. Any parse or unmarshal failure is returned
// wrapped.
func (hh *SHost) GetReservedCpus() (*cpuset.CPUSet, error) {
	rawInfo := hh.GetMetadata(context.Background(), api.HOSTMETA_RESERVED_CPUS_INFO, nil)
	if rawInfo == "" {
		// Nothing has been recorded for this host.
		return nil, nil
	}

	infoJson, err := jsonutils.ParseString(rawInfo)
	if err != nil {
		return nil, errors.Wrap(err, "parse reserved cpus info failed")
	}

	var info api.HostReserveCpusInput
	if err := infoJson.Unmarshal(&info); err != nil {
		return nil, errors.Wrap(err, "unmarshal host reserved cpus info failed")
	}
	if info.Cpus == "" {
		// The entry exists but carries no cpu list.
		return nil, nil
	}

	reserved, err := cpuset.Parse(info.Cpus)
	if err != nil {
		return nil, errors.Wrap(err, "parse reserved cpuset")
	}
	return &reserved, nil
}

// updateHostReservedCpus recomputes hh.CpuReserved from the host's reserved
// cpus (GetReservedCpus) and the cores pinned by its guests
// (GetPinnedCpusetCores), persists the value when it differs from the stored
// one, and then clears the scheduler description cache.
//
// The count is the size of the union of both sets, so a core that appears in
// both the reserved set and the guest-pinned set is counted once instead of
// twice. Either source may be nil (nothing reserved / nothing pinned), in
// which case it contributes nothing.
//
// Errors from reading either set or from the database update are returned
// as-is; the cache is not cleared on those paths.
func (hh *SHost) updateHostReservedCpus(ctx context.Context, userCred mcclient.TokenCredential) error {
	reservedCpus, err := hh.GetReservedCpus()
	if err != nil {
		return err
	}
	pinnedCpus, err := hh.GetPinnedCpusetCores(ctx, userCred, nil)
	if err != nil {
		return err
	}

	// Merge both sources before counting so overlapping cores are not
	// double-counted.
	occupied := cpuset.NewCPUSet()
	if reservedCpus != nil {
		occupied = occupied.Union(*reservedCpus)
	}
	if pinnedCpus != nil {
		occupied = occupied.Union(*pinnedCpus)
	}
	reservedCpuCnt := occupied.Size()

	// Only touch the database when the stored value is stale.
	if hh.CpuReserved != reservedCpuCnt {
		_, err = db.Update(hh, func() error {
			hh.CpuReserved = reservedCpuCnt
			return nil
		})
		if err != nil {
			return err
		}
	}
	hh.ClearSchedDescCache()
	return nil
}

func (h *SHost) PerformSyncGuestNicTraffics(ctx context.Context, userCred mcclient.TokenCredential, query jsonutils.JSONObject, data jsonutils.JSONObject) (jsonutils.JSONObject, error) {
Expand Down
6 changes: 3 additions & 3 deletions pkg/hostman/guestman/guesthelper.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ type CpuSetCounter struct {
}

func NewGuestCpuSetCounter(
info *hostapi.HostTopology, reservedCpus *cpuset.CPUSet, numaAllocate, isContainerHost bool,
info *hostapi.HostTopology, reservedCpus cpuset.CPUSet, numaAllocate, isContainerHost bool,
hugepageSizeKB int, cpuCmtbound, memCmtBound float32, reservedMemMb int,
) (*CpuSetCounter, error) {
cpuSetCounter := new(CpuSetCounter)
Expand Down Expand Up @@ -262,7 +262,7 @@ func NewGuestCpuSetCounter(
cpuDie := new(CPUDie)
dieBuilder := cpuset.NewBuilder()
for k := 0; k < len(info.Nodes[i].Caches[j].LogicalProcessors); k++ {
if reservedCpus != nil && reservedCpus.Contains(int(info.Nodes[i].Caches[j].LogicalProcessors[k])) {
if reservedCpus.Contains(int(info.Nodes[i].Caches[j].LogicalProcessors[k])) {
reservedCpuCnt += 1
continue
}
Expand All @@ -280,7 +280,7 @@ func NewGuestCpuSetCounter(
dieBuilder := cpuset.NewBuilder()
for j := 0; j < len(info.Nodes[i].Cores); j++ {
for k := 0; k < len(info.Nodes[i].Cores[j].LogicalProcessors); k++ {
if reservedCpus != nil && reservedCpus.Contains(info.Nodes[i].Cores[j].LogicalProcessors[k]) {
if reservedCpus.Contains(info.Nodes[i].Cores[j].LogicalProcessors[k]) {
reservedCpuCnt += 1
continue
}
Expand Down
12 changes: 11 additions & 1 deletion pkg/hostman/guestman/guestman.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ import (
"yunion.io/x/onecloud/pkg/mcclient"
modules "yunion.io/x/onecloud/pkg/mcclient/modules/compute"
"yunion.io/x/onecloud/pkg/util/cgrouputils"
"yunion.io/x/onecloud/pkg/util/cgrouputils/cpuset"
"yunion.io/x/onecloud/pkg/util/fileutils2"
"yunion.io/x/onecloud/pkg/util/netutils2"
"yunion.io/x/onecloud/pkg/util/pod"
Expand Down Expand Up @@ -299,8 +300,17 @@ func (m *SGuestManager) Bootstrap() (chan struct{}, error) {
m.numaAllocate = !m.host.IsNumaAllocateEnabled() && enableMemAlloc && (len(hostTypo.Nodes) > 1)
}

var reserveCpus = cpuset.NewCPUSet()
hostReserveCpus, guestPinnedCpus := m.host.GetReservedCpusInfo()
if hostReserveCpus != nil {
reserveCpus = reserveCpus.Union(*hostReserveCpus)
}
if guestPinnedCpus != nil {
reserveCpus = reserveCpus.Union(*guestPinnedCpus)
}

cpuSet, err := NewGuestCpuSetCounter(
hostTypo, m.host.GetReservedCpusInfo(), m.numaAllocate, m.host.IsContainerHost(),
hostTypo, reserveCpus, m.numaAllocate, m.host.IsContainerHost(),
m.host.HugepageSizeKb(), m.host.CpuCmtBound(), m.host.MemCmtBound(), m.host.GetReservedMemMb(),
)
if err != nil {
Expand Down
23 changes: 22 additions & 1 deletion pkg/hostman/guestman/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -1113,6 +1113,28 @@ func (s *sPodGuestInstance) allocateCpuNumaPin() error {
return nil
}

if scpuset, ok := s.Desc.Metadata[computeapi.VM_METADATA_CGROUP_CPUSET]; ok {
cpusetJson, err := jsonutils.ParseString(scpuset)
if err != nil {
log.Errorf("failed parse server %s cpuset %s: %s", s.Id, scpuset, err)
return errors.Errorf("failed parse server %s cpuset %s: %s", s.Id, scpuset, err)
}
input := new(computeapi.ServerCPUSetInput)
err = cpusetJson.Unmarshal(input)
if err != nil {
log.Errorf("failed unmarshal server %s cpuset %s", s.Id, err)
return errors.Errorf("failed unmarshal server %s cpuset %s", s.Id, err)
}
cpus := input.CPUS
s.Desc.VcpuPin = []desc.SCpuPin{
{
Vcpus: fmt.Sprintf("0-%d", s.Desc.Cpu-1),
Pcpus: cpuset.NewCPUSet(cpus...).String(),
},
}
return nil
}

var cpus = make([]int, 0)
var preferNumaNodes = make([]int8, 0)
for i := range s.Desc.IsolatedDevices {
Expand Down Expand Up @@ -1150,7 +1172,6 @@ func (s *sPodGuestInstance) allocateCpuNumaPin() error {
} else {
vcpuPin[i].Vcpu = -1
}

}

memPin := &desc.SCpuNumaPin{
Expand Down
Loading

0 comments on commit 2459378

Please sign in to comment.