Skip to content

Commit

Permalink
feat(koordlet): support memoryEvictLowerPercent in memory evict (#132)
Browse files Browse the repository at this point in the history
Signed-off-by: shinytang6 <1074461480@qq.com>
  • Loading branch information
shinytang6 committed May 12, 2022
1 parent 70409bb commit 7701600
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 10 deletions.
24 changes: 18 additions & 6 deletions pkg/koordlet/resmanager/memory_evict.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,18 @@ func (m *MemoryEvictor) memoryEvict() {
return
}

// Resolve the usage percent that eviction should bring the node back down to:
// default to the threshold minus the release buffer, and let an explicitly
// configured MemoryEvictLowerPercent override that default.
lowerPercent := *thresholdPercent - memoryReleaseBufferPercent
if thresholdConfig.MemoryEvictLowerPercent != nil {
	lowerPercent = *thresholdConfig.MemoryEvictLowerPercent
}

// The lower bound must be strictly below the evict threshold; otherwise skip this round.
if lowerPercent >= *thresholdPercent {
	// Dereference thresholdPercent so the log shows the percent value rather than a pointer address.
	klog.Warningf("skip memory evict, lower percent(%v) should less than threshold percent(%v)", lowerPercent, *thresholdPercent)
	return
}

nodeMetric, podMetrics := m.resManager.collectNodeAndPodMetricLast()
if nodeMetric == nil {
klog.Warningf("skip memory evict, NodeMetric is nil")
Expand All @@ -102,20 +114,20 @@ func (m *MemoryEvictor) memoryEvict() {
return
}

klog.Infof("node(%v) MemoryUsage(%v): %.2f, evictThresholdUsage: %.2f",
klog.Infof("node(%v) MemoryUsage(%v): %.2f, evictThresholdUsage: %.2f, evictLowerUsage: %.2f",
m.resManager.nodeName,
nodeMetric.MemoryUsed.MemoryWithoutCache.Value(),
float64(nodeMemoryUsage)/100,
float64(*thresholdPercent)/100,
float64(lowerPercent)/100,
)

lowPercent := *thresholdPercent - memoryReleaseBufferPercent
memoryNeedRelease := memoryCapacity * (nodeMemoryUsage - lowPercent) / 100
memoryNeedRelease := memoryCapacity * (nodeMemoryUsage - lowerPercent) / 100
m.killAndEvictBEPods(node, podMetrics, memoryNeedRelease)
}

func (m *MemoryEvictor) killAndEvictBEPods(node *corev1.Node, podMetrics []*metriccache.PodResourceMetric, memoryNeedRelease int64) {
bePodInfos := m.getSortedPodInfos(podMetrics)
bePodInfos := m.getSortedBEPodInfos(podMetrics)
message := fmt.Sprintf("killAndEvictBEPods for node(%v), need to release memory: %v", m.resManager.nodeName, memoryNeedRelease)
memoryReleased := int64(0)

Expand All @@ -134,10 +146,10 @@ func (m *MemoryEvictor) killAndEvictBEPods(node *corev1.Node, podMetrics []*metr
m.resManager.evictPodsIfNotEvicted(killedPods, node, evictPodByNodeMemoryUsage, message)

m.lastEvictTime = time.Now()
klog.Infof("killAndEvictBEPods completed, memoryNeedRelease(%v) memoryReleased(%v)", memoryNeedRelease, memoryNeedRelease)
klog.Infof("killAndEvictBEPods completed, memoryNeedRelease(%v) memoryReleased(%v)", memoryNeedRelease, memoryReleased)
}

func (m *MemoryEvictor) getSortedPodInfos(podMetrics []*metriccache.PodResourceMetric) []*podInfo {
func (m *MemoryEvictor) getSortedBEPodInfos(podMetrics []*metriccache.PodResourceMetric) []*podInfo {
podMetricMap := make(map[string]*metriccache.PodResourceMetric, len(podMetrics))
for _, podMetric := range podMetrics {
podMetricMap[podMetric.PodUID] = podMetric
Expand Down
142 changes: 138 additions & 4 deletions pkg/koordlet/resmanager/memory_evict_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -112,7 +113,10 @@ func Test_memoryEvict(t *testing.T) {
createPodResourceMetric("test_be_pod_priority100_2", "8G"),
createPodResourceMetric("test_be_pod_priority120", "8G"),
},
thresholdConfig: &slov1alpha1.ResourceThresholdStrategy{MemoryEvictThresholdPercent: pointer.Int64Ptr(80)},
thresholdConfig: &slov1alpha1.ResourceThresholdStrategy{
Enable: pointer.BoolPtr(true),
MemoryEvictThresholdPercent: pointer.Int64Ptr(80),
},
expectEvictPods: []*corev1.Pod{},
expectNotEvictPods: []*corev1.Pod{
createMemoryEvictTestPod("test_lsr_pod", apiext.QoSLSR, 1000),
Expand Down Expand Up @@ -147,7 +151,10 @@ func Test_memoryEvict(t *testing.T) {
createPodResourceMetric("test_be_pod_priority100_2", "20G"), // evict
createPodResourceMetric("test_be_pod_priority120", "10G"),
},
thresholdConfig: &slov1alpha1.ResourceThresholdStrategy{MemoryEvictThresholdPercent: pointer.Int64Ptr(82)}, // >96G
thresholdConfig: &slov1alpha1.ResourceThresholdStrategy{
Enable: pointer.BoolPtr(true),
MemoryEvictThresholdPercent: pointer.Int64Ptr(82),
}, // >96G
expectEvictPods: []*corev1.Pod{
createMemoryEvictTestPod("test_be_pod_priority100_2", apiext.QoSBE, 100),
},
Expand Down Expand Up @@ -183,7 +190,10 @@ func Test_memoryEvict(t *testing.T) {
createPodResourceMetric("test_be_pod_priority100_2", "20G"), // evict
createPodResourceMetric("test_be_pod_priority120", "10G"),
},
thresholdConfig: &slov1alpha1.ResourceThresholdStrategy{MemoryEvictThresholdPercent: pointer.Int64Ptr(80)}, // >91.2G
thresholdConfig: &slov1alpha1.ResourceThresholdStrategy{
Enable: pointer.BoolPtr(true),
MemoryEvictThresholdPercent: pointer.Int64Ptr(80),
}, // >91.2G
expectEvictPods: []*corev1.Pod{
createMemoryEvictTestPod("test_be_pod_priority100_2", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority100_1", apiext.QoSBE, 100),
Expand Down Expand Up @@ -219,7 +229,130 @@ func Test_memoryEvict(t *testing.T) {
createPodResourceMetric("test_be_pod_priority100_2", "20G"), // evict
createPodResourceMetric("test_be_pod_priority120", "10G"), // evict
},
thresholdConfig: &slov1alpha1.ResourceThresholdStrategy{MemoryEvictThresholdPercent: pointer.Int64Ptr(50)}, // >60G
thresholdConfig: &slov1alpha1.ResourceThresholdStrategy{
Enable: pointer.BoolPtr(true),
MemoryEvictThresholdPercent: pointer.Int64Ptr(50),
}, // >60G
expectEvictPods: []*corev1.Pod{
createMemoryEvictTestPod("test_be_pod_priority100_2", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority100_1", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority120", apiext.QoSBE, 120),
},
expectNotEvictPods: []*corev1.Pod{
createMemoryEvictTestPod("test_lsr_pod", apiext.QoSLSR, 1000),
createMemoryEvictTestPod("test_ls_pod", apiext.QoSLS, 500),
createMemoryEvictTestPod("test_noqos_pod", apiext.QoSNone, 100),
},
},
{
name: "test_memoryevict_MemoryEvictLowerPercent_80",
node: getNode("80", "120G"),
pods: []*corev1.Pod{
createMemoryEvictTestPod("test_lsr_pod", apiext.QoSLSR, 1000),
createMemoryEvictTestPod("test_ls_pod", apiext.QoSLS, 500),
createMemoryEvictTestPod("test_noqos_pod", apiext.QoSNone, 100),
createMemoryEvictTestPod("test_be_pod_priority100_1", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority100_2", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority120", apiext.QoSBE, 120),
},
nodeMetric: &metriccache.NodeResourceMetric{
MemoryUsed: metriccache.MemoryMetric{
MemoryWithoutCache: resource.MustParse("115G"),
},
},
podMetrics: []*metriccache.PodResourceMetric{
createPodResourceMetric("test_lsr_pod", "40G"),
createPodResourceMetric("test_ls_pod", "30G"),
createPodResourceMetric("test_noqos_pod", "10G"),
createPodResourceMetric("test_be_pod_priority100_1", "5G"),
createPodResourceMetric("test_be_pod_priority100_2", "20G"), // evict
createPodResourceMetric("test_be_pod_priority120", "10G"),
},
thresholdConfig: &slov1alpha1.ResourceThresholdStrategy{
Enable: pointer.BoolPtr(true),
MemoryEvictThresholdPercent: pointer.Int64Ptr(82),
MemoryEvictLowerPercent: pointer.Int64Ptr(80),
}, // >96G
expectEvictPods: []*corev1.Pod{
createMemoryEvictTestPod("test_be_pod_priority100_2", apiext.QoSBE, 100),
},
expectNotEvictPods: []*corev1.Pod{
createMemoryEvictTestPod("test_lsr_pod", apiext.QoSLSR, 1000),
createMemoryEvictTestPod("test_ls_pod", apiext.QoSLS, 500),
createMemoryEvictTestPod("test_noqos_pod", apiext.QoSNone, 100),
createMemoryEvictTestPod("test_be_pod_priority100_1", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority120", apiext.QoSBE, 120),
},
},
{
name: "test_memoryevict_MemoryEvictLowerPercent_78",
node: getNode("80", "120G"),
pods: []*corev1.Pod{
createMemoryEvictTestPod("test_lsr_pod", apiext.QoSLSR, 1000),
createMemoryEvictTestPod("test_ls_pod", apiext.QoSLS, 500),
createMemoryEvictTestPod("test_noqos_pod", apiext.QoSNone, 100),
createMemoryEvictTestPod("test_be_pod_priority100_1", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority100_2", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority120", apiext.QoSBE, 120),
},
nodeMetric: &metriccache.NodeResourceMetric{
MemoryUsed: metriccache.MemoryMetric{
MemoryWithoutCache: resource.MustParse("115G"),
},
},
podMetrics: []*metriccache.PodResourceMetric{
createPodResourceMetric("test_lsr_pod", "40G"),
createPodResourceMetric("test_ls_pod", "30G"),
createPodResourceMetric("test_noqos_pod", "10G"),
createPodResourceMetric("test_be_pod_priority100_1", "5G"), // evict
createPodResourceMetric("test_be_pod_priority100_2", "20G"), // evict
createPodResourceMetric("test_be_pod_priority120", "10G"),
},
thresholdConfig: &slov1alpha1.ResourceThresholdStrategy{
Enable: pointer.BoolPtr(true),
MemoryEvictThresholdPercent: pointer.Int64Ptr(82),
MemoryEvictLowerPercent: pointer.Int64Ptr(78),
}, // >93.6G
expectEvictPods: []*corev1.Pod{
createMemoryEvictTestPod("test_be_pod_priority100_2", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority100_1", apiext.QoSBE, 100),
},
expectNotEvictPods: []*corev1.Pod{
createMemoryEvictTestPod("test_lsr_pod", apiext.QoSLSR, 1000),
createMemoryEvictTestPod("test_ls_pod", apiext.QoSLS, 500),
createMemoryEvictTestPod("test_noqos_pod", apiext.QoSNone, 100),
createMemoryEvictTestPod("test_be_pod_priority120", apiext.QoSBE, 120),
},
},
{
name: "test_memoryevict_MemoryEvictLowerPercent_74",
node: getNode("80", "120G"),
pods: []*corev1.Pod{
createMemoryEvictTestPod("test_lsr_pod", apiext.QoSLSR, 1000),
createMemoryEvictTestPod("test_ls_pod", apiext.QoSLS, 500),
createMemoryEvictTestPod("test_noqos_pod", apiext.QoSNone, 100),
createMemoryEvictTestPod("test_be_pod_priority100_1", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority100_2", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority120", apiext.QoSBE, 120),
},
nodeMetric: &metriccache.NodeResourceMetric{
MemoryUsed: metriccache.MemoryMetric{
MemoryWithoutCache: resource.MustParse("115G"),
},
},
podMetrics: []*metriccache.PodResourceMetric{
createPodResourceMetric("test_lsr_pod", "40G"),
createPodResourceMetric("test_ls_pod", "30G"),
createPodResourceMetric("test_noqos_pod", "10G"),
createPodResourceMetric("test_be_pod_priority100_1", "5G"), // evict
createPodResourceMetric("test_be_pod_priority100_2", "20G"), // evict
createPodResourceMetric("test_be_pod_priority120", "10G"), // evict
},
thresholdConfig: &slov1alpha1.ResourceThresholdStrategy{
Enable: pointer.BoolPtr(true),
MemoryEvictThresholdPercent: pointer.Int64Ptr(82),
MemoryEvictLowerPercent: pointer.Int64Ptr(74),
}, // >88.8G
expectEvictPods: []*corev1.Pod{
createMemoryEvictTestPod("test_be_pod_priority100_2", apiext.QoSBE, 100),
createMemoryEvictTestPod("test_be_pod_priority100_1", apiext.QoSBE, 100),
Expand Down Expand Up @@ -281,6 +414,7 @@ func Test_memoryEvict(t *testing.T) {
for _, pod := range tt.expectEvictPods {
getEvictObject, err := client.Tracker().Get(podsResource, pod.Namespace, pod.Name)
assert.NotNil(t, getEvictObject, "evictPod Fail", err)
assert.IsType(t, &v1.Eviction{}, getEvictObject, "evictPod Fail", pod.Name)
}

for _, pod := range tt.expectNotEvictPods {
Expand Down

0 comments on commit 7701600

Please sign in to comment.