From 15365d9b08f6cc54c99c9c20ef3fc4b0cb5c0fdf Mon Sep 17 00:00:00 2001 From: joey Date: Fri, 4 Aug 2023 23:35:00 +0800 Subject: [PATCH] fix inplace update failed due to virtual kubelet skip inPlaceUpdateState check when pod is running on vk node and enable `UpdateEnvFromMetadata` Signed-off-by: joey --- pkg/control/pubcontrol/pub_control.go | 2 +- pkg/control/pubcontrol/pub_control_utils.go | 35 ++++++ .../pubcontrol/pub_control_utils_test.go | 108 ++++++++++++++++++ 3 files changed, 144 insertions(+), 1 deletion(-) diff --git a/pkg/control/pubcontrol/pub_control.go b/pkg/control/pubcontrol/pub_control.go index 5230e174c0..e4587f55c4 100644 --- a/pkg/control/pubcontrol/pub_control.go +++ b/pkg/control/pubcontrol/pub_control.go @@ -143,7 +143,7 @@ func (c *commonControl) IsPodStateConsistent(pod *corev1.Pod) bool { } // whether other containers is consistent - if err := inplaceupdate.DefaultCheckInPlaceUpdateCompleted(pod); err != nil { + if err := inplaceupdate.DefaultCheckInPlaceUpdateCompleted(pod); err != nil && !shouldSkipCheckPUBCompleted(c.Client, pod) { klog.V(5).Infof("check pod(%s/%s) InPlaceUpdate failed: %s", pod.Namespace, pod.Name, err.Error()) return false } diff --git a/pkg/control/pubcontrol/pub_control_utils.go b/pkg/control/pubcontrol/pub_control_utils.go index 2736de2fab..67948b2a8f 100644 --- a/pkg/control/pubcontrol/pub_control_utils.go +++ b/pkg/control/pubcontrol/pub_control_utils.go @@ -18,10 +18,12 @@ package pubcontrol import ( "context" + "encoding/json" "fmt" "strings" "time" + appspub "github.com/openkruise/kruise/apis/apps/pub" policyv1alpha1 "github.com/openkruise/kruise/apis/policy/v1alpha1" kubeClient "github.com/openkruise/kruise/pkg/client" "github.com/openkruise/kruise/pkg/util" @@ -43,6 +45,11 @@ const ( MaxUnavailablePodSize = 2000 ) +var ( + labelNodeTypeKey = "type" + virtualNodeType = "virtual-kubelet" +) + var ConflictRetry = wait.Backoff{ Steps: 4, Duration: 500 * time.Millisecond, @@ -175,6 +182,34 @@ func PodUnavailableBudgetValidatePod(client client.Client, control PubControl, p return true, "", nil } +// isVirtualKubeletNode Determine whether the node is virtual kubelet +func isVirtualKubeletNode(cli client.Client, nodeName string) bool { + if len(nodeName) == 0 { + return false + } + node := &corev1.Node{} + if err := cli.Get(context.Background(), client.ObjectKey{Name: nodeName}, node); err != nil { + klog.Errorf("get node(%s) failed: %s", nodeName, err.Error()) + return false + } + return node.Labels[labelNodeTypeKey] == virtualNodeType +} + +// shouldSkipCheckPUBCompleted Determine whether the pod should skip check PUB completed +// if the pod is running on a virtual kubelet node and enable UpdateEnvFromMetadata, do not check inplace update state +func shouldSkipCheckPUBCompleted(cli client.Client, pod *corev1.Pod) bool { + if !isVirtualKubeletNode(cli, pod.Spec.NodeName) { + return false + } + inPlaceUpdateState := appspub.InPlaceUpdateState{} + if stateStr, ok := appspub.GetInPlaceUpdateState(pod); !ok { + return false + } else if err := json.Unmarshal([]byte(stateStr), &inPlaceUpdateState); err != nil { + return false + } + return inPlaceUpdateState.UpdateEnvFromMetadata +} + func checkAndDecrement(podName string, pub *policyv1alpha1.PodUnavailableBudget, operation policyv1alpha1.PubOperation) error { if pub.Status.UnavailableAllowed <= 0 { return errors.NewForbidden(policyv1alpha1.Resource("podunavailablebudget"), pub.Name, fmt.Errorf("pub unavailable allowed is negative")) diff --git a/pkg/control/pubcontrol/pub_control_utils_test.go b/pkg/control/pubcontrol/pub_control_utils_test.go index d6909ee105..2a98532f34 100644 --- a/pkg/control/pubcontrol/pub_control_utils_test.go +++ b/pkg/control/pubcontrol/pub_control_utils_test.go @@ -168,6 +168,25 @@ var ( }, }, } + + vkNode = &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "virtual-kubelet", + Labels: map[string]string{ + "type": "virtual-kubelet", + }, + }, + } + + nonVkNode = &corev1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "not-virtual-kubelet", + Labels: map[string]string{ + "ack.aliyun.com": "xxx", + }, + }, + Spec: corev1.NodeSpec{}, + } ) func TestPodUnavailableBudgetValidatePod(t *testing.T) { @@ -398,3 +417,92 @@ func TestGetPodUnavailableBudgetForPod(t *testing.T) { }) } } + +func Test_isVirtualKubeletNode(t *testing.T) { + cases := []struct { + name string + node *corev1.Node + want bool + }{ + { + name: "virtual kubelet node", + node: vkNode, + want: true, + }, + { + name: "not virtual kubelet node", + node: nonVkNode, + want: false, + }, + } + + for _, cs := range cases { + t.Run(cs.name, func(t *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(cs.node).Build() + got := isVirtualKubeletNode(fakeClient, cs.node.Name) + if got != cs.want { + t.Fatalf("isVirtualKubeletNode failed") + } + }) + } +} + +func Test_shouldSkipCheckPUBCompleted(t *testing.T) { + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(vkNode, nonVkNode).Build() + cases := []struct { + name string + pod *corev1.Pod + want bool + }{ + { + name: "running on virtual kubelet node with enable UpdateEnvFromMetadata", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + NodeName: "virtual-kubelet", + }, + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + appspub.InPlaceUpdateStateKey: `{"updateEnvFromMetadata":true}`, + }, + }, + }, + want: true, + }, + { + name: "running on virtual kubelet node with disable UpdateEnvFromMetadata", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + NodeName: "virtual-kubelet", + }, + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + appspub.InPlaceUpdateStateKey: `{"updateEnvFromMetadata":false}`, + }, + }, + }, + want: false, + }, + { + name: "not running on virtual kubelet node with enable UpdateEnvFromMetadata", + pod: &corev1.Pod{ + Spec: corev1.PodSpec{ + NodeName: "not-virtual-kubelet", + }, + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + appspub.InPlaceUpdateStateKey: `{"updateEnvFromMetadata":true}`, + }, + }, + }, + want: false, + }, + } + for _, cs := range cases { + t.Run(cs.name, func(t *testing.T) { + got := shouldSkipCheckPUBCompleted(fakeClient, cs.pod) + if got != cs.want { + t.Fatalf("shouldSkipCheckPUBCompleted failed") + } + }) + } +}