From 20ba503c08428cc2764c68474b0daa14e303db3a Mon Sep 17 00:00:00 2001 From: FillZpp Date: Fri, 18 Mar 2022 16:14:33 +0800 Subject: [PATCH] Optimize inplace update and support pod-template-hash label for cloneset Signed-off-by: FillZpp --- .../cloneset/cloneset_controller.go | 44 ++++--------- .../cloneset/cloneset_event_handler.go | 1 - pkg/controller/cloneset/sync/api.go | 4 +- .../cloneset/sync/cloneset_scale_test.go | 27 ++++---- .../cloneset/sync/cloneset_update.go | 58 ++++++++++++----- .../cloneset/sync/cloneset_update_test.go | 64 +++++++++++-------- .../cloneset/utils/cloneset_utils.go | 19 ++++-- .../framework/podunavailablebudget_util.go | 3 +- test/e2e/framework/workloadspread_util.go | 2 +- test/e2e/policy/podunavailablebudget.go | 44 ++++++++++--- 10 files changed, 159 insertions(+), 107 deletions(-) diff --git a/pkg/controller/cloneset/cloneset_controller.go b/pkg/controller/cloneset/cloneset_controller.go index f74bf798b2..f84f2af28b 100644 --- a/pkg/controller/cloneset/cloneset_controller.go +++ b/pkg/controller/cloneset/cloneset_controller.go @@ -191,6 +191,8 @@ func (r *ReconcileCloneSet) doReconcile(request reconcile.Request) (res reconcil } else { klog.Errorf("Failed syncing CloneSet %s: %v", request, retErr) } + // clean the duration store + _ = clonesetutils.DurationStore.Pop(request.String()) }() // Fetch the CloneSet instance @@ -202,7 +204,6 @@ func (r *ReconcileCloneSet) doReconcile(request reconcile.Request) (res reconcil // For additional cleanup logic use finalizers. klog.V(3).Infof("CloneSet %s has been deleted.", request) clonesetutils.ScaleExpectations.DeleteExpectations(request.String()) - clonesetutils.UpdateExpectations.DeleteExpectations(request.String()) return reconcile.Result{}, nil } return reconcile.Result{}, err @@ -256,20 +257,6 @@ func (r *ReconcileCloneSet) doReconcile(request reconcile.Request) (res reconcil return reconcile.Result{}, err } - // Refresh update expectations - for _, pod := range filteredPods { - clonesetutils.UpdateExpectations.ObserveUpdated(request.String(), updateRevision.Name, pod) - } - // If update expectations have not satisfied yet, just skip this reconcile. - if updateSatisfied, unsatisfiedDuration, updateDirtyPods := clonesetutils.UpdateExpectations.SatisfiedExpectations(request.String(), updateRevision.Name); !updateSatisfied { - if unsatisfiedDuration >= expectations.ExpectationTimeout { - klog.Warningf("Expectation unsatisfied overtime for %v, updateDirtyPods=%v, timeout=%v", request.String(), updateDirtyPods, unsatisfiedDuration) - return reconcile.Result{}, nil - } - klog.V(4).Infof("Not satisfied update for %v, updateDirtyPods=%v", request.String(), updateDirtyPods) - return reconcile.Result{RequeueAfter: expectations.ExpectationTimeout - unsatisfiedDuration}, nil - } - // If resourceVersion expectations have not satisfied yet, just skip this reconcile clonesetutils.ResourceVersionExpectations.Observe(updateRevision) if isSatisfied, unsatisfiedDuration := clonesetutils.ResourceVersionExpectations.IsSatisfied(updateRevision); !isSatisfied { @@ -331,7 +318,7 @@ func (r *ReconcileCloneSet) doReconcile(request reconcile.Request) (res reconcil } // scale and update pods - delayDuration, syncErr := r.syncCloneSet(instance, &newStatus, currentRevision, updateRevision, revisions, filteredPods, filteredPVCs) + syncErr := r.syncCloneSet(instance, &newStatus, currentRevision, updateRevision, revisions, filteredPods, filteredPVCs) // update new status if err = r.statusUpdater.UpdateCloneSetStatus(instance, &newStatus, filteredPods); err != nil { @@ -347,32 +334,28 @@ func (r *ReconcileCloneSet) doReconcile(request reconcile.Request) (res reconcil } if syncErr == nil && instance.Spec.MinReadySeconds > 0 && newStatus.AvailableReplicas != newStatus.ReadyReplicas { - minReadyDuration := time.Second * time.Duration(instance.Spec.MinReadySeconds) - if delayDuration == 0 || minReadyDuration < delayDuration { - delayDuration = minReadyDuration - } + clonesetutils.DurationStore.Push(request.String(), time.Second*time.Duration(instance.Spec.MinReadySeconds)) } - return reconcile.Result{RequeueAfter: delayDuration}, syncErr + return reconcile.Result{RequeueAfter: clonesetutils.DurationStore.Pop(request.String())}, syncErr } func (r *ReconcileCloneSet) syncCloneSet( instance *appsv1alpha1.CloneSet, newStatus *appsv1alpha1.CloneSetStatus, currentRevision, updateRevision *apps.ControllerRevision, revisions []*apps.ControllerRevision, filteredPods []*v1.Pod, filteredPVCs []*v1.PersistentVolumeClaim, -) (time.Duration, error) { - var delayDuration time.Duration +) error { if instance.DeletionTimestamp != nil { - return delayDuration, nil + return nil } // get the current and update revisions of the set. currentSet, err := r.revisionControl.ApplyRevision(instance, currentRevision) if err != nil { - return delayDuration, err + return err } updateSet, err := r.revisionControl.ApplyRevision(instance, updateRevision) if err != nil { - return delayDuration, err + return err } var scaling bool @@ -390,10 +373,10 @@ func (r *ReconcileCloneSet) syncCloneSet( err = podsScaleErr } if scaling { - return delayDuration, podsScaleErr + return podsScaleErr } - delayDuration, podsUpdateErr = r.syncControl.Update(updateSet, currentRevision, updateRevision, revisions, filteredPods, filteredPVCs) + podsUpdateErr = r.syncControl.Update(updateSet, currentRevision, updateRevision, revisions, filteredPods, filteredPVCs) if podsUpdateErr != nil { newStatus.Conditions = append(newStatus.Conditions, appsv1alpha1.CloneSetCondition{ Type: appsv1alpha1.CloneSetConditionFailedUpdate, @@ -401,13 +384,12 @@ func (r *ReconcileCloneSet) syncCloneSet( LastTransitionTime: metav1.Now(), Message: podsUpdateErr.Error(), }) - // If these is a delay duration, need not to return error to outside - if err == nil && delayDuration <= 0 { + if err == nil { err = podsUpdateErr } } - return delayDuration, err + return err } func (r *ReconcileCloneSet) getActiveRevisions(cs *appsv1alpha1.CloneSet, revisions []*apps.ControllerRevision) ( diff --git a/pkg/controller/cloneset/cloneset_event_handler.go b/pkg/controller/cloneset/cloneset_event_handler.go index 9ead71c1a5..df1a918a13 100644 --- a/pkg/controller/cloneset/cloneset_event_handler.go +++ b/pkg/controller/cloneset/cloneset_event_handler.go @@ -164,7 +164,6 @@ func (e *podEventHandler) Delete(evt event.DeleteEvent, q workqueue.RateLimiting klog.V(4).Infof("Pod %s/%s deleted, owner: %s", pod.Namespace, pod.Name, req.Name) clonesetutils.ScaleExpectations.ObserveScale(req.String(), expectations.Delete, pod.Name) - clonesetutils.UpdateExpectations.DeleteObject(req.String(), pod) q.Add(*req) } diff --git a/pkg/controller/cloneset/sync/api.go b/pkg/controller/cloneset/sync/api.go index e011ab1f55..17a4128a8f 100644 --- a/pkg/controller/cloneset/sync/api.go +++ b/pkg/controller/cloneset/sync/api.go @@ -17,8 +17,6 @@ limitations under the License. package sync import ( - "time" - appsv1alpha1 "github.com/openkruise/kruise/apis/apps/v1alpha1" clonesetutils "github.com/openkruise/kruise/pkg/controller/cloneset/utils" "github.com/openkruise/kruise/pkg/util/controllerfinder" @@ -41,7 +39,7 @@ type Interface interface { Update(cs *appsv1alpha1.CloneSet, currentRevision, updateRevision *apps.ControllerRevision, revisions []*apps.ControllerRevision, pods []*v1.Pod, pvcs []*v1.PersistentVolumeClaim, - ) (time.Duration, error) + ) error } type realControl struct { diff --git a/pkg/controller/cloneset/sync/cloneset_scale_test.go b/pkg/controller/cloneset/sync/cloneset_scale_test.go index 94e6c9019d..3f4ef7b430 100644 --- a/pkg/controller/cloneset/sync/cloneset_scale_test.go +++ b/pkg/controller/cloneset/sync/cloneset_scale_test.go @@ -80,10 +80,11 @@ func TestCreatePods(t *testing.T) { Name: "foo-id1", GenerateName: "foo-", Labels: map[string]string{ - appsv1alpha1.CloneSetInstanceID: "id1", - apps.ControllerRevisionHashLabelKey: "revision_abc", - "foo": "bar", - appspub.LifecycleStateKey: string(appspub.LifecycleStateNormal), + appsv1alpha1.CloneSetInstanceID: "id1", + apps.ControllerRevisionHashLabelKey: "revision_abc", + apps.DefaultDeploymentUniqueLabelKey: "revision_abc", + "foo": "bar", + appspub.LifecycleStateKey: string(appspub.LifecycleStateNormal), }, OwnerReferences: []metav1.OwnerReference{ { @@ -136,10 +137,11 @@ func TestCreatePods(t *testing.T) { Name: "foo-id3", GenerateName: "foo-", Labels: map[string]string{ - appsv1alpha1.CloneSetInstanceID: "id3", - apps.ControllerRevisionHashLabelKey: "revision_xyz", - "foo": "bar", - appspub.LifecycleStateKey: string(appspub.LifecycleStateNormal), + appsv1alpha1.CloneSetInstanceID: "id3", + apps.ControllerRevisionHashLabelKey: "revision_xyz", + apps.DefaultDeploymentUniqueLabelKey: "revision_xyz", + "foo": "bar", + appspub.LifecycleStateKey: string(appspub.LifecycleStateNormal), }, OwnerReferences: []metav1.OwnerReference{ { @@ -193,10 +195,11 @@ func TestCreatePods(t *testing.T) { Name: "foo-id4", GenerateName: "foo-", Labels: map[string]string{ - appsv1alpha1.CloneSetInstanceID: "id4", - apps.ControllerRevisionHashLabelKey: "revision_xyz", - "foo": "bar", - appspub.LifecycleStateKey: string(appspub.LifecycleStateNormal), + appsv1alpha1.CloneSetInstanceID: "id4", + apps.ControllerRevisionHashLabelKey: "revision_xyz", + apps.DefaultDeploymentUniqueLabelKey: "revision_xyz", + "foo": "bar", + appspub.LifecycleStateKey: string(appspub.LifecycleStateNormal), }, OwnerReferences: []metav1.OwnerReference{ { diff --git a/pkg/controller/cloneset/sync/cloneset_update.go b/pkg/controller/cloneset/sync/cloneset_update.go index 1e8375996a..79f444b575 100644 --- a/pkg/controller/cloneset/sync/cloneset_update.go +++ b/pkg/controller/cloneset/sync/cloneset_update.go @@ -17,6 +17,7 @@ limitations under the License. package sync import ( + "context" "fmt" "sort" "time" @@ -32,48 +33,54 @@ import ( utilfeature "github.com/openkruise/kruise/pkg/util/feature" "github.com/openkruise/kruise/pkg/util/inplaceupdate" "github.com/openkruise/kruise/pkg/util/lifecycle" - "github.com/openkruise/kruise/pkg/util/requeueduration" "github.com/openkruise/kruise/pkg/util/specifieddelete" "github.com/openkruise/kruise/pkg/util/updatesort" apps "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" ) func (c *realControl) Update(cs *appsv1alpha1.CloneSet, currentRevision, updateRevision *apps.ControllerRevision, revisions []*apps.ControllerRevision, pods []*v1.Pod, pvcs []*v1.PersistentVolumeClaim, -) (time.Duration, error) { +) error { - requeueDuration := requeueduration.Duration{} + key := clonesetutils.GetControllerKey(cs) coreControl := clonesetcore.New(cs) // 1. refresh states for all pods var modified bool for _, pod := range pods { - patched, duration, err := c.refreshPodState(cs, coreControl, pod) + patchedState, duration, err := c.refreshPodState(cs, coreControl, pod) if err != nil { - return 0, err + return err } else if duration > 0 { - requeueDuration.Update(duration) + clonesetutils.DurationStore.Push(key, duration) + } + // fix the pod-template-hash label for old pods before v1.1 + patchedHash, err := c.fixPodTemplateHashLabel(cs, pod) + if err != nil { + return err } - if patched { + if patchedState || patchedHash { modified = true } } if modified { - return requeueDuration.Get(), nil + return nil } if cs.Spec.UpdateStrategy.Paused { - return requeueDuration.Get(), nil + return nil } // 2. calculate update diff and the revision to update diffRes := calculateDiffsWithExpectation(cs, pods, currentRevision.Name, updateRevision.Name) if diffRes.updateNum == 0 { - return requeueDuration.Get(), nil + return nil } // 3. find all matched pods can update @@ -128,7 +135,7 @@ func (c *realControl) Update(cs *appsv1alpha1.CloneSet, if utilfeature.DefaultFeatureGate.Enabled(features.PodUnavailableBudgetUpdateGate) && len(waitUpdateIndexes) > 0 { pub, err = pubcontrol.GetPodUnavailableBudgetForPod(c.Client, c.controllerFinder, pods[waitUpdateIndexes[0]]) if err != nil { - return requeueDuration.Get(), err + return err } } // 6. update pods @@ -138,22 +145,23 @@ func (c *realControl) Update(cs *appsv1alpha1.CloneSet, if pub != nil { allowed, _, err := pubcontrol.PodUnavailableBudgetValidatePod(c.Client, pod, pubcontrol.NewPubControl(pub, c.controllerFinder, c.Client), pubcontrol.UpdateOperation, false) if err != nil { - return requeueDuration.Get(), err + return err // pub check does not pass, try again in seconds } else if !allowed { - return time.Second, nil + clonesetutils.DurationStore.Push(key, time.Second) + return nil } } duration, err := c.updatePod(cs, coreControl, targetRevision, revisions, pod, pvcs) if duration > 0 { - requeueDuration.Update(duration) + clonesetutils.DurationStore.Push(key, duration) } if err != nil { - return requeueDuration.Get(), err + return err } } - return requeueDuration.Get(), nil + return nil } func (c *realControl) refreshPodState(cs *appsv1alpha1.CloneSet, coreControl clonesetcore.Control, pod *v1.Pod) (bool, time.Duration, error) { @@ -198,6 +206,23 @@ func (c *realControl) refreshPodState(cs *appsv1alpha1.CloneSet, coreControl clo return false, res.DelayDuration, nil } +// fix the pod-template-hash label for old pods before v1.1 +func (c *realControl) fixPodTemplateHashLabel(cs *appsv1alpha1.CloneSet, pod *v1.Pod) (bool, error) { + if _, exists := pod.Labels[apps.DefaultDeploymentUniqueLabelKey]; exists { + return false, nil + } + patch := []byte(fmt.Sprintf(`{"metadata":{"labels":{"%s":"%s"}}}`, + apps.DefaultDeploymentUniqueLabelKey, + clonesetutils.GetShortHash(pod.Labels[apps.ControllerRevisionHashLabelKey]))) + pod = pod.DeepCopy() + if err := c.Patch(context.TODO(), pod, client.RawPatch(types.StrategicMergePatchType, patch)); err != nil { + klog.Warningf("CloneSet %s/%s failed to fix pod-template-hash to Pod %s: %v", cs.Namespace, cs.Name, pod.Name, err) + return false, err + } + clonesetutils.ResourceVersionExpectations.Expect(pod) + return true, nil +} + func (c *realControl) updatePod(cs *appsv1alpha1.CloneSet, coreControl clonesetcore.Control, updateRevision *apps.ControllerRevision, revisions []*apps.ControllerRevision, pod *v1.Pod, pvcs []*v1.PersistentVolumeClaim, @@ -257,7 +282,6 @@ func (c *realControl) updatePod(cs *appsv1alpha1.CloneSet, coreControl clonesetc if res.UpdateErr == nil { c.recorder.Eventf(cs, v1.EventTypeNormal, "SuccessfulUpdatePodInPlace", "successfully update pod %s in-place(revision %v)", pod.Name, updateRevision.Name) clonesetutils.ResourceVersionExpectations.Expect(&metav1.ObjectMeta{UID: pod.UID, ResourceVersion: res.NewResourceVersion}) - clonesetutils.UpdateExpectations.ExpectUpdated(clonesetutils.GetControllerKey(cs), updateRevision.Name, pod) return res.DelayDuration, nil } diff --git a/pkg/controller/cloneset/sync/cloneset_update_test.go b/pkg/controller/cloneset/sync/cloneset_update_test.go index edf6b0594c..8ef8e19478 100644 --- a/pkg/controller/cloneset/sync/cloneset_update_test.go +++ b/pkg/controller/cloneset/sync/cloneset_update_test.go @@ -99,7 +99,7 @@ func TestUpdate(t *testing.T) { }, expectedPods: []*v1.Pod{ { - ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{apps.ControllerRevisionHashLabelKey: "rev_new"}}, + ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{apps.ControllerRevisionHashLabelKey: "rev_new", apps.DefaultDeploymentUniqueLabelKey: "rev_new"}}, Spec: v1.PodSpec{ReadinessGates: []v1.PodReadinessGate{{ConditionType: appspub.InPlaceUpdateReady}}}, Status: v1.PodStatus{Phase: v1.PodRunning, Conditions: []v1.PodCondition{ {Type: v1.PodReady, Status: v1.ConditionTrue}, @@ -123,7 +123,7 @@ func TestUpdate(t *testing.T) { }, expectedPods: []*v1.Pod{ { - ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{apps.ControllerRevisionHashLabelKey: "rev_new"}, ResourceVersion: "1"}, + ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{apps.ControllerRevisionHashLabelKey: "rev_new", apps.DefaultDeploymentUniqueLabelKey: "rev_new"}, ResourceVersion: "1"}, Spec: v1.PodSpec{ReadinessGates: []v1.PodReadinessGate{{ConditionType: appspub.InPlaceUpdateReady}}}, Status: v1.PodStatus{Phase: v1.PodRunning, Conditions: []v1.PodCondition{ {Type: v1.PodReady, Status: v1.ConditionTrue}, @@ -142,8 +142,9 @@ func TestUpdate(t *testing.T) { pods: []*v1.Pod{ { ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{ - apps.ControllerRevisionHashLabelKey: "rev_old", - appsv1alpha1.CloneSetInstanceID: "id-0", + apps.ControllerRevisionHashLabelKey: "rev_old", + apps.DefaultDeploymentUniqueLabelKey: "rev_old", + appsv1alpha1.CloneSetInstanceID: "id-0", }}, Spec: v1.PodSpec{ReadinessGates: []v1.PodReadinessGate{{ConditionType: appspub.InPlaceUpdateReady}}}, Status: v1.PodStatus{Phase: v1.PodRunning, Conditions: []v1.PodCondition{ @@ -160,9 +161,10 @@ func TestUpdate(t *testing.T) { expectedPods: []*v1.Pod{ { ObjectMeta: metav1.ObjectMeta{Name: "pod-0", ResourceVersion: "1", Labels: map[string]string{ - apps.ControllerRevisionHashLabelKey: "rev_old", - appsv1alpha1.CloneSetInstanceID: "id-0", - appsv1alpha1.SpecifiedDeleteKey: "true", + apps.ControllerRevisionHashLabelKey: "rev_old", + apps.DefaultDeploymentUniqueLabelKey: "rev_old", + appsv1alpha1.CloneSetInstanceID: "id-0", + appsv1alpha1.SpecifiedDeleteKey: "true", }}, Spec: v1.PodSpec{ReadinessGates: []v1.PodReadinessGate{{ConditionType: appspub.InPlaceUpdateReady}}}, Status: v1.PodStatus{Phase: v1.PodRunning, Conditions: []v1.PodCondition{ @@ -196,8 +198,9 @@ func TestUpdate(t *testing.T) { pods: []*v1.Pod{ { ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{ - apps.ControllerRevisionHashLabelKey: "rev_old", - appsv1alpha1.CloneSetInstanceID: "id-0", + apps.ControllerRevisionHashLabelKey: "rev_old", + apps.DefaultDeploymentUniqueLabelKey: "rev_old", + appsv1alpha1.CloneSetInstanceID: "id-0", }}, Spec: v1.PodSpec{ ReadinessGates: []v1.PodReadinessGate{{ConditionType: appspub.InPlaceUpdateReady}}, @@ -221,9 +224,10 @@ func TestUpdate(t *testing.T) { expectedPods: []*v1.Pod{ { ObjectMeta: metav1.ObjectMeta{Name: "pod-0", ResourceVersion: "1", Labels: map[string]string{ - apps.ControllerRevisionHashLabelKey: "rev_old", - appsv1alpha1.CloneSetInstanceID: "id-0", - appsv1alpha1.SpecifiedDeleteKey: "true", + apps.ControllerRevisionHashLabelKey: "rev_old", + apps.DefaultDeploymentUniqueLabelKey: "rev_old", + appsv1alpha1.CloneSetInstanceID: "id-0", + appsv1alpha1.SpecifiedDeleteKey: "true", }}, Spec: v1.PodSpec{ ReadinessGates: []v1.PodReadinessGate{{ConditionType: appspub.InPlaceUpdateReady}}, @@ -264,8 +268,9 @@ func TestUpdate(t *testing.T) { pods: []*v1.Pod{ { ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{ - apps.ControllerRevisionHashLabelKey: "rev_old", - appsv1alpha1.CloneSetInstanceID: "id-0", + apps.ControllerRevisionHashLabelKey: "rev_old", + apps.DefaultDeploymentUniqueLabelKey: "rev_old", + appsv1alpha1.CloneSetInstanceID: "id-0", }}, Spec: v1.PodSpec{ ReadinessGates: []v1.PodReadinessGate{{ConditionType: appspub.InPlaceUpdateReady}}, @@ -290,9 +295,10 @@ func TestUpdate(t *testing.T) { { ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{ - apps.ControllerRevisionHashLabelKey: "rev_new", - appsv1alpha1.CloneSetInstanceID: "id-0", - appspub.LifecycleStateKey: string(appspub.LifecycleStateUpdating), + apps.ControllerRevisionHashLabelKey: "rev_new", + apps.DefaultDeploymentUniqueLabelKey: "rev_new", + appsv1alpha1.CloneSetInstanceID: "id-0", + appspub.LifecycleStateKey: string(appspub.LifecycleStateUpdating), }, Annotations: map[string]string{appspub.InPlaceUpdateStateKey: util.DumpJSON(appspub.InPlaceUpdateState{ Revision: "rev_new", @@ -341,8 +347,9 @@ func TestUpdate(t *testing.T) { pods: []*v1.Pod{ { ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{ - apps.ControllerRevisionHashLabelKey: "rev_old", - appsv1alpha1.CloneSetInstanceID: "id-0", + apps.ControllerRevisionHashLabelKey: "rev_old", + apps.DefaultDeploymentUniqueLabelKey: "rev_old", + appsv1alpha1.CloneSetInstanceID: "id-0", }}, Spec: v1.PodSpec{ ReadinessGates: []v1.PodReadinessGate{{ConditionType: appspub.InPlaceUpdateReady}}, @@ -367,9 +374,10 @@ func TestUpdate(t *testing.T) { { ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{ - apps.ControllerRevisionHashLabelKey: "rev_new", - appsv1alpha1.CloneSetInstanceID: "id-0", - appspub.LifecycleStateKey: string(appspub.LifecycleStateUpdating), + apps.ControllerRevisionHashLabelKey: "rev_new", + apps.DefaultDeploymentUniqueLabelKey: "rev_new", + appsv1alpha1.CloneSetInstanceID: "id-0", + appspub.LifecycleStateKey: string(appspub.LifecycleStateUpdating), }, Annotations: map[string]string{ appspub.InPlaceUpdateStateKey: util.DumpJSON(appspub.InPlaceUpdateState{ @@ -451,8 +459,9 @@ func TestUpdate(t *testing.T) { { ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{ - apps.ControllerRevisionHashLabelKey: "rev_new", - appsv1alpha1.CloneSetInstanceID: "id-0", + apps.ControllerRevisionHashLabelKey: "rev_new", + apps.DefaultDeploymentUniqueLabelKey: "rev_new", + appsv1alpha1.CloneSetInstanceID: "id-0", }, Annotations: map[string]string{ appspub.InPlaceUpdateStateKey: util.DumpJSON(appspub.InPlaceUpdateState{ @@ -533,8 +542,9 @@ func TestUpdate(t *testing.T) { { ObjectMeta: metav1.ObjectMeta{Name: "pod-0", Labels: map[string]string{ - apps.ControllerRevisionHashLabelKey: "rev_new", - appsv1alpha1.CloneSetInstanceID: "id-0", + apps.ControllerRevisionHashLabelKey: "rev_new", + apps.DefaultDeploymentUniqueLabelKey: "rev_new", + appsv1alpha1.CloneSetInstanceID: "id-0", }, Annotations: map[string]string{ appspub.InPlaceUpdateStateKey: util.DumpJSON(appspub.InPlaceUpdateState{ @@ -583,7 +593,7 @@ func TestUpdate(t *testing.T) { if len(mc.revisions) > 0 { currentRevision = mc.revisions[0] } - if _, err := ctrl.Update(mc.cs, currentRevision, mc.updateRevision, mc.revisions, mc.pods, mc.pvcs); err != nil { + if err := ctrl.Update(mc.cs, currentRevision, mc.updateRevision, mc.revisions, mc.pods, mc.pvcs); err != nil { t.Fatalf("Failed to test %s, manage error: %v", mc.name, err) } podList := v1.PodList{} diff --git a/pkg/controller/cloneset/utils/cloneset_utils.go b/pkg/controller/cloneset/utils/cloneset_utils.go index eebd7dad7a..9b0cc0a332 100644 --- a/pkg/controller/cloneset/utils/cloneset_utils.go +++ b/pkg/controller/cloneset/utils/cloneset_utils.go @@ -27,6 +27,7 @@ import ( utilclient "github.com/openkruise/kruise/pkg/util/client" "github.com/openkruise/kruise/pkg/util/expectations" utilfeature "github.com/openkruise/kruise/pkg/util/feature" + "github.com/openkruise/kruise/pkg/util/requeueduration" apps "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -45,8 +46,10 @@ var ( WriteRevisionHash = RevisionAdapterImpl.WriteRevisionHash ScaleExpectations = expectations.NewScaleExpectations() - UpdateExpectations = expectations.NewUpdateExpectations(RevisionAdapterImpl) ResourceVersionExpectations = expectations.NewResourceVersionExpectation() + + // DurationStore is a short cut for any sub-functions to notify the reconcile how long to wait to requeue + DurationStore = requeueduration.DurationStore{} ) type revisionAdapterImpl struct { @@ -57,20 +60,26 @@ func (r *revisionAdapterImpl) EqualToRevisionHash(_ string, obj metav1.Object, h if objHash == hash { return true } - return r.getShortHash(hash) == r.getShortHash(objHash) + return GetShortHash(hash) == GetShortHash(objHash) } func (r *revisionAdapterImpl) WriteRevisionHash(obj metav1.Object, hash string) { if obj.GetLabels() == nil { obj.SetLabels(make(map[string]string, 1)) } + // Note that controller-revision-hash defaults to be "{CLONESET_NAME}-{HASH}", + // and it will be "{HASH}" if CloneSetShortHash feature-gate has been enabled. + // But pod-template-hash should always be the short format. + shortHash := GetShortHash(hash) if utilfeature.DefaultFeatureGate.Enabled(features.CloneSetShortHash) { - hash = r.getShortHash(hash) + obj.GetLabels()[apps.ControllerRevisionHashLabelKey] = shortHash + } else { + obj.GetLabels()[apps.ControllerRevisionHashLabelKey] = hash } - obj.GetLabels()[apps.ControllerRevisionHashLabelKey] = hash + obj.GetLabels()[apps.DefaultDeploymentUniqueLabelKey] = shortHash } -func (r *revisionAdapterImpl) getShortHash(hash string) string { +func GetShortHash(hash string) string { // This makes sure the real hash must be the last '-' substring of revision name // vendor/k8s.io/kubernetes/pkg/controller/history/controller_history.go#82 list := strings.Split(hash, "-") diff --git a/test/e2e/framework/podunavailablebudget_util.go b/test/e2e/framework/podunavailablebudget_util.go index 04dd05a49a..33c01573ef 100644 --- a/test/e2e/framework/podunavailablebudget_util.go +++ b/test/e2e/framework/podunavailablebudget_util.go @@ -290,7 +290,8 @@ func (t *PodUnavailableBudgetTester) WaitForCloneSetMinReadyAndRunning(cloneSets } readyReplicas += inner.Status.ReadyReplicas count := *inner.Spec.Replicas - if inner.Status.UpdatedReplicas == count && count == inner.Status.ReadyReplicas && count == inner.Status.Replicas { + if inner.Generation == inner.Status.ObservedGeneration && inner.Status.UpdatedReplicas == count && + count == inner.Status.ReadyReplicas && count == inner.Status.Replicas { completed++ } } diff --git a/test/e2e/framework/workloadspread_util.go b/test/e2e/framework/workloadspread_util.go index 7456c000f9..235557d2fc 100644 --- a/test/e2e/framework/workloadspread_util.go +++ b/test/e2e/framework/workloadspread_util.go @@ -255,7 +255,7 @@ func (t *WorkloadSpreadTester) WaitForCloneSetRunning(cloneSet *appsv1alpha1.Clo return false, err } if inner.Generation == inner.Status.ObservedGeneration && *inner.Spec.Replicas == inner.Status.ReadyReplicas && - *inner.Spec.Replicas == inner.Status.Replicas { + *inner.Spec.Replicas == inner.Status.Replicas && *inner.Spec.Replicas == inner.Status.UpdatedReplicas { return true, nil } return false, nil diff --git a/test/e2e/policy/podunavailablebudget.go b/test/e2e/policy/podunavailablebudget.go index 9b0dc4634d..3bcb7d9b7a 100644 --- a/test/e2e/policy/podunavailablebudget.go +++ b/test/e2e/policy/podunavailablebudget.go @@ -33,6 +33,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" clientset "k8s.io/client-go/kubernetes" + "k8s.io/client-go/util/retry" utilpointer "k8s.io/utils/pointer" ) @@ -746,9 +747,15 @@ var _ = SIGDescribe("PodUnavailableBudget", func() { // update success image ginkgo.By(fmt.Sprintf("update CloneSet(%s.%s) success image", cloneset.Namespace, cloneset.Name)) - cloneset, _ = kc.AppsV1alpha1().CloneSets(cloneset.Namespace).Get(context.TODO(), cloneset.Name, metav1.GetOptions{}) - cloneset.Spec.Template.Spec.Containers[0].Image = NewWebserverImage - cloneset, err = kc.AppsV1alpha1().CloneSets(cloneset.Namespace).Update(context.TODO(), cloneset, metav1.UpdateOptions{}) + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + cloneset, err = kc.AppsV1alpha1().CloneSets(cloneset.Namespace).Get(context.TODO(), cloneset.Name, metav1.GetOptions{}) + if err != nil { + return err + } + cloneset.Spec.Template.Spec.Containers[0].Image = NewWebserverImage + cloneset, err = kc.AppsV1alpha1().CloneSets(cloneset.Namespace).Update(context.TODO(), cloneset, metav1.UpdateOptions{}) + return err + }) gomega.Expect(err).NotTo(gomega.HaveOccurred()) tester.WaitForCloneSetMinReadyAndRunning([]*appsv1alpha1.CloneSet{cloneset}, 1) @@ -865,14 +872,26 @@ var _ = SIGDescribe("PodUnavailableBudget", func() { // update success image ginkgo.By(fmt.Sprintf("update CloneSet(%s.%s) success image", cloneset.Namespace, cloneset.Name)) - clonesetIn1, _ = kc.AppsV1alpha1().CloneSets(clonesetIn1.Namespace).Get(context.TODO(), clonesetIn1.Name, metav1.GetOptions{}) - clonesetIn1.Spec.Template.Spec.Containers[0].Image = NewWebserverImage - clonesetIn1, err = kc.AppsV1alpha1().CloneSets(cloneset.Namespace).Update(context.TODO(), clonesetIn1, metav1.UpdateOptions{}) + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + clonesetIn1, err = kc.AppsV1alpha1().CloneSets(clonesetIn1.Namespace).Get(context.TODO(), clonesetIn1.Name, metav1.GetOptions{}) + if err != nil { + return err + } + clonesetIn1.Spec.Template.Spec.Containers[0].Image = NewWebserverImage + clonesetIn1, err = kc.AppsV1alpha1().CloneSets(cloneset.Namespace).Update(context.TODO(), clonesetIn1, metav1.UpdateOptions{}) + return err + }) gomega.Expect(err).NotTo(gomega.HaveOccurred()) // update success image - clonesetIn2, _ = kc.AppsV1alpha1().CloneSets(clonesetIn2.Namespace).Get(context.TODO(), clonesetIn2.Name, metav1.GetOptions{}) - clonesetIn2.Spec.Template.Spec.Containers[0].Image = NewWebserverImage - clonesetIn2, err = kc.AppsV1alpha1().CloneSets(clonesetIn2.Namespace).Update(context.TODO(), clonesetIn2, metav1.UpdateOptions{}) + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + clonesetIn2, err = kc.AppsV1alpha1().CloneSets(clonesetIn2.Namespace).Get(context.TODO(), clonesetIn2.Name, metav1.GetOptions{}) + if err != nil { + return err + } + clonesetIn2.Spec.Template.Spec.Containers[0].Image = NewWebserverImage + clonesetIn2, err = kc.AppsV1alpha1().CloneSets(clonesetIn2.Namespace).Update(context.TODO(), clonesetIn2, metav1.UpdateOptions{}) + return err + }) gomega.Expect(err).NotTo(gomega.HaveOccurred()) tester.WaitForCloneSetMinReadyAndRunning([]*appsv1alpha1.CloneSet{clonesetIn1, clonesetIn2}, 7) @@ -986,6 +1005,13 @@ var _ = SIGDescribe("PodUnavailableBudget", func() { sidecarTester.UpdateSidecarSet(sidecarSet) time.Sleep(time.Second) tester.WaitForCloneSetMinReadyAndRunning([]*appsv1alpha1.CloneSet{cloneset}, 2) + exceptSidecarSetStatus := &appsv1alpha1.SidecarSetStatus{ + MatchedPods: 10, + UpdatedPods: 10, + UpdatedReadyPods: 10, + ReadyPods: 10, + } + sidecarTester.WaitForSidecarSetMinReadyAndUpgrade(sidecarSet, exceptSidecarSetStatus, 2) ginkgo.By(fmt.Sprintf("check PodUnavailableBudget(%s.%s) Status", pub.Namespace, pub.Name)) expectStatus = &policyv1alpha1.PodUnavailableBudgetStatus{