Skip to content

Commit

Permalink
pub support pod unvailable label (openkruise#1004)
Browse files Browse the repository at this point in the history
  • Loading branch information
zmberg authored Jun 28, 2022
1 parent 1c23058 commit a421b6e
Show file tree
Hide file tree
Showing 10 changed files with 481 additions and 106 deletions.
40 changes: 40 additions & 0 deletions apis/apps/pub/pod_unavailable_label.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
Copyright 2022 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package pub

import (
"strings"
)

const (
// PubUnavailablePodLabelPrefix indicates if the pod has this label, both kruise workload and
// pub will determine that the pod is unavailable, even if pod.status.ready=true.
// Main users non-destructive offline and other scenarios
PubUnavailablePodLabelPrefix = "unavailable-pod.kruise.io/"
)

func HasUnavailableLabel(labels map[string]string) bool {
if len(labels) == 0 {
return false
}
for key := range labels {
if strings.HasPrefix(key, PubUnavailablePodLabelPrefix) {
return true
}
}
return false
}
5 changes: 5 additions & 0 deletions apis/policy/v1alpha1/podunavailablebudget_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,17 @@ import (
// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN!
// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized.

type PubOperation string

const (
// PubProtectOperationAnnotation indicates the pub protected Operation[DELETE,UPDATE]
// the following indicates the pub only protect DELETE,UPDATE Operation
// annotations[kruise.io/pub-protect-operations]=DELETE,UPDATE
// if the annotations do not exist, the default DELETE and UPDATE are protected
PubProtectOperationAnnotation = "kruise.io/pub-protect-operations"
// pod webhook operation
PubUpdateOperation PubOperation = "UPDATE"
PubDeleteOperation PubOperation = "DELETE"
)

// PodUnavailableBudgetSpec defines the desired state of PodUnavailableBudget
Expand Down
16 changes: 11 additions & 5 deletions pkg/control/pubcontrol/pub_control.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"reflect"
"strings"

appspub "github.com/openkruise/kruise/apis/apps/pub"
policyv1alpha1 "github.com/openkruise/kruise/apis/policy/v1alpha1"
"github.com/openkruise/kruise/pkg/control/sidecarcontrol"
"github.com/openkruise/kruise/pkg/util"
Expand All @@ -43,19 +44,24 @@ type commonControl struct {
func (c *commonControl) IsPodReady(pod *corev1.Pod) bool {
// 1. pod.Status.Phase == v1.PodRunning
// 2. pod.condition PodReady == true
return util.IsRunningAndReady(pod)
if !util.IsRunningAndReady(pod) {
return false
}

// unavailable label
return !appspub.HasUnavailableLabel(pod.Labels)
}

func (c *commonControl) IsPodUnavailableChanged(oldPod, newPod *corev1.Pod) bool {
// kruise workload in-place situation
if newPod == nil || oldPod == nil {
return true
}
// If pod.spec changed, pod will be in unavailable condition
if !reflect.DeepEqual(oldPod.Spec, newPod.Spec) {
klog.V(3).Infof("pod(%s/%s) specification changed, and maybe cause unavailability", newPod.Namespace, newPod.Name)
return true
}
// pod add unavailable label
if !appspub.HasUnavailableLabel(oldPod.Labels) && appspub.HasUnavailableLabel(newPod.Labels) {
return true
}
// pod other changes will not cause unavailability situation, then return false
return false
}
Expand Down
129 changes: 129 additions & 0 deletions pkg/control/pubcontrol/pub_control_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/*
Copyright 2021 The Kruise Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package pubcontrol

import (
"fmt"
"testing"

"github.com/openkruise/kruise/apis/apps/pub"
corev1 "k8s.io/api/core/v1"
)

func TestIsPodUnavailableChanged(t *testing.T) {
cases := []struct {
name string
getOldPod func() *corev1.Pod
getNewPod func() *corev1.Pod
expect bool
}{
{
name: "only annotations change",
getOldPod: func() *corev1.Pod {
demo := podDemo.DeepCopy()
return demo
},
getNewPod: func() *corev1.Pod {
demo := podDemo.DeepCopy()
demo.Annotations["add"] = "annotations"
return demo
},
expect: false,
},
{
name: "add unvailable label",
getOldPod: func() *corev1.Pod {
demo := podDemo.DeepCopy()
return demo
},
getNewPod: func() *corev1.Pod {
demo := podDemo.DeepCopy()
demo.Labels[fmt.Sprintf("%sdata", pub.PubUnavailablePodLabelPrefix)] = "true"
return demo
},
expect: true,
},
{
name: "image changed",
getOldPod: func() *corev1.Pod {
demo := podDemo.DeepCopy()
return demo
},
getNewPod: func() *corev1.Pod {
demo := podDemo.DeepCopy()
demo.Spec.Containers[0].Image = "nginx:v2"
return demo
},
expect: true,
},
}

for _, cs := range cases {
t.Run(cs.name, func(t *testing.T) {
control := commonControl{}
is := control.IsPodUnavailableChanged(cs.getOldPod(), cs.getNewPod())
if cs.expect != is {
t.Fatalf("IsPodUnavailableChanged failed")
}
})
}
}

func TestIsPodReady(t *testing.T) {
cases := []struct {
name string
getPod func() *corev1.Pod
expect bool
}{
{
name: "pod ready",
getPod: func() *corev1.Pod {
demo := podDemo.DeepCopy()
return demo
},
expect: true,
},
{
name: "pod not ready",
getPod: func() *corev1.Pod {
demo := podDemo.DeepCopy()
demo.Status.Conditions[0].Status = corev1.ConditionFalse
return demo
},
expect: false,
},
{
name: "pod contains unavailable label",
getPod: func() *corev1.Pod {
demo := podDemo.DeepCopy()
demo.Labels[fmt.Sprintf("%sdata", pub.PubUnavailablePodLabelPrefix)] = "true"
return demo
},
expect: false,
},
}

for _, cs := range cases {
t.Run(cs.name, func(t *testing.T) {
control := commonControl{}
is := control.IsPodReady(cs.getPod())
if cs.expect != is {
t.Fatalf("IsPodReady failed")
}
})
}
}
56 changes: 38 additions & 18 deletions pkg/control/pubcontrol/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package pubcontrol
import (
"context"
"fmt"
"strings"
"time"

policyv1alpha1 "github.com/openkruise/kruise/apis/policy/v1alpha1"
Expand All @@ -30,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/util/retry"
"k8s.io/klog/v2"
Expand All @@ -48,38 +50,47 @@ var ConflictRetry = wait.Backoff{
Jitter: 0.1,
}

type Operation string

const (
UpdateOperation = "UPDATE"
DeleteOperation = "DELETE"

// Marked pods will not be pub-protected, solving the scenario of force pod deletion
PodPubNoProtectionAnnotation = "pub.kruise.io/no-protect"

// related-pub annotation in pod
PodRelatedPubAnnotation = "kruise.io/related-pub"
)

// parameters:
// 1. allowed(bool) indicates whether to allow this update operation
// 2. err(error)
func PodUnavailableBudgetValidatePod(client client.Client, control PubControl, pub *policyv1alpha1.PodUnavailableBudget, pod *corev1.Pod, operation Operation, dryRun bool) (allowed bool, reason string, err error) {
// If the pod is not ready, it doesn't count towards healthy and we should not decrement
if !control.IsPodReady(pod) {
func PodUnavailableBudgetValidatePod(client client.Client, control PubControl, pod *corev1.Pod, operation policyv1alpha1.PubOperation, dryRun bool) (allowed bool, reason string, err error) {
klog.V(3).Infof("validating pod(%s/%s) operation(%s) for PodUnavailableBudget", pod.Namespace, pod.Name, operation)
// pods that contain annotations[pod.kruise.io/pub-no-protect]="true" will be ignore
// and will no longer check the pub quota
if pod.Annotations[PodPubNoProtectionAnnotation] == "true" {
klog.V(3).Infof("pod(%s/%s) contains annotations[%s]=true, then don't need check pub", pod.Namespace, pod.Name, PodPubNoProtectionAnnotation)
return true, "", nil
// If the pod is not ready, it doesn't count towards healthy and we should not decrement
} else if !control.IsPodReady(pod) {
klog.V(3).Infof("pod(%s/%s) is not ready, then don't need check pub", pod.Namespace, pod.Name)
return true, "", nil
}
// pod is in pub.Status.DisruptedPods or pub.Status.UnavailablePods, then don't need check it
if isPodRecordedInPub(pod.Name, pub) {
klog.V(5).Infof("pod(%s/%s) already is recorded in pub(%s/%s)", pod.Namespace, pod.Name, pub.Namespace, pub.Name)

// pub for pod
pub, err := control.GetPubForPod(pod)
if err != nil {
return false, "", err
// if there is no matching PodUnavailableBudget, just return true
} else if pub == nil {
return true, "", nil
} else if !isNeedPubProtection(pub, operation) {
klog.V(3).Infof("pod(%s/%s) operation(%s) is not in pub(%s) protection", pod.Namespace, pod.Name, pub.Name)
return true, "", nil
// pod is in pub.Status.DisruptedPods or pub.Status.UnavailablePods, then don't need check it
} else if isPodRecordedInPub(pod.Name, pub) {
klog.V(3).Infof("pod(%s/%s) already is recorded in pub(%s/%s)", pod.Namespace, pod.Name, pub.Namespace, pub.Name)
return true, "", nil
}

// for debug
// check and decrement pub quota
var conflictTimes int
var costOfGet, costOfUpdate time.Duration

refresh := false
var pubClone *policyv1alpha1.PodUnavailableBudget
err = retry.RetryOnConflict(ConflictRetry, func() error {
Expand Down Expand Up @@ -128,7 +139,7 @@ func PodUnavailableBudgetValidatePod(client client.Client, control PubControl, p

// If this is a dry-run, we don't need to go any further than that.
if dryRun {
klog.V(5).Infof("pod(%s) operation for pub(%s/%s) is a dry run", pod.Name, pubClone.Namespace, pubClone.Name)
klog.V(3).Infof("pod(%s) operation for pub(%s/%s) is a dry run", pod.Name, pubClone.Namespace, pubClone.Name)
return nil
}
klog.V(3).Infof("pub(%s/%s) update status(disruptedPods:%d, unavailablePods:%d, expectedCount:%d, desiredAvailable:%d, currentAvailable:%d, unavailableAllowed:%d)",
Expand Down Expand Up @@ -163,7 +174,7 @@ func PodUnavailableBudgetValidatePod(client client.Client, control PubControl, p
return true, "", nil
}

func checkAndDecrement(podName string, pub *policyv1alpha1.PodUnavailableBudget, operation Operation) error {
func checkAndDecrement(podName string, pub *policyv1alpha1.PodUnavailableBudget, operation policyv1alpha1.PubOperation) error {
if pub.Status.UnavailableAllowed <= 0 {
return errors.NewForbidden(policyv1alpha1.Resource("podunavailablebudget"), pub.Name, fmt.Errorf("pub unavailable allowed is negative"))
}
Expand All @@ -180,7 +191,7 @@ func checkAndDecrement(podName string, pub *policyv1alpha1.PodUnavailableBudget,
pub.Status.UnavailablePods = make(map[string]metav1.Time)
}

if operation == UpdateOperation {
if operation == policyv1alpha1.PubUpdateOperation {
pub.Status.UnavailablePods[podName] = metav1.Time{Time: time.Now()}
klog.V(3).Infof("pod(%s) is recorded in pub(%s/%s) UnavailablePods", podName, pub.Namespace, pub.Name)
} else {
Expand Down Expand Up @@ -212,3 +223,12 @@ func IsReferenceEqual(ref1, ref2 *policyv1alpha1.TargetReference) bool {
}
return gv1.Group == gv2.Group && ref1.Kind == ref2.Kind && ref1.Name == ref2.Name
}

func isNeedPubProtection(pub *policyv1alpha1.PodUnavailableBudget, operation policyv1alpha1.PubOperation) bool {
operationValue, ok := pub.Annotations[policyv1alpha1.PubProtectOperationAnnotation]
if !ok || operationValue == "" {
return true
}
operations := sets.NewString(strings.Split(operationValue, ",")...)
return operations.Has(string(operation))
}
Loading

0 comments on commit a421b6e

Please sign in to comment.