From 2d3e0be18718ce802744189579d8059219faeb64 Mon Sep 17 00:00:00 2001 From: Harshita Sao <84518563+harshitasao@users.noreply.github.com> Date: Mon, 15 Jul 2024 07:37:13 +0530 Subject: [PATCH 1/3] changed the scorecard badge link to the standard format and updated the domain (#1657) Signed-off-by: harshitasao --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 476b53b202..30c4ad5cc8 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![License](https://img.shields.io/badge/license-Apache%202-4EB1BA.svg)](https://www.apache.org/licenses/LICENSE-2.0.html) [![Go Report Card](https://goreportcard.com/badge/github.com/openkruise/kruise)](https://goreportcard.com/report/github.com/openkruise/kruise) [![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/2908/badge)](https://bestpractices.coreinfrastructure.org/en/projects/2908) -[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/openkruise/kruise/badge)](https://api.securityscorecards.dev/projects/github.com/openkruise/kruise) +[![OpenSSF Scorecard](https://api.scorecard.dev/projects/github.com/openkruise/kruise/badge)](https://scorecard.dev/viewer/?uri=github.com/openkruise/kruise) [![CircleCI](https://circleci.com/gh/openkruise/kruise.svg?style=svg)](https://circleci.com/gh/openkruise/kruise) [![codecov](https://codecov.io/gh/openkruise/kruise/branch/master/graph/badge.svg)](https://codecov.io/gh/openkruise/kruise) [![Contributor Covenant](https://img.shields.io/badge/Contributor%20Covenant-v2.0%20adopted-ff69b4.svg)](./CODE_OF_CONDUCT.md) From 9b1a88d0f0cebef36017a6e58649d76a5e323b70 Mon Sep 17 00:00:00 2001 From: Kuromesi <87558945+Kuromesi@users.noreply.github.com> Date: Mon, 22 Jul 2024 10:11:19 +0800 Subject: [PATCH 2/3] add support for external certs (#1665) Signed-off-by: Kuromesi --- apis/apps/v1alpha1/daemonset_types.go | 2 +- .../crd/bases/apps.kruise.io_daemonsets.yaml | 2 +- config/manager/manager.yaml | 2 +- pkg/features/kruise_features.go | 4 + .../util/configuration/configuration.go | 20 ++++ .../util/controller/webhook_controller.go | 9 +- pkg/webhook/util/crd/crd.go | 18 +++ pkg/webhook/util/util.go | 34 ++++++ pkg/webhook/util/writer/certwriter.go | 7 +- pkg/webhook/util/writer/external.go | 109 ++++++++++++++++++ pkg/webhook/util/writer/external_test.go | 108 +++++++++++++++++ 11 files changed, 307 insertions(+), 8 deletions(-) create mode 100644 pkg/webhook/util/writer/external.go create mode 100644 pkg/webhook/util/writer/external_test.go diff --git a/apis/apps/v1alpha1/daemonset_types.go b/apis/apps/v1alpha1/daemonset_types.go index aec3f7df5e..d5102364d7 100644 --- a/apis/apps/v1alpha1/daemonset_types.go +++ b/apis/apps/v1alpha1/daemonset_types.go @@ -91,7 +91,7 @@ type RollingUpdateDaemonSet struct { // pod is available (Ready for at least minReadySeconds) the old DaemonSet pod // on that node is marked deleted. If the old pod becomes unavailable for any // reason (Ready transitions to false, is evicted, or is drained) an updated - // pod is immediatedly created on that node without considering surge limits. + // pod is immediately created on that node without considering surge limits. // Allowing surge implies the possibility that the resources consumed by the // daemonset on any given node can double if the readiness check fails, and // so resource intensive daemonsets should take into account that they may diff --git a/config/crd/bases/apps.kruise.io_daemonsets.yaml b/config/crd/bases/apps.kruise.io_daemonsets.yaml index 75e4711361..25b0360645 100644 --- a/config/crd/bases/apps.kruise.io_daemonsets.yaml +++ b/config/crd/bases/apps.kruise.io_daemonsets.yaml @@ -253,7 +253,7 @@ spec: pod is available (Ready for at least minReadySeconds) the old DaemonSet pod on that node is marked deleted. If the old pod becomes unavailable for any reason (Ready transitions to false, is evicted, or is drained) an updated - pod is immediatedly created on that node without considering surge limits. + pod is immediately created on that node without considering surge limits. Allowing surge implies the possibility that the resources consumed by the daemonset on any given node can double if the readiness check fails, and so resource intensive daemonsets should take into account that they may diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 9a0538bd2d..6aacbf55f8 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -35,7 +35,7 @@ spec: - --enable-leader-election - --logtostderr=true - --v=5 - - --feature-gates=AllAlpha=true + - --feature-gates=AllAlpha=true,EnableExternalCerts=false image: controller:latest imagePullPolicy: Always securityContext: diff --git a/pkg/features/kruise_features.go b/pkg/features/kruise_features.go index 6b70741e33..943c59d659 100644 --- a/pkg/features/kruise_features.go +++ b/pkg/features/kruise_features.go @@ -122,6 +122,9 @@ const ( // Enables a StatefulSet to start from an arbitrary non zero ordinal StatefulSetStartOrdinal featuregate.Feature = "StatefulSetStartOrdinal" + + // Use certs generated externally + EnableExternalCerts featuregate.Feature = "EnableExternalCerts" ) var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ @@ -154,6 +157,7 @@ var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ EnhancedLivenessProbeGate: {Default: false, PreRelease: featuregate.Alpha}, RecreatePodWhenChangeVCTInCloneSetGate: {Default: false, PreRelease: featuregate.Alpha}, StatefulSetStartOrdinal: {Default: false, PreRelease: featuregate.Alpha}, + EnableExternalCerts: {Default: false, PreRelease: featuregate.Alpha}, } func init() { diff --git a/pkg/webhook/util/configuration/configuration.go b/pkg/webhook/util/configuration/configuration.go index e4aea3f1cc..fec45790af 100644 --- a/pkg/webhook/util/configuration/configuration.go +++ b/pkg/webhook/util/configuration/configuration.go @@ -28,6 +28,8 @@ import ( clientset "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" + "github.com/openkruise/kruise/pkg/features" + utilfeature "github.com/openkruise/kruise/pkg/util/feature" "github.com/openkruise/kruise/pkg/webhook/types" webhookutil "github.com/openkruise/kruise/pkg/webhook/util" ) @@ -46,6 +48,24 @@ func Ensure(kubeClient clientset.Interface, handlers map[string]types.HandlerGet if err != nil { return fmt.Errorf("not found ValidatingWebhookConfiguration %s", validatingWebhookConfigurationName) } + + if utilfeature.DefaultFeatureGate.Enabled(features.EnableExternalCerts) { + // if using external certs, only check the caBundle of webhook + for _, wh := range mutatingConfig.Webhooks { + if wh.ClientConfig.CABundle == nil { + return fmt.Errorf("caBundle of MutatingWebhookConfiguration %s is nil", mutatingWebhookConfigurationName) + + } + } + + for _, wh := range validatingConfig.Webhooks { + if wh.ClientConfig.CABundle == nil { + return fmt.Errorf("caBundle of ValidatingWebhookConfiguration %s is nil", mutatingWebhookConfigurationName) + } + } + return nil + } + // if using certs generated by kruise, update webhook configurations oldMutatingConfig := mutatingConfig.DeepCopy() oldValidatingConfig := validatingConfig.DeepCopy() diff --git a/pkg/webhook/util/controller/webhook_controller.go b/pkg/webhook/util/controller/webhook_controller.go index 196b6943ee..c675c00a0d 100644 --- a/pkg/webhook/util/controller/webhook_controller.go +++ b/pkg/webhook/util/controller/webhook_controller.go @@ -41,6 +41,8 @@ import ( "k8s.io/klog/v2" extclient "github.com/openkruise/kruise/pkg/client" + "github.com/openkruise/kruise/pkg/features" + utilfeature "github.com/openkruise/kruise/pkg/util/feature" webhooktypes "github.com/openkruise/kruise/pkg/webhook/types" webhookutil "github.com/openkruise/kruise/pkg/webhook/util" "github.com/openkruise/kruise/pkg/webhook/util/configuration" @@ -233,7 +235,11 @@ func (c *Controller) sync() error { var err error certWriterType := webhookutil.GetCertWriter() - if certWriterType == writer.FsCertWriter || (len(certWriterType) == 0 && len(webhookutil.GetHost()) != 0) { + if utilfeature.DefaultFeatureGate.Enabled(features.EnableExternalCerts) { + certWriter, err = writer.NewExternalCertWriter(writer.ExternalCertWriterOptions{ + Clientset: c.kubeClient, + }) + } else if certWriterType == writer.FsCertWriter || (len(certWriterType) == 0 && len(webhookutil.GetHost()) != 0) { certWriter, err = writer.NewFSCertWriter(writer.FSCertWriterOptions{ Path: webhookutil.GetCertDir(), }) @@ -254,7 +260,6 @@ func (c *Controller) sync() error { if err := writer.WriteCertsToDir(webhookutil.GetCertDir(), certs); err != nil { return fmt.Errorf("failed to write certs to dir: %v", err) } - if err := configuration.Ensure(c.kubeClient, c.handlers, certs.CACert); err != nil { return fmt.Errorf("failed to ensure configuration: %v", err) } diff --git a/pkg/webhook/util/crd/crd.go b/pkg/webhook/util/crd/crd.go index 720767afc0..2949679f3a 100644 --- a/pkg/webhook/util/crd/crd.go +++ b/pkg/webhook/util/crd/crd.go @@ -32,6 +32,8 @@ import ( "k8s.io/apimachinery/pkg/runtime/schema" "github.com/openkruise/kruise/apis" + "github.com/openkruise/kruise/pkg/features" + utilfeature "github.com/openkruise/kruise/pkg/util/feature" webhookutil "github.com/openkruise/kruise/pkg/webhook/util" ) @@ -49,6 +51,21 @@ func Ensure(client apiextensionsclientset.Interface, lister apiextensionslisters return fmt.Errorf("failed to list crds: %v", err) } + if utilfeature.DefaultFeatureGate.Enabled(features.EnableExternalCerts) { + for _, crd := range crdList { + if len(crd.Spec.Versions) == 0 || crd.Spec.Conversion == nil || crd.Spec.Conversion.Strategy != apiextensionsv1.WebhookConverter { + continue + } + if !kruiseScheme.Recognizes(schema.GroupVersionKind{Group: crd.Spec.Group, Version: crd.Spec.Versions[0].Name, Kind: crd.Spec.Names.Kind}) { + continue + } + + if crd.Spec.Conversion.Webhook == nil || crd.Spec.Conversion.Webhook.ClientConfig == nil || crd.Spec.Conversion.Webhook.ClientConfig.CABundle == nil { + return fmt.Errorf("bad conversion configuration of CRD %s", crd.Name) + } + } + return nil + } webhookConfig := apiextensionsv1.WebhookClientConfig{ CABundle: caBundle, } @@ -85,5 +102,6 @@ func Ensure(client apiextensionsclientset.Interface, lister apiextensionslisters } } } + return nil } diff --git a/pkg/webhook/util/util.go b/pkg/webhook/util/util.go index 442c25b268..c1018dd1a6 100644 --- a/pkg/webhook/util/util.go +++ b/pkg/webhook/util/util.go @@ -19,6 +19,7 @@ package util import ( "os" "strconv" + "time" "k8s.io/klog/v2" @@ -69,3 +70,36 @@ func GetCertDir() string { func GetCertWriter() string { return os.Getenv("WEBHOOK_CERT_WRITER") } + +var ( + renewBefore time.Duration +) + +func GetRenewBeforeTime() time.Duration { + if renewBefore != 0 { + return renewBefore + } + renewBefore = 6 * 30 * 24 * time.Hour + if s := os.Getenv("CERTS_RENEW_BEFORE"); len(s) > 0 { + t, err := strconv.Atoi(s[0 : len(s)-1]) + if err != nil { + klog.Errorf("failed to parese time %s: %v", s[0:len(s)-1], err) + return renewBefore + } + suffix := s[len(s)-1] + if suffix == 'd' { + renewBefore = time.Duration(t) * 7 * time.Hour + } else if suffix == 'm' { + renewBefore = time.Duration(t) * 30 * time.Hour + } else if suffix == 'y' { + renewBefore = time.Duration(t) * 365 * time.Hour + } else { + klog.Errorf("unknown date suffix %c", suffix) + } + } + if renewBefore <= 0 { + klog.Error("renewBefore time can not be less or equal than 0") + renewBefore = 6 * 30 * 24 * time.Hour + } + return renewBefore +} diff --git a/pkg/webhook/util/writer/certwriter.go b/pkg/webhook/util/writer/certwriter.go index 7fe3271d84..ec77e11d49 100644 --- a/pkg/webhook/util/writer/certwriter.go +++ b/pkg/webhook/util/writer/certwriter.go @@ -23,6 +23,7 @@ import ( "k8s.io/klog/v2" + "github.com/openkruise/kruise/pkg/webhook/util" "github.com/openkruise/kruise/pkg/webhook/util/generator" ) @@ -61,7 +62,8 @@ func handleCommon(dnsName string, ch certReadWriter) (*generator.Artifacts, bool } // Recreate the cert if it's invalid. - valid := validCert(certs, dnsName) + renewBefore := util.GetRenewBeforeTime() + valid := validCert(certs, dnsName, time.Now().Add(renewBefore)) if !valid { klog.Info("cert is invalid or expired, regenerating a new one") certs, err = ch.overwrite(certs.ResourceVersion) @@ -98,10 +100,9 @@ type certReadWriter interface { overwrite(resourceVersion string) (*generator.Artifacts, error) } -func validCert(certs *generator.Artifacts, dnsName string) bool { +func validCert(certs *generator.Artifacts, dnsName string, expired time.Time) bool { if certs == nil { return false } - expired := time.Now().AddDate(0, 6, 0) return generator.ValidCACert(certs.Key, certs.Cert, certs.CACert, dnsName, expired) } diff --git a/pkg/webhook/util/writer/external.go b/pkg/webhook/util/writer/external.go new file mode 100644 index 0000000000..7330b5f251 --- /dev/null +++ b/pkg/webhook/util/writer/external.go @@ -0,0 +1,109 @@ +/* +Copyright 2024 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package writer + +import ( + "bytes" + "context" + "errors" + "fmt" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clientset "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + + "github.com/openkruise/kruise/pkg/webhook/util/generator" +) + +const ( + mutatingWebhookConfigurationName = "kruise-mutating-webhook-configuration" +) + +var currentExternalCerts *generator.Artifacts + +// externalCertWriter provisions the certificate by reading from the k8s mutating webhook configuration. +type externalCertWriter struct { + *ExternalCertWriterOptions +} + +// ExternalCertWriterOptions is options for constructing a externalCertWriter. +type ExternalCertWriterOptions struct { + Clientset clientset.Interface +} + +var _ CertWriter = &externalCertWriter{} + +func (ops *ExternalCertWriterOptions) validate() error { + if ops.Clientset == nil { + return errors.New("client must be set in externalCertWriterOptions") + } + return nil +} + +func NewExternalCertWriter(ops ExternalCertWriterOptions) (CertWriter, error) { + err := ops.validate() + if err != nil { + return nil, err + } + return &externalCertWriter{ExternalCertWriterOptions: &ops}, nil +} + +func (s *externalCertWriter) EnsureCert(dnsName string) (*generator.Artifacts, bool, error) { + // Read certs from mutating webhook configuration generated by external + certs, err := s.read() + if err != nil { + return nil, false, err + } + // check if the certs are updated since last read + if currentExternalCerts != nil && compareCerts(certs, currentExternalCerts) { + klog.Info("external certs are not updated") + return certs, false, nil + } + + currentExternalCerts = certs + return certs, true, nil +} + +var _ certReadWriter = &externalCertWriter{} + +func (s *externalCertWriter) write() (*generator.Artifacts, error) { + return nil, nil +} + +func (s *externalCertWriter) overwrite(resourceVersion string) (*generator.Artifacts, error) { + return nil, nil +} + +func (s *externalCertWriter) read() (*generator.Artifacts, error) { + mutatingConfig, err := s.Clientset.AdmissionregistrationV1().MutatingWebhookConfigurations().Get(context.TODO(), mutatingWebhookConfigurationName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("not found MutatingWebhookConfiguration %s", mutatingWebhookConfigurationName) + } + if len(mutatingConfig.Webhooks) == 0 { + return nil, fmt.Errorf("not found webhook in MutatingWebhookConfiguration %s", mutatingWebhookConfigurationName) + } + + caBundle := mutatingConfig.Webhooks[0].ClientConfig.CABundle + return &generator.Artifacts{CACert: caBundle}, nil +} + +func compareCerts(certsA, certsB *generator.Artifacts) bool { + if !bytes.Equal(certsA.CACert, certsB.CACert) || !bytes.Equal(certsA.CAKey, certsB.CAKey) || !bytes.Equal(certsA.Cert, certsB.Cert) || !bytes.Equal(certsA.Key, certsB.Key) { + return false + } + return true +} diff --git a/pkg/webhook/util/writer/external_test.go b/pkg/webhook/util/writer/external_test.go new file mode 100644 index 0000000000..d6afaecdd9 --- /dev/null +++ b/pkg/webhook/util/writer/external_test.go @@ -0,0 +1,108 @@ +/* +Copyright 2024 The Kruise Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package writer + +import ( + "context" + "testing" + + "github.com/onsi/gomega" + "github.com/openkruise/kruise/pkg/webhook/util/generator" + v1 "k8s.io/api/admissionregistration/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" +) + +func TestEnsureCert(t *testing.T) { + g := gomega.NewGomegaWithT(t) + type ExpectOut struct { + changed bool + errorHappens bool + } + teatCases := []struct { + name string + needRefreshCertCache bool + expect ExpectOut + }{ + { + name: "test get certs from mutating webhook configuration", + needRefreshCertCache: true, + expect: ExpectOut{ + changed: true, + errorHappens: false, + }, + }, + { + name: "test get certs from cache", + needRefreshCertCache: false, + expect: ExpectOut{ + changed: false, + errorHappens: false, + }, + }, + } + dnsName := "kruise-webhook-service.svc" + client := fake.NewSimpleClientset() + + // generate certs and secret + certGenerator := &generator.SelfSignedCertGenerator{} + // certs expire after 10 years + certs, err := certGenerator.Generate(dnsName) + g.Expect(err).NotTo(gomega.HaveOccurred()) + wh := &v1.MutatingWebhookConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: mutatingWebhookConfigurationName, + }, + Webhooks: []v1.MutatingWebhook{ + { + ClientConfig: v1.WebhookClientConfig{ + CABundle: certs.CACert, + }, + }, + }, + } + + _, err = client.AdmissionregistrationV1().MutatingWebhookConfigurations().Create(context.TODO(), wh, metav1.CreateOptions{}) + g.Expect(err).NotTo(gomega.HaveOccurred()) + + for _, tc := range teatCases { + t.Run(tc.name, func(t *testing.T) { + if tc.needRefreshCertCache { + RefreshCurrentExternalCertsCache() + } else { + UpdateCurrentExternalCertsCache(certs) + } + + externalCertWriter, err := NewExternalCertWriter(ExternalCertWriterOptions{ + Clientset: client, + }) + g.Expect(err).NotTo(gomega.HaveOccurred()) + externalCerts, changed, err := externalCertWriter.EnsureCert(dnsName) + g.Expect(externalCerts.CACert).Should(gomega.Equal(certs.CACert)) + g.Expect(changed).Should(gomega.Equal(tc.expect.changed)) + g.Expect(err != nil).Should(gomega.Equal(tc.expect.errorHappens)) + }) + } +} + +func RefreshCurrentExternalCertsCache() { + currentExternalCerts = nil +} + +func UpdateCurrentExternalCertsCache(externalCerts *generator.Artifacts) { + currentExternalCerts.CACert = externalCerts.CACert +} From c5c6df7176d2b185cc28e10789d2af46c9e9cd2b Mon Sep 17 00:00:00 2001 From: Jeremy Date: Mon, 22 Jul 2024 10:13:19 +0800 Subject: [PATCH 3/3] add proposal for support progressDeadlineSeconds in CloneSet (#1520) more docs fix mdl ci complete the proposal fix update fix typo Bump crate-ci/typos from 1.22.9 to 1.23.1 (#1658) Bumps [crate-ci/typos](https://github.com/crate-ci/typos) from 1.22.9 to 1.23.1. - [Release notes](https://github.com/crate-ci/typos/releases) - [Changelog](https://github.com/crate-ci/typos/blob/master/CHANGELOG.md) - [Commits](https://github.com/crate-ci/typos/compare/v1.22.9...v1.23.1) --- updated-dependencies: - dependency-name: crate-ci/typos dependency-type: direct:production update-type: version-update:semver-minor ... Bump actions/upload-artifact from 4.3.3 to 4.3.4 (#1659) Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.3.3 to 4.3.4. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/65462800fd760344b1a7b4382951275a0abb4808...0b2256b8c012f0828dc542b3febcab082c67f72b) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-patch ... changed the scorecard badge link to the standard format and updated the domain (#1657) fix typo Signed-off-by: hantmac --- apis/apps/v1alpha1/daemonset_types.go | 3 +- ...loneset-support-progressDeadlineSeconds.md | 161 ++++++++++++++++++ 2 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 docs/proposals/20240309-cloneset-support-progressDeadlineSeconds.md diff --git a/apis/apps/v1alpha1/daemonset_types.go b/apis/apps/v1alpha1/daemonset_types.go index d5102364d7..2cce16500c 100644 --- a/apis/apps/v1alpha1/daemonset_types.go +++ b/apis/apps/v1alpha1/daemonset_types.go @@ -17,11 +17,12 @@ limitations under the License. package v1alpha1 import ( - appspub "github.com/openkruise/kruise/apis/apps/pub" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" + + appspub "github.com/openkruise/kruise/apis/apps/pub" ) // DaemonSetUpdateStrategy is a struct used to control the update strategy for a DaemonSet. diff --git a/docs/proposals/20240309-cloneset-support-progressDeadlineSeconds.md b/docs/proposals/20240309-cloneset-support-progressDeadlineSeconds.md new file mode 100644 index 0000000000..2917afc1da --- /dev/null +++ b/docs/proposals/20240309-cloneset-support-progressDeadlineSeconds.md @@ -0,0 +1,161 @@ +--- +title: CloneSet +authors: +- "@hantmac" + reviewers: +- "@Fei-Guo" +- "@furykerry" +- "@FillZpp" + creation-date: 2024-03-10 + last-updated: 2024-03-10 + status: implementable +--- + +# Support progressDeadlineSeconds in CloneSet +Table of Contents +================= + +- [Support progressDeadlineSeconds in CloneSet](#Support progressDeadlineSeconds in CloneSet) +- [Table of Contents](#table-of-contents) + - [Motivation](#motivation) + - [Proposal](#proposal) + - [1. add .spec.progressDeadlineSeconds field](#1add-.spec.progressDeadlineSeconds-field) + - [2. The behavior of progressDeadlineSeconds](#2the-behavior-of-progressDeadlineSeconds) + - [3. handle the logic](#2handle-the-logic) + +## Motivation + +`.spec.progressDeadlineSeconds` is an optional field in Deployment that specifies the number of seconds one wants to wait for their Deployment to progress before the system reports back that the Deployment has failed progressing. +Once the deadline has been exceeded, the Deployment controller adds a DeploymentCondition with the following attributes to the Deployment's `.status.conditions`: +``` +type: Progressing +status: "False" +reason: ProgressDeadlineExceeded +``` + +This is useful for users to control the progress of the deployment. +So we should add support for `progressDeadlineSeconds` in CloneSet as well. + +## Proposal +Firstly, add the `progressDeadlineSeconds` field to the CloneSetSpec. +Then add the handle logic in cloneSet controller to handle the `progressDeadlineSeconds` field. + +### 1. add .spec.progressDeadlineSeconds field +The default value of `progressDeadlineSeconds` is 600 seconds according to the [official document](https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#progress-deadline-seconds). +```yaml +apiVersion: apps.kruise.io/v1alpha1 +kind: CloneSet +metadata: + name: cloneset-example +spec: + replicas: 3 + progressDeadlineSeconds: 600 + selector: + matchLabels: + app: nginx + template: + metadata: + labels: + app: nginx + spec: + containers: + - name: nginx + image: nginx:1.14.2 + ``` + +### 2. The behavior of progressDeadlineSeconds +However, the behavior of `progressDeadlineSeconds` in CloneSet might differ from its behavior in Deployment due to the support of partition in CloneSet. If a CloneSet is paused due to partition, it's debatable whether the paused time should be included in the progress deadline. +Here are two possible interpretations of `progressDeadlineSeconds` in the context of CloneSet: +1. `progressDeadlineSeconds` could be redefined as the time taken for the CloneSet to reach completion or the "paused" state due to partition. In this case, the time during which the CloneSet is paused would NOT be included in the progress deadline. +2. Secondly, `progressDeadlineSeconds` could only be supported if the partition is not set. This means that if a partition is set, the `progressDeadlineSeconds` field would not be applicable or has no effect. + +After the discussion of the community, we choose the first interpretation.So we should re-set the progressDeadlineSeconds when the CloneSet reach completion OR "the paused state". + +### 3. handle the logic +In cloneset controller, we should add the logic to handle the `progressDeadlineSeconds` field. We firstly add a timer to check the progress of the CloneSet. +If the progress exceeds the `progressDeadlineSeconds`, we should add a CloneSetCondition to the CloneSet's `.status.conditions`: +```go +// add a timer to check the progress of the CloneSet +if cloneSet.Spec.ProgressDeadlineSeconds != nil { + // handle the logic + starttime := time.Now() + ... + if time.Now().After(starttime.Add(time.Duration(*cloneSet.Spec.ProgressDeadlineSeconds) * time.Second)) { + newStatus.Conditions = append(newStatus.Conditions, appsv1alpha1.CloneSetCondition{ + Type: appsv1alpha1.CloneSetProgressing, + Status: corev1.ConditionFalse, + Reason: appsv1alpha1.CloneSetProgressDeadlineExceeded, + }) + } +} +``` + +When the CloneSet reaches the "paused" state, we should reset the timer to avoid the progress deadline being exceeded. +And we check the progress of the CloneSet in the `syncCloneSetStatus` function. If the progress exceeds the `progressDeadlineSeconds`, we should add a CloneSetCondition to the CloneSet's `.status.conditions`: + +```go +const ( + CloneSetProgressDeadlineExceeded CloneSetConditionReason = "ProgressDeadlineExceeded" + CloneSetConditionTypeProgressing CloneSetConditionType = "Progressing" +) +``` + +```go +func (c *CloneSetController) syncCloneSetStatus(cloneSet *appsv1alpha1.CloneSet, newStatus *appsv1alpha1.CloneSetStatus) error { + ... + if cloneSet.Spec.ProgressDeadlineSeconds != nil { + // handle the logic + if time.Now().After(starttime.Add(time.Duration(*cloneSet.Spec.ProgressDeadlineSeconds) * time.Second)) { + newStatus.Conditions = append(newStatus.Conditions, appsv1alpha1.CloneSetCondition{ + Type: appsv1alpha1.CloneSetProgressing, + Status: corev1.ConditionFalse, + Reason: appsv1alpha1.CloneSetProgressDeadlineExceeded, + }) + } + } + ... +} +``` + +When the CloneSet reaches the "paused" state, we should reset the timer to avoid the progress deadline being exceeded. +```go +func (c *CloneSetController) syncCloneSetStatus(cloneSet *appsv1alpha1.CloneSet, newStatus *appsv1alpha1.CloneSetStatus) error { + ... + // reset the starttime when the CloneSet reaches the "paused" state or complete state + if cloneSet.Status.UpdatedReadyReplicas == cloneSet.Status.Replicas || replicas - updatedReplicas = partition { + starttime = time.Now() + } + + if cloneSet.Spec.Paused { + starttime = time.Now() + } + ... + } + + ... +} +``` + +And we can save the starttime in the `LastUpdateTime` in the CloneSet's `.status.conditions`: +``` +status: + conditions: + - lastTransitionTime: "2021-11-26T20:52:12Z" + lastUpdateTime: "2021-11-26T20:52:12Z" + message: CloneSet has minimum availability. + reason: MinimumReplicasAvailable + status: "True" + type: Available + - lastTransitionTime: "2021-11-26T20:52:12Z" + lastUpdateTime: "2021-11-26T20:52:12Z" + message: 'progress deadline exceeded' + reason: ProgressDeadlineExceeded + status: "False" + type: Progressing +``` + +## Implementation History + +- [ ] 06/07/2024: Proposal submission + +