From db3c178250e204fff471453d7ea75d5a7ae69bca Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Mon, 7 Oct 2024 15:31:18 -0400 Subject: [PATCH 01/12] starting pdb stuff --- api/datadoghq/v2alpha1/datadogagent_types.go | 5 ++++ .../component/clusteragent/utils.go | 22 +++++++++++++++++ .../datadogagent/controller_reconcile_v2.go | 14 ++++++++--- .../feature/enabledefault/feature.go | 24 +++++++++++++++++-- .../controller/datadogagent/feature/ids.go | 2 ++ 5 files changed, 62 insertions(+), 5 deletions(-) diff --git a/api/datadoghq/v2alpha1/datadogagent_types.go b/api/datadoghq/v2alpha1/datadogagent_types.go index 7f0d998aa..7269418ea 100644 --- a/api/datadoghq/v2alpha1/datadogagent_types.go +++ b/api/datadoghq/v2alpha1/datadogagent_types.go @@ -1245,6 +1245,11 @@ type GlobalConfig struct { // FIPS contains configuration used to customize the FIPS proxy sidecar. FIPS *FIPSConfig `json:"fips,omitempty"` + + // PodDisruptionBudget enables the creation of a PodDisruptionBudget. + // Default: false + // +optional + PodDisruptionBudget *bool `json:"podDisruptionBudget,omitempty"` } // DatadogCredentials is a generic structure that holds credentials to access Datadog. diff --git a/internal/controller/datadogagent/component/clusteragent/utils.go b/internal/controller/datadogagent/component/clusteragent/utils.go index 653ce2596..f2aa3c7b0 100644 --- a/internal/controller/datadogagent/component/clusteragent/utils.go +++ b/internal/controller/datadogagent/component/clusteragent/utils.go @@ -15,11 +15,16 @@ import ( "github.com/DataDog/datadog-operator/pkg/controller/utils/comparison" corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/version" ) +const ( + pdbMinAvailableInstances = 1 +) + // GetClusterAgentService returns the Cluster-Agent service func GetClusterAgentService(dda metav1.Object) *corev1.Service { labels := object.GetDefaultLabels(dda, v2alpha1.DefaultClusterAgentResourceSuffix, GetClusterAgentVersion(dda)) @@ -53,6 +58,23 @@ func GetClusterAgentService(dda metav1.Object) *corev1.Service { return service } +func GetClusterAgentPodDisruptionBudget(dda metav1.Object) *policyv1.PodDisruptionBudget { + // labels and annotations + minAvailableStr := intstr.FromInt(pdbMinAvailableInstances) + matchLabels := map[string]string{ + apicommon.AgentDeploymentNameLabelKey: dda.GetName(), + apicommon.AgentDeploymentComponentLabelKey: v2alpha1.DefaultClusterAgentResourceSuffix} + pdb := &policyv1.PodDisruptionBudget{ + Spec: policyv1.PodDisruptionBudgetSpec{ + MinAvailable: &minAvailableStr, + Selector: &metav1.LabelSelector{ + MatchLabels: matchLabels, + }, + }, + } + return pdb +} + // GetMetricsServerServiceName returns the external metrics provider service name func GetMetricsServerServiceName(dda metav1.Object) string { return fmt.Sprintf("%s-%s", dda.GetName(), v2alpha1.DefaultMetricsServerResourceSuffix) diff --git a/internal/controller/datadogagent/controller_reconcile_v2.go b/internal/controller/datadogagent/controller_reconcile_v2.go index 862c2027f..69aa1300f 100644 --- a/internal/controller/datadogagent/controller_reconcile_v2.go +++ b/internal/controller/datadogagent/controller_reconcile_v2.go @@ -32,7 +32,7 @@ import ( func (r *Reconciler) internalReconcileV2(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { reqLogger := r.log.WithValues("datadogagent", request.NamespacedName) - reqLogger.Info("Reconciling DatadogAgent") + reqLogger.Info("Reconciling DatadogAgent1") // Fetch the DatadogAgent instance instance := &datadoghqv2alpha1.DatadogAgent{} @@ -46,10 +46,12 @@ func (r *Reconciler) internalReconcileV2(ctx context.Context, request reconcile. return result, nil } // Error reading the object - requeue the request. + reqLogger.Error(err, "unable to fetch DatadogAgent") return result, err } if instance.Spec.Global == nil || instance.Spec.Global.Credentials == nil { + reqLogger.Info("credentials not configured in the DatadogAgent, can't reconcile") return result, fmt.Errorf("credentials not configured in the DatadogAgent, can't reconcile") } @@ -74,6 +76,7 @@ func (r *Reconciler) internalReconcileV2(ctx context.Context, request reconcile. }*/ if result, err = r.handleFinalizer(reqLogger, instance, r.finalizeDadV2); utils.ShouldReturn(result, err) { + reqLogger.V(1).Info("finalizer error", "error", err) return result, err } @@ -88,7 +91,7 @@ func (r *Reconciler) internalReconcileV2(ctx context.Context, request reconcile. // Set default values for GlobalConfig and Features instanceCopy := instance.DeepCopy() datadoghqv2alpha1.DefaultDatadogAgent(instanceCopy) - + reqLogger.Info("reconcile instance") return r.reconcileInstanceV2(ctx, reqLogger, instanceCopy) } @@ -96,8 +99,11 @@ func (r *Reconciler) reconcileInstanceV2(ctx context.Context, logger logr.Logger var result reconcile.Result newStatus := instance.Status.DeepCopy() now := metav1.NewTime(time.Now()) - + // logger = logger.WithValues("datadogagent", instance.Name, "namespace", instance.Namespace) + fmt.Println("ReconcileInstanceV2") + logger.Info("ReconcileInstanceV2") features, requiredComponents := feature.BuildFeatures(instance, reconcilerOptionsToFeatureOptions(&r.options, logger)) + logger.Info("ReconcileInstanceV2", "features", features) // update list of enabled features for metrics forwarder r.updateMetricsForwardersFeatures(instance, features) @@ -118,6 +124,8 @@ func (r *Reconciler) reconcileInstanceV2(ctx context.Context, logger logr.Logger // Set up dependencies required by enabled features for _, feat := range features { + fmt.Println("Dependency ManageDependencies", "featureID", feat.ID()) + logger.Info("Dependency ManageDependencies", "featureID", feat.ID()) logger.V(1).Info("Dependency ManageDependencies", "featureID", feat.ID()) if featErr := feat.ManageDependencies(resourceManagers, requiredComponents); featErr != nil { errs = append(errs, featErr) diff --git a/internal/controller/datadogagent/feature/enabledefault/feature.go b/internal/controller/datadogagent/feature/enabledefault/feature.go index 6f620213f..4155c0875 100644 --- a/internal/controller/datadogagent/feature/enabledefault/feature.go +++ b/internal/controller/datadogagent/feature/enabledefault/feature.go @@ -67,6 +67,7 @@ type defaultFeature struct { logger logr.Logger disableNonResourceRules bool otelAgentEnabled bool + PodDisruptionBudget bool customConfigAnnotationKey string customConfigAnnotationValue string @@ -123,7 +124,11 @@ func (f *defaultFeature) Configure(dda *v2alpha1.DatadogAgent) feature.RequiredC if dda.Spec.Global.DisableNonResourceRules != nil && *dda.Spec.Global.DisableNonResourceRules { f.disableNonResourceRules = true } - + if dda.Spec.Global.PodDisruptionBudget != nil { + if dda.Status.ClusterAgent.Replicas != 0 && dda.Status.ClusterAgent.Replicas > 1 { + f.PodDisruptionBudget = true + } + } if dda.Spec.Global.Credentials != nil { creds := dda.Spec.Global.Credentials @@ -208,13 +213,14 @@ func (f *defaultFeature) Configure(dda *v2alpha1.DatadogAgent) feature.RequiredC }, } } - } // ManageDependencies allows a feature to manage its dependencies. // Feature's dependencies should be added in the store. func (f *defaultFeature) ManageDependencies(managers feature.ResourceManagers, components feature.RequiredComponents) error { var errs []error + fmt.Println("manage dependencies") + f.logger.Info("manage dependencies") // manage credential secret if f.credentialsInfo.secretCreation.createSecret { for key, value := range f.credentialsInfo.secretCreation.data { @@ -250,6 +256,8 @@ func (f *defaultFeature) ManageDependencies(managers feature.ResourceManagers, c } if components.ClusterAgent.IsEnabled() { + f.logger.Info("Cluster Agent is enabled") + fmt.Println("Cluster Agent is enabled") if err := f.clusterAgentDependencies(managers, components.ClusterAgent); err != nil { errs = append(errs, err) } @@ -318,6 +326,18 @@ func (f *defaultFeature) clusterAgentDependencies(managers feature.ResourceManag return err } + pdb := componentdca.GetClusterAgentPodDisruptionBudget(f.owner) + fmt.Println("got cluster agent pdb, min available is ", pdb.Spec.MinAvailable) + f.logger.Info("test, logging for PDB") + if err := managers.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { + f.logger.Error(err, "error with created pod disruption budget") + fmt.Println("error with created pod disruption budget") + return err + } + // similar to ^ but for pdb + // add pdb creation when dca is enabled + // what to add as matchlabels? + return errors.NewAggregate(errs) } diff --git a/internal/controller/datadogagent/feature/ids.go b/internal/controller/datadogagent/feature/ids.go index ec383d7d6..f03d3ccad 100644 --- a/internal/controller/datadogagent/feature/ids.go +++ b/internal/controller/datadogagent/feature/ids.go @@ -65,6 +65,8 @@ const ( SBOMIDType = "sbom" // HelmCheckIDType Helm Check feature. HelmCheckIDType = "helm_check" + // PodDisruptionBudgetIDType Pod Disruption Budget feature. + PodDisruptionBudgetIDType = "pdb" // DummyIDType Dummy feature. DummyIDType = "dummy" ) From 0f0dde6cc21bea6f2907b9a0399564f299118eb5 Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Wed, 9 Oct 2024 13:55:48 -0400 Subject: [PATCH 02/12] create pdb with flags to enable it in override --- api/datadoghq/v2alpha1/datadogagent_types.go | 9 +++---- .../v2alpha1/zz_generated.deepcopy.go | 5 ++++ .../bases/v1/datadoghq.com_datadogagents.yaml | 3 +++ docs/configuration.v2alpha1.md | 1 + .../component/clusteragent/utils.go | 4 +++ .../component/clusterchecksrunner/default.go | 27 +++++++++++++++++++ .../feature/enabledefault/feature.go | 19 ------------- .../controller/datadogagent/feature/ids.go | 2 -- .../datadogagent/override/dependencies.go | 24 +++++++++++++++++ 9 files changed, 68 insertions(+), 26 deletions(-) diff --git a/api/datadoghq/v2alpha1/datadogagent_types.go b/api/datadoghq/v2alpha1/datadogagent_types.go index 7269418ea..e04bb7df7 100644 --- a/api/datadoghq/v2alpha1/datadogagent_types.go +++ b/api/datadoghq/v2alpha1/datadogagent_types.go @@ -1245,11 +1245,6 @@ type GlobalConfig struct { // FIPS contains configuration used to customize the FIPS proxy sidecar. FIPS *FIPSConfig `json:"fips,omitempty"` - - // PodDisruptionBudget enables the creation of a PodDisruptionBudget. - // Default: false - // +optional - PodDisruptionBudget *bool `json:"podDisruptionBudget,omitempty"` } // DatadogCredentials is a generic structure that holds credentials to access Datadog. @@ -1367,6 +1362,10 @@ type DatadogAgentComponentOverride struct { // +optional Replicas *int32 `json:"replicas,omitempty"` + // Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. + // +optional + CreatePodDisruptionBudget *bool `json:"createPodDisruptionBudget,omitempty"` + // Set CreateRbac to false to prevent automatic creation of Role/ClusterRole for this component // +optional CreateRbac *bool `json:"createRbac,omitempty"` diff --git a/api/datadoghq/v2alpha1/zz_generated.deepcopy.go b/api/datadoghq/v2alpha1/zz_generated.deepcopy.go index 2bb8608e4..d2611c12b 100644 --- a/api/datadoghq/v2alpha1/zz_generated.deepcopy.go +++ b/api/datadoghq/v2alpha1/zz_generated.deepcopy.go @@ -628,6 +628,11 @@ func (in *DatadogAgentComponentOverride) DeepCopyInto(out *DatadogAgentComponent *out = new(int32) **out = **in } + if in.CreatePodDisruptionBudget != nil { + in, out := &in.CreatePodDisruptionBudget, &out.CreatePodDisruptionBudget + *out = new(bool) + **out = **in + } if in.CreateRbac != nil { in, out := &in.CreateRbac, &out.CreateRbac *out = new(bool) diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml index 74a7420aa..dbce4748a 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml @@ -3567,6 +3567,9 @@ spec: `security-agent`, `system-probe`, `trace-agent`, and `all`. Configuration under `all` applies to all configured containers. type: object + createPodDisruptionBudget: + description: Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. + type: boolean createRbac: description: Set CreateRbac to false to prevent automatic creation of Role/ClusterRole for this component type: boolean diff --git a/docs/configuration.v2alpha1.md b/docs/configuration.v2alpha1.md index ff3f6a3ec..55b28b57a 100644 --- a/docs/configuration.v2alpha1.md +++ b/docs/configuration.v2alpha1.md @@ -331,6 +331,7 @@ In the table, `spec.override.nodeAgent.image.name` and `spec.override.nodeAgent. | [key].containers.[key].securityContext.windowsOptions.hostProcess | HostProcess determines if a container should be run as a 'Host Process' container. All of a Pod's containers must have the same effective HostProcess value (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). In addition, if HostProcess is true then HostNetwork must also be set to true. | | [key].containers.[key].securityContext.windowsOptions.runAsUserName | The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. | | [key].containers.[key].volumeMounts `[]object` | Specify additional volume mounts in the container. | +| [key].createPodDisruptionBudget | Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. | | [key].createRbac | Set CreateRbac to false to prevent automatic creation of Role/ClusterRole for this component | | [key].customConfigurations `map[string]object` | CustomConfiguration allows to specify custom configuration files for `datadog.yaml`, `datadog-cluster.yaml`, `security-agent.yaml`, and `system-probe.yaml`. The content is merged with configuration generated by the Datadog Operator, with priority given to custom configuration. WARNING: It is possible to override values set in the `DatadogAgent`. | | [key].customConfigurations.[key].configData | ConfigData corresponds to the configuration file content. | diff --git a/internal/controller/datadogagent/component/clusteragent/utils.go b/internal/controller/datadogagent/component/clusteragent/utils.go index f2aa3c7b0..aef598884 100644 --- a/internal/controller/datadogagent/component/clusteragent/utils.go +++ b/internal/controller/datadogagent/component/clusteragent/utils.go @@ -65,6 +65,10 @@ func GetClusterAgentPodDisruptionBudget(dda metav1.Object) *policyv1.PodDisrupti apicommon.AgentDeploymentNameLabelKey: dda.GetName(), apicommon.AgentDeploymentComponentLabelKey: v2alpha1.DefaultClusterAgentResourceSuffix} pdb := &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Name: "datadog-cluster-agent-pdb", + Namespace: dda.GetNamespace(), + }, Spec: policyv1.PodDisruptionBudgetSpec{ MinAvailable: &minAvailableStr, Selector: &metav1.LabelSelector{ diff --git a/internal/controller/datadogagent/component/clusterchecksrunner/default.go b/internal/controller/datadogagent/component/clusterchecksrunner/default.go index 9690ecef1..2749f53b7 100644 --- a/internal/controller/datadogagent/component/clusterchecksrunner/default.go +++ b/internal/controller/datadogagent/component/clusterchecksrunner/default.go @@ -11,7 +11,9 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" apicommon "github.com/DataDog/datadog-operator/api/datadoghq/common" "github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1" @@ -21,6 +23,10 @@ import ( "github.com/DataDog/datadog-operator/pkg/defaulting" ) +const ( + pdMaxUnavailableInstances = 1 +) + // GetClusterChecksRunnerName return the Cluster-Checks-Runner name based on the DatadogAgent name func GetClusterChecksRunnerName(dda metav1.Object) string { return fmt.Sprintf("%s-%s", dda.GetName(), v2alpha1.DefaultClusterChecksRunnerResourceSuffix) @@ -82,6 +88,27 @@ func NewDefaultClusterChecksRunnerPodTemplateSpec(dda metav1.Object) *corev1.Pod return template } +func GetClusterChecksRunnerPodDisruptionBudget(dda metav1.Object) *policyv1.PodDisruptionBudget { + maxUnavailableStr := intstr.FromInt(pdMaxUnavailableInstances) + matchLabels := map[string]string{ + apicommon.AgentDeploymentNameLabelKey: dda.GetName(), + apicommon.AgentDeploymentComponentLabelKey: v2alpha1.DefaultClusterChecksRunnerResourceSuffix} + pdb := &policyv1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Name: "datadog-cluster-checks-runner-pdb", + Namespace: dda.GetNamespace(), + }, + Spec: policyv1.PodDisruptionBudgetSpec{ + MaxUnavailable: &maxUnavailableStr, + Selector: &metav1.LabelSelector{ + MatchLabels: matchLabels, + }, + }, + } + fmt.Println("in GetClusterAgentPodDisruptionBudget, ", dda.GetNamespace()) + return pdb +} + // getDefaultServiceAccountName return the default Cluster-Agent ServiceAccountName func getDefaultServiceAccountName(dda metav1.Object) string { return fmt.Sprintf("%s-%s", dda.GetName(), v2alpha1.DefaultClusterChecksRunnerResourceSuffix) diff --git a/internal/controller/datadogagent/feature/enabledefault/feature.go b/internal/controller/datadogagent/feature/enabledefault/feature.go index 4155c0875..5cd05d321 100644 --- a/internal/controller/datadogagent/feature/enabledefault/feature.go +++ b/internal/controller/datadogagent/feature/enabledefault/feature.go @@ -124,11 +124,6 @@ func (f *defaultFeature) Configure(dda *v2alpha1.DatadogAgent) feature.RequiredC if dda.Spec.Global.DisableNonResourceRules != nil && *dda.Spec.Global.DisableNonResourceRules { f.disableNonResourceRules = true } - if dda.Spec.Global.PodDisruptionBudget != nil { - if dda.Status.ClusterAgent.Replicas != 0 && dda.Status.ClusterAgent.Replicas > 1 { - f.PodDisruptionBudget = true - } - } if dda.Spec.Global.Credentials != nil { creds := dda.Spec.Global.Credentials @@ -219,8 +214,6 @@ func (f *defaultFeature) Configure(dda *v2alpha1.DatadogAgent) feature.RequiredC // Feature's dependencies should be added in the store. func (f *defaultFeature) ManageDependencies(managers feature.ResourceManagers, components feature.RequiredComponents) error { var errs []error - fmt.Println("manage dependencies") - f.logger.Info("manage dependencies") // manage credential secret if f.credentialsInfo.secretCreation.createSecret { for key, value := range f.credentialsInfo.secretCreation.data { @@ -326,18 +319,6 @@ func (f *defaultFeature) clusterAgentDependencies(managers feature.ResourceManag return err } - pdb := componentdca.GetClusterAgentPodDisruptionBudget(f.owner) - fmt.Println("got cluster agent pdb, min available is ", pdb.Spec.MinAvailable) - f.logger.Info("test, logging for PDB") - if err := managers.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { - f.logger.Error(err, "error with created pod disruption budget") - fmt.Println("error with created pod disruption budget") - return err - } - // similar to ^ but for pdb - // add pdb creation when dca is enabled - // what to add as matchlabels? - return errors.NewAggregate(errs) } diff --git a/internal/controller/datadogagent/feature/ids.go b/internal/controller/datadogagent/feature/ids.go index f03d3ccad..ec383d7d6 100644 --- a/internal/controller/datadogagent/feature/ids.go +++ b/internal/controller/datadogagent/feature/ids.go @@ -65,8 +65,6 @@ const ( SBOMIDType = "sbom" // HelmCheckIDType Helm Check feature. HelmCheckIDType = "helm_check" - // PodDisruptionBudgetIDType Pod Disruption Budget feature. - PodDisruptionBudgetIDType = "pdb" // DummyIDType Dummy feature. DummyIDType = "dummy" ) diff --git a/internal/controller/datadogagent/override/dependencies.go b/internal/controller/datadogagent/override/dependencies.go index a9bf9f211..4d7ecec62 100644 --- a/internal/controller/datadogagent/override/dependencies.go +++ b/internal/controller/datadogagent/override/dependencies.go @@ -14,6 +14,8 @@ import ( "k8s.io/apimachinery/pkg/util/errors" "github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1" + componentdca "github.com/DataDog/datadog-operator/internal/controller/datadogagent/component/clusteragent" + componentccr "github.com/DataDog/datadog-operator/internal/controller/datadogagent/component/clusterchecksrunner" "github.com/DataDog/datadog-operator/internal/controller/datadogagent/feature" "github.com/DataDog/datadog-operator/internal/controller/datadogagent/object" "github.com/DataDog/datadog-operator/internal/controller/datadogagent/object/configmap" @@ -42,6 +44,28 @@ func Dependencies(logger logr.Logger, manager feature.ResourceManagers, dda *v2a // Handle custom check files checksdCMName := fmt.Sprintf(extraChecksdConfigMapName, strings.ToLower((string(component)))) errs = append(errs, overrideExtraConfigs(logger, manager, override.ExtraChecksd, namespace, checksdCMName, false)...) + + if override.CreatePodDisruptionBudget != nil { + fmt.Println("override.CreatePodDisruptionBudget is not nil for", component) + if component == v2alpha1.ClusterAgentComponentName { + pdb := componentdca.GetClusterAgentPodDisruptionBudget(dda) + if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { + fmt.Println("error with created pod disruption budget") + errs = append(errs, err) + } else { + fmt.Println("created pod disruption budget") + } + + } else if component == v2alpha1.ClusterChecksRunnerComponentName { + pdb := componentccr.GetClusterChecksRunnerPodDisruptionBudget(dda) + if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { + fmt.Println("error with created pod disruption budget") + errs = append(errs, err) + } else { + fmt.Println("created pod disruption budget") + } + } + } } return errs From 9d9e84108c5dd9fb2055761dcbc7c975cecfbe6f Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Wed, 9 Oct 2024 16:35:57 -0400 Subject: [PATCH 03/12] cleanup --- .../datadogagent/component/clusterchecksrunner/default.go | 1 - .../controller/datadogagent/controller_reconcile_v2.go | 7 ------- .../datadogagent/feature/enabledefault/feature.go | 2 -- internal/controller/datadogagent/override/dependencies.go | 8 -------- 4 files changed, 18 deletions(-) diff --git a/internal/controller/datadogagent/component/clusterchecksrunner/default.go b/internal/controller/datadogagent/component/clusterchecksrunner/default.go index 2749f53b7..a8a02762b 100644 --- a/internal/controller/datadogagent/component/clusterchecksrunner/default.go +++ b/internal/controller/datadogagent/component/clusterchecksrunner/default.go @@ -105,7 +105,6 @@ func GetClusterChecksRunnerPodDisruptionBudget(dda metav1.Object) *policyv1.PodD }, }, } - fmt.Println("in GetClusterAgentPodDisruptionBudget, ", dda.GetNamespace()) return pdb } diff --git a/internal/controller/datadogagent/controller_reconcile_v2.go b/internal/controller/datadogagent/controller_reconcile_v2.go index 69aa1300f..dd17969ec 100644 --- a/internal/controller/datadogagent/controller_reconcile_v2.go +++ b/internal/controller/datadogagent/controller_reconcile_v2.go @@ -99,11 +99,7 @@ func (r *Reconciler) reconcileInstanceV2(ctx context.Context, logger logr.Logger var result reconcile.Result newStatus := instance.Status.DeepCopy() now := metav1.NewTime(time.Now()) - // logger = logger.WithValues("datadogagent", instance.Name, "namespace", instance.Namespace) - fmt.Println("ReconcileInstanceV2") - logger.Info("ReconcileInstanceV2") features, requiredComponents := feature.BuildFeatures(instance, reconcilerOptionsToFeatureOptions(&r.options, logger)) - logger.Info("ReconcileInstanceV2", "features", features) // update list of enabled features for metrics forwarder r.updateMetricsForwardersFeatures(instance, features) @@ -124,9 +120,6 @@ func (r *Reconciler) reconcileInstanceV2(ctx context.Context, logger logr.Logger // Set up dependencies required by enabled features for _, feat := range features { - fmt.Println("Dependency ManageDependencies", "featureID", feat.ID()) - logger.Info("Dependency ManageDependencies", "featureID", feat.ID()) - logger.V(1).Info("Dependency ManageDependencies", "featureID", feat.ID()) if featErr := feat.ManageDependencies(resourceManagers, requiredComponents); featErr != nil { errs = append(errs, featErr) } diff --git a/internal/controller/datadogagent/feature/enabledefault/feature.go b/internal/controller/datadogagent/feature/enabledefault/feature.go index 5cd05d321..4b48f437f 100644 --- a/internal/controller/datadogagent/feature/enabledefault/feature.go +++ b/internal/controller/datadogagent/feature/enabledefault/feature.go @@ -249,8 +249,6 @@ func (f *defaultFeature) ManageDependencies(managers feature.ResourceManagers, c } if components.ClusterAgent.IsEnabled() { - f.logger.Info("Cluster Agent is enabled") - fmt.Println("Cluster Agent is enabled") if err := f.clusterAgentDependencies(managers, components.ClusterAgent); err != nil { errs = append(errs, err) } diff --git a/internal/controller/datadogagent/override/dependencies.go b/internal/controller/datadogagent/override/dependencies.go index 4d7ecec62..6a8ed191d 100644 --- a/internal/controller/datadogagent/override/dependencies.go +++ b/internal/controller/datadogagent/override/dependencies.go @@ -46,23 +46,15 @@ func Dependencies(logger logr.Logger, manager feature.ResourceManagers, dda *v2a errs = append(errs, overrideExtraConfigs(logger, manager, override.ExtraChecksd, namespace, checksdCMName, false)...) if override.CreatePodDisruptionBudget != nil { - fmt.Println("override.CreatePodDisruptionBudget is not nil for", component) if component == v2alpha1.ClusterAgentComponentName { pdb := componentdca.GetClusterAgentPodDisruptionBudget(dda) if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { - fmt.Println("error with created pod disruption budget") errs = append(errs, err) - } else { - fmt.Println("created pod disruption budget") } - } else if component == v2alpha1.ClusterChecksRunnerComponentName { pdb := componentccr.GetClusterChecksRunnerPodDisruptionBudget(dda) if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { - fmt.Println("error with created pod disruption budget") errs = append(errs, err) - } else { - fmt.Println("created pod disruption budget") } } } From 835c5d4be1f1ecdcff8bd78f1709031ef3c11296 Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Thu, 10 Oct 2024 13:36:12 -0400 Subject: [PATCH 04/12] add tests --- .../component/clusteragent/default_test.go | 13 ++++++++ .../clusterchecksrunner/default_test.go | 14 +++++++++ .../feature/enabledefault/feature.go | 1 - .../datadogagent/override/dependencies.go | 31 ++++++++++++------- .../override/dependencies_test.go | 24 ++++++++++++++ 5 files changed, 70 insertions(+), 13 deletions(-) diff --git a/internal/controller/datadogagent/component/clusteragent/default_test.go b/internal/controller/datadogagent/component/clusteragent/default_test.go index 5f7dabba1..25bddbad2 100644 --- a/internal/controller/datadogagent/component/clusteragent/default_test.go +++ b/internal/controller/datadogagent/component/clusteragent/default_test.go @@ -5,6 +5,7 @@ import ( "strconv" "testing" + "github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1" datadoghqv2alpha1 "github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1" apiutils "github.com/DataDog/datadog-operator/api/utils" "github.com/DataDog/datadog-operator/internal/controller/datadogagent/common" @@ -38,6 +39,18 @@ func Test_defaultClusterAgentDeployment(t *testing.T) { assert.Empty(t, testutils.CompareKubeResource(&deployment.Spec.Template, expectedDeployment)) } +func Test_getPodDisruptionBudget(t *testing.T) { + dda := v2alpha1.DatadogAgent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-datadog-agent", + Namespace: "some-namespace", + }, + } + testpdb := GetClusterAgentPodDisruptionBudget(&dda) + assert.Equal(t, "datadog-cluster-agent-pdb", testpdb.Name) + assert.Equal(t, intstr.FromInt(pdbMinAvailableInstances), *testpdb.Spec.MinAvailable) + assert.Nil(t, testpdb.Spec.MaxUnavailable) +} func clusterAgentExpectedPodTemplate(dda *datadoghqv2alpha1.DatadogAgent) *corev1.PodTemplateSpec { podTemplate := &corev1.PodTemplateSpec{ diff --git a/internal/controller/datadogagent/component/clusterchecksrunner/default_test.go b/internal/controller/datadogagent/component/clusterchecksrunner/default_test.go index ed37a31c5..7c8788f7a 100644 --- a/internal/controller/datadogagent/component/clusterchecksrunner/default_test.go +++ b/internal/controller/datadogagent/component/clusterchecksrunner/default_test.go @@ -11,6 +11,7 @@ import ( "github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1" "github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" ) func Test_getDefaultServiceAccountName(t *testing.T) { @@ -23,3 +24,16 @@ func Test_getDefaultServiceAccountName(t *testing.T) { assert.Equal(t, "my-datadog-agent-cluster-checks-runner", getDefaultServiceAccountName(&dda)) } + +func Test_getPodDisruptionBudget(t *testing.T) { + dda := v2alpha1.DatadogAgent{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-datadog-agent", + Namespace: "some-namespace", + }, + } + testpdb := GetClusterChecksRunnerPodDisruptionBudget(&dda) + assert.Equal(t, "datadog-cluster-checks-runner-pdb", testpdb.Name) + assert.Equal(t, intstr.FromInt(pdMaxUnavailableInstances), *testpdb.Spec.MaxUnavailable) + assert.Nil(t, testpdb.Spec.MinAvailable) +} diff --git a/internal/controller/datadogagent/feature/enabledefault/feature.go b/internal/controller/datadogagent/feature/enabledefault/feature.go index 4b48f437f..8bfb6aeb4 100644 --- a/internal/controller/datadogagent/feature/enabledefault/feature.go +++ b/internal/controller/datadogagent/feature/enabledefault/feature.go @@ -67,7 +67,6 @@ type defaultFeature struct { logger logr.Logger disableNonResourceRules bool otelAgentEnabled bool - PodDisruptionBudget bool customConfigAnnotationKey string customConfigAnnotationValue string diff --git a/internal/controller/datadogagent/override/dependencies.go b/internal/controller/datadogagent/override/dependencies.go index 6a8ed191d..b7015e072 100644 --- a/internal/controller/datadogagent/override/dependencies.go +++ b/internal/controller/datadogagent/override/dependencies.go @@ -45,21 +45,28 @@ func Dependencies(logger logr.Logger, manager feature.ResourceManagers, dda *v2a checksdCMName := fmt.Sprintf(extraChecksdConfigMapName, strings.ToLower((string(component)))) errs = append(errs, overrideExtraConfigs(logger, manager, override.ExtraChecksd, namespace, checksdCMName, false)...) - if override.CreatePodDisruptionBudget != nil { - if component == v2alpha1.ClusterAgentComponentName { - pdb := componentdca.GetClusterAgentPodDisruptionBudget(dda) - if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { - errs = append(errs, err) - } - } else if component == v2alpha1.ClusterChecksRunnerComponentName { - pdb := componentccr.GetClusterChecksRunnerPodDisruptionBudget(dda) - if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { - errs = append(errs, err) - } + errs = append(errs, overridePodDisruptionBudget(logger, manager, dda, override.CreatePodDisruptionBudget, component)...) + } + + return errs +} + +func overridePodDisruptionBudget(logger logr.Logger, manager feature.ResourceManagers, dda *v2alpha1.DatadogAgent, createPdb *bool, component v2alpha1.ComponentName) (errs []error) { + if createPdb != nil && *createPdb { + if component == v2alpha1.ClusterAgentComponentName { + pdb := componentdca.GetClusterAgentPodDisruptionBudget(dda) + if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { + errs = append(errs, err) + } + } else if component == v2alpha1.ClusterChecksRunnerComponentName { + pdb := componentccr.GetClusterChecksRunnerPodDisruptionBudget(dda) + if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { + errs = append(errs, err) } } + } else { + logger.Error(nil, "Pod disruption budget is not created by default") } - return errs } diff --git a/internal/controller/datadogagent/override/dependencies_test.go b/internal/controller/datadogagent/override/dependencies_test.go index d72c99936..ae9750329 100644 --- a/internal/controller/datadogagent/override/dependencies_test.go +++ b/internal/controller/datadogagent/override/dependencies_test.go @@ -124,6 +124,30 @@ func TestDependencies(t *testing.T) { }, expectsErrors: false, }, + { + name: "override clusterAgent createPDB without errors", + dda: v2alpha1.DatadogAgent{ + Spec: v2alpha1.DatadogAgentSpec{ + Override: map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{ + v2alpha1.ClusterAgentComponentName: { + CreatePodDisruptionBudget: apiutils.NewBoolPointer(true), + }, + }, + }, + }, + }, + { + name: "override clusterChecksRunner createPDB without errors", + dda: v2alpha1.DatadogAgent{ + Spec: v2alpha1.DatadogAgentSpec{ + Override: map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{ + v2alpha1.ClusterChecksRunnerComponentName: { + CreatePodDisruptionBudget: apiutils.NewBoolPointer(true), + }, + }, + }, + }, + }, } for _, test := range tests { From 007c6ca2fb571ee24949701d02672cde6aa60353 Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Thu, 10 Oct 2024 14:17:25 -0400 Subject: [PATCH 05/12] double check CLC features flag before adding pdb --- internal/controller/datadogagent/override/dependencies.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/controller/datadogagent/override/dependencies.go b/internal/controller/datadogagent/override/dependencies.go index b7015e072..17045b4ae 100644 --- a/internal/controller/datadogagent/override/dependencies.go +++ b/internal/controller/datadogagent/override/dependencies.go @@ -58,7 +58,9 @@ func overridePodDisruptionBudget(logger logr.Logger, manager feature.ResourceMan if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { errs = append(errs, err) } - } else if component == v2alpha1.ClusterChecksRunnerComponentName { + } else if component == v2alpha1.ClusterChecksRunnerComponentName && + (dda.Spec.Features.ClusterChecks.UseClusterChecksRunners == nil || + *dda.Spec.Features.ClusterChecks.UseClusterChecksRunners) { pdb := componentccr.GetClusterChecksRunnerPodDisruptionBudget(dda) if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil { errs = append(errs, err) From 38a41ae4b5c3cfa4c3b14b01959e442c1c7ac065 Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Thu, 10 Oct 2024 14:17:34 -0400 Subject: [PATCH 06/12] cleanup --- api/datadoghq/v2alpha1/datadogagent_types.go | 1 + config/crd/bases/v1/datadoghq.com_datadogagents.yaml | 4 +++- docs/configuration.v2alpha1.md | 2 +- .../controller/datadogagent/controller_reconcile_v2.go | 7 ++----- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/api/datadoghq/v2alpha1/datadogagent_types.go b/api/datadoghq/v2alpha1/datadogagent_types.go index e04bb7df7..a662b511b 100644 --- a/api/datadoghq/v2alpha1/datadogagent_types.go +++ b/api/datadoghq/v2alpha1/datadogagent_types.go @@ -1363,6 +1363,7 @@ type DatadogAgentComponentOverride struct { Replicas *int32 `json:"replicas,omitempty"` // Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. + // Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 min available pod, and a Cluster Checks Runner PDB is set with 1 max unavailable pod. // +optional CreatePodDisruptionBudget *bool `json:"createPodDisruptionBudget,omitempty"` diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml index dbce4748a..3b2588569 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml @@ -3568,7 +3568,9 @@ spec: Configuration under `all` applies to all configured containers. type: object createPodDisruptionBudget: - description: Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. + description: |- + Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. + Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 min available pod, and a Cluster Checks Runner PDB is set with 1 max unavailable pod. type: boolean createRbac: description: Set CreateRbac to false to prevent automatic creation of Role/ClusterRole for this component diff --git a/docs/configuration.v2alpha1.md b/docs/configuration.v2alpha1.md index 55b28b57a..d27fafc55 100644 --- a/docs/configuration.v2alpha1.md +++ b/docs/configuration.v2alpha1.md @@ -331,7 +331,7 @@ In the table, `spec.override.nodeAgent.image.name` and `spec.override.nodeAgent. | [key].containers.[key].securityContext.windowsOptions.hostProcess | HostProcess determines if a container should be run as a 'Host Process' container. All of a Pod's containers must have the same effective HostProcess value (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). In addition, if HostProcess is true then HostNetwork must also be set to true. | | [key].containers.[key].securityContext.windowsOptions.runAsUserName | The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. | | [key].containers.[key].volumeMounts `[]object` | Specify additional volume mounts in the container. | -| [key].createPodDisruptionBudget | Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. | +| [key].createPodDisruptionBudget | Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 min available pod, and a Cluster Checks Runner PDB is set with 1 max unavailable pod. | | [key].createRbac | Set CreateRbac to false to prevent automatic creation of Role/ClusterRole for this component | | [key].customConfigurations `map[string]object` | CustomConfiguration allows to specify custom configuration files for `datadog.yaml`, `datadog-cluster.yaml`, `security-agent.yaml`, and `system-probe.yaml`. The content is merged with configuration generated by the Datadog Operator, with priority given to custom configuration. WARNING: It is possible to override values set in the `DatadogAgent`. | | [key].customConfigurations.[key].configData | ConfigData corresponds to the configuration file content. | diff --git a/internal/controller/datadogagent/controller_reconcile_v2.go b/internal/controller/datadogagent/controller_reconcile_v2.go index dd17969ec..321dc9672 100644 --- a/internal/controller/datadogagent/controller_reconcile_v2.go +++ b/internal/controller/datadogagent/controller_reconcile_v2.go @@ -32,7 +32,7 @@ import ( func (r *Reconciler) internalReconcileV2(ctx context.Context, request reconcile.Request) (reconcile.Result, error) { reqLogger := r.log.WithValues("datadogagent", request.NamespacedName) - reqLogger.Info("Reconciling DatadogAgent1") + reqLogger.Info("Reconciling DatadogAgent") // Fetch the DatadogAgent instance instance := &datadoghqv2alpha1.DatadogAgent{} @@ -46,12 +46,10 @@ func (r *Reconciler) internalReconcileV2(ctx context.Context, request reconcile. return result, nil } // Error reading the object - requeue the request. - reqLogger.Error(err, "unable to fetch DatadogAgent") return result, err } if instance.Spec.Global == nil || instance.Spec.Global.Credentials == nil { - reqLogger.Info("credentials not configured in the DatadogAgent, can't reconcile") return result, fmt.Errorf("credentials not configured in the DatadogAgent, can't reconcile") } @@ -76,7 +74,6 @@ func (r *Reconciler) internalReconcileV2(ctx context.Context, request reconcile. }*/ if result, err = r.handleFinalizer(reqLogger, instance, r.finalizeDadV2); utils.ShouldReturn(result, err) { - reqLogger.V(1).Info("finalizer error", "error", err) return result, err } @@ -91,7 +88,6 @@ func (r *Reconciler) internalReconcileV2(ctx context.Context, request reconcile. // Set default values for GlobalConfig and Features instanceCopy := instance.DeepCopy() datadoghqv2alpha1.DefaultDatadogAgent(instanceCopy) - reqLogger.Info("reconcile instance") return r.reconcileInstanceV2(ctx, reqLogger, instanceCopy) } @@ -120,6 +116,7 @@ func (r *Reconciler) reconcileInstanceV2(ctx context.Context, logger logr.Logger // Set up dependencies required by enabled features for _, feat := range features { + logger.V(1).Info("Dependency ManageDependencies", "featureID", feat.ID()) if featErr := feat.ManageDependencies(resourceManagers, requiredComponents); featErr != nil { errs = append(errs, featErr) } From b828c216629411abed6d07d92dd5f980f34e75a2 Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Thu, 10 Oct 2024 14:48:31 -0400 Subject: [PATCH 07/12] fix bug in test --- .../controller/datadogagent/override/dependencies_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/internal/controller/datadogagent/override/dependencies_test.go b/internal/controller/datadogagent/override/dependencies_test.go index ae9750329..1c836a3b2 100644 --- a/internal/controller/datadogagent/override/dependencies_test.go +++ b/internal/controller/datadogagent/override/dependencies_test.go @@ -145,6 +145,11 @@ func TestDependencies(t *testing.T) { CreatePodDisruptionBudget: apiutils.NewBoolPointer(true), }, }, + Features: &v2alpha1.DatadogFeatures{ + ClusterChecks: &v2alpha1.ClusterChecksFeatureConfig{ + UseClusterChecksRunners: apiutils.NewBoolPointer(true), + }, + }, }, }, }, From ac2f2ef212bf9ef3b24f4c11ce4129bb0f567ba8 Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Tue, 5 Nov 2024 16:56:30 -0500 Subject: [PATCH 08/12] code fixes --- api/datadoghq/v2alpha1/datadogagent_types.go | 2 +- .../datadogagent/component/clusteragent/default.go | 5 +++++ .../component/clusteragent/default_test.go | 2 +- .../datadogagent/component/clusteragent/utils.go | 2 +- .../component/clusterchecksrunner/default.go | 10 +++++++--- .../component/clusterchecksrunner/default_test.go | 4 ++-- 6 files changed, 17 insertions(+), 8 deletions(-) diff --git a/api/datadoghq/v2alpha1/datadogagent_types.go b/api/datadoghq/v2alpha1/datadogagent_types.go index 5959c1c4b..19c3a82d7 100644 --- a/api/datadoghq/v2alpha1/datadogagent_types.go +++ b/api/datadoghq/v2alpha1/datadogagent_types.go @@ -1424,7 +1424,7 @@ type DatadogAgentComponentOverride struct { Replicas *int32 `json:"replicas,omitempty"` // Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. - // Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 min available pod, and a Cluster Checks Runner PDB is set with 1 max unavailable pod. + // Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 minimum available pod, and a Cluster Checks Runner PDB is set with 1 maximum unavailable pod. // +optional CreatePodDisruptionBudget *bool `json:"createPodDisruptionBudget,omitempty"` diff --git a/internal/controller/datadogagent/component/clusteragent/default.go b/internal/controller/datadogagent/component/clusteragent/default.go index ceb573462..73503a86e 100644 --- a/internal/controller/datadogagent/component/clusteragent/default.go +++ b/internal/controller/datadogagent/component/clusteragent/default.go @@ -28,6 +28,11 @@ func GetClusterAgentServiceName(dda metav1.Object) string { return fmt.Sprintf("%s-%s", dda.GetName(), v2alpha1.DefaultClusterAgentResourceSuffix) } +// GetClusterAgentPodDisruptionBudgetName return the Cluster-Agent PodDisruptionBudget name based on the DatadogAgent name +func GetClusterAgentPodDisruptionBudgetName(dda metav1.Object) string { + return fmt.Sprintf("%s-%s-pdb", dda.GetName(), v2alpha1.DefaultClusterAgentResourceSuffix) +} + // GetClusterAgentName return the Cluster-Agent name based on the DatadogAgent name func GetClusterAgentName(dda metav1.Object) string { return fmt.Sprintf("%s-%s", dda.GetName(), v2alpha1.DefaultClusterAgentResourceSuffix) diff --git a/internal/controller/datadogagent/component/clusteragent/default_test.go b/internal/controller/datadogagent/component/clusteragent/default_test.go index 25bddbad2..567fb7d09 100644 --- a/internal/controller/datadogagent/component/clusteragent/default_test.go +++ b/internal/controller/datadogagent/component/clusteragent/default_test.go @@ -47,7 +47,7 @@ func Test_getPodDisruptionBudget(t *testing.T) { }, } testpdb := GetClusterAgentPodDisruptionBudget(&dda) - assert.Equal(t, "datadog-cluster-agent-pdb", testpdb.Name) + assert.Equal(t, "my-datadog-agent-cluster-agent-pdb", testpdb.Name) assert.Equal(t, intstr.FromInt(pdbMinAvailableInstances), *testpdb.Spec.MinAvailable) assert.Nil(t, testpdb.Spec.MaxUnavailable) } diff --git a/internal/controller/datadogagent/component/clusteragent/utils.go b/internal/controller/datadogagent/component/clusteragent/utils.go index 5adc611cb..b608c621c 100644 --- a/internal/controller/datadogagent/component/clusteragent/utils.go +++ b/internal/controller/datadogagent/component/clusteragent/utils.go @@ -66,7 +66,7 @@ func GetClusterAgentPodDisruptionBudget(dda metav1.Object) *policyv1.PodDisrupti apicommon.AgentDeploymentComponentLabelKey: v2alpha1.DefaultClusterAgentResourceSuffix} pdb := &policyv1.PodDisruptionBudget{ ObjectMeta: metav1.ObjectMeta{ - Name: "datadog-cluster-agent-pdb", + Name: GetClusterAgentPodDisruptionBudgetName(dda), Namespace: dda.GetNamespace(), }, Spec: policyv1.PodDisruptionBudgetSpec{ diff --git a/internal/controller/datadogagent/component/clusterchecksrunner/default.go b/internal/controller/datadogagent/component/clusterchecksrunner/default.go index a8a02762b..f554834db 100644 --- a/internal/controller/datadogagent/component/clusterchecksrunner/default.go +++ b/internal/controller/datadogagent/component/clusterchecksrunner/default.go @@ -24,7 +24,7 @@ import ( ) const ( - pdMaxUnavailableInstances = 1 + pdbMaxUnavailableInstances = 1 ) // GetClusterChecksRunnerName return the Cluster-Checks-Runner name based on the DatadogAgent name @@ -88,14 +88,18 @@ func NewDefaultClusterChecksRunnerPodTemplateSpec(dda metav1.Object) *corev1.Pod return template } +func GetClusterChecksRunnerPodDisruptionBudgetName(dda metav1.Object) string { + return fmt.Sprintf("%s-%s-pdb", dda.GetName(), v2alpha1.DefaultClusterChecksRunnerResourceSuffix) +} + func GetClusterChecksRunnerPodDisruptionBudget(dda metav1.Object) *policyv1.PodDisruptionBudget { - maxUnavailableStr := intstr.FromInt(pdMaxUnavailableInstances) + maxUnavailableStr := intstr.FromInt(pdbMaxUnavailableInstances) matchLabels := map[string]string{ apicommon.AgentDeploymentNameLabelKey: dda.GetName(), apicommon.AgentDeploymentComponentLabelKey: v2alpha1.DefaultClusterChecksRunnerResourceSuffix} pdb := &policyv1.PodDisruptionBudget{ ObjectMeta: metav1.ObjectMeta{ - Name: "datadog-cluster-checks-runner-pdb", + Name: GetClusterChecksRunnerPodDisruptionBudgetName(dda), Namespace: dda.GetNamespace(), }, Spec: policyv1.PodDisruptionBudgetSpec{ diff --git a/internal/controller/datadogagent/component/clusterchecksrunner/default_test.go b/internal/controller/datadogagent/component/clusterchecksrunner/default_test.go index 7c8788f7a..eb1177de9 100644 --- a/internal/controller/datadogagent/component/clusterchecksrunner/default_test.go +++ b/internal/controller/datadogagent/component/clusterchecksrunner/default_test.go @@ -33,7 +33,7 @@ func Test_getPodDisruptionBudget(t *testing.T) { }, } testpdb := GetClusterChecksRunnerPodDisruptionBudget(&dda) - assert.Equal(t, "datadog-cluster-checks-runner-pdb", testpdb.Name) - assert.Equal(t, intstr.FromInt(pdMaxUnavailableInstances), *testpdb.Spec.MaxUnavailable) + assert.Equal(t, "my-datadog-agent-cluster-checks-runner-pdb", testpdb.Name) + assert.Equal(t, intstr.FromInt(pdbMaxUnavailableInstances), *testpdb.Spec.MaxUnavailable) assert.Nil(t, testpdb.Spec.MinAvailable) } From 591a4a42776c4cf03e01c7c3d6fd2cfaf5789b6c Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Wed, 6 Nov 2024 12:50:49 -0500 Subject: [PATCH 09/12] fix error logging --- config/crd/bases/v1/datadoghq.com_datadogagents.yaml | 2 +- internal/controller/datadogagent/override/dependencies.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml index 5e24552ed..6c66c1ed2 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogagents.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogagents.yaml @@ -3926,7 +3926,7 @@ spec: createPodDisruptionBudget: description: |- Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. - Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 min available pod, and a Cluster Checks Runner PDB is set with 1 max unavailable pod. + Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 minimum available pod, and a Cluster Checks Runner PDB is set with 1 maximum unavailable pod. type: boolean createRbac: description: Set CreateRbac to false to prevent automatic creation of Role/ClusterRole for this component diff --git a/internal/controller/datadogagent/override/dependencies.go b/internal/controller/datadogagent/override/dependencies.go index 3cdc52896..d6bd7f7bf 100644 --- a/internal/controller/datadogagent/override/dependencies.go +++ b/internal/controller/datadogagent/override/dependencies.go @@ -66,7 +66,7 @@ func overridePodDisruptionBudget(logger logr.Logger, manager feature.ResourceMan errs = append(errs, err) } } - } else { + } else if createPdb != nil { logger.Error(nil, "Pod disruption budget is not created by default") } return errs From 497358213e1dc680a464b7117903e8e0057e4530 Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Thu, 7 Nov 2024 11:28:51 -0800 Subject: [PATCH 10/12] added pdb testing --- .../datadogagent/controller_v2_test.go | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/internal/controller/datadogagent/controller_v2_test.go b/internal/controller/datadogagent/controller_v2_test.go index 905798af1..f7b5b533a 100644 --- a/internal/controller/datadogagent/controller_v2_test.go +++ b/internal/controller/datadogagent/controller_v2_test.go @@ -24,9 +24,11 @@ import ( assert "github.com/stretchr/testify/require" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/record" "sigs.k8s.io/controller-runtime/pkg/client" @@ -367,6 +369,34 @@ func TestReconcileDatadogAgentV2_Reconcile(t *testing.T) { return verifyDaemonsetContainers(c, resourcesNamespace, dsName, expectedContainers) }, }, + { + name: "DatadogAgent with PDB enabled", + fields: fields{ + client: fake.NewClientBuilder().WithStatusSubresource(&appsv1.DaemonSet{}, &v2alpha1.DatadogAgent{}, &policyv1.PodDisruptionBudget{}).Build(), + scheme: s, + recorder: recorder, + }, + args: args{ + request: newRequest(resourcesNamespace, resourcesName), + loadFunc: func(c client.Client) { + dda := v2alpha1test.NewInitializedDatadogAgentBuilder(resourcesNamespace, resourcesName). + WithComponentOverride(v2alpha1.ClusterAgentComponentName, v2alpha1.DatadogAgentComponentOverride{ + CreatePodDisruptionBudget: apiutils.NewBoolPointer(true), + }). + WithClusterChecksUseCLCEnabled(true). + WithComponentOverride(v2alpha1.ClusterChecksRunnerComponentName, v2alpha1.DatadogAgentComponentOverride{ + CreatePodDisruptionBudget: apiutils.NewBoolPointer(true), + }). + Build() + _ = c.Create(context.TODO(), dda) + }, + }, + want: reconcile.Result{RequeueAfter: defaultRequeueDuration}, + wantErr: false, + wantFunc: func(c client.Client) error { + return verifyPDB(t, c) + }, + }, } for _, tt := range tests { @@ -564,3 +594,22 @@ func verifyDaemonsetNames(t *testing.T, c client.Client, resourcesNamespace, dsN assert.Equal(t, expectedDSNames, actualDSNames) return nil } + +func verifyPDB(t *testing.T, c client.Client) error { + pdbList := policyv1.PodDisruptionBudgetList{} + if err := c.List(context.TODO(), &pdbList); err != nil { + return err + } + assert.True(t, len(pdbList.Items) == 2) + + dcaPDB := pdbList.Items[0] + assert.Equal(t, "foo-cluster-agent-pdb", dcaPDB.Name) + assert.Equal(t, intstr.FromInt(1), *dcaPDB.Spec.MinAvailable) + assert.Nil(t, dcaPDB.Spec.MaxUnavailable) + + ccrPDB := pdbList.Items[1] + assert.Equal(t, "foo-cluster-checks-runner-pdb", ccrPDB.Name) + assert.Equal(t, intstr.FromInt(1), *ccrPDB.Spec.MaxUnavailable) + assert.Nil(t, ccrPDB.Spec.MinAvailable) + return nil +} From f484e53b45343962c72f2bc6181c779dd342c8f6 Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Thu, 7 Nov 2024 12:43:57 -0800 Subject: [PATCH 11/12] update config doc --- docs/configuration.v2alpha1.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.v2alpha1.md b/docs/configuration.v2alpha1.md index 46cacae23..8739c1e37 100644 --- a/docs/configuration.v2alpha1.md +++ b/docs/configuration.v2alpha1.md @@ -344,7 +344,7 @@ In the table, `spec.override.nodeAgent.image.name` and `spec.override.nodeAgent. | [key].containers.[key].securityContext.windowsOptions.hostProcess | HostProcess determines if a container should be run as a 'Host Process' container. All of a Pod's containers must have the same effective HostProcess value (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). In addition, if HostProcess is true then HostNetwork must also be set to true. | | [key].containers.[key].securityContext.windowsOptions.runAsUserName | The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. | | [key].containers.[key].volumeMounts `[]object` | Specify additional volume mounts in the container. | -| [key].createPodDisruptionBudget | Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 min available pod, and a Cluster Checks Runner PDB is set with 1 max unavailable pod. | +| [key].createPodDisruptionBudget | Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 minimum available pod, and a Cluster Checks Runner PDB is set with 1 maximum unavailable pod. | | [key].createRbac | Set CreateRbac to false to prevent automatic creation of Role/ClusterRole for this component | | [key].customConfigurations `map[string]object` | CustomConfiguration allows to specify custom configuration files for `datadog.yaml`, `datadog-cluster.yaml`, `security-agent.yaml`, and `system-probe.yaml`. The content is merged with configuration generated by the Datadog Operator, with priority given to custom configuration. WARNING: It is possible to override values set in the `DatadogAgent`. | | [key].customConfigurations.[key].configData | ConfigData corresponds to the configuration file content. | From b10635b52086935d4a7749adc1bc15bcbd5da32a Mon Sep 17 00:00:00 2001 From: Sarah Wang Date: Fri, 8 Nov 2024 11:27:01 -0800 Subject: [PATCH 12/12] remove pdb logging --- internal/controller/datadogagent/override/dependencies.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/controller/datadogagent/override/dependencies.go b/internal/controller/datadogagent/override/dependencies.go index d6bd7f7bf..18006de15 100644 --- a/internal/controller/datadogagent/override/dependencies.go +++ b/internal/controller/datadogagent/override/dependencies.go @@ -66,8 +66,6 @@ func overridePodDisruptionBudget(logger logr.Logger, manager feature.ResourceMan errs = append(errs, err) } } - } else if createPdb != nil { - logger.Error(nil, "Pod disruption budget is not created by default") } return errs }