Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create Pod Disruption Budget for DCA and CCR deployments #1454

Merged
merged 14 commits into from
Nov 8, 2024
5 changes: 5 additions & 0 deletions api/datadoghq/v2alpha1/datadogagent_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1423,6 +1423,11 @@ type DatadogAgentComponentOverride struct {
// +optional
Replicas *int32 `json:"replicas,omitempty"`

// Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component.
// Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 min available pod, and a Cluster Checks Runner PDB is set with 1 max unavailable pod.
swang392 marked this conversation as resolved.
Show resolved Hide resolved
// +optional
CreatePodDisruptionBudget *bool `json:"createPodDisruptionBudget,omitempty"`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we ever get a request for making PDB configurable?! I guess it's unlikely since we haven't made it configurable in helm.


// Set CreateRbac to false to prevent automatic creation of Role/ClusterRole for this component
// +optional
CreateRbac *bool `json:"createRbac,omitempty"`
Expand Down
5 changes: 5 additions & 0 deletions api/datadoghq/v2alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions config/crd/bases/v1/datadoghq.com_datadogagents.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3923,6 +3923,11 @@ spec:
`agent`, `cluster-agent`, `init-config`, `init-volume`, `process-agent`, `seccomp-setup`,
`security-agent`, `system-probe`, and `trace-agent`.
type: object
createPodDisruptionBudget:
description: |-
Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component.
Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 min available pod, and a Cluster Checks Runner PDB is set with 1 max unavailable pod.
type: boolean
createRbac:
description: Set CreateRbac to false to prevent automatic creation of Role/ClusterRole for this component
type: boolean
Expand Down
1 change: 1 addition & 0 deletions docs/configuration.v2alpha1.md
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ In the table, `spec.override.nodeAgent.image.name` and `spec.override.nodeAgent.
| [key].containers.[key].securityContext.windowsOptions.hostProcess | HostProcess determines if a container should be run as a 'Host Process' container. All of a Pod's containers must have the same effective HostProcess value (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). In addition, if HostProcess is true then HostNetwork must also be set to true. |
| [key].containers.[key].securityContext.windowsOptions.runAsUserName | The UserName in Windows to run the entrypoint of the container process. Defaults to the user specified in image metadata if unspecified. May also be set in PodSecurityContext. If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. |
| [key].containers.[key].volumeMounts `[]object` | Specify additional volume mounts in the container. |
| [key].createPodDisruptionBudget | Set CreatePodDisruptionBudget to true to create a PodDisruptionBudget for this component. Not applicable for the Node Agent. A Cluster Agent PDB is set with 1 min available pod, and a Cluster Checks Runner PDB is set with 1 max unavailable pod. |
| [key].createRbac | Set CreateRbac to false to prevent automatic creation of Role/ClusterRole for this component |
| [key].customConfigurations `map[string]object` | CustomConfiguration allows to specify custom configuration files for `datadog.yaml`, `datadog-cluster.yaml`, `security-agent.yaml`, and `system-probe.yaml`. The content is merged with configuration generated by the Datadog Operator, with priority given to custom configuration. WARNING: It is possible to override values set in the `DatadogAgent`. |
| [key].customConfigurations.[key].configData | ConfigData corresponds to the configuration file content. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"strconv"
"testing"

"github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1"
datadoghqv2alpha1 "github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1"
apiutils "github.com/DataDog/datadog-operator/api/utils"
"github.com/DataDog/datadog-operator/internal/controller/datadogagent/common"
Expand Down Expand Up @@ -38,6 +39,18 @@ func Test_defaultClusterAgentDeployment(t *testing.T) {

assert.Empty(t, testutils.CompareKubeResource(&deployment.Spec.Template, expectedDeployment))
}
// Test_getPodDisruptionBudget checks the PodDisruptionBudget built for the
// Cluster Agent: its name, namespace, selector, and that only MinAvailable is set.
func Test_getPodDisruptionBudget(t *testing.T) {
	dda := v2alpha1.DatadogAgent{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "my-datadog-agent",
			Namespace: "some-namespace",
		},
	}

	testpdb := GetClusterAgentPodDisruptionBudget(&dda)

	assert.Equal(t, "datadog-cluster-agent-pdb", testpdb.Name)
	// The PDB is expected in the DatadogAgent's namespace.
	assert.Equal(t, "some-namespace", testpdb.Namespace)
	if assert.NotNil(t, testpdb.Spec.Selector) {
		assert.NotEmpty(t, testpdb.Spec.Selector.MatchLabels)
	}
	// Guard the dereference so a missing MinAvailable fails the test instead of panicking.
	if assert.NotNil(t, testpdb.Spec.MinAvailable) {
		assert.Equal(t, intstr.FromInt(pdbMinAvailableInstances), *testpdb.Spec.MinAvailable)
	}
	assert.Nil(t, testpdb.Spec.MaxUnavailable)
}

func clusterAgentExpectedPodTemplate(dda *datadoghqv2alpha1.DatadogAgent) *corev1.PodTemplateSpec {
podTemplate := &corev1.PodTemplateSpec{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,16 @@ import (
"github.com/DataDog/datadog-operator/pkg/controller/utils/comparison"

corev1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/version"
)

const (
	// pdbMinAvailableInstances is the MinAvailable value set on the
	// Cluster Agent PodDisruptionBudget.
	pdbMinAvailableInstances = 1
)

// GetClusterAgentService returns the Cluster-Agent service
func GetClusterAgentService(dda metav1.Object) *corev1.Service {
labels := object.GetDefaultLabels(dda, v2alpha1.DefaultClusterAgentResourceSuffix, GetClusterAgentVersion(dda))
Expand Down Expand Up @@ -53,6 +58,27 @@ func GetClusterAgentService(dda metav1.Object) *corev1.Service {
return service
}

func GetClusterAgentPodDisruptionBudget(dda metav1.Object) *policyv1.PodDisruptionBudget {
levan-m marked this conversation as resolved.
Show resolved Hide resolved
// labels and annotations
minAvailableStr := intstr.FromInt(pdbMinAvailableInstances)
matchLabels := map[string]string{
apicommon.AgentDeploymentNameLabelKey: dda.GetName(),
apicommon.AgentDeploymentComponentLabelKey: v2alpha1.DefaultClusterAgentResourceSuffix}
pdb := &policyv1.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Name: "datadog-cluster-agent-pdb",
swang392 marked this conversation as resolved.
Show resolved Hide resolved
Namespace: dda.GetNamespace(),
},
Spec: policyv1.PodDisruptionBudgetSpec{
MinAvailable: &minAvailableStr,
Selector: &metav1.LabelSelector{
MatchLabels: matchLabels,
},
},
}
return pdb
}

// GetMetricsServerServiceName returns the external metrics provider service name
func GetMetricsServerServiceName(dda metav1.Object) string {
return fmt.Sprintf("%s-%s", dda.GetName(), v2alpha1.DefaultMetricsServerResourceSuffix)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ import (

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"

apicommon "github.com/DataDog/datadog-operator/api/datadoghq/common"
"github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1"
Expand All @@ -21,6 +23,10 @@ import (
"github.com/DataDog/datadog-operator/pkg/defaulting"
)

const (
	// pdMaxUnavailableInstances is the MaxUnavailable value set on the
	// Cluster Checks Runner PodDisruptionBudget.
	//
	// NOTE(review): the prefix is "pd" while the Cluster Agent counterpart is
	// named pdbMinAvailableInstances; the identifier is kept as-is because
	// other code in this package references it.
	pdMaxUnavailableInstances = 1
)

// GetClusterChecksRunnerName returns the Cluster-Checks-Runner name based on the DatadogAgent name
func GetClusterChecksRunnerName(dda metav1.Object) string {
return fmt.Sprintf("%s-%s", dda.GetName(), v2alpha1.DefaultClusterChecksRunnerResourceSuffix)
Expand Down Expand Up @@ -82,6 +88,26 @@ func NewDefaultClusterChecksRunnerPodTemplateSpec(dda metav1.Object) *corev1.Pod
return template
}

func GetClusterChecksRunnerPodDisruptionBudget(dda metav1.Object) *policyv1.PodDisruptionBudget {
maxUnavailableStr := intstr.FromInt(pdMaxUnavailableInstances)
matchLabels := map[string]string{
apicommon.AgentDeploymentNameLabelKey: dda.GetName(),
apicommon.AgentDeploymentComponentLabelKey: v2alpha1.DefaultClusterChecksRunnerResourceSuffix}
pdb := &policyv1.PodDisruptionBudget{
ObjectMeta: metav1.ObjectMeta{
Name: "datadog-cluster-checks-runner-pdb",
swang392 marked this conversation as resolved.
Show resolved Hide resolved
Namespace: dda.GetNamespace(),
},
Spec: policyv1.PodDisruptionBudgetSpec{
MaxUnavailable: &maxUnavailableStr,
Selector: &metav1.LabelSelector{
MatchLabels: matchLabels,
},
},
}
return pdb
}

// getDefaultServiceAccountName returns the default Cluster-Checks-Runner ServiceAccountName
func getDefaultServiceAccountName(dda metav1.Object) string {
return fmt.Sprintf("%s-%s", dda.GetName(), v2alpha1.DefaultClusterChecksRunnerResourceSuffix)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1"
"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
)

func Test_getDefaultServiceAccountName(t *testing.T) {
Expand All @@ -23,3 +24,16 @@ func Test_getDefaultServiceAccountName(t *testing.T) {

assert.Equal(t, "my-datadog-agent-cluster-checks-runner", getDefaultServiceAccountName(&dda))
}

// Test_getPodDisruptionBudget checks the PodDisruptionBudget built for the
// Cluster Checks Runner: its name, namespace, selector, and that only
// MaxUnavailable is set.
func Test_getPodDisruptionBudget(t *testing.T) {
	dda := v2alpha1.DatadogAgent{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "my-datadog-agent",
			Namespace: "some-namespace",
		},
	}

	testpdb := GetClusterChecksRunnerPodDisruptionBudget(&dda)

	assert.Equal(t, "datadog-cluster-checks-runner-pdb", testpdb.Name)
	// The PDB is expected in the DatadogAgent's namespace.
	assert.Equal(t, "some-namespace", testpdb.Namespace)
	if assert.NotNil(t, testpdb.Spec.Selector) {
		assert.NotEmpty(t, testpdb.Spec.Selector.MatchLabels)
	}
	// Guard the dereference so a missing MaxUnavailable fails the test instead of panicking.
	if assert.NotNil(t, testpdb.Spec.MaxUnavailable) {
		assert.Equal(t, intstr.FromInt(pdMaxUnavailableInstances), *testpdb.Spec.MaxUnavailable)
	}
	assert.Nil(t, testpdb.Spec.MinAvailable)
}
2 changes: 0 additions & 2 deletions internal/controller/datadogagent/controller_reconcile_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,13 @@ func (r *Reconciler) internalReconcileV2(ctx context.Context, request reconcile.
// Set default values for GlobalConfig and Features
instanceCopy := instance.DeepCopy()
datadoghqv2alpha1.DefaultDatadogAgent(instanceCopy)

return r.reconcileInstanceV2(ctx, reqLogger, instanceCopy)
}

func (r *Reconciler) reconcileInstanceV2(ctx context.Context, logger logr.Logger, instance *datadoghqv2alpha1.DatadogAgent) (reconcile.Result, error) {
var result reconcile.Result
newStatus := instance.Status.DeepCopy()
now := metav1.NewTime(time.Now())

features, requiredComponents := feature.BuildFeatures(instance, reconcilerOptionsToFeatureOptions(&r.options, logger))
// update list of enabled features for metrics forwarder
r.updateMetricsForwardersFeatures(instance, features)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ func (f *defaultFeature) Configure(dda *v2alpha1.DatadogAgent) feature.RequiredC
if dda.Spec.Global.DisableNonResourceRules != nil && *dda.Spec.Global.DisableNonResourceRules {
f.disableNonResourceRules = true
}

if dda.Spec.Global.Credentials != nil {
creds := dda.Spec.Global.Credentials

Expand Down Expand Up @@ -223,7 +222,6 @@ func (f *defaultFeature) Configure(dda *v2alpha1.DatadogAgent) feature.RequiredC
},
}
}

}

// ManageDependencies allows a feature to manage its dependencies.
Expand Down
25 changes: 25 additions & 0 deletions internal/controller/datadogagent/override/dependencies.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ import (
"k8s.io/apimachinery/pkg/util/errors"

"github.com/DataDog/datadog-operator/api/datadoghq/v2alpha1"
componentdca "github.com/DataDog/datadog-operator/internal/controller/datadogagent/component/clusteragent"
componentccr "github.com/DataDog/datadog-operator/internal/controller/datadogagent/component/clusterchecksrunner"
"github.com/DataDog/datadog-operator/internal/controller/datadogagent/feature"
"github.com/DataDog/datadog-operator/internal/controller/datadogagent/object"
"github.com/DataDog/datadog-operator/internal/controller/datadogagent/object/configmap"
Expand Down Expand Up @@ -42,11 +44,34 @@ func Dependencies(logger logr.Logger, manager feature.ResourceManagers, dda *v2a
// Handle custom check files
checksdCMName := fmt.Sprintf(extraChecksdConfigMapName, strings.ToLower((string(component))))
errs = append(errs, overrideExtraConfigs(logger, manager, override.ExtraChecksd, namespace, checksdCMName, false)...)

errs = append(errs, overridePodDisruptionBudget(logger, manager, dda, override.CreatePodDisruptionBudget, component)...)
}

return errs
}

func overridePodDisruptionBudget(logger logr.Logger, manager feature.ResourceManagers, dda *v2alpha1.DatadogAgent, createPdb *bool, component v2alpha1.ComponentName) (errs []error) {
if createPdb != nil && *createPdb {
if component == v2alpha1.ClusterAgentComponentName {
pdb := componentdca.GetClusterAgentPodDisruptionBudget(dda)
if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil {
errs = append(errs, err)
}
} else if component == v2alpha1.ClusterChecksRunnerComponentName &&
(dda.Spec.Features.ClusterChecks.UseClusterChecksRunners == nil ||
*dda.Spec.Features.ClusterChecks.UseClusterChecksRunners) {
pdb := componentccr.GetClusterChecksRunnerPodDisruptionBudget(dda)
if err := manager.Store().AddOrUpdate(kubernetes.PodDisruptionBudgetsKind, pdb); err != nil {
errs = append(errs, err)
}
}
} else {
logger.Error(nil, "Pod disruption budget is not created by default")
swang392 marked this conversation as resolved.
Show resolved Hide resolved
}
return errs
}

func overrideRBAC(logger logr.Logger, manager feature.ResourceManagers, override *v2alpha1.DatadogAgentComponentOverride, component v2alpha1.ComponentName, namespace string) error {
var errs []error

Expand Down
29 changes: 29 additions & 0 deletions internal/controller/datadogagent/override/dependencies_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,35 @@ func TestDependencies(t *testing.T) {
},
expectsErrors: false,
},
{
name: "override clusterAgent createPDB without errors",
dda: v2alpha1.DatadogAgent{
Spec: v2alpha1.DatadogAgentSpec{
Override: map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{
v2alpha1.ClusterAgentComponentName: {
CreatePodDisruptionBudget: apiutils.NewBoolPointer(true),
},
},
},
},
},
{
name: "override clusterChecksRunner createPDB without errors",
dda: v2alpha1.DatadogAgent{
Spec: v2alpha1.DatadogAgentSpec{
Override: map[v2alpha1.ComponentName]*v2alpha1.DatadogAgentComponentOverride{
v2alpha1.ClusterChecksRunnerComponentName: {
CreatePodDisruptionBudget: apiutils.NewBoolPointer(true),
},
},
Features: &v2alpha1.DatadogFeatures{
ClusterChecks: &v2alpha1.ClusterChecksFeatureConfig{
UseClusterChecksRunners: apiutils.NewBoolPointer(true),
},
},
},
},
},
}

for _, test := range tests {
Expand Down
Loading