diff --git a/docs/datadog_monitor.md b/docs/datadog_monitor.md index edb8d386f..f60944253 100644 --- a/docs/datadog_monitor.md +++ b/docs/datadog_monitor.md @@ -71,8 +71,11 @@ To deploy a `DatadogMonitor` with the Datadog Operator, use the [`datadog-operat ``` This automatically creates a new monitor in Datadog. You can find it on the [Manage Monitors][7] page of your Datadog account. + *Note*: All monitors created from `DatadogMonitor` are automatically tagged with `generated:kubernetes`. +By default, the Operator ensures that the API monitor definition stays in sync with the `DatadogMonitor` resource every **60** minutes (per monitor). This interval can be adjusted using the environment variable `DD_MONITOR_FORCE_SYNC_PERIOD`, which specifies the number of minutes. For example, setting this variable to `"30"` changes the interval to 30 minutes. If the variable is unset or is not a valid integer, the Operator falls back to the default of 60 minutes. + ## Cleanup The following commands delete the monitor from your Datadog account and all the Kubernetes resources created by the above instructions: diff --git a/internal/controller/datadogmonitor/controller.go b/internal/controller/datadogmonitor/controller.go index 5e821927d..4b3f3f26d 100644 --- a/internal/controller/datadogmonitor/controller.go +++ b/internal/controller/datadogmonitor/controller.go @@ -8,7 +8,9 @@ package datadogmonitor import ( "context" "fmt" + "os" "sort" + "strconv" "strings" "time" @@ -35,10 +37,11 @@ import ( ) const ( - defaultRequeuePeriod = 60 * time.Second - defaultErrRequeuePeriod = 5 * time.Second - defaultForceSyncPeriod = 60 * time.Minute - maxTriggeredStateGroups = 10 + defaultRequeuePeriod = 60 * time.Second + defaultErrRequeuePeriod = 5 * time.Second + defaultForceSyncPeriod = 60 * time.Minute + maxTriggeredStateGroups = 10 + DDMonitorForceSyncPeriodEnvVar = "DD_MONITOR_FORCE_SYNC_PERIOD" ) var supportedMonitorTypes = map[string]bool{ @@ -89,11 +92,20 @@ func (r *Reconciler) internalReconcile(ctx context.Context, req reconcile.Reques logger := r.log.WithValues("datadogmonitor", 
req.NamespacedName) logger.Info("Reconciling DatadogMonitor") now := metav1.NewTime(time.Now()) + forceSyncPeriod := defaultForceSyncPeriod + + forceSyncPeriodInt, err := strconv.Atoi(os.Getenv(DDMonitorForceSyncPeriodEnvVar)) + if err != nil { + logger.Error(err, "Invalid value for monitor force sync period. Defaulting to 60 minutes.") + } else { + logger.V(1).Info("Setting monitor force sync period", "minutes", forceSyncPeriodInt) + forceSyncPeriod = time.Duration(forceSyncPeriodInt) * time.Minute + } // Get instance instance := &datadoghqv1alpha1.DatadogMonitor{} var result ctrl.Result - err := r.client.Get(ctx, req.NamespacedName, instance) + err = r.client.Get(ctx, req.NamespacedName, instance) if err != nil { if apierrors.IsNotFound(err) { // Request object not found, could have been deleted after reconcile request. @@ -140,7 +152,7 @@ func (r *Reconciler) internalReconcile(ctx context.Context, req reconcile.Reques // Custom resource manifest has changed, need to update the API logger.V(1).Info("DatadogMonitor manifest has changed") shouldUpdate = true - } else if instance.Status.MonitorLastForceSyncTime == nil || (defaultForceSyncPeriod-now.Sub(instance.Status.MonitorLastForceSyncTime.Time)) <= 0 { + } else if instance.Status.MonitorLastForceSyncTime == nil || (forceSyncPeriod-now.Sub(instance.Status.MonitorLastForceSyncTime.Time)) <= 0 { // Periodically force a sync with the API monitor to ensure parity // Get monitor to make sure it exists before trying any updates. If it doesn't, set shouldCreate m, err = r.get(instance, newStatus)