diff --git a/pkg/controller/pagerdutyintegration/clusterdeployment_created.go b/pkg/controller/pagerdutyintegration/clusterdeployment_created.go index 82b8a099..e1760a5e 100644 --- a/pkg/controller/pagerdutyintegration/clusterdeployment_created.go +++ b/pkg/controller/pagerdutyintegration/clusterdeployment_created.go @@ -32,7 +32,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) -func (r *ReconcilePagerDutyIntegration) handleCreate(pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error { +func (r *ReconcilePagerDutyIntegration) handleCreate(pdclient pd.Client, pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error { var ( // secretName is the name of the Secret deployed to the target // cluster, and also the name of the SyncSet that causes it to @@ -96,7 +96,7 @@ func (r *ReconcilePagerDutyIntegration) handleCreate(pdi *pagerdutyv1alpha1.Page err = pdData.ParseClusterConfig(r.client, cd.Namespace, configMapName) if err != nil { var createErr error - _, createErr = r.pdclient.CreateService(pdData) + _, createErr = pdclient.CreateService(pdData) if createErr != nil { localmetrics.UpdateMetricPagerDutyCreateFailure(1, ClusterID) return createErr @@ -104,7 +104,7 @@ func (r *ReconcilePagerDutyIntegration) handleCreate(pdi *pagerdutyv1alpha1.Page } localmetrics.UpdateMetricPagerDutyCreateFailure(0, ClusterID) - pdIntegrationKey, err = r.pdclient.GetIntegrationKey(pdData) + pdIntegrationKey, err = pdclient.GetIntegrationKey(pdData) if err != nil { return err } diff --git a/pkg/controller/pagerdutyintegration/clusterdeployment_deleted.go b/pkg/controller/pagerdutyintegration/clusterdeployment_deleted.go index f846748c..376da35b 100644 --- a/pkg/controller/pagerdutyintegration/clusterdeployment_deleted.go +++ b/pkg/controller/pagerdutyintegration/clusterdeployment_deleted.go @@ -28,7 +28,7 @@ import ( "k8s.io/apimachinery/pkg/types" ) -func (r *ReconcilePagerDutyIntegration) handleDelete(pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error { +func (r *ReconcilePagerDutyIntegration) handleDelete(pdclient pd.Client, pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error { var ( // secretName is the name of the Secret deployed to the target // cluster, and also the name of the SyncSet that causes it to @@ -119,7 +119,7 @@ func (r *ReconcilePagerDutyIntegration) handleDelete(pdi *pagerdutyv1alpha1.Page if deletePDService { // we have everything necessary to attempt deletion of the PD service - err = r.pdclient.DeleteService(pdData) + err = pdclient.DeleteService(pdData) if err != nil { r.reqLogger.Error(err, "Failed cleaning up pagerduty.") } else { diff --git a/pkg/controller/pagerdutyintegration/pagerdutyintegration_controller.go b/pkg/controller/pagerdutyintegration/pagerdutyintegration_controller.go index fe7baae7..c759b8d8 100644 --- a/pkg/controller/pagerdutyintegration/pagerdutyintegration_controller.go +++ b/pkg/controller/pagerdutyintegration/pagerdutyintegration_controller.go @@ -16,11 +16,13 @@ package pagerdutyintegration import ( "context" + "time" "github.com/go-logr/logr" hivev1 "github.com/openshift/hive/pkg/apis/hive/v1" "github.com/openshift/pagerduty-operator/config" pagerdutyv1alpha1 "github.com/openshift/pagerduty-operator/pkg/apis/pagerduty/v1alpha1" + "github.com/openshift/pagerduty-operator/pkg/localmetrics" pd "github.com/openshift/pagerduty-operator/pkg/pagerduty" "github.com/openshift/pagerduty-operator/pkg/utils" corev1 "k8s.io/api/core/v1" @@ -46,29 +48,16 @@ var log = logf.Log.WithName("controller_pagerdutyintegration") // Add creates a new PagerDutyIntegration Controller and adds it to the Manager. The Manager will set fields on the Controller // and Start it when the Manager is Started. func Add(mgr manager.Manager) error { - newRec, err := newReconciler(mgr) - if err != nil { - return err - } - - return add(mgr, newRec) + return add(mgr, newReconciler(mgr)) } // newReconciler returns a new reconcile.Reconciler -func newReconciler(mgr manager.Manager) (reconcile.Reconciler, error) { - tempClient, err := client.New(mgr.GetConfig(), client.Options{Scheme: mgr.GetScheme()}) - if err != nil { - return nil, err - } - - // get PD API key from secret - pdAPIKey, err := utils.LoadSecretData(tempClient, config.PagerDutyAPISecretName, config.OperatorNamespace, config.PagerDutyAPISecretKey) - +func newReconciler(mgr manager.Manager) reconcile.Reconciler { return &ReconcilePagerDutyIntegration{ client: mgr.GetClient(), scheme: mgr.GetScheme(), - pdclient: pd.NewClient(pdAPIKey), - }, nil + pdclient: pd.NewClient, + } } // add adds a new Controller to mgr with r as the reconcile.Reconciler @@ -153,7 +142,7 @@ type ReconcilePagerDutyIntegration struct { client client.Client scheme *runtime.Scheme reqLogger logr.Logger - pdclient pd.Client + pdclient func(APIKey string) pd.Client } // Reconcile reads that state of the cluster for a PagerDutyIntegration object and makes changes based on the state read @@ -184,10 +173,24 @@ func (r *ReconcilePagerDutyIntegration) Reconcile(request reconcile.Request) (re return r.requeueOnErr(err) } + pdApiKey, err := utils.LoadSecretData( + r.client, + pdi.Spec.PagerdutyApiKeySecretRef.Name, + pdi.Spec.PagerdutyApiKeySecretRef.Namespace, + config.PagerDutyAPISecretKey, + ) + if err != nil { + r.reqLogger.Error(err, "Failed to load PagerDuty API key from Secret listed in PagerDutyIntegration CR") + localmetrics.UpdateMetricPagerDutyIntegrationSecretLoaded(0, pdi.Name) + return r.requeueAfter(10 * time.Minute) + } + localmetrics.UpdateMetricPagerDutyIntegrationSecretLoaded(1, pdi.Name) + pdClient := r.pdclient(pdApiKey) + if pdi.DeletionTimestamp != nil { if utils.HasFinalizer(pdi, config.OperatorFinalizer) { for _, cd := range matchingClusterDeployments.Items { - err := r.handleDelete(pdi, &cd) + err := r.handleDelete(pdClient, pdi, &cd) if err != nil { return r.requeueOnErr(err) } @@ -212,12 +215,12 @@ func (r *ReconcilePagerDutyIntegration) Reconcile(request reconcile.Request) (re for _, cd := range matchingClusterDeployments.Items { if cd.DeletionTimestamp != nil || cd.Labels[config.ClusterDeploymentNoalertsLabel] == "true" { - err := r.handleDelete(pdi, &cd) + err := r.handleDelete(pdClient, pdi, &cd) if err != nil { return r.requeueOnErr(err) } } else { - err := r.handleCreate(pdi, &cd) + err := r.handleCreate(pdClient, pdi, &cd) if err != nil { return r.requeueOnErr(err) } @@ -245,3 +248,7 @@ func (r *ReconcilePagerDutyIntegration) doNotRequeue() (reconcile.Result, error) func (r *ReconcilePagerDutyIntegration) requeueOnErr(err error) (reconcile.Result, error) { return reconcile.Result{}, err } + +func (r *ReconcilePagerDutyIntegration) requeueAfter(t time.Duration) (reconcile.Result, error) { + return reconcile.Result{RequeueAfter: t}, nil +} diff --git a/pkg/controller/pagerdutyintegration/pagerdutyintegration_controller_test.go b/pkg/controller/pagerdutyintegration/pagerdutyintegration_controller_test.go index ed5b7868..25619f3c 100644 --- a/pkg/controller/pagerdutyintegration/pagerdutyintegration_controller_test.go +++ b/pkg/controller/pagerdutyintegration/pagerdutyintegration_controller_test.go @@ -26,6 +26,7 @@ import ( pagerdutyapis "github.com/openshift/pagerduty-operator/pkg/apis" pagerdutyv1alpha1 "github.com/openshift/pagerduty-operator/pkg/apis/pagerduty/v1alpha1" "github.com/openshift/pagerduty-operator/pkg/kube" + pd "github.com/openshift/pagerduty-operator/pkg/pagerduty" mockpd "github.com/openshift/pagerduty-operator/pkg/pagerduty/mock" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" @@ -359,6 +360,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) { localObjects: []runtime.Object{ uninstalledClusterDeployment(), testPagerDutyIntegration(), + testPDConfigSecret(), }, expectedSyncSets: &SyncSetEntry{}, expectedSecrets: &SecretEntry{}, @@ -426,6 +428,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) { localObjects: []runtime.Object{ unlabelledClusterDeployment(), testPagerDutyIntegration(), + testPDConfigSecret(), }, expectedSyncSets: &SyncSetEntry{}, expectedSecrets: &SecretEntry{}, @@ -439,6 +442,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) { localObjects: []runtime.Object{ testNoalertsClusterDeployment(), testPagerDutyIntegration(), + testPDConfigSecret(), }, expectedSyncSets: &SyncSetEntry{}, expectedSecrets: &SecretEntry{}, @@ -502,7 +506,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) { rpdi := &ReconcilePagerDutyIntegration{ client: mocks.fakeKubeClient, scheme: scheme.Scheme, - pdclient: mocks.mockPDClient, + pdclient: func(s string) pd.Client { return mocks.mockPDClient }, } // Act [2x as first exits early after setting finalizer] @@ -556,7 +560,7 @@ func TestRemoveAlertsAfterCreate(t *testing.T) { rpdi := &ReconcilePagerDutyIntegration{ client: mocks.fakeKubeClient, scheme: scheme.Scheme, - pdclient: mocks.mockPDClient, + pdclient: func(s string) pd.Client { return mocks.mockPDClient }, } // Act (create) [2x as first exits early after setting finalizer] @@ -636,7 +640,7 @@ func TestDeleteSecret(t *testing.T) { rpdi := &ReconcilePagerDutyIntegration{ client: mocks.fakeKubeClient, scheme: scheme.Scheme, - pdclient: mocks.mockPDClient, + pdclient: func(s string) pd.Client { return mocks.mockPDClient }, } // Act (create) [2x as first exits early after setting finalizer] diff --git a/pkg/localmetrics/localmetrics.go b/pkg/localmetrics/localmetrics.go index f2ba3b60..efa213e5 100644 --- a/pkg/localmetrics/localmetrics.go +++ b/pkg/localmetrics/localmetrics.go @@ -49,14 +49,21 @@ var ( ConstLabels: prometheus.Labels{"name": "pagerduty-operator"}, }) + MetricPagerDutyIntegrationSecretLoaded = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "pagerdutyintegration_secret_loaded", + Help: "Metric to track the ability to load the PagerDuty API key from the Secret specified in the PagerDutyIntegration", + ConstLabels: prometheus.Labels{"name": "pagerduty-operator"}, + }, []string{"pagerdutyintegration_name"}) + MetricsList = []prometheus.Collector{ MetricPagerDutyCreateFailure, MetricPagerDutyDeleteFailure, MetricPagerDutyHeartbeat, + MetricPagerDutyIntegrationSecretLoaded, } ) -// UpdateAPIMetrics updates all API endpoint metrics ever 5 minutes +// UpdateAPIMetrics updates all API endpoint metrics every 5 minutes func UpdateAPIMetrics(APIKey string, timer *prometheus.Timer) { d := time.Tick(5 * time.Minute) for range d { @@ -65,14 +72,25 @@ func UpdateAPIMetrics(APIKey string, timer *prometheus.Timer) { } -// UpdateMetricPagerDutyCreateFailure updates guage to 1 when creation fails +// UpdateMetricPagerDutyIntegrationSecretLoaded updates gauge to 1 +// when the PagerDuty API key can be loaded from the Secret specified +// in the PagerDutyIntegration, or to 0 if it fails +func UpdateMetricPagerDutyIntegrationSecretLoaded(x int, pdiName string) { + MetricPagerDutyIntegrationSecretLoaded.With( + prometheus.Labels{"pagerdutyintegration_name": pdiName}, + ).Set( + float64(x), + ) +} + +// UpdateMetricPagerDutyCreateFailure updates gauge to 1 when creation fails func UpdateMetricPagerDutyCreateFailure(x int, cd string) { MetricPagerDutyCreateFailure.With(prometheus.Labels{ "clusterdeployment_name": cd}).Set( float64(x)) } -// UpdateMetricPagerDutyDeleteFailure updates guage to 1 when deletion fails +// UpdateMetricPagerDutyDeleteFailure updates gauge to 1 when deletion fails func UpdateMetricPagerDutyDeleteFailure(x int, cd string) { MetricPagerDutyDeleteFailure.With(prometheus.Labels{ "clusterdeployment_name": cd}).Set(