Skip to content

Commit

Permalink
Merge pull request #107 from grdryn/use-custom-apikey-secret
Browse files Browse the repository at this point in the history
Don't use old hardcoded PD API key secret
  • Loading branch information
openshift-merge-robot committed Aug 6, 2020
2 parents c2e7fea + 94c0be8 commit 686d3de
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 45 deletions.
19 changes: 18 additions & 1 deletion hack/olm-registry/olm-artifacts-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ objects:
sourceType: grpc
image: ${REGISTRY_IMG}:${CHANNEL}-${IMAGE_TAG}
displayName: pagerduty-operator Registry
publisher: SRE
publisher: SRE

- apiVersion: operators.coreos.com/v1alpha2
kind: OperatorGroup
Expand Down Expand Up @@ -67,3 +67,20 @@ objects:
targetSecretRef:
name: pd-secret
namespace: openshift-monitoring

- apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: pagerduty-integration-api-secret
namespace: pagerduty-operator
spec:
groups:
- name: pagerduty-integration-api-secret
rules:
- alert: PagerDutyIntegrationAPISecretError
annotations:
message: PagerDuty Operator is failing to load PAGERDUTY_API_KEY from Secret specified in PagerDutyIntegration {{ $labels.pagerdutyintegration_name }}. Either the Secret might be missing, or the key might be missing from within the Secret.
expr: pagerdutyintegration_secret_loaded < 1
for: 15m
labels:
severity: warning
10 changes: 5 additions & 5 deletions pkg/controller/pagerdutyintegration/clusterdeployment_created.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

func (r *ReconcilePagerDutyIntegration) handleCreate(pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
func (r *ReconcilePagerDutyIntegration) handleCreate(pdclient pd.Client, pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
var (
// secretName is the name of the Secret deployed to the target
// cluster, and also the name of the SyncSet that causes it to
Expand Down Expand Up @@ -96,15 +96,15 @@ func (r *ReconcilePagerDutyIntegration) handleCreate(pdi *pagerdutyv1alpha1.Page
err = pdData.ParseClusterConfig(r.client, cd.Namespace, configMapName)
if err != nil {
var createErr error
_, createErr = r.pdclient.CreateService(pdData)
_, createErr = pdclient.CreateService(pdData)
if createErr != nil {
localmetrics.UpdateMetricPagerDutyCreateFailure(1, ClusterID)
localmetrics.UpdateMetricPagerDutyCreateFailure(1, ClusterID, pdi.Name)
return createErr
}
}
localmetrics.UpdateMetricPagerDutyCreateFailure(0, ClusterID)
localmetrics.UpdateMetricPagerDutyCreateFailure(0, ClusterID, pdi.Name)

pdIntegrationKey, err = r.pdclient.GetIntegrationKey(pdData)
pdIntegrationKey, err = pdclient.GetIntegrationKey(pdData)
if err != nil {
return err
}
Expand Down
19 changes: 15 additions & 4 deletions pkg/controller/pagerdutyintegration/clusterdeployment_deleted.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (
"k8s.io/apimachinery/pkg/types"
)

func (r *ReconcilePagerDutyIntegration) handleDelete(pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
func (r *ReconcilePagerDutyIntegration) handleDelete(pdclient pd.Client, pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
var (
// secretName is the name of the Secret deployed to the target
// cluster, and also the name of the SyncSet that causes it to
Expand Down Expand Up @@ -119,7 +119,7 @@ func (r *ReconcilePagerDutyIntegration) handleDelete(pdi *pagerdutyv1alpha1.Page

if deletePDService {
// we have everything necessary to attempt deletion of the PD service
err = r.pdclient.DeleteService(pdData)
err = pdclient.DeleteService(pdData)
if err != nil {
r.reqLogger.Error(err, "Failed cleaning up pagerduty.")
} else {
Expand Down Expand Up @@ -155,11 +155,22 @@ func (r *ReconcilePagerDutyIntegration) handleDelete(pdi *pagerdutyv1alpha1.Page
utils.DeleteFinalizer(cd, finalizer)
err = r.client.Update(context.TODO(), cd)
if err != nil {
metrics.UpdateMetricPagerDutyDeleteFailure(1, ClusterID)
metrics.UpdateMetricPagerDutyDeleteFailure(1, ClusterID, pdi.Name)
return err
}
}
metrics.UpdateMetricPagerDutyDeleteFailure(0, ClusterID)

if utils.HasFinalizer(cd, config.OperatorFinalizer) {
r.reqLogger.Info("Deleting old PD finalizer from ClusterDeployment", "Namespace", cd.Namespace, "Name", cd.Name)
utils.DeleteFinalizer(cd, config.OperatorFinalizer)
err = r.client.Update(context.TODO(), cd)
if err != nil {
metrics.UpdateMetricPagerDutyDeleteFailure(1, ClusterID, pdi.Name)
return err
}
}

metrics.UpdateMetricPagerDutyDeleteFailure(0, ClusterID, pdi.Name)

return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,29 +52,16 @@ var log = logf.Log.WithName("controller_pagerdutyintegration")
// Add creates a new PagerDutyIntegration Controller and adds it to the Manager. The Manager will set fields on the Controller
// and Start it when the Manager is Started.
func Add(mgr manager.Manager) error {
newRec, err := newReconciler(mgr)
if err != nil {
return err
}

return add(mgr, newRec)
return add(mgr, newReconciler(mgr))
}

// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager) (reconcile.Reconciler, error) {
tempClient, err := client.New(mgr.GetConfig(), client.Options{Scheme: mgr.GetScheme()})
if err != nil {
return nil, err
}

// get PD API key from secret
pdAPIKey, err := utils.LoadSecretData(tempClient, config.PagerDutyAPISecretName, config.OperatorNamespace, config.PagerDutyAPISecretKey)

func newReconciler(mgr manager.Manager) reconcile.Reconciler {
return &ReconcilePagerDutyIntegration{
client: utils.NewClientWithMetricsOrDie(log, mgr, controllerName),
scheme: mgr.GetScheme(),
pdclient: pd.NewClient(pdAPIKey),
}, nil
pdclient: pd.NewClient,
}
}

// add adds a new Controller to mgr with r as the reconcile.Reconciler
Expand Down Expand Up @@ -159,7 +146,7 @@ type ReconcilePagerDutyIntegration struct {
client client.Client
scheme *runtime.Scheme
reqLogger logr.Logger
pdclient pd.Client
pdclient func(APIKey string) pd.Client
}

// Reconcile reads that state of the cluster for a PagerDutyIntegration object and makes changes based on the state read
Expand Down Expand Up @@ -198,15 +185,31 @@ func (r *ReconcilePagerDutyIntegration) Reconcile(request reconcile.Request) (re
return r.requeueOnErr(err)
}

pdApiKey, err := utils.LoadSecretData(
r.client,
pdi.Spec.PagerdutyApiKeySecretRef.Name,
pdi.Spec.PagerdutyApiKeySecretRef.Namespace,
config.PagerDutyAPISecretKey,
)
if err != nil {
r.reqLogger.Error(err, "Failed to load PagerDuty API key from Secret listed in PagerDutyIntegration CR")
localmetrics.UpdateMetricPagerDutyIntegrationSecretLoaded(0, pdi.Name)
return r.requeueAfter(10 * time.Minute)
}
localmetrics.UpdateMetricPagerDutyIntegrationSecretLoaded(1, pdi.Name)
pdClient := r.pdclient(pdApiKey)

if pdi.DeletionTimestamp != nil {
if utils.HasFinalizer(pdi, config.OperatorFinalizer) {
for _, cd := range matchingClusterDeployments.Items {
err := r.handleDelete(pdi, &cd)
err := r.handleDelete(pdClient, pdi, &cd)
if err != nil {
return r.requeueOnErr(err)
}
}

localmetrics.DeleteMetricPagerDutyIntegrationSecretLoaded(pdi.Name)

utils.DeleteFinalizer(pdi, config.OperatorFinalizer)
err = r.client.Update(context.TODO(), pdi)
if err != nil {
Expand All @@ -226,12 +229,12 @@ func (r *ReconcilePagerDutyIntegration) Reconcile(request reconcile.Request) (re

for _, cd := range matchingClusterDeployments.Items {
if cd.DeletionTimestamp != nil || cd.Labels[config.ClusterDeploymentNoalertsLabel] == "true" {
err := r.handleDelete(pdi, &cd)
err := r.handleDelete(pdClient, pdi, &cd)
if err != nil {
return r.requeueOnErr(err)
}
} else {
err := r.handleCreate(pdi, &cd)
err := r.handleCreate(pdClient, pdi, &cd)
if err != nil {
return r.requeueOnErr(err)
}
Expand Down Expand Up @@ -259,3 +262,7 @@ func (r *ReconcilePagerDutyIntegration) doNotRequeue() (reconcile.Result, error)
func (r *ReconcilePagerDutyIntegration) requeueOnErr(err error) (reconcile.Result, error) {
return reconcile.Result{}, err
}

func (r *ReconcilePagerDutyIntegration) requeueAfter(t time.Duration) (reconcile.Result, error) {
return reconcile.Result{RequeueAfter: t}, nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
pagerdutyapis "github.com/openshift/pagerduty-operator/pkg/apis"
pagerdutyv1alpha1 "github.com/openshift/pagerduty-operator/pkg/apis/pagerduty/v1alpha1"
"github.com/openshift/pagerduty-operator/pkg/kube"
pd "github.com/openshift/pagerduty-operator/pkg/pagerduty"
mockpd "github.com/openshift/pagerduty-operator/pkg/pagerduty/mock"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -359,6 +360,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
localObjects: []runtime.Object{
uninstalledClusterDeployment(),
testPagerDutyIntegration(),
testPDConfigSecret(),
},
expectedSyncSets: &SyncSetEntry{},
expectedSecrets: &SecretEntry{},
Expand Down Expand Up @@ -426,6 +428,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
localObjects: []runtime.Object{
unlabelledClusterDeployment(),
testPagerDutyIntegration(),
testPDConfigSecret(),
},
expectedSyncSets: &SyncSetEntry{},
expectedSecrets: &SecretEntry{},
Expand All @@ -439,6 +442,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
localObjects: []runtime.Object{
testNoalertsClusterDeployment(),
testPagerDutyIntegration(),
testPDConfigSecret(),
},
expectedSyncSets: &SyncSetEntry{},
expectedSecrets: &SecretEntry{},
Expand Down Expand Up @@ -502,7 +506,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
rpdi := &ReconcilePagerDutyIntegration{
client: mocks.fakeKubeClient,
scheme: scheme.Scheme,
pdclient: mocks.mockPDClient,
pdclient: func(s string) pd.Client { return mocks.mockPDClient },
}

// Act [2x as first exits early after setting finalizer]
Expand Down Expand Up @@ -556,7 +560,7 @@ func TestRemoveAlertsAfterCreate(t *testing.T) {
rpdi := &ReconcilePagerDutyIntegration{
client: mocks.fakeKubeClient,
scheme: scheme.Scheme,
pdclient: mocks.mockPDClient,
pdclient: func(s string) pd.Client { return mocks.mockPDClient },
}

// Act (create) [2x as first exits early after setting finalizer]
Expand Down Expand Up @@ -636,7 +640,7 @@ func TestDeleteSecret(t *testing.T) {
rpdi := &ReconcilePagerDutyIntegration{
client: mocks.fakeKubeClient,
scheme: scheme.Scheme,
pdclient: mocks.mockPDClient,
pdclient: func(s string) pd.Client { return mocks.mockPDClient },
}

// Act (create) [2x as first exits early after setting finalizer]
Expand Down
50 changes: 39 additions & 11 deletions pkg/localmetrics/localmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,13 @@ var (
Name: "pagerduty_create_failure",
Help: "Metric for the failure of creating a cluster deployment.",
ConstLabels: prometheus.Labels{"name": "pagerduty-operator"},
}, []string{"clusterdeployment_name"})
}, []string{"clusterdeployment_name", "pagerdutyintegration_name"})

MetricPagerDutyDeleteFailure = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "pagerduty_delete_failure",
Help: "Metric for the failure of deleting a cluster deployment.",
ConstLabels: prometheus.Labels{"name": "pagerduty-operator"},
}, []string{"clusterdeployment_name"})
}, []string{"clusterdeployment_name", "pagerdutyintegration_name"})

MetricPagerDutyHeartbeat = prometheus.NewSummary(prometheus.SummaryOpts{
Name: "pagerduty_heartbeat",
Expand All @@ -69,16 +69,23 @@ var (
Buckets: []float64{1},
}, []string{"controller", "method", "resource", "status"})

MetricPagerDutyIntegrationSecretLoaded = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "pagerdutyintegration_secret_loaded",
Help: "Metric to track the ability to load the PagerDuty API key from the Secret specified in the PagerDutyIntegration",
ConstLabels: prometheus.Labels{"name": "pagerduty-operator"},
}, []string{"pagerdutyintegration_name"})

MetricsList = []prometheus.Collector{
MetricPagerDutyCreateFailure,
MetricPagerDutyDeleteFailure,
MetricPagerDutyHeartbeat,
ApiCallDuration,
ReconcileDuration,
MetricPagerDutyIntegrationSecretLoaded,
}
)

// UpdateAPIMetrics updates all API endpoint metrics ever 5 minutes
// UpdateAPIMetrics updates all API endpoint metrics every 5 minutes
func UpdateAPIMetrics(APIKey string, timer *prometheus.Timer) {
d := time.Tick(5 * time.Minute)
for range d {
Expand All @@ -87,18 +94,39 @@ func UpdateAPIMetrics(APIKey string, timer *prometheus.Timer) {

}

// UpdateMetricPagerDutyCreateFailure updates guage to 1 when creation fails
func UpdateMetricPagerDutyCreateFailure(x int, cd string) {
// UpdateMetricPagerDutyIntegrationSecretLoaded updates gauge to 1
// when the PagerDuty API key can be loaded from the Secret specified
// in the PagerDutyIntegration, or to 0 if it fails
func UpdateMetricPagerDutyIntegrationSecretLoaded(x int, pdiName string) {
MetricPagerDutyIntegrationSecretLoaded.With(
prometheus.Labels{"pagerdutyintegration_name": pdiName},
).Set(float64(x))
}

// DeleteMetricPagerDutyIntegrationSecretLoaded deletes the metric for
// the PagerDutyIntegration name provided. This should be called when
// e.g. the PagerDutyIntegration is being deleted, so that there are
// no irrelevant metrics (which alerts could be firing on).
func DeleteMetricPagerDutyIntegrationSecretLoaded(pdiName string) bool {
return MetricPagerDutyIntegrationSecretLoaded.Delete(
prometheus.Labels{"pagerdutyintegration_name": pdiName},
)
}

// UpdateMetricPagerDutyCreateFailure updates gauge to 1 when creation fails
func UpdateMetricPagerDutyCreateFailure(x int, cd string, pdiName string) {
MetricPagerDutyCreateFailure.With(prometheus.Labels{
"clusterdeployment_name": cd}).Set(
float64(x))
"clusterdeployment_name": cd,
"pagerdutyintegration_name": pdiName,
}).Set(float64(x))
}

// UpdateMetricPagerDutyDeleteFailure updates guage to 1 when deletion fails
func UpdateMetricPagerDutyDeleteFailure(x int, cd string) {
// UpdateMetricPagerDutyDeleteFailure updates gauge to 1 when deletion fails
func UpdateMetricPagerDutyDeleteFailure(x int, cd string, pdiName string) {
MetricPagerDutyDeleteFailure.With(prometheus.Labels{
"clusterdeployment_name": cd}).Set(
float64(x))
"clusterdeployment_name": cd,
"pagerdutyintegration_name": pdiName,
}).Set(float64(x))
}

// SetReconcileDuration Push the duration of the reconcile iteration
Expand Down

0 comments on commit 686d3de

Please sign in to comment.