Skip to content

Commit

Permalink
Don't use old hardcoded PD API key secret
Browse files Browse the repository at this point in the history
It should be specified in the PDI CR. If the API key can't be loaded
from the specified API key, then trigger an alert.
  • Loading branch information
grdryn committed Aug 1, 2020
1 parent 50eaf8e commit 2dbb2e0
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 33 deletions.
2 changes: 2 additions & 0 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ func start() error {
// Create a new Cmd to provide shared dependencies and start components
mgr, err := manager.New(cfg, manager.Options{
Namespace: "",
// Disable controller-runtime metrics serving
MetricsBindAddress: "0",
})
if err != nil {
log.Error(err, "")
Expand Down
19 changes: 18 additions & 1 deletion hack/olm-registry/olm-artifacts-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ objects:
sourceType: grpc
image: ${REGISTRY_IMG}:${CHANNEL}-${IMAGE_TAG}
displayName: pagerduty-operator Registry
publisher: SRE
publisher: SRE

- apiVersion: operators.coreos.com/v1alpha2
kind: OperatorGroup
Expand Down Expand Up @@ -67,3 +67,20 @@ objects:
targetSecretRef:
name: pd-secret
namespace: openshift-monitoring

- apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: pagerduty-integration-api-secret
namespace: pagerduty-operator
spec:
groups:
- name: pagerduty-integration-api-secret
rules:
- alert: PagerDutyIntegrationAPISecretError
annotations:
message: PagerDuty Operator is failing to load PAGERDUTY_API_KEY from Secret specified in PagerDutyIntegration {{ $labels.pagerdutyintegration_name }}. Either the Secret might be missing, or the key might be missing from within the Secret.
expr: pagerdutyintegration_secret_loaded < 1
for: 15m
labels:
severity: warning
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

func (r *ReconcilePagerDutyIntegration) handleCreate(pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
func (r *ReconcilePagerDutyIntegration) handleCreate(pdclient pd.Client, pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
var (
// secretName is the name of the Secret deployed to the target
// cluster, and also the name of the SyncSet that causes it to
Expand Down Expand Up @@ -96,15 +96,15 @@ func (r *ReconcilePagerDutyIntegration) handleCreate(pdi *pagerdutyv1alpha1.Page
err = pdData.ParseClusterConfig(r.client, cd.Namespace, configMapName)
if err != nil {
var createErr error
_, createErr = r.pdclient.CreateService(pdData)
_, createErr = pdclient.CreateService(pdData)
if createErr != nil {
localmetrics.UpdateMetricPagerDutyCreateFailure(1, ClusterID)
return createErr
}
}
localmetrics.UpdateMetricPagerDutyCreateFailure(0, ClusterID)

pdIntegrationKey, err = r.pdclient.GetIntegrationKey(pdData)
pdIntegrationKey, err = pdclient.GetIntegrationKey(pdData)
if err != nil {
return err
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import (
"k8s.io/apimachinery/pkg/types"
)

func (r *ReconcilePagerDutyIntegration) handleDelete(pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
func (r *ReconcilePagerDutyIntegration) handleDelete(pdclient pd.Client, pdi *pagerdutyv1alpha1.PagerDutyIntegration, cd *hivev1.ClusterDeployment) error {
var (
// secretName is the name of the Secret deployed to the target
// cluster, and also the name of the SyncSet that causes it to
Expand Down Expand Up @@ -119,7 +119,7 @@ func (r *ReconcilePagerDutyIntegration) handleDelete(pdi *pagerdutyv1alpha1.Page

if deletePDService {
// we have everything necessary to attempt deletion of the PD service
err = r.pdclient.DeleteService(pdData)
err = pdclient.DeleteService(pdData)
if err != nil {
r.reqLogger.Error(err, "Failed cleaning up pagerduty.")
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@ package pagerdutyintegration

import (
"context"
"time"

"github.com/go-logr/logr"
hivev1 "github.com/openshift/hive/pkg/apis/hive/v1"
"github.com/openshift/pagerduty-operator/config"
pagerdutyv1alpha1 "github.com/openshift/pagerduty-operator/pkg/apis/pagerduty/v1alpha1"
"github.com/openshift/pagerduty-operator/pkg/localmetrics"
pd "github.com/openshift/pagerduty-operator/pkg/pagerduty"
"github.com/openshift/pagerduty-operator/pkg/utils"
corev1 "k8s.io/api/core/v1"
Expand All @@ -46,29 +48,16 @@ var log = logf.Log.WithName("controller_pagerdutyintegration")
// Add creates a new PagerDutyIntegration Controller and adds it to the Manager. The Manager will set fields on the Controller
// and Start it when the Manager is Started.
func Add(mgr manager.Manager) error {
newRec, err := newReconciler(mgr)
if err != nil {
return err
}

return add(mgr, newRec)
return add(mgr, newReconciler(mgr))
}

// newReconciler returns a new reconcile.Reconciler
func newReconciler(mgr manager.Manager) (reconcile.Reconciler, error) {
tempClient, err := client.New(mgr.GetConfig(), client.Options{Scheme: mgr.GetScheme()})
if err != nil {
return nil, err
}

// get PD API key from secret
pdAPIKey, err := utils.LoadSecretData(tempClient, config.PagerDutyAPISecretName, config.OperatorNamespace, config.PagerDutyAPISecretKey)

func newReconciler(mgr manager.Manager) reconcile.Reconciler {
return &ReconcilePagerDutyIntegration{
client: mgr.GetClient(),
scheme: mgr.GetScheme(),
pdclient: pd.NewClient(pdAPIKey),
}, nil
pdclient: pd.NewClient,
}
}

// add adds a new Controller to mgr with r as the reconcile.Reconciler
Expand Down Expand Up @@ -153,7 +142,7 @@ type ReconcilePagerDutyIntegration struct {
client client.Client
scheme *runtime.Scheme
reqLogger logr.Logger
pdclient pd.Client
pdclient func(APIKey string) pd.Client
}

// Reconcile reads that state of the cluster for a PagerDutyIntegration object and makes changes based on the state read
Expand Down Expand Up @@ -184,15 +173,31 @@ func (r *ReconcilePagerDutyIntegration) Reconcile(request reconcile.Request) (re
return r.requeueOnErr(err)
}

pdApiKey, err := utils.LoadSecretData(
r.client,
pdi.Spec.PagerdutyApiKeySecretRef.Name,
pdi.Spec.PagerdutyApiKeySecretRef.Namespace,
config.PagerDutyAPISecretKey,
)
if err != nil {
r.reqLogger.Error(err, "Failed to load PagerDuty API key from Secret listed in PagerDutyIntegration CR")
localmetrics.UpdateMetricPagerDutyIntegrationSecretLoaded(0, pdi.Name)
return r.requeueAfter(10 * time.Minute)
}
localmetrics.UpdateMetricPagerDutyIntegrationSecretLoaded(1, pdi.Name)
pdClient := r.pdclient(pdApiKey)

if pdi.DeletionTimestamp != nil {
if utils.HasFinalizer(pdi, config.OperatorFinalizer) {
for _, cd := range matchingClusterDeployments.Items {
err := r.handleDelete(pdi, &cd)
err := r.handleDelete(pdClient, pdi, &cd)
if err != nil {
return r.requeueOnErr(err)
}
}

localmetrics.DeleteMetricPagerDutyIntegrationSecretLoaded(pdi.Name)

utils.DeleteFinalizer(pdi, config.OperatorFinalizer)
err = r.client.Update(context.TODO(), pdi)
if err != nil {
Expand All @@ -212,12 +217,12 @@ func (r *ReconcilePagerDutyIntegration) Reconcile(request reconcile.Request) (re

for _, cd := range matchingClusterDeployments.Items {
if cd.DeletionTimestamp != nil || cd.Labels[config.ClusterDeploymentNoalertsLabel] == "true" {
err := r.handleDelete(pdi, &cd)
err := r.handleDelete(pdClient, pdi, &cd)
if err != nil {
return r.requeueOnErr(err)
}
} else {
err := r.handleCreate(pdi, &cd)
err := r.handleCreate(pdClient, pdi, &cd)
if err != nil {
return r.requeueOnErr(err)
}
Expand Down Expand Up @@ -245,3 +250,7 @@ func (r *ReconcilePagerDutyIntegration) doNotRequeue() (reconcile.Result, error)
func (r *ReconcilePagerDutyIntegration) requeueOnErr(err error) (reconcile.Result, error) {
return reconcile.Result{}, err
}

func (r *ReconcilePagerDutyIntegration) requeueAfter(t time.Duration) (reconcile.Result, error) {
return reconcile.Result{RequeueAfter: t}, nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
pagerdutyapis "github.com/openshift/pagerduty-operator/pkg/apis"
pagerdutyv1alpha1 "github.com/openshift/pagerduty-operator/pkg/apis/pagerduty/v1alpha1"
"github.com/openshift/pagerduty-operator/pkg/kube"
pd "github.com/openshift/pagerduty-operator/pkg/pagerduty"
mockpd "github.com/openshift/pagerduty-operator/pkg/pagerduty/mock"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -359,6 +360,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
localObjects: []runtime.Object{
uninstalledClusterDeployment(),
testPagerDutyIntegration(),
testPDConfigSecret(),
},
expectedSyncSets: &SyncSetEntry{},
expectedSecrets: &SecretEntry{},
Expand Down Expand Up @@ -426,6 +428,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
localObjects: []runtime.Object{
unlabelledClusterDeployment(),
testPagerDutyIntegration(),
testPDConfigSecret(),
},
expectedSyncSets: &SyncSetEntry{},
expectedSecrets: &SecretEntry{},
Expand All @@ -439,6 +442,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
localObjects: []runtime.Object{
testNoalertsClusterDeployment(),
testPagerDutyIntegration(),
testPDConfigSecret(),
},
expectedSyncSets: &SyncSetEntry{},
expectedSecrets: &SecretEntry{},
Expand Down Expand Up @@ -502,7 +506,7 @@ func TestReconcilePagerDutyIntegration(t *testing.T) {
rpdi := &ReconcilePagerDutyIntegration{
client: mocks.fakeKubeClient,
scheme: scheme.Scheme,
pdclient: mocks.mockPDClient,
pdclient: func(s string) pd.Client { return mocks.mockPDClient },
}

// Act [2x as first exits early after setting finalizer]
Expand Down Expand Up @@ -556,7 +560,7 @@ func TestRemoveAlertsAfterCreate(t *testing.T) {
rpdi := &ReconcilePagerDutyIntegration{
client: mocks.fakeKubeClient,
scheme: scheme.Scheme,
pdclient: mocks.mockPDClient,
pdclient: func(s string) pd.Client { return mocks.mockPDClient },
}

// Act (create) [2x as first exits early after setting finalizer]
Expand Down Expand Up @@ -636,7 +640,7 @@ func TestDeleteSecret(t *testing.T) {
rpdi := &ReconcilePagerDutyIntegration{
client: mocks.fakeKubeClient,
scheme: scheme.Scheme,
pdclient: mocks.mockPDClient,
pdclient: func(s string) pd.Client { return mocks.mockPDClient },
}

// Act (create) [2x as first exits early after setting finalizer]
Expand Down
32 changes: 29 additions & 3 deletions pkg/localmetrics/localmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,21 @@ var (
ConstLabels: prometheus.Labels{"name": "pagerduty-operator"},
})

MetricPagerDutyIntegrationSecretLoaded = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "pagerdutyintegration_secret_loaded",
Help: "Metric to track the ability to load the PagerDuty API key from the Secret specified in the PagerDutyIntegration",
ConstLabels: prometheus.Labels{"name": "pagerduty-operator"},
}, []string{"pagerdutyintegration_name"})

MetricsList = []prometheus.Collector{
MetricPagerDutyCreateFailure,
MetricPagerDutyDeleteFailure,
MetricPagerDutyHeartbeat,
MetricPagerDutyIntegrationSecretLoaded,
}
)

// UpdateAPIMetrics updates all API endpoint metrics ever 5 minutes
// UpdateAPIMetrics updates all API endpoint metrics every 5 minutes
func UpdateAPIMetrics(APIKey string, timer *prometheus.Timer) {
d := time.Tick(5 * time.Minute)
for range d {
Expand All @@ -65,14 +72,33 @@ func UpdateAPIMetrics(APIKey string, timer *prometheus.Timer) {

}

// UpdateMetricPagerDutyCreateFailure updates guage to 1 when creation fails
// UpdateMetricPagerDutyIntegrationSecretLoaded updates gauge to 1
// when the PagerDuty API key can be loaded from the Secret specified
// in the PagerDutyIntegration, or to 0 if it fails
func UpdateMetricPagerDutyIntegrationSecretLoaded(x int, pdiName string) {
MetricPagerDutyIntegrationSecretLoaded.With(
prometheus.Labels{"pagerdutyintegration_name": pdiName},
).Set(float64(x))
}

// DeleteMetricPagerDutyIntegrationSecretLoaded deletes the metric for
// the PagerDutyIntegration name provided. This should be called when
// e.g. the PagerDutyIntegration is being deleted, so that there are
// no irrelevant metrics (which alerts could be firing on).
func DeleteMetricPagerDutyIntegrationSecretLoaded(pdiName string) bool {
return MetricPagerDutyIntegrationSecretLoaded.Delete(
prometheus.Labels{"pagerdutyintegration_name": pdiName},
)
}

// UpdateMetricPagerDutyCreateFailure updates gauge to 1 when creation fails
func UpdateMetricPagerDutyCreateFailure(x int, cd string) {
MetricPagerDutyCreateFailure.With(prometheus.Labels{
"clusterdeployment_name": cd}).Set(
float64(x))
}

// UpdateMetricPagerDutyDeleteFailure updates guage to 1 when deletion fails
// UpdateMetricPagerDutyDeleteFailure updates gauge to 1 when deletion fails
func UpdateMetricPagerDutyDeleteFailure(x int, cd string) {
MetricPagerDutyDeleteFailure.With(prometheus.Labels{
"clusterdeployment_name": cd}).Set(
Expand Down

0 comments on commit 2dbb2e0

Please sign in to comment.