Skip to content

Commit

Permalink
Add alert for multiple default virt storage classes
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Kalenyuk <akalenyu@redhat.com>
  • Loading branch information
akalenyu committed Oct 3, 2023
1 parent 7e94eb5 commit 44f4a6c
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 5 deletions.
1 change: 1 addition & 0 deletions cmd/cdi-controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ func getTokenPrivateKey() *rsa.PrivateKey {
// registerMetrics registers the controller package's Prometheus gauges with
// metrics.Registry. MustRegister is used, so a failed registration panics.
func registerMetrics() {
	metrics.Registry.MustRegister(controller.IncompleteProfileGauge)
	// Seed the incomplete-profile gauge with -1 right after it is registered.
	controller.IncompleteProfileGauge.Set(-1)
	// The remaining gauges are registered in the same order as before; no
	// initial value is set for them here.
	metrics.Registry.MustRegister(
		controller.DefaultVirtStorageClassesGauge,
		controller.DataImportCronOutdatedGauge,
	)
}

Expand Down
2 changes: 2 additions & 0 deletions doc/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ CDI install ready. Type: Gauge.
DataImportCron has an outdated import. Type: Gauge.
### kubevirt_cdi_dataimportcron_outdated_aggregated
Total count of outdated DataImportCron imports. Type: Gauge.
### kubevirt_cdi_default_virt_storageclasses
Number of default virt storage classes currently configured. Type: Gauge.
### kubevirt_cdi_import_pods_high_restart
The number of CDI import pods with high restart count. Type: Gauge.
### kubevirt_cdi_incomplete_storageprofiles
Expand Down
37 changes: 35 additions & 2 deletions pkg/controller/storageprofile-controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,12 @@ var (
Name: monitoring.MetricOptsList[monitoring.IncompleteProfile].Name,
Help: monitoring.MetricOptsList[monitoring.IncompleteProfile].Help,
})
// DefaultVirtStorageClassesGauge is the metric we use to alert about multiple default virt storage classes
DefaultVirtStorageClassesGauge = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: monitoring.MetricOptsList[monitoring.DefaultVirtClasses].Name,
Help: monitoring.MetricOptsList[monitoring.DefaultVirtClasses].Help,
})
)

// StorageProfileReconciler members
Expand Down Expand Up @@ -72,7 +78,7 @@ func (r *StorageProfileReconciler) Reconcile(_ context.Context, req reconcile.Re
return reconcile.Result{}, err
}

return reconcile.Result{}, r.checkIncompleteProfiles()
return reconcile.Result{}, r.computeMetrics()
}

func (r *StorageProfileReconciler) reconcileStorageProfile(sc *storagev1.StorageClass) (reconcile.Result, error) {
Expand Down Expand Up @@ -255,7 +261,7 @@ func (r *StorageProfileReconciler) deleteStorageProfile(name string, log logr.Lo
return err
}

return r.checkIncompleteProfiles()
return r.computeMetrics()
}

func isNoProvisioner(name string, cl client.Client) bool {
Expand All @@ -266,6 +272,33 @@ func isNoProvisioner(name string, cl client.Client) bool {
return storageClass.Provisioner == "kubernetes.io/no-provisioner"
}

// computeMetrics runs checkIncompleteProfiles followed by
// checkDefaultVirtStorageClasses and returns the first error encountered,
// or nil when both succeed.
func (r *StorageProfileReconciler) computeMetrics() error {
	err := r.checkIncompleteProfiles()
	if err != nil {
		// The second check is skipped when the first one fails.
		return err
	}

	return r.checkDefaultVirtStorageClasses()
}

// checkDefaultVirtStorageClasses lists StorageClasses through the reconciler's
// client, counts those whose cc.AnnDefaultVirtStorageClass annotation equals
// "true", and publishes that count via DefaultVirtStorageClassesGauge.
// A List failure is returned as-is and leaves the gauge untouched.
func (r *StorageProfileReconciler) checkDefaultVirtStorageClasses() error {
	classes := &storagev1.StorageClassList{}
	if err := r.client.List(context.TODO(), classes); err != nil {
		return err
	}

	count := 0
	for i := range classes.Items {
		// A missing annotation reads as "" from the map and is not counted.
		if classes.Items[i].Annotations[cc.AnnDefaultVirtStorageClass] == "true" {
			count++
		}
	}
	DefaultVirtStorageClassesGauge.Set(float64(count))

	return nil
}

func (r *StorageProfileReconciler) checkIncompleteProfiles() error {
numIncomplete := 0
storageProfileList := &cdiv1.StorageProfileList{}
Expand Down
12 changes: 9 additions & 3 deletions pkg/monitoring/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ type MetricsKey string

// All metrics names for reference
const (
ReadyGauge MetricsKey = "readyGauge"
IncompleteProfile MetricsKey = "incompleteProfile"
DataImportCronOutdated MetricsKey = "dataImportCronOutdated"
CloneProgress MetricsKey = "cloneProgress"
DataImportCronOutdated MetricsKey = "dataImportCronOutdated"
IncompleteProfile MetricsKey = "incompleteProfile"
ReadyGauge MetricsKey = "readyGauge"
DefaultVirtClasses MetricsKey = "defaultVirtClasses"
)

// MetricOptsList list all CDI metrics
Expand All @@ -53,6 +54,11 @@ var MetricOptsList = map[MetricsKey]MetricOpts{
Help: "CDI install ready",
Type: "Gauge",
},
DefaultVirtClasses: {
Name: "kubevirt_cdi_default_virt_storageclasses",
Help: "Number of default virt storage classes currently configured",
Type: "Gauge",
},
}

// GetRecordRulesDesc returns CDI Prometheus Record Rules
Expand Down
15 changes: 15 additions & 0 deletions pkg/operator/controller/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,21 @@ func getAlertRules(runbookURLTemplate string) []promv1.Rule {
componentAlertLabelKey: componentAlertLabelValue,
},
),
generateAlertRule(
"CDIMultipleDefaultVirtStorageClasses",
"kubevirt_cdi_default_virt_storageclasses > 1",
promv1.Duration("5m"),
map[string]string{
"summary": "More than one default virtualization StorageClass detected",
"runbook_url": fmt.Sprintf(runbookURLTemplate, "CDIMultipleDefaultVirtStorageClasses"),
},
map[string]string{
severityAlertLabelKey: "warning",
healthImpactAlertLabelKey: "warning",
partOfAlertLabelKey: partOfAlertLabelValue,
componentAlertLabelKey: componentAlertLabelValue,
},
),
}
}

Expand Down
14 changes: 14 additions & 0 deletions tests/monitoring_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
cdiv1 "kubevirt.io/containerized-data-importer-api/pkg/apis/core/v1beta1"

"kubevirt.io/containerized-data-importer/pkg/controller"
cc "kubevirt.io/containerized-data-importer/pkg/controller/common"
"kubevirt.io/containerized-data-importer/pkg/storagecapabilities"
"kubevirt.io/containerized-data-importer/tests/framework"
"kubevirt.io/containerized-data-importer/tests/utils"
Expand Down Expand Up @@ -172,6 +173,19 @@ var _ = Describe("[Destructive] Monitoring Tests", func() {
}
})

// Creates two StorageClasses annotated as default virt storage classes and
// waits for the CDIMultipleDefaultVirtStorageClasses alert to be reported.
// NOTE(review): the test_id is still the "XXXX" placeholder.
It("[test_id:XXXX] CDIMultipleDefaultVirtStorageClasses fired when more than one default virt storage class exists", func() {
// numAddedStorageClasses is declared outside this block; presumably it is
// also read by cleanup code elsewhere in the suite — verify.
numAddedStorageClasses = 2
for i := 0; i < numAddedStorageClasses; i++ {
// Each class gets a unique name and a provisioner string passed to createUnknownStorageClass.
sc := createUnknownStorageClass(fmt.Sprintf("unknown-sc-%d", i), "kubernetes.io/non-existent-provisioner")
// Mark the class as a default virt storage class before creating it in the cluster.
cc.AddAnnotation(sc, cc.AnnDefaultVirtStorageClass, "true")
_, err := f.K8sClient.StorageV1().StorageClasses().Create(context.TODO(), sc, metav1.CreateOptions{})
Expect(err).ToNot(HaveOccurred())
}

By("Check that the CDIMultipleDefaultVirtStorageClasses alert is triggered")
waitForPrometheusAlert(f, "CDIMultipleDefaultVirtStorageClasses")
})

It("[test_id:9659] StorageProfile incomplete metric expected value remains unchanged for provisioner known to not work", func() {
sc, err := f.K8sClient.StorageV1().StorageClasses().Create(context.TODO(), createUnknownStorageClass("unsupported-provisioner", storagecapabilities.ProvisionerNoobaa), metav1.CreateOptions{})
Expect(err).ToNot(HaveOccurred())
Expand Down

0 comments on commit 44f4a6c

Please sign in to comment.