From ce53126f365ccac90980b1c0f8701c8816ea06c4 Mon Sep 17 00:00:00 2001
From: Sagnik Dutta
Date: Wed, 6 Sep 2023 16:33:32 +0530
Subject: [PATCH] RHIROS-1221 Filter out and log Invalid recommendations on Prometheus

---
Review notes (this section is not part of the commit message):
* Is_valid_recommendation no longer returns from inside the range over the
  notifications map. Map iteration order is random, so the previous loop gave
  an order-dependent answer whenever more than one code was present.
* Kubernetes_objects and Containers are length-checked before being indexed.
* The leftover debug log.Warning calls were dropped.
* Leading whitespace of context lines was reconstructed and the index lines
  still carry the blob ids of the original submission; use
  "git apply --ignore-whitespace" if the hunks do not match.

 internal/services/report_processor.go |  2 +-
 internal/utils/kruize/kruize_api.go   | 59 +++++++++++++++++++++++----
 internal/utils/kruize/metrics.go      | 12 ++++++
 3 files changed, 65 insertions(+), 8 deletions(-)

diff --git a/internal/services/report_processor.go b/internal/services/report_processor.go
index 634c700b..05aa2f01 100644
--- a/internal/services/report_processor.go
+++ b/internal/services/report_processor.go
@@ -180,7 +180,7 @@ func ProcessReport(msg *kafka.Message) {
 				continue
 			}
 
-			if kruize.Is_valid_recommendation(recommendation) {
+			if kruize.Is_valid_recommendation(recommendation, experiment_name) {
 				containers := recommendation[0].Kubernetes_objects[0].Containers
 				for _, container := range containers {
 					for _, v := range container.Recommendations.Data {
diff --git a/internal/utils/kruize/kruize_api.go b/internal/utils/kruize/kruize_api.go
index 39fd2ddc..fcf1714e 100644
--- a/internal/utils/kruize/kruize_api.go
+++ b/internal/utils/kruize/kruize_api.go
@@ -176,14 +176,59 @@ func Update_recommendations(experiment_name string, interval_end_time time.Time)
 
 }
 
-func Is_valid_recommendation(d []kruizePayload.ListRecommendations) bool {
-	if len(d) > 0 {
+// notificationCodeValidities records, for every Kruize notification code this
+// service knows about, whether a recommendation carrying that code is usable.
+// It is a local copy of the table documented at
+// https://github.com/kruize/autotune/blob/master/design/NotificationCodes.md#detailed-codes
+var notificationCodeValidities = map[string]bool{
+	"112101": true, // "Duration Based Recommendations Available"
+	"120001": false,
+	"221001": false,
+	"221002": false,
+	"221003": false,
+	"221004": false,
+	"223001": false,
+	"223002": false,
+	"223003": false,
+	"223004": false,
+	"224001": false,
+	"224002": false,
+	"224003": false,
+	"224004": false,
+}
+
+// Is_valid_recommendation reports whether the recommendation returned by Kruize
+// for experiment_name can be used. Only the notifications of the first container
+// of the first Kubernetes object of d[0] are inspected. The result is true when
+// that container has at least one notification and every notification code is
+// listed as valid in notificationCodeValidities; an empty payload, an unknown
+// code or a code listed as invalid makes the recommendation invalid. Each
+// known-invalid code is additionally exported through the
+// kruizeInvalidRecommendation gauge.
+func Is_valid_recommendation(d []kruizePayload.ListRecommendations, experiment_name string) bool {
+	// Check every level indexed below so that a sparse payload is rejected
+	// instead of causing an index-out-of-range panic.
+	if len(d) > 0 && len(d[0].Kubernetes_objects) > 0 && len(d[0].Kubernetes_objects[0].Containers) > 0 {
 		notifications := d[0].Kubernetes_objects[0].Containers[0].Recommendations.Notifications
-		// 112101 is notification code for "Duration Based Recommendations Available".
-		if _, ok := notifications["112101"]; ok {
-			return true
-		} else {
-			return false
+
+		// A recommendation without any notification is not considered valid.
+		valid := len(notifications) > 0
+
+		// Map iteration order is random, so every code is inspected before
+		// deciding; returning from inside the loop would make the result depend
+		// on which code happens to be visited first.
+		for code := range notifications {
+			isValid, known := notificationCodeValidities[code]
+			if known && isValid {
+				continue
+			}
+			valid = false
+			if known {
+				// Setting the gauge to 1 as we expect a single series for a
+				// combination of notification_code and experiment_name.
+				kruizeInvalidRecommendation.WithLabelValues(code, experiment_name).Set(1)
+			}
 		}
+		return valid
 	}
 	return false
diff --git a/internal/utils/kruize/metrics.go b/internal/utils/kruize/metrics.go
index a6980148..3dbe3b81 100644
--- a/internal/utils/kruize/metrics.go
+++ b/internal/utils/kruize/metrics.go
@@ -13,3 +13,15 @@ var (
 		[]string{"path"},
 	)
 )
+
+var (
+	// kruizeInvalidRecommendation is set to 1 by Is_valid_recommendation for
+	// each (notification_code, experiment_name) pair it rejects.
+	kruizeInvalidRecommendation = promauto.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Name: "rosocp_kruize_invalid_recommendation_detail",
+			Help: "List of INFO/ERROR type recommendations from Kruize",
+		},
+		[]string{"notification_code", "experiment_name"},
+	)
+)