From 369187e2fb826f3b3690b530f44cc8fe036d06ff Mon Sep 17 00:00:00 2001 From: Sagnik Dutta Date: Wed, 6 Sep 2023 16:41:10 +0530 Subject: [PATCH] RHIROS-1221 Filter out and log Invalid recommendations on Prometheus --- internal/services/report_processor.go | 2 +- internal/utils/kruize/kruize_api.go | 44 +++++++++++++++++++++++---- internal/utils/kruize/metrics.go | 11 +++++++ 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/internal/services/report_processor.go b/internal/services/report_processor.go index 634c700b..05aa2f01 100644 --- a/internal/services/report_processor.go +++ b/internal/services/report_processor.go @@ -180,7 +180,7 @@ func ProcessReport(msg *kafka.Message) { continue } - if kruize.Is_valid_recommendation(recommendation) { + if kruize.Is_valid_recommendation(recommendation, experiment_name) { containers := recommendation[0].Kubernetes_objects[0].Containers for _, container := range containers { for _, v := range container.Recommendations.Data { diff --git a/internal/utils/kruize/kruize_api.go b/internal/utils/kruize/kruize_api.go index 39fd2ddc..d23fac79 100644 --- a/internal/utils/kruize/kruize_api.go +++ b/internal/utils/kruize/kruize_api.go @@ -176,14 +176,46 @@ func Update_recommendations(experiment_name string, interval_end_time time.Time) } -func Is_valid_recommendation(d []kruizePayload.ListRecommendations) bool { +func Is_valid_recommendation(d []kruizePayload.ListRecommendations, experiment_name string) bool { if len(d) > 0 { + + // To maintain a local reference the following map has been created from + // https://github.com/kruize/autotune/blob/master/design/NotificationCodes.md#detailed-codes + notificationCodeValidities := map[string]bool{ + "112101": true, + "120001": false, + "221001": false, + "221002": false, + "221003": false, + "221004": false, + "223001": false, + "223002": false, + "223003": false, + "223004": false, + "224001": false, + "224002": false, + "224003": false, + "224004": false, + } + notifications := d[0].Kubernetes_objects[0].Containers[0].Recommendations.Notifications - // 112101 is notification code for "Duration Based Recommendations Available". - if _, ok := notifications["112101"]; ok { - return true - } else { - return false + + for key := range notifications{ + isValid, keyExists := notificationCodeValidities[key] + if !keyExists { + return false + } + + if !isValid { + // Setting the metric counter to 1 as we expect a single metric + // for a combination of notification_code and experiment_name + kruizeInvalidRecommendation.WithLabelValues(key, experiment_name).Set(1) + return false + } else { + return true + } + + } } return false diff --git a/internal/utils/kruize/metrics.go b/internal/utils/kruize/metrics.go index a6980148..3dbe3b81 100644 --- a/internal/utils/kruize/metrics.go +++ b/internal/utils/kruize/metrics.go @@ -13,3 +13,14 @@ var ( []string{"path"}, ) ) + + +var ( + kruizeInvalidRecommendation = promauto.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "rosocp_kruize_invalid_recommendation_detail", + Help: "List of INFO/ERROR type recommendations from Kruize", + }, + []string{"notification_code", "experiment_name"}, + ) +)