From a3becc9906515d0567808fee9a4e322451d6dc3f Mon Sep 17 00:00:00 2001 From: Matthis <99146727+matthisholleville@users.noreply.github.com> Date: Sat, 15 Apr 2023 19:03:37 +0200 Subject: [PATCH] feat: add server metrics (#273) * feat: expose metrics path & init analyzer errors metrics This commit adds a metrics path and the analyzer error metrics to the codebase. The changes have been made across all analyzers and include the addition of a new metric with label values for the analyzer's name, the analyzed object's name, and its namespace. The metric's value is set to the number of failures detected for the analyzed object. Signed-off-by: Matthis Holleville * feat: add metric to cronjob & deployment & netpol Signed-off-by: Matthis Holleville * feat: expose metric to NodeAnalyzer Signed-off-by: Matthis Holleville --------- Signed-off-by: Matthis Holleville --- pkg/analyzer/analyzer.go | 9 +++++++++ pkg/analyzer/cronjob.go | 13 +++++++++++-- pkg/analyzer/cronjob_test.go | 2 +- pkg/analyzer/deployment.go | 9 ++++++++- pkg/analyzer/hpa.go | 9 ++++++++- pkg/analyzer/ingress.go | 10 +++++++++- pkg/analyzer/netpol.go | 13 +++++++++++-- pkg/analyzer/node.go | 11 ++++++++++- pkg/analyzer/pdb.go | 9 ++++++++- pkg/analyzer/pod.go | 10 +++++++++- pkg/analyzer/pvc.go | 9 ++++++++- pkg/analyzer/rs.go | 9 ++++++++- pkg/analyzer/service.go | 9 ++++++++- pkg/analyzer/statefulset.go | 10 +++++++++- pkg/server/main.go | 7 +++++-- 15 files changed, 122 insertions(+), 17 deletions(-) diff --git a/pkg/analyzer/analyzer.go b/pkg/analyzer/analyzer.go index d553caa64c..c0fab38bd8 100644 --- a/pkg/analyzer/analyzer.go +++ b/pkg/analyzer/analyzer.go @@ -7,6 +7,15 @@ import ( "github.com/fatih/color" "github.com/k8sgpt-ai/k8sgpt/pkg/common" "github.com/k8sgpt-ai/k8sgpt/pkg/integration" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + AnalyzerErrorsMetric = promauto.NewGaugeVec(prometheus.GaugeOpts{ + Name: "analyzer_errors", + Help: "Number of
errors detected by analyzer", + }, []string{"analyzer_name", "object_name", "namespace"}) ) var coreAnalyzerMap = map[string]common.IAnalyzer{ diff --git a/pkg/analyzer/cronjob.go b/pkg/analyzer/cronjob.go index db773bb46a..7a2e2dde32 100644 --- a/pkg/analyzer/cronjob.go +++ b/pkg/analyzer/cronjob.go @@ -13,6 +13,13 @@ import ( type CronJobAnalyzer struct{} func (analyzer CronJobAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { + + kind := "CronJob" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + "analyzer_name": kind, + }) + var results []common.Result cronJobList, err := a.Client.GetClient().BatchV1().CronJobs("").List(a.Context, v1.ListOptions{}) @@ -81,14 +88,16 @@ func (analyzer CronJobAnalyzer) Analyze(a common.Analyzer) ([]common.Result, err } if len(failures) > 0 { - preAnalysis[cronJob.Name] = common.PreAnalysis{ + preAnalysis[fmt.Sprintf("%s/%s", cronJob.Namespace, cronJob.Name)] = common.PreAnalysis{ FailureDetails: failures, } + AnalyzerErrorsMetric.WithLabelValues(kind, cronJob.Name, cronJob.Namespace).Set(float64(len(failures))) + } for key, value := range preAnalysis { currentAnalysis := common.Result{ - Kind: "CronJob", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/analyzer/cronjob_test.go b/pkg/analyzer/cronjob_test.go index b6e31c8151..0290b5439d 100644 --- a/pkg/analyzer/cronjob_test.go +++ b/pkg/analyzer/cronjob_test.go @@ -121,6 +121,6 @@ func TestCronJobBroken(t *testing.T) { } assert.Equal(t, len(analysisResults), 1) - assert.Equal(t, analysisResults[0].Name, "example-cronjob") + assert.Equal(t, analysisResults[0].Name, "default/example-cronjob") assert.Equal(t, analysisResults[0].Kind, "CronJob") } diff --git a/pkg/analyzer/deployment.go b/pkg/analyzer/deployment.go index 4f3b77c936..bc89b8e849 100644 --- a/pkg/analyzer/deployment.go +++ b/pkg/analyzer/deployment.go @@ -17,6 +17,12 @@ type DeploymentAnalyzer struct { // Analyze scans all namespaces for Deployments with 
misconfigurations func (d DeploymentAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { + kind := "Deployment" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + "analyzer_name": kind, + }) + deployments, err := a.Client.GetClient().AppsV1().Deployments("").List(context.Background(), v1.ListOptions{}) if err != nil { return nil, err @@ -44,13 +50,14 @@ func (d DeploymentAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) FailureDetails: failures, Deployment: deployment, } + AnalyzerErrorsMetric.WithLabelValues(kind, deployment.Name, deployment.Namespace).Set(float64(len(failures))) } } for key, value := range preAnalysis { var currentAnalysis = common.Result{ - Kind: "Deployment", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/analyzer/hpa.go b/pkg/analyzer/hpa.go index 6c7585a68b..c720dc1f0f 100644 --- a/pkg/analyzer/hpa.go +++ b/pkg/analyzer/hpa.go @@ -12,6 +12,12 @@ type HpaAnalyzer struct{} func (HpaAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { + kind := "HorizontalPodAutoscaler" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + "analyzer_name": kind, + }) + list, err := a.Client.GetClient().AutoscalingV1().HorizontalPodAutoscalers(a.Namespace).List(a.Context, metav1.ListOptions{}) if err != nil { return nil, err @@ -71,13 +77,14 @@ func (HpaAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { HorizontalPodAutoscalers: hpa, FailureDetails: failures, } + AnalyzerErrorsMetric.WithLabelValues(kind, hpa.Name, hpa.Namespace).Set(float64(len(failures))) } } for key, value := range preAnalysis { var currentAnalysis = common.Result{ - Kind: "HorizontalPodAutoscaler", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/analyzer/ingress.go b/pkg/analyzer/ingress.go index 215a58185a..1435e8091c 100644 --- a/pkg/analyzer/ingress.go +++ b/pkg/analyzer/ingress.go @@ -12,6 +12,12 @@ type IngressAnalyzer struct{} func (IngressAnalyzer) Analyze(a 
common.Analyzer) ([]common.Result, error) { + kind := "Ingress" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + "analyzer_name": kind, + }) + list, err := a.Client.GetClient().NetworkingV1().Ingresses(a.Namespace).List(a.Context, metav1.ListOptions{}) if err != nil { return nil, err @@ -107,13 +113,15 @@ func (IngressAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { Ingress: ing, FailureDetails: failures, } + AnalyzerErrorsMetric.WithLabelValues(kind, ing.Name, ing.Namespace).Set(float64(len(failures))) + } } for key, value := range preAnalysis { var currentAnalysis = common.Result{ - Kind: "Ingress", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/analyzer/netpol.go b/pkg/analyzer/netpol.go index 52115c314d..e36bf58a74 100644 --- a/pkg/analyzer/netpol.go +++ b/pkg/analyzer/netpol.go @@ -11,6 +11,13 @@ import ( type NetworkPolicyAnalyzer struct{} func (NetworkPolicyAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { + + kind := "NetworkPolicy" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + "analyzer_name": kind, + }) + // get all network policies in the namespace policies, err := a.Client.GetClient().NetworkingV1(). 
NetworkPolicies(a.Namespace).List(a.Context, metav1.ListOptions{}) @@ -54,16 +61,18 @@ func (NetworkPolicyAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) } if len(failures) > 0 { - preAnalysis[policy.Name] = common.PreAnalysis{ + preAnalysis[fmt.Sprintf("%s/%s", policy.Namespace, policy.Name)] = common.PreAnalysis{ FailureDetails: failures, NetworkPolicy: policy, } + AnalyzerErrorsMetric.WithLabelValues(kind, policy.Name, policy.Namespace).Set(float64(len(failures))) + } } for key, value := range preAnalysis { currentAnalysis := common.Result{ - Kind: "NetworkPolicy", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/analyzer/node.go b/pkg/analyzer/node.go index e13b932919..53fed7e74c 100644 --- a/pkg/analyzer/node.go +++ b/pkg/analyzer/node.go @@ -2,6 +2,7 @@ package analyzer import ( "fmt" + v1 "k8s.io/api/core/v1" "github.com/k8sgpt-ai/k8sgpt/pkg/common" @@ -13,6 +14,12 @@ type NodeAnalyzer struct{} func (NodeAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { + kind := "Node" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + "analyzer_name": kind, + }) + list, err := a.Client.GetClient().CoreV1().Nodes().List(a.Context, metav1.ListOptions{}) if err != nil { return nil, err @@ -42,12 +49,14 @@ func (NodeAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { Node: node, FailureDetails: failures, } + AnalyzerErrorsMetric.WithLabelValues(kind, node.Name, "").Set(float64(len(failures))) + } } for key, value := range preAnalysis { var currentAnalysis = common.Result{ - Kind: "Node", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/analyzer/pdb.go b/pkg/analyzer/pdb.go index 0c0e383919..f660423c5a 100644 --- a/pkg/analyzer/pdb.go +++ b/pkg/analyzer/pdb.go @@ -12,6 +12,12 @@ type PdbAnalyzer struct{} func (PdbAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { + kind := "PodDisruptionBudget" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + 
"analyzer_name": kind, + }) + list, err := a.Client.GetClient().PolicyV1().PodDisruptionBudgets(a.Namespace).List(a.Context, metav1.ListOptions{}) if err != nil { return nil, err @@ -63,12 +69,13 @@ func (PdbAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { PodDisruptionBudget: pdb, FailureDetails: failures, } + AnalyzerErrorsMetric.WithLabelValues(kind, pdb.Name, pdb.Namespace).Set(float64(len(failures))) } } for key, value := range preAnalysis { var currentAnalysis = common.Result{ - Kind: "PodDisruptionBudget", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/analyzer/pod.go b/pkg/analyzer/pod.go index a812c2f106..2d4135490d 100644 --- a/pkg/analyzer/pod.go +++ b/pkg/analyzer/pod.go @@ -12,6 +12,13 @@ type PodAnalyzer struct { } func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { + + kind := "Pod" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + "analyzer_name": kind, + }) + // search all namespaces for pods that are not running list, err := a.Client.GetClient().CoreV1().Pods(a.Namespace).List(a.Context, metav1.ListOptions{}) if err != nil { @@ -70,12 +77,13 @@ func (PodAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { Pod: pod, FailureDetails: failures, } + AnalyzerErrorsMetric.WithLabelValues(kind, pod.Name, pod.Namespace).Set(float64(len(failures))) } } for key, value := range preAnalysis { var currentAnalysis = common.Result{ - Kind: "Pod", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/analyzer/pvc.go b/pkg/analyzer/pvc.go index a2bcba5acb..14ba441d9b 100644 --- a/pkg/analyzer/pvc.go +++ b/pkg/analyzer/pvc.go @@ -12,6 +12,12 @@ type PvcAnalyzer struct{} func (PvcAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { + kind := "PersistentVolumeClaim" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + "analyzer_name": kind, + }) + // search all namespaces for pods that are not running list, err := 
a.Client.GetClient().CoreV1().PersistentVolumeClaims(a.Namespace).List(a.Context, metav1.ListOptions{}) if err != nil { @@ -43,12 +49,13 @@ func (PvcAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { PersistentVolumeClaim: pvc, FailureDetails: failures, } + AnalyzerErrorsMetric.WithLabelValues(kind, pvc.Name, pvc.Namespace).Set(float64(len(failures))) } } for key, value := range preAnalysis { var currentAnalysis = common.Result{ - Kind: "PersistentVolumeClaim", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/analyzer/rs.go b/pkg/analyzer/rs.go index 1cac7b2a59..cc31f2ec15 100644 --- a/pkg/analyzer/rs.go +++ b/pkg/analyzer/rs.go @@ -12,6 +12,12 @@ type ReplicaSetAnalyzer struct{} func (ReplicaSetAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { + kind := "ReplicaSet" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + "analyzer_name": kind, + }) + // search all namespaces for pods that are not running list, err := a.Client.GetClient().AppsV1().ReplicaSets(a.Namespace).List(a.Context, metav1.ListOptions{}) if err != nil { @@ -42,12 +48,13 @@ func (ReplicaSetAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { ReplicaSet: rs, FailureDetails: failures, } + AnalyzerErrorsMetric.WithLabelValues(kind, rs.Name, rs.Namespace).Set(float64(len(failures))) } } for key, value := range preAnalysis { var currentAnalysis = common.Result{ - Kind: "ReplicaSet", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/analyzer/service.go b/pkg/analyzer/service.go index ac81c4f250..cc03cfb316 100644 --- a/pkg/analyzer/service.go +++ b/pkg/analyzer/service.go @@ -13,6 +13,12 @@ type ServiceAnalyzer struct{} func (ServiceAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { + kind := "Service" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + "analyzer_name": kind, + }) + // search all namespaces for pods that are not running list, err := 
a.Client.GetClient().CoreV1().Endpoints(a.Namespace).List(a.Context, metav1.ListOptions{}) if err != nil { @@ -71,12 +77,13 @@ func (ServiceAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { Endpoint: ep, FailureDetails: failures, } + AnalyzerErrorsMetric.WithLabelValues(kind, ep.Name, ep.Namespace).Set(float64(len(failures))) } } for key, value := range preAnalysis { var currentAnalysis = common.Result{ - Kind: "Service", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/analyzer/statefulset.go b/pkg/analyzer/statefulset.go index e9251cda74..173aba6f3f 100644 --- a/pkg/analyzer/statefulset.go +++ b/pkg/analyzer/statefulset.go @@ -11,6 +11,13 @@ import ( type StatefulSetAnalyzer struct{} func (StatefulSetAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { + + kind := "StatefulSet" + + AnalyzerErrorsMetric.DeletePartialMatch(map[string]string{ + "analyzer_name": kind, + }) + list, err := a.Client.GetClient().AppsV1().StatefulSets(a.Namespace).List(a.Context, metav1.ListOptions{}) if err != nil { return nil, err @@ -61,12 +68,13 @@ func (StatefulSetAnalyzer) Analyze(a common.Analyzer) ([]common.Result, error) { StatefulSet: sts, FailureDetails: failures, } + AnalyzerErrorsMetric.WithLabelValues(kind, sts.Name, sts.Namespace).Set(float64(len(failures))) } } for key, value := range preAnalysis { var currentAnalysis = common.Result{ - Kind: "StatefulSet", + Kind: kind, Name: key, Error: value.FailureDetails, } diff --git a/pkg/server/main.go b/pkg/server/main.go index 8dab23d571..4ff64de490 100644 --- a/pkg/server/main.go +++ b/pkg/server/main.go @@ -3,11 +3,13 @@ package server import ( json "encoding/json" "fmt" - "github.com/fatih/color" - "github.com/k8sgpt-ai/k8sgpt/pkg/analysis" "net/http" "strconv" "strings" + + "github.com/fatih/color" + "github.com/k8sgpt-ai/k8sgpt/pkg/analysis" + "github.com/prometheus/client_golang/prometheus/promhttp" ) type Config struct { @@ -74,6 +76,7 @@ func (s *Config) analyzeHandler(w 
http.ResponseWriter, r *http.Request) { } func (s *Config) Serve() error { + http.Handle("/metrics", promhttp.Handler()) http.HandleFunc("/analyze", s.analyzeHandler) http.HandleFunc("/healthz", s.healthzHandler) color.Green("Starting server on port %s", s.Port)