Skip to content

Commit

Permalink
Fix metrics issues (#774)
Browse files Browse the repository at this point in the history
* Expose scrape interval

Signed-off-by: clyang82 <chuyang@redhat.com>

* update

Signed-off-by: clyang82 <chuyang@redhat.com>

* update

Signed-off-by: clyang82 <chuyang@redhat.com>

* Register metrics globally

Signed-off-by: clyang82 <chuyang@redhat.com>

* Avoid using init

Signed-off-by: clyang82 <chuyang@redhat.com>

* Remove unnecessary change

Signed-off-by: clyang82 <chuyang@redhat.com>

---------

Signed-off-by: clyang82 <chuyang@redhat.com>
  • Loading branch information
clyang82 committed Jan 18, 2024
1 parent 6c43337 commit c306bf3
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 62 deletions.
7 changes: 2 additions & 5 deletions manager/cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import (
managerconfig "github.com/stolostron/multicluster-global-hub/manager/pkg/config"
"github.com/stolostron/multicluster-global-hub/manager/pkg/cronjob"
"github.com/stolostron/multicluster-global-hub/manager/pkg/eventcollector"
globalhubmetrics "github.com/stolostron/multicluster-global-hub/manager/pkg/metrics"
"github.com/stolostron/multicluster-global-hub/manager/pkg/monitoring"
"github.com/stolostron/multicluster-global-hub/manager/pkg/nonk8sapi"
managerscheme "github.com/stolostron/multicluster-global-hub/manager/pkg/scheme"
"github.com/stolostron/multicluster-global-hub/manager/pkg/specsyncer"
Expand Down Expand Up @@ -73,6 +73,7 @@ var (

// init runs once before main. It passes the scheme variable to
// managerscheme.AddToScheme and then calls monitoring.RegisterMetrics so the
// manager's metric collectors are registered before anything updates them.
func init() {
managerscheme.AddToScheme(scheme)
monitoring.RegisterMetrics()
}

func parseFlags() *managerconfig.ManagerConfig {
Expand Down Expand Up @@ -272,10 +273,6 @@ func createManager(ctx context.Context, restConfig *rest.Config, managerConfig *
return nil, fmt.Errorf("failed to add event collector: %w", err)
}

if err := mgr.Add(globalhubmetrics.NewGlobalHubMetrics()); err != nil {
return nil, fmt.Errorf("failed to add metrics to manager: %w", err)
}

return mgr, nil
}

Expand Down
4 changes: 4 additions & 0 deletions manager/pkg/cronjob/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/stolostron/multicluster-global-hub/manager/pkg/config"
"github.com/stolostron/multicluster-global-hub/manager/pkg/cronjob/task"
"github.com/stolostron/multicluster-global-hub/manager/pkg/monitoring"
)

const (
Expand Down Expand Up @@ -73,6 +74,9 @@ func AddSchedulerToManager(ctx context.Context, mgr ctrl.Manager,

func (s *GlobalHubJobScheduler) Start(ctx context.Context) error {
s.log.Info("start job scheduler")
// Set the status of the job to 0 (success) when the job is started.
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(task.RetentionTaskName).Set(0)
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(task.LocalComplianceTaskName).Set(0)
s.scheduler.StartAsync()
if err := s.execJobs(ctx); err != nil {
return err
Expand Down
10 changes: 3 additions & 7 deletions manager/pkg/cronjob/task/data_retention.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/go-co-op/gocron"
ctrl "sigs.k8s.io/controller-runtime"

"github.com/stolostron/multicluster-global-hub/manager/pkg/metrics"
"github.com/stolostron/multicluster-global-hub/manager/pkg/monitoring"
"github.com/stolostron/multicluster-global-hub/pkg/database"
"github.com/stolostron/multicluster-global-hub/pkg/database/models"
)
Expand Down Expand Up @@ -42,19 +42,15 @@ var (
retentionLog = ctrl.Log.WithName(RetentionTaskName)
)

// init sets the cron job status gauge labeled with RetentionTaskName to 0 at
// package initialization, i.e. before DataRetention has ever run.
// NOTE(review): 0 presumably means "success" for this gauge — confirm against
// the gauge's help text.
func init() {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(RetentionTaskName).Set(0)
}

func DataRetention(ctx context.Context, retentionMonth int, job gocron.Job) {
currentTime := time.Now()

var err error
defer func() {
if err != nil {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(RetentionTaskName).Set(1)
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(RetentionTaskName).Set(1)
} else {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(RetentionTaskName).Set(0)
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(RetentionTaskName).Set(0)
}
}()

Expand Down
10 changes: 3 additions & 7 deletions manager/pkg/cronjob/task/local_compliance_history.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
ctrl "sigs.k8s.io/controller-runtime"

"github.com/stolostron/multicluster-global-hub/manager/pkg/metrics"
"github.com/stolostron/multicluster-global-hub/manager/pkg/monitoring"
"github.com/stolostron/multicluster-global-hub/pkg/database"
"github.com/stolostron/multicluster-global-hub/pkg/database/models"
)
Expand All @@ -35,10 +35,6 @@ var (
// sizes and measure the performance of the queries.
)

// init sets the cron job status gauge labeled with LocalComplianceTaskName to
// 0 at package initialization, i.e. before SyncLocalCompliance has ever run.
// NOTE(review): 0 presumably means "success" for this gauge — confirm against
// the gauge's help text.
func init() {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(LocalComplianceTaskName).Set(0)
}

func SyncLocalCompliance(ctx context.Context, enableSimulation bool, job gocron.Job) {
startTime = time.Now()

Expand All @@ -57,9 +53,9 @@ func SyncLocalCompliance(ctx context.Context, enableSimulation bool, job gocron.
var err error
defer func() {
if err != nil {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(LocalComplianceTaskName).Set(1)
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(LocalComplianceTaskName).Set(1)
} else {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(LocalComplianceTaskName).Set(0)
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(LocalComplianceTaskName).Set(0)
}
}()

Expand Down
39 changes: 0 additions & 39 deletions manager/pkg/metrics/cronjob.go

This file was deleted.

21 changes: 21 additions & 0 deletions manager/pkg/monitoring/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package monitoring

import (
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// GlobalHubCronJobGaugeVec is the gauge vector exported under the metric name
// "multicluster_global_hub_jobs_status". It has a single label, "type", that
// carries the cron job name; per the help text a value of 0 means the job
// succeeded and 1 means it failed.
//
// The collector is only constructed here. It is not exposed until it has been
// registered, which RegisterMetrics does.
var GlobalHubCronJobGaugeVec = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "multicluster_global_hub_jobs_status",
Help: "The status of the job. 0 == success, 1 == failure.",
},
[]string{
"type", // The name of the cronjob.
},
)

// RegisterMetrics registers GlobalHubCronJobGaugeVec with the
// controller-runtime metrics registry (metrics.Registry from
// sigs.k8s.io/controller-runtime/pkg/metrics).
//
// It uses MustRegister, which per the Prometheus client contract panics if
// registration fails (for example when the same collector is registered
// twice), so this function is meant to be called exactly once per process.
func RegisterMetrics() {
metrics.Registry.MustRegister(GlobalHubCronJobGaugeVec)
}
15 changes: 12 additions & 3 deletions operator/pkg/config/multiclusterglobalhub_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,10 @@ var (
GrafanaImageKey: "quay.io/stolostron/grafana:globalhub-1.0",
PostgresImageKey: "quay.io/stolostron/postgresql-13:1-101",
}
statisticLogInterval = "1m"
imagePullSecretName = ""
transporter transport.Transporter
statisticLogInterval = "1m"
metricsScrapeInterval = "12h"
imagePullSecretName = ""
transporter transport.Transporter
)

func SetMGHNamespacedName(namespacedName types.NamespacedName) {
Expand Down Expand Up @@ -223,6 +224,14 @@ func GetStatisticLogInterval() string {
return statisticLogInterval
}

// GetMetricsScrapeInterval returns the metrics scrape interval configured for
// the given MulticlusterGlobalHub.
//
// The value is read from the operatorconstants.AnnotationMetricsScrapeInterval
// annotation via getAnnotation. When that lookup yields an empty string, the
// package default metricsScrapeInterval is returned instead. The annotation
// value is returned as-is; no format validation is performed here.
func GetMetricsScrapeInterval(mgh *globalhubv1alpha4.MulticlusterGlobalHub) string {
	if override := getAnnotation(mgh, operatorconstants.AnnotationMetricsScrapeInterval); override != "" {
		return override
	}
	return metricsScrapeInterval
}

func GetPostgresStorageSize(mgh *globalhubv1alpha4.MulticlusterGlobalHub) string {
if mgh.Spec.DataLayer.Postgres.StorageSize != "" {
return mgh.Spec.DataLayer.Postgres.StorageSize
Expand Down
2 changes: 2 additions & 0 deletions operator/pkg/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ const (
MGHOperandImagePrefix = "RELATED_IMAGE_"
// AnnotationStatisticInterval is the annotation key for the interval at which statistics are logged
AnnotationStatisticInterval = "mgh-statistic-interval"
// AnnotationMetricsScrapeInterval to set the scrape interval for metrics
AnnotationMetricsScrapeInterval = "mgh-metrics-scrape-interval"
// AnnotationONMulticlusterHub indicates the addons are running on a hub cluster
AnnotationONMulticlusterHub = "addon.open-cluster-management.io/on-multicluster-hub"
// AnnotationPolicyONMulticlusterHub indicates the policy spec sync is running on a hub cluster
Expand Down
3 changes: 2 additions & 1 deletion operator/pkg/controllers/hubofhubs/globalhub_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

globalhubv1alpha4 "github.com/stolostron/multicluster-global-hub/operator/apis/v1alpha4"
"github.com/stolostron/multicluster-global-hub/operator/pkg/config"
operatorconstants "github.com/stolostron/multicluster-global-hub/operator/pkg/constants"
"github.com/stolostron/multicluster-global-hub/pkg/constants"
"github.com/stolostron/multicluster-global-hub/pkg/utils"
Expand Down Expand Up @@ -67,7 +68,7 @@ func (r *MulticlusterGlobalHubReconciler) reconcileMetrics(ctx context.Context,
{
Port: "metrics",
Path: "/metrics",
Interval: promv1.Duration("12h"),
Interval: promv1.Duration(config.GetMetricsScrapeInterval(mgh)),
},
},
},
Expand Down

0 comments on commit c306bf3

Please sign in to comment.