Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix metrics issues #774

Merged
merged 6 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions manager/cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import (
managerconfig "github.com/stolostron/multicluster-global-hub/manager/pkg/config"
"github.com/stolostron/multicluster-global-hub/manager/pkg/cronjob"
"github.com/stolostron/multicluster-global-hub/manager/pkg/eventcollector"
globalhubmetrics "github.com/stolostron/multicluster-global-hub/manager/pkg/metrics"
"github.com/stolostron/multicluster-global-hub/manager/pkg/monitoring"
"github.com/stolostron/multicluster-global-hub/manager/pkg/nonk8sapi"
managerscheme "github.com/stolostron/multicluster-global-hub/manager/pkg/scheme"
"github.com/stolostron/multicluster-global-hub/manager/pkg/specsyncer"
Expand Down Expand Up @@ -73,6 +73,7 @@ var (

// init runs before main: it adds the manager's types to the package-level
// scheme and registers the monitoring metrics.
// NOTE(review): any value returned by AddToScheme is not checked here —
// confirm it cannot fail or handle the result.
func init() {
managerscheme.AddToScheme(scheme)
monitoring.RegisterMetrics()
}

func parseFlags() *managerconfig.ManagerConfig {
Expand Down Expand Up @@ -272,10 +273,6 @@ func createManager(ctx context.Context, restConfig *rest.Config, managerConfig *
return nil, fmt.Errorf("failed to add event collector: %w", err)
}

if err := mgr.Add(globalhubmetrics.NewGlobalHubMetrics()); err != nil {
return nil, fmt.Errorf("failed to add metrics to manager: %w", err)
}

return mgr, nil
}

Expand Down
4 changes: 4 additions & 0 deletions manager/pkg/cronjob/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/stolostron/multicluster-global-hub/manager/pkg/config"
"github.com/stolostron/multicluster-global-hub/manager/pkg/cronjob/task"
"github.com/stolostron/multicluster-global-hub/manager/pkg/monitoring"
)

const (
Expand Down Expand Up @@ -73,6 +74,9 @@ func AddSchedulerToManager(ctx context.Context, mgr ctrl.Manager,

func (s *GlobalHubJobScheduler) Start(ctx context.Context) error {
s.log.Info("start job scheduler")
// Set the status of the job to 0 (success) when the job is started.
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(task.RetentionTaskName).Set(0)
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(task.LocalComplianceTaskName).Set(0)
s.scheduler.StartAsync()
if err := s.execJobs(ctx); err != nil {
return err
Expand Down
10 changes: 3 additions & 7 deletions manager/pkg/cronjob/task/data_retention.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"github.com/go-co-op/gocron"
ctrl "sigs.k8s.io/controller-runtime"

"github.com/stolostron/multicluster-global-hub/manager/pkg/metrics"
"github.com/stolostron/multicluster-global-hub/manager/pkg/monitoring"
"github.com/stolostron/multicluster-global-hub/pkg/database"
"github.com/stolostron/multicluster-global-hub/pkg/database/models"
)
Expand Down Expand Up @@ -42,19 +42,15 @@ var (
retentionLog = ctrl.Log.WithName(RetentionTaskName)
)

// init sets the cron job status gauge for the retention task to 0 at package
// initialization time.
func init() {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(RetentionTaskName).Set(0)
}

func DataRetention(ctx context.Context, retentionMonth int, job gocron.Job) {
currentTime := time.Now()

var err error
defer func() {
if err != nil {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(RetentionTaskName).Set(1)
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(RetentionTaskName).Set(1)
} else {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(RetentionTaskName).Set(0)
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(RetentionTaskName).Set(0)
}
}()

Expand Down
10 changes: 3 additions & 7 deletions manager/pkg/cronjob/task/local_compliance_history.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
ctrl "sigs.k8s.io/controller-runtime"

"github.com/stolostron/multicluster-global-hub/manager/pkg/metrics"
"github.com/stolostron/multicluster-global-hub/manager/pkg/monitoring"
"github.com/stolostron/multicluster-global-hub/pkg/database"
"github.com/stolostron/multicluster-global-hub/pkg/database/models"
)
Expand All @@ -35,10 +35,6 @@ var (
// sizes and measure the performance of the queries.
)

// init sets the cron job status gauge for the local compliance task to 0 at
// package initialization time.
func init() {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(LocalComplianceTaskName).Set(0)
}

func SyncLocalCompliance(ctx context.Context, enableSimulation bool, job gocron.Job) {
startTime = time.Now()

Expand All @@ -57,9 +53,9 @@ func SyncLocalCompliance(ctx context.Context, enableSimulation bool, job gocron.
var err error
defer func() {
if err != nil {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(LocalComplianceTaskName).Set(1)
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(LocalComplianceTaskName).Set(1)
} else {
metrics.GlobalHubCronJobGaugeVec.WithLabelValues(LocalComplianceTaskName).Set(0)
monitoring.GlobalHubCronJobGaugeVec.WithLabelValues(LocalComplianceTaskName).Set(0)
}
}()

Expand Down
39 changes: 0 additions & 39 deletions manager/pkg/metrics/cronjob.go

This file was deleted.

21 changes: 21 additions & 0 deletions manager/pkg/monitoring/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package monitoring

import (
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// GlobalHubCronJobGaugeVec is the gauge vector exposed as
// "multicluster_global_hub_jobs_status". It carries one series per job,
// keyed by the "type" label; a value of 0 means success and 1 means failure.
var GlobalHubCronJobGaugeVec = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "multicluster_global_hub_jobs_status",
Help: "The status of the job. 0 == success, 1 == failure.",
},
[]string{
"type", // The name of the cronjob.
},
)

// RegisterMetrics will register metrics with the global prometheus registry
func RegisterMetrics() {
metrics.Registry.MustRegister(GlobalHubCronJobGaugeVec)
}
15 changes: 12 additions & 3 deletions operator/pkg/config/multiclusterglobalhub_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,10 @@ var (
GrafanaImageKey: "quay.io/stolostron/grafana:globalhub-1.0",
PostgresImageKey: "quay.io/stolostron/postgresql-13:1-101",
}
statisticLogInterval = "1m"
imagePullSecretName = ""
transporter transport.Transporter
statisticLogInterval = "1m"
metricsScrapeInterval = "12h"
imagePullSecretName = ""
transporter transport.Transporter
)

func SetMGHNamespacedName(namespacedName types.NamespacedName) {
Expand Down Expand Up @@ -223,6 +224,14 @@ func GetStatisticLogInterval() string {
return statisticLogInterval
}

// GetMetricsScrapeInterval returns the metrics scrape interval for the given
// MulticlusterGlobalHub. The value of the AnnotationMetricsScrapeInterval
// annotation wins when it is non-empty; otherwise the package default
// metricsScrapeInterval is returned.
func GetMetricsScrapeInterval(mgh *globalhubv1alpha4.MulticlusterGlobalHub) string {
	if fromAnnotation := getAnnotation(mgh, operatorconstants.AnnotationMetricsScrapeInterval); fromAnnotation != "" {
		return fromAnnotation
	}
	return metricsScrapeInterval
}

func GetPostgresStorageSize(mgh *globalhubv1alpha4.MulticlusterGlobalHub) string {
if mgh.Spec.DataLayer.Postgres.StorageSize != "" {
return mgh.Spec.DataLayer.Postgres.StorageSize
Expand Down
2 changes: 2 additions & 0 deletions operator/pkg/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ const (
MGHOperandImagePrefix = "RELATED_IMAGE_"
// AnnotationStatisticInterval to log the interval of statistic log
AnnotationStatisticInterval = "mgh-statistic-interval"
// AnnotationMetricsScrapeInterval to set the scrape interval for metrics
AnnotationMetricsScrapeInterval = "mgh-metrics-scrape-interval"
// AnnotationONMulticlusterHub indicates the addons are running on a hub cluster
AnnotationONMulticlusterHub = "addon.open-cluster-management.io/on-multicluster-hub"
// AnnotationPolicyONMulticlusterHub indicates the policy spec sync is running on a hub cluster
Expand Down
3 changes: 2 additions & 1 deletion operator/pkg/controllers/hubofhubs/globalhub_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

globalhubv1alpha4 "github.com/stolostron/multicluster-global-hub/operator/apis/v1alpha4"
"github.com/stolostron/multicluster-global-hub/operator/pkg/config"
operatorconstants "github.com/stolostron/multicluster-global-hub/operator/pkg/constants"
"github.com/stolostron/multicluster-global-hub/pkg/constants"
"github.com/stolostron/multicluster-global-hub/pkg/utils"
Expand Down Expand Up @@ -67,7 +68,7 @@ func (r *MulticlusterGlobalHubReconciler) reconcileMetrics(ctx context.Context,
{
Port: "metrics",
Path: "/metrics",
Interval: promv1.Duration("12h"),
Interval: promv1.Duration(config.GetMetricsScrapeInterval(mgh)),
},
},
},
Expand Down