diff --git a/cmd/koord-manager/main.go b/cmd/koord-manager/main.go index 96be54c16..029ae9bb4 100644 --- a/cmd/koord-manager/main.go +++ b/cmd/koord-manager/main.go @@ -24,21 +24,26 @@ import ( "os" "time" + "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/pflag" _ "k8s.io/client-go/plugin/pkg/client/auth/gcp" "k8s.io/client-go/rest" "k8s.io/client-go/tools/leaderelection/resourcelock" + _ "k8s.io/component-base/metrics/prometheus/clientgo" // load restclient and workqueue metrics "k8s.io/klog/v2" "k8s.io/klog/v2/klogr" ctrl "sigs.k8s.io/controller-runtime" + ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" "github.com/koordinator-sh/koordinator/cmd/koord-manager/extensions" "github.com/koordinator-sh/koordinator/cmd/koord-manager/options" extclient "github.com/koordinator-sh/koordinator/pkg/client" "github.com/koordinator-sh/koordinator/pkg/features" + "github.com/koordinator-sh/koordinator/pkg/slo-controller/metrics" utilclient "github.com/koordinator-sh/koordinator/pkg/util/client" utilfeature "github.com/koordinator-sh/koordinator/pkg/util/feature" "github.com/koordinator-sh/koordinator/pkg/util/fieldindex" + metricsutil "github.com/koordinator-sh/koordinator/pkg/util/metrics" _ "github.com/koordinator-sh/koordinator/pkg/util/metrics/leadership" "github.com/koordinator-sh/koordinator/pkg/util/sloconfig" "github.com/koordinator-sh/koordinator/pkg/webhook" @@ -170,6 +175,11 @@ func main() { klog.V(4).Infof("webhook framework feature gate not enabled") } + if err := installHTTPHandler(mgr); err != nil { + setupLog.Error(err, "unable to install http handler") + os.Exit(1) + } + setupLog.Info("starting manager") extensions.StartExtensions(ctx, mgr) if err := mgr.Start(ctx); err != nil { @@ -186,3 +196,18 @@ func setRestConfig(c *rest.Config) { c.Burst = *restConfigBurst } } + +func installHTTPHandler(mgr ctrl.Manager) error { + if err := mgr.AddMetricsExtraHandler(metrics.InternalHTTPPath, promhttp.HandlerFor(metrics.InternalRegistry, promhttp.HandlerOpts{})); err != nil { + return err + } + if err := mgr.AddMetricsExtraHandler(metrics.ExternalHTTPPath, promhttp.HandlerFor(metrics.ExternalRegistry, promhttp.HandlerOpts{})); err != nil { + return err + } + // merge internal, external and controller-runtime metrics + if err := mgr.AddMetricsExtraHandler(metrics.DefaultHTTPPath, promhttp.HandlerFor( + metricsutil.MergedGatherFunc(metrics.InternalRegistry, metrics.ExternalRegistry, ctrlmetrics.Registry), promhttp.HandlerOpts{})); err != nil { + return err + } + return nil +} diff --git a/cmd/koordlet/main.go b/cmd/koordlet/main.go index b2df9fcd6..683c8418f 100644 --- a/cmd/koordlet/main.go +++ b/cmd/koordlet/main.go @@ -33,6 +33,8 @@ import ( agent "github.com/koordinator-sh/koordinator/pkg/koordlet" "github.com/koordinator-sh/koordinator/pkg/koordlet/audit" "github.com/koordinator-sh/koordinator/pkg/koordlet/config" + "github.com/koordinator-sh/koordinator/pkg/koordlet/metrics" + metricsutil "github.com/koordinator-sh/koordinator/pkg/util/metrics" ) func main() { @@ -77,18 +79,24 @@ func main() { } // Expose the Prometheus http endpoint - go func() { - klog.Infof("Starting prometheus server on %v", *options.ServerAddr) - mux := http.NewServeMux() - mux.Handle("/metrics", promhttp.Handler()) - if features.DefaultKoordletFeatureGate.Enabled(features.AuditEventsHTTPHandler) { - mux.HandleFunc("/events", audit.HttpHandler()) - } - // http.HandleFunc("/healthz", d.HealthzHandler()) - klog.Fatalf("Prometheus monitoring failed: %v", http.ListenAndServe(*options.ServerAddr, mux)) - }() + go installHTTPHandler() // Start the Cmd klog.Info("Starting the koordlet daemon") d.Run(stopCtx.Done()) } + +func installHTTPHandler() { + klog.Infof("Starting prometheus server on %v", *options.ServerAddr) + mux := http.NewServeMux() + mux.Handle(metrics.ExternalHTTPPath, promhttp.HandlerFor(metrics.ExternalRegistry, promhttp.HandlerOpts{})) + mux.Handle(metrics.InternalHTTPPath, promhttp.HandlerFor(metrics.InternalRegistry, promhttp.HandlerOpts{})) + // merge internal and external + mux.Handle(metrics.DefaultHTTPPath, promhttp.HandlerFor( + metricsutil.MergedGatherFunc(metrics.InternalRegistry, metrics.ExternalRegistry), promhttp.HandlerOpts{})) + if features.DefaultKoordletFeatureGate.Enabled(features.AuditEventsHTTPHandler) { + mux.HandleFunc("/events", audit.HttpHandler()) + } + // http.HandleFunc("/healthz", d.HealthzHandler()) + klog.Fatalf("Prometheus monitoring failed: %v", http.ListenAndServe(*options.ServerAddr, mux)) +} diff --git a/pkg/koordlet/metrics/external_metrics.go b/pkg/koordlet/metrics/external_metrics.go new file mode 100644 index 000000000..c87d2a64e --- /dev/null +++ b/pkg/koordlet/metrics/external_metrics.go @@ -0,0 +1,38 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import "github.com/prometheus/client_golang/prometheus" + +const ( + ExternalHTTPPath = "/external-metrics" +) + +var ( + // ExternalRegistry register metrics for users such as PMU or extended resources settings + ExternalRegistry = prometheus.NewRegistry() +) + +func ExternalMustRegister(metrics ...prometheus.Collector) { + ExternalRegistry.MustRegister(metrics...) +} + +func init() { + ExternalMustRegister(ResourceSummaryCollectors...) + ExternalMustRegister(CPICollectors...) + ExternalMustRegister(PSICollectors...) +} diff --git a/pkg/koordlet/metrics/internal_metrics.go b/pkg/koordlet/metrics/internal_metrics.go new file mode 100644 index 000000000..65c58d69f --- /dev/null +++ b/pkg/koordlet/metrics/internal_metrics.go @@ -0,0 +1,44 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "k8s.io/component-base/metrics/legacyregistry" +) + +const ( + InternalHTTPPath = "/internal-metrics" +) + +var ( + // InternalRegistry only register metrics of koordlet itself for performance and functional monitor + // TODO consider using k8s.io/component-base/metrics to replace github.com/prometheus/client_golang/prometheus + InternalRegistry = legacyregistry.DefaultGatherer +) + +func internalMustRegister(metrics ...prometheus.Collector) { + legacyregistry.RawMustRegister(metrics...) +} + +func init() { + internalMustRegister(CommonCollectors...) + internalMustRegister(CPUSuppressCollector...) + internalMustRegister(CPUBurstCollector...) + internalMustRegister(PredictionCollectors...) + internalMustRegister(CoreSchedCollector...) +} diff --git a/pkg/koordlet/metrics/metrics.go b/pkg/koordlet/metrics/metrics.go index aaff8c645..e01ba1b9c 100644 --- a/pkg/koordlet/metrics/metrics.go +++ b/pkg/koordlet/metrics/metrics.go @@ -24,17 +24,6 @@ import ( "k8s.io/klog/v2" ) -func init() { - prometheus.MustRegister(CommonCollectors...) - prometheus.MustRegister(ResourceSummaryCollectors...) - prometheus.MustRegister(CPICollectors...) - prometheus.MustRegister(PSICollectors...) - prometheus.MustRegister(CPUSuppressCollector...) - prometheus.MustRegister(CPUBurstCollector...) - prometheus.MustRegister(PredictionCollectors...) - prometheus.MustRegister(CoreSchedCollector...) -} - const ( KoordletSubsystem = "koordlet" @@ -72,6 +61,10 @@ var ( nodeLock sync.RWMutex ) +const ( + DefaultHTTPPath = "/metrics" +) + // Register registers the metrics with the node object func Register(node *corev1.Node) { nodeLock.Lock() diff --git a/pkg/slo-controller/metrics/common.go b/pkg/slo-controller/metrics/common.go index 360ab6bd6..d21b5091b 100644 --- a/pkg/slo-controller/metrics/common.go +++ b/pkg/slo-controller/metrics/common.go @@ -19,7 +19,7 @@ package metrics import "github.com/prometheus/client_golang/prometheus" func init() { - MustRegister(CommonCollectors...) + InternalMustRegister(CommonCollectors...) } var ( diff --git a/pkg/slo-controller/metrics/metrics.go b/pkg/slo-controller/metrics/metrics.go index 382787830..b9359f207 100644 --- a/pkg/slo-controller/metrics/metrics.go +++ b/pkg/slo-controller/metrics/metrics.go @@ -19,7 +19,8 @@ package metrics import ( "github.com/prometheus/client_golang/prometheus" corev1 "k8s.io/api/core/v1" - "sigs.k8s.io/controller-runtime/pkg/metrics" + "k8s.io/component-base/metrics/legacyregistry" + _ "k8s.io/component-base/metrics/prometheus/clientgo" // load restclient and workqueue metrics ) const ( @@ -41,11 +42,27 @@ const ( UnitInteger = "integer" ) -// DefaultRegistry uses the controller runtime registry by default. -var DefaultRegistry = metrics.Registry +const ( + // DefaultHTTPPath use /all-metrics since /metrics is occupied by controller manager default registry + DefaultHTTPPath = "/all-metrics" + ExternalHTTPPath = "/external-metrics" + InternalHTTPPath = "/internal-metrics" +) + +var ( + // ExternalRegistry register metrics for users + ExternalRegistry = prometheus.NewRegistry() + + // InternalRegistry only register metrics of koord-manager itself for performance and functional monitor + InternalRegistry = legacyregistry.DefaultGatherer +) + +func ExternalMustRegister(cs ...prometheus.Collector) { + ExternalRegistry.MustRegister(cs...) +} -func MustRegister(cs ...prometheus.Collector) { - DefaultRegistry.MustRegister(cs...) +func InternalMustRegister(cs ...prometheus.Collector) { + legacyregistry.RawMustRegister(cs...) } func genNodeLabels(node *corev1.Node) prometheus.Labels { diff --git a/pkg/slo-controller/metrics/metrics_test.go b/pkg/slo-controller/metrics/metrics_test.go index 49e85e3f6..f320dc8e2 100644 --- a/pkg/slo-controller/metrics/metrics_test.go +++ b/pkg/slo-controller/metrics/metrics_test.go @@ -35,7 +35,16 @@ func TestMustRegister(t *testing.T) { Help: "test counter", }, []string{StatusKey, ReasonKey}) assert.NotPanics(t, func() { - MustRegister(testMetricVec) + InternalMustRegister(testMetricVec) + }) + + testExternalMetricVec := prometheus.NewCounterVec(prometheus.CounterOpts{ + Subsystem: "test", + Name: "test_external_counter", + Help: "test counter", + }, []string{StatusKey, ReasonKey}) + assert.NotPanics(t, func() { + ExternalMustRegister(testExternalMetricVec) }) } diff --git a/pkg/slo-controller/metrics/node_resource.go b/pkg/slo-controller/metrics/node_resource.go index 908a0fd5c..e49e3da3f 100644 --- a/pkg/slo-controller/metrics/node_resource.go +++ b/pkg/slo-controller/metrics/node_resource.go @@ -24,7 +24,7 @@ import ( ) func init() { - MustRegister(NodeResourceCollectors...) + InternalMustRegister(NodeResourceCollectors...) } var ( diff --git a/pkg/util/metrics/merged_gather.go b/pkg/util/metrics/merged_gather.go new file mode 100644 index 000000000..95ba6b9e9 --- /dev/null +++ b/pkg/util/metrics/merged_gather.go @@ -0,0 +1,45 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" +) + +var _ prometheus.Gatherer = &mergedGather{} + +type mergedGather struct { + gathers []prometheus.Gatherer +} + +// MergedGatherFunc returns a Gatherer that merges the results of multiple Gatherers +func MergedGatherFunc(g ...prometheus.Gatherer) prometheus.Gatherer { + return &mergedGather{gathers: g} +} + +func (m *mergedGather) Gather() ([]*dto.MetricFamily, error) { + result := make([]*dto.MetricFamily, 0) + for _, g := range m.gathers { + if metrics, err := g.Gather(); err != nil { + return result, err + } else { + result = append(result, metrics...) + } + } + return result, nil +} diff --git a/pkg/util/metrics/merged_gather_test.go b/pkg/util/metrics/merged_gather_test.go new file mode 100644 index 000000000..819354756 --- /dev/null +++ b/pkg/util/metrics/merged_gather_test.go @@ -0,0 +1,61 @@ +/* +Copyright 2022 The Koordinator Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMergedGather(t *testing.T) { + // Create two Gatherer objects + gatherer1 := prometheus.NewRegistry() + gatherer2 := prometheus.NewRegistry() + + // Add some metrics to the Gatherer objects + c1 := prometheus.NewCounter(prometheus.CounterOpts{Name: "c1"}) + c2 := prometheus.NewCounter(prometheus.CounterOpts{Name: "c2"}) + gatherer1.MustRegister(c1) + gatherer2.MustRegister(c2) + + // Create a Merged Gatherer + mergedGatherer := MergedGatherFunc(gatherer1, gatherer2) + + // Call Gather and check the result + metrics, err := mergedGatherer.Gather() + require.NoError(t, err) + + // Verify that the result contains the expected metrics + assert.Equal(t, 2, len(metrics)) + assert.Equal(t, "c1", *metrics[0].Name) + assert.Equal(t, "c2", *metrics[1].Name) +} + +func TestMergedGatherEmpty(t *testing.T) { + // Create empty merged gatherer + mergedGatherer := MergedGatherFunc() + + // Gather metrics + metricsFamilies, err := mergedGatherer.Gather() + assert.NoError(t, err) + + // Check if no metric families are returned + assert.Equal(t, 0, len(metricsFamilies)) +}