Skip to content

Commit

Permalink
Merge pull request kubernetes-sigs#132 from pusher/metrics
Browse files Browse the repository at this point in the history
Add prometheus metrics to internal controller
  • Loading branch information
k8s-ci-robot committed Nov 2, 2018
2 parents 012d0da + 29fb7a5 commit da89fe5
Show file tree
Hide file tree
Showing 45 changed files with 4,239 additions and 1,190 deletions.
16 changes: 12 additions & 4 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions Gopkg.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ required = ["sigs.k8s.io/testing_frameworks/integration",
"github.com/go-openapi/spec",
"k8s.io/kube-openapi/pkg/common",
"k8s.io/apiextensions-apiserver",
"github.com/prometheus/client_golang/prometheus",
]

[[constraint]]
Expand Down Expand Up @@ -54,6 +55,10 @@ required = ["sigs.k8s.io/testing_frameworks/integration",
name = "go.uber.org/zap"
version = "1.8.0"

[[constraint]]
name = "github.com/prometheus/client_golang"
version = "0.9.0"

# these are not listed explicitly until we get version tags,
# since dep doesn't like bare revision dependencies

Expand Down
7 changes: 7 additions & 0 deletions pkg/builder/builder_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
. "github.com/onsi/gomega"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/envtest"
"sigs.k8s.io/controller-runtime/pkg/metrics"
logf "sigs.k8s.io/controller-runtime/pkg/runtime/log"
)

Expand All @@ -43,9 +44,15 @@ var _ = BeforeSuite(func(done Done) {
cfg, err = testenv.Start()
Expect(err).NotTo(HaveOccurred())

// Prevent the metrics listener being created
metrics.DefaultBindAddress = "0"

close(done)
}, 60)

var _ = AfterSuite(func() {
testenv.Stop()

// Put the DefaultBindAddress back
metrics.DefaultBindAddress = ":8080"
})
7 changes: 7 additions & 0 deletions pkg/controller/controller_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"sigs.k8s.io/controller-runtime/pkg/envtest"
"sigs.k8s.io/controller-runtime/pkg/metrics"
logf "sigs.k8s.io/controller-runtime/pkg/runtime/log"
)

Expand All @@ -48,9 +49,15 @@ var _ = BeforeSuite(func(done Done) {
clientset, err = kubernetes.NewForConfig(cfg)
Expect(err).NotTo(HaveOccurred())

// Prevent the metrics listener being created
metrics.DefaultBindAddress = "0"

close(done)
}, 60)

var _ = AfterSuite(func() {
testenv.Stop()

// Put the DefaultBindAddress back
metrics.DefaultBindAddress = ":8080"
})
15 changes: 15 additions & 0 deletions pkg/controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,21 @@ var _ = Describe("controller.Controller", func() {

close(done)
})

It("should not return an error if two controllers are registered with different names", func(done Done) {
m, err := manager.New(cfg, manager.Options{})
Expect(err).NotTo(HaveOccurred())

c1, err := controller.New("c1", m, controller.Options{Reconciler: rec})
Expect(err).NotTo(HaveOccurred())
Expect(c1).ToNot(BeNil())

c2, err := controller.New("c2", m, controller.Options{Reconciler: rec})
Expect(err).NotTo(HaveOccurred())
Expect(c2).ToNot(BeNil())

close(done)
})
})
})

Expand Down
12 changes: 12 additions & 0 deletions pkg/internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/handler"
ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/internal/controller/metrics"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/runtime/inject"
Expand Down Expand Up @@ -171,6 +172,10 @@ func (c *Controller) Start(stop <-chan struct{}) error {
func (c *Controller) processNextWorkItem() bool {
// This code copy-pasted from the sample-Controller.

// Update metrics after processing each item
reconcileStartTS := time.Now()
defer c.updateMetrics(time.Now().Sub(reconcileStartTS))

obj, shutdown := c.Queue.Get()
if obj == nil {
// Sometimes the Queue gives us nil items when it starts up
Expand Down Expand Up @@ -207,6 +212,7 @@ func (c *Controller) processNextWorkItem() bool {
if result, err := c.Do.Reconcile(req); err != nil {
c.Queue.AddRateLimited(req)
log.Error(err, "Reconciler error", "Controller", c.Name, "Request", req)
ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc()

return false
} else if result.RequeueAfter > 0 {
Expand All @@ -233,3 +239,9 @@ func (c *Controller) InjectFunc(f inject.Func) error {
c.SetFields = f
return nil
}

// updateMetrics updates prometheus metrics within the controller
func (c *Controller) updateMetrics(reconcileTime time.Duration) {
ctrlmetrics.QueueLength.WithLabelValues(c.Name).Set(float64(c.Queue.Len()))
ctrlmetrics.ReconcileTime.WithLabelValues(c.Name).Observe(reconcileTime.Seconds())
}
87 changes: 87 additions & 0 deletions pkg/internal/controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"
"k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
Expand All @@ -31,6 +33,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/cache/informertest"
"sigs.k8s.io/controller-runtime/pkg/controller/controllertest"
"sigs.k8s.io/controller-runtime/pkg/handler"
ctrlmetrics "sigs.k8s.io/controller-runtime/pkg/internal/controller/metrics"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"sigs.k8s.io/controller-runtime/pkg/reconcile/reconciletest"
Expand Down Expand Up @@ -404,6 +407,90 @@ var _ = Describe("controller", func() {
It("should create a new go routine for MaxConcurrentReconciles", func() {
// TODO(community): write this test
})

Context("should update prometheus metrics", func() {
It("should requeue a Request if there is an error and continue processing items", func(done Done) {
ctrlmetrics.QueueLength = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "controller_runtime_reconcile_queue_length",
Help: "Length of reconcile queue per controller",
}, []string{"controller"})
ctrlmetrics.ReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "controller_runtime_reconcile_errors_total",
Help: "Total number of reconcile errors per controller",
}, []string{"controller"})

fakeReconcile.Err = fmt.Errorf("expected error: reconcile")
go func() {
defer GinkgoRecover()
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
}()
ctrl.Queue.Add(request)

// Reduce the jitterperiod so we don't have to wait a second before the reconcile function is rerun.
ctrl.JitterPeriod = time.Millisecond

By("Invoking Reconciler which will give an error")
Expect(<-reconciled).To(Equal(request))
var queueLength, reconcileErrs dto.Metric
Eventually(func() error {
ctrlmetrics.QueueLength.WithLabelValues(ctrl.Name).Write(&queueLength)
if queueLength.GetGauge().GetValue() != 1.0 {
return fmt.Errorf("metrics not updated")
}
return nil
}, 2.0).Should(Succeed())
Eventually(func() error {
ctrlmetrics.ReconcileErrors.WithLabelValues(ctrl.Name).Write(&reconcileErrs)
if reconcileErrs.GetCounter().GetValue() != 1.0 {
return fmt.Errorf("metrics not updated")
}
return nil
}, 2.0).Should(Succeed())

By("Invoking Reconciler a second time without error")
fakeReconcile.Err = nil
Expect(<-reconciled).To(Equal(request))

By("Removing the item from the queue")
Eventually(ctrl.Queue.Len).Should(Equal(0))
Eventually(func() int { return ctrl.Queue.NumRequeues(request) }).Should(Equal(0))

close(done)
}, 2.0)

It("should add a reconcile time to the reconcile time histogram", func(done Done) {
ctrlmetrics.ReconcileTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "controller_runtime_reconcile_time_second",
Help: "Length of time per reconcile per controller",
}, []string{"controller"})

go func() {
defer GinkgoRecover()
Expect(ctrl.Start(stop)).NotTo(HaveOccurred())
}()
ctrl.Queue.Add(request)

By("Invoking Reconciler")
Expect(<-reconciled).To(Equal(request))

By("Removing the item from the queue")
Eventually(ctrl.Queue.Len).Should(Equal(0))
Eventually(func() int { return ctrl.Queue.NumRequeues(request) }).Should(Equal(0))

var reconcileTime dto.Metric
Eventually(func() error {
histObserver := ctrlmetrics.ReconcileTime.WithLabelValues(ctrl.Name)
hist := histObserver.(prometheus.Histogram)
hist.Write(&reconcileTime)
if reconcileTime.GetHistogram().GetSampleCount() != uint64(1) {
return fmt.Errorf("metrics not updated")
}
return nil
}, 2.0).Should(Succeed())

close(done)
}, 4.0)
})
})
})

Expand Down
51 changes: 51 additions & 0 deletions pkg/internal/controller/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

var (
// QueueLength is a prometheus metric which counts the current reconcile
// queue length per controller
QueueLength = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "controller_runtime_reconcile_queue_length",
Help: "Length of reconcile queue per controller",
}, []string{"controller"})

// ReconcileErrors is a prometheus counter metrics which holds the total
// number of errors from the Reconciler
ReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Name: "controller_runtime_reconcile_errors_total",
Help: "Total number of reconcile errors per controller",
}, []string{"controller"})

// ReconcileTime is a prometheus metric which keeps track of the duration
// of reconciles
ReconcileTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Name: "controller_runtime_reconcile_time_seconds",
Help: "Length of time per reconcile per controller",
}, []string{"controller"})
)

func init() {
metrics.Registry.MustRegister(QueueLength)
metrics.Registry.MustRegister(ReconcileErrors)
metrics.Registry.MustRegister(ReconcileTime)
}
Loading

0 comments on commit da89fe5

Please sign in to comment.