Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add metrics for TemplateInstance controller #16455

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/build/metrics/prometheus/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ var (
// vs. the other "finished" builds phases, where the reason is not set
terminalBuildCountDesc = prometheus.NewDesc(
buildSubsystem+separator+terminalBuildCount,
"Counts total teriminal builds by phase",
"Counts total terminal builds by phase",
[]string{"phase"},
nil,
)
Expand Down
65 changes: 65 additions & 0 deletions pkg/template/controller/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
package controller

import (
templateapi "github.com/openshift/origin/pkg/template/apis/template"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/apimachinery/pkg/labels"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
kapi "k8s.io/kubernetes/pkg/api"
)

var templateInstancesTotal = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "openshift_template_instance_total",
Help: "Counts TemplateInstance objects by condition type and status",
},
[]string{"type", "status"},
)

var templateInstancesActiveStartTime = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "openshift_template_instance_active_start_time_seconds",
Help: "Show the start time in unix epoch form of active TemplateInstance objects by namespace and name",
},
[]string{"namespace", "name"},
)

func (c *TemplateInstanceController) Describe(ch chan<- *prometheus.Desc) {
templateInstancesTotal.Describe(ch)
templateInstancesActiveStartTime.Describe(ch)
}

func (c *TemplateInstanceController) Collect(ch chan<- prometheus.Metric) {
templateInstances, err := c.lister.List(labels.Everything())
if err != nil {
utilruntime.HandleError(err)
return
}

templateInstancesTotal.Reset()
templateInstancesActiveStartTime.Reset()

templateInstancesTotal.WithLabelValues("", "").Set(0)

for _, templateInstance := range templateInstances {
waiting := true

templateInstancesTotal.WithLabelValues("", "").Inc()

for _, cond := range templateInstance.Status.Conditions {
templateInstancesTotal.WithLabelValues(string(cond.Type), string(cond.Status)).Inc()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Heads up, during the build metrics review, @smarterclayton was big on following the code stylings of kube_state_metrics for actually registering the metrics.

See addCountGauge and addTimeGauge in pkg/build/metrics/prometheus/metrics.go

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm reluctant to follow this coding approach because it duplicates logic that sits in the prometheus client library. My approach uses the client library logic to populate all the histogram buckets before sending them off. Although it matters less than the Histogram case, func (bc *buildCollector) Collect is basically reimplementing the prometheus client Gauge.Inc() when I don't believe it needs to.


if cond.Status == kapi.ConditionTrue &&
(cond.Type == templateapi.TemplateInstanceInstantiateFailure || cond.Type == templateapi.TemplateInstanceReady) {
waiting = false
}
}

if waiting {
templateInstancesActiveStartTime.WithLabelValues(templateInstance.Namespace, templateInstance.Name).Set(float64(templateInstance.CreationTimestamp.Unix()))
}
}

templateInstancesTotal.Collect(ch)
templateInstancesActiveStartTime.Collect(ch)
}
96 changes: 96 additions & 0 deletions pkg/template/controller/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package controller

import (
"bytes"
"net/http"
"testing"
"time"

templateapi "github.com/openshift/origin/pkg/template/apis/template"
"github.com/openshift/origin/pkg/template/generated/listers/template/internalversion"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
kapi "k8s.io/kubernetes/pkg/api"
)

type fakeLister []*templateapi.TemplateInstance

func (f fakeLister) List(labels.Selector) ([]*templateapi.TemplateInstance, error) {
return f, nil
}
func (fakeLister) TemplateInstances(string) internalversion.TemplateInstanceNamespaceLister {
return nil
}

type fakeResponseWriter struct {
bytes.Buffer
statusCode int
header http.Header
}

func (f *fakeResponseWriter) Header() http.Header {
return f.header
}

func (f *fakeResponseWriter) WriteHeader(statusCode int) {
f.statusCode = statusCode
}

func TestMetrics(t *testing.T) {
expectedResponse := `# HELP openshift_template_instance_active_start_time_seconds Show the start time in unix epoch form of active TemplateInstance objects by namespace and name
# TYPE openshift_template_instance_active_start_time_seconds gauge
openshift_template_instance_active_start_time_seconds{name="testname",namespace="testnamespace"} 123
# HELP openshift_template_instance_total Counts TemplateInstance objects by condition type and status
# TYPE openshift_template_instance_total gauge
openshift_template_instance_total{status="",type=""} 2
openshift_template_instance_total{status="False",type="Ready"} 1
openshift_template_instance_total{status="True",type="Ready"} 1
`
registry := prometheus.NewRegistry()

c := &TemplateInstanceController{
lister: &fakeLister{
{
Status: templateapi.TemplateInstanceStatus{
Conditions: []templateapi.TemplateInstanceCondition{
{
Type: templateapi.TemplateInstanceReady,
Status: kapi.ConditionTrue,
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Namespace: "testnamespace",
Name: "testname",
CreationTimestamp: metav1.Time{
Time: time.Unix(123, 0),
},
},
Status: templateapi.TemplateInstanceStatus{
Conditions: []templateapi.TemplateInstanceCondition{
{
Type: templateapi.TemplateInstanceReady,
Status: kapi.ConditionFalse,
},
},
},
},
},
}

registry.MustRegister(c)

h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorHandling: promhttp.PanicOnError})
rw := &fakeResponseWriter{header: http.Header{}}
h.ServeHTTP(rw, &http.Request{})

if rw.String() != expectedResponse {
t.Error(rw.String())
}
}
5 changes: 4 additions & 1 deletion pkg/template/controller/templateinstance_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"k8s.io/kubernetes/pkg/kubectl/resource"

"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"

"github.com/openshift/origin/pkg/authorization/util"
buildclient "github.com/openshift/origin/pkg/build/generated/internalclientset"
Expand Down Expand Up @@ -75,7 +76,7 @@ func NewTemplateInstanceController(config *rest.Config, kc kclientsetinternal.In
buildClient: buildClient,
lister: informer.Lister(),
informer: informer.Informer(),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "TemplateInstanceController"),
queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "openshift_template_instance_controller"),
readinessLimiter: workqueue.NewItemFastSlowRateLimiter(5*time.Second, 20*time.Second, 200),
}

Expand All @@ -90,6 +91,8 @@ func NewTemplateInstanceController(config *rest.Config, kc kclientsetinternal.In
},
})

prometheus.MustRegister(c)

return c
}

Expand Down
61 changes: 61 additions & 0 deletions test/integration/metrics_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package integration

import (
"regexp"
"testing"
"time"

"k8s.io/apimachinery/pkg/util/wait"

testutil "github.com/openshift/origin/test/util"
testserver "github.com/openshift/origin/test/util/server"
)

var metricsRegexp = regexp.MustCompile("(?m)^# HELP ([^ ]*)")

func TestMetrics(t *testing.T) {
expectedMetrics := []string{
"openshift_build_terminal_phase_total",
"openshift_template_instance_total",
}

masterConfig, clusterAdminKubeConfig, err := testserver.StartTestMaster()
if err != nil {
t.Fatal(err)
}
defer testserver.CleanupMasterEtcd(t, masterConfig)

clusterAdminClient, err := testutil.GetClusterAdminClient(clusterAdminKubeConfig)
if err != nil {
t.Fatal(err)
}

var missingMetrics []string
err = wait.Poll(time.Second, 30*time.Second, func() (bool, error) {
missingMetrics = []string{}

b, err := clusterAdminClient.Get().RequestURI("/metrics").DoRaw()
if err != nil {
return false, err
}

metrics := map[string]struct{}{}
for _, match := range metricsRegexp.FindAllStringSubmatch(string(b), -1) {
metrics[match[1]] = struct{}{}
}

for _, metric := range expectedMetrics {
if _, ok := metrics[metric]; !ok {
missingMetrics = append(missingMetrics, metric)
}
}

return len(missingMetrics) == 0, nil
})
if len(missingMetrics) > 0 {
t.Error(missingMetrics)
}
if err != nil {
t.Error(err)
}
}