Skip to content

Commit

Permalink
Kanister prometheus metrics: Controller Integration + ActionSet resol…
Browse files Browse the repository at this point in the history
…ution counter exports (#2247)

* Added metrics file in controller

* Exporting resolution metrics for success and failure

* Adding unit test to ensure increments are done correctly

* Added code to avoid panics if nil registry is passed

* Added code to test for nil registry scenario

* Added code to read the env variable that indicates whether kanister metrics are enabled

* moved synchronous failure increments to the callee

* fixed issue with registry setting

* renamed metric to include kanister_ prefix

* Added godocs

* Addressed Vivek's review comments

* renamed dto to promtest

* renamed promtest to promgomodel

* fixed go.mod

* Fixed testing apis

* Addressed Pavan's review comments

---------

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
  • Loading branch information
mellon-collie and mergify[bot] committed Aug 25, 2023
1 parent 38e2af4 commit 536ac9a
Show file tree
Hide file tree
Showing 7 changed files with 203 additions and 55 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ require (
github.com/openshift/client-go v0.0.0-20230324103026-3f1513df25e0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.16.0
github.com/prometheus/client_model v0.3.0
github.com/sirupsen/logrus v1.9.3
github.com/spf13/cobra v1.7.0
github.com/vmware/govmomi v0.30.7
Expand Down Expand Up @@ -152,7 +153,6 @@ require (
github.com/oklog/ulid v1.3.1 // indirect
github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
github.com/pierrec/lz4 v2.6.1+incompatible // indirect
github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/common v0.42.0 // indirect
github.com/prometheus/procfs v0.10.1 // indirect
github.com/renier/xmlrpc v0.0.0-20170708154548-ce4a1a486c03 // indirect
Expand Down
23 changes: 21 additions & 2 deletions pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (

"github.com/kanisterio/kanister/pkg/customresource"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"gopkg.in/tomb.v2"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
Expand Down Expand Up @@ -65,12 +66,18 @@ type Controller struct {
osClient osversioned.Interface
recorder record.EventRecorder
actionSetTombMap sync.Map
metrics *metrics
}

// New creates a controller that watches for created kanister custom resources.
func New(c *rest.Config) *Controller {
func New(c *rest.Config, reg prometheus.Registerer) *Controller {
var m *metrics
if reg != nil {
m = newMetrics(reg)
}
return &Controller{
config: c,
config: c,
metrics: m,
}
}

Expand Down Expand Up @@ -137,6 +144,12 @@ func checkCRAccess(ctx context.Context, cli versioned.Interface, ns string) erro
return nil
}

// incrementActionSetResolutionCounterVec adds one to the action set
// resolution counter selected by the given resolution label value.
// It does nothing when the controller has no metrics (c.metrics is nil).
func (c *Controller) incrementActionSetResolutionCounterVec(resolution string) {
	if c.metrics == nil {
		return
	}
	counter := c.metrics.actionSetResolutionCounterVec.WithLabelValues(resolution)
	counter.Inc()
}

func (c *Controller) onAdd(obj interface{}) {
o, ok := obj.(runtime.Object)
if !ok {
Expand Down Expand Up @@ -435,17 +448,20 @@ func (c *Controller) runAction(ctx context.Context, t *tomb.Tomb, as *crv1alpha1
c.logAndSuccessEvent(ctx, fmt.Sprintf("Executing action %s", action.Name), "Started Action", as)
tp, err := param.New(ctx, c.clientset, c.dynClient, c.crClient, c.osClient, action)
if err != nil {
c.incrementActionSetResolutionCounterVec(ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE)
return err
}
phases, err := kanister.GetPhases(*bp, action.Name, action.PreferredVersion, *tp)
if err != nil {
c.incrementActionSetResolutionCounterVec(ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE)
return err
}

// deferPhase is the phase that should be run after every successful or failed action run
// can be specified in blueprint using actions[name].deferPhase
deferPhase, err := kanister.GetDeferPhase(*bp, action.Name, action.PreferredVersion, *tp)
if err != nil {
c.incrementActionSetResolutionCounterVec(ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE)
return err
}

Expand All @@ -461,6 +477,9 @@ func (c *Controller) runAction(ctx context.Context, t *tomb.Tomb, as *crv1alpha1
// render artifacts only if all the phases are run successfully
if deferErr == nil && coreErr == nil {
c.renderActionsetArtifacts(ctx, as, aIDX, as.Namespace, as.Name, action.Name, bp, tp, coreErr, deferErr)
c.incrementActionSetResolutionCounterVec(ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS)
} else {
c.incrementActionSetResolutionCounterVec(ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE)
}
}()

Expand Down
137 changes: 88 additions & 49 deletions pkg/controller/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"time"

"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
promgomodel "github.com/prometheus/client_model/go"
. "gopkg.in/check.v1"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -57,6 +59,7 @@ type ControllerSuite struct {
deployment *appsv1.Deployment
confimap *v1.ConfigMap
recorder record.EventRecorder
ctrl *Controller
}

var _ = Suite(&ControllerSuite{})
Expand Down Expand Up @@ -133,8 +136,9 @@ func (s *ControllerSuite) SetUpTest(c *C) {

ctx := context.Background()
ctx, cancel := context.WithCancel(ctx)
ctlr := New(config)
err = ctlr.StartWatch(ctx, s.namespace)
testPrometheusRegistry := prometheus.NewRegistry()
s.ctrl = New(config, testPrometheusRegistry)
err = s.ctrl.StartWatch(ctx, s.namespace)
c.Assert(err, IsNil)
s.cancel = cancel
}
Expand Down Expand Up @@ -449,6 +453,14 @@ func newBPForProgressRunningPhase() *crv1alpha1.Blueprint {
}
}

// getCounterVecValue returns the current value of the counter in metric
// that is selected by metricLabels. If the counter cannot be written into
// a prometheus client model Metric, it returns 0.
func getCounterVecValue(metric prometheus.CounterVec, metricLabels []string) float64 {
	out := new(promgomodel.Metric)
	counter := metric.WithLabelValues(metricLabels...)
	if err := counter.Write(out); err != nil {
		return 0
	}
	return out.Counter.GetValue()
}

func (s *ControllerSuite) TestEmptyActionSetStatus(c *C) {
as := &crv1alpha1.ActionSet{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -497,83 +509,107 @@ func (s *ControllerSuite) TestSynchronousFailure(c *C) {
c.Assert(err, IsNil)
}

// TestNilPrometheusRegistry verifies that a controller constructed with a
// nil registerer carries no metrics, and that incrementing the action set
// resolution counter on such a controller is a safe no-op.
func (s *ControllerSuite) TestNilPrometheusRegistry(c *C) {
	config, err := kube.LoadConfig()
	c.Assert(err, IsNil)
	c.Assert(config, NotNil)

	ctrl := New(config, nil)
	c.Assert(ctrl, NotNil)
	c.Assert(ctrl.metrics, IsNil)

	// Exercise the nil guard itself: with no metrics configured these calls
	// must return without panicking (a panic fails the test).
	ctrl.incrementActionSetResolutionCounterVec(ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS)
	ctrl.incrementActionSetResolutionCounterVec(ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE)
}

func (s *ControllerSuite) TestExecActionSet(c *C) {
for _, pok := range []string{"StatefulSet", "Deployment"} {
for _, tc := range []struct {
funcNames []string
args [][]string
name string
version string
funcNames []string
args [][]string
name string
version string
metricResolution string
}{
{
funcNames: []string{testutil.WaitFuncName},
name: "WaitFunc",
version: kanister.DefaultVersion,
funcNames: []string{testutil.WaitFuncName},
name: "WaitFunc",
version: kanister.DefaultVersion,
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS,
},
{
funcNames: []string{testutil.WaitFuncName, testutil.WaitFuncName},
name: "WaitWait",
version: kanister.DefaultVersion,
funcNames: []string{testutil.WaitFuncName, testutil.WaitFuncName},
name: "WaitWait",
version: kanister.DefaultVersion,
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS,
},
{
funcNames: []string{testutil.FailFuncName},
name: "FailFunc",
version: kanister.DefaultVersion,
funcNames: []string{testutil.FailFuncName},
name: "FailFunc",
version: kanister.DefaultVersion,
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE,
},
{
funcNames: []string{testutil.WaitFuncName, testutil.FailFuncName},
name: "WaitFail",
version: kanister.DefaultVersion,
funcNames: []string{testutil.WaitFuncName, testutil.FailFuncName},
name: "WaitFail",
version: kanister.DefaultVersion,
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE,
},
{
funcNames: []string{testutil.FailFuncName, testutil.WaitFuncName},
name: "FailWait",
version: kanister.DefaultVersion,
funcNames: []string{testutil.FailFuncName, testutil.WaitFuncName},
name: "FailWait",
version: kanister.DefaultVersion,
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE,
},
{
funcNames: []string{testutil.ArgFuncName},
name: "ArgFunc",
version: kanister.DefaultVersion,
funcNames: []string{testutil.ArgFuncName},
name: "ArgFunc",
version: kanister.DefaultVersion,
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS,
},
{
funcNames: []string{testutil.ArgFuncName, testutil.FailFuncName},
name: "ArgFail",
version: kanister.DefaultVersion,
funcNames: []string{testutil.ArgFuncName, testutil.FailFuncName},
name: "ArgFail",
version: kanister.DefaultVersion,
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE,
},
{
funcNames: []string{testutil.OutputFuncName},
name: "OutputFunc",
version: kanister.DefaultVersion,
funcNames: []string{testutil.OutputFuncName},
name: "OutputFunc",
version: kanister.DefaultVersion,
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS,
},
{
funcNames: []string{testutil.CancelFuncName},
name: "CancelFunc",
version: kanister.DefaultVersion,
funcNames: []string{testutil.CancelFuncName},
name: "CancelFunc",
version: kanister.DefaultVersion,
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE,
},
{
funcNames: []string{testutil.ArgFuncName},
name: "ArgFuncVersion",
version: testutil.TestVersion,
funcNames: []string{testutil.ArgFuncName},
name: "ArgFuncVersion",
version: testutil.TestVersion,
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS,
},
{
funcNames: []string{testutil.ArgFuncName},
name: "ArgFuncVersionFallback",
version: "v1.2.3",
funcNames: []string{testutil.ArgFuncName},
name: "ArgFuncVersionFallback",
version: "v1.2.3",
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS,
},
{
funcNames: []string{testutil.ArgFuncName},
name: "ArgFuncNoActionSetVersion",
version: "",
funcNames: []string{testutil.ArgFuncName},
name: "ArgFuncNoActionSetVersion",
version: "",
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS,
},
{
funcNames: []string{testutil.VersionMismatchFuncName},
name: "VersionMismatchFunc",
version: "v1.2.3",
funcNames: []string{testutil.VersionMismatchFuncName},
name: "VersionMismatchFunc",
version: "v1.2.3",
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE,
},
{
funcNames: []string{testutil.ArgFuncName, testutil.OutputFuncName},
name: "ArgOutputFallbackOnlyOutput",
version: testutil.TestVersion,
funcNames: []string{testutil.ArgFuncName, testutil.OutputFuncName},
name: "ArgOutputFallbackOnlyOutput",
version: testutil.TestVersion,
metricResolution: ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS,
},
} {
var err error
Expand All @@ -584,6 +620,8 @@ func (s *ControllerSuite) TestExecActionSet(c *C) {
bp, err = s.crCli.Blueprints(s.namespace).Create(ctx, bp, metav1.CreateOptions{})
c.Assert(err, IsNil)

oldValue := getCounterVecValue(s.ctrl.metrics.actionSetResolutionCounterVec, []string{tc.metricResolution})

var n string
switch pok {
case "StatefulSet":
Expand Down Expand Up @@ -630,6 +668,7 @@ func (s *ControllerSuite) TestExecActionSet(c *C) {
if !cancel {
err = s.waitOnActionSetState(c, as, final)
c.Assert(err, IsNil, Commentf("Failed case: %s", tc.name))
c.Assert(getCounterVecValue(s.ctrl.metrics.actionSetResolutionCounterVec, []string{tc.metricResolution}), Equals, oldValue+1, Commentf("Failed case: %s", tc.name))
}
err = s.crCli.Blueprints(s.namespace).Delete(context.TODO(), bp.GetName(), metav1.DeleteOptions{})
c.Assert(err, IsNil)
Expand Down Expand Up @@ -676,7 +715,7 @@ func (s *ControllerSuite) TestRuntimeObjEventLogs(c *C) {
ctx = field.Context(ctx, consts.ActionsetNameKey, as.GetName())
config, err := kube.LoadConfig()
c.Assert(err, IsNil)
ctlr := New(config)
ctlr := New(config, nil)
ctlr.logAndErrorEvent(ctx, msg, reason, errors.New("Testing Event Logs"), as, nilAs, bp)

// Test ActionSet error event logging
Expand Down
58 changes: 58 additions & 0 deletions pkg/controller/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Copyright 2023 The Kanister Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package controller

import (
"github.com/prometheus/client_golang/prometheus"

kanistermetrics "github.com/kanisterio/kanister/pkg/metrics"
)

// metrics encapsulates all the prometheus metrics that controller
// needs to own.
type metrics struct {
// actionSetResolutionCounterVec counts action set resolutions; it is
// created in newMetrics under the name
// "kanister_action_set_resolutions_total" with a "resolution" label.
actionSetResolutionCounterVec prometheus.CounterVec
}

// Label name and label values used by the action set resolution counter.
// NOTE(review): Go convention prefers MixedCaps over ALL_CAPS for constants;
// renaming would require updating every call site in the package.
const (
// ACTION_SET_COUNTER_VEC_LABEL_RES is the name of the counter's label.
ACTION_SET_COUNTER_VEC_LABEL_RES = "resolution"
// ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS is the label value for a
// successful resolution.
ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS = "success"
// ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE is the label value for a
// failed resolution.
ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE = "failure"
)

// getActionSetCounterVecLabels returns the bounded labels from which the
// label permutations of the action set resolution counter are built: a
// single "resolution" label whose values are "success" and "failure".
func getActionSetCounterVecLabels() []kanistermetrics.BoundedLabel {
	resolutions := []string{
		ACTION_SET_COUNTER_VEC_LABEL_RES_SUCCESS,
		ACTION_SET_COUNTER_VEC_LABEL_RES_FAILURE,
	}
	return []kanistermetrics.BoundedLabel{
		{
			LabelName:   ACTION_SET_COUNTER_VEC_LABEL_RES,
			LabelValues: resolutions,
		},
	}
}

// newMetrics builds the metrics holder for the controller package. It
// creates the "kanister_action_set_resolutions_total" counter vector by
// passing reg, the counter options and the resolution labels to
// kanistermetrics.InitCounterVec, and stores the result in the holder.
func newMetrics(reg prometheus.Registerer) *metrics {
	counterVec := kanistermetrics.InitCounterVec(
		reg,
		prometheus.CounterOpts{
			Name: "kanister_action_set_resolutions_total",
			Help: "Total number of action set resolutions",
		},
		getActionSetCounterVecLabels(),
	)
	m := &metrics{}
	m.actionSetResolutionCounterVec = *counterVec
	return m
}
Loading

0 comments on commit 536ac9a

Please sign in to comment.