Skip to content

Commit

Permalink
Implement Prometheus instrumentation (#99)
Browse files Browse the repository at this point in the history
* Add Prometheus exporter for mesh metrics
- appmesh_mesh_state gauge
- appmesh_virtual_node_state gauge
- appmesh_virtual_service_state gauge
- appmesh_api_request_duration_seconds histogram
* Add Prometheus instrumentation to App Mesh API client
Records the duration of App Mesh API calls based on object kind, name and operation type. The operation type can be get, create, update or delete. The object kind can be mesh, virtual node, virtual route, virtual router or virtual service.
* Add Prometheus instrumentation to controller
Record mesh, virtual node and virtual service operations as gauges. For each object the gauge value represents the current state, 1 means that the object is active while 0 means that the object has been deleted.
* Add tests for mesh metrics recorder

Signed-off-by: stefanprodan <stefan.prodan@gmail.com>
  • Loading branch information
stefanprodan authored and nckturner committed Nov 6, 2019
1 parent aeb43f8 commit 533b0e6
Show file tree
Hide file tree
Showing 10 changed files with 332 additions and 11 deletions.
11 changes: 4 additions & 7 deletions cmd/app-mesh-controller/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,11 @@ import (
"k8s.io/client-go/tools/clientcmd"
"k8s.io/klog"

// TODO(nic) Don't depend on k8s.io/kubernetes, just duplicate the logic in this package -- it will be a
// smaller headache.
//_ "k8s.io/kubernetes/pkg/client/metrics/prometheus" // for client metric registration
//_ "k8s.io/kubernetes/pkg/util/reflector/prometheus" // for reflector metric registration
//_ "k8s.io/kubernetes/pkg/util/workqueue/prometheus" // for workqueue metric registration

"github.com/aws/aws-app-mesh-controller-for-k8s/pkg/aws"
meshclientset "github.com/aws/aws-app-mesh-controller-for-k8s/pkg/client/clientset/versioned"
meshinformers "github.com/aws/aws-app-mesh-controller-for-k8s/pkg/client/informers/externalversions"
"github.com/aws/aws-app-mesh-controller-for-k8s/pkg/controller"
"github.com/aws/aws-app-mesh-controller-for-k8s/pkg/metrics"
)

var (
Expand Down Expand Up @@ -89,7 +84,8 @@ var rootCmd = &cobra.Command{
klog.Fatal(err)
}

cloud, err := aws.NewCloud(cfg.aws)
stats := metrics.NewRecorder(true)
cloud, err := aws.NewCloud(cfg.aws, stats)
if err != nil {
klog.Fatal(err)
}
Expand Down Expand Up @@ -117,6 +113,7 @@ var rootCmd = &cobra.Command{
meshInformerFactory.Appmesh().V1beta1().Meshes(),
meshInformerFactory.Appmesh().V1beta1().VirtualNodes(),
meshInformerFactory.Appmesh().V1beta1().VirtualServices(),
stats,
)

if err != nil {
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ require (
github.com/imdario/mergo v0.3.7 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/prometheus/client_golang v0.9.2
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910
github.com/spf13/cobra v0.0.3
github.com/spf13/pflag v1.0.5
github.com/spf13/viper v1.3.1
Expand Down
100 changes: 100 additions & 0 deletions pkg/aws/appmesh.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ func (v *Mesh) Name() string {

// GetMesh calls describe mesh.
func (c *Cloud) GetMesh(ctx context.Context, name string) (*Mesh, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("mesh", name, "get", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*DescribeMeshTimeout)
defer cancel()

Expand All @@ -96,6 +101,11 @@ func (c *Cloud) GetMesh(ctx context.Context, name string) (*Mesh, error) {

// CreateMesh converts the desired mesh spec into CreateMeshInput and calls create mesh.
func (c *Cloud) CreateMesh(ctx context.Context, mesh *appmeshv1beta1.Mesh) (*Mesh, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("mesh", mesh.Name, "create", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*CreateMeshTimeout)
defer cancel()

Expand All @@ -116,6 +126,11 @@ func (c *Cloud) CreateMesh(ctx context.Context, mesh *appmeshv1beta1.Mesh) (*Mes

// DeleteMesh deletes the given mesh
func (c *Cloud) DeleteMesh(ctx context.Context, name string) (*Mesh, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("mesh", name, "delete", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*DeleteMeshTimeout)
defer cancel()

Expand Down Expand Up @@ -231,6 +246,11 @@ func (v *VirtualNode) BackendsSet() set.Set {

// GetVirtualNode calls describe virtual node.
func (c *Cloud) GetVirtualNode(ctx context.Context, name string, meshName string) (*VirtualNode, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_node", name, "get", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*DescribeVirtualNodeTimeout)
defer cancel()

Expand All @@ -253,6 +273,11 @@ func (c *Cloud) GetVirtualNode(ctx context.Context, name string, meshName string
// CreateVirtualNode converts the desired virtual node spec into CreateVirtualNodeInput and calls create
// virtual node.
func (c *Cloud) CreateVirtualNode(ctx context.Context, vnode *appmeshv1beta1.VirtualNode) (*VirtualNode, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_node", vnode.Name, "create", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*CreateVirtualNodeTimeout)
defer cancel()

Expand Down Expand Up @@ -341,6 +366,11 @@ func (c *Cloud) CreateVirtualNode(ctx context.Context, vnode *appmeshv1beta1.Vir
// UpdateVirtualNode converts the desired virtual node spec into UpdateVirtualNodeInput and calls update
// virtual node.
func (c *Cloud) UpdateVirtualNode(ctx context.Context, vnode *appmeshv1beta1.VirtualNode) (*VirtualNode, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_node", vnode.Name, "update", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*UpdateVirtualNodeTimeout)
defer cancel()

Expand Down Expand Up @@ -427,6 +457,11 @@ func (c *Cloud) UpdateVirtualNode(ctx context.Context, vnode *appmeshv1beta1.Vir
}

func (c *Cloud) DeleteVirtualNode(ctx context.Context, name string, meshName string) (*VirtualNode, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_node", name, "delete", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*DeleteVirtualNodeTimeout)
defer cancel()

Expand Down Expand Up @@ -476,6 +511,11 @@ func (v *VirtualService) Status() string {

// GetVirtualService calls describe virtual service.
func (c *Cloud) GetVirtualService(ctx context.Context, name string, meshName string) (*VirtualService, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_service", name, "get", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*DescribeVirtualServiceTimeout)
defer cancel()

Expand All @@ -498,6 +538,11 @@ func (c *Cloud) GetVirtualService(ctx context.Context, name string, meshName str
// CreateVirtualService converts the desired virtual service spec into CreateVirtualServiceInput and calls create
// virtual service.
func (c *Cloud) CreateVirtualService(ctx context.Context, vservice *appmeshv1beta1.VirtualService) (*VirtualService, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_service", vservice.Name, "create", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*CreateVirtualServiceTimeout)
defer cancel()

Expand Down Expand Up @@ -526,6 +571,11 @@ func (c *Cloud) CreateVirtualService(ctx context.Context, vservice *appmeshv1bet
}

func (c *Cloud) UpdateVirtualService(ctx context.Context, vservice *appmeshv1beta1.VirtualService) (*VirtualService, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_service", vservice.Name, "update", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*UpdateVirtualServiceTimeout)
defer cancel()

Expand Down Expand Up @@ -554,6 +604,11 @@ func (c *Cloud) UpdateVirtualService(ctx context.Context, vservice *appmeshv1bet
}

func (c *Cloud) DeleteVirtualService(ctx context.Context, name string, meshName string) (*VirtualService, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_service", name, "delete", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*DeleteVirtualServiceTimeout)
defer cancel()

Expand Down Expand Up @@ -593,6 +648,11 @@ func (v *VirtualRouter) Status() string {

// GetVirtualRouter calls describe virtual router.
func (c *Cloud) GetVirtualRouter(ctx context.Context, name string, meshName string) (*VirtualRouter, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_router", name, "get", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*DescribeVirtualRouterTimeout)
defer cancel()

Expand All @@ -615,6 +675,11 @@ func (c *Cloud) GetVirtualRouter(ctx context.Context, name string, meshName stri
// CreateVirtualRouter converts the desired virtual service spec into CreateVirtualServiceInput and calls create
// virtual router.
func (c *Cloud) CreateVirtualRouter(ctx context.Context, vrouter *appmeshv1beta1.VirtualRouter, meshName string) (*VirtualRouter, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_router", vrouter.Name, "create", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*CreateVirtualRouterTimeout)
defer cancel()

Expand Down Expand Up @@ -652,6 +717,11 @@ func (c *Cloud) CreateVirtualRouter(ctx context.Context, vrouter *appmeshv1beta1

// UpdateVirtualRouter converts the desired virtual router spec into UpdateVirtualRouter calls
func (c *Cloud) UpdateVirtualRouter(ctx context.Context, vrouter *appmeshv1beta1.VirtualRouter, meshName string) (*VirtualRouter, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_router", vrouter.Name, "update", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*UpdateVirtualRouterTimeout)
defer cancel()

Expand Down Expand Up @@ -688,6 +758,11 @@ func (c *Cloud) UpdateVirtualRouter(ctx context.Context, vrouter *appmeshv1beta1
}

func (c *Cloud) DeleteVirtualRouter(ctx context.Context, name string, meshName string) (*VirtualRouter, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_router", name, "delete", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*DeleteVirtualRouterTimeout)
defer cancel()

Expand Down Expand Up @@ -948,6 +1023,11 @@ func (r Routes) RouteByName(name string) Route {

// GetRoute calls describe route.
func (c *Cloud) GetRoute(ctx context.Context, name string, routerName string, meshName string) (*Route, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_route", name, "get", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*DescribeRouteTimeout)
defer cancel()

Expand All @@ -970,6 +1050,11 @@ func (c *Cloud) GetRoute(ctx context.Context, name string, routerName string, me

// CreateRoute converts the desired virtual service spec into CreateVirtualServiceInput and calls create route.
func (c *Cloud) CreateRoute(ctx context.Context, route *appmeshv1beta1.Route, routerName string, meshName string) (*Route, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_route", route.Name, "create", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*CreateRouteTimeout)
defer cancel()

Expand All @@ -992,6 +1077,11 @@ func (c *Cloud) CreateRoute(ctx context.Context, route *appmeshv1beta1.Route, ro
}

func (c *Cloud) GetRoutesForVirtualRouter(ctx context.Context, routerName string, meshName string) (Routes, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_router", routerName, "get", time.Since(begin))
}()

listctx, cancel := context.WithTimeout(ctx, time.Second*ListRoutesTimeout)
defer cancel()

Expand Down Expand Up @@ -1025,6 +1115,11 @@ func (c *Cloud) GetRoutesForVirtualRouter(ctx context.Context, routerName string

// UpdateRoute converts the desired virtual service spec into UpdateRouteInput and calls update route.
func (c *Cloud) UpdateRoute(ctx context.Context, route *appmeshv1beta1.Route, routerName string, meshName string) (*Route, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_route", route.Name, "update", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*UpdateRouteTimeout)
defer cancel()

Expand All @@ -1047,6 +1142,11 @@ func (c *Cloud) UpdateRoute(ctx context.Context, route *appmeshv1beta1.Route, ro
}

func (c *Cloud) DeleteRoute(ctx context.Context, name string, routerName string, meshName string) (*Route, error) {
begin := time.Now()
defer func() {
c.stats.SetRequestDuration("virtual_route", name, "delete", time.Since(begin))
}()

ctx, cancel := context.WithTimeout(ctx, time.Second*DeleteRouteTimeout)
defer cancel()

Expand Down
9 changes: 6 additions & 3 deletions pkg/aws/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ import (
"fmt"
"time"

"github.com/aws/aws-sdk-go/aws/ec2metadata"

"github.com/aws/aws-app-mesh-controller-for-k8s/pkg/metrics"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/ec2metadata"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/appmesh"
"github.com/aws/aws-sdk-go/service/appmesh/appmeshiface"
Expand All @@ -28,6 +28,8 @@ type Cloud struct {

namespaceIDCache cache.Store
serviceIDCache cache.Store

stats *metrics.Recorder
}

type cloudmapServiceCacheItem struct {
Expand All @@ -50,7 +52,7 @@ type CloudMapNamespaceSummary struct {
NamespaceType string
}

func NewCloud(opts CloudOptions) (CloudAPI, error) {
func NewCloud(opts CloudOptions, stats *metrics.Recorder) (CloudAPI, error) {
cfg := &aws.Config{Region: aws.String(opts.Region)}

session, err := session.NewSession(cfg)
Expand All @@ -77,5 +79,6 @@ func NewCloud(opts CloudOptions) (CloudAPI, error) {
serviceIDCache: cache.NewTTLStore(func(obj interface{}) (string, error) {
return obj.(*cloudmapServiceCacheItem).key, nil
}, 60*time.Second),
stats: stats,
}, nil
}
8 changes: 7 additions & 1 deletion pkg/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
meshscheme "github.com/aws/aws-app-mesh-controller-for-k8s/pkg/client/clientset/versioned/scheme"
meshinformers "github.com/aws/aws-app-mesh-controller-for-k8s/pkg/client/informers/externalversions/appmesh/v1beta1"
meshlisters "github.com/aws/aws-app-mesh-controller-for-k8s/pkg/client/listers/appmesh/v1beta1"
"github.com/aws/aws-app-mesh-controller-for-k8s/pkg/metrics"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
Expand Down Expand Up @@ -69,6 +70,9 @@ type Controller struct {
// recorder is an event recorder for recording Event resources to the
// Kubernetes API.
recorder record.EventRecorder

// stats records mesh Prometheus metrics
stats *metrics.Recorder
}

func NewController(
Expand All @@ -78,7 +82,8 @@ func NewController(
podInformer coreinformers.PodInformer,
meshInformer meshinformers.MeshInformer,
virtualNodeInformer meshinformers.VirtualNodeInformer,
virtualServiceInformer meshinformers.VirtualServiceInformer) (*Controller, error) {
virtualServiceInformer meshinformers.VirtualServiceInformer,
stats *metrics.Recorder) (*Controller, error) {

utilruntime.Must(meshscheme.AddToScheme(scheme.Scheme))
klog.V(4).Info("Creating event broadcaster")
Expand All @@ -105,6 +110,7 @@ func NewController(
sq: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()),
pq: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()),
recorder: recorder,
stats: stats,
}

podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
Expand Down
3 changes: 3 additions & 0 deletions pkg/controller/mesh.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ func (c *Controller) handleMesh(key string) error {
// Resources with finalizers are not deleted immediately,
// instead the deletion timestamp is set when a client deletes them.
if !mesh.DeletionTimestamp.IsZero() {
c.stats.SetMeshInactive(mesh.Name)
// Resource is being deleted, process finalizers
return c.handleMeshDelete(ctx, mesh)
}
Expand Down Expand Up @@ -65,6 +66,8 @@ func (c *Controller) handleMesh(key string) error {
}
}

c.stats.SetMeshActive(mesh.Name)

return nil
}

Expand Down
3 changes: 3 additions & 0 deletions pkg/controller/virtualnode.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func (c *Controller) handleVNode(key string) error {
// Resources with finalizers are not deleted immediately,
// instead the deletion timestamp is set when a client deletes them.
if !vnode.DeletionTimestamp.IsZero() {
c.stats.SetVirtualNodeInactive(vnode.Name, vnode.Spec.MeshName)
// Resource is being deleted, process finalizers
return c.handleVNodeDelete(ctx, vnode, copy)
}
Expand Down Expand Up @@ -108,6 +109,8 @@ func (c *Controller) handleVNode(key string) error {
}
}

c.stats.SetVirtualNodeActive(vnode.Name, vnode.Spec.MeshName)

updated, err := c.updateVNodeStatus(copy, targetNode)
if err != nil {
return fmt.Errorf("error updating virtual node status: %s", err)
Expand Down
Loading

0 comments on commit 533b0e6

Please sign in to comment.