Skip to content

Commit 533b0e6

Browse files
stefanprodan authored and nckturner committed
Implement Prometheus instrumentation (#99)
* Add Prometheus exporter for mesh metrics
  - appmesh_mesh_state gauge
  - appmesh_virtual_node_state gauge
  - appmesh_virtual_service_state gauge
  - appmesh_api_request_duration_seconds histogram

* Add Prometheus instrumentation to App Mesh API client

  Records the duration of App Mesh API calls based on object kind, name and operation type. The operation type can be get, create, update or delete. The object kind can be mesh, virtual node, virtual route, virtual router or virtual service.

* Add Prometheus instrumentation to controller

  Records mesh, virtual node and virtual service operations as gauges. For each object, the gauge value represents the current state: 1 means that the object is active, while 0 means that the object has been deleted.

* Add tests for mesh metrics recorder

Signed-off-by: stefanprodan <[email protected]>
1 parent aeb43f8 commit 533b0e6

File tree

10 files changed

+332
-11
lines changed

10 files changed

+332
-11
lines changed

cmd/app-mesh-controller/root.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,11 @@ import (
1515
"k8s.io/client-go/tools/clientcmd"
1616
"k8s.io/klog"
1717

18-
// TODO(nic) Don't depend on k8s.io/kubernetes, just duplicate the logic in this package -- it will be a
19-
// smaller headache.
20-
//_ "k8s.io/kubernetes/pkg/client/metrics/prometheus" // for client metric registration
21-
//_ "k8s.io/kubernetes/pkg/util/reflector/prometheus" // for reflector metric registration
22-
//_ "k8s.io/kubernetes/pkg/util/workqueue/prometheus" // for workqueue metric registration
23-
2418
"github.com/aws/aws-app-mesh-controller-for-k8s/pkg/aws"
2519
meshclientset "github.com/aws/aws-app-mesh-controller-for-k8s/pkg/client/clientset/versioned"
2620
meshinformers "github.com/aws/aws-app-mesh-controller-for-k8s/pkg/client/informers/externalversions"
2721
"github.com/aws/aws-app-mesh-controller-for-k8s/pkg/controller"
22+
"github.com/aws/aws-app-mesh-controller-for-k8s/pkg/metrics"
2823
)
2924

3025
var (
@@ -89,7 +84,8 @@ var rootCmd = &cobra.Command{
8984
klog.Fatal(err)
9085
}
9186

92-
cloud, err := aws.NewCloud(cfg.aws)
87+
stats := metrics.NewRecorder(true)
88+
cloud, err := aws.NewCloud(cfg.aws, stats)
9389
if err != nil {
9490
klog.Fatal(err)
9591
}
@@ -117,6 +113,7 @@ var rootCmd = &cobra.Command{
117113
meshInformerFactory.Appmesh().V1beta1().Meshes(),
118114
meshInformerFactory.Appmesh().V1beta1().VirtualNodes(),
119115
meshInformerFactory.Appmesh().V1beta1().VirtualServices(),
116+
stats,
120117
)
121118

122119
if err != nil {

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ require (
1010
github.com/imdario/mergo v0.3.7 // indirect
1111
github.com/inconshreveable/mousetrap v1.0.0 // indirect
1212
github.com/prometheus/client_golang v0.9.2
13+
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910
1314
github.com/spf13/cobra v0.0.3
1415
github.com/spf13/pflag v1.0.5
1516
github.com/spf13/viper v1.3.1

pkg/aws/appmesh.go

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@ func (v *Mesh) Name() string {
7676

7777
// GetMesh calls describe mesh.
7878
func (c *Cloud) GetMesh(ctx context.Context, name string) (*Mesh, error) {
79+
begin := time.Now()
80+
defer func() {
81+
c.stats.SetRequestDuration("mesh", name, "get", time.Since(begin))
82+
}()
83+
7984
ctx, cancel := context.WithTimeout(ctx, time.Second*DescribeMeshTimeout)
8085
defer cancel()
8186

@@ -96,6 +101,11 @@ func (c *Cloud) GetMesh(ctx context.Context, name string) (*Mesh, error) {
96101

97102
// CreateMesh converts the desired mesh spec into CreateMeshInput and calls create mesh.
98103
func (c *Cloud) CreateMesh(ctx context.Context, mesh *appmeshv1beta1.Mesh) (*Mesh, error) {
104+
begin := time.Now()
105+
defer func() {
106+
c.stats.SetRequestDuration("mesh", mesh.Name, "create", time.Since(begin))
107+
}()
108+
99109
ctx, cancel := context.WithTimeout(ctx, time.Second*CreateMeshTimeout)
100110
defer cancel()
101111

@@ -116,6 +126,11 @@ func (c *Cloud) CreateMesh(ctx context.Context, mesh *appmeshv1beta1.Mesh) (*Mes
116126

117127
// DeleteMesh deletes the given mesh
118128
func (c *Cloud) DeleteMesh(ctx context.Context, name string) (*Mesh, error) {
129+
begin := time.Now()
130+
defer func() {
131+
c.stats.SetRequestDuration("mesh", name, "delete", time.Since(begin))
132+
}()
133+
119134
ctx, cancel := context.WithTimeout(ctx, time.Second*DeleteMeshTimeout)
120135
defer cancel()
121136

@@ -231,6 +246,11 @@ func (v *VirtualNode) BackendsSet() set.Set {
231246

232247
// GetVirtualNode calls describe virtual node.
233248
func (c *Cloud) GetVirtualNode(ctx context.Context, name string, meshName string) (*VirtualNode, error) {
249+
begin := time.Now()
250+
defer func() {
251+
c.stats.SetRequestDuration("virtual_node", name, "get", time.Since(begin))
252+
}()
253+
234254
ctx, cancel := context.WithTimeout(ctx, time.Second*DescribeVirtualNodeTimeout)
235255
defer cancel()
236256

@@ -253,6 +273,11 @@ func (c *Cloud) GetVirtualNode(ctx context.Context, name string, meshName string
253273
// CreateVirtualNode converts the desired virtual node spec into CreateVirtualNodeInput and calls create
254274
// virtual node.
255275
func (c *Cloud) CreateVirtualNode(ctx context.Context, vnode *appmeshv1beta1.VirtualNode) (*VirtualNode, error) {
276+
begin := time.Now()
277+
defer func() {
278+
c.stats.SetRequestDuration("virtual_node", vnode.Name, "create", time.Since(begin))
279+
}()
280+
256281
ctx, cancel := context.WithTimeout(ctx, time.Second*CreateVirtualNodeTimeout)
257282
defer cancel()
258283

@@ -341,6 +366,11 @@ func (c *Cloud) CreateVirtualNode(ctx context.Context, vnode *appmeshv1beta1.Vir
341366
// UpdateVirtualNode converts the desired virtual node spec into UpdateVirtualNodeInput and calls update
342367
// virtual node.
343368
func (c *Cloud) UpdateVirtualNode(ctx context.Context, vnode *appmeshv1beta1.VirtualNode) (*VirtualNode, error) {
369+
begin := time.Now()
370+
defer func() {
371+
c.stats.SetRequestDuration("virtual_node", vnode.Name, "update", time.Since(begin))
372+
}()
373+
344374
ctx, cancel := context.WithTimeout(ctx, time.Second*UpdateVirtualNodeTimeout)
345375
defer cancel()
346376

@@ -427,6 +457,11 @@ func (c *Cloud) UpdateVirtualNode(ctx context.Context, vnode *appmeshv1beta1.Vir
427457
}
428458

429459
func (c *Cloud) DeleteVirtualNode(ctx context.Context, name string, meshName string) (*VirtualNode, error) {
460+
begin := time.Now()
461+
defer func() {
462+
c.stats.SetRequestDuration("virtual_node", name, "delete", time.Since(begin))
463+
}()
464+
430465
ctx, cancel := context.WithTimeout(ctx, time.Second*DeleteVirtualNodeTimeout)
431466
defer cancel()
432467

@@ -476,6 +511,11 @@ func (v *VirtualService) Status() string {
476511

477512
// GetVirtualService calls describe virtual service.
478513
func (c *Cloud) GetVirtualService(ctx context.Context, name string, meshName string) (*VirtualService, error) {
514+
begin := time.Now()
515+
defer func() {
516+
c.stats.SetRequestDuration("virtual_service", name, "get", time.Since(begin))
517+
}()
518+
479519
ctx, cancel := context.WithTimeout(ctx, time.Second*DescribeVirtualServiceTimeout)
480520
defer cancel()
481521

@@ -498,6 +538,11 @@ func (c *Cloud) GetVirtualService(ctx context.Context, name string, meshName str
498538
// CreateVirtualService converts the desired virtual service spec into CreateVirtualServiceInput and calls create
499539
// virtual service.
500540
func (c *Cloud) CreateVirtualService(ctx context.Context, vservice *appmeshv1beta1.VirtualService) (*VirtualService, error) {
541+
begin := time.Now()
542+
defer func() {
543+
c.stats.SetRequestDuration("virtual_service", vservice.Name, "create", time.Since(begin))
544+
}()
545+
501546
ctx, cancel := context.WithTimeout(ctx, time.Second*CreateVirtualServiceTimeout)
502547
defer cancel()
503548

@@ -526,6 +571,11 @@ func (c *Cloud) CreateVirtualService(ctx context.Context, vservice *appmeshv1bet
526571
}
527572

528573
func (c *Cloud) UpdateVirtualService(ctx context.Context, vservice *appmeshv1beta1.VirtualService) (*VirtualService, error) {
574+
begin := time.Now()
575+
defer func() {
576+
c.stats.SetRequestDuration("virtual_service", vservice.Name, "update", time.Since(begin))
577+
}()
578+
529579
ctx, cancel := context.WithTimeout(ctx, time.Second*UpdateVirtualServiceTimeout)
530580
defer cancel()
531581

@@ -554,6 +604,11 @@ func (c *Cloud) UpdateVirtualService(ctx context.Context, vservice *appmeshv1bet
554604
}
555605

556606
func (c *Cloud) DeleteVirtualService(ctx context.Context, name string, meshName string) (*VirtualService, error) {
607+
begin := time.Now()
608+
defer func() {
609+
c.stats.SetRequestDuration("virtual_service", name, "delete", time.Since(begin))
610+
}()
611+
557612
ctx, cancel := context.WithTimeout(ctx, time.Second*DeleteVirtualServiceTimeout)
558613
defer cancel()
559614

@@ -593,6 +648,11 @@ func (v *VirtualRouter) Status() string {
593648

594649
// GetVirtualRouter calls describe virtual router.
595650
func (c *Cloud) GetVirtualRouter(ctx context.Context, name string, meshName string) (*VirtualRouter, error) {
651+
begin := time.Now()
652+
defer func() {
653+
c.stats.SetRequestDuration("virtual_router", name, "get", time.Since(begin))
654+
}()
655+
596656
ctx, cancel := context.WithTimeout(ctx, time.Second*DescribeVirtualRouterTimeout)
597657
defer cancel()
598658

@@ -615,6 +675,11 @@ func (c *Cloud) GetVirtualRouter(ctx context.Context, name string, meshName stri
615675
// CreateVirtualRouter converts the desired virtual router spec into CreateVirtualRouterInput and calls create
616676
// virtual router.
617677
func (c *Cloud) CreateVirtualRouter(ctx context.Context, vrouter *appmeshv1beta1.VirtualRouter, meshName string) (*VirtualRouter, error) {
678+
begin := time.Now()
679+
defer func() {
680+
c.stats.SetRequestDuration("virtual_router", vrouter.Name, "create", time.Since(begin))
681+
}()
682+
618683
ctx, cancel := context.WithTimeout(ctx, time.Second*CreateVirtualRouterTimeout)
619684
defer cancel()
620685

@@ -652,6 +717,11 @@ func (c *Cloud) CreateVirtualRouter(ctx context.Context, vrouter *appmeshv1beta1
652717

653718
// UpdateVirtualRouter converts the desired virtual router spec into UpdateVirtualRouterInput and calls update virtual router.
654719
func (c *Cloud) UpdateVirtualRouter(ctx context.Context, vrouter *appmeshv1beta1.VirtualRouter, meshName string) (*VirtualRouter, error) {
720+
begin := time.Now()
721+
defer func() {
722+
c.stats.SetRequestDuration("virtual_router", vrouter.Name, "update", time.Since(begin))
723+
}()
724+
655725
ctx, cancel := context.WithTimeout(ctx, time.Second*UpdateVirtualRouterTimeout)
656726
defer cancel()
657727

@@ -688,6 +758,11 @@ func (c *Cloud) UpdateVirtualRouter(ctx context.Context, vrouter *appmeshv1beta1
688758
}
689759

690760
func (c *Cloud) DeleteVirtualRouter(ctx context.Context, name string, meshName string) (*VirtualRouter, error) {
761+
begin := time.Now()
762+
defer func() {
763+
c.stats.SetRequestDuration("virtual_router", name, "delete", time.Since(begin))
764+
}()
765+
691766
ctx, cancel := context.WithTimeout(ctx, time.Second*DeleteVirtualRouterTimeout)
692767
defer cancel()
693768

@@ -948,6 +1023,11 @@ func (r Routes) RouteByName(name string) Route {
9481023

9491024
// GetRoute calls describe route.
9501025
func (c *Cloud) GetRoute(ctx context.Context, name string, routerName string, meshName string) (*Route, error) {
1026+
begin := time.Now()
1027+
defer func() {
1028+
c.stats.SetRequestDuration("virtual_route", name, "get", time.Since(begin))
1029+
}()
1030+
9511031
ctx, cancel := context.WithTimeout(ctx, time.Second*DescribeRouteTimeout)
9521032
defer cancel()
9531033

@@ -970,6 +1050,11 @@ func (c *Cloud) GetRoute(ctx context.Context, name string, routerName string, me
9701050

9711051
// CreateRoute converts the desired route spec into CreateRouteInput and calls create route.
9721052
func (c *Cloud) CreateRoute(ctx context.Context, route *appmeshv1beta1.Route, routerName string, meshName string) (*Route, error) {
1053+
begin := time.Now()
1054+
defer func() {
1055+
c.stats.SetRequestDuration("virtual_route", route.Name, "create", time.Since(begin))
1056+
}()
1057+
9731058
ctx, cancel := context.WithTimeout(ctx, time.Second*CreateRouteTimeout)
9741059
defer cancel()
9751060

@@ -992,6 +1077,11 @@ func (c *Cloud) CreateRoute(ctx context.Context, route *appmeshv1beta1.Route, ro
9921077
}
9931078

9941079
func (c *Cloud) GetRoutesForVirtualRouter(ctx context.Context, routerName string, meshName string) (Routes, error) {
1080+
begin := time.Now()
1081+
defer func() {
1082+
c.stats.SetRequestDuration("virtual_router", routerName, "get", time.Since(begin))
1083+
}()
1084+
9951085
listctx, cancel := context.WithTimeout(ctx, time.Second*ListRoutesTimeout)
9961086
defer cancel()
9971087

@@ -1025,6 +1115,11 @@ func (c *Cloud) GetRoutesForVirtualRouter(ctx context.Context, routerName string
10251115

10261116
// UpdateRoute converts the desired route spec into UpdateRouteInput and calls update route.
10271117
func (c *Cloud) UpdateRoute(ctx context.Context, route *appmeshv1beta1.Route, routerName string, meshName string) (*Route, error) {
1118+
begin := time.Now()
1119+
defer func() {
1120+
c.stats.SetRequestDuration("virtual_route", route.Name, "update", time.Since(begin))
1121+
}()
1122+
10281123
ctx, cancel := context.WithTimeout(ctx, time.Second*UpdateRouteTimeout)
10291124
defer cancel()
10301125

@@ -1047,6 +1142,11 @@ func (c *Cloud) UpdateRoute(ctx context.Context, route *appmeshv1beta1.Route, ro
10471142
}
10481143

10491144
func (c *Cloud) DeleteRoute(ctx context.Context, name string, routerName string, meshName string) (*Route, error) {
1145+
begin := time.Now()
1146+
defer func() {
1147+
c.stats.SetRequestDuration("virtual_route", name, "delete", time.Since(begin))
1148+
}()
1149+
10501150
ctx, cancel := context.WithTimeout(ctx, time.Second*DeleteRouteTimeout)
10511151
defer cancel()
10521152

pkg/aws/cloud.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ import (
44
"fmt"
55
"time"
66

7-
"github.com/aws/aws-sdk-go/aws/ec2metadata"
8-
7+
"github.com/aws/aws-app-mesh-controller-for-k8s/pkg/metrics"
98
"github.com/aws/aws-sdk-go/aws"
9+
"github.com/aws/aws-sdk-go/aws/ec2metadata"
1010
"github.com/aws/aws-sdk-go/aws/session"
1111
"github.com/aws/aws-sdk-go/service/appmesh"
1212
"github.com/aws/aws-sdk-go/service/appmesh/appmeshiface"
@@ -28,6 +28,8 @@ type Cloud struct {
2828

2929
namespaceIDCache cache.Store
3030
serviceIDCache cache.Store
31+
32+
stats *metrics.Recorder
3133
}
3234

3335
type cloudmapServiceCacheItem struct {
@@ -50,7 +52,7 @@ type CloudMapNamespaceSummary struct {
5052
NamespaceType string
5153
}
5254

53-
func NewCloud(opts CloudOptions) (CloudAPI, error) {
55+
func NewCloud(opts CloudOptions, stats *metrics.Recorder) (CloudAPI, error) {
5456
cfg := &aws.Config{Region: aws.String(opts.Region)}
5557

5658
session, err := session.NewSession(cfg)
@@ -77,5 +79,6 @@ func NewCloud(opts CloudOptions) (CloudAPI, error) {
7779
serviceIDCache: cache.NewTTLStore(func(obj interface{}) (string, error) {
7880
return obj.(*cloudmapServiceCacheItem).key, nil
7981
}, 60*time.Second),
82+
stats: stats,
8083
}, nil
8184
}

pkg/controller/controller.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
meshscheme "github.com/aws/aws-app-mesh-controller-for-k8s/pkg/client/clientset/versioned/scheme"
1212
meshinformers "github.com/aws/aws-app-mesh-controller-for-k8s/pkg/client/informers/externalversions/appmesh/v1beta1"
1313
meshlisters "github.com/aws/aws-app-mesh-controller-for-k8s/pkg/client/listers/appmesh/v1beta1"
14+
"github.com/aws/aws-app-mesh-controller-for-k8s/pkg/metrics"
1415
corev1 "k8s.io/api/core/v1"
1516
"k8s.io/apimachinery/pkg/util/runtime"
1617
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
@@ -69,6 +70,9 @@ type Controller struct {
6970
// recorder is an event recorder for recording Event resources to the
7071
// Kubernetes API.
7172
recorder record.EventRecorder
73+
74+
// stats records mesh Prometheus metrics
75+
stats *metrics.Recorder
7276
}
7377

7478
func NewController(
@@ -78,7 +82,8 @@ func NewController(
7882
podInformer coreinformers.PodInformer,
7983
meshInformer meshinformers.MeshInformer,
8084
virtualNodeInformer meshinformers.VirtualNodeInformer,
81-
virtualServiceInformer meshinformers.VirtualServiceInformer) (*Controller, error) {
85+
virtualServiceInformer meshinformers.VirtualServiceInformer,
86+
stats *metrics.Recorder) (*Controller, error) {
8287

8388
utilruntime.Must(meshscheme.AddToScheme(scheme.Scheme))
8489
klog.V(4).Info("Creating event broadcaster")
@@ -105,6 +110,7 @@ func NewController(
105110
sq: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()),
106111
pq: workqueue.NewRateLimitingQueue(workqueue.DefaultControllerRateLimiter()),
107112
recorder: recorder,
113+
stats: stats,
108114
}
109115

110116
podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{

pkg/controller/mesh.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ func (c *Controller) handleMesh(key string) error {
3535
// Resources with finalizers are not deleted immediately,
3636
// instead the deletion timestamp is set when a client deletes them.
3737
if !mesh.DeletionTimestamp.IsZero() {
38+
c.stats.SetMeshInactive(mesh.Name)
3839
// Resource is being deleted, process finalizers
3940
return c.handleMeshDelete(ctx, mesh)
4041
}
@@ -65,6 +66,8 @@ func (c *Controller) handleMesh(key string) error {
6566
}
6667
}
6768

69+
c.stats.SetMeshActive(mesh.Name)
70+
6871
return nil
6972
}
7073

pkg/controller/virtualnode.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ func (c *Controller) handleVNode(key string) error {
5252
// Resources with finalizers are not deleted immediately,
5353
// instead the deletion timestamp is set when a client deletes them.
5454
if !vnode.DeletionTimestamp.IsZero() {
55+
c.stats.SetVirtualNodeInactive(vnode.Name, vnode.Spec.MeshName)
5556
// Resource is being deleted, process finalizers
5657
return c.handleVNodeDelete(ctx, vnode, copy)
5758
}
@@ -108,6 +109,8 @@ func (c *Controller) handleVNode(key string) error {
108109
}
109110
}
110111

112+
c.stats.SetVirtualNodeActive(vnode.Name, vnode.Spec.MeshName)
113+
111114
updated, err := c.updateVNodeStatus(copy, targetNode)
112115
if err != nil {
113116
return fmt.Errorf("error updating virtual node status: %s", err)

0 commit comments

Comments
 (0)