Skip to content

Commit

Permalink
Do not send telemetry data if failure in collection (nginxinc#1731)
Browse files Browse the repository at this point in the history
Fix error handling in telemetry and usage reporting

Problem: We send empty telemetry data if there is any failure in collection of data. We also do the same for NIM usage reporting.

Solution: Don't send telemetry data in case of failure of collection. Also does not send data for NIM usage reporting if there was any failure. Refactored some tests to comply with code coverage.
  • Loading branch information
bjee19 authored and amimimor committed Apr 3, 2024
1 parent 4d1f933 commit 1d39271
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 50 deletions.
2 changes: 1 addition & 1 deletion internal/framework/events/events_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,5 @@ func TestEventLoop_SwapBatches(t *testing.T) {
g.Expect(eventLoop.currentBatch).To(HaveLen(len(nextBatch)))
g.Expect(eventLoop.currentBatch).To(Equal(nextBatch))
g.Expect(eventLoop.nextBatch).To(BeEmpty())
g.Expect(cap(eventLoop.nextBatch)).To(Equal(3))
g.Expect(eventLoop.nextBatch).To(HaveCap(3))
}
1 change: 1 addition & 0 deletions internal/mode/static/telemetry/job_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ func CreateTelemetryJobWorker(
data, err := dataCollector.Collect(ctx)
if err != nil {
logger.Error(err, "Failed to collect telemetry data")
return
}

// Export telemetry
Expand Down
22 changes: 21 additions & 1 deletion internal/mode/static/telemetry/job_worker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package telemetry_test

import (
"context"
"errors"
"testing"
"time"

Expand All @@ -13,7 +14,7 @@ import (
"github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/telemetry/telemetryfakes"
)

func TestCreateTelemetryJobWorker(t *testing.T) {
func TestCreateTelemetryJobWorker_Succeeds(t *testing.T) {
g := NewWithT(t)

exporter := &telemetryfakes.FakeExporter{}
Expand All @@ -36,3 +37,22 @@ func TestCreateTelemetryJobWorker(t *testing.T) {
_, data := exporter.ExportArgsForCall(0)
g.Expect(data).To(Equal(&expData))
}

func TestCreateTelemetryJobWorker_CollectFails(t *testing.T) {
g := NewWithT(t)

exporter := &telemetryfakes.FakeExporter{}
dataCollector := &telemetryfakes.FakeDataCollector{}

worker := telemetry.CreateTelemetryJobWorker(zap.New(), exporter, dataCollector)

expData := telemetry.Data{}
dataCollector.CollectReturns(expData, errors.New("failed to collect cluster information"))

timeout := 10 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

worker(ctx)
g.Expect(exporter.ExportCallCount()).To(Equal(0))
}
3 changes: 3 additions & 0 deletions internal/mode/static/usage/job_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,19 @@ func CreateUsageJobWorker(
nodeCount, err := CollectNodeCount(ctx, k8sClient)
if err != nil {
logger.Error(err, "Failed to collect node count")
return
}

podCount, err := GetTotalNGFPodCount(ctx, k8sClient)
if err != nil {
logger.Error(err, "Failed to collect replica count")
return
}

clusterUID, err := telemetry.CollectClusterID(ctx, k8sClient)
if err != nil {
logger.Error(err, "Failed to collect cluster UID")
return
}

clusterDetails := ClusterDetails{
Expand Down
179 changes: 131 additions & 48 deletions internal/mode/static/usage/job_worker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,26 @@ package usage_test

import (
"context"
"errors"
"testing"
"time"

. "github.com/onsi/gomega"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/fake"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

"github.com/nginxinc/nginx-gateway-fabric/internal/framework/events/eventsfakes"
"github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/config"
"github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/usage"
"github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/usage/usagefakes"
)

func TestCreateUsageJobWorker(t *testing.T) {
g := NewWithT(t)

replicas := int32(1)
ngfReplicaSet := &appsv1.ReplicaSet{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -34,64 +36,145 @@ func TestCreateUsageJobWorker(t *testing.T) {
},
}

ngfPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "nginx-gateway",
Name: "ngf-pod",
OwnerReferences: []metav1.OwnerReference{
{
Kind: "ReplicaSet",
Name: "ngf-replicaset",
tests := []struct {
name string
listCalls func(_ context.Context, object client.ObjectList, _ ...client.ListOption) error
getCalls func(_ context.Context, _ types.NamespacedName, object client.Object, _ ...client.GetOption) error
expData usage.ClusterDetails
expErr bool
}{
{
name: "succeeds",
listCalls: func(_ context.Context, object client.ObjectList, _ ...client.ListOption) error {
switch typedList := object.(type) {
case *v1.NodeList:
typedList.Items = append(typedList.Items, v1.Node{})
return nil
case *appsv1.ReplicaSetList:
typedList.Items = append(typedList.Items, *ngfReplicaSet)
return nil
}
return nil
},
getCalls: func(_ context.Context, _ types.NamespacedName, object client.Object, _ ...client.GetOption) error {
switch typedObject := object.(type) {
case *v1.Namespace:
typedObject.Name = metav1.NamespaceSystem
typedObject.UID = "1234abcd"
return nil
}
return nil
},
expData: usage.ClusterDetails{
Metadata: usage.Metadata{
UID: "1234abcd",
DisplayName: "my-cluster",
},
NodeCount: 1,
PodDetails: usage.PodDetails{
CurrentPodCounts: usage.CurrentPodsCount{
PodCount: 1,
},
},
},
expErr: false,
},
}

kubeSystem := &v1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: metav1.NamespaceSystem,
UID: "1234abcd",
},
}

k8sClient := fake.NewFakeClient(&v1.Node{}, ngfReplicaSet, ngfPod, kubeSystem)
reporter := &usagefakes.FakeReporter{}

worker := usage.CreateUsageJobWorker(
zap.New(),
k8sClient,
reporter,
config.Config{
GatewayPodConfig: config.GatewayPodConfig{
Namespace: "nginx-gateway",
Name: "ngf-pod",
{
name: "collect node count fails",
listCalls: func(_ context.Context, object client.ObjectList, _ ...client.ListOption) error {
switch object.(type) {
case *v1.NodeList:
return errors.New("failed to collect node list")
}
return nil
},
UsageReportConfig: &config.UsageReportConfig{
ClusterDisplayName: "my-cluster",
getCalls: func(_ context.Context, _ types.NamespacedName, _ client.Object, _ ...client.GetOption) error {
return nil
},
expData: usage.ClusterDetails{},
expErr: true,
},
)

expData := usage.ClusterDetails{
Metadata: usage.Metadata{
UID: "1234abcd",
DisplayName: "my-cluster",
{
name: "collect replica count fails",
listCalls: func(_ context.Context, object client.ObjectList, _ ...client.ListOption) error {
switch typedList := object.(type) {
case *v1.NodeList:
typedList.Items = append(typedList.Items, v1.Node{})
return nil
case *appsv1.ReplicaSetList:
return errors.New("failed to collect replica set list")
}
return nil
},
getCalls: func(_ context.Context, _ types.NamespacedName, _ client.Object, _ ...client.GetOption) error {
return nil
},
expData: usage.ClusterDetails{},
expErr: true,
},
NodeCount: 1,
PodDetails: usage.PodDetails{
CurrentPodCounts: usage.CurrentPodsCount{
PodCount: 1,
{
name: "collect cluster UID fails",
listCalls: func(_ context.Context, object client.ObjectList, _ ...client.ListOption) error {
switch typedList := object.(type) {
case *v1.NodeList:
typedList.Items = append(typedList.Items, v1.Node{})
return nil
case *appsv1.ReplicaSetList:
typedList.Items = append(typedList.Items, *ngfReplicaSet)
return nil
}
return nil
},
getCalls: func(_ context.Context, _ types.NamespacedName, object client.Object, _ ...client.GetOption) error {
switch object.(type) {
case *v1.Namespace:
return errors.New("failed to collect namespace")
}
return nil
},
expData: usage.ClusterDetails{},
expErr: true,
},
}

timeout := 10 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
g := NewWithT(t)

k8sClientReader := &eventsfakes.FakeReader{}
k8sClientReader.ListCalls(test.listCalls)
k8sClientReader.GetCalls(test.getCalls)

worker(ctx)
_, data := reporter.ReportArgsForCall(0)
g.Expect(data).To(Equal(expData))
reporter := &usagefakes.FakeReporter{}

worker := usage.CreateUsageJobWorker(
zap.New(),
k8sClientReader,
reporter,
config.Config{
GatewayPodConfig: config.GatewayPodConfig{
Namespace: "nginx-gateway",
Name: "ngf-pod",
},
UsageReportConfig: &config.UsageReportConfig{
ClusterDisplayName: "my-cluster",
},
},
)

timeout := 10 * time.Second
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()

worker(ctx)
if test.expErr {
g.Expect(reporter.ReportCallCount()).To(Equal(0))
} else {
_, data := reporter.ReportArgsForCall(0)
g.Expect(data).To(Equal(test.expData))
}
})
}
}

func TestGetTotalNGFPodCount(t *testing.T) {
Expand Down

0 comments on commit 1d39271

Please sign in to comment.