Emit stress test metrics in Fortio format.

jkowalski · Feb 20, 2019 · 607ab8f · 607ab8f
1 parent 5f6d61a
commit 607ab8f
Show file tree

Hide file tree

Showing 77 changed files with 12,657 additions and 104 deletions.
diff --git a/Gopkg.lock b/Gopkg.lock
diff --git a/Gopkg.toml b/Gopkg.toml
@@ -99,3 +99,7 @@
 [[constraint]]
   name = "contrib.go.opencensus.io/exporter/stackdriver"
   version = "v0.8.0"
+
+[[constraint]]
+  name = "fortio.org/fortio"
+  version = "1.3.1"
diff --git a/build/Makefile b/build/Makefile
@@ -138,6 +138,8 @@ endif
 GO_TEST=$(DOCKER_RUN) go test $(RACE_DETECTOR_ARGS)
 GO_E2E_TEST_ARGS=--kubeconfig /root/.kube/$(kubeconfig_file)
 
+PERF_OUTPUT_DIR=$(mount_path)/build/.perf
+
 go_build_base_path=$(mount_path)
 
 ifdef LOCAL_GO
@@ -148,6 +150,7 @@ ifdef LOCAL_GO
 	GO_BUILD_DARWIN_AMD64=GOOS=darwin GOARCH=amd64 go build
 	GO_TEST=go test -v $(RACE_DETECTOR_ARGS)
 	GO_E2E_TEST_ARGS=
+  PERF_OUTPUT_DIR=$(build_path)/.perf
 	go_build_base_path=$(agones_path)
 endif
 
@@ -229,7 +232,8 @@ stress-test-e2e: $(ensure-build-image)
 		-run '.*StressTest.*' \
 		--gameserver-image=$(GS_TEST_IMAGE) \
 		--pullsecret=$(IMAGE_PULL_SECRET) \
-		--stress $(STRESS_TEST_LEVEL)
+		--stress $(STRESS_TEST_LEVEL) \
+		--perf-output $(PERF_OUTPUT_DIR)
 
 # Run test on install yaml - make sure there is no change
 # mostly this is for CI

diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go
@@ -670,8 +670,11 @@ func TestScaleUpAndDownInParallelStressTest(t *testing.T) {
 
 	var fleets []*v1alpha1.Fleet
 
-	var scaleUpResults e2e.PerfResults
-	var scaleDownResults e2e.PerfResults
+	scaleUpStats := framework.NewStatsCollector(fmt.Sprintf("fleet_%v_scale_up", fleetSize))
+	scaleDownStats := framework.NewStatsCollector(fmt.Sprintf("fleet_%v_scale_down", fleetSize))
+
+	defer scaleUpStats.Report()
+	defer scaleDownStats.Report()
 
 	for fleetNumber := 0; fleetNumber < fleetCount; fleetNumber++ {
 		flt := defaultFleet()
@@ -713,22 +716,19 @@ func TestScaleUpAndDownInParallelStressTest(t *testing.T) {
 			}()
 
 			if fleetNumber%2 == 0 {
-				scaleDownResults.AddSample(scaleAndWait(t, flt, 0))
+				scaleDownStats.ReportDuration(scaleAndWait(t, flt, 0), nil)
 			}
 			for i := 0; i < repeatCount; i++ {
 				if time.Now().After(deadline) {
 					break
 				}
-				scaleUpResults.AddSample(scaleAndWait(t, flt, fleetSize))
-				scaleDownResults.AddSample(scaleAndWait(t, flt, 0))
+				scaleUpStats.ReportDuration(scaleAndWait(t, flt, fleetSize), nil)
+				scaleDownStats.ReportDuration(scaleAndWait(t, flt, 0), nil)
 			}
 		}(fleetNumber, flt)
 	}
 
 	wg.Wait()
-
-	scaleUpResults.Report(fmt.Sprintf("scale up 0 to %v with %v fleets", fleetSize, fleetCount))
-	scaleDownResults.Report(fmt.Sprintf("scale down %v to 0 with %v fleets", fleetSize, fleetCount))
 }
 
 func scaleAndWait(t *testing.T, flt *v1alpha1.Fleet, fleetSize int32) time.Duration {

diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go
@@ -49,10 +49,11 @@ type Framework struct {
 	GameServerImage string
 	PullSecret      string
 	StressTestLevel int
+	PerfOutputDir   string
 }
 
-// New setups a testing framework using a kubeconfig path and the game server image to use for testing.
+// New sets up a testing framework using a kubeconfig path.
-func New(kubeconfig, gsimage string, pullSecret string, stressTestLevel int) (*Framework, error) {
+func New(kubeconfig string) (*Framework, error) {
 	config, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
 	if err != nil {
 		return nil, errors.Wrap(err, "build config from flags failed")
@@ -69,11 +70,8 @@ func New(kubeconfig, gsimage string, pullSecret string, stressTestLevel int) (*F
 	}
 
 	return &Framework{
-		KubeClient:      kubeClient,
-		AgonesClient:    agonesClient,
-		GameServerImage: gsimage,
-		PullSecret:      pullSecret,
-		StressTestLevel: stressTestLevel,
+		KubeClient:   kubeClient,
+		AgonesClient: agonesClient,
 	}, nil
 }
 
@@ -201,6 +199,15 @@ func (f *Framework) WaitForFleetGameServersCondition(flt *v1alpha1.Fleet, cond f
 	})
 }
 
+// NewStatsCollector creates a StatsCollector for emitting performance
+// statistics from load tests and stress tests. The collector is configured
+// with the framework's PerfOutputDir. When a stress test level greater than
+// zero is set, the collector name is prefixed with "stress_<level>_".
+func (f *Framework) NewStatsCollector(name string) *StatsCollector {
+	collector := &StatsCollector{name: name, outputDir: f.PerfOutputDir}
+	if level := f.StressTestLevel; level > 0 {
+		collector.name = fmt.Sprintf("stress_%v_%v", level, name)
+	}
+	return collector
+}
+
 // CleanUp Delete all Agones resources in a given namespace.
 func (f *Framework) CleanUp(ns string) error {
 	logrus.Info("Cleaning up now.")

diff --git a/test/e2e/framework/perf.go b/test/e2e/framework/perf.go
@@ -1,25 +1,35 @@
 package framework
 
 import (
-	"sort"
+	"encoding/json"
+	"net/http"
+	"os"
+	"path/filepath"
 	"sync"
 	"time"
 
+	"fortio.org/fortio/fhttp"
+	"fortio.org/fortio/stats"
 	"github.com/sirupsen/logrus"
+
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
 )
 
-// PerfResults aggregates performance test results.
-// The AddSample() method is safe for concurrent use by multiple goroutines.
-type PerfResults struct {
-	mu      sync.Mutex
-	samples []time.Duration
+// StatsCollector collects latency and throughput counters.
+// The ReportDuration() method is safe for concurrent use by multiple goroutines.
+type StatsCollector struct {
+	// name identifies the collector; Report uses it as the log message, in the
+	// result labels and as the prefix of the output file name.
+	name      string
+	// outputDir is the directory Report writes its JSON file to.
+	// An empty value disables file output.
+	outputDir string
 
+	// mu is held by ReportDuration while it updates the fields below.
+	mu              sync.Mutex
+	// samples holds every duration passed to ReportDuration.
+	samples         []time.Duration
+	// statusCounts counts reported samples per HTTP-style status code,
+	// as derived from the error passed to ReportDuration.
+	statusCounts    map[int]int64
+	// firstSampleTime and lastSampleTime bracket the reported samples;
+	// Report uses the span between them as the run duration.
 	firstSampleTime time.Time
 	lastSampleTime  time.Time
 }
 
-// AddSample adds a single time measurement.
-func (p *PerfResults) AddSample(d time.Duration) {
+// ReportDuration adds a single time measurement.
+func (p *StatsCollector) ReportDuration(d time.Duration, err error) {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 
@@ -29,37 +39,77 @@ func (p *PerfResults) AddSample(d time.Duration) {
 	}
 	p.lastSampleTime = n
 	p.samples = append(p.samples, d)
+	if p.statusCounts == nil {
+		p.statusCounts = map[int]int64{}
+	}
+	p.statusCounts[errToHTTPStatusCode(err)]++
+}
+
+// errToHTTPStatusCode is a crude translation from err to an HTTP status code:
+// nil maps to 200, recognized Kubernetes API errors map to 404, 409, 401 or
+// 503, and any other error maps to 500.
+func errToHTTPStatusCode(err error) int {
+	if err == nil {
+		return http.StatusOK
+	}
+
+	// Checked in order; the first matching predicate decides the code.
+	known := []struct {
+		matches func(error) bool
+		code    int
+	}{
+		{k8serrors.IsNotFound, http.StatusNotFound},
+		{k8serrors.IsConflict, http.StatusConflict},
+		{k8serrors.IsUnauthorized, http.StatusUnauthorized},
+		{k8serrors.IsServiceUnavailable, http.StatusServiceUnavailable},
+	}
+	for _, k := range known {
+		if k.matches(err) {
+			return k.code
+		}
+	}
+	return http.StatusInternalServerError
 }
 
 // Report outputs performance report to log.
-func (p *PerfResults) Report(name string) {
+// When an output directory is configured, the same results are also written
+// there as an indented JSON file using Fortio's HTTPRunnerResults layout.
+// Nothing is reported if no samples were recorded.
+//
+// NOTE(review): Report reads the collector's fields without taking p.mu, so it
+// should only run once all ReportDuration calls have finished.
+func (p *StatsCollector) Report() {
 	if len(p.samples) == 0 {
 		return
 	}
 
-	sort.Slice(p.samples, func(i, j int) bool {
-		return p.samples[i] < p.samples[j]
-	})
-
-	var sum time.Duration
+	// Durations are fed to the histogram in seconds.
+	h := stats.NewHistogram(0, 1)
 	for _, s := range p.samples {
-		sum += s
+		h.Record(s.Seconds())
 	}
 
-	avg := time.Duration(int64(sum) / int64(len(p.samples)))
+	var rr fhttp.HTTPRunnerResults
+	rr.RunType = "HTTP"
+	rr.Labels = "Agones " + p.name
+	rr.StartTime = time.Now()
+	rr.ActualDuration = p.lastSampleTime.Sub(p.firstSampleTime)
+	rr.DurationHistogram = h.Export()
+	rr.DurationHistogram.CalcPercentiles([]float64{50, 90, 95, 99, 99.9})
+	// Emit the per-status counts gathered by ReportDuration instead of an
+	// empty map, so the outcome of each sample reaches the report.
+	rr.RetCodes = p.statusCounts
+	// When the first and last sample share a timestamp the duration is zero
+	// and the division would yield +Inf, which encoding/json refuses to
+	// encode. Leave the QPS at zero in that case.
+	if secs := rr.ActualDuration.Seconds(); secs > 0 {
+		rr.ActualQPS = float64(len(p.samples)) / secs
+	}
+
 	logrus.
-		WithField("avg", avg).
-		WithField("count", len(p.samples)).
-		WithField("min", p.samples[0].Seconds()).
-		WithField("max", p.samples[len(p.samples)-1].Seconds()).
-		WithField("p50", p.samples[len(p.samples)*500/1001].Seconds()).
-		WithField("p90", p.samples[len(p.samples)*900/1001].Seconds()).
-		WithField("p95", p.samples[len(p.samples)*950/1001].Seconds()).
-		WithField("p99", p.samples[len(p.samples)*990/1001].Seconds()).
-		WithField("p999", p.samples[len(p.samples)*999/1001].Seconds()).
+		WithField("avg", rr.DurationHistogram.Avg).
+		WithField("count", rr.DurationHistogram.Count).
+		WithField("min", rr.DurationHistogram.Min).
+		WithField("max", rr.DurationHistogram.Max).
+		WithField("p50", rr.DurationHistogram.CalcPercentile(50)).
+		WithField("p90", rr.DurationHistogram.CalcPercentile(90)).
+		WithField("p95", rr.DurationHistogram.CalcPercentile(95)).
+		WithField("p99", rr.DurationHistogram.CalcPercentile(99)).
+		WithField("p999", rr.DurationHistogram.CalcPercentile(99.9)).
 		WithField("duration", p.lastSampleTime.Sub(p.firstSampleTime).Seconds()).
-		Info(name)
+		Info(p.name)
+
+	if p.outputDir != "" {
+		// Best effort: if the directory cannot be created, os.Create below
+		// fails and that error is logged.
+		os.MkdirAll(p.outputDir, 0755) //nolint:errcheck
 
-	// TODO - use something like Fortio ("fortio.org/fortio/stats") to
-	// generate histogram for long-term storage and analysis.
+		// The timestamp has minute resolution, so two reports with the same
+		// name written within the same minute share a file name.
+		fname := filepath.Join(p.outputDir, p.name+"_"+rr.StartTime.UTC().Format("2006-01-02_1504")+".json")
+		f, err := os.Create(fname)
+		if err != nil {
+			logrus.WithError(err).Error("unable to create performance log")
+			return
+		}
+		defer f.Close() //nolint:errcheck
+
+		e := json.NewEncoder(f)
+		e.SetIndent("", "  ")
+		// Log a failed write instead of silently leaving an empty or partial file.
+		if err := e.Encode(rr); err != nil {
+			logrus.WithError(err).Error("unable to write performance log")
+		}
+	}
 }
diff --git a/test/e2e/main_test.go b/test/e2e/main_test.go
@@ -39,6 +39,7 @@ func TestMain(m *testing.M) {
 	pullSecret := flag.String("pullsecret", "",
 		"optional secret to be used for pulling the gameserver and/or Agones SDK sidecar images")
 	stressTestLevel := flag.Int("stress", 0, "enable stress test at given level 0-100")
+	perfOutputDir := flag.String("perf-output", "", "write performance statistics to the specified directrory")
 
 	flag.Parse()
 
@@ -53,11 +54,16 @@ func TestMain(m *testing.M) {
 		exitCode int
 	)
 
-	if framework, err = e2eframework.New(*kubeconfig, *gsimage, *pullSecret, *stressTestLevel); err != nil {
+	if framework, err = e2eframework.New(*kubeconfig); err != nil {
 		log.Printf("failed to setup framework: %v\n", err)
 		os.Exit(1)
 	}
 
+	framework.GameServerImage = *gsimage
+	framework.PullSecret = *pullSecret
+	framework.StressTestLevel = *stressTestLevel
+	framework.PerfOutputDir = *perfOutputDir
+
 	// run cleanup before tests, to ensure no resources from previous runs exist.
 	err = framework.CleanUp(defaultNs)
 	if err != nil {

diff --git a/vendor/fortio.org/fortio/.gitignore b/vendor/fortio.org/fortio/.gitignore
diff --git a/vendor/fortio.org/fortio/CONTRIBUTING.md b/vendor/fortio.org/fortio/CONTRIBUTING.md