From e8692b2b0baa06a0b3c684a00017749471206e4a Mon Sep 17 00:00:00 2001 From: Alexander Apalikov Date: Tue, 24 Sep 2019 19:12:17 +0300 Subject: [PATCH] Fix race in stress-test-e2e. Add a function which returns an error instead of failing the test. Add an error channel to finish the stress test gracefully in case there is no capacity in a cluster for a specific test. It is forbidden to call t.Fatalf() from a goroutine other than the main test goroutine. --- test/e2e/fleet_test.go | 45 +++++++++++++++++++++++++++------ test/e2e/framework/framework.go | 14 ++++++++-- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/test/e2e/fleet_test.go b/test/e2e/fleet_test.go index cb1b83ce9b..ab392dedcb 100644 --- a/test/e2e/fleet_test.go +++ b/test/e2e/fleet_test.go @@ -801,8 +801,9 @@ func TestScaleUpAndDownInParallelStressTest(t *testing.T) { framework.WaitForFleetCondition(t, flt, e2e.FleetReadyCount(0)) } } - + errors := make(chan error) var wg sync.WaitGroup + finished := make(chan bool, 1) for fleetNumber, flt := range fleets { wg.Add(1) @@ -815,19 +816,46 @@ func TestScaleUpAndDownInParallelStressTest(t *testing.T) { }() if fleetNumber%2 == 0 { - scaleDownStats.ReportDuration(scaleAndWait(t, flt, 0), nil) + duration, err := scaleAndWait(t, flt, 0) + if err != nil { + fmt.Println(err) + errors <- err + return + } + scaleDownStats.ReportDuration(duration, nil) } for i := 0; i < repeatCount; i++ { if time.Now().After(deadline) { break } - scaleUpStats.ReportDuration(scaleAndWait(t, flt, fleetSize), nil) - scaleDownStats.ReportDuration(scaleAndWait(t, flt, 0), nil) + duration, err := scaleAndWait(t, flt, fleetSize) + if err != nil { + fmt.Println(err) + errors <- err + return + } + scaleUpStats.ReportDuration(duration, nil) + duration, err = scaleAndWait(t, flt, 0) + if err != nil { + fmt.Println(err) + errors <- err + return + } + scaleDownStats.ReportDuration(duration, nil) } }(fleetNumber, flt) } + go func() { + wg.Wait() + close(finished) + }() - wg.Wait() + select { + case 
<-finished: + case err := <-errors: + t.Fatalf("Error in waiting for a fleet to scale: %s", err) + } + fmt.Println("We are Done") } // Creates a fleet and one GameServer with Packed scheduling. @@ -999,11 +1027,12 @@ func schedulingFleetPatch(t *testing.T, return fltRes } -func scaleAndWait(t *testing.T, flt *agonesv1.Fleet, fleetSize int32) time.Duration { +func scaleAndWait(t *testing.T, flt *agonesv1.Fleet, fleetSize int32) (duration time.Duration, err error) { t0 := time.Now() scaleFleetSubresource(t, flt, fleetSize) - framework.WaitForFleetCondition(t, flt, e2e.FleetReadyCount(fleetSize)) - return time.Since(t0) + err = framework.WaitForFleetConditionParallel(t, flt, e2e.FleetReadyCount(fleetSize)) + duration = time.Since(t0) + return } // scaleFleetPatch creates a patch to apply to a Fleet. diff --git a/test/e2e/framework/framework.go b/test/e2e/framework/framework.go index b04cb0799c..de5628bf9d 100644 --- a/test/e2e/framework/framework.go +++ b/test/e2e/framework/framework.go @@ -129,8 +129,17 @@ func (f *Framework) WaitForGameServerState(gs *agonesv1.GameServer, state agones } // WaitForFleetCondition waits for the Fleet to be in a specific condition or fails the test if the condition can't be met in 5 minutes. -// nolint: dupl func (f *Framework) WaitForFleetCondition(t *testing.T, flt *agonesv1.Fleet, condition func(fleet *agonesv1.Fleet) bool) { + t.Helper() + err := f.WaitForFleetConditionParallel(t, flt, condition) + if err != nil { + // Do not call Fatalf() from go routine other than main test go routine, because it could cause a race + t.Fatalf("error waiting for fleet condition on fleet %v", flt.Name) + } +} + +// WaitForFleetConditionParallel waits for the Fleet to be in a specific condition or returns an error if the condition can't be met in 5 minutes. 
+func (f *Framework) WaitForFleetConditionParallel(t *testing.T, flt *agonesv1.Fleet, condition func(fleet *agonesv1.Fleet) bool) error { t.Helper() logrus.WithField("fleet", flt.Name).Info("waiting for fleet condition") err := wait.PollImmediate(2*time.Second, 5*time.Minute, func() (bool, error) { @@ -143,8 +152,9 @@ func (f *Framework) WaitForFleetCondition(t *testing.T, flt *agonesv1.Fleet, con }) if err != nil { logrus.WithField("fleet", flt.Name).WithError(err).Info("error waiting for fleet condition") - t.Fatalf("error waiting for fleet condition on fleet %v", flt.Name) + return err } + return nil } // WaitForFleetAutoScalerCondition waits for the FleetAutoscaler to be in a specific condition or fails the test if the condition can't be met in 2 minutes.