Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: fix node tests fail intermittently #4044

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 50 additions & 40 deletions node/pkg/node/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -939,54 +939,64 @@ func runGuardianConfigTests(t *testing.T, testCases []testCaseGuardianConfig) {
// because we're only instantiating the guardians and kill them right after they started running, 2s should be plenty of time
const testTimeout = time.Second * 2

// Test's main lifecycle context.
rootCtx, rootCtxCancel := context.WithTimeout(context.Background(), testTimeout)
defer rootCtxCancel()
func() {
// Test's main lifecycle context.
rootCtx, rootCtxCancel := context.WithTimeout(context.Background(), testTimeout)
defer rootCtxCancel()

// we need to catch a zap.Logger.Fatal() here.
// By default zap.Logger.Fatal() will os.Exit(1), which we can't catch.
// We modify zap's behavior to instead assert that the error is the one we're looking for and then panic
// The panic will be subsequently caught by the supervisor
fatalHook := make(fatalHook)
defer close(fatalHook)
zapLogger, zapObserver, _ := setupLogsCapture(t, zap.WithFatalHook(fatalHook))

supervisor.New(rootCtx, zapLogger, func(ctx context.Context) error {
// Create a sub-context with cancel function that we can pass to G.run.
ctx, ctxCancel := context.WithCancel(ctx)
defer ctxCancel()

if err := supervisor.Run(ctx, tc.name, NewGuardianNode(common.GoTest, nil).Run(ctxCancel, tc.opts...)); err != nil {
panic(err)
}

// we need to catch a zap.Logger.Fatal() here.
// By default zap.Logger.Fatal() will os.Exit(1), which we can't catch.
// We modify zap's behavior to instead assert that the error is the one we're looking for and then panic
// The panic will be subsequently caught by the supervisor
fatalHook := make(fatalHook)
defer close(fatalHook)
zapLogger, zapObserver, _ := setupLogsCapture(t, zap.WithFatalHook(fatalHook))
supervisor.Signal(ctx, supervisor.SignalHealthy)

supervisor.New(rootCtx, zapLogger, func(ctx context.Context) error {
// Create a sub-context with cancel function that we can pass to G.run.
ctx, ctxCancel := context.WithCancel(ctx)
defer ctxCancel()
// wait for all options to get applied
// If we were expecting an error, we should never get past this point.
for len(zapObserver.FilterMessage("GuardianNode initialization done.").All()) == 0 {
time.Sleep(time.Millisecond * 10)
}

if err := supervisor.Run(ctx, tc.name, NewGuardianNode(common.GoTest, nil).Run(ctxCancel, tc.opts...)); err != nil {
panic(err)
}
// Test done.
logger.Info("Test done.")
supervisor.Signal(ctx, supervisor.SignalDone)
rootCtxCancel()

supervisor.Signal(ctx, supervisor.SignalHealthy)
return nil
})

// wait for all options to get applied
// If we were expecting an error, we should never get past this point.
for len(zapObserver.FilterMessage("GuardianNode initialization done.").All()) == 0 {
time.Sleep(time.Millisecond * 10)
select {
case r := <-fatalHook:
if tc.err == "" {
assert.Equal(t, tc.err, r)
}
assert.Contains(t, r, tc.err)
rootCtxCancel()
case <-rootCtx.Done():
assert.NotEqual(t, rootCtx.Err(), context.DeadlineExceeded)
assert.Equal(t, tc.err, "") // we only want to end up here if we did not expect an error.
}

// Test done.
logger.Info("Test done.")
supervisor.Signal(ctx, supervisor.SignalDone)
rootCtxCancel()

return nil
})

select {
case r := <-fatalHook:
if tc.err == "" {
assert.Equal(t, tc.err, r)
// There is a race condition where the logger can get destroyed before the supervisor finishes logging on exit. Wait for the last log message from the supervisor.
count := 0
for len(zapObserver.FilterMessage("supervisor exited").All()) == 0 {
time.Sleep(time.Millisecond * 10)
count++
assert.Greater(t, 100, count)
}
assert.Contains(t, r, tc.err)
rootCtxCancel()
case <-rootCtx.Done():
assert.NotEqual(t, rootCtx.Err(), context.DeadlineExceeded)
assert.Equal(t, tc.err, "") // we only want to end up here if we did not expect an error.
}
}()
}
}

Expand Down
Loading