Skip to content

Commit

Permalink
ktesting: tone down warning about leaked test goroutine
Browse files Browse the repository at this point in the history
Not terminating immediately is fairly normal: many controllers
log "terminating" messages while they shut down, which often is
right after test completion, if that is when the test cancels the
context and then doesn't wait for goroutines (which is often
not possible).

Therefore the warning now only gets printed if that happens after more than 10
seconds since test completion, and then the warning is only printed as an info
message, not an error.
  • Loading branch information
pohly committed Jun 11, 2024
1 parent 007e661 commit ed62027
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 9 deletions.
30 changes: 26 additions & 4 deletions ktesting/testinglogger.go
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,19 @@ type tloggerShared struct {
// t gets cleared when the test is completed.
t TL

// We warn once when a leaked goroutine is detected because
// it logs after test completion.
// The time when the test completed.
stopTime time.Time

// We warn once when a leaked goroutine logs after test completion.
//
// Not terminating immediately is fairly normal: many controllers
// log "terminating" messages while they shut down, which often is
// right after test completion, if that is when the test cancels the
// context and then doesn't wait for goroutines (which is often
// not possible).
//
// Therefore there is the [stopGracePeriod] during which messages get
// logged to the global logger without the warning.
goroutineWarningDone bool

formatter serialize.Formatter
Expand All @@ -228,10 +239,15 @@ type tloggerShared struct {
callDepth int
}

// Log output of a leaked goroutine during this period after test completion
// does not trigger the warning.
const stopGracePeriod = 10 * time.Second

func (ls *tloggerShared) stop() {
ls.mutex.Lock()
defer ls.mutex.Unlock()
ls.t = nil
ls.stopTime = time.Now()
}

// tlogger is the actual LogSink implementation.
Expand All @@ -241,6 +257,8 @@ type tlogger struct {
values []interface{}
}

// fallbackLogger is called while l.shared.mutex is locked and after it has
// been determined that the original testing.TB is no longer usable.
func (l tlogger) fallbackLogger() logr.Logger {
logger := klog.Background().WithValues(l.values...).WithName(l.shared.testName + " leaked goroutine")
if l.prefix != "" {
Expand All @@ -250,8 +268,12 @@ func (l tlogger) fallbackLogger() logr.Logger {
logger = logger.WithCallDepth(l.shared.callDepth + 1)

if !l.shared.goroutineWarningDone {
logger.WithCallDepth(1).Error(nil, "WARNING: test kept at least one goroutine running after test completion", "callstack", string(dbg.Stacks(false)))
l.shared.goroutineWarningDone = true
duration := time.Since(l.shared.stopTime)
if duration > stopGracePeriod {

logger.WithCallDepth(1).Info("WARNING: test kept at least one goroutine running after test completion", "timeSinceCompletion", duration.Round(time.Second), "callstack", string(dbg.Stacks(false)))
l.shared.goroutineWarningDone = true
}
}
return logger
}
Expand Down
15 changes: 10 additions & 5 deletions ktesting/testinglogger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"runtime"
"sync"
"testing"
"time"

"k8s.io/klog/v2"
"k8s.io/klog/v2/internal/test/require"
Expand Down Expand Up @@ -198,6 +199,7 @@ func TestStop(t *testing.T) {
_, _, line, _ = runtime.Caller(0)
logger.Info("simple info message")
logger.Error(nil, "error message")
time.Sleep(15 * time.Second)
logger.WithName("me").WithValues("completed", true).Info("complex info message", "anotherValue", 1)
}()
})
Expand All @@ -212,11 +214,14 @@ func TestStop(t *testing.T) {
// Strip time and pid prefix.
actual = regexp.MustCompile(`(?m)^.* testinglogger_test.go:`).ReplaceAllString(actual, `testinglogger_test.go:`)

// Strip duration.
actual = regexp.MustCompile(`timeSinceCompletion="\d+s"`).ReplaceAllString(actual, `timeSinceCompletion="<...>s"`)

// All lines from the callstack get stripped. We can be sure that it was non-empty because otherwise we wouldn't
// have the < > markers.
//
// Full output:
// testinglogger_test.go:194] "WARNING: test kept at least one goroutine running after test completion" logger="TestStop/Sub leaked goroutine" callstack=<
// testinglogger_test.go:194] "WARNING: test kept at least one goroutine running after test completion" logger="TestStop/Sub leaked goroutine.me" completed=true timeSinceCompletion="15s" callstack=<
// goroutine 23 [running]:
// k8s.io/klog/v2/internal/dbg.Stacks(0x0)
// /nvme/gopath/src/k8s.io/klog/internal/dbg/dbg.go:34 +0x8a
Expand All @@ -233,13 +238,13 @@ func TestStop(t *testing.T) {
// >
actual = regexp.MustCompile(`(?m)^\t.*?\n`).ReplaceAllString(actual, ``)

expected := fmt.Sprintf(`testinglogger_test.go:%d] "WARNING: test kept at least one goroutine running after test completion" logger="TestStop/Sub leaked goroutine" callstack=<
>
testinglogger_test.go:%d] "simple info message" logger="TestStop/Sub leaked goroutine"
expected := fmt.Sprintf(`testinglogger_test.go:%d] "simple info message" logger="TestStop/Sub leaked goroutine"
testinglogger_test.go:%d] "error message" logger="TestStop/Sub leaked goroutine"
testinglogger_test.go:%d] "WARNING: test kept at least one goroutine running after test completion" logger="TestStop/Sub leaked goroutine.me" completed=true timeSinceCompletion="<...>s" callstack=<
>
testinglogger_test.go:%d] "complex info message" logger="TestStop/Sub leaked goroutine.me" completed=true anotherValue=1
`,
line+1, line+1, line+2, line+3)
line+1, line+2, line+4, line+4)
if actual != expected {
t.Errorf("Output does not match. Expected:\n%s\nActual:\n%s\n", expected, actual)
}
Expand Down

0 comments on commit ed62027

Please sign in to comment.