Skip to content

Commit

Permalink
Consolidate; catch panicking goroutines
Browse files Browse the repository at this point in the history
  • Loading branch information
RebeccaMahany committed Jan 8, 2025
1 parent f6fb7c5 commit 4602574
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 24 deletions.
58 changes: 34 additions & 24 deletions ee/errgroup/errgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@ package errgroup

import (
"context"
"fmt"
"log/slog"
"time"

"github.com/pkg/errors"
"golang.org/x/sync/errgroup"
)

Expand Down Expand Up @@ -34,6 +36,22 @@ func NewLoggedErrgroup(ctx context.Context, slogger *slog.Logger) *LoggedErrgrou
// StartGoroutine starts the given goroutine in the errgroup, ensuring that we log its start and exit.
func (l *LoggedErrgroup) StartGoroutine(ctx context.Context, goroutineName string, goroutine func() error) {
l.errgroup.Go(func() error {
// Catch any panicking goroutines and log them
defer func() {
if r := recover(); r != nil {
l.slogger.Log(ctx, slog.LevelError,
"panic occurred in goroutine",
"err", r,
)
if err, ok := r.(error); ok {
l.slogger.Log(ctx, slog.LevelError,
"panic stack trace",
"stack_trace", fmt.Sprintf("%+v", errors.WithStack(err)),
)
}
}
}()

l.slogger.Log(ctx, slog.LevelInfo,
"starting goroutine in errgroup",
"goroutine_name", goroutineName,
Expand All @@ -55,14 +73,7 @@ func (l *LoggedErrgroup) StartGoroutine(ctx context.Context, goroutineName strin
// If the delay is non-zero, the goroutine will not start until after the delay interval has elapsed. The goroutine
// will run on the given interval, and will continue to run until it returns an error or the errgroup shuts down.
func (l *LoggedErrgroup) StartRepeatedGoroutine(ctx context.Context, goroutineName string, interval time.Duration, delay time.Duration, goroutine func() error) {
l.errgroup.Go(func() error {
l.slogger.Log(ctx, slog.LevelInfo,
"starting repeated goroutine in errgroup",
"goroutine_name", goroutineName,
"goroutine_interval", interval.String(),
"goroutine_start_delay", delay.String(),
)

l.StartGoroutine(ctx, goroutineName, func() error {
if delay != 0*time.Second {
select {
case <-time.After(delay):
Expand Down Expand Up @@ -103,31 +114,30 @@ func (l *LoggedErrgroup) StartRepeatedGoroutine(ctx context.Context, goroutineNa
// AddShutdownGoroutine adds the given goroutine to the errgroup, ensuring that we log its start and exit.
// The goroutine will not execute until the errgroup has received a signal to exit.
func (l *LoggedErrgroup) AddShutdownGoroutine(ctx context.Context, goroutineName string, goroutine func() error) {
l.errgroup.Go(func() error {
l.StartGoroutine(ctx, goroutineName, func() error {
// Wait for errgroup to exit
<-l.doneCtx.Done()

l.slogger.Log(ctx, slog.LevelInfo,
"starting shutdown goroutine in errgroup",
"goroutine_name", goroutineName,
)

goroutineStart := time.Now()
err := goroutine()
elapsedTime := time.Since(goroutineStart)

logLevel := slog.LevelInfo
if elapsedTime > maxShutdownGoroutineDuration {
logLevel = slog.LevelWarn
// Log anything amiss about the shutdown goroutine -- did it return an error? Did it take too long?
if err != nil {
l.slogger.Log(ctx, slog.LevelWarn,
"shutdown routine returned err",
"goroutine_name", goroutineName,
"goroutine_run_time", elapsedTime.String(),
"goroutine_err", err,
)
} else if elapsedTime > maxShutdownGoroutineDuration {
l.slogger.Log(ctx, slog.LevelWarn,
"noticed slow shutdown routine",
"goroutine_name", goroutineName,
"goroutine_run_time", elapsedTime.String(),
)
}

l.slogger.Log(ctx, logLevel,
"exiting shutdown goroutine in errgroup",
"goroutine_name", goroutineName,
"goroutine_run_time", elapsedTime.String(),
"goroutine_err", err,
)

// We don't want to actually return the error here, to avoid causing an otherwise successful call
// to `Shutdown` => `Wait` to return an error. Shutdown routine errors don't matter for the success
// of the errgroup overall.
Expand Down
28 changes: 28 additions & 0 deletions ee/errgroup/errgroup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package errgroup
import (
"context"
"errors"
"fmt"
"strconv"
"testing"
"time"
Expand Down Expand Up @@ -100,3 +101,30 @@ func TestShutdown(t *testing.T) {

require.True(t, canceled, "errgroup did not exit")
}

func Test_HandlesPanic(t *testing.T) {
t.Parallel()

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

eg := NewLoggedErrgroup(ctx, multislogger.NewNopLogger())

eg.StartGoroutine(ctx, "test_goroutine", func() error {
testArr := make([]int, 0)
fmt.Println(testArr[2]) // cause out-of-bounds panic
return nil
})

// We expect that the errgroup shuts down -- the test should not panic
eg.Shutdown()
eg.Wait()
canceled := false
select {
case <-eg.Exited():
canceled = true
default:
}

require.True(t, canceled, "errgroup did not exit")
}

0 comments on commit 4602574

Please sign in to comment.