Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

client: recover from getter panics #14696

Merged
merged 2 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/14696.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:security
client: recover from panics caused by artifact download to prevent the Nomad client from crashing
```
21 changes: 19 additions & 2 deletions client/allocrunner/taskrunner/getter/getter.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ import (
"fmt"
"net/http"
"net/url"
"runtime/debug"
"strings"

"github.com/hashicorp/go-cleanhttp"
gg "github.com/hashicorp/go-getter"
"github.com/hashicorp/go-hclog"

"github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/interfaces"
Expand All @@ -22,6 +24,8 @@ const (

// Getter wraps go-getter calls in an artifact configuration.
type Getter struct {
logger hclog.Logger

// httpClient is a shared HTTP client for use across all http/https
// Getter instantiations. The HTTP client is designed to be
// thread-safe, and using a pooled transport will help reduce excessive
Expand All @@ -32,8 +36,9 @@ type Getter struct {

// NewGetter returns a new Getter instance. This function is called once per
// client and shared across alloc and task runners.
func NewGetter(config *config.ArtifactConfig) *Getter {
func NewGetter(logger hclog.Logger, config *config.ArtifactConfig) *Getter {
return &Getter{
logger: logger,
httpClient: &http.Client{
Transport: cleanhttp.DefaultPooledTransport(),
},
Expand All @@ -42,7 +47,19 @@ func NewGetter(config *config.ArtifactConfig) *Getter {
}

// GetArtifact downloads an artifact into the specified task directory.
func (g *Getter) GetArtifact(taskEnv interfaces.EnvReplacer, artifact *structs.TaskArtifact) error {
func (g *Getter) GetArtifact(taskEnv interfaces.EnvReplacer, artifact *structs.TaskArtifact) (returnErr error) {
// Recover from panics to avoid crashing the entire Nomad client due to
// artifact download failures, such as bugs in go-getter.
defer func() {
if r := recover(); r != nil {
g.logger.Error("panic while downloading artifact",
"artifact", artifact.GetterSource,
"error", r,
"stack", string(debug.Stack()))
returnErr = fmt.Errorf("getter panic: %v", r)
}
}()

ggURL, err := getGetterUrl(taskEnv, artifact)
if err != nil {
return newGetError(artifact.GetterSource, err, false)
Expand Down
25 changes: 24 additions & 1 deletion client/allocrunner/taskrunner/getter/getter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"time"

gg "github.com/hashicorp/go-getter"
"github.com/hashicorp/go-hclog"
clientconfig "github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/client/interfaces"
"github.com/hashicorp/nomad/client/taskenv"
Expand Down Expand Up @@ -56,6 +57,19 @@ func noopTaskEnv(taskDir string) interfaces.EnvReplacer {
}
}

// panicReplacer is a version of taskenv.TaskEnv.ReplaceEnv that panics.
type panicReplacer struct{}

func (panicReplacer) ReplaceEnv(_ string) string {
panic("panic")
}
func (panicReplacer) ClientPath(_ string, _ bool) (string, bool) {
panic("panic")
}
func panicTaskEnv() interfaces.EnvReplacer {
return panicReplacer{}
}

// upperReplacer is a version of taskenv.TaskEnv.ReplaceEnv that upper-cases
// the given input.
type upperReplacer struct {
Expand All @@ -72,7 +86,7 @@ func (u upperReplacer) ClientPath(p string, join bool) (string, bool) {
}

func TestGetter_getClient(t *testing.T) {
getter := NewGetter(&clientconfig.ArtifactConfig{
getter := NewGetter(hclog.NewNullLogger(), &clientconfig.ArtifactConfig{
HTTPReadTimeout: time.Minute,
HTTPMaxBytes: 100_000,
GCSTimeout: 1 * time.Minute,
Expand Down Expand Up @@ -436,6 +450,15 @@ func TestGetArtifact_Setuid(t *testing.T) {
}
}

// TestGetArtifact_handlePanic tests that a panic during the getter execution
// does not cause its goroutine to crash.
func TestGetArtifact_handlePanic(t *testing.T) {
getter := TestDefaultGetter(t)
err := getter.GetArtifact(panicTaskEnv(), &structs.TaskArtifact{})
require.Error(t, err)
require.Contains(t, err.Error(), "panic")
}

func TestGetGetterUrl_Queries(t *testing.T) {
cases := []struct {
name string
Expand Down
3 changes: 2 additions & 1 deletion client/allocrunner/taskrunner/getter/testing.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package getter
import (
"testing"

"github.com/hashicorp/go-hclog"
clientconfig "github.com/hashicorp/nomad/client/config"
"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/stretchr/testify/require"
Expand All @@ -14,5 +15,5 @@ import (
func TestDefaultGetter(t *testing.T) *Getter {
getterConf, err := clientconfig.ArtifactConfigFromAgent(config.DefaultArtifactConfig())
require.NoError(t, err)
return NewGetter(getterConf)
return NewGetter(hclog.NewNullLogger(), getterConf)
}
2 changes: 1 addition & 1 deletion client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ func NewClient(cfg *config.Config, consulCatalog consul.CatalogAPI, consulProxie
serversContactedCh: make(chan struct{}),
serversContactedOnce: sync.Once{},
cpusetManager: cgutil.CreateCPUSetManager(cfg.CgroupParent, cfg.ReservableCores, logger),
getter: getter.NewGetter(cfg.Artifact),
getter: getter.NewGetter(logger.Named("artifact_getter"), cfg.Artifact),
EnterpriseClient: newEnterpriseClient(logger),
}

Expand Down