
Commit

Merge remote-tracking branch 'origin/main' into f-verify-rpc-cert-origin
lgfa29 committed Feb 1, 2022
2 parents d1950c4 + 4d5bc23 commit dd59dc5
Showing 66 changed files with 1,844 additions and 422 deletions.
3 changes: 3 additions & 0 deletions .changelog/11878.txt
@@ -0,0 +1,3 @@
```release-note:bug
core: Fixed auto-promotion of canaries in jobs with at least one task group without canaries.
```
3 changes: 3 additions & 0 deletions .changelog/11890.txt
@@ -0,0 +1,3 @@
```release-note:bug
csi: Fixed a bug where garbage collected allocations could block new claims on a volume
```
3 changes: 3 additions & 0 deletions .changelog/11892.txt
@@ -0,0 +1,3 @@
```release-note:bug
csi: Unmount volumes from the client before sending unpublish RPC
```
27 changes: 27 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,12 @@
## 1.2.5 (February 1, 2022)

BUG FIXES:

* csi: Fixed a bug where garbage collected allocations could block new claims on a volume [[GH-11890](https://github.com/hashicorp/nomad/issues/11890)]
* csi: Fixed a bug where releasing volume claims would fail with ACL errors after leadership transitions. [[GH-11891](https://github.com/hashicorp/nomad/issues/11891)]
* csi: Unmount volumes from the client before sending unpublish RPC [[GH-11892](https://github.com/hashicorp/nomad/issues/11892)]
* template: Fixed a bug where client template configuration that did not include any of the new 1.2.4 configuration options could result in none of the configuration getting set. [[GH-11902](https://github.com/hashicorp/nomad/issues/11902)]

## 1.2.4 (January 18, 2022)

FEATURES:
@@ -151,6 +160,15 @@ BUG FIXES:
* server: Fixed a panic on arm64 platform when dispatching a job with a payload [[GH-11396](https://github.com/hashicorp/nomad/issues/11396)]
* server: Fixed a panic that may occur when preempting multiple allocations on the same node [[GH-11346](https://github.com/hashicorp/nomad/issues/11346)]

## 1.1.11 (February 1, 2022)

BUG FIXES:

* csi: Fixed a bug where garbage collected allocations could block new claims on a volume [[GH-11890](https://github.com/hashicorp/nomad/issues/11890)]
* csi: Fixed a bug where releasing volume claims would fail with ACL errors after leadership transitions. [[GH-11891](https://github.com/hashicorp/nomad/issues/11891)]
* csi: Fixed a bug where volume claim releases that were not fully processed before a leadership transition would be ignored [[GH-11776](https://github.com/hashicorp/nomad/issues/11776)]
* csi: Unmount volumes from the client before sending unpublish RPC [[GH-11892](https://github.com/hashicorp/nomad/issues/11892)]

## 1.1.10 (January 18, 2022)

BUG FIXES:
@@ -434,6 +452,15 @@ BUG FIXES:
* server: Fixed a panic that may arise on submission of jobs containing invalid service checks [[GH-10154](https://github.com/hashicorp/nomad/issues/10154)]
* ui: Fixed the rendering of interstitial components shown after processing a dynamic application sizing recommendation. [[GH-10094](https://github.com/hashicorp/nomad/pull/10094)]

## 1.0.17 (February 1, 2022)

BUG FIXES:

* csi: Fixed a bug where garbage collected allocations could block new claims on a volume [[GH-11890](https://github.com/hashicorp/nomad/issues/11890)]
* csi: Fixed a bug where releasing volume claims would fail with ACL errors after leadership transitions. [[GH-11891](https://github.com/hashicorp/nomad/issues/11891)]
* csi: Fixed a bug where volume claim releases that were not fully processed before a leadership transition would be ignored [[GH-11776](https://github.com/hashicorp/nomad/issues/11776)]
* csi: Unmount volumes from the client before sending unpublish RPC [[GH-11892](https://github.com/hashicorp/nomad/issues/11892)]

## 1.0.16 (January 18, 2022)

BUG FIXES:
2 changes: 1 addition & 1 deletion GNUmakefile
@@ -39,7 +39,7 @@ PROTO_COMPARE_TAG ?= v1.0.3$(if $(findstring ent,$(GO_TAGS)),+ent,)

# LAST_RELEASE is the git sha of the latest release corresponding to this branch. main should have the latest
# published release, but backport branches should point to the parent tag (e.g. 1.0.8 in release-1.0.9 after 1.1.0 is cut).
LAST_RELEASE ?= v1.2.4
LAST_RELEASE ?= v1.2.5

default: help

160 changes: 130 additions & 30 deletions client/allocrunner/csi_hook.go
@@ -3,6 +3,8 @@ package allocrunner
import (
"context"
"fmt"
"sync"
"time"

hclog "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
@@ -24,7 +26,9 @@ type csiHook struct {
updater hookResourceSetter
nodeSecret string

volumeRequests map[string]*volumeAndRequest
volumeRequests map[string]*volumeAndRequest
maxBackoffInterval time.Duration
maxBackoffDuration time.Duration
}

// implemented by allocrunner
@@ -42,6 +46,8 @@ func newCSIHook(alloc *structs.Allocation, logger hclog.Logger, csi csimanager.M
updater: updater,
nodeSecret: nodeSecret,
volumeRequests: map[string]*volumeAndRequest{},
maxBackoffInterval: time.Minute,
maxBackoffDuration: time.Hour * 24,
}
}

@@ -103,41 +109,43 @@ func (c *csiHook) Postrun() error {
return nil
}

var mErr *multierror.Error
var wg sync.WaitGroup
errs := make(chan error, len(c.volumeRequests))

for _, pair := range c.volumeRequests {
wg.Add(1)

// CSI RPCs can potentially fail for a very long time if a
// node plugin has failed. split the work into goroutines so
// that operators could potentially reuse one of a set of
// volumes even if this hook is stuck waiting on the others
go func(pair *volumeAndRequest) {
defer wg.Done()

// we can recover an unmount failure if the operator
// brings the plugin back up, so retry every few minutes
// but eventually give up
err := c.unmountWithRetry(pair)
if err != nil {
errs <- err
return
}

mode := structs.CSIVolumeClaimRead
if !pair.request.ReadOnly {
mode = structs.CSIVolumeClaimWrite
}
// we can't recover from this RPC error client-side; the
// volume claim GC job will have to clean up for us once
// the allocation is marked terminal
errs <- c.unpublish(pair)
}(pair)
}

source := pair.request.Source
if pair.request.PerAlloc {
// NOTE: PerAlloc can't be set if we have canaries
source = source + structs.AllocSuffix(c.alloc.Name)
}
wg.Wait()
close(errs) // so we don't block waiting if there were no errors

req := &structs.CSIVolumeUnpublishRequest{
VolumeID: source,
Claim: &structs.CSIVolumeClaim{
AllocationID: c.alloc.ID,
NodeID: c.alloc.NodeID,
Mode: mode,
State: structs.CSIVolumeClaimStateUnpublishing,
},
WriteRequest: structs.WriteRequest{
Region: c.alloc.Job.Region,
Namespace: c.alloc.Job.Namespace,
AuthToken: c.nodeSecret,
},
}
err := c.rpcClient.RPC("CSIVolume.Unpublish",
req, &structs.CSIVolumeUnpublishResponse{})
if err != nil {
mErr = multierror.Append(mErr, err)
}
var mErr *multierror.Error
for err := range errs {
mErr = multierror.Append(mErr, err)
}

return mErr.ErrorOrNil()
}

@@ -231,3 +239,95 @@ func (c *csiHook) shouldRun() bool {

return false
}

func (c *csiHook) unpublish(pair *volumeAndRequest) error {

mode := structs.CSIVolumeClaimRead
if !pair.request.ReadOnly {
mode = structs.CSIVolumeClaimWrite
}

source := pair.request.Source
if pair.request.PerAlloc {
// NOTE: PerAlloc can't be set if we have canaries
source = source + structs.AllocSuffix(c.alloc.Name)
}

req := &structs.CSIVolumeUnpublishRequest{
VolumeID: source,
Claim: &structs.CSIVolumeClaim{
AllocationID: c.alloc.ID,
NodeID: c.alloc.NodeID,
Mode: mode,
State: structs.CSIVolumeClaimStateUnpublishing,
},
WriteRequest: structs.WriteRequest{
Region: c.alloc.Job.Region,
Namespace: c.alloc.Job.Namespace,
AuthToken: c.nodeSecret,
},
}

return c.rpcClient.RPC("CSIVolume.Unpublish",
req, &structs.CSIVolumeUnpublishResponse{})

}

// unmountWithRetry tries to unmount/unstage the volume, retrying with
// exponential backoff capped to a maximum interval
func (c *csiHook) unmountWithRetry(pair *volumeAndRequest) error {

// note: allocrunner hooks don't have access to the client's
// shutdown context, just the allocrunner's shutdown; if we make
// it available in the future we should thread it through here so
// that retry can exit gracefully instead of dropping the
// in-flight goroutine
ctx, cancel := context.WithTimeout(context.TODO(), c.maxBackoffDuration)
defer cancel()
var err error
backoff := time.Second
ticker := time.NewTicker(backoff)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return err
case <-ticker.C:
}

err = c.unmountImpl(pair)
if err == nil {
break
}

if backoff < c.maxBackoffInterval {
backoff = backoff * 2
if backoff > c.maxBackoffInterval {
backoff = c.maxBackoffInterval
}
}
ticker.Reset(backoff)
}
return nil
}

// unmountImpl implements the call to the CSI plugin manager to
// unmount the volume. Each retry will write an "Unmount volume"
// NodeEvent
func (c *csiHook) unmountImpl(pair *volumeAndRequest) error {

mounter, err := c.csimanager.MounterForPlugin(context.TODO(), pair.volume.PluginID)
if err != nil {
return err
}

usageOpts := &csimanager.UsageOptions{
ReadOnly: pair.request.ReadOnly,
AttachmentMode: pair.request.AttachmentMode,
AccessMode: pair.request.AccessMode,
MountOptions: pair.request.MountOptions,
}

return mounter.UnmountVolume(context.TODO(),
pair.volume.ID, pair.volume.RemoteID(), c.alloc.ID, usageOpts)
}
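
For readers skimming the diff: the new Postrun path fans unmount work out to one goroutine per volume and retries UnmountVolume with exponential backoff before sending the unpublish RPC. The standalone sketch below illustrates just the backoff schedule (a one-second wait that doubles up to maxBackoffInterval, with the whole loop bounded by a context, mirroring unmountWithRetry above). The retryWithBackoff function and the main driver here are hypothetical illustrations, not code from this commit.

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// retryWithBackoff is a hypothetical stand-in for csiHook.unmountWithRetry:
// it retries fn on a ticker, doubling the wait up to maxInterval, until fn
// succeeds or ctx expires (the hook bounds ctx with maxBackoffDuration).
func retryWithBackoff(ctx context.Context, maxInterval time.Duration, fn func() error) error {
	backoff := time.Second
	ticker := time.NewTicker(backoff)
	defer ticker.Stop()

	var err error
	for {
		select {
		case <-ctx.Done():
			return err // give up and surface the last attempt's error
		case <-ticker.C:
		}

		if err = fn(); err == nil {
			return nil
		}

		// double the wait, capped at maxInterval
		if backoff < maxInterval {
			backoff *= 2
			if backoff > maxInterval {
				backoff = maxInterval
			}
		}
		ticker.Reset(backoff)
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	attempts := 0
	err := retryWithBackoff(ctx, 2*time.Second, func() error {
		attempts++
		if attempts < 3 {
			return errors.New("plugin not ready")
		}
		return nil
	})
	fmt.Println(attempts, err) // 3 <nil>
}
```

The csi_hook_test.go change below shortens both tunables (100ms interval, 2s total) so the retry loop completes quickly under test.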
10 changes: 7 additions & 3 deletions client/allocrunner/csi_hook_test.go
@@ -5,6 +5,7 @@ import (
"fmt"
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/require"

@@ -59,7 +60,7 @@ func TestCSIHook(t *testing.T) {
"test-alloc-dir/%s/testvolume0/ro-file-system-single-node-reader-only", alloc.ID)},
},
expectedMountCalls: 1,
expectedUnmountCalls: 0, // not until this is done client-side
expectedUnmountCalls: 1,
expectedClaimCalls: 1,
expectedUnpublishCalls: 1,
},
@@ -83,7 +84,7 @@ func TestCSIHook(t *testing.T) {
"test-alloc-dir/%s/testvolume0/ro-file-system-single-node-reader-only", alloc.ID)},
},
expectedMountCalls: 1,
expectedUnmountCalls: 0, // not until this is done client-side
expectedUnmountCalls: 1,
expectedClaimCalls: 1,
expectedUnpublishCalls: 1,
},
@@ -122,7 +123,7 @@ func TestCSIHook(t *testing.T) {
// "test-alloc-dir/%s/testvolume0/ro-file-system-multi-node-reader-only", alloc.ID)},
// },
// expectedMountCalls: 1,
// expectedUnmountCalls: 0, // not until this is done client-side
// expectedUnmountCalls: 1,
// expectedClaimCalls: 1,
// expectedUnpublishCalls: 1,
// },
@@ -144,6 +145,9 @@ func TestCSIHook(t *testing.T) {
},
}
hook := newCSIHook(alloc, logger, mgr, rpcer, ar, ar, "secret")
hook.maxBackoffInterval = 100 * time.Millisecond
hook.maxBackoffDuration = 2 * time.Second

require.NotNil(t, hook)

require.NoError(t, hook.Prerun())
6 changes: 3 additions & 3 deletions client/allocrunner/taskrunner/envoy_bootstrap_hook.go
@@ -18,10 +18,10 @@ import (
"github.com/hashicorp/nomad/client/taskenv"
agentconsul "github.com/hashicorp/nomad/command/agent/consul"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/exptime"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/nomad/structs/config"
"github.com/pkg/errors"
"oss.indeed.com/go/libtime/decay"
)

const envoyBootstrapHookName = "envoy_bootstrap"
@@ -277,7 +277,7 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart

// Since Consul services are registered asynchronously with this task
// hook running, retry until timeout or success.
if backoffErr := exptime.Backoff(func() (bool, error) {
if backoffErr := decay.Backoff(func() (bool, error) {

// If hook is killed, just stop.
select {
@@ -324,7 +324,7 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart
_ = os.Remove(bootstrapFilePath)

return true, cmdErr
}, exptime.BackoffOptions{
}, decay.BackoffOptions{
MaxSleepTime: h.envoyBootstrapWaitTime,
InitialGapSize: h.envoyBoostrapInitialGap,
MaxJitterSize: h.envoyBootstrapMaxJitter,
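
The only functional change in this file is swapping Nomad's in-tree helper/exptime backoff helper for the upstream package it mirrors, oss.indeed.com/go/libtime/decay; the call shape stays the same. A rough usage sketch follows, inferred purely from the call site above: the meaning of the callback's boolean (treated here as "keep retrying") and of each BackoffOptions field is an assumption based on naming, not taken from the library's documentation.

```go
package main

import (
	"errors"
	"fmt"
	"time"

	"oss.indeed.com/go/libtime/decay"
)

func main() {
	attempts := 0

	// Mirrors the envoy_bootstrap hook's call shape. The boolean return is
	// assumed to mean "keep retrying", and the option fields are interpreted
	// from their names; both are assumptions, not documented behavior.
	err := decay.Backoff(func() (bool, error) {
		attempts++
		if attempts < 3 {
			// not ready yet: keep retrying, reporting why
			return true, errors.New("consul service not registered yet")
		}
		// success: stop retrying
		return false, nil
	}, decay.BackoffOptions{
		MaxSleepTime:   10 * time.Second,       // assumed: overall retry budget
		InitialGapSize: 100 * time.Millisecond, // assumed: first wait between attempts
		MaxJitterSize:  25 * time.Millisecond,  // assumed: random jitter added per attempt
	})

	fmt.Println(attempts, err)
}
```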
25 changes: 20 additions & 5 deletions client/lib/cgutil/cpuset_manager_linux_test.go
@@ -57,27 +57,31 @@ func TestCpusetManager_Init(t *testing.T) {
require.DirExists(t, filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName))
}

func TestCpusetManager_AddAlloc(t *testing.T) {
func TestCpusetManager_AddAlloc_single(t *testing.T) {
manager, cleanup := tmpCpusetManager(t)
defer cleanup()
require.NoError(t, manager.Init())

alloc := mock.Alloc()
alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores = manager.parentCpuset.ToSlice()
// reserve just one core (the 0th core, which probably exists)
alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores = cpuset.New(0).ToSlice()
manager.AddAlloc(alloc)

// force reconcile
manager.reconcileCpusets()

// check that no more cores exist in the shared cgroup
// check that the 0th core is no longer available in the shared group
// actual contents of shared group depends on machine core count
require.DirExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName))
require.FileExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus"))
sharedCpusRaw, err := ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus"))
require.NoError(t, err)
sharedCpus, err := cpuset.Parse(string(sharedCpusRaw))
require.NoError(t, err)
require.Empty(t, sharedCpus.ToSlice())
require.NotEmpty(t, sharedCpus.ToSlice())
require.NotContains(t, sharedCpus.ToSlice(), uint16(0))

// check that all cores are allocated to reserved cgroup
// check that the 0th core is allocated to reserved cgroup
require.DirExists(t, filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName))
reservedCpusRaw, err := ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus"))
require.NoError(t, err)
@@ -100,6 +104,17 @@ func TestCpusetManager_AddAlloc(t *testing.T) {
require.Exactly(t, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores, taskCpus.ToSlice())
}

func TestCpusetManager_AddAlloc_subset(t *testing.T) {
t.Skip("todo: add test for #11933")
}

func TestCpusetManager_AddAlloc_all(t *testing.T) {
// cgroupsv2 changes behavior of writing empty cpuset.cpu, which is what
// happens to the /shared group when one or more allocs consume all available
// cores.
t.Skip("todo: add test for #11933")
}

func TestCpusetManager_RemoveAlloc(t *testing.T) {
manager, cleanup := tmpCpusetManager(t)
defer cleanup()
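
The reworked test reserves only core 0 and then checks the shared and reserved cgroups by reading their cpuset.cpus files and parsing them with cpuset.Parse. For context, cpuset.cpus uses the kernel's list format: comma-separated cores and dash-separated ranges such as "0-3,6". The stdlib-only parser below is a hypothetical illustration of that format, not the cpuset package the test actually uses.

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseCpusetList is a hypothetical helper illustrating the kernel's cpuset
// list format read from cpuset.cpus, e.g. "0-3,6" -> [0 1 2 3 6]. It is not
// the parser used by Nomad's client/lib/cpuset package.
func parseCpusetList(s string) ([]int, error) {
	s = strings.TrimSpace(s)
	if s == "" {
		return nil, nil // an empty file means no CPUs in the set
	}
	var cores []int
	for _, part := range strings.Split(s, ",") {
		if lo, hi, ok := strings.Cut(part, "-"); ok {
			start, err := strconv.Atoi(lo)
			if err != nil {
				return nil, err
			}
			end, err := strconv.Atoi(hi)
			if err != nil {
				return nil, err
			}
			for c := start; c <= end; c++ {
				cores = append(cores, c)
			}
		} else {
			c, err := strconv.Atoi(part)
			if err != nil {
				return nil, err
			}
			cores = append(cores, c)
		}
	}
	return cores, nil
}

func main() {
	cores, err := parseCpusetList("0-3,6")
	fmt.Println(cores, err) // [0 1 2 3 6] <nil>
}
```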