diff --git a/.changelog/11878.txt b/.changelog/11878.txt new file mode 100644 index 000000000000..7023c3c3a73e --- /dev/null +++ b/.changelog/11878.txt @@ -0,0 +1,3 @@ +```release-note:bug +core: Fixed auto-promotion of canaries in jobs with at least one task group without canaries. +``` diff --git a/.changelog/11890.txt b/.changelog/11890.txt new file mode 100644 index 000000000000..1074aa29cb44 --- /dev/null +++ b/.changelog/11890.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Fixed a bug where garbage collected allocations could block new claims on a volume +``` diff --git a/.changelog/11892.txt b/.changelog/11892.txt new file mode 100644 index 000000000000..4a6dc2cb7668 --- /dev/null +++ b/.changelog/11892.txt @@ -0,0 +1,3 @@ +```release-note:bug +csi: Unmount volumes from the client before sending unpublish RPC +``` diff --git a/CHANGELOG.md b/CHANGELOG.md index ec1c40d72fa2..9558ff4a257a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## 1.2.5 (February 1, 2022) + +BUG FIXES: + +* csi: Fixed a bug where garbage collected allocations could block new claims on a volume [[GH-11890](https://github.com/hashicorp/nomad/issues/11890)] +* csi: Fixed a bug where releasing volume claims would fail with ACL errors after leadership transitions. [[GH-11891](https://github.com/hashicorp/nomad/issues/11891)] +* csi: Unmount volumes from the client before sending unpublish RPC [[GH-11892](https://github.com/hashicorp/nomad/issues/11892)] +* template: Fixed a bug where client template configuration that did not include any of the new 1.2.4 configuration options could result in none of the configuration getting set. [[GH-11902](https://github.com/hashicorp/nomad/issues/11902)] + ## 1.2.4 (January 18, 2022) FEATURES: @@ -151,6 +160,15 @@ BUG FIXES: * server: Fixed a panic on arm64 platform when dispatching a job with a payload [[GH-11396](https://github.com/hashicorp/nomad/issues/11396)] * server: Fixed a panic that may occur when preempting multiple allocations on the same node [[GH-11346](https://github.com/hashicorp/nomad/issues/11346)] +## 1.1.11 (February 1, 2022) + +BUG FIXES: + +* csi: Fixed a bug where garbage collected allocations could block new claims on a volume [[GH-11890](https://github.com/hashicorp/nomad/issues/11890)] +* csi: Fixed a bug where releasing volume claims would fail with ACL errors after leadership transitions. [[GH-11891](https://github.com/hashicorp/nomad/issues/11891)] +* csi: Fixed a bug where volume claim releases that were not fully processed before a leadership transition would be ignored [[GH-11776](https://github.com/hashicorp/nomad/issues/11776)] +* csi: Unmount volumes from the client before sending unpublish RPC [[GH-11892](https://github.com/hashicorp/nomad/issues/11892)] + ## 1.1.10 (January 18, 2022) BUG FIXES: @@ -434,6 +452,15 @@ BUG FIXES: * server: Fixed a panic that may arise on submission of jobs containing invalid service checks [[GH-10154](https://github.com/hashicorp/nomad/issues/10154)] * ui: Fixed the rendering of interstitial components shown after processing a dynamic application sizing recommendation. [[GH-10094](https://github.com/hashicorp/nomad/pull/10094)] +## 1.0.17 (February 1, 2022) + +BUG FIXES: + +* csi: Fixed a bug where garbage collected allocations could block new claims on a volume [[GH-11890](https://github.com/hashicorp/nomad/issues/11890)] +* csi: Fixed a bug where releasing volume claims would fail with ACL errors after leadership transitions. 
[[GH-11891](https://github.com/hashicorp/nomad/issues/11891)] +* csi: Fixed a bug where volume claim releases that were not fully processed before a leadership transition would be ignored [[GH-11776](https://github.com/hashicorp/nomad/issues/11776)] +* csi: Unmount volumes from the client before sending unpublish RPC [[GH-11892](https://github.com/hashicorp/nomad/issues/11892)] + ## 1.0.16 (January 18, 2022) BUG FIXES: diff --git a/GNUmakefile b/GNUmakefile index 1620218323ac..6b01979822f5 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -39,7 +39,7 @@ PROTO_COMPARE_TAG ?= v1.0.3$(if $(findstring ent,$(GO_TAGS)),+ent,) # LAST_RELEASE is the git sha of the latest release corresponding to this branch. main should have the latest # published release, but backport branches should point to the parent tag (e.g. 1.0.8 in release-1.0.9 after 1.1.0 is cut). -LAST_RELEASE ?= v1.2.4 +LAST_RELEASE ?= v1.2.5 default: help diff --git a/client/allocrunner/csi_hook.go b/client/allocrunner/csi_hook.go index e7e7385a3e8a..6fb1b2866abb 100644 --- a/client/allocrunner/csi_hook.go +++ b/client/allocrunner/csi_hook.go @@ -3,6 +3,8 @@ package allocrunner import ( "context" "fmt" + "sync" + "time" hclog "github.com/hashicorp/go-hclog" multierror "github.com/hashicorp/go-multierror" @@ -24,7 +26,9 @@ type csiHook struct { updater hookResourceSetter nodeSecret string - volumeRequests map[string]*volumeAndRequest + volumeRequests map[string]*volumeAndRequest + maxBackoffInterval time.Duration + maxBackoffDuration time.Duration } // implemented by allocrunner @@ -42,6 +46,8 @@ func newCSIHook(alloc *structs.Allocation, logger hclog.Logger, csi csimanager.M updater: updater, nodeSecret: nodeSecret, volumeRequests: map[string]*volumeAndRequest{}, + maxBackoffInterval: time.Minute, + maxBackoffDuration: time.Hour * 24, } } @@ -103,41 +109,43 @@ func (c *csiHook) Postrun() error { return nil } - var mErr *multierror.Error + var wg sync.WaitGroup + errs := make(chan error, len(c.volumeRequests)) for _, pair := range c.volumeRequests { + wg.Add(1) + + // CSI RPCs can potentially fail for a very long time if a + // node plugin has failed. 
split the work into goroutines so + // that operators could potentially reuse one of a set of + // volumes even if this hook is stuck waiting on the others + go func(pair *volumeAndRequest) { + defer wg.Done() + + // we can recover an unmount failure if the operator + // brings the plugin back up, so retry every few minutes + // but eventually give up + err := c.unmountWithRetry(pair) + if err != nil { + errs <- err + return + } - mode := structs.CSIVolumeClaimRead - if !pair.request.ReadOnly { - mode = structs.CSIVolumeClaimWrite - } + // we can't recover from this RPC error client-side; the + // volume claim GC job will have to clean up for us once + // the allocation is marked terminal + errs <- c.unpublish(pair) + }(pair) + } - source := pair.request.Source - if pair.request.PerAlloc { - // NOTE: PerAlloc can't be set if we have canaries - source = source + structs.AllocSuffix(c.alloc.Name) - } + wg.Wait() + close(errs) // so we don't block waiting if there were no errors - req := &structs.CSIVolumeUnpublishRequest{ - VolumeID: source, - Claim: &structs.CSIVolumeClaim{ - AllocationID: c.alloc.ID, - NodeID: c.alloc.NodeID, - Mode: mode, - State: structs.CSIVolumeClaimStateUnpublishing, - }, - WriteRequest: structs.WriteRequest{ - Region: c.alloc.Job.Region, - Namespace: c.alloc.Job.Namespace, - AuthToken: c.nodeSecret, - }, - } - err := c.rpcClient.RPC("CSIVolume.Unpublish", - req, &structs.CSIVolumeUnpublishResponse{}) - if err != nil { - mErr = multierror.Append(mErr, err) - } + var mErr *multierror.Error + for err := range errs { + mErr = multierror.Append(mErr, err) } + return mErr.ErrorOrNil() } @@ -231,3 +239,95 @@ func (c *csiHook) shouldRun() bool { return false } + +func (c *csiHook) unpublish(pair *volumeAndRequest) error { + + mode := structs.CSIVolumeClaimRead + if !pair.request.ReadOnly { + mode = structs.CSIVolumeClaimWrite + } + + source := pair.request.Source + if pair.request.PerAlloc { + // NOTE: PerAlloc can't be set if we have canaries + source = source + structs.AllocSuffix(c.alloc.Name) + } + + req := &structs.CSIVolumeUnpublishRequest{ + VolumeID: source, + Claim: &structs.CSIVolumeClaim{ + AllocationID: c.alloc.ID, + NodeID: c.alloc.NodeID, + Mode: mode, + State: structs.CSIVolumeClaimStateUnpublishing, + }, + WriteRequest: structs.WriteRequest{ + Region: c.alloc.Job.Region, + Namespace: c.alloc.Job.Namespace, + AuthToken: c.nodeSecret, + }, + } + + return c.rpcClient.RPC("CSIVolume.Unpublish", + req, &structs.CSIVolumeUnpublishResponse{}) + +} + +// unmountWithRetry tries to unmount/unstage the volume, retrying with +// exponential backoff capped to a maximum interval +func (c *csiHook) unmountWithRetry(pair *volumeAndRequest) error { + + // note: allocrunner hooks don't have access to the client's + // shutdown context, just the allocrunner's shutdown; if we make + // it available in the future we should thread it through here so + // that retry can exit gracefully instead of dropping the + // in-flight goroutine + ctx, cancel := context.WithTimeout(context.TODO(), c.maxBackoffDuration) + defer cancel() + var err error + backoff := time.Second + ticker := time.NewTicker(backoff) + defer ticker.Stop() + for { + select { + case <-ctx.Done(): + return err + case <-ticker.C: + } + + err = c.unmountImpl(pair) + if err == nil { + break + } + + if backoff < c.maxBackoffInterval { + backoff = backoff * 2 + if backoff > c.maxBackoffInterval { + backoff = c.maxBackoffInterval + } + } + ticker.Reset(backoff) + } + return nil +} + +// unmountImpl implements the call to the 
CSI plugin manager to +// unmount the volume. Each retry will write an "Unmount volume" +// NodeEvent +func (c *csiHook) unmountImpl(pair *volumeAndRequest) error { + + mounter, err := c.csimanager.MounterForPlugin(context.TODO(), pair.volume.PluginID) + if err != nil { + return err + } + + usageOpts := &csimanager.UsageOptions{ + ReadOnly: pair.request.ReadOnly, + AttachmentMode: pair.request.AttachmentMode, + AccessMode: pair.request.AccessMode, + MountOptions: pair.request.MountOptions, + } + + return mounter.UnmountVolume(context.TODO(), + pair.volume.ID, pair.volume.RemoteID(), c.alloc.ID, usageOpts) +} diff --git a/client/allocrunner/csi_hook_test.go b/client/allocrunner/csi_hook_test.go index 045ef3e0afce..d05d07385c3d 100644 --- a/client/allocrunner/csi_hook_test.go +++ b/client/allocrunner/csi_hook_test.go @@ -5,6 +5,7 @@ import ( "fmt" "path/filepath" "testing" + "time" "github.com/stretchr/testify/require" @@ -59,7 +60,7 @@ func TestCSIHook(t *testing.T) { "test-alloc-dir/%s/testvolume0/ro-file-system-single-node-reader-only", alloc.ID)}, }, expectedMountCalls: 1, - expectedUnmountCalls: 0, // not until this is done client-side + expectedUnmountCalls: 1, expectedClaimCalls: 1, expectedUnpublishCalls: 1, }, @@ -83,7 +84,7 @@ func TestCSIHook(t *testing.T) { "test-alloc-dir/%s/testvolume0/ro-file-system-single-node-reader-only", alloc.ID)}, }, expectedMountCalls: 1, - expectedUnmountCalls: 0, // not until this is done client-side + expectedUnmountCalls: 1, expectedClaimCalls: 1, expectedUnpublishCalls: 1, }, @@ -122,7 +123,7 @@ func TestCSIHook(t *testing.T) { // "test-alloc-dir/%s/testvolume0/ro-file-system-multi-node-reader-only", alloc.ID)}, // }, // expectedMountCalls: 1, - // expectedUnmountCalls: 0, // not until this is done client-side + // expectedUnmountCalls: 1, // expectedClaimCalls: 1, // expectedUnpublishCalls: 1, // }, @@ -144,6 +145,9 @@ func TestCSIHook(t *testing.T) { }, } hook := newCSIHook(alloc, logger, mgr, rpcer, ar, ar, "secret") + hook.maxBackoffInterval = 100 * time.Millisecond + hook.maxBackoffDuration = 2 * time.Second + require.NotNil(t, hook) require.NoError(t, hook.Prerun()) diff --git a/client/allocrunner/taskrunner/envoy_bootstrap_hook.go b/client/allocrunner/taskrunner/envoy_bootstrap_hook.go index fa9c63a43135..c5aab12ed3c2 100644 --- a/client/allocrunner/taskrunner/envoy_bootstrap_hook.go +++ b/client/allocrunner/taskrunner/envoy_bootstrap_hook.go @@ -18,10 +18,10 @@ import ( "github.com/hashicorp/nomad/client/taskenv" agentconsul "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper" - "github.com/hashicorp/nomad/helper/exptime" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/nomad/structs/config" "github.com/pkg/errors" + "oss.indeed.com/go/libtime/decay" ) const envoyBootstrapHookName = "envoy_bootstrap" @@ -277,7 +277,7 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart // Since Consul services are registered asynchronously with this task // hook running, retry until timeout or success. - if backoffErr := exptime.Backoff(func() (bool, error) { + if backoffErr := decay.Backoff(func() (bool, error) { // If hook is killed, just stop. 
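For reference, a minimal self-contained sketch of the capped exponential backoff pattern that `unmountWithRetry` above implements: the retry interval doubles up to a cap while the whole loop is bounded by a context timeout. The names `retryWithBackoff`, `tryOnce`, `maxInterval`, and `maxDuration` are illustrative only and are not part of this patch.

```go
// Illustrative sketch (not part of this patch): a ticker-based retry
// loop where the interval doubles up to maxInterval and the whole
// loop gives up after maxDuration.
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

func retryWithBackoff(ctx context.Context, maxInterval, maxDuration time.Duration, tryOnce func() error) error {
	ctx, cancel := context.WithTimeout(ctx, maxDuration)
	defer cancel()

	var err error
	backoff := time.Second
	ticker := time.NewTicker(backoff)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return err // deadline hit; return the last attempt's error (nil if none ran)
		case <-ticker.C:
		}

		if err = tryOnce(); err == nil {
			return nil
		}

		// double the wait, capped at maxInterval
		if backoff < maxInterval {
			backoff *= 2
			if backoff > maxInterval {
				backoff = maxInterval
			}
		}
		ticker.Reset(backoff)
	}
}

func main() {
	attempts := 0
	err := retryWithBackoff(context.Background(), 4*time.Second, 30*time.Second, func() error {
		attempts++
		if attempts < 3 {
			return errors.New("plugin not ready")
		}
		return nil
	})
	fmt.Println("attempts:", attempts, "err:", err)
}
```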
select { @@ -324,7 +324,7 @@ func (h *envoyBootstrapHook) Prestart(ctx context.Context, req *ifs.TaskPrestart _ = os.Remove(bootstrapFilePath) return true, cmdErr - }, exptime.BackoffOptions{ + }, decay.BackoffOptions{ MaxSleepTime: h.envoyBootstrapWaitTime, InitialGapSize: h.envoyBoostrapInitialGap, MaxJitterSize: h.envoyBootstrapMaxJitter, diff --git a/client/lib/cgutil/cpuset_manager_linux_test.go b/client/lib/cgutil/cpuset_manager_linux_test.go index c2ed95a55c98..e2bb00679c36 100644 --- a/client/lib/cgutil/cpuset_manager_linux_test.go +++ b/client/lib/cgutil/cpuset_manager_linux_test.go @@ -57,27 +57,31 @@ func TestCpusetManager_Init(t *testing.T) { require.DirExists(t, filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName)) } -func TestCpusetManager_AddAlloc(t *testing.T) { +func TestCpusetManager_AddAlloc_single(t *testing.T) { manager, cleanup := tmpCpusetManager(t) defer cleanup() require.NoError(t, manager.Init()) alloc := mock.Alloc() - alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores = manager.parentCpuset.ToSlice() + // reserve just one core (the 0th core, which probably exists) + alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores = cpuset.New(0).ToSlice() manager.AddAlloc(alloc) + // force reconcile manager.reconcileCpusets() - // check that no more cores exist in the shared cgroup + // check that the 0th core is no longer available in the shared group + // actual contents of shared group depends on machine core count require.DirExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName)) require.FileExists(t, filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) sharedCpusRaw, err := ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, SharedCpusetCgroupName, "cpuset.cpus")) require.NoError(t, err) sharedCpus, err := cpuset.Parse(string(sharedCpusRaw)) require.NoError(t, err) - require.Empty(t, sharedCpus.ToSlice()) + require.NotEmpty(t, sharedCpus.ToSlice()) + require.NotContains(t, sharedCpus.ToSlice(), uint16(0)) - // check that all cores are allocated to reserved cgroup + // check that the 0th core is allocated to reserved cgroup require.DirExists(t, filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName)) reservedCpusRaw, err := ioutil.ReadFile(filepath.Join(manager.cgroupParentPath, ReservedCpusetCgroupName, "cpuset.cpus")) require.NoError(t, err) @@ -100,6 +104,17 @@ func TestCpusetManager_AddAlloc(t *testing.T) { require.Exactly(t, alloc.AllocatedResources.Tasks["web"].Cpu.ReservedCores, taskCpus.ToSlice()) } +func TestCpusetManager_AddAlloc_subset(t *testing.T) { + t.Skip("todo: add test for #11933") +} + +func TestCpusetManager_AddAlloc_all(t *testing.T) { + // cgroupsv2 changes behavior of writing empty cpuset.cpu, which is what + // happens to the /shared group when one or more allocs consume all available + // cores. + t.Skip("todo: add test for #11933") +} + func TestCpusetManager_RemoveAlloc(t *testing.T) { manager, cleanup := tmpCpusetManager(t) defer cleanup() diff --git a/client/pluginmanager/csimanager/volume.go b/client/pluginmanager/csimanager/volume.go index 9bca471cf085..4c6bf1d144e4 100644 --- a/client/pluginmanager/csimanager/volume.go +++ b/client/pluginmanager/csimanager/volume.go @@ -353,11 +353,16 @@ func (v *volumeManager) UnmountVolume(ctx context.Context, volID, remoteID, allo } } + if errors.Is(err, structs.ErrCSIClientRPCIgnorable) { + logger.Trace("unmounting volume failed with ignorable error", "error", err) + err = nil + } + event := structs.NewNodeEvent(). 
SetSubsystem(structs.NodeEventSubsystemStorage). SetMessage("Unmount volume"). AddDetail("volume_id", volID) - if err == nil || errors.Is(err, structs.ErrCSIClientRPCIgnorable) { + if err == nil { event.AddDetail("success", "true") } else { event.AddDetail("success", "false") diff --git a/e2e/consul/check_restart.go b/e2e/consul/check_restart.go index c7756fc13082..030b27a21346 100644 --- a/e2e/consul/check_restart.go +++ b/e2e/consul/check_restart.go @@ -31,7 +31,7 @@ func (tc *CheckRestartE2ETest) AfterEach(f *framework.F) { } for _, id := range tc.jobIds { - _, err := e2e.Command("nomad", "job", "stop", "-purge", id) + err := e2e.StopJob(id, "-purge") f.Assert().NoError(err) } tc.jobIds = []string{} diff --git a/e2e/consultemplate/consultemplate.go b/e2e/consultemplate/consultemplate.go index 034e2b2c28b1..a700268fccf8 100644 --- a/e2e/consultemplate/consultemplate.go +++ b/e2e/consultemplate/consultemplate.go @@ -46,7 +46,7 @@ func (tc *ConsulTemplateTest) AfterEach(f *framework.F) { } for _, id := range tc.jobIDs { - _, err := e2eutil.Command("nomad", "job", "stop", "-purge", id) + err := e2eutil.StopJob(id, "-purge") f.Assert().NoError(err, "could not clean up job", id) } tc.jobIDs = []string{} diff --git a/e2e/csi/ebs.go b/e2e/csi/ebs.go index f2ae4a152d65..6935fd8b54e1 100644 --- a/e2e/csi/ebs.go +++ b/e2e/csi/ebs.go @@ -78,8 +78,8 @@ func (tc *CSIControllerPluginEBSTest) AfterAll(f *framework.F) { // Stop all jobs in test for _, id := range tc.testJobIDs { - out, err := e2e.Command("nomad", "job", "stop", "-purge", id) - f.Assert().NoError(err, out) + err := e2e.StopJob(id, "-purge") + f.Assert().NoError(err) } tc.testJobIDs = []string{} @@ -94,8 +94,8 @@ func (tc *CSIControllerPluginEBSTest) AfterAll(f *framework.F) { // Deregister all plugin jobs in test for _, id := range tc.pluginJobIDs { - out, err := e2e.Command("nomad", "job", "stop", "-purge", id) - f.Assert().NoError(err, out) + err := e2e.StopJob(id, "-purge") + f.Assert().NoError(err) } tc.pluginJobIDs = []string{} @@ -130,7 +130,7 @@ func (tc *CSIControllerPluginEBSTest) TestVolumeClaim(f *framework.F) { // Shutdown (and purge) the writer so we can run a reader. // we could mount the EBS volume with multi-attach, but we // want this test to exercise the unpublish workflow. - _, err = e2e.Command("nomad", "job", "stop", "-purge", writeJobID) + err = e2e.StopJob(writeJobID, "-purge") f.NoError(err) // wait for the volume unpublish workflow to complete diff --git a/e2e/csi/efs.go b/e2e/csi/efs.go index 8a82cee70c1a..bbc41d9528a3 100644 --- a/e2e/csi/efs.go +++ b/e2e/csi/efs.go @@ -93,7 +93,7 @@ func (tc *CSINodeOnlyPluginEFSTest) TestEFSVolumeClaim(f *framework.F) { // Shutdown the writer so we can run a reader. // although EFS should support multiple readers, the plugin // does not. 
- _, err = e2e.Command("nomad", "job", "stop", writeJobID) + err = e2e.StopJob(writeJobID) require.NoError(err) // wait for the volume unpublish workflow to complete @@ -123,8 +123,8 @@ func (tc *CSINodeOnlyPluginEFSTest) AfterEach(f *framework.F) { // Stop all jobs in test for _, id := range tc.testJobIDs { - out, err := e2e.Command("nomad", "job", "stop", "-purge", id) - f.Assert().NoError(err, out) + err := e2e.StopJob(id, "-purge") + f.Assert().NoError(err) } tc.testJobIDs = []string{} @@ -142,8 +142,8 @@ func (tc *CSINodeOnlyPluginEFSTest) AfterEach(f *framework.F) { // Deregister all plugin jobs in test for _, id := range tc.pluginJobIDs { - out, err := e2e.Command("nomad", "job", "stop", "-purge", id) - f.Assert().NoError(err, out) + err := e2e.StopJob(id, "-purge") + f.Assert().NoError(err) } tc.pluginJobIDs = []string{} diff --git a/e2e/e2eutil/job.go b/e2e/e2eutil/job.go index 518e725799c8..cb67ca6d55a8 100644 --- a/e2e/e2eutil/job.go +++ b/e2e/e2eutil/job.go @@ -192,3 +192,28 @@ func DispatchedJobs(jobID string) ([]map[string]string, error) { return summary, nil } + +func StopJob(jobID string, args ...string) error { + + // Build our argument list in the correct order, ensuring the jobID is last + // and the Nomad subcommand are first. + baseArgs := []string{"job", "stop"} + for i := range args { + baseArgs = append(baseArgs, args[i]) + } + baseArgs = append(baseArgs, jobID) + + // Execute the command. We do not care about the stdout, only stderr. + _, err := Command("nomad", baseArgs...) + + if err != nil { + // When stopping a job and monitoring the resulting deployment, we + // expect that the monitor fails and exits with status code one because + // technically the deployment has failed. Overwrite the error to be + // nil. + if strings.Contains(err.Error(), "Description = Cancelled because job is stopped") { + err = nil + } + } + return err +} diff --git a/e2e/namespaces/namespaces.go b/e2e/namespaces/namespaces.go index 67a81d913147..c7700140289d 100644 --- a/e2e/namespaces/namespaces.go +++ b/e2e/namespaces/namespaces.go @@ -42,10 +42,10 @@ func (tc *NamespacesE2ETest) AfterEach(f *framework.F) { ns := pair[0] jobID := pair[1] if ns != "" { - _, err := e2e.Command("nomad", "job", "stop", "-purge", "-namespace", ns, jobID) + err := e2e.StopJob(jobID, "-purge", "-namespace", ns) f.Assert().NoError(err) } else { - _, err := e2e.Command("nomad", "job", "stop", "-purge", jobID) + err := e2e.StopJob(jobID, "-purge") f.Assert().NoError(err) } } @@ -179,6 +179,6 @@ func (tc *NamespacesE2ETest) TestNamespacesFiltering(f *framework.F) { f.Equal(fmt.Sprintf("No job(s) with prefix or id %q found\n", jobA), out) f.Error(err, "exit status 1") - _, err = e2e.Command("nomad", "job", "stop", "-namespace", "NamespaceA", jobA) + err = e2e.StopJob(jobA, "-namespace", "NamespaceA") f.NoError(err, "could not stop job in namespace") } diff --git a/e2e/networking/networking.go b/e2e/networking/networking.go index 90c86e66a7b0..a1d410dd1e64 100644 --- a/e2e/networking/networking.go +++ b/e2e/networking/networking.go @@ -36,7 +36,7 @@ func (tc *NetworkingE2ETest) AfterEach(f *framework.F) { } for _, jobID := range tc.jobIDs { - _, err := e2eutil.Command("nomad", "job", "stop", "-purge", jobID) + err := e2eutil.StopJob(jobID, "-purge") f.NoError(err) } tc.jobIDs = []string{} diff --git a/e2e/rescheduling/rescheduling.go b/e2e/rescheduling/rescheduling.go index dd4895b85b0b..ce9a5f3420ed 100644 --- a/e2e/rescheduling/rescheduling.go +++ b/e2e/rescheduling/rescheduling.go @@ -44,7 +44,7 @@ func (tc 
*RescheduleE2ETest) AfterEach(f *framework.F) { } for _, id := range tc.jobIds { - _, err := e2e.Command("nomad", "job", "stop", "-purge", id) + err := e2e.StopJob(id, "-purge") f.Assert().NoError(err) } tc.jobIds = []string{} diff --git a/e2e/scaling/scaling.go b/e2e/scaling/scaling.go index 0fd90eca663e..cb046ae9c8f0 100644 --- a/e2e/scaling/scaling.go +++ b/e2e/scaling/scaling.go @@ -38,8 +38,8 @@ func (tc *ScalingE2ETest) AfterEach(f *framework.F) { } for _, namespacedJob := range tc.namespacedJobIDs { - _, err := e2eutil.Command("nomad", "job", "stop", "-purge", "-namespace", - namespacedJob[0], namespacedJob[1]) + err := e2eutil.StopJob(namespacedJob[1], "-purge", "-namespace", + namespacedJob[0]) f.NoError(err) } tc.namespacedJobIDs = [][2]string{} diff --git a/e2e/scalingpolicies/scalingpolicies.go b/e2e/scalingpolicies/scalingpolicies.go index 3579219d2063..b011bf4fb0a5 100644 --- a/e2e/scalingpolicies/scalingpolicies.go +++ b/e2e/scalingpolicies/scalingpolicies.go @@ -38,8 +38,8 @@ func (tc *ScalingPolicyE2ETest) AfterEach(f *framework.F) { } for _, namespacedJob := range tc.namespacedJobIDs { - _, err := e2eutil.Command("nomad", "job", "stop", "-purge", "-namespace", - namespacedJob[0], namespacedJob[1]) + err := e2eutil.StopJob(namespacedJob[1], "-purge", "-namespace", + namespacedJob[0]) f.Assert().NoError(err) } tc.namespacedJobIDs = [][2]string{} diff --git a/e2e/volumes/volumes.go b/e2e/volumes/volumes.go index 936c2099b27e..9e7b48cddc3d 100644 --- a/e2e/volumes/volumes.go +++ b/e2e/volumes/volumes.go @@ -41,7 +41,7 @@ func (tc *VolumesTest) AfterEach(f *framework.F) { } for _, id := range tc.jobIDs { - _, err := e2e.Command("nomad", "job", "stop", "-purge", id) + err := e2e.StopJob(id, "-purge") f.Assert().NoError(err) } tc.jobIDs = []string{} diff --git a/go.mod b/go.mod index 96b2e424d18a..5d09af3b0c3f 100644 --- a/go.mod +++ b/go.mod @@ -122,6 +122,7 @@ require ( google.golang.org/grpc v1.42.0 gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 gopkg.in/tomb.v2 v2.0.0-20140626144623-14b3d72120e8 + oss.indeed.com/go/libtime v1.5.0 ) require ( @@ -181,6 +182,7 @@ require ( github.com/go-ole/go-ole v1.2.4 // indirect github.com/godbus/dbus/v5 v5.0.4 // indirect github.com/gogo/protobuf v1.3.2 // indirect + github.com/gojuno/minimock/v3 v3.0.6 // indirect github.com/golang-jwt/jwt/v4 v4.0.0 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/btree v1.0.0 // indirect diff --git a/go.sum b/go.sum index e7fcead211c7..59bfbb44b875 100644 --- a/go.sum +++ b/go.sum @@ -522,6 +522,9 @@ github.com/gogo/protobuf v1.3.0/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXP github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= +github.com/gojuno/minimock/v3 v3.0.4/go.mod h1:HqeqnwV8mAABn3pO5hqF+RE7gjA0jsN8cbbSogoGrzI= +github.com/gojuno/minimock/v3 v3.0.6 h1:YqHcVR10x2ZvswPK8Ix5yk+hMpspdQ3ckSpkOzyF85I= +github.com/gojuno/minimock/v3 v3.0.6/go.mod h1:v61ZjAKHr+WnEkND63nQPCZ/DTfQgJdvbCi3IuoMblY= github.com/golang-jwt/jwt/v4 v4.0.0 h1:RAqyYixv1p7uEnocuy8P1nru5wprCh/MH2BIlW5z5/o= github.com/golang-jwt/jwt/v4 v4.0.0/go.mod h1:/xlHOz8bRuivTWchD4jCa+NbatV+wEUSzwAxVc6locg= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= @@ -788,6 +791,7 @@ github.com/hashicorp/vic 
v1.5.1-0.20190403131502-bbfe86ec9443/go.mod h1:bEpDU35n github.com/hashicorp/yamux v0.0.0-20180604194846-3520598351bb/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM= github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d h1:kJCB4vdITiW1eC1vq2e6IsrXKrZit1bv/TDYFGMp4BQ= github.com/hashicorp/yamux v0.0.0-20181012175058-2f1d1f20f75d/go.mod h1:+NfK9FKeTrX5uv1uIXGdwYDTeHna2qgaIlx54MXqjAM= +github.com/hexdigest/gowrap v1.1.7/go.mod h1:Z+nBFUDLa01iaNM+/jzoOA1JJ7sm51rnYFauKFUB5fs= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/hpcloud/tail v1.0.1-0.20170814160653-37f427138745 h1:8as8OQ+RF1QrsHvWWsKBtBKINhD9QaD1iozA1wrO4aA= github.com/hpcloud/tail v1.0.1-0.20170814160653-37f427138745/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= @@ -1026,6 +1030,7 @@ github.com/opencontainers/selinux v1.6.0/go.mod h1:VVGKuOLlE7v4PJyT6h7mNWvq1rzqi github.com/opencontainers/selinux v1.8.0/go.mod h1:RScLhm78qiWa2gbVCcGkC7tCGdgk3ogry1nUQF8Evvo= github.com/opencontainers/selinux v1.8.2 h1:c4ca10UMgRcvZ6h0K4HtS15UaVSBEaE+iln2LVpAuGc= github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xAPP8dBsCoU0KuF8= +github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/packethost/packngo v0.1.1-0.20180711074735-b9cb5096f54c h1:vwpFWvAO8DeIZfFeqASzZfsxuWPno9ncAebBEP0N3uE= github.com/packethost/packngo v0.1.1-0.20180711074735-b9cb5096f54c/go.mod h1:otzZQXgoO96RTzDB/Hycg0qZcXZsWJGJRSXbmEIJ+4M= @@ -1862,6 +1867,8 @@ k8s.io/kube-openapi v0.0.0-20201113171705-d219536bb9fd/go.mod h1:WOJ3KddDSol4tAG k8s.io/kubernetes v1.13.0/go.mod h1:ocZa8+6APFNC2tX1DZASIbocyYT5jHzqFVsY5aoB7Jk= k8s.io/utils v0.0.0-20200324210504-a9aa75ae1b89/go.mod h1:sZAwmy6armz5eXlNoLmJcl4F1QuKu7sr+mFQ0byX7Ew= k8s.io/utils v0.0.0-20201110183641-67b214c5f920/go.mod h1:jPW/WVKK9YHAvNhRxK0md/EJ228hCsBRufyofKtW8HA= +oss.indeed.com/go/libtime v1.5.0 h1:wulKS+oHhb3P2wFi1fcA+g8CXiC8+ygFECUQea5ZqLU= +oss.indeed.com/go/libtime v1.5.0/go.mod h1:B2sdEcuzB0zhTKkAuHy4JInKRc7Al3tME4qWam6R7mA= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/helper/exptime/LICENSE.md b/helper/exptime/LICENSE.md deleted file mode 100644 index 861cb2bd27c5..000000000000 --- a/helper/exptime/LICENSE.md +++ /dev/null @@ -1,11 +0,0 @@ -Copyright (c) 2019 The Indeed Engineering Authors. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/helper/exptime/backoff.go b/helper/exptime/backoff.go deleted file mode 100644 index 8213e65cb3ac..000000000000 --- a/helper/exptime/backoff.go +++ /dev/null @@ -1,142 +0,0 @@ -// Package exptime provides a generalized exponential backoff retry implementation. -// -// This package was copied from oss.indeed.com/go/libtime/decay and modified. -package exptime - -import ( - "errors" - "fmt" - "math/rand" - "time" -) - -var ( - // ErrMaximumTimeExceeded indicates the maximum wait time has been exceeded. - ErrMaximumTimeExceeded = errors.New("maximum backoff time exceeded") -) - -// A TryFunc is what gets executed between retry wait periods during execution -// of Backoff. The keepRetrying return value is used to control whether a retry -// attempt should be made. This feature is useful in manipulating control flow -// in cases where it is known a retry will not be successful. -type TryFunc func() (keepRetrying bool, err error) - -// BackoffOptions allow for fine-tuning backoff behavior. -type BackoffOptions struct { - // MaxSleepTime represents the maximum amount of time - // the exponential backoff system will spend sleeping, - // accumulating the amount of time spent asleep between - // retries. - // - // The algorithm starts at an interval of InitialGapSize - // and increases exponentially (x2 each iteration) from there. - // With no jitter, a MaxSleepTime of 10 seconds and InitialGapSize - // of 1 millisecond would suggest a total of 15 attempts - // (since the very last retry truncates the sleep time to - // align exactly with MaxSleepTime). - MaxSleepTime time.Duration - - // InitialGapSize sets the initial amount of time the algorithm - // will sleep before the first retry (after the first attempt). - // The actual amount of sleep time will include a random amount - // of jitter, if MaxJitterSize is non-zero. - InitialGapSize time.Duration - - // MaxJitterSize limits how much randomness we may - // introduce in the duration of each retry interval. - // The purpose of introducing jitter is to mitigate the - // effect of thundering herds - MaxJitterSize time.Duration - - // RandomSeed is used for generating a randomly computed - // jitter size for each retry. - RandomSeed int64 - - // Sleeper is used to cause the process to sleep for - // a computed amount of time. If not set, a default - // implementation based on time.Sleep will be used. - Sleeper Sleeper -} - -// A Sleeper is a useful way for calling time.Sleep -// in a mock-able way for tests. -type Sleeper func(time.Duration) - -// Backoff will attempt to execute function using a configurable -// exponential backoff algorithm. 
function is a TryFunc which requires -// two return parameters - a boolean for optimizing control flow, and -// an error for reporting failure conditions. If the first parameter is -// false, the backoff algorithm will abandon further retry attempts and -// simply return an error. Otherwise, if the returned error is non-nil, the -// backoff algorithm will sleep for an increasing amount of time, and -// then retry again later, until the maximum amount of sleep time has -// been consumed. Once function has executed successfully with no error, -// the backoff algorithm returns a nil error. -func Backoff(function TryFunc, options BackoffOptions) error { - if options.MaxSleepTime <= 0 { - panic("max sleep time must be > 0") - } - - if options.InitialGapSize <= 0 { - panic("initial gap size must be > 0") - } - - if options.MaxJitterSize < 0 { - panic("max jitter size must be >= 0") - } - - if options.MaxJitterSize > (options.MaxSleepTime / 2) { - panic("max jitter size is way too large") - } - - if options.Sleeper == nil { - options.Sleeper = time.Sleep - } - - consumed := time.Duration(0) - gap := options.InitialGapSize - random := rand.New(rand.NewSource(options.RandomSeed)) - - for consumed < options.MaxSleepTime { - keepRetrying, err := function() - if err != nil && !keepRetrying { - return fmt.Errorf("exponential backoff instructed to stop retrying: %w", err) - } - - // we can ignore keepRetrying at this point, since we know - // what to do based on err - if err == nil { - return nil // success - } - - // there was an error, and function wants to keep retrying - // we will sleep, and then let the loop continue - // - // (random.Float64 returns a value [0.0, 1.0), which is used to - // randomly scale the jitter from 0 to MaxJitterSize. - jitter := nextJitter(random.Float64(), options.MaxJitterSize) - duration := gap + jitter - - if (duration + consumed) > options.MaxSleepTime { - // this will be our last try, force the duration - // to line up with the maximum sleep time - duration = options.MaxSleepTime - consumed - } - - // sleep for the configured duration - options.Sleeper(duration) - - // account for how long we intended to sleep - consumed += duration - - // exponentially increase the gap - gap *= 2 - } - - return ErrMaximumTimeExceeded -} - -func nextJitter(fraction float64, maxSize time.Duration) time.Duration { - scaled := fraction * float64(maxSize) - return time.Duration(scaled) -} diff --git a/nomad/core_sched.go b/nomad/core_sched.go index cffb6114ba20..f6aa3c112812 100644 --- a/nomad/core_sched.go +++ b/nomad/core_sched.go @@ -773,7 +773,6 @@ func (c *CoreScheduler) csiVolumeClaimGC(eval *structs.Evaluation) error { "index", oldThreshold, "csi_volume_claim_gc_threshold", c.srv.config.CSIVolumeClaimGCThreshold) -NEXT_VOLUME: for i := iter.Next(); i != nil; i = iter.Next() { vol := i.(*structs.CSIVolume) @@ -785,31 +784,9 @@ NEXT_VOLUME: // we only call the claim release RPC if the volume has claims // that no longer have valid allocations. otherwise we'd send // out a lot of do-nothing RPCs. 
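For reference, a hedged sketch of calling `decay.Backoff` from `oss.indeed.com/go/libtime/decay`, which replaces the removed `exptime` copy above. The option fields and callback shape are taken from the usage in `envoy_bootstrap_hook.go` earlier in this diff; the wrapped operation (`pingOnce`) and the durations are illustrative, and this assumes the upstream API matches the removed copy.

```go
// Hedged sketch: retry an operation with decay.Backoff, using the same
// option fields the envoy bootstrap hook passes above. The wrapped
// operation and durations here are made up for illustration.
package main

import (
	"errors"
	"fmt"
	"time"

	"oss.indeed.com/go/libtime/decay"
)

func main() {
	attempts := 0

	// pingOnce stands in for the real work (e.g. asking Consul whether
	// the sidecar service has been registered yet).
	pingOnce := func() (keepRetrying bool, err error) {
		attempts++
		if attempts < 3 {
			return true, errors.New("not ready yet") // sleep and retry
		}
		return false, nil // success; Backoff returns nil
	}

	err := decay.Backoff(pingOnce, decay.BackoffOptions{
		MaxSleepTime:   10 * time.Second,      // total sleep budget across all retries
		InitialGapSize: 10 * time.Millisecond, // first gap; grows each retry
		MaxJitterSize:  2 * time.Millisecond,  // random jitter added to each gap
	})
	fmt.Println("attempts:", attempts, "err:", err)
}
```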
- for id := range vol.ReadClaims { - alloc, err := c.snap.AllocByID(ws, id) - if err != nil { - return err - } - if alloc == nil || alloc.TerminalStatus() { - err = gcClaims(vol.Namespace, vol.ID) - if err != nil { - return err - } - goto NEXT_VOLUME - } - } - for id := range vol.WriteClaims { - alloc, err := c.snap.AllocByID(ws, id) - if err != nil { - return err - } - if alloc == nil || alloc.TerminalStatus() { - err = gcClaims(vol.Namespace, vol.ID) - if err != nil { - return err - } - goto NEXT_VOLUME - } + vol, err := c.snap.CSIVolumeDenormalize(ws, vol) + if err != nil { + return err } if len(vol.PastClaims) > 0 { err = gcClaims(vol.Namespace, vol.ID) diff --git a/nomad/core_sched_test.go b/nomad/core_sched_test.go index a19d4395b07d..095975a31f66 100644 --- a/nomad/core_sched_test.go +++ b/nomad/core_sched_test.go @@ -2383,19 +2383,64 @@ func TestCoreScheduler_CSIVolumeClaimGC(t *testing.T) { c := core.(*CoreScheduler) require.NoError(c.csiVolumeClaimGC(gc)) - // the volumewatcher will hit an error here because there's no - // path to the node. but we can't update the claim to bypass the - // client RPCs without triggering the volumewatcher's normal code - // path. + // TODO(tgross): the condition below means this test doesn't tell + // us much; ideally we should be intercepting the claim request + // and verifying that we send the expected claims but we don't + // have test infra in place to do that for server RPCs + + // sending the GC claim will trigger the volumewatcher's normal + // code path. but the volumewatcher will hit an error here + // because there's no path to the node, so we shouldn't see + // the WriteClaims removed require.Eventually(func() bool { vol, _ := state.CSIVolumeByID(ws, ns, volID) return len(vol.WriteClaims) == 1 && len(vol.WriteAllocs) == 1 && - len(vol.PastClaims) == 0 + len(vol.PastClaims) == 1 }, time.Second*1, 10*time.Millisecond, "claims were released unexpectedly") } +func TestCoreScheduler_CSIBadState_ClaimGC(t *testing.T) { + t.Parallel() + require := require.New(t) + + srv, shutdown := TestServer(t, func(c *Config) { + c.NumSchedulers = 0 // Prevent automatic dequeue + }) + + defer shutdown() + testutil.WaitForLeader(t, srv.RPC) + + err := state.TestBadCSIState(t, srv.State()) + require.NoError(err) + + snap, err := srv.State().Snapshot() + require.NoError(err) + core := NewCoreScheduler(srv, snap) + + index, _ := srv.State().LatestIndex() + index++ + gc := srv.coreJobEval(structs.CoreJobForceGC, index) + c := core.(*CoreScheduler) + require.NoError(c.csiVolumeClaimGC(gc)) + + require.Eventually(func() bool { + vol, _ := srv.State().CSIVolumeByID(nil, + structs.DefaultNamespace, "csi-volume-nfs0") + if len(vol.PastClaims) != 2 { + return false + } + for _, claim := range vol.PastClaims { + if claim.State != structs.CSIVolumeClaimStateUnpublishing { + return false + } + } + return true + }, time.Second*1, 10*time.Millisecond, "invalid claims should be marked for GC") + +} + func TestCoreScheduler_FailLoop(t *testing.T) { t.Parallel() require := require.New(t) diff --git a/nomad/csi_endpoint.go b/nomad/csi_endpoint.go index fea730dfc20b..ac63b8fa5659 100644 --- a/nomad/csi_endpoint.go +++ b/nomad/csi_endpoint.go @@ -615,39 +615,25 @@ func (v *CSIVolume) nodeUnpublishVolume(vol *structs.CSIVolume, claim *structs.C return v.checkpointClaim(vol, claim) } - // The RPC sent from the 'nomad node detach' command won't have an + // The RPC sent from the 'nomad node detach' command or GC won't have an // allocation ID set so we try to unpublish every 
terminal or invalid - // alloc on the node - allocIDs := []string{} + // alloc on the node, all of which will be in PastClaims after denormalizing state := v.srv.fsm.State() vol, err := state.CSIVolumeDenormalize(memdb.NewWatchSet(), vol) if err != nil { return err } - for allocID, alloc := range vol.ReadAllocs { - if alloc == nil { - rclaim, ok := vol.ReadClaims[allocID] - if ok && rclaim.NodeID == claim.NodeID { - allocIDs = append(allocIDs, allocID) - } - } else if alloc.NodeID == claim.NodeID && alloc.TerminalStatus() { - allocIDs = append(allocIDs, allocID) - } - } - for allocID, alloc := range vol.WriteAllocs { - if alloc == nil { - wclaim, ok := vol.WriteClaims[allocID] - if ok && wclaim.NodeID == claim.NodeID { - allocIDs = append(allocIDs, allocID) - } - } else if alloc.NodeID == claim.NodeID && alloc.TerminalStatus() { - allocIDs = append(allocIDs, allocID) + + claimsToUnpublish := []*structs.CSIVolumeClaim{} + for _, pastClaim := range vol.PastClaims { + if claim.NodeID == pastClaim.NodeID { + claimsToUnpublish = append(claimsToUnpublish, pastClaim) } } + var merr multierror.Error - for _, allocID := range allocIDs { - claim.AllocationID = allocID - err := v.nodeUnpublishVolumeImpl(vol, claim) + for _, pastClaim := range claimsToUnpublish { + err := v.nodeUnpublishVolumeImpl(vol, pastClaim) if err != nil { merr.Errors = append(merr.Errors, err) } @@ -668,8 +654,8 @@ func (v *CSIVolume) nodeUnpublishVolumeImpl(vol *structs.CSIVolume, claim *struc ExternalID: vol.RemoteID(), AllocID: claim.AllocationID, NodeID: claim.NodeID, - AttachmentMode: vol.AttachmentMode, - AccessMode: vol.AccessMode, + AttachmentMode: claim.AttachmentMode, + AccessMode: claim.AccessMode, ReadOnly: claim.Mode == structs.CSIVolumeClaimRead, } err := v.srv.RPC("ClientCSI.NodeDetachVolume", diff --git a/nomad/deploymentwatcher/deployment_watcher.go b/nomad/deploymentwatcher/deployment_watcher.go index f12357d15514..bb7bc1f52584 100644 --- a/nomad/deploymentwatcher/deployment_watcher.go +++ b/nomad/deploymentwatcher/deployment_watcher.go @@ -283,9 +283,16 @@ func (w *deploymentWatcher) autoPromoteDeployment(allocs []*structs.AllocListStu return nil } - // AutoPromote iff every task group is marked auto_promote and is healthy. The whole + // AutoPromote iff every task group with canaries is marked auto_promote and is healthy. The whole // job version has been incremented, so we promote together. 
See also AutoRevert for _, dstate := range d.TaskGroups { + + // skip auto promote canary validation if the task group has no canaries + // to prevent auto promote hanging on mixed canary/non-canary taskgroup deploys + if dstate.DesiredCanaries < 1 { + continue + } + if !dstate.AutoPromote || dstate.DesiredCanaries != len(dstate.PlacedCanaries) { return nil } diff --git a/nomad/deploymentwatcher/deployments_watcher_test.go b/nomad/deploymentwatcher/deployments_watcher_test.go index 64fc6a724a4c..50835b74cc56 100644 --- a/nomad/deploymentwatcher/deployments_watcher_test.go +++ b/nomad/deploymentwatcher/deployments_watcher_test.go @@ -535,15 +535,19 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) { w, m := defaultTestDeploymentWatcher(t) now := time.Now() - // Create 1 UpdateStrategy, 1 job (1 TaskGroup), 2 canaries, and 1 deployment - upd := structs.DefaultUpdateStrategy.Copy() - upd.AutoPromote = true - upd.MaxParallel = 2 - upd.Canary = 2 - upd.ProgressDeadline = 5 * time.Second + // Create 1 UpdateStrategy, 1 job (2 TaskGroups), 2 canaries, and 1 deployment + canaryUpd := structs.DefaultUpdateStrategy.Copy() + canaryUpd.AutoPromote = true + canaryUpd.MaxParallel = 2 + canaryUpd.Canary = 2 + canaryUpd.ProgressDeadline = 5 * time.Second - j := mock.Job() - j.TaskGroups[0].Update = upd + rollingUpd := structs.DefaultUpdateStrategy.Copy() + rollingUpd.ProgressDeadline = 5 * time.Second + + j := mock.MultiTaskGroupJob() + j.TaskGroups[0].Update = canaryUpd + j.TaskGroups[1].Update = rollingUpd d := mock.Deployment() d.JobID = j.ID @@ -551,14 +555,20 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) { // UpdateStrategy are copied in d.TaskGroups = map[string]*structs.DeploymentState{ "web": { - AutoPromote: upd.AutoPromote, - AutoRevert: upd.AutoRevert, - ProgressDeadline: upd.ProgressDeadline, + AutoPromote: canaryUpd.AutoPromote, + AutoRevert: canaryUpd.AutoRevert, + ProgressDeadline: canaryUpd.ProgressDeadline, + DesiredTotal: 2, + }, + "api": { + AutoPromote: rollingUpd.AutoPromote, + AutoRevert: rollingUpd.AutoRevert, + ProgressDeadline: rollingUpd.ProgressDeadline, DesiredTotal: 2, }, } - alloc := func() *structs.Allocation { + canaryAlloc := func() *structs.Allocation { a := mock.Alloc() a.DeploymentID = d.ID a.CreateTime = now.UnixNano() @@ -569,14 +579,36 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) { return a } - a := alloc() - b := alloc() + rollingAlloc := func() *structs.Allocation { + a := mock.Alloc() + a.DeploymentID = d.ID + a.CreateTime = now.UnixNano() + a.ModifyTime = now.UnixNano() + a.TaskGroup = "api" + a.AllocatedResources.Tasks["api"] = a.AllocatedResources.Tasks["web"].Copy() + delete(a.AllocatedResources.Tasks, "web") + a.TaskResources["api"] = a.TaskResources["web"].Copy() + delete(a.TaskResources, "web") + a.DeploymentStatus = &structs.AllocDeploymentStatus{ + Canary: false, + } + return a + } - d.TaskGroups[a.TaskGroup].PlacedCanaries = []string{a.ID, b.ID} - d.TaskGroups[a.TaskGroup].DesiredCanaries = 2 + // Web taskgroup (0) + ca1 := canaryAlloc() + ca2 := canaryAlloc() + + // Api taskgroup (1) + ra1 := rollingAlloc() + ra2 := rollingAlloc() + + d.TaskGroups[ca1.TaskGroup].PlacedCanaries = []string{ca1.ID, ca2.ID} + d.TaskGroups[ca1.TaskGroup].DesiredCanaries = 2 + d.TaskGroups[ra1.TaskGroup].PlacedAllocs = 2 require.NoError(t, m.state.UpsertJob(structs.MsgTypeTestSetup, m.nextIndex(), j), "UpsertJob") require.NoError(t, m.state.UpsertDeployment(m.nextIndex(), d), "UpsertDeployment") - require.NoError(t, 
m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{a, b}), "UpsertAllocs") + require.NoError(t, m.state.UpsertAllocs(structs.MsgTypeTestSetup, m.nextIndex(), []*structs.Allocation{ca1, ca2, ra1, ra2}), "UpsertAllocs") // ============================================================= // Support method calls @@ -595,7 +627,7 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) { matchConfig1 := &matchDeploymentAllocHealthRequestConfig{ DeploymentID: d.ID, - Healthy: []string{a.ID, b.ID}, + Healthy: []string{ca1.ID, ca2.ID, ra1.ID, ra2.ID}, Eval: true, } matcher1 := matchDeploymentAllocHealthRequest(matchConfig1) @@ -629,7 +661,7 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) { // Mark the canaries healthy req := &structs.DeploymentAllocHealthRequest{ DeploymentID: d.ID, - HealthyAllocationIDs: []string{a.ID, b.ID}, + HealthyAllocationIDs: []string{ca1.ID, ca2.ID, ra1.ID, ra2.ID}, } var resp structs.DeploymentUpdateResponse // Calls w.raft.UpdateDeploymentAllocHealth, which is implemented by StateStore in @@ -654,12 +686,12 @@ func TestWatcher_AutoPromoteDeployment(t *testing.T) { require.Equal(t, "running", d.Status) require.True(t, d.TaskGroups["web"].Promoted) - a1, _ := m.state.AllocByID(ws, a.ID) + a1, _ := m.state.AllocByID(ws, ca1.ID) require.False(t, a1.DeploymentStatus.Canary) require.Equal(t, "pending", a1.ClientStatus) require.Equal(t, "run", a1.DesiredStatus) - b1, _ := m.state.AllocByID(ws, b.ID) + b1, _ := m.state.AllocByID(ws, ca2.ID) require.False(t, b1.DeploymentStatus.Canary) } diff --git a/nomad/mock/mock.go b/nomad/mock/mock.go index 95886654624c..beda95a7cf16 100644 --- a/nomad/mock/mock.go +++ b/nomad/mock/mock.go @@ -337,6 +337,88 @@ func Job() *structs.Job { return job } +func MultiTaskGroupJob() *structs.Job { + job := Job() + apiTaskGroup := &structs.TaskGroup{ + Name: "api", + Count: 10, + EphemeralDisk: &structs.EphemeralDisk{ + SizeMB: 150, + }, + RestartPolicy: &structs.RestartPolicy{ + Attempts: 3, + Interval: 10 * time.Minute, + Delay: 1 * time.Minute, + Mode: structs.RestartPolicyModeDelay, + }, + ReschedulePolicy: &structs.ReschedulePolicy{ + Attempts: 2, + Interval: 10 * time.Minute, + Delay: 5 * time.Second, + DelayFunction: "constant", + }, + Migrate: structs.DefaultMigrateStrategy(), + Networks: []*structs.NetworkResource{ + { + Mode: "host", + DynamicPorts: []structs.Port{ + {Label: "http"}, + {Label: "admin"}, + }, + }, + }, + Tasks: []*structs.Task{ + { + Name: "api", + Driver: "exec", + Config: map[string]interface{}{ + "command": "/bin/date", + }, + Env: map[string]string{ + "FOO": "bar", + }, + Services: []*structs.Service{ + { + Name: "${TASK}-backend", + PortLabel: "http", + Tags: []string{"pci:${meta.pci-dss}", "datacenter:${node.datacenter}"}, + Checks: []*structs.ServiceCheck{ + { + Name: "check-table", + Type: structs.ServiceCheckScript, + Command: "/usr/local/check-table-${meta.database}", + Args: []string{"${meta.version}"}, + Interval: 30 * time.Second, + Timeout: 5 * time.Second, + }, + }, + }, + { + Name: "${TASK}-admin", + PortLabel: "admin", + }, + }, + LogConfig: structs.DefaultLogConfig(), + Resources: &structs.Resources{ + CPU: 500, + MemoryMB: 256, + }, + Meta: map[string]string{ + "foo": "bar", + }, + }, + }, + Meta: map[string]string{ + "elb_check_type": "http", + "elb_check_interval": "30s", + "elb_check_min": "3", + }, + } + job.TaskGroups = append(job.TaskGroups, apiTaskGroup) + job.Canonicalize() + return job +} + func LifecycleSideTask(resources structs.Resources, i int) 
*structs.Task { return &structs.Task{ Name: fmt.Sprintf("side-%d", i), diff --git a/nomad/plan_endpoint.go b/nomad/plan_endpoint.go index e7e3a56843e6..a6cd8dbefa67 100644 --- a/nomad/plan_endpoint.go +++ b/nomad/plan_endpoint.go @@ -31,6 +31,10 @@ func (p *Plan) Submit(args *structs.PlanRequest, reply *structs.PlanResponse) er return fmt.Errorf("invalid server connection in region %s: %v", p.srv.Region(), err) } + if args.Plan == nil { + return fmt.Errorf("cannot submit nil plan") + } + // Pause the Nack timer for the eval as it is making progress as long as it // is in the plan queue. We resume immediately after we get a result to // handle the case that the receiving worker dies. diff --git a/nomad/plan_endpoint_test.go b/nomad/plan_endpoint_test.go index 039512bc3e93..a3fb596a6d4c 100644 --- a/nomad/plan_endpoint_test.go +++ b/nomad/plan_endpoint_test.go @@ -8,6 +8,7 @@ import ( "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/stretchr/testify/require" ) func TestPlanEndpoint_Submit(t *testing.T) { @@ -49,3 +50,80 @@ func TestPlanEndpoint_Submit(t *testing.T) { t.Fatalf("missing result") } } + +// TestPlanEndpoint_Submit_Bad asserts that the Plan.Submit endpoint rejects +// bad data with an error instead of panicking. +func TestPlanEndpoint_Submit_Bad(t *testing.T) { + t.Parallel() + + s1, cleanupS1 := TestServer(t, func(c *Config) { + c.NumSchedulers = 0 + }) + defer cleanupS1() + codec := rpcClient(t, s1) + testutil.WaitForLeader(t, s1.RPC) + + // Mock a valid eval being dequeued by a worker + eval := mock.Eval() + s1.evalBroker.Enqueue(eval) + + evalOut, _, err := s1.evalBroker.Dequeue([]string{eval.Type}, time.Second) + require.NoError(t, err) + require.Equal(t, eval, evalOut) + + cases := []struct { + Name string + Plan *structs.Plan + Err string + }{ + { + Name: "Nil", + Plan: nil, + Err: "cannot submit nil plan", + }, + { + Name: "Empty", + Plan: &structs.Plan{}, + Err: "evaluation is not outstanding", + }, + { + Name: "BadEvalID", + Plan: &structs.Plan{ + EvalID: "1234", // does not exist + }, + Err: "evaluation is not outstanding", + }, + { + Name: "MissingToken", + Plan: &structs.Plan{ + EvalID: eval.ID, + }, + Err: "evaluation token does not match", + }, + { + Name: "InvalidToken", + Plan: &structs.Plan{ + EvalID: eval.ID, + EvalToken: "1234", // invalid + }, + Err: "evaluation token does not match", + }, + } + + for i := range cases { + tc := cases[i] + t.Run(tc.Name, func(t *testing.T) { + req := &structs.PlanRequest{ + Plan: tc.Plan, + WriteRequest: structs.WriteRequest{Region: "global"}, + } + var resp structs.PlanResponse + err := msgpackrpc.CallWithCodec(codec, "Plan.Submit", req, &resp) + require.EqualError(t, err, tc.Err) + require.Nil(t, resp.Result) + }) + } + + // Ensure no plans were enqueued + require.Zero(t, s1.planner.planQueue.Stats().Depth) +} diff --git a/nomad/rpc_test.go b/nomad/rpc_test.go index 22f4f85f273b..07f2d9492ee4 100644 --- a/nomad/rpc_test.go +++ b/nomad/rpc_test.go @@ -1139,7 +1139,6 @@ func TestRPC_TLS_Enforcement_RPC(t *testing.T) { WriteRequest: structs.WriteRequest{Region: "global"}, }, "Plan.Submit": &structs.PlanRequest{ - Plan: &structs.Plan{}, WriteRequest: structs.WriteRequest{Region: "global"}, }, "Deployment.Reap": &structs.DeploymentDeleteRequest{ diff --git a/nomad/state/state_store.go b/nomad/state/state_store.go index 7d8decfdd798..f40d44e936c6 100644 --- a/nomad/state/state_store.go +++ b/nomad/state/state_store.go @@ -2194,7 +2194,7 @@ func 
(s *StateStore) CSIVolumeByID(ws memdb.WatchSet, namespace, id string) (*st // we return the volume with the plugins denormalized by default, // because the scheduler needs them for feasibility checking - return s.CSIVolumeDenormalizePluginsTxn(txn, vol.Copy()) + return s.csiVolumeDenormalizePluginsTxn(txn, vol.Copy()) } // CSIVolumesByPluginID looks up csi_volumes by pluginID. Caller should @@ -2326,11 +2326,11 @@ func (s *StateStore) CSIVolumeClaim(index uint64, namespace, id string, claim *s } } - volume, err := s.CSIVolumeDenormalizePluginsTxn(txn, orig.Copy()) + volume, err := s.csiVolumeDenormalizePluginsTxn(txn, orig.Copy()) if err != nil { return err } - volume, err = s.CSIVolumeDenormalizeTxn(txn, nil, volume) + volume, err = s.csiVolumeDenormalizeTxn(txn, nil, volume) if err != nil { return err } @@ -2414,7 +2414,7 @@ func (s *StateStore) CSIVolumeDeregister(index uint64, namespace string, ids []s // volSafeToForce checks if the any of the remaining allocations // are in a non-terminal state. func (s *StateStore) volSafeToForce(txn Txn, v *structs.CSIVolume) bool { - vol, err := s.CSIVolumeDenormalizeTxn(txn, nil, v) + vol, err := s.csiVolumeDenormalizeTxn(txn, nil, v) if err != nil { return false } @@ -2443,15 +2443,12 @@ func (s *StateStore) CSIVolumeDenormalizePlugins(ws memdb.WatchSet, vol *structs } txn := s.db.ReadTxn() defer txn.Abort() - return s.CSIVolumeDenormalizePluginsTxn(txn, vol) + return s.csiVolumeDenormalizePluginsTxn(txn, vol) } -// CSIVolumeDenormalizePluginsTxn returns a CSIVolume with current health and -// plugins, but without allocations. -// Use this for current volume metadata, handling lists of volumes. -// Use CSIVolumeDenormalize for volumes containing both health and current -// allocations. -func (s *StateStore) CSIVolumeDenormalizePluginsTxn(txn Txn, vol *structs.CSIVolume) (*structs.CSIVolume, error) { +// csiVolumeDenormalizePluginsTxn implements +// CSIVolumeDenormalizePlugins, inside a transaction. +func (s *StateStore) csiVolumeDenormalizePluginsTxn(txn Txn, vol *structs.CSIVolume) (*structs.CSIVolume, error) { if vol == nil { return nil, nil } @@ -2484,54 +2481,83 @@ func (s *StateStore) CSIVolumeDenormalizePluginsTxn(txn Txn, vol *structs.CSIVol return vol, nil } -// CSIVolumeDenormalize returns a CSIVolume with allocations +// CSIVolumeDenormalize returns a CSIVolume with its current +// Allocations and Claims, including creating new PastClaims for +// terminal or garbage collected allocations. This ensures we have a +// consistent state. Note that it mutates the original volume and so +// should always be called on a Copy after reading from the state +// store. 
func (s *StateStore) CSIVolumeDenormalize(ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { txn := s.db.ReadTxn() - return s.CSIVolumeDenormalizeTxn(txn, ws, vol) + return s.csiVolumeDenormalizeTxn(txn, ws, vol) } -// CSIVolumeDenormalizeTxn populates a CSIVolume with allocations -func (s *StateStore) CSIVolumeDenormalizeTxn(txn Txn, ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { +// csiVolumeDenormalizeTxn implements CSIVolumeDenormalize inside a transaction +func (s *StateStore) csiVolumeDenormalizeTxn(txn Txn, ws memdb.WatchSet, vol *structs.CSIVolume) (*structs.CSIVolume, error) { if vol == nil { return nil, nil } - for id := range vol.ReadAllocs { - a, err := s.allocByIDImpl(txn, ws, id) - if err != nil { - return nil, err - } - if a != nil { - vol.ReadAllocs[id] = a - // COMPAT(1.0): the CSIVolumeClaim fields were added - // after 0.11.1, so claims made before that may be - // missing this value. (same for WriteAlloc below) - if _, ok := vol.ReadClaims[id]; !ok { - vol.ReadClaims[id] = &structs.CSIVolumeClaim{ + + // note: denormalize mutates the maps we pass in! + denormalize := func( + currentAllocs map[string]*structs.Allocation, + currentClaims, pastClaims map[string]*structs.CSIVolumeClaim, + fallbackMode structs.CSIVolumeClaimMode) error { + + for id := range currentAllocs { + a, err := s.allocByIDImpl(txn, ws, id) + if err != nil { + return err + } + pastClaim := pastClaims[id] + currentClaim := currentClaims[id] + if currentClaim == nil { + // COMPAT(1.4.0): the CSIVolumeClaim fields were added + // after 0.11.1, so claims made before that may be + // missing this value. No clusters should see this + // anymore, so warn nosily in the logs so that + // operators ask us about it. Remove this block and + // the now-unused fallbackMode parameter, and return + // an error if currentClaim is nil in 1.4.0 + s.logger.Warn("volume was missing claim for allocation", + "volume_id", vol.ID, "alloc", id) + currentClaim = &structs.CSIVolumeClaim{ AllocationID: a.ID, NodeID: a.NodeID, - Mode: structs.CSIVolumeClaimRead, + Mode: fallbackMode, State: structs.CSIVolumeClaimStateTaken, } + currentClaims[id] = currentClaim } - } - } - for id := range vol.WriteAllocs { - a, err := s.allocByIDImpl(txn, ws, id) - if err != nil { - return nil, err - } - if a != nil { - vol.WriteAllocs[id] = a - if _, ok := vol.WriteClaims[id]; !ok { - vol.WriteClaims[id] = &structs.CSIVolumeClaim{ - AllocationID: a.ID, - NodeID: a.NodeID, - Mode: structs.CSIVolumeClaimWrite, - State: structs.CSIVolumeClaimStateTaken, + currentAllocs[id] = a + if (a == nil || a.TerminalStatus()) && pastClaim == nil { + // the alloc is garbage collected but nothing has written a PastClaim, + // so create one now + pastClaim = &structs.CSIVolumeClaim{ + AllocationID: id, + NodeID: currentClaim.NodeID, + Mode: currentClaim.Mode, + State: structs.CSIVolumeClaimStateUnpublishing, + AccessMode: currentClaim.AccessMode, + AttachmentMode: currentClaim.AttachmentMode, } + pastClaims[id] = pastClaim } + } + return nil + } + + err := denormalize(vol.ReadAllocs, vol.ReadClaims, vol.PastClaims, + structs.CSIVolumeClaimRead) + if err != nil { + return nil, err + } + err = denormalize(vol.WriteAllocs, vol.WriteClaims, vol.PastClaims, + structs.CSIVolumeClaimWrite) + if err != nil { + return nil, err } // COMPAT: the AccessMode and AttachmentMode fields were added to claims diff --git a/nomad/state/testing.go b/nomad/state/testing.go index 460df609773c..c7a2f3e8e27c 100644 --- a/nomad/state/testing.go 
+++ b/nomad/state/testing.go @@ -1,7 +1,9 @@ package state import ( + "math" "testing" + "time" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/helper/uuid" @@ -124,3 +126,189 @@ func createTestCSIPlugin(s *StateStore, id string, requiresController bool) func s.DeleteNode(structs.MsgTypeTestSetup, index, ids) } } + +func TestBadCSIState(t testing.TB, store *StateStore) error { + + pluginID := "org.democratic-csi.nfs" + + controllerInfo := func(isHealthy bool) map[string]*structs.CSIInfo { + desc := "healthy" + if !isHealthy { + desc = "failed fingerprinting with error" + } + return map[string]*structs.CSIInfo{ + pluginID: { + PluginID: pluginID, + AllocID: uuid.Generate(), + Healthy: isHealthy, + HealthDescription: desc, + RequiresControllerPlugin: true, + ControllerInfo: &structs.CSIControllerInfo{ + SupportsReadOnlyAttach: true, + SupportsAttachDetach: true, + }, + }, + } + } + + nodeInfo := func(nodeName string, isHealthy bool) map[string]*structs.CSIInfo { + desc := "healthy" + if !isHealthy { + desc = "failed fingerprinting with error" + } + return map[string]*structs.CSIInfo{ + pluginID: { + PluginID: pluginID, + AllocID: uuid.Generate(), + Healthy: isHealthy, + HealthDescription: desc, + RequiresControllerPlugin: true, + NodeInfo: &structs.CSINodeInfo{ + ID: nodeName, + MaxVolumes: math.MaxInt64, + RequiresNodeStageVolume: true, + }, + }, + } + } + + nodes := make([]*structs.Node, 3) + for i := range nodes { + n := mock.Node() + n.Attributes["nomad.version"] = "1.2.4" + nodes[i] = n + } + + nodes[0].CSIControllerPlugins = controllerInfo(true) + nodes[0].CSINodePlugins = nodeInfo("nomad-client0", true) + + drainID := uuid.Generate() + + // drained node + nodes[1].CSIControllerPlugins = controllerInfo(false) + nodes[1].CSINodePlugins = nodeInfo("nomad-client1", false) + + nodes[1].LastDrain = &structs.DrainMetadata{ + StartedAt: time.Now().Add(-10 * time.Minute), + UpdatedAt: time.Now().Add(-30 * time.Second), + Status: structs.DrainStatusComplete, + AccessorID: drainID, + } + nodes[1].SchedulingEligibility = structs.NodeSchedulingIneligible + + // previously drained but now eligible + nodes[2].CSIControllerPlugins = controllerInfo(true) + nodes[2].CSINodePlugins = nodeInfo("nomad-client2", true) + nodes[2].LastDrain = &structs.DrainMetadata{ + StartedAt: time.Now().Add(-15 * time.Minute), + UpdatedAt: time.Now().Add(-5 * time.Minute), + Status: structs.DrainStatusComplete, + AccessorID: drainID, + } + nodes[2].SchedulingEligibility = structs.NodeSchedulingEligible + + // Insert nodes into the state store + index := uint64(999) + for _, n := range nodes { + index++ + err := store.UpsertNode(structs.MsgTypeTestSetup, index, n) + if err != nil { + return err + } + } + + allocID0 := uuid.Generate() // nil alloc + allocID2 := uuid.Generate() // nil alloc + + alloc1 := mock.Alloc() + alloc1.ClientStatus = structs.AllocClientStatusRunning + alloc1.DesiredStatus = structs.AllocDesiredStatusRun + + // Insert allocs into the state store + err := store.UpsertAllocs(structs.MsgTypeTestSetup, index, []*structs.Allocation{alloc1}) + if err != nil { + return err + } + + vol := &structs.CSIVolume{ + ID: "csi-volume-nfs0", + Name: "csi-volume-nfs0", + ExternalID: "csi-volume-nfs0", + Namespace: "default", + AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + MountOptions: &structs.CSIMountOptions{ + MountFlags: []string{"noatime"}, + }, + Context: map[string]string{ + "node_attach_driver": "nfs", + 
"provisioner_driver": "nfs-client", + "server": "192.168.56.69", + }, + Capacity: 0, + RequestedCapacityMin: 107374182, + RequestedCapacityMax: 107374182, + RequestedCapabilities: []*structs.CSIVolumeCapability{ + { + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + AccessMode: structs.CSIVolumeAccessModeMultiNodeMultiWriter, + }, + { + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter, + }, + { + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + AccessMode: structs.CSIVolumeAccessModeSingleNodeReader, + }, + }, + WriteAllocs: map[string]*structs.Allocation{ + allocID0: nil, + alloc1.ID: nil, + allocID2: nil, + }, + WriteClaims: map[string]*structs.CSIVolumeClaim{ + allocID0: { + AllocationID: allocID0, + NodeID: nodes[0].ID, + Mode: structs.CSIVolumeClaimWrite, + AccessMode: structs.CSIVolumeAccessModeSingleNodeWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + State: structs.CSIVolumeClaimStateTaken, + }, + alloc1.ID: { + AllocationID: alloc1.ID, + NodeID: nodes[1].ID, + Mode: structs.CSIVolumeClaimWrite, + AccessMode: structs.CSIVolumeAccessModeMultiNodeMultiWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + State: structs.CSIVolumeClaimStateTaken, + }, + allocID2: { + AllocationID: allocID2, + NodeID: nodes[2].ID, + Mode: structs.CSIVolumeClaimWrite, + AccessMode: structs.CSIVolumeAccessModeMultiNodeMultiWriter, + AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem, + State: structs.CSIVolumeClaimStateTaken, + }, + }, + Schedulable: true, + PluginID: pluginID, + Provider: pluginID, + ProviderVersion: "1.4.3", + ControllerRequired: true, + ControllersHealthy: 2, + ControllersExpected: 2, + NodesHealthy: 2, + NodesExpected: 0, + } + vol = vol.Copy() // canonicalize + + err = store.CSIVolumeRegister(index, []*structs.CSIVolume{vol}) + if err != nil { + return err + } + + return nil +} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 2c20932a24fd..5b62935832fd 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -10803,7 +10803,9 @@ type Plan struct { func (p *Plan) GoString() string { out := fmt.Sprintf("(eval %s", p.EvalID[:8]) - out += fmt.Sprintf(", job %s", p.Job.ID) + if p.Job != nil { + out += fmt.Sprintf(", job %s", p.Job.ID) + } if p.Deployment != nil { out += fmt.Sprintf(", deploy %s", p.Deployment.ID[:8]) } diff --git a/nomad/volumewatcher/volume_watcher.go b/nomad/volumewatcher/volume_watcher.go index 28fc94f35366..fe69bca4189c 100644 --- a/nomad/volumewatcher/volume_watcher.go +++ b/nomad/volumewatcher/volume_watcher.go @@ -177,17 +177,10 @@ func (vw *volumeWatcher) isUnclaimed(vol *structs.CSIVolume) bool { return len(vol.ReadClaims) == 0 && len(vol.WriteClaims) == 0 && len(vol.PastClaims) == 0 } +// volumeReapImpl unpublished all the volume's PastClaims. PastClaims +// will be populated from nil or terminal allocs when we call +// CSIVolumeDenormalize(), so this assumes we've done so in the caller func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error { - - // PastClaims written by a volume GC core job will have no allocation, - // so we need to find out which allocs are eligible for cleanup. 
- for _, claim := range vol.PastClaims { - if claim.AllocationID == "" { - vol = vw.collectPastClaims(vol) - break // only need to collect once - } - } - var result *multierror.Error for _, claim := range vol.PastClaims { err := vw.unpublish(vol, claim) @@ -195,9 +188,7 @@ func (vw *volumeWatcher) volumeReapImpl(vol *structs.CSIVolume) error { result = multierror.Append(result, err) } } - return result.ErrorOrNil() - } func (vw *volumeWatcher) collectPastClaims(vol *structs.CSIVolume) *structs.CSIVolume { diff --git a/nomad/volumewatcher/volume_watcher_test.go b/nomad/volumewatcher/volume_watcher_test.go index 848ca58b925a..4bb4ddae4b66 100644 --- a/nomad/volumewatcher/volume_watcher_test.go +++ b/nomad/volumewatcher/volume_watcher_test.go @@ -37,6 +37,7 @@ func TestVolumeWatch_Reap(t *testing.T) { logger: testlog.HCLogger(t), } + vol, _ = srv.State().CSIVolumeDenormalize(nil, vol.Copy()) err := w.volumeReapImpl(vol) require.NoError(err) @@ -48,6 +49,7 @@ func TestVolumeWatch_Reap(t *testing.T) { State: structs.CSIVolumeClaimStateNodeDetached, }, } + vol, _ = srv.State().CSIVolumeDenormalize(nil, vol.Copy()) err = w.volumeReapImpl(vol) require.NoError(err) require.Len(vol.PastClaims, 1) @@ -59,6 +61,7 @@ func TestVolumeWatch_Reap(t *testing.T) { Mode: structs.CSIVolumeClaimGC, }, } + vol, _ = srv.State().CSIVolumeDenormalize(nil, vol.Copy()) err = w.volumeReapImpl(vol) require.NoError(err) require.Len(vol.PastClaims, 2) // alloc claim + GC claim @@ -71,7 +74,37 @@ func TestVolumeWatch_Reap(t *testing.T) { Mode: structs.CSIVolumeClaimRead, }, } + vol, _ = srv.State().CSIVolumeDenormalize(nil, vol.Copy()) err = w.volumeReapImpl(vol) require.NoError(err) require.Len(vol.PastClaims, 2) // alloc claim + GC claim } + +func TestVolumeReapBadState(t *testing.T) { + + store := state.TestStateStore(t) + err := state.TestBadCSIState(t, store) + require.NoError(t, err) + srv := &MockRPCServer{ + state: store, + } + + vol, err := srv.state.CSIVolumeByID(nil, + structs.DefaultNamespace, "csi-volume-nfs0") + require.NoError(t, err) + srv.state.CSIVolumeDenormalize(nil, vol) + + ctx, exitFn := context.WithCancel(context.Background()) + w := &volumeWatcher{ + v: vol, + rpc: srv, + state: srv.State(), + ctx: ctx, + exitFn: exitFn, + logger: testlog.HCLogger(t), + } + + err = w.volumeReapImpl(vol) + require.NoError(t, err) + require.Equal(t, 2, srv.countCSIUnpublish) +} diff --git a/nomad/volumewatcher/volumes_watcher_test.go b/nomad/volumewatcher/volumes_watcher_test.go index 7f0365be3518..c66411631b1a 100644 --- a/nomad/volumewatcher/volumes_watcher_test.go +++ b/nomad/volumewatcher/volumes_watcher_test.go @@ -67,13 +67,18 @@ func TestVolumeWatch_LeadershipTransition(t *testing.T) { plugin := mock.CSIPlugin() node := testNode(plugin, srv.State()) alloc := mock.Alloc() - alloc.ClientStatus = structs.AllocClientStatusComplete + alloc.ClientStatus = structs.AllocClientStatusRunning vol := testVolume(plugin, alloc, node.ID) + index++ + err := srv.State().UpsertAllocs(structs.MsgTypeTestSetup, index, + []*structs.Allocation{alloc}) + require.NoError(err) + watcher.SetEnabled(true, srv.State(), "") index++ - err := srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol}) + err = srv.State().CSIVolumeRegister(index, []*structs.CSIVolume{vol}) require.NoError(err) // we should get or start up a watcher when we get an update for diff --git a/scheduler/feasible.go b/scheduler/feasible.go index 206544249653..3b10331c5c88 100644 --- a/scheduler/feasible.go +++ b/scheduler/feasible.go @@ -15,17 +15,18 @@ import 
( ) const ( - FilterConstraintHostVolumes = "missing compatible host volumes" - FilterConstraintCSIPluginTemplate = "CSI plugin %s is missing from client %s" - FilterConstraintCSIPluginUnhealthyTemplate = "CSI plugin %s is unhealthy on client %s" - FilterConstraintCSIPluginMaxVolumesTemplate = "CSI plugin %s has the maximum number of volumes on client %s" - FilterConstraintCSIVolumesLookupFailed = "CSI volume lookup failed" - FilterConstraintCSIVolumeNotFoundTemplate = "missing CSI Volume %s" - FilterConstraintCSIVolumeNoReadTemplate = "CSI volume %s is unschedulable or has exhausted its available reader claims" - FilterConstraintCSIVolumeNoWriteTemplate = "CSI volume %s is unschedulable or is read-only" - FilterConstraintCSIVolumeInUseTemplate = "CSI volume %s has exhausted its available writer claims" // - FilterConstraintDrivers = "missing drivers" - FilterConstraintDevices = "missing devices" + FilterConstraintHostVolumes = "missing compatible host volumes" + FilterConstraintCSIPluginTemplate = "CSI plugin %s is missing from client %s" + FilterConstraintCSIPluginUnhealthyTemplate = "CSI plugin %s is unhealthy on client %s" + FilterConstraintCSIPluginMaxVolumesTemplate = "CSI plugin %s has the maximum number of volumes on client %s" + FilterConstraintCSIVolumesLookupFailed = "CSI volume lookup failed" + FilterConstraintCSIVolumeNotFoundTemplate = "missing CSI Volume %s" + FilterConstraintCSIVolumeNoReadTemplate = "CSI volume %s is unschedulable or has exhausted its available reader claims" + FilterConstraintCSIVolumeNoWriteTemplate = "CSI volume %s is unschedulable or is read-only" + FilterConstraintCSIVolumeInUseTemplate = "CSI volume %s has exhausted its available writer claims" + FilterConstraintCSIVolumeGCdAllocationTemplate = "CSI volume %s has exhausted its available writer claims and is claimed by a garbage collected allocation %s; waiting for claim to be released" + FilterConstraintDrivers = "missing drivers" + FilterConstraintDevices = "missing devices" ) var ( @@ -320,11 +321,20 @@ func (c *CSIVolumeChecker) isFeasible(n *structs.Node) (bool, string) { return false, fmt.Sprintf(FilterConstraintCSIVolumeNoWriteTemplate, vol.ID) } if !vol.WriteFreeClaims() { - // Check the blocking allocations to see if they belong to this job for id := range vol.WriteAllocs { a, err := c.ctx.State().AllocByID(ws, id) - if err != nil || a == nil || - a.Namespace != c.namespace || a.JobID != c.jobID { + // the alloc for this blocking claim has been + // garbage collected but the volumewatcher hasn't + // finished releasing the claim (and possibly + // detaching the volume), so we need to block + // until it can be scheduled + if err != nil || a == nil { + return false, fmt.Sprintf( + FilterConstraintCSIVolumeGCdAllocationTemplate, vol.ID, id) + } else if a.Namespace != c.namespace || a.JobID != c.jobID { + // the blocking claim is for another live job + // so it's legitimately blocking more write + // claims return false, fmt.Sprintf( FilterConstraintCSIVolumeInUseTemplate, vol.ID) } diff --git a/ui/app/adapters/evaluation.js b/ui/app/adapters/evaluation.js new file mode 100644 index 000000000000..13f0caabb3db --- /dev/null +++ b/ui/app/adapters/evaluation.js @@ -0,0 +1,9 @@ +import ApplicationAdapter from './application'; + +export default class EvaluationAdapter extends ApplicationAdapter { + handleResponse(_status, headers) { + const result = super.handleResponse(...arguments); + result.meta = { nextToken: headers['x-nomad-nexttoken'] }; + return result; + } +} diff --git 
a/ui/app/components/status-cell.hbs b/ui/app/components/status-cell.hbs new file mode 100644 index 000000000000..6692a043b594 --- /dev/null +++ b/ui/app/components/status-cell.hbs @@ -0,0 +1 @@ +{{@status}} \ No newline at end of file diff --git a/ui/app/controllers/evaluations/index.js b/ui/app/controllers/evaluations/index.js new file mode 100644 index 000000000000..e277abe7f074 --- /dev/null +++ b/ui/app/controllers/evaluations/index.js @@ -0,0 +1,70 @@ +import Controller from '@ember/controller'; +import { action } from '@ember/object'; +import { tracked } from '@glimmer/tracking'; +import { inject as service } from '@ember/service'; + +export default class EvaluationsController extends Controller { + @service userSettings; + + queryParams = ['nextToken', 'pageSize', 'status']; + + get shouldDisableNext() { + return !this.model.meta?.nextToken; + } + + get shouldDisablePrev() { + return !this.previousTokens.length; + } + + get optionsEvaluationsStatus() { + return [ + { key: null, label: 'All' }, + { key: 'blocked', label: 'Blocked' }, + { key: 'pending', label: 'Pending' }, + { key: 'complete', label: 'Complete' }, + { key: 'failed', label: 'Failed' }, + { key: 'canceled', label: 'Canceled' }, + ]; + } + + @tracked pageSize = this.userSettings.pageSize; + @tracked nextToken = null; + @tracked previousTokens = []; + @tracked status = null; + + @action + onChange(newPageSize) { + this.pageSize = newPageSize; + } + + @action + onNext(nextToken) { + this.previousTokens = [...this.previousTokens, this.nextToken]; + this.nextToken = nextToken; + } + + @action + onPrev() { + const lastToken = this.previousTokens.pop(); + this.previousTokens = [...this.previousTokens]; + this.nextToken = lastToken; + } + + @action + refresh() { + this._resetTokens(); + this.status = null; + this.pageSize = this.userSettings.pageSize; + } + + @action + setStatus(selection) { + this._resetTokens(); + this.status = selection; + } + + _resetTokens() { + this.nextToken = null; + this.previousTokens = []; + } +} diff --git a/ui/app/models/evaluation.js b/ui/app/models/evaluation.js index 944d03e102f1..51507708bc39 100644 --- a/ui/app/models/evaluation.js +++ b/ui/app/models/evaluation.js @@ -6,6 +6,7 @@ import shortUUIDProperty from '../utils/properties/short-uuid'; export default class Evaluation extends Model { @shortUUIDProperty('id') shortId; + @shortUUIDProperty('nodeId') shortNodeId; @attr('number') priority; @attr('string') type; @attr('string') triggeredBy; @@ -18,6 +19,7 @@ export default class Evaluation extends Model { @equal('status', 'blocked') isBlocked; @belongsTo('job') job; + @belongsTo('node') node; @attr('number') modifyIndex; @attr('date') modifyTime; @@ -26,4 +28,18 @@ export default class Evaluation extends Model { @attr('date') createTime; @attr('date') waitUntil; + @attr('string') namespace; + @attr('string') plainJobId; + + get hasJob() { + return !!this.plainJobId; + } + + get hasNode() { + return !!this.belongsTo('node').id(); + } + + get nodeId() { + return this.belongsTo('node').id(); + } } diff --git a/ui/app/router.js b/ui/app/router.js index d567714f18b7..596fa543a95e 100644 --- a/ui/app/router.js +++ b/ui/app/router.js @@ -74,5 +74,8 @@ Router.map(function () { this.route('tokens'); }); + // if we don't include function() the outlet won't render + this.route('evaluations', function () {}); + this.route('not-found', { path: '/*' }); }); diff --git a/ui/app/routes/evaluations/index.js b/ui/app/routes/evaluations/index.js new file mode 100644 index 000000000000..6733e5bd61e0 --- 
/dev/null +++ b/ui/app/routes/evaluations/index.js @@ -0,0 +1,29 @@ +import { inject as service } from '@ember/service'; +import Route from '@ember/routing/route'; + +const ALL_NAMESPACE_WILDCARD = '*'; + +export default class EvaluationsIndexRoute extends Route { + @service store; + + queryParams = { + pageSize: { + refreshModel: true, + }, + nextToken: { + refreshModel: true, + }, + status: { + refreshModel: true, + }, + }; + + model({ pageSize, status, nextToken }) { + return this.store.query('evaluation', { + namespace: ALL_NAMESPACE_WILDCARD, + per_page: pageSize, + next_token: nextToken, + status, + }); + } +} diff --git a/ui/app/styles/charts/colors.scss b/ui/app/styles/charts/colors.scss index 1676fdc8937d..0ddc4a79b096 100644 --- a/ui/app/styles/charts/colors.scss +++ b/ui/app/styles/charts/colors.scss @@ -6,6 +6,8 @@ $failed: $danger; $lost: $dark; $not-scheduled: $blue-200; $degraded: $warning; +$blocked: $danger; +$canceled: $dark; .chart { .queued { @@ -120,6 +122,14 @@ $degraded: $warning; background: $degraded; } + &.canceled { + background: $canceled; + } + + &.blocked { + background: $blocked; + } + @each $name, $pair in $colors { $color: nth($pair, 1); diff --git a/ui/app/styles/core/table.scss b/ui/app/styles/core/table.scss index 9f971de56eee..533f275cf93d 100644 --- a/ui/app/styles/core/table.scss +++ b/ui/app/styles/core/table.scss @@ -1,4 +1,4 @@ -@use "sass:math"; +@use 'sass:math'; .table { color: $text; @@ -269,6 +269,10 @@ justify-content: space-between; align-items: center; + &.with-padding { + padding: 6px; + } + .pagination { padding: 0; margin: 0; diff --git a/ui/app/templates/components/gutter-menu.hbs b/ui/app/templates/components/gutter-menu.hbs index f2ef4048f39b..fc9d25c49915 100644 --- a/ui/app/templates/components/gutter-menu.hbs +++ b/ui/app/templates/components/gutter-menu.hbs @@ -1,7 +1,12 @@
- + @@ -12,7 +17,8 @@ {{#if this.system.shouldShowRegions}}
{{yield}}
-
+
\ No newline at end of file diff --git a/ui/app/templates/components/page-size-select.hbs b/ui/app/templates/components/page-size-select.hbs index f61c33295b6c..3b2efb3c28b8 100644 --- a/ui/app/templates/components/page-size-select.hbs +++ b/ui/app/templates/components/page-size-select.hbs @@ -1,15 +1,16 @@ -
- Per page +
+ + Per page + + @onChange={{action (queue (action (mut this.userSettings.pageSize)) (action this.onChange)) + }} as |option| + > {{option}} -
+
\ No newline at end of file diff --git a/ui/app/templates/evaluations.hbs b/ui/app/templates/evaluations.hbs new file mode 100644 index 000000000000..cd930aba258f --- /dev/null +++ b/ui/app/templates/evaluations.hbs @@ -0,0 +1,3 @@ + + {{outlet}} + \ No newline at end of file diff --git a/ui/app/templates/evaluations/index.hbs b/ui/app/templates/evaluations/index.hbs new file mode 100644 index 000000000000..b7e952ee99f4 --- /dev/null +++ b/ui/app/templates/evaluations/index.hbs @@ -0,0 +1,138 @@ +{{page-title "Evaluations"}} +
+
+
+ +
+
+ {{#if @model.length}} + + + + Evaluation ID + + + Resource + + + Priority + + + Created + + + Triggered By + + + Status + + + Placement Failures + + + + + + {{row.model.shortId}} + + + {{#if row.model.hasJob}} + + {{row.model.plainJobId}} + + {{else}} + + {{row.model.shortNodeId}} + + {{/if}} + + + {{row.model.priority}} + + + {{format-month-ts row.model.createTime}} + + + {{row.model.triggeredBy}} + + + + + + {{#if (eq row.model.status "blocked")}} + N/A - In Progress + {{else if row.model.hasPlacementFailures}} + True + {{else}} + False + {{/if}} + + + + +
+ +
+ + + +
+
+ {{else}} +
+
+

+ No Matches +

+

+ {{#if this.status}} + + No evaluations match the status + + {{this.status}} + + + {{else}} + + There are no evaluations + + {{/if}} +

+
+
+ {{/if}} +
\ No newline at end of file diff --git a/ui/mirage/config.js b/ui/mirage/config.js index b95244b545e0..a95e068232ef 100644 --- a/ui/mirage/config.js +++ b/ui/mirage/config.js @@ -261,6 +261,7 @@ export default function () { return this.serialize(evaluations.where({ jobId: params.id })); }); + this.get('/evaluations'); this.get('/evaluation/:id'); this.get('/deployment/allocations/:id', function (schema, { params }) { diff --git a/ui/mirage/factories/evaluation.js b/ui/mirage/factories/evaluation.js index 8a3666d1677a..1ea767717c77 100644 --- a/ui/mirage/factories/evaluation.js +++ b/ui/mirage/factories/evaluation.js @@ -31,9 +31,17 @@ const generateCountMap = (keysCount, list) => () => { }; const generateNodesAvailable = generateCountMap(5, DATACENTERS); -const generateClassFiltered = generateCountMap(3, provide(10, faker.hacker.abbreviation)); +const generateClassFiltered = generateCountMap( + 3, + provide(10, faker.hacker.abbreviation) +); const generateClassExhausted = generateClassFiltered; -const generateDimensionExhausted = generateCountMap(1, ['cpu', 'mem', 'disk', 'iops']); +const generateDimensionExhausted = generateCountMap(1, [ + 'cpu', + 'mem', + 'disk', + 'iops', +]); const generateQuotaExhausted = generateDimensionExhausted; const generateScores = generateCountMap(1, ['binpack', 'job-anti-affinity']); const generateConstraintFiltered = generateCountMap(2, [ @@ -59,7 +67,9 @@ export default Factory.extend({ createIndex: () => faker.random.number({ min: 10, max: 2000 }), createTime() { - return faker.date.past(2 / 365, new Date(this.modifyTime / 1000000)) * 1000000; + return ( + faker.date.past(2 / 365, new Date(this.modifyTime / 1000000)) * 1000000 + ); }, waitUntil: null, @@ -68,14 +78,22 @@ export default Factory.extend({ status: 'blocked', afterCreate(evaluation, server) { assignJob(evaluation, server); - const taskGroups = server.db.taskGroups.where({ jobId: evaluation.jobId }); + const taskGroups = server.db.taskGroups.where({ + jobId: evaluation.jobId, + }); const taskGroupNames = taskGroups.mapBy('name'); - const failedTaskGroupsCount = faker.random.number({ min: 1, max: taskGroupNames.length }); + const failedTaskGroupsCount = faker.random.number({ + min: 1, + max: taskGroupNames.length, + }); const failedTaskGroupNames = []; for (let i = 0; i < failedTaskGroupsCount; i++) { failedTaskGroupNames.push( - ...taskGroupNames.splice(faker.random.number(taskGroupNames.length - 1), 1) + ...taskGroupNames.splice( + faker.random.number(taskGroupNames.length - 1), + 1 + ) ); } @@ -91,7 +109,9 @@ export default Factory.extend({ }), afterCreate(evaluation, server) { - assignJob(evaluation, server); + if (!evaluation.nodeId) { + assignJob(evaluation, server); + } }, }); @@ -101,7 +121,9 @@ function assignJob(evaluation, server) { server.db.jobs.length ); - const job = evaluation.jobId ? server.db.jobs.find(evaluation.jobId) : pickOne(server.db.jobs); + const job = evaluation.jobId + ? server.db.jobs.find(evaluation.jobId) + : pickOne(server.db.jobs); evaluation.update({ jobId: job.id, }); @@ -113,12 +135,18 @@ export function generateTaskGroupFailures() { NodesEvaluated: faker.random.number({ min: 1, max: 100 }), NodesExhausted: faker.random.number({ min: 1, max: 100 }), - NodesAvailable: faker.random.number(10) >= 7 ? generateNodesAvailable() : null, - ClassFiltered: faker.random.number(10) >= 7 ? generateClassFiltered() : null, - ConstraintFiltered: faker.random.number(10) >= 7 ? generateConstraintFiltered() : null, - ClassExhausted: faker.random.number(10) >= 7 ? 
generateClassExhausted() : null, - DimensionExhausted: faker.random.number(10) >= 7 ? generateDimensionExhausted() : null, - QuotaExhausted: faker.random.number(10) >= 7 ? generateQuotaExhausted() : null, + NodesAvailable: + faker.random.number(10) >= 7 ? generateNodesAvailable() : null, + ClassFiltered: + faker.random.number(10) >= 7 ? generateClassFiltered() : null, + ConstraintFiltered: + faker.random.number(10) >= 7 ? generateConstraintFiltered() : null, + ClassExhausted: + faker.random.number(10) >= 7 ? generateClassExhausted() : null, + DimensionExhausted: + faker.random.number(10) >= 7 ? generateDimensionExhausted() : null, + QuotaExhausted: + faker.random.number(10) >= 7 ? generateQuotaExhausted() : null, Scores: faker.random.number(10) >= 7 ? generateScores() : null, }; } diff --git a/ui/package.json b/ui/package.json index 8f5a619ba917..e7f5845be2bb 100644 --- a/ui/package.json +++ b/ui/package.json @@ -20,7 +20,9 @@ "build-storybook": "STORYBOOK=true ember build && build-storybook -s dist", "storybook": "STORYBOOK=true start-storybook -p 6006 -s dist", "test": "npm-run-all lint:* test:*", - "test:ember": "ember test" + "test:ember": "ember test", + "local:qunitdom": "ember test --server --query=dockcontainer", + "local:exam": "ember exam --server --load-balance --parallel=4" }, "husky": { "hooks": { @@ -83,6 +85,7 @@ "ember-data": "~3.24", "ember-data-model-fragments": "5.0.0-beta.2", "ember-decorators": "^6.1.1", + "ember-exam": "6.1.0", "ember-export-application-global": "^2.0.1", "ember-fetch": "^8.0.2", "ember-inflector": "^3.0.0", diff --git a/ui/testem.js b/ui/testem.js index f0d81dfdfa9d..0d47b107fe77 100644 --- a/ui/testem.js +++ b/ui/testem.js @@ -11,6 +11,7 @@ const config = { launch_in_ci: ['Chrome'], launch_in_dev: ['Chrome'], browser_start_timeout: 120, + parallel: -1, browser_args: { // New format in testem/master, but not in a release yet // Chrome: { diff --git a/ui/tests/acceptance/evaluations-test.js b/ui/tests/acceptance/evaluations-test.js new file mode 100644 index 000000000000..bc444042149d --- /dev/null +++ b/ui/tests/acceptance/evaluations-test.js @@ -0,0 +1,446 @@ +import { click, currentRouteName, visit } from '@ember/test-helpers'; +import { module, test } from 'qunit'; +import { setupApplicationTest } from 'ember-qunit'; +import { setupMirage } from 'ember-cli-mirage/test-support'; +import { Response } from 'ember-cli-mirage'; +import a11yAudit from 'nomad-ui/tests/helpers/a11y-audit'; +import { + selectChoose, + clickTrigger, +} from 'ember-power-select/test-support/helpers'; + +const getStandardRes = () => [ + { + CreateIndex: 1249, + CreateTime: 1640181894162724000, + DeploymentID: '12efbb28-840e-7794-b215-a7b112e40a4f', + ID: '5fb1b8cd-00f8-fff8-de0c-197dc37f5053', + JobID: 'cores-example', + JobModifyIndex: 694, + ModifyIndex: 1251, + ModifyTime: 1640181894167194000, + Namespace: 'ted-lasso', + Priority: 50, + QueuedAllocations: { + lb: 0, + webapp: 0, + }, + SnapshotIndex: 1249, + Status: 'complete', + TriggeredBy: 'job-register', + Type: 'service', + }, + { + CreateIndex: 1304, + CreateTime: 1640183201719510000, + DeploymentID: '878435bf-7265-62b1-7902-d45c44b23b79', + ID: '66cb98a6-7740-d5ef-37e4-fa0f8b1de44b', + JobID: 'cores-example', + JobModifyIndex: 1304, + ModifyIndex: 1306, + ModifyTime: 1640183201721418000, + Namespace: 'default', + Priority: 50, + QueuedAllocations: { + webapp: 0, + lb: 0, + }, + SnapshotIndex: 1304, + Status: 'complete', + TriggeredBy: 'job-register', + Type: 'service', + }, + { + CreateIndex: 1267, + CreateTime: 
1640182198255685000, + DeploymentID: '12efbb28-840e-7794-b215-a7b112e40a4f', + ID: '78009518-574d-eee6-919a-e83879175dd3', + JobID: 'cores-example', + JobModifyIndex: 1250, + ModifyIndex: 1274, + ModifyTime: 1640182228112823000, + Namespace: 'ted-lasso', + PreviousEval: '84f1082f-3e6e-034d-6df4-c6a321e7bd63', + Priority: 50, + QueuedAllocations: { + lb: 0, + }, + SnapshotIndex: 1272, + Status: 'complete', + TriggeredBy: 'alloc-failure', + Type: 'service', + WaitUntil: '2021-12-22T14:10:28.108136Z', + }, + { + CreateIndex: 1322, + CreateTime: 1640183505760099000, + DeploymentID: '878435bf-7265-62b1-7902-d45c44b23b79', + ID: 'c184f72b-68a3-5180-afd6-af01860ad371', + JobID: 'cores-example', + JobModifyIndex: 1305, + ModifyIndex: 1329, + ModifyTime: 1640183535540881000, + Namespace: 'default', + PreviousEval: '9a917a93-7bc3-6991-ffc9-15919a38f04b', + Priority: 50, + QueuedAllocations: { + lb: 0, + }, + SnapshotIndex: 1326, + Status: 'complete', + TriggeredBy: 'alloc-failure', + Type: 'service', + WaitUntil: '2021-12-22T14:32:15.539556Z', + }, +]; + +module('Acceptance | evaluations list', function (hooks) { + setupApplicationTest(hooks); + setupMirage(hooks); + + test('it passes an accessibility audit', async function (assert) { + assert.expect(2); + + await visit('/evaluations'); + + assert.equal( + currentRouteName(), + 'evaluations.index', + 'The default route in evaluations is evaluations index' + ); + + await a11yAudit(assert); + }); + + test('it renders an empty message if there are no evaluations rendered', async function (assert) { + await visit('/evaluations'); + + assert + .dom('[data-test-empty-evaluations-list]') + .exists('We display empty table message.'); + assert + .dom('[data-test-no-eval]') + .exists('We display a message saying there are no evaluations.'); + }); + + test('it renders a list of evaluations', async function (assert) { + assert.expect(3); + + server.get('/evaluations', function (_server, fakeRequest) { + assert.deepEqual( + fakeRequest.queryParams, + { + namespace: '*', + per_page: '25', + status: '', + next_token: '', + }, + 'Forwards the correct query parameters on default query when route initially loads' + ); + return getStandardRes(); + }); + + await visit('/evaluations'); + + assert + .dom('[data-test-eval-table]') + .exists('Evaluations table should render'); + assert + .dom('[data-test-evaluation]') + .exists({ count: 4 }, 'Should render the correct number of evaluations'); + }); + + test('it should enable filtering by evaluation status', async function (assert) { + assert.expect(2); + + server.get('/evaluations', getStandardRes); + + await visit('/evaluations'); + + server.get('/evaluations', function (_server, fakeRequest) { + assert.deepEqual( + fakeRequest.queryParams, + { + namespace: '*', + per_page: '25', + status: 'pending', + next_token: '', + }, + 'It makes another server request using the options selected by the user' + ); + return []; + }); + + await clickTrigger('[data-test-evaluation-status-facet]'); + await selectChoose('[data-test-evaluation-status-facet]', 'Pending'); + + assert + .dom('[data-test-no-eval-match]') + .exists('Renders a message saying no evaluations match filter status'); + }); + + module('page size', function (hooks) { + hooks.afterEach(function () { + // PageSizeSelect and the Evaluations Controller are both using localStorage directly + // Will come back and invert the dependency + window.localStorage.clear(); + }); + + test('it is possible to change page size', async function (assert) { + assert.expect(1); + + 
server.get('/evaluations', getStandardRes); + + await visit('/evaluations'); + + server.get('/evaluations', function (_server, fakeRequest) { + assert.deepEqual( + fakeRequest.queryParams, + { + namespace: '*', + per_page: '50', + status: '', + next_token: '', + }, + 'It makes a request with the per_page set by the user' + ); + return getStandardRes(); + }); + + await clickTrigger('[data-test-per-page]'); + await selectChoose('[data-test-per-page]', 50); + }); + }); + + module('pagination', function () { + test('it should enable pagination by using next tokens', async function (assert) { + assert.expect(7); + + server.get('/evaluations', function () { + return new Response( + 200, + { 'x-nomad-nexttoken': 'next-token-1' }, + getStandardRes() + ); + }); + + await visit('/evaluations'); + + server.get('/evaluations', function (_server, fakeRequest) { + assert.deepEqual( + fakeRequest.queryParams, + { + namespace: '*', + per_page: '25', + status: '', + next_token: 'next-token-1', + }, + 'It makes another server request using the options selected by the user' + ); + return new Response( + 200, + { 'x-nomad-nexttoken': 'next-token-2' }, + getStandardRes() + ); + }); + + assert + .dom('[data-test-eval-pagination-next]') + .isEnabled( + 'If there is a next-token in the API response the next button should be enabled.' + ); + await click('[data-test-eval-pagination-next]'); + + server.get('/evaluations', function (_server, fakeRequest) { + assert.deepEqual( + fakeRequest.queryParams, + { + namespace: '*', + per_page: '25', + status: '', + next_token: 'next-token-2', + }, + 'It makes another server request using the options selected by the user' + ); + return getStandardRes(); + }); + await click('[data-test-eval-pagination-next]'); + + assert + .dom('[data-test-eval-pagination-next]') + .isDisabled('If there is no next-token, the next button is disabled.'); + + assert + .dom('[data-test-eval-pagination-prev]') + .isEnabled( + 'After we transition to the next page, the previous page button is enabled.' + ); + + server.get('/evaluations', function (_server, fakeRequest) { + assert.deepEqual( + fakeRequest.queryParams, + { + namespace: '*', + per_page: '25', + status: '', + next_token: 'next-token-1', + }, + 'It makes a request using the stored old token.' + ); + return new Response( + 200, + { 'x-nomad-nexttoken': 'next-token-2' }, + getStandardRes() + ); + }); + + await click('[data-test-eval-pagination-prev]'); + + server.get('/evaluations', function (_server, fakeRequest) { + assert.deepEqual( + fakeRequest.queryParams, + { + namespace: '*', + per_page: '25', + status: '', + next_token: '', + }, + 'When there are no more stored previous tokens, we will request with no next-token.' 
+ ); + return new Response( + 200, + { 'x-nomad-nexttoken': 'next-token-1' }, + getStandardRes() + ); + }); + + await click('[data-test-eval-pagination-prev]'); + }); + + test('it should clear all query parameters on refresh', async function (assert) { + assert.expect(1); + + server.get('/evaluations', function () { + return new Response( + 200, + { 'x-nomad-nexttoken': 'next-token-1' }, + getStandardRes() + ); + }); + + await visit('/evaluations'); + + server.get('/evaluations', function () { + return getStandardRes(); + }); + + await click('[data-test-eval-pagination-next]'); + + await clickTrigger('[data-test-evaluation-status-facet]'); + await selectChoose('[data-test-evaluation-status-facet]', 'Pending'); + + server.get('/evaluations', function (_server, fakeRequest) { + assert.deepEqual( + fakeRequest.queryParams, + { + namespace: '*', + per_page: '25', + status: '', + next_token: '', + }, + 'It clears all query parameters when making a refresh' + ); + return new Response( + 200, + { 'x-nomad-nexttoken': 'next-token-1' }, + getStandardRes() + ); + }); + + await click('[data-test-eval-refresh]'); + }); + + test('it should reset pagination when filters are applied', async function (assert) { + assert.expect(1); + + server.get('/evaluations', function () { + return new Response( + 200, + { 'x-nomad-nexttoken': 'next-token-1' }, + getStandardRes() + ); + }); + + await visit('/evaluations'); + + server.get('/evaluations', function () { + return new Response( + 200, + { 'x-nomad-nexttoken': 'next-token-2' }, + getStandardRes() + ); + }); + + await click('[data-test-eval-pagination-next]'); + + server.get('/evaluations', getStandardRes); + await click('[data-test-eval-pagination-next]'); + + server.get('/evaluations', function (_server, fakeRequest) { + assert.deepEqual( + fakeRequest.queryParams, + { + namespace: '*', + per_page: '25', + status: 'pending', + next_token: '', + }, + 'It clears all next token when filtered request is made' + ); + return getStandardRes(); + }); + await clickTrigger('[data-test-evaluation-status-facet]'); + await selectChoose('[data-test-evaluation-status-facet]', 'Pending'); + }); + }); + + module('resource linking', function () { + test('it should generate a link to the job resource', async function (assert) { + server.create('node'); + const job = server.create('job', { shallow: true }); + server.create('evaluation', { jobId: job.id }); + await visit('/evaluations'); + + assert + .dom('[data-test-evaluation-resource]') + .hasText( + job.name, + 'It conditionally renders the correct resource name' + ); + await click('[data-test-evaluation-resource]'); + assert + .dom('[data-test-job-name]') + .includesText(job.name, 'We navigate to the correct job page.'); + }); + + test('it should generate a link to the node resource', async function (assert) { + const node = server.create('node'); + server.create('evaluation', { nodeId: node.id }); + await visit('/evaluations'); + + const shortNodeId = node.id.split('-')[0]; + assert + .dom('[data-test-evaluation-resource]') + .hasText( + shortNodeId, + 'It conditionally renders the correct resource name' + ); + + await click('[data-test-evaluation-resource]'); + + assert + .dom('[data-test-title]') + .includesText(node.name, 'We navigate to the correct client page.'); + }); + }); +}); diff --git a/ui/tests/test-helper.js b/ui/tests/test-helper.js index b246c315936a..a8efa1f76517 100644 --- a/ui/tests/test-helper.js +++ b/ui/tests/test-helper.js @@ -2,7 +2,7 @@ import 'core-js'; import Application from 'nomad-ui/app'; 
import config from 'nomad-ui/config/environment'; import { setApplication } from '@ember/test-helpers'; -import { start } from 'ember-qunit'; +import start from 'ember-exam/test-support/start'; import { useNativeEvents } from 'ember-cli-page-object/extend'; useNativeEvents(); diff --git a/ui/tests/unit/serializers/evaluation-test.js b/ui/tests/unit/serializers/evaluation-test.js index 6e17427eb61e..d8f7a75614b6 100644 --- a/ui/tests/unit/serializers/evaluation-test.js +++ b/ui/tests/unit/serializers/evaluation-test.js @@ -40,6 +40,8 @@ module('Unit | Serializer | Evaluation', function (hooks) { nodesAvailable: 10, }, ], + namespace: 'test-namespace', + plainJobId: 'some-job-id', }, relationships: { job: { @@ -89,6 +91,8 @@ module('Unit | Serializer | Evaluation', function (hooks) { nodesAvailable: 25, }, ], + namespace: 'test-namespace', + plainJobId: 'some-job-id', }, relationships: { job: { diff --git a/ui/yarn.lock b/ui/yarn.lock index 576b50462cde..739b5ba96726 100644 --- a/ui/yarn.lock +++ b/ui/yarn.lock @@ -2493,6 +2493,19 @@ resolve "^1.8.1" semver "^7.3.2" +"@embroider/shared-internals@^1.0.0": + version "1.0.0" + resolved "https://registry.yarnpkg.com/@embroider/shared-internals/-/shared-internals-1.0.0.tgz#b081708ac79e4582f17ba0f3e3796e6612a8976c" + integrity sha512-Vx3dmejJxI5MG/qC7or3EUZY0AZBSBNOAR50PYotX3LxUSb4lAm5wISPnFbwEY4bbo2VhL/6XtWjMv8ZMcaP+g== + dependencies: + babel-import-util "^1.1.0" + ember-rfc176-data "^0.3.17" + fs-extra "^9.1.0" + lodash "^4.17.21" + resolve-package-path "^4.0.1" + semver "^7.3.5" + typescript-memoize "^1.0.1" + "@embroider/util@^0.36.0": version "0.36.0" resolved "https://registry.yarnpkg.com/@embroider/util/-/util-0.36.0.tgz#b2ffb2b06ac491f157a771392191ce91ef2216a6" @@ -4868,6 +4881,11 @@ babel-helpers@^6.24.1: babel-runtime "^6.22.0" babel-template "^6.24.1" +babel-import-util@^1.1.0: + version "1.1.0" + resolved "https://registry.yarnpkg.com/babel-import-util/-/babel-import-util-1.1.0.tgz#4156b16ef090c4f0d3cdb869ff799202f24aeb93" + integrity sha512-sfzgAiJsUT1es9yrHAuJZuJfBkkOE7Og6rovAIwK/gNJX6MjDfWTprbPngdJZTd5ye4F3FvpvpQmvKXObRzVYA== + babel-loader@^8.0.6: version "8.2.3" resolved "https://registry.yarnpkg.com/babel-loader/-/babel-loader-8.2.3.tgz#8986b40f1a64cacfcb4b8429320085ef68b1342d" @@ -7007,6 +7025,15 @@ cli-table3@0.6.0: optionalDependencies: colors "^1.1.2" +cli-table3@^0.6.0: + version "0.6.1" + resolved "https://registry.yarnpkg.com/cli-table3/-/cli-table3-0.6.1.tgz#36ce9b7af4847f288d3cdd081fbd09bf7bd237b8" + integrity sha512-w0q/enDHhPLq44ovMGdQeeDLvwxwavsJX7oQGYt/LrBlYsyaxyDnp6z3QzFut/6kLLKnlcUVJLrpB7KBfgG/RA== + dependencies: + string-width "^4.2.0" + optionalDependencies: + colors "1.4.0" + cli-table@^0.3.1: version "0.3.4" resolved "https://registry.yarnpkg.com/cli-table/-/cli-table-0.3.4.tgz#5b37fd723751f1a6e9e70d55953a75e16eab958e" @@ -7153,7 +7180,7 @@ colorette@^2.0.16: resolved "https://registry.yarnpkg.com/colorette/-/colorette-2.0.16.tgz#713b9af84fdb000139f04546bd4a93f62a5085da" integrity sha512-hUewv7oMjCp+wkBv5Rm0v87eJhq4woh5rSR+42YSQJKecCqgIqNkZ6lAlQms/BwHPJA5NKMRlpxPRv0n8HQW6g== -colors@^1.1.2, colors@^1.4.0: +colors@1.4.0, colors@^1.1.2, colors@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/colors/-/colors-1.4.0.tgz#c50491479d4c1bdaed2c9ced32cf7c7dc2360f78" integrity sha512-a+UqTh4kgZg/SlGvfbzDHpgRu7AAQOmmqRHJnxhRZICKFUT91brVhNNt58CMWU9PsBbv3PDCZUHbVxuDiH2mtA== @@ -7911,6 +7938,13 @@ debug@^4.0.0, debug@^4.0.1, debug@^4.1.0, debug@^4.1.1, debug@^4.3.1: dependencies: ms "2.1.2" +debug@^4.2.0: + 
version "4.3.3" + resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.3.tgz#04266e0b70a98d4462e6e288e38259213332b664" + integrity sha512-/zxw5+vh1Tfv+4Qn7a5nsbcJKPaSvCDhojn6FEl9vupwK2VCSDtEiEtqr8DFtzYFOdz63LBkxec7DYuc2jon6Q== + dependencies: + ms "2.1.2" + debug@^4.3.2, debug@~4.3.1, debug@~4.3.2: version "4.3.2" resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.2.tgz#f0a49c18ac8779e31d4a0c6029dfb76873c7428b" @@ -8420,6 +8454,41 @@ ember-auto-import@^1.10.0, ember-auto-import@^1.2.19, ember-auto-import@^1.6.0: walk-sync "^0.3.3" webpack "^4.43.0" +ember-auto-import@^1.10.1: + version "1.12.1" + resolved "https://registry.yarnpkg.com/ember-auto-import/-/ember-auto-import-1.12.1.tgz#09967bd35cd56ac45f413c48deabf7cfb3a785f6" + integrity sha512-Jm0vWKNAy/wYMrdSQIrG8sRsvarIRHZ2sS/CGhMdMqVKJR48AhGU7NgPJ5SIlO/+seL2VSO+dtv7aEOEIaT6BA== + dependencies: + "@babel/core" "^7.1.6" + "@babel/preset-env" "^7.10.2" + "@babel/traverse" "^7.1.6" + "@babel/types" "^7.1.6" + "@embroider/shared-internals" "^1.0.0" + babel-core "^6.26.3" + babel-loader "^8.0.6" + babel-plugin-syntax-dynamic-import "^6.18.0" + babylon "^6.18.0" + broccoli-debug "^0.6.4" + broccoli-node-api "^1.7.0" + broccoli-plugin "^4.0.0" + broccoli-source "^3.0.0" + debug "^3.1.0" + ember-cli-babel "^7.0.0" + enhanced-resolve "^4.0.0" + fs-extra "^6.0.1" + fs-tree-diff "^2.0.0" + handlebars "^4.3.1" + js-string-escape "^1.0.1" + lodash "^4.17.19" + mkdirp "^0.5.1" + resolve-package-path "^3.1.0" + rimraf "^2.6.2" + semver "^7.3.4" + symlink-or-copy "^1.2.0" + typescript-memoize "^1.0.0-alpha.3" + walk-sync "^0.3.3" + webpack "^4.43.0" + ember-basic-dropdown@^3.0.16: version "3.0.16" resolved "https://registry.yarnpkg.com/ember-basic-dropdown/-/ember-basic-dropdown-3.0.16.tgz#287fcde57b5a37405d89cc65e0a4ad9a2e8e1b0b" @@ -9244,6 +9313,26 @@ ember-element-helper@^0.3.2: ember-cli-htmlbars "^5.1.0" ember-compatibility-helpers "^1.2.1" +ember-exam@6.1.0: + version "6.1.0" + resolved "https://registry.yarnpkg.com/ember-exam/-/ember-exam-6.1.0.tgz#1ea2c0ece27ac8ad6a80d959b1c207611b7dfdd7" + integrity sha512-H9tg7eUgqkjAsr1/15UzxGyZobGLgsyTi56Ng0ySnkYGCRfvVpwtVc3xgcNOFnUaa9RExUFpxC0adjW3K87Uxw== + dependencies: + "@embroider/macros" "^0.36.0" + chalk "^4.1.0" + cli-table3 "^0.6.0" + debug "^4.2.0" + ember-auto-import "^1.10.1" + ember-cli-babel "^7.21.0" + ember-cli-version-checker "^5.1.2" + execa "^4.0.3" + fs-extra "^9.0.1" + js-yaml "^3.14.0" + npmlog "^4.1.2" + rimraf "^3.0.2" + semver "^7.3.2" + silent-error "^1.1.1" + ember-export-application-global@^2.0.1: version "2.0.1" resolved "https://registry.yarnpkg.com/ember-export-application-global/-/ember-export-application-global-2.0.1.tgz#b120a70e322ab208defc9e2daebe8d0dfc2dcd46" @@ -10242,7 +10331,7 @@ execa@^3.0.0: signal-exit "^3.0.2" strip-final-newline "^2.0.0" -execa@^4.0.0: +execa@^4.0.0, execa@^4.0.3: version "4.1.0" resolved "https://registry.yarnpkg.com/execa/-/execa-4.1.0.tgz#4e5491ad1572f2f17a77d388c6c857135b22847a" integrity sha512-j5W0//W7f8UxAn8hXVnwG8tLwdiUy4FJLcSupCg6maBYZDpyBvTApK7KyuI4bKj8KOh1r2YH+6ucuYtJv1bTZA== @@ -10952,7 +11041,7 @@ fs-extra@^8.0.0, fs-extra@^8.0.1, fs-extra@^8.1.0: jsonfile "^4.0.0" universalify "^0.1.0" -fs-extra@^9.0.0, fs-extra@^9.0.1: +fs-extra@^9.0.0, fs-extra@^9.0.1, fs-extra@^9.1.0: version "9.1.0" resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-9.1.0.tgz#5954460c764a8da2094ba3554bf839e6b9a7c86d" integrity sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ== @@ -12771,7 +12860,7 @@ 
js-tokens@^3.0.2: resolved "https://registry.yarnpkg.com/js-tokens/-/js-tokens-3.0.2.tgz#9866df395102130e38f7f996bceb65443209c25b" integrity sha1-mGbfOVECEw449/mWvOtlRDIJwls= -js-yaml@^3.13.1, js-yaml@^3.2.5, js-yaml@^3.2.7: +js-yaml@^3.13.1, js-yaml@^3.14.0, js-yaml@^3.2.5, js-yaml@^3.2.7: version "3.14.1" resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-3.14.1.tgz#dae812fdb3825fa306609a8717383c50c36a0537" integrity sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g== @@ -16324,6 +16413,13 @@ resolve-package-path@^3.1.0: path-root "^0.1.1" resolve "^1.17.0" +resolve-package-path@^4.0.1: + version "4.0.3" + resolved "https://registry.yarnpkg.com/resolve-package-path/-/resolve-package-path-4.0.3.tgz#31dab6897236ea6613c72b83658d88898a9040aa" + integrity sha512-SRpNAPW4kewOaNUt8VPqhJ0UMxawMwzJD8V7m1cJfdSTK9ieZwS6K7Dabsm4bmLFM96Z5Y/UznrpG5kt1im8yA== + dependencies: + path-root "^0.1.1" + resolve-path@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/resolve-path/-/resolve-path-1.4.0.tgz#c4bda9f5efb2fce65247873ab36bb4d834fe16f7" @@ -18197,6 +18293,11 @@ typescript-memoize@^1.0.0-alpha.3: resolved "https://registry.yarnpkg.com/typescript-memoize/-/typescript-memoize-1.0.0-alpha.4.tgz#fd97ab63807c3392af5d0ac5f4754254a4fcd634" integrity sha512-woA2UUWSvx8ugkEjPN8DMuNjukBp8NQeLmz+LRXbEsQIvhLR8LSlD+8Qxdk7NmgE8xeJabJdU8zSrO4ozijGjg== +typescript-memoize@^1.0.1: + version "1.1.0" + resolved "https://registry.yarnpkg.com/typescript-memoize/-/typescript-memoize-1.1.0.tgz#4a8f512d06fc995167c703a3592219901db8bc79" + integrity sha512-LQPKVXK8QrBBkL/zclE6YgSWn0I8ew5m0Lf+XL00IwMhlotqRLlzHV+BRrljVQIc+NohUAuQP7mg4HQwrx5Xbg== + uc.micro@^1.0.0, uc.micro@^1.0.1, uc.micro@^1.0.5: version "1.0.6" resolved "https://registry.yarnpkg.com/uc.micro/-/uc.micro-1.0.6.tgz#9c411a802a409a91fc6cf74081baba34b24499ac" diff --git a/version/version.go b/version/version.go index 4036caaaf5c7..5ed24e249e91 100644 --- a/version/version.go +++ b/version/version.go @@ -11,7 +11,7 @@ var ( GitDescribe string // The main version number that is being run at the moment. - Version = "1.2.5" + Version = "1.2.6" // A pre-release marker for the version. If this is "" (empty string) // then it means that it is a final release. Otherwise, this is a pre-release diff --git a/website/content/docs/configuration/client.mdx b/website/content/docs/configuration/client.mdx index 22e9c2261447..d536c48e648c 100644 --- a/website/content/docs/configuration/client.mdx +++ b/website/content/docs/configuration/client.mdx @@ -336,6 +336,8 @@ see the [drivers documentation](/docs/drivers). - `cpu` `(int: 0)` - Specifies the amount of CPU to reserve, in MHz. +- `cores` `(int: 0)` - Specifies the number of CPU cores to reserve. + - `memory` `(int: 0)` - Specifies the amount of memory to reserve, in MB. - `disk` `(int: 0)` - Specifies the amount of disk to reserve, in MB. 
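For reference, a minimal agent configuration sketch exercising the `reserved` parameters documented in the client.mdx hunk above, including the newly documented `cores` option. This is illustrative only and not part of the change itself; the block layout follows the standard `client` stanza, and the specific values are assumptions, not recommendations.

```hcl
client {
  enabled = true

  # Reserve resources for the OS and non-Nomad processes on this client.
  # Values below are examples only.
  reserved {
    cores  = 1    # whole CPU cores withheld from scheduling
    memory = 512  # MB
    disk   = 1024 # MB
  }
}
```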
diff --git a/website/data/version.js b/website/data/version.js index b8f734d0a196..5c1966f528c5 100644 --- a/website/data/version.js +++ b/website/data/version.js @@ -1 +1 @@ -export default '1.2.4' +export default '1.2.5' diff --git a/website/lib/consent-manager-services/index.ts b/website/lib/consent-manager-services/index.ts new file mode 100644 index 000000000000..3a1a65171d08 --- /dev/null +++ b/website/lib/consent-manager-services/index.ts @@ -0,0 +1,15 @@ +import { ConsentManagerService } from '@hashicorp/react-consent-manager/types' + +const localConsentManagerServices: ConsentManagerService[] = [ + { + name: 'Demandbase Tag', + description: + 'The Demandbase tag is a tracking service to identify website visitors and measure interest on our website.', + category: 'Analytics', + url: 'https://tag.demandbase.com/960ab0a0f20fb102.min.js', + async: true, + }, +] + +export default localConsentManagerServices + diff --git a/website/pages/_app.js b/website/pages/_app.js index fc7707aac333..431a48ddd6c9 100644 --- a/website/pages/_app.js +++ b/website/pages/_app.js @@ -6,6 +6,7 @@ import Head from 'next/head' import NProgress from '@hashicorp/platform-util/nprogress' import { ErrorBoundary } from '@hashicorp/platform-runtime-error-monitoring' import createConsentManager from '@hashicorp/react-consent-manager/loader' +import localConsentManagerServices from 'lib/consent-manager-services' import useFathomAnalytics from '@hashicorp/platform-analytics' import useAnchorLinkAnalytics from '@hashicorp/platform-util/anchor-link-analytics' import HashiStackMenu from '@hashicorp/react-hashi-stack-menu' @@ -19,6 +20,7 @@ import alertBannerData, { ALERT_BANNER_ACTIVE } from 'data/alert-banner' NProgress({ Router }) const { ConsentManager, openConsentManager } = createConsentManager({ preset: 'oss', + otherServices: [...localConsentManagerServices], }) export default function App({ Component, pageProps }) {