From 1fb28b54cd786fa07aec892ea39d1c53e997f396 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 2 Feb 2023 11:30:35 -0500 Subject: [PATCH 1/6] scheduler: tests demonstrating expected behavior for alloc names Service jobs should have unique allocation Names, derived from the Job.ID. System jobs do not have unique allocation Names because the index is intended to indicate the instance out of a desired count size. Because system jobs do not have an explicit count but the results are based on the targeted nodes, the index is less informative and this was intentionally omitted from the original design. --- scheduler/generic_sched_test.go | 11 +++++++++++ scheduler/scheduler_system_test.go | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 9017e707d8e7..3a8b7f406c77 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -92,6 +92,17 @@ func TestServiceSched_JobRegister(t *testing.T) { t.Fatalf("bad: %#v", out) } + // Ensure allocations have unique names derived from Job.ID + allocNames := []string{} + for _, alloc := range out { + allocNames = append(allocNames, alloc.Name) + } + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[%d]", job.ID, i)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Ensure different ports were used. 
used := make(map[int]map[string]struct{}) for _, alloc := range out { diff --git a/scheduler/scheduler_system_test.go b/scheduler/scheduler_system_test.go index 05a4ec6a2c62..0987bf240bc1 100644 --- a/scheduler/scheduler_system_test.go +++ b/scheduler/scheduler_system_test.go @@ -66,6 +66,17 @@ func TestSystemSched_JobRegister(t *testing.T) { // Ensure all allocations placed require.Len(t, out, 10) + // Note that all system allocations have the same name derived from Job.Name + allocNames := []string{} + for _, alloc := range out { + allocNames = append(allocNames, alloc.Name) + } + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[0]", job.Name)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Check the available nodes count, ok := out[0].Metrics.NodesAvailable["dc1"] require.True(t, ok) From cf1b8cee7c6c80300ef054fd8540a7d5c6480c57 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 2 Feb 2023 11:38:56 -0500 Subject: [PATCH 2/6] docs: make it clear that NOMAD_ALLOC_INDEX is always zero for system/sysbatch jobs --- website/content/partials/envvars.mdx | 48 ++++++++++++++-------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/website/content/partials/envvars.mdx b/website/content/partials/envvars.mdx index 612e0d01cf76..4263657ee553 100644 --- a/website/content/partials/envvars.mdx +++ b/website/content/partials/envvars.mdx @@ -1,29 +1,29 @@ ### Job-related variables -| Variable | Description | -| ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `NOMAD_ALLOC_DIR` | The path to the shared `alloc/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | -| `NOMAD_TASK_DIR` | The path to the task `local/` directory. 
See [here](/nomad/docs/runtime/environment#task-directories) for more information. | -| `NOMAD_SECRETS_DIR` | Path to the task's secrets directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | -| `NOMAD_MEMORY_LIMIT` | Memory limit in MB for the task | -| `NOMAD_MEMORY_MAX_LIMIT` | The maximum memory limit the task may use if client has excess memory capacity, in MB. Omitted if task isn't configured with memory oversubscription. | -| `NOMAD_CPU_LIMIT` | CPU limit in MHz for the task | -| `NOMAD_CPU_CORES` | The specific CPU cores reserved for the task in cpuset list notation. Omitted if the task does not request cpu cores. E.g. `0-2,7,12-14` | -| `NOMAD_ALLOC_ID` | Allocation ID of the task | -| `NOMAD_SHORT_ALLOC_ID` | The first 8 characters of the allocation ID of the task | -| `NOMAD_ALLOC_NAME` | Allocation name of the task | -| `NOMAD_ALLOC_INDEX` | Allocation index; useful to distinguish instances of task groups. From 0 to (count - 1). The index is unique within a given version of a job, but canaries or failed tasks in a deployment may reuse the index. | -| `NOMAD_TASK_NAME` | Task's name | -| `NOMAD_GROUP_NAME` | Group's name | -| `NOMAD_JOB_ID` | Job's ID, which is equal to the Job name when submitted through CLI but can be different when using the API | -| `NOMAD_JOB_NAME` | Job's name | -| `NOMAD_JOB_PARENT_ID` | ID of the Job's parent if it has one | -| `NOMAD_DC` | Datacenter in which the allocation is running | -| `NOMAD_PARENT_CGROUP` | The parent cgroup used to contain task cgroups (Linux only) | -| `NOMAD_NAMESPACE` | Namespace in which the allocation is running | -| `NOMAD_REGION` | Region in which the allocation is running | -| `NOMAD_META_` | The metadata value given by `key` on the task's metadata. Note that this is different from [`${meta.}`](/nomad/docs/runtime/interpolation#node-variables-) which are keys in the node's metadata. | -| `VAULT_TOKEN` | The task's Vault token. 
See [Vault Integration](/nomad/docs/integrations/vault-integration) for more details | +| Variable | Description | +|--------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `NOMAD_ALLOC_DIR` | The path to the shared `alloc/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_TASK_DIR` | The path to the task `local/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_SECRETS_DIR` | Path to the task's secrets directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_MEMORY_LIMIT` | Memory limit in MB for the task | +| `NOMAD_MEMORY_MAX_LIMIT` | The maximum memory limit the task may use if client has excess memory capacity, in MB. Omitted if task isn't configured with memory oversubscription. | +| `NOMAD_CPU_LIMIT` | CPU limit in MHz for the task | +| `NOMAD_CPU_CORES` | The specific CPU cores reserved for the task in cpuset list notation. Omitted if the task does not request cpu cores. E.g. `0-2,7,12-14` | +| `NOMAD_ALLOC_ID` | Allocation ID of the task | +| `NOMAD_SHORT_ALLOC_ID` | The first 8 characters of the allocation ID of the task | +| `NOMAD_ALLOC_NAME` | Allocation name of the task. This is derived from the job name, task group name, and allocation index. | +| `NOMAD_ALLOC_INDEX` | Allocation index; useful to distinguish instances of task groups. From 0 to (count - 1). For system jobs and sysbatch jobs, this value will always be 0. The index is unique within a given version of a job, but canaries or failed tasks in a deployment may reuse the index. 
| +| `NOMAD_TASK_NAME` | Task's name | +| `NOMAD_GROUP_NAME` | Group's name | +| `NOMAD_JOB_ID` | Job's ID, which is equal to the Job name when submitted through CLI but can be different when using the API | +| `NOMAD_JOB_NAME` | Job's name | +| `NOMAD_JOB_PARENT_ID` | ID of the Job's parent if it has one | +| `NOMAD_DC` | Datacenter in which the allocation is running | +| `NOMAD_PARENT_CGROUP` | The parent cgroup used to contain task cgroups (Linux only) | +| `NOMAD_NAMESPACE` | Namespace in which the allocation is running | +| `NOMAD_REGION` | Region in which the allocation is running | +| `NOMAD_META_` | The metadata value given by `key` on the task's metadata. Note that this is different from [`${meta.}`](/nomad/docs/runtime/interpolation#node-variables-) which are keys in the node's metadata. | +| `VAULT_TOKEN` | The task's Vault token. See [Vault Integration](/nomad/docs/integrations/vault-integration) for more details | ### Network-related Variables From e6e0874337db40188e15adc575f92b45a4c28ec8 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 2 Feb 2023 11:48:57 -0500 Subject: [PATCH 3/6] volumes: `per_alloc` is incompatible with system/sysbatch jobs System and sysbatch jobs always have a `NOMAD_ALLOC_INDEX` of 0. So interpolation via `per_alloc` will not work as soon as there's more than one allocation placed. Validate against this on job submission. 
--- nomad/structs/structs.go | 2 +- nomad/structs/volume_test.go | 10 +++++++--- nomad/structs/volumes.go | 18 +++++++++--------- .../content/docs/job-specification/volume.mdx | 3 +++ 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index ba300873ee64..4503366a703b 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6608,7 +6608,7 @@ func (tg *TaskGroup) Validate(j *Job) error { canaries = tg.Update.Canary } for name, volReq := range tg.Volumes { - if err := volReq.Validate(tg.Count, canaries); err != nil { + if err := volReq.Validate(j.Type, tg.Count, canaries); err != nil { mErr.Errors = append(mErr.Errors, fmt.Errorf( "Task group volume validation for %s failed: %v", name, err)) } diff --git a/nomad/structs/volume_test.go b/nomad/structs/volume_test.go index fa86ec2fb949..11415963b378 100644 --- a/nomad/structs/volume_test.go +++ b/nomad/structs/volume_test.go @@ -30,6 +30,7 @@ func TestVolumeRequest_Validate(t *testing.T) { "host volumes cannot have an access mode", "host volumes cannot have an attachment mode", "host volumes cannot have mount options", + "volume cannot be per_alloc for system or sysbatch jobs", "volume cannot be per_alloc when canaries are in use", }, canariesCount: 1, @@ -69,8 +70,11 @@ func TestVolumeRequest_Validate(t *testing.T) { }, }, { - name: "CSI volume per-alloc with canaries", - expected: []string{"volume cannot be per_alloc when canaries are in use"}, + name: "CSI volume per-alloc with canaries", + expected: []string{ + "volume cannot be per_alloc for system or sysbatch jobs", + "volume cannot be per_alloc when canaries are in use", + }, canariesCount: 1, req: &VolumeRequest{ Type: VolumeTypeCSI, @@ -81,7 +85,7 @@ func TestVolumeRequest_Validate(t *testing.T) { for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - err := tc.req.Validate(tc.taskGroupCount, tc.canariesCount) + err := tc.req.Validate(JobTypeSystem, tc.taskGroupCount, 
tc.canariesCount) for _, expected := range tc.expected { require.Contains(t, err.Error(), expected) } diff --git a/nomad/structs/volumes.go b/nomad/structs/volumes.go index 563d7b1cf449..d1ba01659854 100644 --- a/nomad/structs/volumes.go +++ b/nomad/structs/volumes.go @@ -102,7 +102,7 @@ type VolumeRequest struct { PerAlloc bool } -func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { +func (v *VolumeRequest) Validate(jobType string, taskGroupCount, canaries int) error { if !(v.Type == VolumeTypeHost || v.Type == VolumeTypeCSI) { return fmt.Errorf("volume has unrecognized type %s", v.Type) @@ -116,6 +116,14 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { if v.Source == "" { addErr("volume has an empty source") } + if v.PerAlloc { + if jobType == JobTypeSystem || jobType == JobTypeSysBatch { + addErr("volume cannot be per_alloc for system or sysbatch jobs") + } + if canaries > 0 { + addErr("volume cannot be per_alloc when canaries are in use") + } + } switch v.Type { @@ -129,9 +137,6 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { if v.MountOptions != nil { addErr("host volumes cannot have mount options") } - if v.PerAlloc && canaries > 0 { - addErr("volume cannot be per_alloc when canaries are in use") - } case VolumeTypeCSI: @@ -170,11 +175,6 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { case CSIVolumeAccessModeMultiNodeMultiWriter: // note: we intentionally allow read-only mount of this mode } - - if v.PerAlloc && canaries > 0 { - addErr("volume cannot be per_alloc when canaries are in use") - } - } return mErr.ErrorOrNil() diff --git a/website/content/docs/job-specification/volume.mdx b/website/content/docs/job-specification/volume.mdx index df09b96f8067..58ec2bd35259 100644 --- a/website/content/docs/job-specification/volume.mdx +++ b/website/content/docs/job-specification/volume.mdx @@ -82,6 +82,9 @@ the [volume_mount][volume_mount] block in the `task` 
configuration. = true`, the allocation named `myjob.mygroup.mytask[0]` will require a volume ID `myvolume[0]`. + The `per_alloc` field cannot be true for system jobs, sysbatch jobs, or jobs + that use canaries. + The following fields are only valid for volumes with `type = "csi"`: - `access_mode` `(string: )` - Defines whether a volume should be From 7b491a186426398c9475fb0426ab1a9efe0660d1 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 2 Feb 2023 11:54:26 -0500 Subject: [PATCH 4/6] changelog entry --- .changelog/16030.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/16030.txt diff --git a/.changelog/16030.txt b/.changelog/16030.txt new file mode 100644 index 000000000000..4649a508c778 --- /dev/null +++ b/.changelog/16030.txt @@ -0,0 +1,3 @@ +```release-note:bug +volumes: Fixed a bug where `per_alloc` was allowed for volume blocks on system and sysbatch jobs, which do not have an allocation index +``` From 4a27c9e321fdda9345a261b77ae86d81c3329539 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 2 Feb 2023 15:18:55 -0500 Subject: [PATCH 5/6] use ConvertSlice helper --- scheduler/generic_sched_test.go | 7 +++---- scheduler/scheduler_system_test.go | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 3a8b7f406c77..1bace41c76c9 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -9,6 +9,7 @@ import ( memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" @@ -93,10 +94,8 @@ func TestServiceSched_JobRegister(t *testing.T) { } // Ensure allocations have unique names derived from Job.ID - allocNames := []string{} - for _, alloc := range out { - allocNames = append(allocNames, alloc.Name) - } + allocNames := helper.ConvertSlice(out, + 
func(alloc *structs.Allocation) string { return alloc.Name }) expectAllocNames := []string{} for i := 0; i < 10; i++ { expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[%d]", job.ID, i)) diff --git a/scheduler/scheduler_system_test.go b/scheduler/scheduler_system_test.go index 0987bf240bc1..40f83ceb9770 100644 --- a/scheduler/scheduler_system_test.go +++ b/scheduler/scheduler_system_test.go @@ -9,6 +9,7 @@ import ( memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" @@ -67,10 +68,8 @@ func TestSystemSched_JobRegister(t *testing.T) { require.Len(t, out, 10) // Note that all system allocations have the same name derived from Job.Name - allocNames := []string{} - for _, alloc := range out { - allocNames = append(allocNames, alloc.Name) - } + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) expectAllocNames := []string{} for i := 0; i < 10; i++ { expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[0]", job.Name)) From d13eb2c77305d46b48488571dd5fe591cb7e9629 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 2 Feb 2023 15:23:57 -0500 Subject: [PATCH 6/6] add a test condition for sysbatch too --- scheduler/scheduler_sysbatch_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scheduler/scheduler_sysbatch_test.go b/scheduler/scheduler_sysbatch_test.go index fac543699491..49ddbfc574d8 100644 --- a/scheduler/scheduler_sysbatch_test.go +++ b/scheduler/scheduler_sysbatch_test.go @@ -7,11 +7,13 @@ import ( "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/kr/pretty" + 
"github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -64,6 +66,15 @@ func TestSysBatch_JobRegister(t *testing.T) { // Ensure all allocations placed require.Len(t, out, 10) + // Note that all sysbatch allocations have the same name derived from Job.Name + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.pinger[0]", job.Name)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Check the available nodes count, ok := out[0].Metrics.NodesAvailable["dc1"] require.True(t, ok)