diff --git a/.changelog/16030.txt b/.changelog/16030.txt new file mode 100644 index 000000000000..4649a508c778 --- /dev/null +++ b/.changelog/16030.txt @@ -0,0 +1,3 @@ +```release-note:bug +volumes: Fixed a bug where `per_alloc` was allowed for volume blocks on system and sysbatch jobs, which do not have an allocation index +``` diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 54d747f73d8e..41a06b1123f5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6264,7 +6264,7 @@ func (tg *TaskGroup) Validate(j *Job) error { canaries = tg.Update.Canary } for name, volReq := range tg.Volumes { - if err := volReq.Validate(tg.Count, canaries); err != nil { + if err := volReq.Validate(j.Type, tg.Count, canaries); err != nil { mErr.Errors = append(mErr.Errors, fmt.Errorf( "Task group volume validation for %s failed: %v", name, err)) } diff --git a/nomad/structs/volume_test.go b/nomad/structs/volume_test.go index 6bf6c025fee7..35d0c2b8fe31 100644 --- a/nomad/structs/volume_test.go +++ b/nomad/structs/volume_test.go @@ -65,8 +65,11 @@ func TestVolumeRequest_Validate(t *testing.T) { }, }, { - name: "CSI volume per-alloc with canaries", - expected: []string{"volume cannot be per_alloc when canaries are in use"}, + name: "CSI volume per-alloc with canaries", + expected: []string{ + "volume cannot be per_alloc for system or sysbatch jobs", + "volume cannot be per_alloc when canaries are in use", + }, canariesCount: 1, req: &VolumeRequest{ Type: VolumeTypeCSI, @@ -78,7 +81,7 @@ func TestVolumeRequest_Validate(t *testing.T) { for _, tc := range testCases { tc = tc t.Run(tc.name, func(t *testing.T) { - err := tc.req.Validate(tc.taskGroupCount, tc.canariesCount) + err := tc.req.Validate(JobTypeSystem, tc.taskGroupCount, tc.canariesCount) for _, expected := range tc.expected { require.Contains(t, err.Error(), expected) } diff --git a/nomad/structs/volumes.go b/nomad/structs/volumes.go index 0f8b040de458..0b16f9b3e7ca 100644 --- a/nomad/structs/volumes.go +++ b/nomad/structs/volumes.go @@ -102,7 +102,7 @@ type VolumeRequest struct { PerAlloc bool } -func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { +func (v *VolumeRequest) Validate(jobType string, taskGroupCount, canaries int) error { if !(v.Type == VolumeTypeHost || v.Type == VolumeTypeCSI) { return fmt.Errorf("volume has unrecognized type %s", v.Type) @@ -170,9 +170,13 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { case CSIVolumeAccessModeMultiNodeMultiWriter: // note: we intentionally allow read-only mount of this mode } - - if v.PerAlloc && canaries > 0 { - addErr("volume cannot be per_alloc when canaries are in use") + if v.PerAlloc { + if jobType == JobTypeSystem || jobType == JobTypeSysBatch { + addErr("volume cannot be per_alloc for system or sysbatch jobs") + } + if canaries > 0 { + addErr("volume cannot be per_alloc when canaries are in use") + } } } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 7416e4d8bf81..6ed53cfc49a9 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -9,11 +9,13 @@ import ( memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/exp/slices" @@ -90,6 +92,15 @@ func TestServiceSched_JobRegister(t *testing.T) { t.Fatalf("bad: %#v", out) } + // Ensure allocations have unique names derived from Job.ID + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[%d]", job.ID, i)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Ensure different ports were used. used := make(map[int]map[string]struct{}) for _, alloc := range out { diff --git a/scheduler/scheduler_sysbatch_test.go b/scheduler/scheduler_sysbatch_test.go index fac543699491..49ddbfc574d8 100644 --- a/scheduler/scheduler_sysbatch_test.go +++ b/scheduler/scheduler_sysbatch_test.go @@ -7,11 +7,13 @@ import ( "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/kr/pretty" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -64,6 +66,15 @@ func TestSysBatch_JobRegister(t *testing.T) { // Ensure all allocations placed require.Len(t, out, 10) + // Note that all sysbatch allocations have the same name derived from Job.Name + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.pinger[0]", job.Name)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Check the available nodes count, ok := out[0].Metrics.NodesAvailable["dc1"] require.True(t, ok) diff --git a/scheduler/scheduler_system_test.go b/scheduler/scheduler_system_test.go index f21b3937881c..82e50c5a2764 100644 --- a/scheduler/scheduler_system_test.go +++ b/scheduler/scheduler_system_test.go @@ -9,6 +9,7 @@ import ( memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" @@ -65,6 +66,15 @@ func TestSystemSched_JobRegister(t *testing.T) { // Ensure all allocations placed require.Len(t, out, 10) + // Note that all system allocations have the same name derived from Job.Name + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[0]", job.Name)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Check the available nodes count, ok := out[0].Metrics.NodesAvailable["dc1"] require.True(t, ok) diff --git a/website/content/docs/job-specification/volume.mdx b/website/content/docs/job-specification/volume.mdx index a416c804ebb7..8fe508e0e997 100644 --- a/website/content/docs/job-specification/volume.mdx +++ b/website/content/docs/job-specification/volume.mdx @@ -99,6 +99,9 @@ The following fields are only valid for volumes with `type = "csi"`: = true`, the allocation named `myjob.mygroup.mytask[0]` will require a volume ID `myvolume[0]`. + The `per_alloc` field cannot be true for system jobs, sysbatch jobs, or jobs + that use canaries. + - `mount_options` - Options for mounting CSI volumes that have the `file-system` [attachment mode]. These options override the `mount_options` field from [volume registration]. Consult the documentation for your storage diff --git a/website/content/partials/envvars.mdx b/website/content/partials/envvars.mdx index cf57f13a0889..b4081402d6ac 100644 --- a/website/content/partials/envvars.mdx +++ b/website/content/partials/envvars.mdx @@ -1,371 +1,63 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
VariableDescription
- NOMAD_ALLOC_DIR - - The path to the shared alloc/ directory. See - here for more information. -
- NOMAD_TASK_DIR - - The path to the task local/ directory. See - here for more information. -
- NOMAD_SECRETS_DIR - - Path to the task's secrets directory. See - here for more information. -
- NOMAD_MEMORY_LIMIT - Memory limit in MB for the task
- NOMAD_MEMORY_MAX_LIMIT - - The maximum memory limit the task may use if client has excess memory - capacity, in MB. Omitted if task isn't configured with memory oversubscription. -
- NOMAD_CPU_LIMIT - CPU limit in MHz for the task
- NOMAD_ALLOC_ID - Allocation ID of the task
- NOMAD_ALLOC_NAME - Allocation name of the task
- NOMAD_ALLOC_INDEX - - Allocation index; useful to distinguish instances of task groups. From 0 - to (count - 1). The index is unique within a given version of a job, but - canaries or failed tasks in a deployment may reuse the index. -
- NOMAD_TASK_NAME - Task's name
- NOMAD_GROUP_NAME - Group's name
- NOMAD_JOB_ID - - Job's ID, which is equal to the Job name when submitted through - CLI but can be different when using the API -
- NOMAD_JOB_NAME - Job's name
- NOMAD_JOB_PARENT_ID - ID of the Job's parent if it has one
- NOMAD_DC - Datacenter in which the allocation is running
- NOMAD_NAMESPACE - Namespace in which the allocation is running
- NOMAD_REGION - Region in which the allocation is running
- NOMAD_META_<key> - - The metadata value given by key on the task's - metadata. Note that this is different from - - ${meta.<key>} - - which are keys in the node's metadata. -
- VAULT_TOKEN - - The task's Vault token. See - Vault Integration - for more details -
Network-related Variables
- NOMAD_IP_<label> - - Host IP for the given port label. See - here for more information. -
- NOMAD_PORT_<label> - - Port for the given port label. Driver-specified port when a - port map is used, otherwise the host's static or dynamic port - allocation. Services should bind to this port. See - here for more information. -
- NOMAD_ADDR_<label> - - Host IP:Port pair for the given port label. -
- NOMAD_HOST_PORT_<label> - - Port on the host for the port label. See - here for more - information. -
- NOMAD_IP_<task>_<label> - - Deprecated. Host IP for the given port label - and task for tasks in the same task group. Only available - when setting ports via the task resource network port mapping. -
- NOMAD_PORT_<task>_<label> - - Deprecated. Port for the given port label and - task for tasks in the same task group. Driver-specified port - when a port map is used, otherwise the host's static or dynamic port - allocation. Services should bind to this port. Only available when setting - ports via the task resource network port mapping. -
- NOMAD_ADDR_<task>_<label> - - Deprecated. Host IP:Port pair for the given port - label and task for tasks in the same task group. - Only available when setting ports via the task resource network port - mapping. -
- NOMAD_HOST_PORT_<task>_<label> - - Deprecated. Port on the host for the port label and - task for tasks in the same task group. Only available when - setting ports via the task resource network port mapping. -
- NOMAD_UPSTREAM_IP_<service> - - IP for the given service when defined as a Consul Connect - upstream. -
- NOMAD_UPSTREAM_PORT_<service> - - Port for the given service when defined as a Consul Connect - upstream. -
- NOMAD_UPSTREAM_ADDR_<service> - - Host IP:Port for the given service when - defined as a Consul Connect - upstream. -
- NOMAD_ENVOY_ADMIN_ADDR_<service> - - Local address 127.0.0.2:Port for the admin port of the - envoy sidecar for the given service when defined as a - Consul Connect enabled service. Envoy runs inside the group network - namespace unless configured for host networking. -
- NOMAD_ENVOY_READY_ADDR_<service> - - Local address 127.0.0.1:Port for the ready port of the - envoy sidecar for the given service when defined as a - Consul Connect enabled service. Envoy runs inside the group network - namespace unless configured for host networking. -
Consul-related Variables (only set for connect native tasks)
- CONSUL_HTTP_ADDR - - Specifies the address to the local Consul agent. Will be automatically - set to a unix domain socket in bridge networking mode, or a tcp address in - host networking mode. -
- CONSUL_HTTP_TOKEN - - Specifies the Consul ACL token used to authorize with Consul. Will be - automatically set to a generated Connect service identity token specific - to the service instance if Consul ACLs are enabled. -
- CONSUL_HTTP_SSL - - Specifies whether HTTPS should be used when communicating with consul. Will - be automatically set to true if Nomad is configured to communicate with - Consul using TLS. -
- CONSUL_HTTP_SSL_VERIFY - - Specifies whether the HTTPS connection with Consul should be mutually - verified. Will be automatically set to true if Nomad is configured to - verify TLS certificates. -
- CONSUL_CACERT - - Specifies the path to the CA certificate used for Consul communication. - Will be automatically set if Nomad is configured with the consul.share_ssl - option. -
- CONSUL_CLIENT_CERT - - Specifies the path to the Client certificate used for Consul communication. - Will be automatically set if Nomad is configured with the consul.share_ssl - option. -
- CONSUL_CLIENT_KEY - - Specifies the path to the CLient Key certificate used for Consul communication. - Will be automatically set if Nomad is configured with the consul.share_ssl - option. -
- CONSUL_TLS_SERVER_NAME - - Specifies the server name to use as the SNI host for Consul communication. - Will be automatically set if Consul is configured to use TLS and the task - is in a group using bridge networking mode. -
+### Job-related variables + +| Variable | Description | +|--------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `NOMAD_ALLOC_DIR` | The path to the shared `alloc/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_TASK_DIR` | The path to the task `local/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_SECRETS_DIR` | Path to the task's secrets directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_MEMORY_LIMIT` | Memory limit in MB for the task | +| `NOMAD_MEMORY_MAX_LIMIT` | The maximum memory limit the task may use if client has excess memory capacity, in MB. Omitted if task isn't configured with memory oversubscription. | +| `NOMAD_CPU_LIMIT` | CPU limit in MHz for the task | +| `NOMAD_CPU_CORES` | The specific CPU cores reserved for the task in cpuset list notation. Omitted if the task does not request cpu cores. E.g. `0-2,7,12-14` | +| `NOMAD_ALLOC_ID` | Allocation ID of the task | +| `NOMAD_SHORT_ALLOC_ID` | The first 8 characters of the allocation ID of the task | +| `NOMAD_ALLOC_NAME` | Allocation name of the task. This is derived from the job name, task group name, and allocation index. | +| `NOMAD_ALLOC_INDEX` | Allocation index; useful to distinguish instances of task groups. From 0 to (count - 1). For system jobs and sysbatch jobs, this value will always be 0. The index is unique within a given version of a job, but canaries or failed tasks in a deployment may reuse the index. | +| `NOMAD_TASK_NAME` | Task's name | +| `NOMAD_GROUP_NAME` | Group's name | +| `NOMAD_JOB_ID` | Job's ID, which is equal to the Job name when submitted through CLI but can be different when using the API | +| `NOMAD_JOB_NAME` | Job's name | +| `NOMAD_JOB_PARENT_ID` | ID of the Job's parent if it has one | +| `NOMAD_DC` | Datacenter in which the allocation is running | +| `NOMAD_PARENT_CGROUP` | The parent cgroup used to contain task cgroups (Linux only) | +| `NOMAD_NAMESPACE` | Namespace in which the allocation is running | +| `NOMAD_REGION` | Region in which the allocation is running | +| `NOMAD_META_` | The metadata value given by `key` on the task's metadata. Note that this is different from [`${meta.}`](/nomad/docs/runtime/interpolation#node-variables-) which are keys in the node's metadata. | +| `VAULT_TOKEN` | The task's Vault token. See [Vault Integration](/nomad/docs/integrations/vault-integration) for more details | + +### Network-related Variables + +| Variable | Description | +|------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `NOMAD_IP_