diff --git a/.changelog/16030.txt b/.changelog/16030.txt new file mode 100644 index 000000000000..4649a508c778 --- /dev/null +++ b/.changelog/16030.txt @@ -0,0 +1,3 @@ +```release-note:bug +volumes: Fixed a bug where `per_alloc` was allowed for volume blocks on system and sysbatch jobs, which do not have an allocation index +``` diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 54d747f73d8e..41a06b1123f5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6264,7 +6264,7 @@ func (tg *TaskGroup) Validate(j *Job) error { canaries = tg.Update.Canary } for name, volReq := range tg.Volumes { - if err := volReq.Validate(tg.Count, canaries); err != nil { + if err := volReq.Validate(j.Type, tg.Count, canaries); err != nil { mErr.Errors = append(mErr.Errors, fmt.Errorf( "Task group volume validation for %s failed: %v", name, err)) } diff --git a/nomad/structs/volume_test.go b/nomad/structs/volume_test.go index 6bf6c025fee7..35d0c2b8fe31 100644 --- a/nomad/structs/volume_test.go +++ b/nomad/structs/volume_test.go @@ -65,8 +65,11 @@ func TestVolumeRequest_Validate(t *testing.T) { }, }, { - name: "CSI volume per-alloc with canaries", - expected: []string{"volume cannot be per_alloc when canaries are in use"}, + name: "CSI volume per-alloc with canaries", + expected: []string{ + "volume cannot be per_alloc for system or sysbatch jobs", + "volume cannot be per_alloc when canaries are in use", + }, canariesCount: 1, req: &VolumeRequest{ Type: VolumeTypeCSI, @@ -78,7 +81,7 @@ func TestVolumeRequest_Validate(t *testing.T) { for _, tc := range testCases { tc = tc t.Run(tc.name, func(t *testing.T) { - err := tc.req.Validate(tc.taskGroupCount, tc.canariesCount) + err := tc.req.Validate(JobTypeSystem, tc.taskGroupCount, tc.canariesCount) for _, expected := range tc.expected { require.Contains(t, err.Error(), expected) } diff --git a/nomad/structs/volumes.go b/nomad/structs/volumes.go index 0f8b040de458..0b16f9b3e7ca 100644 --- a/nomad/structs/volumes.go +++ b/nomad/structs/volumes.go @@ -102,7 +102,7 @@ type VolumeRequest struct { PerAlloc bool } -func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { +func (v *VolumeRequest) Validate(jobType string, taskGroupCount, canaries int) error { if !(v.Type == VolumeTypeHost || v.Type == VolumeTypeCSI) { return fmt.Errorf("volume has unrecognized type %s", v.Type) @@ -170,9 +170,13 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { case CSIVolumeAccessModeMultiNodeMultiWriter: // note: we intentionally allow read-only mount of this mode } - - if v.PerAlloc && canaries > 0 { - addErr("volume cannot be per_alloc when canaries are in use") + if v.PerAlloc { + if jobType == JobTypeSystem || jobType == JobTypeSysBatch { + addErr("volume cannot be per_alloc for system or sysbatch jobs") + } + if canaries > 0 { + addErr("volume cannot be per_alloc when canaries are in use") + } } } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 7416e4d8bf81..6ed53cfc49a9 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -9,11 +9,13 @@ import ( memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/exp/slices" @@ -90,6 +92,15 @@ func TestServiceSched_JobRegister(t *testing.T) { t.Fatalf("bad: %#v", out) } + // Ensure allocations have unique names derived from Job.ID + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[%d]", job.ID, i)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Ensure different ports were used. used := make(map[int]map[string]struct{}) for _, alloc := range out { diff --git a/scheduler/scheduler_sysbatch_test.go b/scheduler/scheduler_sysbatch_test.go index fac543699491..49ddbfc574d8 100644 --- a/scheduler/scheduler_sysbatch_test.go +++ b/scheduler/scheduler_sysbatch_test.go @@ -7,11 +7,13 @@ import ( "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/kr/pretty" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -64,6 +66,15 @@ func TestSysBatch_JobRegister(t *testing.T) { // Ensure all allocations placed require.Len(t, out, 10) + // Note that all sysbatch allocations have the same name derived from Job.Name + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.pinger[0]", job.Name)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Check the available nodes count, ok := out[0].Metrics.NodesAvailable["dc1"] require.True(t, ok) diff --git a/scheduler/scheduler_system_test.go b/scheduler/scheduler_system_test.go index f21b3937881c..82e50c5a2764 100644 --- a/scheduler/scheduler_system_test.go +++ b/scheduler/scheduler_system_test.go @@ -9,6 +9,7 @@ import ( memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" @@ -65,6 +66,15 @@ func TestSystemSched_JobRegister(t *testing.T) { // Ensure all allocations placed require.Len(t, out, 10) + // Note that all system allocations have the same name derived from Job.Name + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[0]", job.Name)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Check the available nodes count, ok := out[0].Metrics.NodesAvailable["dc1"] require.True(t, ok) diff --git a/website/content/docs/job-specification/volume.mdx b/website/content/docs/job-specification/volume.mdx index a416c804ebb7..8fe508e0e997 100644 --- a/website/content/docs/job-specification/volume.mdx +++ b/website/content/docs/job-specification/volume.mdx @@ -99,6 +99,9 @@ The following fields are only valid for volumes with `type = "csi"`: = true`, the allocation named `myjob.mygroup.mytask[0]` will require a volume ID `myvolume[0]`. + The `per_alloc` field cannot be true for system jobs, sysbatch jobs, or jobs + that use canaries. + - `mount_options` - Options for mounting CSI volumes that have the `file-system` [attachment mode]. These options override the `mount_options` field from [volume registration]. Consult the documentation for your storage diff --git a/website/content/partials/envvars.mdx b/website/content/partials/envvars.mdx index cf57f13a0889..b4081402d6ac 100644 --- a/website/content/partials/envvars.mdx +++ b/website/content/partials/envvars.mdx @@ -1,371 +1,63 @@ -
Variable | -Description | -
---|---|
- NOMAD_ALLOC_DIR
- |
-
- The path to the shared alloc/ directory. See
- here for more information.
- |
-
- NOMAD_TASK_DIR
- |
-
- The path to the task local/ directory. See
- here for more information.
- |
-
- NOMAD_SECRETS_DIR
- |
- - Path to the task's secrets directory. See - here for more information. - | -
- NOMAD_MEMORY_LIMIT
- |
- Memory limit in MB for the task | -
- NOMAD_MEMORY_MAX_LIMIT
- |
- - The maximum memory limit the task may use if client has excess memory - capacity, in MB. Omitted if task isn't configured with memory oversubscription. - | -
- NOMAD_CPU_LIMIT
- |
- CPU limit in MHz for the task | -
- NOMAD_ALLOC_ID
- |
- Allocation ID of the task | -
- NOMAD_ALLOC_NAME
- |
- Allocation name of the task | -
- NOMAD_ALLOC_INDEX
- |
- - Allocation index; useful to distinguish instances of task groups. From 0 - to (count - 1). The index is unique within a given version of a job, but - canaries or failed tasks in a deployment may reuse the index. - | -
- NOMAD_TASK_NAME
- |
- Task's name | -
- NOMAD_GROUP_NAME
- |
- Group's name | -
- NOMAD_JOB_ID
- |
- - Job's ID, which is equal to the Job name when submitted through - CLI but can be different when using the API - | -
- NOMAD_JOB_NAME
- |
- Job's name | -
- NOMAD_JOB_PARENT_ID
- |
- ID of the Job's parent if it has one | -
- NOMAD_DC
- |
- Datacenter in which the allocation is running | -
- NOMAD_NAMESPACE
- |
- Namespace in which the allocation is running | -
- NOMAD_REGION
- |
- Region in which the allocation is running | -
- NOMAD_META_<key>
- |
-
- The metadata value given by key on the task's
- metadata. Note that this is different from
-
- ${meta.<key>}
-
- which are keys in the node's metadata.
- |
-
- VAULT_TOKEN
- |
- - The task's Vault token. See - Vault Integration - for more details - | -
Network-related Variables | -|
- NOMAD_IP_<label>
- |
-
- Host IP for the given port label . See
- here for more information.
- |
-
- NOMAD_PORT_<label>
- |
-
- Port for the given port label . Driver-specified port when a
- port map is used, otherwise the host's static or dynamic port
- allocation. Services should bind to this port. See
- here for more information.
- |
-
- NOMAD_ADDR_<label>
- |
-
- Host IP:Port pair for the given port label .
- |
-
- NOMAD_HOST_PORT_<label>
- |
-
- Port on the host for the port label . See
- here for more
- information.
- |
-
- NOMAD_IP_<task>_<label>
- |
-
- Deprecated. Host IP for the given port label
- and task for tasks in the same task group. Only available
- when setting ports via the task resource network port mapping.
- |
-
- NOMAD_PORT_<task>_<label>
- |
-
- Deprecated. Port for the given port label and
- task for tasks in the same task group. Driver-specified port
- when a port map is used, otherwise the host's static or dynamic port
- allocation. Services should bind to this port. Only available when setting
- ports via the task resource network port mapping.
- |
-
- NOMAD_ADDR_<task>_<label>
- |
-
- Deprecated. Host IP:Port pair for the given port
- label and task for tasks in the same task group.
- Only available when setting ports via the task resource network port
- mapping.
- |
-
- NOMAD_HOST_PORT_<task>_<label>
- |
-
- Deprecated. Port on the host for the port label and
- task for tasks in the same task group. Only available when
- setting ports via the task resource network port mapping.
- |
-
- NOMAD_UPSTREAM_IP_<service>
- |
-
- IP for the given service when defined as a Consul Connect
- upstream.
- |
-
- NOMAD_UPSTREAM_PORT_<service>
- |
-
- Port for the given service when defined as a Consul Connect
- upstream.
- |
-
- NOMAD_UPSTREAM_ADDR_<service>
- |
-
- Host IP:Port for the given service when
- defined as a Consul Connect
- upstream.
- |
-
- NOMAD_ENVOY_ADMIN_ADDR_<service>
- |
-
- Local address 127.0.0.2:Port for the admin port of the
- envoy sidecar for the given service when defined as a
- Consul Connect enabled service. Envoy runs inside the group network
- namespace unless configured for host networking.
- |
-
- NOMAD_ENVOY_READY_ADDR_<service>
- |
-
- Local address 127.0.0.1:Port for the ready port of the
- envoy sidecar for the given service when defined as a
- Consul Connect enabled service. Envoy runs inside the group network
- namespace unless configured for host networking.
- |
-
Consul-related Variables (only set for connect native tasks) | -|
- CONSUL_HTTP_ADDR
- |
- - Specifies the address to the local Consul agent. Will be automatically - set to a unix domain socket in bridge networking mode, or a tcp address in - host networking mode. - | -
- CONSUL_HTTP_TOKEN
- |
- - Specifies the Consul ACL token used to authorize with Consul. Will be - automatically set to a generated Connect service identity token specific - to the service instance if Consul ACLs are enabled. - | -
- CONSUL_HTTP_SSL
- |
- - Specifies whether HTTPS should be used when communicating with consul. Will - be automatically set to true if Nomad is configured to communicate with - Consul using TLS. - | -
- CONSUL_HTTP_SSL_VERIFY
- |
- - Specifies whether the HTTPS connection with Consul should be mutually - verified. Will be automatically set to true if Nomad is configured to - verify TLS certificates. - | -
- CONSUL_CACERT
- |
-
- Specifies the path to the CA certificate used for Consul communication.
- Will be automatically set if Nomad is configured with the consul.share_ssl
- option.
- |
-
- CONSUL_CLIENT_CERT
- |
-
- Specifies the path to the Client certificate used for Consul communication.
- Will be automatically set if Nomad is configured with the consul.share_ssl
- option.
- |
-
- CONSUL_CLIENT_KEY
- |
-
- Specifies the path to the CLient Key certificate used for Consul communication.
- Will be automatically set if Nomad is configured with the consul.share_ssl
- option.
- |
-
- CONSUL_TLS_SERVER_NAME
- |
- - Specifies the server name to use as the SNI host for Consul communication. - Will be automatically set if Consul is configured to use TLS and the task - is in a group using bridge networking mode. - | -