From 7d0468992f212accd0cdbae0f7a2a535478db931 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 2 Feb 2023 16:18:01 -0500 Subject: [PATCH] System and sysbatch jobs always have zero index (#16030) Service jobs should have unique allocation Names, derived from the Job.ID. System jobs do not have unique allocation Names because the index is intended to indicate the instance out of a desired count size. Because system jobs do not have an explicit count but the results are based on the targeted nodes, the index is less informative and this was intentionally omitted from the original design. Update docs to make it clear that NOMAD_ALLOC_INDEX is always zero for system/sysbatch jobs Validate that `volume.per_alloc` is incompatible with system/sysbatch jobs. System and sysbatch jobs always have a `NOMAD_ALLOC_INDEX` of 0. So interpolation via `per_alloc` will not work as soon as there's more than one allocation placed. Validate against this on job submission. --- .changelog/16030.txt | 3 + nomad/structs/structs.go | 2 +- nomad/structs/volume_test.go | 9 +- nomad/structs/volumes.go | 12 +- scheduler/generic_sched_test.go | 11 + scheduler/scheduler_sysbatch_test.go | 11 + scheduler/scheduler_system_test.go | 11 + .../content/docs/job-specification/volume.mdx | 3 + website/content/partials/envvars.mdx | 432 +++--------------- 9 files changed, 115 insertions(+), 379 deletions(-) create mode 100644 .changelog/16030.txt diff --git a/.changelog/16030.txt b/.changelog/16030.txt new file mode 100644 index 000000000000..4649a508c778 --- /dev/null +++ b/.changelog/16030.txt @@ -0,0 +1,3 @@ +```release-note:bug +volumes: Fixed a bug where `per_alloc` was allowed for volume blocks on system and sysbatch jobs, which do not have an allocation index +``` diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 54d747f73d8e..41a06b1123f5 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -6264,7 +6264,7 @@ func (tg *TaskGroup) Validate(j *Job) error {
canaries = tg.Update.Canary } for name, volReq := range tg.Volumes { - if err := volReq.Validate(tg.Count, canaries); err != nil { + if err := volReq.Validate(j.Type, tg.Count, canaries); err != nil { mErr.Errors = append(mErr.Errors, fmt.Errorf( "Task group volume validation for %s failed: %v", name, err)) } diff --git a/nomad/structs/volume_test.go b/nomad/structs/volume_test.go index 6bf6c025fee7..35d0c2b8fe31 100644 --- a/nomad/structs/volume_test.go +++ b/nomad/structs/volume_test.go @@ -65,8 +65,11 @@ func TestVolumeRequest_Validate(t *testing.T) { }, }, { - name: "CSI volume per-alloc with canaries", - expected: []string{"volume cannot be per_alloc when canaries are in use"}, + name: "CSI volume per-alloc with canaries", + expected: []string{ + "volume cannot be per_alloc for system or sysbatch jobs", + "volume cannot be per_alloc when canaries are in use", + }, canariesCount: 1, req: &VolumeRequest{ Type: VolumeTypeCSI, @@ -78,7 +81,7 @@ func TestVolumeRequest_Validate(t *testing.T) { for _, tc := range testCases { tc = tc t.Run(tc.name, func(t *testing.T) { - err := tc.req.Validate(tc.taskGroupCount, tc.canariesCount) + err := tc.req.Validate(JobTypeSystem, tc.taskGroupCount, tc.canariesCount) for _, expected := range tc.expected { require.Contains(t, err.Error(), expected) } diff --git a/nomad/structs/volumes.go b/nomad/structs/volumes.go index 0f8b040de458..0b16f9b3e7ca 100644 --- a/nomad/structs/volumes.go +++ b/nomad/structs/volumes.go @@ -102,7 +102,7 @@ type VolumeRequest struct { PerAlloc bool } -func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { +func (v *VolumeRequest) Validate(jobType string, taskGroupCount, canaries int) error { if !(v.Type == VolumeTypeHost || v.Type == VolumeTypeCSI) { return fmt.Errorf("volume has unrecognized type %s", v.Type) @@ -170,9 +170,13 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error { case CSIVolumeAccessModeMultiNodeMultiWriter: // note: we intentionally allow read-only 
mount of this mode } - - if v.PerAlloc && canaries > 0 { - addErr("volume cannot be per_alloc when canaries are in use") + if v.PerAlloc { + if jobType == JobTypeSystem || jobType == JobTypeSysBatch { + addErr("volume cannot be per_alloc for system or sysbatch jobs") + } + if canaries > 0 { + addErr("volume cannot be per_alloc when canaries are in use") + } } } diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index 7416e4d8bf81..6ed53cfc49a9 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -9,11 +9,13 @@ import ( memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/exp/slices" @@ -90,6 +92,15 @@ func TestServiceSched_JobRegister(t *testing.T) { t.Fatalf("bad: %#v", out) } + // Ensure allocations have unique names derived from Job.ID + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[%d]", job.ID, i)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Ensure different ports were used. 
used := make(map[int]map[string]struct{}) for _, alloc := range out { diff --git a/scheduler/scheduler_sysbatch_test.go b/scheduler/scheduler_sysbatch_test.go index fac543699491..49ddbfc574d8 100644 --- a/scheduler/scheduler_sysbatch_test.go +++ b/scheduler/scheduler_sysbatch_test.go @@ -7,11 +7,13 @@ import ( "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" "github.com/kr/pretty" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -64,6 +66,15 @@ func TestSysBatch_JobRegister(t *testing.T) { // Ensure all allocations placed require.Len(t, out, 10) + // Note that all sysbatch allocations have the same name derived from Job.Name + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.pinger[0]", job.Name)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Check the available nodes count, ok := out[0].Metrics.NodesAvailable["dc1"] require.True(t, ok) diff --git a/scheduler/scheduler_system_test.go b/scheduler/scheduler_system_test.go index f21b3937881c..1d3a56d3acc9 100644 --- a/scheduler/scheduler_system_test.go +++ b/scheduler/scheduler_system_test.go @@ -9,10 +9,12 @@ import ( memdb "github.com/hashicorp/go-memdb" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -65,6 +67,15 @@ func TestSystemSched_JobRegister(t *testing.T) { // Ensure all allocations placed require.Len(t, out, 
10) + // Note that all system allocations have the same name derived from Job.Name + allocNames := helper.ConvertSlice(out, + func(alloc *structs.Allocation) string { return alloc.Name }) + expectAllocNames := []string{} + for i := 0; i < 10; i++ { + expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[0]", job.Name)) + } + must.SliceContainsAll(t, expectAllocNames, allocNames) + // Check the available nodes count, ok := out[0].Metrics.NodesAvailable["dc1"] require.True(t, ok) diff --git a/website/content/docs/job-specification/volume.mdx b/website/content/docs/job-specification/volume.mdx index a416c804ebb7..8fe508e0e997 100644 --- a/website/content/docs/job-specification/volume.mdx +++ b/website/content/docs/job-specification/volume.mdx @@ -99,6 +99,9 @@ The following fields are only valid for volumes with `type = "csi"`: = true`, the allocation named `myjob.mygroup.mytask[0]` will require a volume ID `myvolume[0]`. + The `per_alloc` field cannot be true for system jobs, sysbatch jobs, or jobs + that use canaries. + - `mount_options` - Options for mounting CSI volumes that have the `file-system` [attachment mode]. These options override the `mount_options` field from [volume registration]. Consult the documentation for your storage diff --git a/website/content/partials/envvars.mdx b/website/content/partials/envvars.mdx index cf57f13a0889..2c48b44c46d0 100644 --- a/website/content/partials/envvars.mdx +++ b/website/content/partials/envvars.mdx @@ -1,371 +1,61 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
VariableDescription
- NOMAD_ALLOC_DIR - - The path to the shared alloc/ directory. See - here for more information. -
- NOMAD_TASK_DIR - - The path to the task local/ directory. See - here for more information. -
- NOMAD_SECRETS_DIR - - Path to the task's secrets directory. See - here for more information. -
- NOMAD_MEMORY_LIMIT - Memory limit in MB for the task
- NOMAD_MEMORY_MAX_LIMIT - - The maximum memory limit the task may use if client has excess memory - capacity, in MB. Omitted if task isn't configured with memory oversubscription. -
- NOMAD_CPU_LIMIT - CPU limit in MHz for the task
- NOMAD_ALLOC_ID - Allocation ID of the task
- NOMAD_ALLOC_NAME - Allocation name of the task
- NOMAD_ALLOC_INDEX - - Allocation index; useful to distinguish instances of task groups. From 0 - to (count - 1). The index is unique within a given version of a job, but - canaries or failed tasks in a deployment may reuse the index. -
- NOMAD_TASK_NAME - Task's name
- NOMAD_GROUP_NAME - Group's name
- NOMAD_JOB_ID - - Job's ID, which is equal to the Job name when submitted through - CLI but can be different when using the API -
- NOMAD_JOB_NAME - Job's name
- NOMAD_JOB_PARENT_ID - ID of the Job's parent if it has one
- NOMAD_DC - Datacenter in which the allocation is running
- NOMAD_NAMESPACE - Namespace in which the allocation is running
- NOMAD_REGION - Region in which the allocation is running
- NOMAD_META_<key> - - The metadata value given by key on the task's - metadata. Note that this is different from - - ${meta.<key>} - - which are keys in the node's metadata. -
- VAULT_TOKEN - - The task's Vault token. See - Vault Integration - for more details -
Network-related Variables
- NOMAD_IP_<label> - - Host IP for the given port label. See - here for more information. -
- NOMAD_PORT_<label> - - Port for the given port label. Driver-specified port when a - port map is used, otherwise the host's static or dynamic port - allocation. Services should bind to this port. See - here for more information. -
- NOMAD_ADDR_<label> - - Host IP:Port pair for the given port label. -
- NOMAD_HOST_PORT_<label> - - Port on the host for the port label. See - here for more - information. -
- NOMAD_IP_<task>_<label> - - Deprecated. Host IP for the given port label - and task for tasks in the same task group. Only available - when setting ports via the task resource network port mapping. -
- NOMAD_PORT_<task>_<label> - - Deprecated. Port for the given port label and - task for tasks in the same task group. Driver-specified port - when a port map is used, otherwise the host's static or dynamic port - allocation. Services should bind to this port. Only available when setting - ports via the task resource network port mapping. -
- NOMAD_ADDR_<task>_<label> - - Deprecated. Host IP:Port pair for the given port - label and task for tasks in the same task group. - Only available when setting ports via the task resource network port - mapping. -
- NOMAD_HOST_PORT_<task>_<label> - - Deprecated. Port on the host for the port label and - task for tasks in the same task group. Only available when - setting ports via the task resource network port mapping. -
- NOMAD_UPSTREAM_IP_<service> - - IP for the given service when defined as a Consul Connect - upstream. -
- NOMAD_UPSTREAM_PORT_<service> - - Port for the given service when defined as a Consul Connect - upstream. -
- NOMAD_UPSTREAM_ADDR_<service> - - Host IP:Port for the given service when - defined as a Consul Connect - upstream. -
- NOMAD_ENVOY_ADMIN_ADDR_<service> - - Local address 127.0.0.2:Port for the admin port of the - envoy sidecar for the given service when defined as a - Consul Connect enabled service. Envoy runs inside the group network - namespace unless configured for host networking. -
- NOMAD_ENVOY_READY_ADDR_<service> - - Local address 127.0.0.1:Port for the ready port of the - envoy sidecar for the given service when defined as a - Consul Connect enabled service. Envoy runs inside the group network - namespace unless configured for host networking. -
Consul-related Variables (only set for connect native tasks)
- CONSUL_HTTP_ADDR - - Specifies the address to the local Consul agent. Will be automatically - set to a unix domain socket in bridge networking mode, or a tcp address in - host networking mode. -
- CONSUL_HTTP_TOKEN - - Specifies the Consul ACL token used to authorize with Consul. Will be - automatically set to a generated Connect service identity token specific - to the service instance if Consul ACLs are enabled. -
- CONSUL_HTTP_SSL - - Specifies whether HTTPS should be used when communicating with consul. Will - be automatically set to true if Nomad is configured to communicate with - Consul using TLS. -
- CONSUL_HTTP_SSL_VERIFY - - Specifies whether the HTTPS connection with Consul should be mutually - verified. Will be automatically set to true if Nomad is configured to - verify TLS certificates. -
- CONSUL_CACERT - - Specifies the path to the CA certificate used for Consul communication. - Will be automatically set if Nomad is configured with the consul.share_ssl - option. -
- CONSUL_CLIENT_CERT - - Specifies the path to the Client certificate used for Consul communication. - Will be automatically set if Nomad is configured with the consul.share_ssl - option. -
- CONSUL_CLIENT_KEY - - Specifies the path to the CLient Key certificate used for Consul communication. - Will be automatically set if Nomad is configured with the consul.share_ssl - option. -
- CONSUL_TLS_SERVER_NAME - - Specifies the server name to use as the SNI host for Consul communication. - Will be automatically set if Consul is configured to use TLS and the task - is in a group using bridge networking mode. -
+### Job-related variables + +| Variable | Description | +|--------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `NOMAD_ALLOC_DIR` | The path to the shared `alloc/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_TASK_DIR` | The path to the task `local/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_SECRETS_DIR` | Path to the task's secrets directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. | +| `NOMAD_MEMORY_LIMIT` | Memory limit in MB for the task | +| `NOMAD_MEMORY_MAX_LIMIT` | The maximum memory limit the task may use if client has excess memory capacity, in MB. Omitted if task isn't configured with memory oversubscription. | +| `NOMAD_CPU_LIMIT` | CPU limit in MHz for the task | +| `NOMAD_CPU_CORES` | The specific CPU cores reserved for the task in cpuset list notation. Omitted if the task does not request cpu cores. E.g. `0-2,7,12-14` | +| `NOMAD_ALLOC_ID` | Allocation ID of the task | +| `NOMAD_ALLOC_NAME` | Allocation name of the task. This is derived from the job name, task group name, and allocation index. | +| `NOMAD_ALLOC_INDEX` | Allocation index; useful to distinguish instances of task groups. From 0 to (count - 1). For system jobs and sysbatch jobs, this value will always be 0. The index is unique within a given version of a job, but canaries or failed tasks in a deployment may reuse the index. 
| +| `NOMAD_TASK_NAME` | Task's name | +| `NOMAD_GROUP_NAME` | Group's name | +| `NOMAD_JOB_ID` | Job's ID, which is equal to the Job name when submitted through CLI but can be different when using the API | +| `NOMAD_JOB_NAME` | Job's name | +| `NOMAD_JOB_PARENT_ID` | ID of the Job's parent if it has one | +| `NOMAD_DC` | Datacenter in which the allocation is running | +| `NOMAD_NAMESPACE` | Namespace in which the allocation is running | +| `NOMAD_REGION` | Region in which the allocation is running | +| `NOMAD_META_` | The metadata value given by `key` on the task's metadata. Note that this is different from [`${meta.}`](/nomad/docs/runtime/interpolation#node-variables-) which are keys in the node's metadata. | +| `VAULT_TOKEN` | The task's Vault token. See [Vault Integration](/nomad/docs/integrations/vault-integration) for more details | + +### Network-related Variables + +| Variable | Description | +|------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `NOMAD_IP_