Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

System and sysbatch jobs always have zero index #16030

Merged
merged 6 commits into from
Feb 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/16030.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
volumes: Fixed a bug where `per_alloc` was allowed for volume blocks on system and sysbatch jobs, which do not have an allocation index
```
2 changes: 1 addition & 1 deletion nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -6608,7 +6608,7 @@ func (tg *TaskGroup) Validate(j *Job) error {
canaries = tg.Update.Canary
}
for name, volReq := range tg.Volumes {
if err := volReq.Validate(tg.Count, canaries); err != nil {
if err := volReq.Validate(j.Type, tg.Count, canaries); err != nil {
mErr.Errors = append(mErr.Errors, fmt.Errorf(
"Task group volume validation for %s failed: %v", name, err))
}
Expand Down
10 changes: 7 additions & 3 deletions nomad/structs/volume_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ func TestVolumeRequest_Validate(t *testing.T) {
"host volumes cannot have an access mode",
"host volumes cannot have an attachment mode",
"host volumes cannot have mount options",
"volume cannot be per_alloc for system or sysbatch jobs",
"volume cannot be per_alloc when canaries are in use",
},
canariesCount: 1,
Expand Down Expand Up @@ -69,8 +70,11 @@ func TestVolumeRequest_Validate(t *testing.T) {
},
},
{
name: "CSI volume per-alloc with canaries",
expected: []string{"volume cannot be per_alloc when canaries are in use"},
name: "CSI volume per-alloc with canaries",
expected: []string{
"volume cannot be per_alloc for system or sysbatch jobs",
"volume cannot be per_alloc when canaries are in use",
},
canariesCount: 1,
req: &VolumeRequest{
Type: VolumeTypeCSI,
Expand All @@ -81,7 +85,7 @@ func TestVolumeRequest_Validate(t *testing.T) {

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
err := tc.req.Validate(tc.taskGroupCount, tc.canariesCount)
err := tc.req.Validate(JobTypeSystem, tc.taskGroupCount, tc.canariesCount)
for _, expected := range tc.expected {
require.Contains(t, err.Error(), expected)
}
Expand Down
18 changes: 9 additions & 9 deletions nomad/structs/volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ type VolumeRequest struct {
PerAlloc bool
}

func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error {
func (v *VolumeRequest) Validate(jobType string, taskGroupCount, canaries int) error {
if !(v.Type == VolumeTypeHost ||
v.Type == VolumeTypeCSI) {
return fmt.Errorf("volume has unrecognized type %s", v.Type)
Expand All @@ -116,6 +116,14 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error {
if v.Source == "" {
addErr("volume has an empty source")
}
if v.PerAlloc {
if jobType == JobTypeSystem || jobType == JobTypeSysBatch {
addErr("volume cannot be per_alloc for system or sysbatch jobs")
}
if canaries > 0 {
addErr("volume cannot be per_alloc when canaries are in use")
}
}

switch v.Type {

Expand All @@ -129,9 +137,6 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error {
if v.MountOptions != nil {
addErr("host volumes cannot have mount options")
}
if v.PerAlloc && canaries > 0 {
addErr("volume cannot be per_alloc when canaries are in use")
}

case VolumeTypeCSI:

Expand Down Expand Up @@ -170,11 +175,6 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error {
case CSIVolumeAccessModeMultiNodeMultiWriter:
// note: we intentionally allow read-only mount of this mode
}

if v.PerAlloc && canaries > 0 {
addErr("volume cannot be per_alloc when canaries are in use")
}

}

return mErr.ErrorOrNil()
Expand Down
10 changes: 10 additions & 0 deletions scheduler/generic_sched_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
Expand Down Expand Up @@ -92,6 +93,15 @@ func TestServiceSched_JobRegister(t *testing.T) {
t.Fatalf("bad: %#v", out)
}

// Ensure allocations have unique names derived from Job.ID
allocNames := helper.ConvertSlice(out,
func(alloc *structs.Allocation) string { return alloc.Name })
expectAllocNames := []string{}
for i := 0; i < 10; i++ {
expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[%d]", job.ID, i))
}
must.SliceContainsAll(t, expectAllocNames, allocNames)

// Ensure different ports were used.
used := make(map[int]map[string]struct{})
for _, alloc := range out {
Expand Down
11 changes: 11 additions & 0 deletions scheduler/scheduler_sysbatch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ import (

"github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/kr/pretty"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)

Expand Down Expand Up @@ -64,6 +66,15 @@ func TestSysBatch_JobRegister(t *testing.T) {
// Ensure all allocations placed
require.Len(t, out, 10)

// Note that all sysbatch allocations have the same name derived from Job.Name
allocNames := helper.ConvertSlice(out,
func(alloc *structs.Allocation) string { return alloc.Name })
expectAllocNames := []string{}
for i := 0; i < 10; i++ {
expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.pinger[0]", job.Name))
}
must.SliceContainsAll(t, expectAllocNames, allocNames)

// Check the available nodes
count, ok := out[0].Metrics.NodesAvailable["dc1"]
require.True(t, ok)
Expand Down
10 changes: 10 additions & 0 deletions scheduler/scheduler_system_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
Expand Down Expand Up @@ -66,6 +67,15 @@ func TestSystemSched_JobRegister(t *testing.T) {
// Ensure all allocations placed
require.Len(t, out, 10)

// Note that all system allocations have the same name derived from Job.Name
allocNames := helper.ConvertSlice(out,
func(alloc *structs.Allocation) string { return alloc.Name })
expectAllocNames := []string{}
for i := 0; i < 10; i++ {
expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[0]", job.Name))
}
must.SliceContainsAll(t, expectAllocNames, allocNames)

// Check the available nodes
count, ok := out[0].Metrics.NodesAvailable["dc1"]
require.True(t, ok)
Expand Down
3 changes: 3 additions & 0 deletions website/content/docs/job-specification/volume.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ the [volume_mount][volume_mount] block in the `task` configuration.
= true`, the allocation named `myjob.mygroup.mytask[0]` will require a
volume ID `myvolume[0]`.

The `per_alloc` field cannot be true for system jobs, sysbatch jobs, or jobs
that use canaries.

The following fields are only valid for volumes with `type = "csi"`:

- `access_mode` `(string: <required>)` - Defines whether a volume should be
Expand Down
48 changes: 24 additions & 24 deletions website/content/partials/envvars.mdx
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
### Job-related variables

| Variable | Description |
| ------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `NOMAD_ALLOC_DIR` | The path to the shared `alloc/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. |
| `NOMAD_TASK_DIR` | The path to the task `local/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. |
| `NOMAD_SECRETS_DIR` | Path to the task's secrets directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. |
| `NOMAD_MEMORY_LIMIT` | Memory limit in MB for the task |
| `NOMAD_MEMORY_MAX_LIMIT` | The maximum memory limit the task may use if client has excess memory capacity, in MB. Omitted if task isn't configured with memory oversubscription. |
| `NOMAD_CPU_LIMIT` | CPU limit in MHz for the task |
| `NOMAD_CPU_CORES` | The specific CPU cores reserved for the task in cpuset list notation. Omitted if the task does not request cpu cores. E.g. `0-2,7,12-14` |
| `NOMAD_ALLOC_ID` | Allocation ID of the task |
| `NOMAD_SHORT_ALLOC_ID` | The first 8 characters of the allocation ID of the task |
| `NOMAD_ALLOC_NAME` | Allocation name of the task |
| `NOMAD_ALLOC_INDEX` | Allocation index; useful to distinguish instances of task groups. From 0 to (count - 1). The index is unique within a given version of a job, but canaries or failed tasks in a deployment may reuse the index. |
| `NOMAD_TASK_NAME` | Task's name |
| `NOMAD_GROUP_NAME` | Group's name |
| `NOMAD_JOB_ID` | Job's ID, which is equal to the Job name when submitted through CLI but can be different when using the API |
| `NOMAD_JOB_NAME` | Job's name |
| `NOMAD_JOB_PARENT_ID` | ID of the Job's parent if it has one |
| `NOMAD_DC` | Datacenter in which the allocation is running |
| `NOMAD_PARENT_CGROUP` | The parent cgroup used to contain task cgroups (Linux only) |
| `NOMAD_NAMESPACE` | Namespace in which the allocation is running |
| `NOMAD_REGION` | Region in which the allocation is running |
| `NOMAD_META_<key>` | The metadata value given by `key` on the task's metadata. Note that this is different from [`${meta.<key>}`](/nomad/docs/runtime/interpolation#node-variables-) which are keys in the node's metadata. |
| `VAULT_TOKEN` | The task's Vault token. See [Vault Integration](/nomad/docs/integrations/vault-integration) for more details |
| Variable | Description |
|--------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `NOMAD_ALLOC_DIR` | The path to the shared `alloc/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. |
| `NOMAD_TASK_DIR` | The path to the task `local/` directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. |
| `NOMAD_SECRETS_DIR` | Path to the task's secrets directory. See [here](/nomad/docs/runtime/environment#task-directories) for more information. |
| `NOMAD_MEMORY_LIMIT` | Memory limit in MB for the task |
| `NOMAD_MEMORY_MAX_LIMIT` | The maximum memory limit the task may use if client has excess memory capacity, in MB. Omitted if task isn't configured with memory oversubscription. |
| `NOMAD_CPU_LIMIT` | CPU limit in MHz for the task |
| `NOMAD_CPU_CORES` | The specific CPU cores reserved for the task in cpuset list notation. Omitted if the task does not request cpu cores. E.g. `0-2,7,12-14` |
| `NOMAD_ALLOC_ID` | Allocation ID of the task |
| `NOMAD_SHORT_ALLOC_ID` | The first 8 characters of the allocation ID of the task |
| `NOMAD_ALLOC_NAME` | Allocation name of the task. This is derived from the job name, task group name, and allocation index. |
| `NOMAD_ALLOC_INDEX` | Allocation index; useful to distinguish instances of task groups. From 0 to (count - 1). For system jobs and sysbatch jobs, this value will always be 0. The index is unique within a given version of a job, but canaries or failed tasks in a deployment may reuse the index. |
| `NOMAD_TASK_NAME` | Task's name |
| `NOMAD_GROUP_NAME` | Group's name |
| `NOMAD_JOB_ID` | Job's ID, which is equal to the Job name when submitted through CLI but can be different when using the API |
| `NOMAD_JOB_NAME` | Job's name |
| `NOMAD_JOB_PARENT_ID` | ID of the Job's parent if it has one |
| `NOMAD_DC` | Datacenter in which the allocation is running |
| `NOMAD_PARENT_CGROUP` | The parent cgroup used to contain task cgroups (Linux only) |
| `NOMAD_NAMESPACE` | Namespace in which the allocation is running |
| `NOMAD_REGION` | Region in which the allocation is running |
| `NOMAD_META_<key>` | The metadata value given by `key` on the task's metadata. Note that this is different from [`${meta.<key>}`](/nomad/docs/runtime/interpolation#node-variables-) which are keys in the node's metadata. |
| `VAULT_TOKEN` | The task's Vault token. See [Vault Integration](/nomad/docs/integrations/vault-integration) for more details |

### Network-related Variables

Expand Down