System and sysbatch jobs always have zero index (#16030)
Service jobs should have unique allocation Names, derived from the
Job.ID. System jobs do not have unique allocation Names because the index is
intended to indicate the instance out of the desired count. Because system
jobs do not have an explicit count and placements are instead driven by the
targeted nodes, the index is less informative, so it was intentionally omitted
from the original design.
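
As a rough illustration of the naming scheme described above (a minimal sketch, not Nomad's internal helper): allocation names follow `<job-id>.<group>[<index>]`, where the index is unique per instance for service jobs but always zero for system and sysbatch jobs.

```go
package main

import "fmt"

// allocName sketches the "<job-id>.<group>[<index>]" naming convention; the
// real implementation lives in nomad/structs, this is only for illustration.
func allocName(jobID, group string, index uint) string {
	return fmt.Sprintf("%s.%s[%d]", jobID, group, index)
}

func main() {
	// Service job with count = 3: each allocation gets a unique index.
	for i := uint(0); i < 3; i++ {
		fmt.Println(allocName("example", "web", i)) // example.web[0], example.web[1], example.web[2]
	}

	// System job placed on 3 nodes: every allocation shares index 0.
	for node := 0; node < 3; node++ {
		fmt.Println(allocName("example", "web", 0)) // example.web[0] on every node
	}
}
```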

Update the docs to make it clear that `NOMAD_ALLOC_INDEX` is always zero for
system and sysbatch jobs.

Validate that `volume.per_alloc` is incompatible with system and sysbatch
jobs. System and sysbatch jobs always have a `NOMAD_ALLOC_INDEX` of 0, so
interpolation via `per_alloc` will not work once more than one allocation is
placed. Validate against this on job submission.
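
A minimal usage sketch of the new check (assuming the `VolumeRequest` fields and constants from `nomad/structs` as of this change; the exact error text is the one added in the diff below):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// A CSI volume request marked per_alloc, as it might appear in a task
	// group of a system job.
	req := &structs.VolumeRequest{
		Name:           "myvolume",
		Type:           structs.VolumeTypeCSI,
		Source:         "myvolume",
		ReadOnly:       true,
		AccessMode:     structs.CSIVolumeAccessModeMultiNodeReader,
		AttachmentMode: structs.CSIVolumeAttachmentModeFilesystem,
		PerAlloc:       true,
	}

	// The Validate signature now takes the job type first, so submission-time
	// validation can reject per_alloc for system and sysbatch jobs.
	if err := req.Validate(structs.JobTypeSystem, 1, 0); err != nil {
		// The returned error is expected to include:
		//   "volume cannot be per_alloc for system or sysbatch jobs"
		fmt.Println(err)
	}
}
```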
tgross committed Feb 3, 2023
1 parent 499901c commit 7d04689
Showing 9 changed files with 115 additions and 379 deletions.
3 changes: 3 additions & 0 deletions .changelog/16030.txt
@@ -0,0 +1,3 @@
```release-note:bug
volumes: Fixed a bug where `per_alloc` was allowed for volume blocks on system and sysbatch jobs, which do not have an allocation index
```
2 changes: 1 addition & 1 deletion nomad/structs/structs.go
@@ -6264,7 +6264,7 @@ func (tg *TaskGroup) Validate(j *Job) error {
canaries = tg.Update.Canary
}
for name, volReq := range tg.Volumes {
if err := volReq.Validate(tg.Count, canaries); err != nil {
if err := volReq.Validate(j.Type, tg.Count, canaries); err != nil {
mErr.Errors = append(mErr.Errors, fmt.Errorf(
"Task group volume validation for %s failed: %v", name, err))
}
9 changes: 6 additions & 3 deletions nomad/structs/volume_test.go
@@ -65,8 +65,11 @@ func TestVolumeRequest_Validate(t *testing.T) {
},
},
{
name: "CSI volume per-alloc with canaries",
expected: []string{"volume cannot be per_alloc when canaries are in use"},
name: "CSI volume per-alloc with canaries",
expected: []string{
"volume cannot be per_alloc for system or sysbatch jobs",
"volume cannot be per_alloc when canaries are in use",
},
canariesCount: 1,
req: &VolumeRequest{
Type: VolumeTypeCSI,
@@ -78,7 +81,7 @@
for _, tc := range testCases {
tc = tc
t.Run(tc.name, func(t *testing.T) {
err := tc.req.Validate(tc.taskGroupCount, tc.canariesCount)
err := tc.req.Validate(JobTypeSystem, tc.taskGroupCount, tc.canariesCount)
for _, expected := range tc.expected {
require.Contains(t, err.Error(), expected)
}
12 changes: 8 additions & 4 deletions nomad/structs/volumes.go
@@ -102,7 +102,7 @@ type VolumeRequest struct {
PerAlloc bool
}

func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error {
func (v *VolumeRequest) Validate(jobType string, taskGroupCount, canaries int) error {
if !(v.Type == VolumeTypeHost ||
v.Type == VolumeTypeCSI) {
return fmt.Errorf("volume has unrecognized type %s", v.Type)
@@ -170,9 +170,13 @@ func (v *VolumeRequest) Validate(taskGroupCount, canaries int) error {
case CSIVolumeAccessModeMultiNodeMultiWriter:
// note: we intentionally allow read-only mount of this mode
}

if v.PerAlloc && canaries > 0 {
addErr("volume cannot be per_alloc when canaries are in use")
if v.PerAlloc {
if jobType == JobTypeSystem || jobType == JobTypeSysBatch {
addErr("volume cannot be per_alloc for system or sysbatch jobs")
}
if canaries > 0 {
addErr("volume cannot be per_alloc when canaries are in use")
}
}

}
11 changes: 11 additions & 0 deletions scheduler/generic_sched_test.go
@@ -9,11 +9,13 @@ import (

memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"
@@ -90,6 +92,15 @@ func TestServiceSched_JobRegister(t *testing.T) {
t.Fatalf("bad: %#v", out)
}

// Ensure allocations have unique names derived from Job.ID
allocNames := helper.ConvertSlice(out,
func(alloc *structs.Allocation) string { return alloc.Name })
expectAllocNames := []string{}
for i := 0; i < 10; i++ {
expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[%d]", job.ID, i))
}
must.SliceContainsAll(t, expectAllocNames, allocNames)

// Ensure different ports were used.
used := make(map[int]map[string]struct{})
for _, alloc := range out {
11 changes: 11 additions & 0 deletions scheduler/scheduler_sysbatch_test.go
@@ -7,11 +7,13 @@ import (

"github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/kr/pretty"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)

@@ -64,6 +66,15 @@ func TestSysBatch_JobRegister(t *testing.T) {
// Ensure all allocations placed
require.Len(t, out, 10)

// Note that all sysbatch allocations have the same name derived from Job.Name
allocNames := helper.ConvertSlice(out,
func(alloc *structs.Allocation) string { return alloc.Name })
expectAllocNames := []string{}
for i := 0; i < 10; i++ {
expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.pinger[0]", job.Name))
}
must.SliceContainsAll(t, expectAllocNames, allocNames)

// Check the available nodes
count, ok := out[0].Metrics.NodesAvailable["dc1"]
require.True(t, ok)
11 changes: 11 additions & 0 deletions scheduler/scheduler_system_test.go
@@ -9,10 +9,12 @@ import (

memdb "github.com/hashicorp/go-memdb"
"github.com/hashicorp/nomad/ci"
"github.com/hashicorp/nomad/helper"
"github.com/hashicorp/nomad/helper/pointer"
"github.com/hashicorp/nomad/helper/uuid"
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/require"
)

@@ -65,6 +67,15 @@ func TestSystemSched_JobRegister(t *testing.T) {
// Ensure all allocations placed
require.Len(t, out, 10)

// Note that all system allocations have the same name derived from Job.Name
allocNames := helper.ConvertSlice(out,
func(alloc *structs.Allocation) string { return alloc.Name })
expectAllocNames := []string{}
for i := 0; i < 10; i++ {
expectAllocNames = append(expectAllocNames, fmt.Sprintf("%s.web[0]", job.Name))
}
must.SliceContainsAll(t, expectAllocNames, allocNames)

// Check the available nodes
count, ok := out[0].Metrics.NodesAvailable["dc1"]
require.True(t, ok)
3 changes: 3 additions & 0 deletions website/content/docs/job-specification/volume.mdx
@@ -99,6 +99,9 @@ The following fields are only valid for volumes with `type = "csi"`:
= true`, the allocation named `myjob.mygroup.mytask[0]` will require a
volume ID `myvolume[0]`.

The `per_alloc` field cannot be true for system jobs, sysbatch jobs, or jobs
that use canaries.

- `mount_options` - Options for mounting CSI volumes that have the
`file-system` [attachment mode]. These options override the `mount_options`
field from [volume registration]. Consult the documentation for your storage