Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport of scheduler: ensure dup alloc names are fixed before plan submit. into release/1.5.x #18890

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/18873.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
scheduler: Ensure duplicate allocation IDs are tracked and fixed when performing job updates
```
12 changes: 9 additions & 3 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -10204,13 +10204,19 @@ func (a *Allocation) JobNamespacedID() NamespacedID {
// Index returns the index of the allocation. If the allocation is from a task
// group with count greater than 1, there will be multiple allocations for it.
func (a *Allocation) Index() uint {
l := len(a.Name)
prefix := len(a.JobID) + len(a.TaskGroup) + 2
return AllocIndexFromName(a.Name, a.JobID, a.TaskGroup)
}

// AllocIndexFromName returns the index of an allocation given its name, the
// jobID and the task group name.
func AllocIndexFromName(allocName, jobID, taskGroup string) uint {
l := len(allocName)
prefix := len(jobID) + len(taskGroup) + 2
if l <= 3 || l <= prefix {
return uint(0)
}

strNum := a.Name[prefix : len(a.Name)-1]
strNum := allocName[prefix : len(allocName)-1]
num, _ := strconv.Atoi(strNum)
return uint(num)
}
Expand Down
35 changes: 32 additions & 3 deletions scheduler/generic_sched.go
Original file line number Diff line number Diff line change
Expand Up @@ -463,7 +463,7 @@ func (s *GenericScheduler) computeJobAllocs() error {
s.queuedAllocs[p.placeTaskGroup.Name] += 1
destructive = append(destructive, p)
}
return s.computePlacements(destructive, place)
return s.computePlacements(destructive, place, results.taskGroupAllocNameIndexes)
}

// downgradedJobForPlacement returns the job appropriate for non-canary placement replacement
Expand Down Expand Up @@ -505,7 +505,8 @@ func (s *GenericScheduler) downgradedJobForPlacement(p placementResult) (string,

// computePlacements computes placements for allocations. It is given the set of
// destructive updates to place and the set of new placements to place.
func (s *GenericScheduler) computePlacements(destructive, place []placementResult) error {
func (s *GenericScheduler) computePlacements(destructive, place []placementResult, nameIndex map[string]*allocNameIndex) error {

// Get the base nodes
byDC, err := s.setNodes(s.job)
if err != nil {
Expand All @@ -528,6 +529,12 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
// Get the task group
tg := missing.TaskGroup()

// This is populated from the reconciler via the compute results,
// therefore we cannot have an allocation belonging to a task group
// that has not generated and been through allocation name index
// tracking.
taskGroupNameIndex := nameIndex[tg.Name]

var downgradedJob *structs.Job

if missing.DowngradeNonCanary() {
Expand Down Expand Up @@ -624,12 +631,34 @@ func (s *GenericScheduler) computePlacements(destructive, place []placementResul
resources.Shared.Ports = option.AllocResources.Ports
}

// Pull the allocation name as a new variables, so we can alter
// this as needed without making changes to the original
// object.
newAllocName := missing.Name()

// Identify the index from the name, so we can check this
// against the allocation name index tracking for duplicates.
allocIndex := structs.AllocIndexFromName(newAllocName, s.job.ID, tg.Name)

// If the allocation index is a duplicate, we cannot simply
// create a new allocation with the same name. We need to
// generate a new index and use this. The log message is useful
// for debugging and development, but could be removed in a
// future version of Nomad.
if taskGroupNameIndex.IsDuplicate(allocIndex) {
oldAllocName := newAllocName
newAllocName = taskGroupNameIndex.Next(1)[0]
taskGroupNameIndex.UnsetIndex(allocIndex)
s.logger.Debug("duplicate alloc index found and changed",
"old_alloc_name", oldAllocName, "new_alloc_name", newAllocName)
}

// Create an allocation for this
alloc := &structs.Allocation{
ID: uuid.Generate(),
Namespace: s.job.Namespace,
EvalID: s.eval.ID,
Name: missing.Name(),
Name: newAllocName,
JobID: s.job.ID,
TaskGroup: tg.Name,
Metrics: s.ctx.Metrics(),
Expand Down
Loading