diff --git a/client/client.go b/client/client.go index c5a9a4250b45..c652ecec7003 100644 --- a/client/client.go +++ b/client/client.go @@ -2339,7 +2339,7 @@ OUTER: // // For full context, please see https://github.com/hashicorp/nomad/issues/18267 if resp.Index <= req.MinQueryIndex { - c.logger.Debug("Received stale allocation information. Retrying.", + c.logger.Debug("received stale allocation information; retrying", "index", resp.Index, "min_index", req.MinQueryIndex) continue OUTER } @@ -2399,6 +2399,24 @@ OUTER: } } + // Alloc.GetAllocs may be served by a different server than the one that + // answered Node.GetClientAllocs, and that server may return older results. + if allocsResp.Index <= allocsReq.MinQueryIndex { + retry := c.retryIntv(getAllocRetryIntv) + c.logger.Warn("failed to retrieve updated allocs; retrying", + "req_index", allocsReq.MinQueryIndex, + "resp_index", allocsResp.Index, + "num_allocs", len(pull), + "wait", retry, + ) + select { + case <-time.After(retry): + continue + case <-c.shutdownCh: + return + } + } + // Ensure that we received all the allocations we wanted pulledAllocs = make(map[string]*structs.Allocation, len(allocsResp.Allocs)) for _, alloc := range allocsResp.Allocs { diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index 8aab0c4909be..75071047b368 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -257,7 +257,7 @@ func (a *Alloc) GetAllocs(args *structs.AllocsGetRequest, reply.Allocs = allocs reply.Index = maxIndex } else { - // Use the last index that affected the nodes table + // Use the last index that affected the allocs table index, err := state.Index("allocs") if err != nil { return err