diff --git a/client/client.go b/client/client.go index 884ebdef433c..b20a7ed0b652 100644 --- a/client/client.go +++ b/client/client.go @@ -2359,7 +2359,7 @@ OUTER: // // For full context, please see https://github.com/hashicorp/nomad/issues/18267 if resp.Index <= req.MinQueryIndex { - c.logger.Debug("Received stale allocation information. Retrying.", + c.logger.Debug("received stale allocation information; retrying", "index", resp.Index, "min_index", req.MinQueryIndex) continue OUTER } @@ -2419,6 +2419,24 @@ OUTER: } } + // It is possible that Alloc.GetAllocs hits a different server than + // Node.GetClientAllocs which returns older results. + if allocsResp.Index <= allocsReq.MinQueryIndex { + retry := c.retryIntv(getAllocRetryIntv) + c.logger.Warn("failed to retrieve updated allocs; retrying", + "req_index", allocsReq.MinQueryIndex, + "resp_index", allocsResp.Index, + "num_allocs", len(pull), + "wait", retry, + ) + select { + case <-time.After(retry): + continue + case <-c.shutdownCh: + return + } + } + // Ensure that we received all the allocations we wanted pulledAllocs = make(map[string]*structs.Allocation, len(allocsResp.Allocs)) for _, alloc := range allocsResp.Allocs { diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index ab024213b2cf..5f227b4103a5 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -257,7 +257,7 @@ func (a *Alloc) GetAllocs(args *structs.AllocsGetRequest, reply.Allocs = allocs reply.Index = maxIndex } else { - // Use the last index that affected the nodes table + // Use the last index that affected the allocs table index, err := state.Index("allocs") if err != nil { return err