From f02061fba053f0e5d8ee3194ddabfd8b11b7e1b7 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Thu, 28 Sep 2023 11:42:57 -0700 Subject: [PATCH] client: prevent using stale allocs (#18601) Similar to #18269, it is possible that even if Node.GetClientAllocs retrieves fresh allocs that the subsequent Alloc.GetAllocs call retrieves stale allocs. While `diffAlloc(existing, updated)` properly ignores stale alloc *updates*, alloc deletions have no such check. So if a client retrieves an alloc created at index 123, and then a subsequent Alloc.GetAllocs call hits a new server which returns results at index 100, the client will stop the alloc created at 123 because it will be missing from the stale response. This change applies the same logic as #18269 and ensures only fresh responses are used. Glossary: * fresh - modified at an index > the query index * stale - modified at an index <= the query index --- client/client.go | 20 +++++++++++++++++++- nomad/alloc_endpoint.go | 2 +- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/client/client.go b/client/client.go index c5a9a4250b45..c652ecec7003 100644 --- a/client/client.go +++ b/client/client.go @@ -2339,7 +2339,7 @@ OUTER: // // For full context, please see https://github.com/hashicorp/nomad/issues/18267 if resp.Index <= req.MinQueryIndex { - c.logger.Debug("Received stale allocation information. Retrying.", + c.logger.Debug("received stale allocation information; retrying", "index", resp.Index, "min_index", req.MinQueryIndex) continue OUTER } @@ -2399,6 +2399,24 @@ OUTER: } } + // It is possible that Alloc.GetAllocs hits a different server than + // Node.GetClientAllocs which returns older results. + if allocsResp.Index <= allocsReq.MinQueryIndex { + retry := c.retryIntv(getAllocRetryIntv) + c.logger.Warn("failed to retrieve updated allocs; retrying", + "req_index", allocsReq.MinQueryIndex, + "resp_index", allocsResp.Index, + "num_allocs", len(pull), + "wait", retry, + ) + select { + case <-time.After(retry): + continue + case <-c.shutdownCh: + return + } + } + // Ensure that we received all the allocations we wanted pulledAllocs = make(map[string]*structs.Allocation, len(allocsResp.Allocs)) for _, alloc := range allocsResp.Allocs { diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go index 8aab0c4909be..75071047b368 100644 --- a/nomad/alloc_endpoint.go +++ b/nomad/alloc_endpoint.go @@ -257,7 +257,7 @@ func (a *Alloc) GetAllocs(args *structs.AllocsGetRequest, reply.Allocs = allocs reply.Index = maxIndex } else { - // Use the last index that affected the nodes table + // Use the last index that affected the allocs table index, err := state.Index("allocs") if err != nil { return err