From 9cab4604e017771842e8fd0e145c1df8e0fcd875 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Wed, 27 Sep 2023 13:55:59 -0700 Subject: [PATCH] client: prevent using stale allocs Similar to #18269, it is possible that, even if Node.GetClientAllocs retrieves fresh allocs, the subsequent Alloc.GetAllocs call retrieves stale allocs. While `diffAlloc(existing, updated)` properly ignores stale alloc *updates*, alloc deletions have no such check. So if a client retrieves an alloc created at index 123, and then a subsequent Alloc.GetAllocs call hits a new server which returns results at index 100, the client will stop the alloc created at 123 because it will be missing from the stale response. This change applies the same logic as #18269 and ensures only fresh responses are used. Glossary: * fresh - modified at an index > the query index * stale - modified at an index <= the query index --- client/client.go | 18 ++++++++++++++++++ nomad/alloc_endpoint.go | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/client/client.go b/client/client.go index c5a9a4250b45..db3a6ba6ab46 100644 --- a/client/client.go +++ b/client/client.go @@ -2399,6 +2399,24 @@ OUTER: } } + // It is possible that Alloc.GetAllocs hits a different server than + // Node.GetClientAllocs which returns older results. 
+			if allocsResp.Index <= allocsReq.MinQueryIndex { // "<=", not "<": a response at exactly the query index is stale too
+				retry := c.retryIntv(getAllocRetryIntv)
+				c.logger.Warn("failed to retrieve updated allocs; retrying",
+					"req_index", allocsReq.MinQueryIndex,
+					"resp_index", allocsResp.Index,
+					"num_allocs", len(pull),
+					"wait", retry,
+				)
+				select {
+				case <-time.After(retry):
+					continue
+				case <-c.shutdownCh:
+					return
+				}
+			}
+
 			// Ensure that we received all the allocations we wanted
 			pulledAllocs = make(map[string]*structs.Allocation, len(allocsResp.Allocs))
 			for _, alloc := range allocsResp.Allocs {
diff --git a/nomad/alloc_endpoint.go b/nomad/alloc_endpoint.go
index 8aab0c4909be..75071047b368 100644
--- a/nomad/alloc_endpoint.go
+++ b/nomad/alloc_endpoint.go
@@ -257,7 +257,7 @@ func (a *Alloc) GetAllocs(args *structs.AllocsGetRequest,
 				reply.Allocs = allocs
 				reply.Index = maxIndex
 			} else {
-				// Use the last index that affected the nodes table
+				// Use the last index that affected the allocs table
 				index, err := state.Index("allocs")
 				if err != nil {
 					return err