From 8ad663d1de4065f31199f70d573021641584a240 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Wed, 2 Aug 2023 09:41:46 -0400 Subject: [PATCH] allocwatcher: don't destroy local allocdir after migration (#18108) When ephemeral disks are migrated from an allocation on the same node, allocation logs for the previous allocation are lost. There are two workflows for the best-effort attempt to migrate the allocation data between the old and new allocations. For previous allocations on other clients (the "remote" workflow), we create a local allocdir and download the data from the previous client into it. That data is then moved into the new allocdir and we delete the allocdir of the previous alloc. For "local" previous allocations we don't need to create an extra directory for the previous allocation and instead move the files directly from one to the other. But we still delete the old allocdir _entirely_, which includes all the logs! There doesn't seem to be any reason to destroy the local previous allocdir, as the usual client garbage collection should destroy it later on when needed. By not deleting it, the previous allocation's logs are still available for the user to read. Fixes: #18034 --- .changelog/18108.txt | 3 +++ client/allocwatcher/alloc_watcher.go | 10 +--------- 2 files changed, 4 insertions(+), 9 deletions(-) create mode 100644 .changelog/18108.txt diff --git a/.changelog/18108.txt b/.changelog/18108.txt new file mode 100644 index 000000000000..26e7d6b588ef --- /dev/null +++ b/.changelog/18108.txt @@ -0,0 +1,3 @@ +```release-note:bug +migration: Fixed a bug where previous alloc logs were destroyed when migrating ephemeral_disk on the same client +``` diff --git a/client/allocwatcher/alloc_watcher.go b/client/allocwatcher/alloc_watcher.go index 1df200e3beec..9dc5f95d1fb9 100644 --- a/client/allocwatcher/alloc_watcher.go +++ b/client/allocwatcher/alloc_watcher.go @@ -279,15 +279,7 @@ func (p *localPrevAlloc) Migrate(ctx context.Context, dest *allocdir.AllocDir) e p.logger.Debug("copying previous alloc") - moveErr := dest.Move(p.prevAllocDir, p.tasks) - - // Always cleanup previous alloc - if err := p.prevAllocDir.Destroy(); err != nil { - p.logger.Error("error destroying alloc dir", - "error", err, "previous_alloc_dir", p.prevAllocDir.AllocDir) - } - - return moveErr + return dest.Move(p.prevAllocDir, p.tasks) } // remotePrevAlloc is a prevAllocWatcher for previous allocations on remote