Skip to content

Commit

Permalink
fix: clean up VM runners on cluster destroy
Browse files Browse the repository at this point in the history
This never worked properly, as `Wait()` only works for child
processes, and the processes created by `talosctl cluster create` are
not children of `talosctl cluster destroy`.

Signed-off-by: Andrey Smirnov <andrey.smirnov@siderolabs.com>
  • Loading branch information
smira committed Jun 5, 2024
1 parent 41f92e0 commit 357d775
Showing 1 changed file with 10 additions and 8 deletions.
18 changes: 10 additions & 8 deletions pkg/provision/providers/vm/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
package vm

import (
"errors"
"fmt"
"os"
"syscall"
"time"

"github.com/siderolabs/go-retry/retry"
)

// StopProcessByPidfile stops a process by reading its PID from a file.
Expand Down Expand Up @@ -43,13 +45,13 @@ func StopProcessByPidfile(pidPath string) error {
return fmt.Errorf("error sending SIGTERM to %d (path %q): %w", pid, pidPath, err)
}

if _, err = proc.Wait(); err != nil {
if errors.Is(err, syscall.ECHILD) {
return nil
// wait for the process to exit, this is using (unreliable and slow) polling
return retry.Constant(30*time.Second, retry.WithUnits(100*time.Millisecond)).Retry(func() error {
err = proc.Signal(syscall.Signal(0))
if err == nil {
return retry.ExpectedErrorf("process %d still running", pid)
}

return fmt.Errorf("error waiting for %d to exit (path %q): %w", pid, pidPath, err)
}

return nil
return nil
})
}

0 comments on commit 357d775

Please sign in to comment.