Skip to content

Commit

Permalink
Improvements to removeCheckoutDir
Browse files Browse the repository at this point in the history
  • Loading branch information
DrJosh9000 committed Aug 22, 2024
1 parent 65ed6aa commit 031322a
Showing 1 changed file with 59 additions and 23 deletions.
82 changes: 59 additions & 23 deletions internal/job/checkout.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"syscall"
"time"

"github.com/buildkite/agent/v3/internal/experiments"
Expand Down Expand Up @@ -53,48 +55,82 @@ func (e *Executor) configureHTTPSInsteadOfSSH(ctx context.Context) error {
)
}

func (e *Executor) removeCheckoutDir() error {
func (e *Executor) removeCheckoutDir(ctx context.Context) error {
checkoutPath, _ := e.shell.Env.Get("BUILDKITE_BUILD_CHECKOUT_PATH")

// on windows, sometimes removing large dirs can fail for various reasons
// for instance having files open
// see https://github.com/golang/go/issues/20841
for range 10 {
return roko.NewRetrier(
roko.WithMaxAttempts(10),
roko.WithStrategy(roko.Constant(10*time.Second)),
).DoWithContext(ctx, func(r *roko.Retrier) error {
e.shell.Commentf("Removing %s", checkoutPath)
if err := os.RemoveAll(checkoutPath); err != nil {
e.shell.Errorf("Failed to remove \"%s\" (%s)", checkoutPath, err)
} else {
if _, err := os.Stat(checkoutPath); os.IsNotExist(err) {
return nil
} else {
e.shell.Errorf("Failed to remove %s", checkoutPath)
// Loop to catch broken directory permissions
for {
if err := os.RemoveAll(checkoutPath); err != nil {
// os.RemoveAll documents its only non-nil error as *os.PathError.
pathErr, ok := err.(*os.PathError)
if !ok {
return err
}

// Did we not have permission to open a directory?
if !strings.HasPrefix(pathErr.Op, "open") || !errors.Is(pathErr.Err, syscall.EACCES) {
return err
}
dir := filepath.Dir(pathErr.Path)

// Check that the EACCES was caused by mode on the directory.
// (Note that this is a TOCTOU race, but we're not changing
// owner uid/gid, and if something else is concurrently writing
// files they can probably chmod +x their files themselves)
di, statErr := os.Lstat(dir)
if statErr != nil {
return statErr
}
if !di.IsDir() {
return err
}
if di.Mode()&0o111 != 0 {
// some other permission failure?
return err
}
// Try to fix it with chmod +x dir
if err := os.Chmod(dir, 0o777); err != nil {
return err
}
// Now retry os.RemoveAll
continue
}
// Success! break the inner loop.
break
}
if _, err := os.Stat(checkoutPath); err != nil && !errors.Is(err, fs.ErrNotExist) {
e.shell.Errorf("Failed to remove %q", checkoutPath)
return err
}
e.shell.Commentf("Waiting 10 seconds")
<-time.After(time.Second * 10)
}

return fmt.Errorf("Failed to remove %s", checkoutPath)
return nil
})
}

func (e *Executor) createCheckoutDir() error {
checkoutPath, _ := e.shell.Env.Get("BUILDKITE_BUILD_CHECKOUT_PATH")

if !utils.FileExists(checkoutPath) {
e.shell.Commentf("Creating \"%s\"", checkoutPath)
// Actual file permissions will be reduced by umask, and won't be 0777 unless the user has manually changed the umask to 000
e.shell.Commentf("Creating %q", checkoutPath)
// Actual file permissions will be reduced by umask, and won't be 0777
// unless the user has manually changed the umask to 000
if err := os.MkdirAll(checkoutPath, 0777); err != nil {
return err
}
}

if e.shell.Getwd() != checkoutPath {
if err := e.shell.Chdir(checkoutPath); err != nil {
return err
}
if e.shell.Getwd() == checkoutPath {
return nil
}

return nil
return e.shell.Chdir(checkoutPath)
}

// CheckoutPhase creates the build directory and makes sure we're running the
Expand All @@ -115,7 +151,7 @@ func (e *Executor) CheckoutPhase(ctx context.Context) error {
// Remove the checkout directory if BUILDKITE_CLEAN_CHECKOUT is present
if e.CleanCheckout {
e.shell.Headerf("Cleaning pipeline checkout")
if err = e.removeCheckoutDir(); err != nil {
if err = e.removeCheckoutDir(ctx); err != nil {
return err
}
}
Expand Down Expand Up @@ -199,7 +235,7 @@ func (e *Executor) CheckoutPhase(ctx context.Context) error {
// This removes the checkout dir, which means the next checkout
// will be a lot slower (clone vs fetch), but hopefully will
// allow the agent to self-heal
if err := e.removeCheckoutDir(); err != nil {
if err := e.removeCheckoutDir(ctx); err != nil {
e.shell.Printf("Failed to remove checkout dir while cleaning up after a checkout error.")
}

Expand Down

0 comments on commit 031322a

Please sign in to comment.