Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve checkIfPRContentChanged #22611

Merged
merged 11 commits into from
Jan 28, 2023
22 changes: 22 additions & 0 deletions modules/util/io.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
package util

import (
"errors"
"io"
)

Expand All @@ -17,3 +18,24 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) {
}
return n, err
}

// ErrNotEmpty is an error reported when there is a non-empty reader
var ErrNotEmpty = errors.New("not-empty")

// IsEmptyReader reads a reader and ensures it is empty
func IsEmptyReader(r io.Reader) (err error) {
var buf [1]byte

for {
n, err := r.Read(buf[:])
if err != nil {
if err == io.EOF {
return nil
}
return err
}
if n > 0 {
return ErrNotEmpty
}
}
}
92 changes: 39 additions & 53 deletions services/pull/pull.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@
package pull

import (
"bufio"
"bytes"
"context"
"fmt"
"io"
"os"
"regexp"
"strings"
"time"

"code.gitea.io/gitea/models"
"code.gitea.io/gitea/models/db"
Expand All @@ -29,6 +27,7 @@ import (
repo_module "code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/sync"
"code.gitea.io/gitea/modules/util"
issue_service "code.gitea.io/gitea/services/issue"
)

Expand Down Expand Up @@ -351,69 +350,56 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string,
// checkIfPRContentChanged checks if diff to target branch has changed by push
// A commit can be considered to leave the PR untouched if the patch/diff with its merge base is unchanged
func checkIfPRContentChanged(ctx context.Context, pr *issues_model.PullRequest, oldCommitID, newCommitID string) (hasChanged bool, err error) {
if err = pr.LoadHeadRepo(ctx); err != nil {
return false, fmt.Errorf("LoadHeadRepo: %w", err)
} else if pr.HeadRepo == nil {
// corrupt data assumed changed
return true, nil
}

if err = pr.LoadBaseRepo(ctx); err != nil {
return false, fmt.Errorf("LoadBaseRepo: %w", err)
}

headGitRepo, err := git.OpenRepository(ctx, pr.HeadRepo.RepoPath())
tmpBasePath, err := createTemporaryRepo(ctx, pr)
if err != nil {
return false, fmt.Errorf("OpenRepository: %w", err)
}
defer headGitRepo.Close()

// Add a temporary remote.
tmpRemote := "checkIfPRContentChanged-" + fmt.Sprint(time.Now().UnixNano())
if err = headGitRepo.AddRemote(tmpRemote, pr.BaseRepo.RepoPath(), true); err != nil {
return false, fmt.Errorf("AddRemote: %s/%s-%s: %w", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
log.Error("CreateTemporaryRepo: %v", err)
return false, err
}
defer func() {
if err := headGitRepo.RemoveRemote(tmpRemote); err != nil {
log.Error("checkIfPRContentChanged: RemoveRemote: %s/%s-%s: %v", pr.HeadRepo.OwnerName, pr.HeadRepo.Name, tmpRemote, err)
if err := repo_module.RemoveTemporaryPath(tmpBasePath); err != nil {
log.Error("checkIfPRContentChanged: RemoveTemporaryPath: %s", err)
}
}()
// To synchronize repo and get a base ref
_, base, err := headGitRepo.GetMergeBase(tmpRemote, pr.BaseBranch, pr.HeadBranch)

tmpRepo, err := git.OpenRepository(ctx, tmpBasePath)
if err != nil {
return false, fmt.Errorf("GetMergeBase: %w", err)
return false, fmt.Errorf("OpenRepository: %w", err)
}
defer tmpRepo.Close()

diffBefore := &bytes.Buffer{}
diffAfter := &bytes.Buffer{}
if err := headGitRepo.GetDiffFromMergeBase(base, oldCommitID, diffBefore); err != nil {
// If old commit not found, assume changed.
log.Debug("GetDiffFromMergeBase: %v", err)
return true, nil
}
if err := headGitRepo.GetDiffFromMergeBase(base, newCommitID, diffAfter); err != nil {
// New commit should be found
return false, fmt.Errorf("GetDiffFromMergeBase: %w", err)
// Find the merge-base
_, base, err := tmpRepo.GetMergeBase("", "base", "tracking")
if err != nil {
return false, fmt.Errorf("GetMergeBase: %w", err)
}

diffBeforeLines := bufio.NewScanner(diffBefore)
diffAfterLines := bufio.NewScanner(diffAfter)

for diffBeforeLines.Scan() && diffAfterLines.Scan() {
if strings.HasPrefix(diffBeforeLines.Text(), "index") && strings.HasPrefix(diffAfterLines.Text(), "index") {
// file hashes can change without the diff changing
continue
} else if strings.HasPrefix(diffBeforeLines.Text(), "@@") && strings.HasPrefix(diffAfterLines.Text(), "@@") {
// the location of the difference may change
continue
} else if !bytes.Equal(diffBeforeLines.Bytes(), diffAfterLines.Bytes()) {
cmd := git.NewCommand(ctx, "diff", "--name-only", "-z").AddDynamicArguments(newCommitID, oldCommitID, base)
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
return false, fmt.Errorf("unable to open pipe for to run diff: %w", err)
}

if err := cmd.Run(&git.RunOpts{
Dir: tmpBasePath,
Stdout: stdoutWriter,
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
_ = stdoutWriter.Close()
defer func() {
_ = stdoutReader.Close()
}()
return util.IsEmptyReader(stdoutReader)
},
}); err != nil {
if err == util.ErrNotEmpty {
return true, nil
}
}

if diffBeforeLines.Scan() || diffAfterLines.Scan() {
// Diffs not of equal length
return true, nil
log.Error("Unable to run diff on %s %s %s in tempRepo for PR[%d]%s/%s...%s/%s: Error: %v",
newCommitID, oldCommitID, base,
pr.ID, pr.BaseRepo.FullName(), pr.BaseBranch, pr.HeadRepo.FullName(), pr.HeadBranch,
err)

return false, fmt.Errorf("Unable to run git diff --name-only -z %s %s %s: %w", newCommitID, oldCommitID, base, err)
}

return false, nil
Expand Down