Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CIVIS-2863] Git repository unshallowing logic #155

Merged
merged 9 commits into from
Apr 10, 2024
27 changes: 22 additions & 5 deletions lib/datadog/ci/git/local_repository.rb
Original file line number Diff line number Diff line change
Expand Up @@ -159,17 +159,34 @@ def self.git_generate_packfiles(included_commits:, excluded_commits:, path:)
nil
end

# Tells whether the current git repository is a shallow clone.
#
# Runs `git rev-parse --is-shallow-repository` through exec_git_command and
# returns true only when the command result equals the string "true".
# Any error raised while running the command is reported via log_failure
# and the repository is then treated as not shallow (false).
def self.git_shallow_clone?
  shallow_flag = exec_git_command("git rev-parse --is-shallow-repository")
  shallow_flag == "true"
rescue => error
  log_failure(error, "git shallow clone")
  false
end

# Fetches additional history for a shallow clone.
#
# Issues a single `git fetch` for the current HEAD commit with these options:
#   --shallow-since="1 month ago"  limit the fetched history by date
#   --update-shallow               allow the shallow boundary to be updated
#   --filter="blob:none"           request a fetch without file contents (blobs)
#   --recurse-submodules=no        do not fetch submodules
# The remote name is read from `clone.defaultRemoteName`, defaulting to "origin".
#
# NOTE(review): the command relies on `$(...)` substitution, so exec_git_command
# presumably runs it through a shell - confirm.
#
# Returns whatever exec_git_command returns; on any error the failure is
# reported via log_failure and nil is returned.
def self.git_unshallow
  fetch_command = [
    "git fetch",
    '--shallow-since="1 month ago"',
    "--update-shallow",
    '--filter="blob:none"',
    "--recurse-submodules=no",
    "$(git config --default origin --get clone.defaultRemoteName)",
    "$(git rev-parse HEAD)"
  ].join(" ")

  exec_git_command(fetch_command)
rescue => error
  log_failure(error, "git unshallow")
  nil
end

# makes .exec_git_command private to make sure that this method
# is not called from outside of this module with insecure parameters
class << self
private

# Keeps only the entries of +commits+ that Utils::Git.valid_commit_sha?
# accepts, preserving their order.
#
# The list is validated in a single pass; an earlier duplicate pass whose
# result was thrown away has been removed.
#
# commits - enumerable of commit SHA candidates
#
# Returns an Array with the valid commit SHAs.
def filter_invalid_commits(commits)
  commits.select { |commit| Utils::Git.valid_commit_sha?(commit) }
end

def exec_git_command(cmd, stdin: nil)
Expand Down
27 changes: 20 additions & 7 deletions lib/datadog/ci/git/tree_uploader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ def call(repository_url)

Datadog.logger.debug { "Uploading git tree for repository #{repository_url}" }

# 2. Check if the repository clone is shallow and unshallow if appropriate
# TO BE ADDED IN CIVIS-2863
latest_commits = LocalRepository.git_commits
head_commit = latest_commits&.first
if head_commit.nil?
Expand All @@ -36,23 +34,36 @@ def call(repository_url)
end

begin
excluded_commits, included_commits = split_known_commits(repository_url, latest_commits)
if included_commits.empty?
# ask the backend for the list of commits it already has
known_commits, new_commits = fetch_known_commits_and_split(repository_url, latest_commits)
# if all commits are present in the backend, we don't need to upload anything
if new_commits.empty?
Datadog.logger.debug("No new commits to upload")
return
end

# quite often we deal with shallow clones in CI environment
if LocalRepository.git_shallow_clone? && LocalRepository.git_unshallow
Datadog.logger.debug("Detected shallow clone and unshallowed the repository, repeating commits search")

# re-run the search with the updated commit list after unshallowing
known_commits, new_commits = fetch_known_commits_and_split(
repository_url,
LocalRepository.git_commits
)
end
rescue SearchCommits::ApiError => e
Datadog.logger.debug("SearchCommits failed with #{e}, aborting git upload")
return
end

Datadog.logger.debug { "Uploading packfiles for commits: #{included_commits}" }
Datadog.logger.debug { "Uploading packfiles for commits: #{new_commits}" }
uploader = UploadPackfile.new(
api: api,
head_commit_sha: head_commit,
repository_url: repository_url
)
Packfiles.generate(included_commits: included_commits, excluded_commits: excluded_commits) do |filepath|
Packfiles.generate(included_commits: new_commits, excluded_commits: known_commits) do |filepath|
uploader.call(filepath: filepath)
rescue UploadPackfile::ApiError => e
Datadog.logger.debug("Packfile upload failed with #{e}")
Expand All @@ -62,7 +73,9 @@ def call(repository_url)

private

def split_known_commits(repository_url, latest_commits)
# Split the latest commits list into known and new commits
# based on the backend response provided by /search_commits endpoint
def fetch_known_commits_and_split(repository_url, latest_commits)
Datadog.logger.debug { "Checking the latest commits list with backend: #{latest_commits}" }
backend_commits = SearchCommits.new(api: api).call(repository_url, latest_commits)
latest_commits.partition do |commit|
Expand Down
4 changes: 4 additions & 0 deletions sig/datadog/ci/git/local_repository.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ module Datadog

def self.git_generate_packfiles: (included_commits: Enumerable[String], excluded_commits: Enumerable[String], path: String) -> String?

def self.git_shallow_clone?: () -> bool

def self.git_unshallow: () -> String?

private

def self.filter_invalid_commits: (Enumerable[String] commits) -> Array[String]
Expand Down
2 changes: 1 addition & 1 deletion sig/datadog/ci/git/tree_uploader.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ module Datadog

private

def split_known_commits: (String repository_url, Array[String] latest_commits) -> [Array[String], Array[String]]
def fetch_known_commits_and_split: (String repository_url, Array[String] latest_commits) -> [Array[String], Array[String]]
end
end
end
Expand Down
78 changes: 78 additions & 0 deletions spec/datadog/ci/git/local_repository_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -293,4 +293,82 @@ def with_custom_git_environment
it { is_expected.to eq("first-tag") }
end
end

# Specs that run against a real shallow clone (depth 1) of the datadog-ci-rb
# repository created in a temporary directory. The clone is fetched from
# GitHub over HTTPS before each example and removed afterwards.
context "with shallow clone" do
let(:tmpdir) { Dir.mktmpdir }
after { FileUtils.remove_entry(tmpdir) }

before do
# shallow clone (single commit of history) of the datadog-ci-rb repository
`cd #{tmpdir} && git clone --depth 1 https://github.com/DataDog/datadog-ci-rb`
end

# Yields with the GIT_DIR environment variable pointing at the cloned
# repository's .git directory, so git commands target the clone.
def with_shallow_clone_git_dir
ClimateControl.modify("GIT_DIR" => File.join(tmpdir, "datadog-ci-rb/.git")) do
yield
end
end

describe ".git_shallow_clone?" do
subject do
with_shallow_clone_git_dir { described_class.git_shallow_clone? }
end

it { is_expected.to be_truthy }
end

describe ".git_commits" do
subject do
with_shallow_clone_git_dir { described_class.git_commits }
end

# a depth-1 clone is expected to expose exactly one commit
it "returns a list of single git commit sha" do
expect(subject).to be_kind_of(Array)
expect(subject).not_to be_empty
expect(subject).to have(1).item
expect(subject.first).to match(/^\h{40}$/)
end
end

describe ".git_unshallow" do
# skipped on JRuby for now: old git version in the DD docker image
before { skip if PlatformHelpers.jruby? }

subject do
with_shallow_clone_git_dir { described_class.git_unshallow }
end
# lazily evaluated: the commit list is read only when first referenced
let(:commits) do
with_shallow_clone_git_dir { described_class.git_commits }
end

it "unshallows the repository" do
# subject must be evaluated first so that the unshallow runs before
# the commit list is read
expect(subject).to be_truthy
expect(commits.size).to be > 1
end
end
end

# Specs that run against a real full (non-shallow) clone of the datadog-ci-rb
# repository created in a temporary directory. The clone is fetched from
# GitHub over HTTPS before each example and removed afterwards.
context "with full clone" do
let(:tmpdir) { Dir.mktmpdir }
after { FileUtils.remove_entry(tmpdir) }

before do
# full clone (no --depth limit) of the datadog-ci-rb repository
`cd #{tmpdir} && git clone https://github.com/DataDog/datadog-ci-rb`
end

# Yields with the GIT_DIR environment variable pointing at the cloned
# repository's .git directory, so git commands target the clone.
def with_full_clone_git_dir
ClimateControl.modify("GIT_DIR" => File.join(tmpdir, "datadog-ci-rb/.git")) do
yield
end
end

describe ".git_shallow_clone?" do
subject do
with_full_clone_git_dir { described_class.git_shallow_clone? }
end

it { is_expected.to be_falsey }
end
end
end
144 changes: 96 additions & 48 deletions spec/datadog/ci/git/tree_uploader_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
let(:search_commits) { double("search_commits", call: backend_commits) }

before do
allow(Datadog::CI::Git::LocalRepository).to receive(:git_commits).and_return(latest_commits)
allow(Datadog::CI::Git::SearchCommits).to receive(:new).with(api: api).and_return(search_commits)
end

Expand All @@ -33,73 +32,122 @@
end
end

context "when the latest commits list is empty" do
let(:latest_commits) { [] }

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("Got empty latest commits list, aborting git upload")

tree_uploader.call(repository_url)
end
end

context "when the backend commits search fails" do
context "when API is configured" do
before do
expect(search_commits).to receive(:call).and_raise(Datadog::CI::Git::SearchCommits::ApiError, "test error")
expect(Datadog::CI::Git::LocalRepository).to receive(:git_commits).and_return(latest_commits)
end

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("SearchCommits failed with test error, aborting git upload")

tree_uploader.call(repository_url)
end
end

context "when all commits are known to the backend" do
let(:backend_commits) { latest_commits }

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("No new commits to upload")

tree_uploader.call(repository_url)
end
end
context "when the latest commits list is empty" do
let(:latest_commits) { [] }

context "when some commits are new" do
let(:upload_packfile) { double("upload_packfile", call: nil) }
it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("Got empty latest commits list, aborting git upload")

before do
expect(Datadog::CI::Git::Packfiles).to receive(:generate).with(
included_commits: latest_commits - backend_commits.to_a,
excluded_commits: backend_commits
).and_yield("packfile_path")

expect(Datadog::CI::Git::UploadPackfile).to receive(:new).with(
api: api,
head_commit_sha: head_commit,
repository_url: repository_url
).and_return(upload_packfile)
tree_uploader.call(repository_url)
end
end

context "when the packfile upload fails" do
context "when the backend commits search fails" do
before do
expect(upload_packfile).to receive(:call).and_raise(Datadog::CI::Git::UploadPackfile::ApiError, "test error")
expect(search_commits).to receive(:call).and_raise(Datadog::CI::Git::SearchCommits::ApiError, "test error")
end

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("Packfile upload failed with test error")
expect(Datadog.logger).to receive(:debug).with("SearchCommits failed with test error, aborting git upload")

tree_uploader.call(repository_url)
end
end

context "when the packfile upload succeeds" do
it "uploads the new commits" do
expect(upload_packfile).to receive(:call).with(filepath: "packfile_path").and_return(nil)
context "when all commits are known to the backend" do
let(:backend_commits) { latest_commits }

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("No new commits to upload")

tree_uploader.call(repository_url)
end
end

context "when some commits are new" do
let(:upload_packfile) { double("upload_packfile", call: nil) }

context "when the repository is shallow cloned" do
before do
expect(Datadog::CI::Git::LocalRepository).to receive(:git_shallow_clone?).and_return(true)
end

context "when the unshallowing fails" do
before do
expect(Datadog::CI::Git::LocalRepository).to receive(:git_unshallow).and_return(nil)
end

it "uploads what we can upload" do
expect(Datadog::CI::Git::Packfiles).to receive(:generate).with(
included_commits: %w[13c988d4f15e06bcdd0b0af290086a3079cdadb0],
excluded_commits: backend_commits
).and_yield("packfile_path")

tree_uploader.call(repository_url)
end
end

context "when the unshallowing succeeds" do
before do
expect(Datadog::CI::Git::LocalRepository).to receive(:git_unshallow).and_return("unshallow_result")
expect(Datadog::CI::Git::LocalRepository).to receive(:git_commits).and_return(
latest_commits + %w[782d09e3fbfd8cf1b5c13f3eb9621362f9089ed5]
)
end

it "uploads the new commits" do
expect(Datadog::CI::Git::Packfiles).to receive(:generate).with(
included_commits: %w[13c988d4f15e06bcdd0b0af290086a3079cdadb0 782d09e3fbfd8cf1b5c13f3eb9621362f9089ed5],
excluded_commits: backend_commits
).and_yield("packfile_path")

tree_uploader.call(repository_url)
end
end
end

context "when the repository is not shallow cloned" do
before do
expect(Datadog::CI::Git::LocalRepository).to receive(:git_shallow_clone?).and_return(false)

expect(Datadog::CI::Git::Packfiles).to receive(:generate).with(
included_commits: latest_commits - backend_commits.to_a,
excluded_commits: backend_commits
).and_yield("packfile_path")

expect(Datadog::CI::Git::UploadPackfile).to receive(:new).with(
api: api,
head_commit_sha: head_commit,
repository_url: repository_url
).and_return(upload_packfile)
end

context "when the packfile upload fails" do
before do
expect(upload_packfile).to receive(:call).and_raise(Datadog::CI::Git::UploadPackfile::ApiError, "test error")
end

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("Packfile upload failed with test error")

tree_uploader.call(repository_url)
end
end

context "when the packfile upload succeeds" do
it "uploads the new commits" do
expect(upload_packfile).to receive(:call).with(filepath: "packfile_path").and_return(nil)

tree_uploader.call(repository_url)
end
end
end
end
end
end
end
Loading