Skip to content

Commit

Permalink
Merge pull request #155 from DataDog/anmarchenko/git_unshallowing
Browse files Browse the repository at this point in the history
[CIVIS-2863] Git repository unshallowing logic
  • Loading branch information
anmarchenko authored Apr 10, 2024
2 parents 86d6c60 + 2bcb5f0 commit 39f3f3c
Show file tree
Hide file tree
Showing 6 changed files with 221 additions and 61 deletions.
27 changes: 22 additions & 5 deletions lib/datadog/ci/git/local_repository.rb
Original file line number Diff line number Diff line change
Expand Up @@ -159,17 +159,34 @@ def self.git_generate_packfiles(included_commits:, excluded_commits:, path:)
nil
end

# Tells whether the current git repository is a shallow clone.
#
# Runs `git rev-parse --is-shallow-repository` through exec_git_command and
# compares the returned output with the literal string "true".
#
# Returns true only when the output equals "true". Any StandardError raised
# while running the command is handed to log_failure and false is returned.
def self.git_shallow_clone?
  shallow_flag = exec_git_command("git rev-parse --is-shallow-repository")
  shallow_flag == "true"
rescue => error
  log_failure(error, "git shallow clone")
  false
end

# Fetches more history into a shallow clone.
#
# Builds a single `git fetch` command line and runs it through
# exec_git_command. The command passes these options to git:
#   --shallow-since="1 month ago"
#   --update-shallow
#   --filter="blob:none"
#   --recurse-submodules=no
# The remote name and the commit to fetch are resolved by the shell at run
# time via `$(...)` substitutions: the configured clone.defaultRemoteName
# (defaulting to "origin") and the sha of HEAD.
#
# Returns whatever exec_git_command returns. Any StandardError raised while
# running the command is handed to log_failure and nil is returned.
def self.git_unshallow
  fetch_command = [
    "git fetch",
    "--shallow-since=\"1 month ago\"",
    "--update-shallow",
    "--filter=\"blob:none\"",
    "--recurse-submodules=no",
    "$(git config --default origin --get clone.defaultRemoteName) $(git rev-parse HEAD)"
  ].join(" ")

  exec_git_command(fetch_command)
rescue => error
  log_failure(error, "git unshallow")
  nil
end

# makes .exec_git_command private to make sure that this method
# is not called from outside of this module with insecure parameters
class << self
private

# Keeps only the entries of +commits+ that Utils::Git.valid_commit_sha?
# accepts, preserving their original order.
#
# The list is traversed exactly once; an earlier filter_map pass whose result
# was discarded has been removed.
#
# commits - enumerable of candidate commit sha strings
#
# Returns an Array with the accepted commits (empty when none are accepted).
def filter_invalid_commits(commits)
  commits.select { |commit| Utils::Git.valid_commit_sha?(commit) }
end

def exec_git_command(cmd, stdin: nil)
Expand Down
27 changes: 20 additions & 7 deletions lib/datadog/ci/git/tree_uploader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ def call(repository_url)

Datadog.logger.debug { "Uploading git tree for repository #{repository_url}" }

# 2. Check if the repository clone is shallow and unshallow if appropriate
# TO BE ADDED IN CIVIS-2863
latest_commits = LocalRepository.git_commits
head_commit = latest_commits&.first
if head_commit.nil?
Expand All @@ -36,23 +34,36 @@ def call(repository_url)
end

begin
excluded_commits, included_commits = split_known_commits(repository_url, latest_commits)
if included_commits.empty?
# ask the backend for the list of commits it already has
known_commits, new_commits = fetch_known_commits_and_split(repository_url, latest_commits)
# if all commits are present in the backend, we don't need to upload anything
if new_commits.empty?
Datadog.logger.debug("No new commits to upload")
return
end

# quite often we deal with shallow clones in CI environment
if LocalRepository.git_shallow_clone? && LocalRepository.git_unshallow
Datadog.logger.debug("Detected shallow clone and unshallowed the repository, repeating commits search")

# re-run the search with the updated commit list after unshallowing
known_commits, new_commits = fetch_known_commits_and_split(
repository_url,
LocalRepository.git_commits
)
end
rescue SearchCommits::ApiError => e
Datadog.logger.debug("SearchCommits failed with #{e}, aborting git upload")
return
end

Datadog.logger.debug { "Uploading packfiles for commits: #{included_commits}" }
Datadog.logger.debug { "Uploading packfiles for commits: #{new_commits}" }
uploader = UploadPackfile.new(
api: api,
head_commit_sha: head_commit,
repository_url: repository_url
)
Packfiles.generate(included_commits: included_commits, excluded_commits: excluded_commits) do |filepath|
Packfiles.generate(included_commits: new_commits, excluded_commits: known_commits) do |filepath|
uploader.call(filepath: filepath)
rescue UploadPackfile::ApiError => e
Datadog.logger.debug("Packfile upload failed with #{e}")
Expand All @@ -62,7 +73,9 @@ def call(repository_url)

private

def split_known_commits(repository_url, latest_commits)
# Split the latest commits list into known and new commits
# based on the backend response provided by /search_commits endpoint
def fetch_known_commits_and_split(repository_url, latest_commits)
Datadog.logger.debug { "Checking the latest commits list with backend: #{latest_commits}" }
backend_commits = SearchCommits.new(api: api).call(repository_url, latest_commits)
latest_commits.partition do |commit|
Expand Down
4 changes: 4 additions & 0 deletions sig/datadog/ci/git/local_repository.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ module Datadog

def self.git_generate_packfiles: (included_commits: Enumerable[String], excluded_commits: Enumerable[String], path: String) -> String?

def self.git_shallow_clone?: () -> bool

def self.git_unshallow: () -> String?

private

def self.filter_invalid_commits: (Enumerable[String] commits) -> Array[String]
Expand Down
2 changes: 1 addition & 1 deletion sig/datadog/ci/git/tree_uploader.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ module Datadog

private

def split_known_commits: (String repository_url, Array[String] latest_commits) -> [Array[String], Array[String]]
def fetch_known_commits_and_split: (String repository_url, Array[String] latest_commits) -> [Array[String], Array[String]]
end
end
end
Expand Down
78 changes: 78 additions & 0 deletions spec/datadog/ci/git/local_repository_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -293,4 +293,82 @@ def with_custom_git_environment
it { is_expected.to eq("first-tag") }
end
end

# Exercises LocalRepository against a real shallow clone (depth 1) of the
# datadog-ci-rb repository created in a temporary directory.
# NOTE(review): the clone is fetched from github.com, so these examples need
# network access and a git binary on the PATH.
context "with shallow clone" do
# fresh temporary directory per example, removed again after the example
let(:tmpdir) { Dir.mktmpdir }
after { FileUtils.remove_entry(tmpdir) }

before do
# shallow clone (only the latest commit) of the datadog-ci-rb repository
`cd #{tmpdir} && git clone --depth 1 https://github.com/DataDog/datadog-ci-rb`
end

# Runs the given block with the GIT_DIR environment variable pointing at the
# .git directory of the shallow clone, so git commands target that clone.
def with_shallow_clone_git_dir
ClimateControl.modify("GIT_DIR" => File.join(tmpdir, "datadog-ci-rb/.git")) do
yield
end
end

describe ".git_shallow_clone?" do
subject do
with_shallow_clone_git_dir { described_class.git_shallow_clone? }
end

it { is_expected.to be_truthy }
end

describe ".git_commits" do
subject do
with_shallow_clone_git_dir { described_class.git_commits }
end

# a depth-1 clone is expected to expose exactly one commit
it "returns a list of single git commit sha" do
expect(subject).to be_kind_of(Array)
expect(subject).not_to be_empty
expect(subject).to have(1).item
expect(subject.first).to match(/^\h{40}$/)
end
end

describe ".git_unshallow" do
# skipped on JRuby for now: the DD docker image used there ships an old git version
before { skip if PlatformHelpers.jruby? }

subject do
with_shallow_clone_git_dir { described_class.git_unshallow }
end
# lazily evaluated: in the example below it is first referenced after
# subject, so the commit list is read once git_unshallow has already run
let(:commits) do
with_shallow_clone_git_dir { described_class.git_commits }
end

it "unshallows the repository" do
expect(subject).to be_truthy
expect(commits.size).to be > 1
end
end
end

# Exercises LocalRepository against a real full (non-shallow) clone of the
# datadog-ci-rb repository created in a temporary directory.
# NOTE(review): the clone is fetched from github.com, so this example needs
# network access and a git binary on the PATH.
context "with full clone" do
# fresh temporary directory per example, removed again after the example
let(:tmpdir) { Dir.mktmpdir }
after { FileUtils.remove_entry(tmpdir) }

before do
# full clone (no --depth limit) of the datadog-ci-rb repository
`cd #{tmpdir} && git clone https://github.com/DataDog/datadog-ci-rb`
end

# Runs the given block with the GIT_DIR environment variable pointing at the
# .git directory of the full clone, so git commands target that clone.
def with_full_clone_git_dir
ClimateControl.modify("GIT_DIR" => File.join(tmpdir, "datadog-ci-rb/.git")) do
yield
end
end

describe ".git_shallow_clone?" do
subject do
with_full_clone_git_dir { described_class.git_shallow_clone? }
end

it { is_expected.to be_falsey }
end
end
end
144 changes: 96 additions & 48 deletions spec/datadog/ci/git/tree_uploader_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
let(:search_commits) { double("search_commits", call: backend_commits) }

before do
allow(Datadog::CI::Git::LocalRepository).to receive(:git_commits).and_return(latest_commits)
allow(Datadog::CI::Git::SearchCommits).to receive(:new).with(api: api).and_return(search_commits)
end

Expand All @@ -33,73 +32,122 @@
end
end

context "when the latest commits list is empty" do
let(:latest_commits) { [] }

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("Got empty latest commits list, aborting git upload")

tree_uploader.call(repository_url)
end
end

context "when the backend commits search fails" do
context "when API is configured" do
before do
expect(search_commits).to receive(:call).and_raise(Datadog::CI::Git::SearchCommits::ApiError, "test error")
expect(Datadog::CI::Git::LocalRepository).to receive(:git_commits).and_return(latest_commits)
end

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("SearchCommits failed with test error, aborting git upload")

tree_uploader.call(repository_url)
end
end

context "when all commits are known to the backend" do
let(:backend_commits) { latest_commits }

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("No new commits to upload")

tree_uploader.call(repository_url)
end
end
context "when the latest commits list is empty" do
let(:latest_commits) { [] }

context "when some commits are new" do
let(:upload_packfile) { double("upload_packfile", call: nil) }
it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("Got empty latest commits list, aborting git upload")

before do
expect(Datadog::CI::Git::Packfiles).to receive(:generate).with(
included_commits: latest_commits - backend_commits.to_a,
excluded_commits: backend_commits
).and_yield("packfile_path")

expect(Datadog::CI::Git::UploadPackfile).to receive(:new).with(
api: api,
head_commit_sha: head_commit,
repository_url: repository_url
).and_return(upload_packfile)
tree_uploader.call(repository_url)
end
end

context "when the packfile upload fails" do
context "when the backend commits search fails" do
before do
expect(upload_packfile).to receive(:call).and_raise(Datadog::CI::Git::UploadPackfile::ApiError, "test error")
expect(search_commits).to receive(:call).and_raise(Datadog::CI::Git::SearchCommits::ApiError, "test error")
end

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("Packfile upload failed with test error")
expect(Datadog.logger).to receive(:debug).with("SearchCommits failed with test error, aborting git upload")

tree_uploader.call(repository_url)
end
end

context "when the packfile upload succeeds" do
it "uploads the new commits" do
expect(upload_packfile).to receive(:call).with(filepath: "packfile_path").and_return(nil)
context "when all commits are known to the backend" do
let(:backend_commits) { latest_commits }

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("No new commits to upload")

tree_uploader.call(repository_url)
end
end

context "when some commits are new" do
let(:upload_packfile) { double("upload_packfile", call: nil) }

context "when the repository is shallow cloned" do
before do
expect(Datadog::CI::Git::LocalRepository).to receive(:git_shallow_clone?).and_return(true)
end

context "when the unshallowing fails" do
before do
expect(Datadog::CI::Git::LocalRepository).to receive(:git_unshallow).and_return(nil)
end

it "uploads what we can upload" do
expect(Datadog::CI::Git::Packfiles).to receive(:generate).with(
included_commits: %w[13c988d4f15e06bcdd0b0af290086a3079cdadb0],
excluded_commits: backend_commits
).and_yield("packfile_path")

tree_uploader.call(repository_url)
end
end

context "when the unshallowing succeeds" do
before do
expect(Datadog::CI::Git::LocalRepository).to receive(:git_unshallow).and_return("unshallow_result")
expect(Datadog::CI::Git::LocalRepository).to receive(:git_commits).and_return(
latest_commits + %w[782d09e3fbfd8cf1b5c13f3eb9621362f9089ed5]
)
end

it "uploads the new commits" do
expect(Datadog::CI::Git::Packfiles).to receive(:generate).with(
included_commits: %w[13c988d4f15e06bcdd0b0af290086a3079cdadb0 782d09e3fbfd8cf1b5c13f3eb9621362f9089ed5],
excluded_commits: backend_commits
).and_yield("packfile_path")

tree_uploader.call(repository_url)
end
end
end

context "when the repository is not shallow cloned" do
before do
expect(Datadog::CI::Git::LocalRepository).to receive(:git_shallow_clone?).and_return(false)

expect(Datadog::CI::Git::Packfiles).to receive(:generate).with(
included_commits: latest_commits - backend_commits.to_a,
excluded_commits: backend_commits
).and_yield("packfile_path")

expect(Datadog::CI::Git::UploadPackfile).to receive(:new).with(
api: api,
head_commit_sha: head_commit,
repository_url: repository_url
).and_return(upload_packfile)
end

context "when the packfile upload fails" do
before do
expect(upload_packfile).to receive(:call).and_raise(Datadog::CI::Git::UploadPackfile::ApiError, "test error")
end

it "logs a debug message and aborts the git upload" do
expect(Datadog.logger).to receive(:debug).with("Packfile upload failed with test error")

tree_uploader.call(repository_url)
end
end

context "when the packfile upload succeeds" do
it "uploads the new commits" do
expect(upload_packfile).to receive(:call).with(filepath: "packfile_path").and_return(nil)

tree_uploader.call(repository_url)
end
end
end
end
end
end
end

0 comments on commit 39f3f3c

Please sign in to comment.