Skip to content

Commit

Permalink
* Added new function for checking if repo is sparsely checked out
Browse files Browse the repository at this point in the history
* added directory to test steps
  • Loading branch information
tetracionist committed Dec 13, 2024
1 parent a77c666 commit 1ec2834
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 46 deletions.
12 changes: 12 additions & 0 deletions src/prefect/deployments/steps/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ async def agit_clone(
include_submodules: bool = False,
access_token: Optional[str] = None,
credentials: Optional["Block"] = None,
directories: Optional[str] = None,
) -> dict[str, str]:
"""
Asynchronously clones a git repository into the current working directory.
Expand Down Expand Up @@ -81,6 +82,7 @@ async def agit_clone(
credentials=_credentials,
branch=branch,
include_submodules=include_submodules,
directories=directories,
)

await _pull_git_repository_with_retries(storage)
Expand All @@ -95,6 +97,7 @@ def git_clone(
include_submodules: bool = False,
access_token: Optional[str] = None,
credentials: Optional["Block"] = None,
directories: Optional[str] = None,
) -> dict[str, str]:
"""
Clones a git repository into the current working directory.
Expand Down Expand Up @@ -164,6 +167,14 @@ def git_clone(
- prefect.deployments.steps.git_clone:
repository: git@github.com:org/repo.git
```
Clone a repository using sparse-checkout (allows specific folders of the repository to be checked out)
```yaml
pull:
- prefect.deployments.steps.git_clone:
repository: https://github.com/org/repo.git
directories: ["dir_1", "dir_2", "prefect"]
```
"""
if access_token and credentials:
raise ValueError(
Expand All @@ -177,6 +188,7 @@ def git_clone(
credentials=_credentials,
branch=branch,
include_submodules=include_submodules,
directories=directories,
)

run_coro_as_sync(_pull_git_repository_with_retries(storage))
Expand Down
34 changes: 34 additions & 0 deletions src/prefect/runner/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class GitRepository:
pull_interval: The interval in seconds at which to pull contents from
remote storage to local storage. If None, remote storage will perform
a one-time sync.
directories: The directories to pull from the Git repository (uses git sparse-checkout)
Examples:
Pull the contents of a private git repository to the local filesystem:
Expand All @@ -111,6 +112,7 @@ def __init__(
branch: Optional[str] = None,
include_submodules: bool = False,
pull_interval: Optional[int] = 60,
directories: Optional[str] = None,
):
if credentials is None:
credentials = {}
Expand All @@ -134,6 +136,7 @@ def __init__(
self._logger = get_logger(f"runner.storage.git-repository.{self._name}")
self._storage_base_path = Path.cwd()
self._pull_interval = pull_interval
self._directories = directories

@property
def destination(self) -> Path:
Expand Down Expand Up @@ -178,6 +181,19 @@ def _repository_url_with_credentials(self) -> str:

return repository_url

async def is_sparsely_checked_out(self) -> bool:
    """
    Check whether the existing local repository has sparse-checkout enabled.

    Runs `git config --get core.sparseCheckout` in `self.destination` and
    interprets the command's standard output.

    Returns:
        True if git reports `core.sparseCheckout` as "true" (case-insensitive),
        False otherwise, including when the git command itself fails.
    """
    try:
        result = await run_process(
            ["git", "config", "--get", "core.sparseCheckout"],
            cwd=self.destination,
        )
    except Exception:
        # Best-effort check: any failure to run the command is treated as
        # "not sparsely checked out".
        return False

    # NOTE(review): run_process is presumably anyio.run_process, which returns
    # a CompletedProcess whose stdout is bytes -- confirm against the file's
    # imports. Calling .strip() on that object directly raised AttributeError,
    # which the blanket except swallowed, so this check could never be True.
    stdout = getattr(result, "stdout", result)
    if isinstance(stdout, bytes):
        stdout = stdout.decode(errors="replace")
    return isinstance(stdout, str) and stdout.strip().lower() == "true"

async def pull_code(self):
"""
Pulls the contents of the configured repository to the local filesystem.
Expand Down Expand Up @@ -206,6 +222,13 @@ async def pull_code(self):
f"does not match the configured repository {self._url}"
)

# Sparsely checkout the repository if directories are specified and the repo is not in sparse-checkout mode already
if self._directories and not await self.is_sparsely_checked_out():
await run_process(
["git", "sparse-checkout", "set"] + self._directories,
cwd=self.destination,
)

self._logger.debug("Pulling latest changes from origin/%s", self._branch)
# Update the existing repository
cmd = ["git", "pull", "origin"]
Expand Down Expand Up @@ -245,6 +268,10 @@ async def _clone_repo(self):
if self._include_submodules:
cmd += ["--recurse-submodules"]

# --sparse initially checks out only the files in the repository's top-level directory
if self._directories:
cmd += ["--sparse"]

# Limit git history and set path to clone to
cmd += ["--depth", "1", str(self.destination)]

Expand All @@ -258,6 +285,13 @@ async def _clone_repo(self):
f" {exc.returncode}."
) from exc_chain

# After cloning, if the repo is in sparse-checkout mode, expand the working tree to the requested directories
if await self.is_sparsely_checked_out():
await run_process(
["git", "sparse-checkout", "set"] + self._directories,
cwd=self.destination,
)

def __eq__(self, __value) -> bool:
if isinstance(__value, GitRepository):
return (
Expand Down
51 changes: 5 additions & 46 deletions tests/deployment/test_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,6 @@ async def test_git_clone(self, git_repository_mock):
branch=None,
include_submodules=False,
directories=None,
cone_mode=True,
)
git_repository_mock.return_value.pull_code.assert_awaited_once()

Expand All @@ -455,7 +454,6 @@ async def test_git_clone_include_submodules(self, git_repository_mock):
branch=None,
include_submodules=True,
directories=None,
cone_mode=True,
)
git_repository_mock.return_value.pull_code.assert_awaited_once()

Expand All @@ -475,7 +473,6 @@ async def test_git_clone_with_access_token(self, git_repository_mock):
branch=None,
include_submodules=False,
directories=None,
cone_mode=True,
)
git_repository_mock.return_value.pull_code.assert_awaited_once()

Expand Down Expand Up @@ -503,7 +500,6 @@ class MockGitCredentials(Block):
branch=None,
include_submodules=False,
directories=None,
cone_mode=True,
)
git_repository_mock.return_value.pull_code.assert_awaited_once()

Expand Down Expand Up @@ -540,7 +536,6 @@ async def mock_sleep(seconds):
branch=None,
include_submodules=False,
directories=None,
cone_mode=True,
)

assert mock_git_repo.call_args_list == [expected_call]
Expand All @@ -555,6 +550,7 @@ async def test_agit_clone_basic(self, git_repository_mock):
credentials=None,
branch=None,
include_submodules=False,
directories=None,
)
git_repository_mock.return_value.pull_code.assert_awaited_once()

Expand All @@ -567,6 +563,7 @@ async def test_agit_clone_with_all_options(self, git_repository_mock):
branch="dev",
include_submodules=True,
access_token="my-access-token",
directories=None,
)

assert output["directory"] == "repo"
Expand All @@ -575,6 +572,7 @@ async def test_agit_clone_with_all_options(self, git_repository_mock):
credentials={"access_token": "my-access-token"},
branch="dev",
include_submodules=True,
directories=None,
)
git_repository_mock.return_value.pull_code.assert_awaited_once()

Expand All @@ -597,6 +595,7 @@ class MockGitCredentials(Block):
credentials=creds,
branch=None,
include_submodules=False,
directories=None,
)
git_repository_mock.return_value.pull_code.assert_awaited_once()

Expand Down Expand Up @@ -679,50 +678,10 @@ async def mock_sleep(seconds):
credentials=None,
branch=None,
include_submodules=False,
directories=None,
)
assert mock_git_repo.call_args_list == [expected_call]

async def test_git_clone_sparse_checkout(self, git_repository_mock):
    """
    Run the git_clone step with `directories` set and verify the arguments
    passed to `git_repository_mock`, with `cone_mode` expected to be True.
    """
    step = {
        "prefect.deployments.steps.git_clone": {
            "repository": "https://github.com/org/repo.git",
            "directories": ["directory_1", "directory_2"],
        }
    }

    result = await run_step(step)

    # The step reports the directory the repository was cloned into.
    assert result["directory"] == "repo"

    expected_kwargs = dict(
        url="https://github.com/org/repo.git",
        credentials=None,
        branch=None,
        include_submodules=False,
        directories=["directory_1", "directory_2"],
        cone_mode=True,
    )
    git_repository_mock.assert_called_once_with(**expected_kwargs)
    git_repository_mock.return_value.pull_code.assert_awaited_once()

async def test_git_clone_sparse_checkout_no_cone(self, git_repository_mock):
    """
    Run the git_clone step with `directories` and `cone_mode=False` and verify
    both values are passed unchanged to `git_repository_mock`.
    """
    step = {
        "prefect.deployments.steps.git_clone": {
            "repository": "https://github.com/org/repo.git",
            "directories": ["directory_1", "directory_2"],
            "cone_mode": False,
        }
    }

    result = await run_step(step)

    # The step reports the directory the repository was cloned into.
    assert result["directory"] == "repo"

    expected_kwargs = dict(
        url="https://github.com/org/repo.git",
        credentials=None,
        branch=None,
        include_submodules=False,
        directories=["directory_1", "directory_2"],
        cone_mode=False,
    )
    git_repository_mock.assert_called_once_with(**expected_kwargs)
    git_repository_mock.return_value.pull_code.assert_awaited_once()


class TestPullFromRemoteStorage:
@pytest.fixture
Expand Down

0 comments on commit 1ec2834

Please sign in to comment.