Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add git_helpers.py w/ function for safe directory handling #35

Merged
merged 10 commits into from
Jul 10, 2023
5 changes: 5 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ Environment
.. automodule:: ska_helpers.environment
:members:

Git helpers
-----------

.. automodule:: ska_helpers.git_helpers
:members:

Logging
-------
Expand Down
84 changes: 84 additions & 0 deletions ska_helpers/git_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
Helper functions for using git.
"""
import functools
import re
import subprocess
import sys
import warnings
from pathlib import Path

from ska_file import chdir


__all__ = ["make_git_repo_safe"]


@functools.lru_cache()
def make_git_repo_safe(path: str | Path) -> None:
"""Ensure git repo at ``path`` is a safe git repository.

A "safe" repo is one which is owned by the user calling this function. See:
https://github.blog/2022-04-12-git-security-vulnerability-announced/#cve-2022-24765

If an unsafe repo is detected then this command issues a warning to that
effect and then updates the user's git config to add this repo as a safe
directory.

This function should only be called for known safe git repos such as
``$SKA/data/chandra_models``.

:param path: str, Path
Path to top level of a git repository
""" # noqa
path = Path(path)
if not (path / ".git").exists():
raise FileNotFoundError(f"'{path}' is not the top level of a git repo")

with chdir(path):
# Run a lightweight command which will fail for an unsafe repo
proc = subprocess.run(["git", "rev-parse"], capture_output=True)
if proc.returncode != 0:
_handle_rev_parse_failure(path, proc)
# Ensure that the repo is now safe. This will raise an exception
# otherwise.
subprocess.check_call(["git", "rev-parse"])


def _handle_rev_parse_failure(path: Path, proc: subprocess.CompletedProcess):
"""Handle a failure of `git --rev-parse` command.

This is most likely due to repo not being safe. If that is the case (based
on the command error output) then issue a warning and update the user git
config. Otherwise print the error text and raise an exception.
"""
# Regex of command that is provided by git to mark a directory as safe. The
# suggested directory name (last group) may be very different from the
# original ``path``. For example on Windows with a VM shared directory
# Y:\data\chandra_models, the required git config safe.directory is
# %(prefix)///Mac/ska/data/chandra_models. Using the original ``path`` does
# not work.
git_safe_config_RE = r"(git) (config) (--global) (--add) (safe\.directory) (\S+)"
jeanconn marked this conversation as resolved.
Show resolved Hide resolved

# Error message from the failed git command, which looks like this:
# $ git status
# fatal: detected dubious ownership in repository at '//Mac/Home/ska/data/chandra_models' # noqa: E501
# To add an exception for this directory, call:
#
# git config --global --add safe.directory '%(prefix)///Mac/Home/ska/data/chandra_models' # noqa: E501

err = proc.stderr.decode().strip()

if match := re.search(git_safe_config_RE, err, re.MULTILINE):
cmds = list(match.groups())
cmds[-1] = cmds[-1].strip("'")
warnings.warn(
"Updating git config to allow read-only git operations on "
f"trusted repository {path}. Contact Ska team for questions.",
stacklevel=3,
)
subprocess.check_call(cmds)
else:
print(err, file=sys.stderr)
proc.check_returncode()
43 changes: 43 additions & 0 deletions ska_helpers/tests/test_git_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
import getpass
import tempfile

import git
import pytest

from ska_helpers import git_helpers, paths
from ska_helpers.utils import get_owner

CHANDRA_MODELS = paths.chandra_models_repo_path()


def ska_ownership_ok():
# Check that the OS and volume support file ownership
# (not the case with shared directories on a Windows VM on parallels)
# and that the chandra_models dir is owned by the current user
try:
return get_owner(CHANDRA_MODELS) == getpass.getuser()
except Exception:
return False


@pytest.mark.skipif(
not CHANDRA_MODELS.exists(),
reason="Chandra models dir is not there",
)
@pytest.mark.skipif(ska_ownership_ok(), reason="Chandra models dir ownership is OK")
def test_make_git_repo_safe(monkeypatch):
with tempfile.TemporaryDirectory() as tempdir:
# temporarily set HOME to a temp dir so .gitconfig comes from there
monkeypatch.setenv("HOME", tempdir)
repo = git.Repo(CHANDRA_MODELS)
with pytest.raises(git.exc.GitCommandError):
# This statement fails with the error
# fatal: detected dubious ownership
# unless the repo is marked safe in .gitconfig
repo.is_dirty()
with pytest.warns(UserWarning, match="Updating git config"):
# marke the repo safe and issue warning
git_helpers.make_git_repo_safe(CHANDRA_MODELS)
# success
taldcroft marked this conversation as resolved.
Show resolved Hide resolved
repo.is_dirty()
32 changes: 32 additions & 0 deletions ska_helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,38 @@
__all__ = ["LazyDict", "LazyVal"]


def get_owner(path):
"""
Returns the owner of a file or directory.

Parameters:
-----------
path : str or pathlib.Path
The path to the file or directory.

Returns:
--------
str
The name of the owner of the file or directory.
"""

from testr import test_helper
from pathlib import Path

if test_helper.is_windows():
import win32security

# Suggested by copilot chat, seems to
security_descriptor = win32security.GetFileSecurity(
str(path), win32security.OWNER_SECURITY_INFORMATION
)
owner_sid = security_descriptor.GetSecurityDescriptorOwner()
owner_name, _, _ = win32security.LookupAccountSid(None, owner_sid)
else:
owner_name = Path(path).owner()
return owner_name


def _lazy_load_wrap(unbound_method):
@functools.wraps(unbound_method)
def wrapper(self, *args, **kwargs):
Expand Down