From f04509af7def00f58d94fc39be3ee6fd7869b185 Mon Sep 17 00:00:00 2001 From: Tom Aldcroft Date: Fri, 2 Jun 2023 19:17:17 -0400 Subject: [PATCH] Fixes to get chandra_models and paths working on Windows --- ska_helpers/chandra_models.py | 49 ++++++++++++++++++++++++--------- ska_helpers/tests/test_paths.py | 3 ++ 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/ska_helpers/chandra_models.py b/ska_helpers/chandra_models.py index 2f952ca..48965ba 100644 --- a/ska_helpers/chandra_models.py +++ b/ska_helpers/chandra_models.py @@ -5,6 +5,7 @@ import contextlib import hashlib import os +import shutil import tempfile import warnings from pathlib import Path @@ -26,6 +27,34 @@ ) +@contextlib.contextmanager +def get_local_repo(repo_path, version): + """Get local version of ``repo_path`` and ensure correct clean-up.""" + + def onerror(func, path, exc_info): + os.chmod(path, 0o0777) + try: + func(path) + except Exception as exc: + print(f"Warning: temp_dir() could not remove {path} because of {exc}") + + clone = str(repo_path).startswith("https://github.com") or version is not None + if clone: + repo_path_local = tempfile.mkdtemp() + repo = git.Repo.clone_from(repo_path, repo_path_local) + if version is not None: + repo.git.checkout(version) + else: + repo = git.Repo(repo_path) + repo_path_local = repo_path + + yield repo, repo_path_local + + repo.close() + if clone: + shutil.rmtree(repo_path_local, onerror=onerror) + + def get_data( file_path: str | Path, version: Optional[str] = None, @@ -159,18 +188,11 @@ def get_data( # - If the repo is remote on GitHub then we always clone to a temp dir # - If the repo is local and the version is not the default then we clone to a temp # to allow checking out at the specified version. - with contextlib.ExitStack() as stack: - if str(repo_path).startswith("https://github.com") or version is not None: - # For a remote GitHub repo or non-default version, clone to a temp dir - repo_path_local = stack.enter_context(tempfile.TemporaryDirectory()) - repo = git.Repo.clone_from(repo_path, repo_path_local) - if version is not None: - repo.git.checkout(version) - else: - # For a local repo at the default version use the existing directory - repo = git.Repo(repo_path) - repo_path_local = repo_path - + # This is all done with a context manager that ensure the repo object is + # properly closed and that all temporary files are cleaned up. Doing this + # on Windows was challenging. Search on slack: + # "The process cannot access the file because it is being used" + with get_local_repo(repo_path, version) as (repo, repo_path_local): repo_file_path = Path(repo_path_local) / file_path if not repo_file_path.exists(): raise FileNotFoundError(f"chandra_models {file_path=} does not exist") @@ -193,7 +215,8 @@ def get_data( data, repo_file_path = read_func(repo_file_path, **read_func_kwargs) # Compute the MD5 sum of repo_file_path. - md5 = hashlib.md5(repo_file_path.read_bytes()).hexdigest() + file_bytes = repo_file_path.read_bytes().replace(b"\r", b"") + md5 = hashlib.md5(file_bytes).hexdigest() # Store some info about this request in the cache. info.update( diff --git a/ska_helpers/tests/test_paths.py b/ska_helpers/tests/test_paths.py index 8b6fb01..16c2f60 100644 --- a/ska_helpers/tests/test_paths.py +++ b/ska_helpers/tests/test_paths.py @@ -16,6 +16,9 @@ def test_chandra_models_paths(repo_source, chandra_models_repo_dir, monkeypatch) monkeypatch.setenv(chandra_models_repo_dir, str(root)) kwargs = {} elif repo_source == "default": + # Make sure no path-changing env vars are set + for env_var in paths.CHANDRA_MODELS_ROOT_ENV_VARS: + monkeypatch.delenv(env_var, raising=False) root = Path(os.environ["SKA"]) / "data" / "chandra_models" kwargs = {} elif repo_source == "kwargs":