From 8c42e7980c87df9edb3742a6cb8e33c7676f940d Mon Sep 17 00:00:00 2001 From: Matthieu Darbois Date: Tue, 17 Sep 2024 00:12:19 +0200 Subject: [PATCH] fix: file ownership of files copied into the container (#2007) Revert to using `cat`/`tar` to copy files/folders into the container. --- cibuildwheel/oci_container.py | 38 ++++++++++++++++++++++++++++++--- unit_test/oci_container_test.py | 14 ++++++++++++ 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/cibuildwheel/oci_container.py b/cibuildwheel/oci_container.py index f846ca190..b13417e3b 100644 --- a/cibuildwheel/oci_container.py +++ b/cibuildwheel/oci_container.py @@ -5,6 +5,7 @@ import os import platform import shlex +import shutil import subprocess import sys import typing @@ -169,6 +170,9 @@ def __init__( self.cwd = cwd self.name: str | None = None self.engine = engine + self.host_tar_format = "" + if sys.platform.startswith("darwin"): + self.host_tar_format = "--format gnutar" def _get_platform_args(self, *, oci_platform: OCIPlatform | None = None) -> tuple[str, str]: if oci_platform is None: @@ -311,10 +315,38 @@ def __exit__( def copy_into(self, from_path: Path, to_path: PurePath) -> None: if from_path.is_dir(): self.call(["mkdir", "-p", to_path]) - call(self.engine.name, "cp", f"{from_path}/.", f"{self.name}:{to_path}") + subprocess.run( + f"tar -c {self.host_tar_format} -f - . | {self.engine.name} exec -i {self.name} tar --no-same-owner -xC {shell_quote(to_path)} -f -", + shell=True, + check=True, + cwd=from_path, + ) else: - self.call(["mkdir", "-p", to_path.parent]) - call(self.engine.name, "cp", from_path, f"{self.name}:{to_path}") + exec_process: subprocess.Popen[bytes] + with subprocess.Popen( + [ + self.engine.name, + "exec", + "-i", + str(self.name), + "sh", + "-c", + f"cat > {shell_quote(to_path)}", + ], + stdin=subprocess.PIPE, + ) as exec_process: + assert exec_process.stdin + with open(from_path, "rb") as from_file: + # Bug in mypy, https://github.com/python/mypy/issues/15031 + shutil.copyfileobj(from_file, exec_process.stdin) # type: ignore[misc] + + exec_process.stdin.close() + exec_process.wait() + + if exec_process.returncode: + raise subprocess.CalledProcessError( + exec_process.returncode, exec_process.args, None, None + ) def copy_out(self, from_path: PurePath, to_path: Path) -> None: # note: we assume from_path is a dir diff --git a/unit_test/oci_container_test.py b/unit_test/oci_container_test.py index e3fcc59be..d88ca52eb 100644 --- a/unit_test/oci_container_test.py +++ b/unit_test/oci_container_test.py @@ -244,6 +244,9 @@ def test_file_operation(tmp_path: Path, container_engine): container.copy_into(original_test_file, dst_file) + owner = container.call(["stat", "-c", "%u:%g", dst_file], capture_output=True).strip() + assert owner == "0:0" + output = container.call(["cat", dst_file], capture_output=True) assert test_binary_data == bytes(output, encoding="utf8", errors="surrogateescape") @@ -266,6 +269,12 @@ def test_dir_operations(tmp_path: Path, container_engine): dst_file = dst_dir / "test.dat" container.copy_into(test_dir, dst_dir) + owner = container.call(["stat", "-c", "%u:%g", dst_dir], capture_output=True).strip() + assert owner == "0:0" + + owner = container.call(["stat", "-c", "%u:%g", dst_file], capture_output=True).strip() + assert owner == "0:0" + output = container.call(["cat", dst_file], capture_output=True) assert test_binary_data == bytes(output, encoding="utf8", errors="surrogateescape") @@ -276,6 +285,11 @@ def test_dir_operations(tmp_path: Path, container_engine): new_test_dir = tmp_path / "test_dir_new" container.copy_out(dst_dir, new_test_dir) + assert os.getuid() == new_test_dir.stat().st_uid + assert os.getgid() == new_test_dir.stat().st_gid + assert os.getuid() == (new_test_dir / "test.dat").stat().st_uid + assert os.getgid() == (new_test_dir / "test.dat").stat().st_gid + assert test_binary_data == (new_test_dir / "test.dat").read_bytes()