Skip to content

Commit

Permalink
Merge pull request #687 from matrss/fix-git-submodules
Browse files Browse the repository at this point in the history
Fix git submodule detection if .git is not a file
  • Loading branch information
carmenbianca authored Oct 24, 2023
2 parents 34d1d87 + 32e21e8 commit b12fba8
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 30 deletions.
2 changes: 2 additions & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ Contributors

- Pietro Albini <pietro.albini@ferrous-systems.com>

- Matthias Riße

- Stefan Hynek <stefan.hynek@uni-goettingen.de>

- rajivsunar07
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ CLI command and its behaviour. There are no guarantees of stability for the
### Changed

- Alpine Docker image now uses 3.18 as base. (#846)
- The Git submodule detection was made less naïve. Where previously it detected
a directory with a `.git` file as a submodule, it now uses the git command to
detect submodules. This helps detect (quoted from Git man page)
"[repositories] that were cloned independently and later added as a submodule
or old setups", which "have the submodule's git directory inside the submodule
instead of embedded into the superproject's git directory". (#687)

- No longer scan binary or uncommentable files for their contents in search of
REUSE information. (#825)
Expand Down
7 changes: 4 additions & 3 deletions src/reuse/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: 2022 Florian Snow <florian@familysnow.net>
# SPDX-FileCopyrightText: 2023 DB Systel GmbH
# SPDX-FileCopyrightText: 2023 Carmen Bianca BAKKER <carmenbianca@fsfe.org>
#
# SPDX-FileCopyrightText: 2023 Matthias Riße
# SPDX-License-Identifier: GPL-3.0-or-later

"""Module that contains the central Project class."""
Expand Down Expand Up @@ -124,8 +124,9 @@ def all_files(self, directory: Optional[StrPath] = None) -> Iterator[Path]:
_LOGGER.debug("skipping symlink '%s'", the_dir)
dirs.remove(dir_)
elif (
the_dir / ".git"
).is_file() and not self.include_submodules:
not self.include_submodules
and self.vcs_strategy.is_submodule(the_dir)
):
_LOGGER.info(
"ignoring '%s' because it is a submodule", the_dir
)
Expand Down
39 changes: 39 additions & 0 deletions src/reuse/vcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ def __init__(self, project: Project):
def is_ignored(self, path: StrPath) -> bool:
"""Is *path* ignored by the VCS?"""

@abstractmethod
def is_submodule(self, path: StrPath) -> bool:
"""Is *path* a VCS submodule?"""

@classmethod
@abstractmethod
def in_repo(cls, directory: StrPath) -> bool:
Expand Down Expand Up @@ -63,6 +67,9 @@ def __init__(self, project: Project):
def is_ignored(self, path: StrPath) -> bool:
return False

def is_submodule(self, path: StrPath) -> bool:
return False

@classmethod
def in_repo(cls, directory: StrPath) -> bool:
return False
Expand All @@ -80,6 +87,7 @@ def __init__(self, project: Project):
if not GIT_EXE:
raise FileNotFoundError("Could not find binary for Git")
self._all_ignored_files = self._find_all_ignored_files()
self._submodules = self._find_submodules()

def _find_all_ignored_files(self) -> Set[Path]:
"""Return a set of all files ignored by git. If a whole directory is
Expand All @@ -103,10 +111,37 @@ def _find_all_ignored_files(self) -> Set[Path]:
all_files = result.stdout.decode("utf-8").split("\0")
return {Path(file_) for file_ in all_files}

def _find_submodules(self) -> Set[Path]:
command = [
str(GIT_EXE),
"config",
"-z",
"--file",
".gitmodules",
"--get-regexp",
r"\.path$",
]
result = execute_command(command, _LOGGER, cwd=self.project.root)
# The final element may be an empty string. Filter it.
submodule_entries = [
entry
for entry in result.stdout.decode("utf-8").split("\0")
if entry
]
# Each entry looks a little like 'submodule.submodule.path\nmy_path'.
return {Path(entry.splitlines()[1]) for entry in submodule_entries}

def is_ignored(self, path: StrPath) -> bool:
path = self.project.relative_from_root(path)
return path in self._all_ignored_files

def is_submodule(self, path: StrPath) -> bool:
return any(
self.project.relative_from_root(path).resolve()
== submodule_path.resolve()
for submodule_path in self._submodules
)

@classmethod
def in_repo(cls, directory: StrPath) -> bool:
if directory is None:
Expand Down Expand Up @@ -171,6 +206,10 @@ def is_ignored(self, path: StrPath) -> bool:
path = self.project.relative_from_root(path)
return path in self._all_ignored_files

def is_submodule(self, path: StrPath) -> bool:
# TODO: Implement me.
return False

@classmethod
def in_repo(cls, directory: StrPath) -> bool:
if directory is None:
Expand Down
75 changes: 48 additions & 27 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2017 Free Software Foundation Europe e.V. <https://fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <florian@familysnow.net>
# SPDX-FileCopyrightText: 2022 Carmen Bianca Bakker <carmenbianca@fsfe.org>
# SPDX-FileCopyrightText: 2022 Florian Snow <florian@familysnow.net>
# SPDX-FileCopyrightText: 2023 Matthias Riße
#
# SPDX-License-Identifier: GPL-3.0-or-later

Expand Down Expand Up @@ -223,9 +224,9 @@ def hg_repository(fake_repository: Path, hg_exe: str) -> Path:
return fake_repository


@pytest.fixture()
@pytest.fixture(params=["submodule-add", "manual"])
def submodule_repository(
git_repository: Path, git_exe: str, tmpdir_factory
git_repository: Path, git_exe: str, tmpdir_factory, request
) -> Path:
"""Create a git repository that contains a submodule."""
header = cleandoc(
Expand Down Expand Up @@ -260,33 +261,53 @@ def submodule_repository(

os.chdir(git_repository)

if request.param == "submodule-add":
subprocess.run(
[
git_exe,
# https://git-scm.com/docs/git-config#Documentation/git-config.txt-protocolallow
#
# This circumvents a bug/behaviour caused by CVE-2022-39253
# where you cannot use `git submodule add repository path` where
# repository is a file on the filesystem.
"-c",
"protocol.file.allow=always",
"submodule",
"add",
str(submodule.resolve()),
"submodule",
],
check=True,
)
elif request.param == "manual":
subprocess.run(
[git_exe, "clone", str(submodule.resolve()), "submodule"],
check=True,
)
with open(
git_repository / ".gitmodules", mode="a", encoding="utf-8"
) as gitmodules_file:
gitmodules_file.write(
f"""[submodule "submodule"]
path = submodule
url = {submodule.resolve().as_posix()}
"""
)
subprocess.run(
[
git_exe,
"add",
"--no-warn-embedded-repo",
".gitmodules",
"submodule",
],
check=True,
)

subprocess.run(
[
git_exe,
# https://git-scm.com/docs/git-config#Documentation/git-config.txt-protocolallow
#
# This circumvents a bug/behaviour caused by CVE-2022-39253 where
# you cannot use `git submodule add repository path` where
# repository is a file on the filesystem.
"-c",
"protocol.file.allow=always",
"submodule",
"add",
str(submodule.resolve()),
"submodule",
],
check=True,
)
subprocess.run(
[
git_exe,
"commit",
"-m",
"add submodule",
],
[git_exe, "commit", "-m", "add submodule"],
check=True,
)

(git_repository / ".gitmodules.license").write_text(header)

return git_repository
Expand Down

0 comments on commit b12fba8

Please sign in to comment.