Skip to content

Commit

Permalink
Git hashing for Git Fetching
Browse files Browse the repository at this point in the history
The git fetcher is now more tree-hash-oriented, and we will eventually
want to integrate it with git hashing. This PR exposes `treeHash`
inputs and outputs in a few ways for this purpose.

Eventually, we should add something like `builtins.derivation`'s
`outputHashMode` to `builtins.fetchTree`, in order to specify we should
use git hashing, and then this and the store-layer git hashing should
meet together, ensuring we have the same tree hash end-to-end.

Part of RFC 133

Co-Authored-By: Matthew Bauer <mjbauer95@gmail.com>
Co-Authored-By: Carlo Nucera <carlo.nucera@protonmail.com>
Co-authored-by: Robert Hensing <roberth@users.noreply.github.com>
  • Loading branch information
4 people committed Mar 27, 2024
1 parent ce9ea7e commit a8314ff
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 44 deletions.
14 changes: 11 additions & 3 deletions src/libexpr/primops/fetchTree.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,13 @@ void emitTreeAttrs(

// FIXME: support arbitrary input attributes.

auto narHash = input.getNarHash();
assert(narHash);
attrs.alloc("narHash").mkString(narHash->to_string(HashFormat::SRI, true));
if (auto narHash = input.getNarHash()) {
attrs.alloc("narHash").mkString(narHash->to_string(HashFormat::SRI, true));
} else if (auto treeHash = input.getTreeHash()) {
attrs.alloc("treeHash").mkString(treeHash->to_string(HashFormat::SRI, true));
} else
/* Must have either tree hash or NAR hash */
assert(false);

if (input.getType() == "git")
attrs.alloc("submodules").mkBool(
Expand All @@ -51,6 +55,10 @@ void emitTreeAttrs(
attrs.alloc("shortRev").mkString(emptyHash.gitShortRev());
}

if (auto treeHash = input.getTreeHash()) {
attrs.alloc("treeHash").mkString(treeHash->gitRev());
}

if (auto revCount = input.getRevCount())
attrs.alloc("revCount").mkInt(*revCount);
else if (emptyRevFallback)
Expand Down
30 changes: 22 additions & 8 deletions src/libfetchers/fetchers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -301,14 +301,19 @@ std::string Input::getName() const

/**
 * Compute the fixed-output store path for this input.
 *
 * A `treeHash` attribute is preferred: when present the path is derived
 * with `FileIngestionMethod::Git`. Otherwise the `narHash` attribute is
 * used with `FileIngestionMethod::Recursive`.
 *
 * @param store Store used to construct the fixed-output path.
 * @return The store path derived from the input's name and hash.
 * @throws Error if the input has neither a tree hash nor a NAR hash.
 */
StorePath Input::computeStorePath(Store & store) const
{
    // Checked first so that a tree hash, when available, takes precedence
    // over a NAR hash for the same input.
    if (auto treeHash = getTreeHash())
        return store.makeFixedOutputPath(getName(), FixedOutputInfo {
            .method = FileIngestionMethod::Git,
            .hash = *treeHash,
            .references = {},
        });

    if (auto narHash = getNarHash())
        return store.makeFixedOutputPath(getName(), FixedOutputInfo {
            .method = FileIngestionMethod::Recursive,
            .hash = *narHash,
            .references = {},
        });

    // No content hash at all: there is nothing to derive a path from.
    throw Error("cannot compute store path for unlocked input '%s'", to_string());
}

std::string Input::getType() const
Expand Down Expand Up @@ -351,6 +356,15 @@ std::optional<Hash> Input::getRev() const
return hash;
}

std::optional<Hash> Input::getTreeHash() const
{
if (auto s = maybeGetStrAttr(attrs, "treeHash")) {
experimentalFeatureSettings.require(Xp::GitHashing);
return Hash::parseAny(*s, HashAlgorithm::SHA1);
}
return {};
}

std::optional<uint64_t> Input::getRevCount() const
{
if (auto n = maybeGetIntAttr(attrs, "revCount"))
Expand Down
1 change: 1 addition & 0 deletions src/libfetchers/fetchers.hh
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ public:
std::optional<Hash> getNarHash() const;
std::optional<std::string> getRef() const;
std::optional<Hash> getRev() const;
std::optional<Hash> getTreeHash() const;
std::optional<uint64_t> getRevCount() const;
std::optional<time_t> getLastModified() const;

Expand Down
103 changes: 71 additions & 32 deletions src/libfetchers/git.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "globals.hh"
#include "tarfile.hh"
#include "store-api.hh"
#include "git.hh"
#include "url-parts.hh"
#include "pathlocks.hh"
#include "processes.hh"
Expand Down Expand Up @@ -178,7 +179,7 @@ struct GitInputScheme : InputScheme
attrs.emplace("type", "git");

for (auto & [name, value] : url.query) {
if (name == "rev" || name == "ref" || name == "keytype" || name == "publicKey" || name == "publicKeys")
if (name == "rev" || name == "ref" || name == "treeHash" || name == "keytype" || name == "publicKey" || name == "publicKeys")
attrs.emplace(name, value);
else if (name == "shallow" || name == "submodules" || name == "exportIgnore" || name == "allRefs" || name == "verifyCommit")
attrs.emplace(name, Explicit<bool> { value == "1" });
Expand Down Expand Up @@ -253,6 +254,7 @@ struct GitInputScheme : InputScheme
auto url = parseURL(getStrAttr(input.attrs, "url"));
if (url.scheme != "git") url.scheme = "git+" + url.scheme;
if (auto rev = input.getRev()) url.query.insert_or_assign("rev", rev->gitRev());
if (auto treeHash = input.getTreeHash()) url.query.insert_or_assign("treeHash", treeHash->gitRev());
if (auto ref = input.getRef()) url.query.insert_or_assign("ref", *ref);
if (getShallowAttr(input))
url.query.insert_or_assign("shallow", "1");
Expand Down Expand Up @@ -403,6 +405,9 @@ struct GitInputScheme : InputScheme
if (auto rev = input.getRev())
checkHashAlgorithm(rev);

if (auto treeHash = input.getTreeHash())
checkHashAlgorithm(treeHash);

RepoInfo repoInfo;

// file:// URIs are normally not cloned (but otherwise treated the
Expand All @@ -415,9 +420,9 @@ struct GitInputScheme : InputScheme
repoInfo.isLocal = url.scheme == "file" && !forceHttp && !isBareRepository;
repoInfo.url = repoInfo.isLocal ? url.path : url.base;

// If this is a local directory and no ref or revision is
// If this is a local directory and no ref or revision or tree hash is
// given, then allow the use of an unclean working tree.
if (!input.getRef() && !input.getRev() && repoInfo.isLocal)
if (!input.getRef() && !input.getRev() && !input.getTreeHash() && repoInfo.isLocal)
repoInfo.workdirInfo = GitRepo::openRepo(repoInfo.url)->getWorkdirInfo();

return repoInfo;
Expand Down Expand Up @@ -512,7 +517,7 @@ struct GitInputScheme : InputScheme

if (repoInfo.isLocal) {
repoDir = repoInfo.url;
if (!input.getRev())
if (!input.getRev() && !input.getTreeHash())
input.attrs.insert_or_assign("rev", GitRepo::openRepo(repoDir)->resolveRef(ref).gitRev());
} else {
Path cacheDir = getCachePath(repoInfo.url, getShallowAttr(input));
Expand All @@ -532,10 +537,14 @@ struct GitInputScheme : InputScheme
bool doFetch;
time_t now = time(0);

/* If a rev was specified, we need to fetch if it's not in the
repo. */
if (auto rev = input.getRev()) {
doFetch = !repo->hasObject(*rev);
/* If a rev / tree hash was specified, we need to fetch if
it's not in the repo. */

auto obj = input.getRev();
if (!obj) obj = input.getTreeHash();

if (obj) {
doFetch = !repo->hasObject(*obj);
} else {
if (getAllRefsAttr(input)) {
doFetch = true;
Expand Down Expand Up @@ -574,14 +583,16 @@ struct GitInputScheme : InputScheme
warn("could not update cached head '%s' for '%s'", ref, repoInfo.url);
}

if (auto rev = input.getRev()) {
if (!repo->hasObject(*rev))
if (obj) {
if (!repo->hasObject(*obj))
throw Error(
"Cannot find Git revision '%s' in ref '%s' of repository '%s'! "
"Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the "
"Cannot find Git revision or tree hash '%s' in ref '%s' of repository '%s'! "
"Please make sure that the "
ANSI_BOLD "rev" ANSI_NORMAL " or "
ANSI_BOLD "treeHash" ANSI_NORMAL " exists on the "
ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD
"allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".",
rev->gitRev(),
obj->gitRev(),
ref,
repoInfo.url
);
Expand All @@ -598,27 +609,46 @@ struct GitInputScheme : InputScheme
if (isShallow && !getShallowAttr(input))
throw Error("'%s' is a shallow Git repository, but shallow repositories are only allowed when `shallow = true;` is specified", repoInfo.url);

// FIXME: check whether rev is an ancestor of ref?
// FIXME: check whether rev (or some rev with treeHash) is an
// ancestor of ref?

auto rev = *input.getRev();
Attrs infoAttrs;

auto gotTreeHash = repo->getPlainAccessor(rev)->getTreeHash();
auto [fetchHash, fetchHashType] = input.getTreeHash()
? (std::pair { input.getTreeHash().value(), true })
: (std::pair { input.getRev().value(), false });

Attrs infoAttrs({
{"rev", rev.gitRev()},
{"lastModified", getLastModified(repoInfo, repoDir, rev)},
});
auto gotTreeHash = repo->getPlainAccessor(fetchHash)->getTreeHash();

if (!getShallowAttr(input))
infoAttrs.insert_or_assign("revCount",
getRevCount(repoInfo, repoDir, rev));
if (auto optH = input.getTreeHash()) {
auto h = *std::move(optH);
infoAttrs.insert_or_assign("treeHash", h.gitRev());
/* if a tree hash was specified, ensure that it matches.
Assert because it shouldn't be possible for this to fail.
*/
assert(h == gotTreeHash);
}

printTalkative("using revision %s of repo '%s'", rev.gitRev(), repoInfo.url);
if (auto optH = input.getRev()) {
auto rev = *std::move(optH);
infoAttrs.insert_or_assign("rev", rev.gitRev());
infoAttrs.insert_or_assign("lastModified",
getLastModified(repoInfo, repoDir, rev));
if (!getShallowAttr(input))
infoAttrs.insert_or_assign("revCount",
getRevCount(repoInfo, repoDir, rev));
}

printTalkative(
"using %s %s of repo '%s'",
fetchHashType ? "tree hash" : "revision",
fetchHash.gitRev(),
repoInfo.url);

verifyCommit(input, repo);

bool exportIgnore = getExportIgnoreAttr(input);
auto accessor = repo->getAccessor(rev, exportIgnore);
auto accessor = repo->getAccessor(fetchHash, exportIgnore);

accessor->setPathDisplay("«" + input.to_string() + "»");

Expand All @@ -628,7 +658,7 @@ struct GitInputScheme : InputScheme
if (getSubmodulesAttr(input)) {
std::map<CanonPath, nix::ref<InputAccessor>> mounts;

for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev, exportIgnore)) {
for (auto & [submodule, submoduleRev] : repo->getSubmodules(fetchHash, exportIgnore)) {
auto resolved = repo->resolveSubmoduleUrl(submodule.url, repoInfo.url);
debug("Git submodule %s: %s %s %s -> %s",
submodule.path, submodule.url, submodule.branch, submoduleRev.gitRev(), resolved);
Expand Down Expand Up @@ -656,10 +686,12 @@ struct GitInputScheme : InputScheme
input.attrs.insert_or_assign("treeHash", gotTreeHash.gitRev());
}

assert(!origRev || origRev == rev);
if (!getShallowAttr(input))

assert(!origRev || origRev == fetchHash);
if (!getShallowAttr(input) && input.getRev())
input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount"));
input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified"));
if (input.getRev())
input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified"));

return {accessor, std::move(input)};
}
Expand Down Expand Up @@ -765,7 +797,7 @@ struct GitInputScheme : InputScheme
}

auto [accessor, final] =
input.getRef() || input.getRev() || !repoInfo.isLocal
input.getRef() || input.getRev() || input.getTreeHash() || !repoInfo.isLocal
? getAccessorFromCommit(store, repoInfo, std::move(input))
: getAccessorFromWorkdir(store, repoInfo, std::move(input));

Expand All @@ -774,15 +806,22 @@ struct GitInputScheme : InputScheme

/**
 * Build a fingerprint string for a Git input.
 *
 * The fingerprint is the hex `rev` (or, failing that, the hex `treeHash`
 * followed by ";t") plus ";s" when submodules are requested and ";e" when
 * export-ignore is requested. The ";t" marker keeps a tree-hash
 * fingerprint distinct from a commit fingerprint with the same digits.
 *
 * @param store Unused here.
 * @param input The input to fingerprint.
 * @return The fingerprint, or `std::nullopt` when the input has neither
 *         a `rev` nor a `treeHash`.
 */
std::optional<std::string> getFingerprint(ref<Store> store, const Input & input) const override
{
    // Suffix shared by both the rev-based and the tree-hash-based forms.
    auto rest = [&]() {
        return std::string { getSubmodulesAttr(input) ? ";s" : "" }
            + (getExportIgnoreAttr(input) ? ";e" : "");
    };

    if (auto rev = input.getRev())
        return rev->gitRev() + rest();
    else if (auto treeHash = input.getTreeHash())
        return treeHash->gitRev() + ";t" + rest();
    else
        return std::nullopt;
}

/**
 * Report whether the input is locked.
 *
 * An input counts as locked when it carries either a `rev` or a
 * `treeHash` attribute. `rev` is checked first, so `getTreeHash()` is
 * only consulted when no `rev` is present.
 */
bool isLocked(const Input & input) const override
{
    return input.getRev().has_value() || input.getTreeHash().has_value();
}
};

Expand Down
48 changes: 48 additions & 0 deletions tests/functional/git-hashing/fetching.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Functional test: fetch a local Git repository with builtins.fetchTree by
# `treeHash`, and check that fetching the same tree via `ref = "HEAD"`
# reports the same tree hash and output path.

source common.sh

[[ -n $(type -p git) ]] || skipTest "no git"

repo="$TEST_ROOT/git"

# Start from a clean repository and an empty fetcher cache.
rm -rf "$repo" "$TEST_HOME/.cache/nix"

git init "$repo"
git -C "$repo" config user.email "foobar@example.com"
git -C "$repo" config user.name "Foobar"

echo utrecht > "$repo/hello"
touch "$repo/.gitignore"
git -C "$repo" add hello .gitignore
git -C "$repo" commit -m 'Bla1'

echo world > "$repo/hello"
git -C "$repo" commit -m 'Bla2' -a

# `HEAD:` names the root tree object of the HEAD commit.
treeHash=$(git -C "$repo" rev-parse HEAD:)

# Fetch by tree hash only (no rev, no ref).
path=$(nix eval --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; treeHash = \"$treeHash\"; }).outPath")
[[ $(cat "$path/hello") = world ]]

# Submodules are fine with nar hashing the result
pathSub=$(nix eval --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; treeHash = \"$treeHash\"; submodules = true; }).outPath")
[[ "$path" = "$pathSub" ]]

# This might not work any more because of caching changes?
#
# # Check that we can substitute it from other places.
# nix copy --to file://$cacheDir $path
# nix-store --delete $path
# path2=$(nix eval --raw --expr "(builtins.fetchTree { type = \"git\"; url = file:///no-such-repo; treeHash = \"$treeHash\"; }).outPath" --substituters file://$cacheDir --option substitute true)
# [ $path2 = $path ]

# Fetching HEAD by ref should report the same tree hash and the same
# output path as fetching by tree hash.
nix eval --impure --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; ref = \"HEAD\"; })"
treeHash2=$(nix eval --impure --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; ref = \"HEAD\"; }).treeHash")
[[ "$treeHash" = "$treeHash2" ]]
path3=$(nix eval --impure --raw --expr "(builtins.fetchTree { type = \"git\"; url = file://$repo; ref = \"HEAD\"; }).outPath")
[[ "$path3" = "$path" ]]

# Only consumed by the disabled check below; kept so it can be re-enabled as is.
caFromNix=$(nix path-info --json "$path" | jq -r ".[] | .ca")

# FIXME still using NAR hashing, should use git hashing
# test "fixed:git:sha1:$(nix hash convert --to nix32 "sha1:$treeHash")" = "$caFromNix"
3 changes: 2 additions & 1 deletion tests/functional/git-hashing/local.mk
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
git-hashing-tests := \
$(d)/simple.sh
$(d)/simple.sh \
$(d)/fetching.sh

install-tests-groups += git-hashing

Expand Down

0 comments on commit a8314ff

Please sign in to comment.