Skip to content

Commit

Permalink
Add Git object hashing to the store layer
Browse files Browse the repository at this point in the history
Part of RFC 133

Extracted from our old IPFS branches.

Co-Authored-By: Matthew Bauer <mjbauer95@gmail.com>
Co-Authored-By: Carlo Nucera <carlo.nucera@protonmail.com>
Co-authored-by: Robert Hensing <roberth@users.noreply.github.com>
Co-authored-by: Florian Klink <flokli@flokli.de>
  • Loading branch information
5 people committed Feb 27, 2024
1 parent 04836c7 commit 201551c
Show file tree
Hide file tree
Showing 27 changed files with 485 additions and 87 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ ifeq ($(ENABLE_FUNCTIONAL_TESTS), yes)
makefiles += \
tests/functional/local.mk \
tests/functional/ca/local.mk \
tests/functional/git-hashing/local.mk \
tests/functional/dyn-drv/local.mk \
tests/functional/test-libstoreconsumer/local.mk \
tests/functional/plugins/local.mk
Expand Down
9 changes: 7 additions & 2 deletions doc/manual/src/protocols/store-path.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,20 @@ where

- `rec` = one of:

- ```ebnf
| ""
```
(empty string) for hashes of the flat (single file) serialization

- ```ebnf
| "r:"
```
hashes of the for [Nix Archive (NAR)] (arbitrary file system object) serialization

- ```ebnf
| ""
| "git:"
```
(empty string) for hashes of the flat (single file) serialization
hashes of the [Git blob/tree](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) [Merkel tree](https://en.wikipedia.org/wiki/Merkle_tree) format

- ```ebnf
algo = "md5" | "sha1" | "sha256"
Expand Down
2 changes: 1 addition & 1 deletion perl/lib/Nix/Store.xs
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ hashPath(char * algo, int base32, char * path)
auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path);
Hash h = hashPath(
accessor, canonPath,
FileIngestionMethod::Recursive, parseHashAlgo(algo)).first;
FileIngestionMethod::Recursive, parseHashAlgo(algo));
auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false);
XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0)));
} catch (Error & e) {
Expand Down
5 changes: 4 additions & 1 deletion src/libexpr/primops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1138,7 +1138,10 @@ drvName, Bindings * attrs, Value & v)
auto handleHashMode = [&](const std::string_view s) {
if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive;
else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat;
else if (s == "text") {
else if (s == "git") {
experimentalFeatureSettings.require(Xp::GitHashing);
ingestionMethod = FileIngestionMethod::Git;
} else if (s == "text") {
experimentalFeatureSettings.require(Xp::DynamicDerivations);
ingestionMethod = TextIngestionMethod {};
} else
Expand Down
7 changes: 6 additions & 1 deletion src/libstore/binary-cache-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -324,13 +324,18 @@ StorePath BinaryCacheStore::addToStoreFromDump(
nar = dump2.s;
break;
case FileIngestionMethod::Flat:
{
// The dump is Flat, so we need to convert it to NAR with a
// single file.
StringSink s;
dumpString(dump2.s, s);
nar = std::move(s.s);
break;
}
case FileIngestionMethod::Git:
unsupported("addToStoreFromDump");
break;
}
} else {
// Otherwise, we have to do th same hashing as NAR so our single
// hash will suffice for both purposes.
Expand Down Expand Up @@ -450,7 +455,7 @@ StorePath BinaryCacheStore::addToStore(
non-recursive+sha256 so we can just use the default
implementation of this method in terms of addToStoreFromDump. */

auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first;
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter);

auto source = sinkToSource([&](Sink & sink) {
accessor.dumpPath(path, sink, filter);
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/binary-cache-store.hh
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ public:

void narFromPath(const StorePath & path, Sink & sink) override;

ref<SourceAccessor> getFSAccessor(bool requireValidPath) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;

void addSignatures(const StorePath & storePath, const StringSet & sigs) override;

Expand Down
34 changes: 24 additions & 10 deletions src/libstore/build/local-derivation-goal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "finally.hh"
#include "util.hh"
#include "archive.hh"
#include "git.hh"
#include "compression.hh"
#include "daemon.hh"
#include "topo-sort.hh"
Expand Down Expand Up @@ -2457,15 +2458,28 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
rewriteOutput(outputRewrites);
/* FIXME optimize and deduplicate with addToStore */
std::string oldHashPart { scratchPath->hashPart() };
auto got = ({
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto got = [&]{
PosixSourceAccessor accessor;
dumpPath(
accessor, CanonPath { actualPath },
caSink,
outputHash.method.getFileIngestionMethod());
caSink.finish().first;
});
auto fim = outputHash.method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto fim = outputHash.method.getFileIngestionMethod();
dumpPath(
accessor, CanonPath { actualPath },
caSink,
(FileSerialisationMethod) fim);
return caSink.finish().first;
}
case FileIngestionMethod::Git: {
return git::dumpHash(
outputHash.hashAlgo, accessor,
CanonPath { tmpDir + "/tmp" }).hash;
}
}
}();

ValidPathInfo newInfo0 {
worker.store,
Expand All @@ -2491,7 +2505,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256);
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
newInfo0.narHash = narHashAndSize.first;
newInfo0.narSize = narHashAndSize.second;
}
Expand All @@ -2515,7 +2529,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256);
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first };
newInfo0.narSize = narHashAndSize.second;
auto refs = rewriteRefs();
Expand Down
4 changes: 2 additions & 2 deletions src/libstore/build/worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -529,11 +529,11 @@ bool Worker::pathContentsGood(const StorePath & path)
if (!pathExists(store.printStorePath(path)))
res = false;
else {
HashResult current = hashPath(
Hash current = hashPath(
*store.getFSAccessor(), CanonPath { store.printStorePath(path) },
FileIngestionMethod::Recursive, info->narHash.algo);
Hash nullHash(HashAlgorithm::SHA256);
res = info->narHash == nullHash || info->narHash == current.first;
res = info->narHash == nullHash || info->narHash == current;
}
pathContentsGoodCache.insert_or_assign(path, res);
if (!res)
Expand Down
11 changes: 11 additions & 0 deletions src/libstore/content-address.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ std::string_view makeFileIngestionPrefix(FileIngestionMethod m)
return "";
case FileIngestionMethod::Recursive:
return "r:";
case FileIngestionMethod::Git:
experimentalFeatureSettings.require(Xp::GitHashing);
return "git:";
default:
throw Error("impossible, caught both cases");
}
Expand Down Expand Up @@ -51,6 +54,10 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m)
if (splitPrefix(m, "r:")) {
return FileIngestionMethod::Recursive;
}
else if (splitPrefix(m, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
return FileIngestionMethod::Git;
}
else if (splitPrefix(m, "text:")) {
return TextIngestionMethod {};
}
Expand Down Expand Up @@ -131,6 +138,10 @@ static std::pair<ContentAddressMethod, HashAlgorithm> parseContentAddressMethodP
auto method = FileIngestionMethod::Flat;
if (splitPrefix(rest, "r:"))
method = FileIngestionMethod::Recursive;
else if (splitPrefix(rest, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
method = FileIngestionMethod::Git;
}
HashAlgorithm hashAlgo = parseHashAlgorithm_();
return {
std::move(method),
Expand Down
7 changes: 6 additions & 1 deletion src/libstore/daemon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "archive.hh"
#include "derivations.hh"
#include "args.hh"
#include "git.hh"

namespace nix::daemon {

Expand Down Expand Up @@ -443,13 +444,17 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
TeeSource savedNARSource(from, saved);
NullFileSystemObjectSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource);
} else {
} else if (method == FileIngestionMethod::Flat) {
/* Incrementally parse the NAR file, stripping the
metadata, and streaming the sole file we expect into
`saved`. */
RegularFileSink savedRegular { saved };
parseDump(savedRegular, from);
if (!savedRegular.regular) throw Error("regular file expected");
} else {
/* Should have validated above that no other file ingestion
method was used. */
assert(false);
}
});
logger->startWork();
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/local-fs-store.hh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public:
LocalFSStore(const Params & params);

void narFromPath(const StorePath & path, Sink & sink) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;

/**
* Creates symlink from the `gcRoot` to the `storePath` and
Expand Down
85 changes: 71 additions & 14 deletions src/libstore/local-store.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "local-store.hh"
#include "globals.hh"
#include "git.hh"
#include "archive.hh"
#include "pathlocks.hh"
#include "worker-protocol.hh"
Expand Down Expand Up @@ -1097,19 +1098,29 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
if (info.ca) {
auto & specified = *info.ca;
auto actualHash = ({
HashModuloSink caSink {
specified.hash.algo,
std::string { info.path.hashPart() },
};
PosixSourceAccessor accessor;
dumpPath(
*getFSAccessor(false),
CanonPath { printStorePath(info.path) },
caSink,
specified.method.getFileIngestionMethod());
auto accessor = getFSAccessor(false);
CanonPath path { printStorePath(info.path) };
Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++
auto fim = specified.method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
HashModuloSink caSink {
specified.hash.algo,
std::string { info.path.hashPart() },
};
dumpPath(*accessor, path, caSink, (FileSerialisationMethod) fim);
h = caSink.finish().first;
break;
}
case FileIngestionMethod::Git:
h = git::dumpHash(specified.hash.algo, *accessor, path).hash;
break;
}
ContentAddress {
.method = specified.method,
.hash = caSink.finish().first,
.hash = std::move(h),
};
});
if (specified.hash != actualHash.hash) {
Expand Down Expand Up @@ -1199,7 +1210,30 @@ StorePath LocalStore::addToStoreFromDump(
delTempDir = std::make_unique<AutoDelete>(tempDir);
tempPath = tempDir + "/x";

restorePath(tempPath, bothSource, method.getFileIngestionMethod());
auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(tempPath, bothSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = tempPath;
auto accessor = getFSAccessor();
git::restore(sink, bothSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
}

dumpBuffer.reset();
dump = {};
Expand Down Expand Up @@ -1238,7 +1272,30 @@ StorePath LocalStore::addToStoreFromDump(
if (inMemory) {
StringSource dumpSource { dump };
/* Restore from the buffer in memory. */
restorePath(realPath, dumpSource, method.getFileIngestionMethod());
auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(realPath, dumpSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = realPath;
auto accessor = getFSAccessor();
git::restore(sink, dumpSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
}
} else {
/* Move the temporary path we restored above. */
moveFile(tempPath, realPath);
Expand Down Expand Up @@ -1367,7 +1424,7 @@ bool LocalStore::verifyStore(bool checkContents, RepairFlag repair)
PosixSourceAccessor accessor;
std::string hash = hashPath(
accessor, CanonPath { linkPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first.to_string(HashFormat::Nix32, false);
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).to_string(HashFormat::Nix32, false);
if (hash != link.name) {
printError("link '%s' was modified! expected hash '%s', got '%s'",
linkPath, link.name, hash);
Expand Down
4 changes: 2 additions & 2 deletions src/libstore/optimise-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats,
PosixSourceAccessor accessor;
hashPath(
accessor, CanonPath { path },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first;
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first;
});
debug("'%1%' has hash '%2%'", path, hash.to_string(HashFormat::Nix32, true));

Expand All @@ -166,7 +166,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats,
PosixSourceAccessor accessor;
hashPath(
accessor, CanonPath { linkPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first;
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first;
})))
{
// XXX: Consider overwriting linkPath with our valid version.
Expand Down
1 change: 1 addition & 0 deletions src/libstore/remote-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "derivations.hh"
#include "pool.hh"
#include "finally.hh"
#include "git.hh"
#include "logging.hh"
#include "callback.hh"
#include "filetransfer.hh"
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/remote-store.hh
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ protected:

friend struct ConnectionHandle;

virtual ref<SourceAccessor> getFSAccessor(bool requireValidPath) override;
virtual ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;

virtual void narFromPath(const StorePath & path, Sink & sink) override;

Expand Down
Loading

0 comments on commit 201551c

Please sign in to comment.