Git object hashing in libstore #8918

Merged: 3 commits, Feb 28, 2024
1 change: 1 addition & 0 deletions Makefile
@@ -42,6 +42,7 @@ ifeq ($(ENABLE_FUNCTIONAL_TESTS), yes)
makefiles += \
tests/functional/local.mk \
tests/functional/ca/local.mk \
tests/functional/git-hashing/local.mk \
tests/functional/dyn-drv/local.mk \
tests/functional/test-libstoreconsumer/local.mk \
tests/functional/plugins/local.mk
9 changes: 7 additions & 2 deletions doc/manual/src/protocols/store-path.md
@@ -89,15 +89,20 @@ where

- `rec` = one of:

  - ```ebnf
    | ""
    ```
    (empty string) for hashes of the flat (single file) serialization

  - ```ebnf
    | "r:"
    ```
    for hashes of the [Nix Archive (NAR)] (arbitrary file system object) serialization

  - ```ebnf
    | "git:"
    ```
    for hashes of the [Git blob/tree](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) format

- ```ebnf
  algo = "md5" | "sha1" | "sha256"
  ```
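To make the new prefix concrete, here is a small standalone sketch (not part of the PR; the enum and helper mirror `makeFileIngestionPrefix` from `src/libstore/content-address.cc` further down) of how the ingestion method and hash algorithm names would combine into the strings the manual describes, e.g. `git:sha1` versus `r:sha256`:

```cpp
// Illustrative only: a simplified stand-in for makeFileIngestionPrefix,
// showing how the "", "r:" and "git:" prefixes from the manual pair up
// with a hash algorithm name.
#include <iostream>
#include <string_view>

enum class FileIngestionMethod { Flat, Recursive, Git };

std::string_view makeFileIngestionPrefix(FileIngestionMethod m)
{
    switch (m) {
    case FileIngestionMethod::Flat:      return "";     // flat (single file)
    case FileIngestionMethod::Recursive: return "r:";   // NAR serialization
    case FileIngestionMethod::Git:       return "git:"; // Git Merkle tree
    }
    return "";
}

int main()
{
    // Prints "r:sha256" and "git:sha1".
    std::cout << makeFileIngestionPrefix(FileIngestionMethod::Recursive) << "sha256\n"
              << makeFileIngestionPrefix(FileIngestionMethod::Git) << "sha1\n";
}
```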
10 changes: 4 additions & 6 deletions package.nix
@@ -209,6 +209,10 @@ in {
(lib.getBin lowdown)
mdbook
mdbook-linkcheck
] ++ lib.optionals doInstallCheck [
git
mercurial
openssh
] ++ lib.optionals (doInstallCheck || enableManual) [
jq # Also for custom mdBook preprocessor.
] ++ lib.optional stdenv.hostPlatform.isLinux util-linux
@@ -249,12 +253,6 @@ in {
dontBuild = !attrs.doBuild;
doCheck = attrs.doCheck;

nativeCheckInputs = [
git
mercurial
openssh
];

disallowedReferences = [ boost ];

preConfigure = lib.optionalString (doBuild && ! stdenv.hostPlatform.isStatic) (
2 changes: 1 addition & 1 deletion perl/lib/Nix/Store.xs
@@ -259,7 +259,7 @@ hashPath(char * algo, int base32, char * path)
auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path);
Hash h = hashPath(
accessor, canonPath,
FileIngestionMethod::Recursive, parseHashAlgo(algo)).first;
FileIngestionMethod::Recursive, parseHashAlgo(algo));
auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false);
XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0)));
} catch (Error & e) {
7 changes: 5 additions & 2 deletions src/libexpr/primops.cc
@@ -1138,7 +1138,10 @@ drvName, Bindings * attrs, Value & v)
auto handleHashMode = [&](const std::string_view s) {
if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive;
else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat;
else if (s == "text") {
else if (s == "git") {
experimentalFeatureSettings.require(Xp::GitHashing);
ingestionMethod = FileIngestionMethod::Git;
} else if (s == "text") {
experimentalFeatureSettings.require(Xp::DynamicDerivations);
ingestionMethod = TextIngestionMethod {};
} else
@@ -2089,7 +2092,7 @@ static void prim_toFile(EvalState & state, const PosIdx pos, Value * * args, Val
})
: ({
StringSource s { contents };
state.store->addToStoreFromDump(s, name, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair);
state.store->addToStoreFromDump(s, name, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair);
});

/* Note: we don't need to add `context' to the context of the
28 changes: 20 additions & 8 deletions src/libstore/binary-cache-store.cc
@@ -305,36 +305,48 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource
StorePath BinaryCacheStore::addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair)
{
std::optional<Hash> caHash;
std::string nar;

// Calculating Git hash from NAR stream not yet implemented. May not
// be possible to implement in single-pass if the NAR is in an
// inconvenient order. Could fetch after uploading, however.
if (hashMethod.getFileIngestionMethod() == FileIngestionMethod::Git)
unsupported("addToStoreFromDump");

if (auto * dump2p = dynamic_cast<StringSource *>(&dump)) {
auto & dump2 = *dump2p;
// Hack, this gives us a "replayable" source so we can compute
// multiple hashes more easily.
caHash = hashString(HashAlgorithm::SHA256, dump2.s);
switch (method.getFileIngestionMethod()) {
case FileIngestionMethod::Recursive:
//
// Only calculate if the dump is in the right format, however.
if (static_cast<FileIngestionMethod>(dumpMethod) == hashMethod.getFileIngestionMethod())
caHash = hashString(HashAlgorithm::SHA256, dump2.s);
switch (dumpMethod) {
case FileSerialisationMethod::Recursive:
// The dump is already NAR in this case, just use it.
nar = dump2.s;
break;
case FileIngestionMethod::Flat:
case FileSerialisationMethod::Flat:
{
// The dump is Flat, so we need to convert it to NAR with a
// single file.
StringSink s;
dumpString(dump2.s, s);
nar = std::move(s.s);
break;
}
}
} else {
// Otherwise, we have to do the same hashing as NAR so our single
// hash will suffice for both purposes.
if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256)
if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256)
unsupported("addToStoreFromDump");
}
StringSource narDump { nar };
@@ -349,7 +361,7 @@ StorePath BinaryCacheStore::addToStoreFromDump(
*this,
name,
ContentAddressWithReferences::fromParts(
method,
hashMethod,
caHash ? *caHash : nar.first,
{
.others = references,
@@ -450,7 +462,7 @@ StorePath BinaryCacheStore::addToStore(
non-recursive+sha256 so we can just use the default
implementation of this method in terms of addToStoreFromDump. */

auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first;
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter);

auto source = sinkToSource([&](Sink & sink) {
accessor.dumpPath(path, sink, filter);
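One concrete reason for the "inconvenient order" caveat in the comment above: Git tree objects sort their entries with a special rule (directory names compare as if they ended in `/`), which need not match the order in which a NAR stream lists them. A minimal illustration, assuming only standard C++ (none of this is nix code):

```cpp
// Illustrative comparator approximating Git's tree-entry ordering.
#include <cassert>
#include <string>

bool gitTreeLess(const std::string & a, bool aIsDir,
                 const std::string & b, bool bIsDir)
{
    // Git compares names as if directories carried a trailing slash.
    std::string ka = aIsDir ? a + "/" : a;
    std::string kb = bIsDir ? b + "/" : b;
    return ka < kb;
}

int main()
{
    // Plain lexicographic order: "foo" < "foo-bar".
    assert(std::string("foo") < std::string("foo-bar"));
    // Git tree order: the directory "foo" sorts *after* the file "foo-bar",
    // so a single pass over a differently ordered stream may not suffice.
    assert(gitTreeLess("foo-bar", false, "foo", true));
}
```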
5 changes: 3 additions & 2 deletions src/libstore/binary-cache-store.hh
@@ -125,7 +125,8 @@ public:
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair) override;
@@ -147,7 +148,7 @@ public:

void narFromPath(const StorePath & path, Sink & sink) override;

ref<SourceAccessor> getFSAccessor(bool requireValidPath) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
Member:

Specifying defaults on overrides should be superfluous, so I think it's best to omit them.

Member (Author):

Actually, see https://en.cppreference.com/w/cpp/language/default_arguments:

> The overriders of virtual functions do not acquire the default arguments from the base class declarations, and when the virtual function call is made, the default arguments are decided based on the static type of the object.


void addSignatures(const StorePath & storePath, const StringSet & sigs) override;

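The exchange above hinges on a C++ rule worth spelling out: default arguments are looked up on the static type of the object expression, so an override that omits them behaves differently when called through the derived type. A small self-contained example (the names echo the diff, but the signatures are simplified and this is not nix code):

```cpp
#include <iostream>

struct Store {
    virtual void getFSAccessor(bool requireValidPath = true)
    {
        std::cout << "Store: " << requireValidPath << "\n";
    }
    virtual ~Store() = default;
};

struct BinaryCacheStore : Store {
    // Override without restating the default argument.
    void getFSAccessor(bool requireValidPath) override
    {
        std::cout << "BinaryCacheStore: " << requireValidPath << "\n";
    }
};

int main()
{
    BinaryCacheStore s;
    Store & base = s;
    base.getFSAccessor();   // OK: Store's default (true) is used, the override runs
    // s.getFSAccessor();   // would not compile: the override declares no default
    s.getFSAccessor(false); // fine when the argument is passed explicitly
}
```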
39 changes: 27 additions & 12 deletions src/libstore/build/local-derivation-goal.cc
@@ -8,6 +8,7 @@
#include "finally.hh"
#include "util.hh"
#include "archive.hh"
#include "git.hh"
#include "compression.hh"
#include "daemon.hh"
#include "topo-sort.hh"
@@ -1311,12 +1312,13 @@ struct RestrictedStore : public virtual RestrictedStoreConfig, public virtual In
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair) override
{
auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, references, repair);
auto path = next->addToStoreFromDump(dump, name, dumpMethod, hashMethod, hashAlgo, references, repair);
goal.addDependency(path);
return path;
}
@@ -2457,15 +2459,28 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
rewriteOutput(outputRewrites);
/* FIXME optimize and deduplicate with addToStore */
std::string oldHashPart { scratchPath->hashPart() };
auto got = ({
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto got = [&]{
PosixSourceAccessor accessor;
dumpPath(
accessor, CanonPath { actualPath },
caSink,
outputHash.method.getFileIngestionMethod());
caSink.finish().first;
});
auto fim = outputHash.method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto fim = outputHash.method.getFileIngestionMethod();
dumpPath(
accessor, CanonPath { actualPath },
caSink,
(FileSerialisationMethod) fim);
return caSink.finish().first;
}
case FileIngestionMethod::Git: {
return git::dumpHash(
outputHash.hashAlgo, accessor,
CanonPath { tmpDir + "/tmp" }).hash;
}
}
}();

ValidPathInfo newInfo0 {
worker.store,
@@ -2491,7 +2506,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256);
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
newInfo0.narHash = narHashAndSize.first;
newInfo0.narSize = narHashAndSize.second;
}
@@ -2515,7 +2530,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256);
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first };
newInfo0.narSize = narHashAndSize.second;
auto refs = rewriteRefs();
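For readers unfamiliar with `git::dumpHash`, used in the new `FileIngestionMethod::Git` branch above: a Git blob id is just a hash of a small header plus the raw contents. A hedged sketch of that preimage (the actual hashing in nix is done by `git::dumpHash`; `gitBlobPreimage` here is a made-up helper for illustration only):

```cpp
// Builds the exact byte string whose SHA-1 (or SHA-256) is a Git blob id:
//   "blob <size in bytes>\0<contents>"
// `git hash-object <file>` hashes this same preimage.
#include <string>

std::string gitBlobPreimage(const std::string & contents)
{
    std::string s = "blob " + std::to_string(contents.size());
    s += '\0';  // NUL separates the header from the payload
    s += contents;
    return s;
}

// Tree objects work the same way with a "tree <size>\0" header over sorted
// "<mode> <name>\0<raw id>" entries, giving the Merkle tree structure the
// store-path documentation above refers to.
```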
4 changes: 2 additions & 2 deletions src/libstore/build/worker.cc
@@ -529,11 +529,11 @@ bool Worker::pathContentsGood(const StorePath & path)
if (!pathExists(store.printStorePath(path)))
res = false;
else {
HashResult current = hashPath(
Hash current = hashPath(
*store.getFSAccessor(), CanonPath { store.printStorePath(path) },
FileIngestionMethod::Recursive, info->narHash.algo);
Hash nullHash(HashAlgorithm::SHA256);
res = info->narHash == nullHash || info->narHash == current.first;
res = info->narHash == nullHash || info->narHash == current;
}
pathContentsGoodCache.insert_or_assign(path, res);
if (!res)
11 changes: 11 additions & 0 deletions src/libstore/content-address.cc
@@ -11,6 +11,9 @@ std::string_view makeFileIngestionPrefix(FileIngestionMethod m)
return "";
case FileIngestionMethod::Recursive:
return "r:";
case FileIngestionMethod::Git:
experimentalFeatureSettings.require(Xp::GitHashing);
return "git:";
default:
throw Error("impossible, caught both cases");
}
@@ -51,6 +54,10 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m)
if (splitPrefix(m, "r:")) {
return FileIngestionMethod::Recursive;
}
else if (splitPrefix(m, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
return FileIngestionMethod::Git;
}
else if (splitPrefix(m, "text:")) {
return TextIngestionMethod {};
}
@@ -131,6 +138,10 @@ static std::pair<ContentAddressMethod, HashAlgorithm> parseContentAddressMethodP
auto method = FileIngestionMethod::Flat;
if (splitPrefix(rest, "r:"))
method = FileIngestionMethod::Recursive;
else if (splitPrefix(rest, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
method = FileIngestionMethod::Git;
}
HashAlgorithm hashAlgo = parseHashAlgorithm_();
return {
std::move(method),
56 changes: 31 additions & 25 deletions src/libstore/daemon.cc
@@ -13,6 +13,7 @@
#include "archive.hh"
#include "derivations.hh"
#include "args.hh"
#include "git.hh"

namespace nix::daemon {

@@ -400,11 +401,23 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork();
auto pathInfo = [&]() {
// NB: FramedSource must be out of scope before logger->stopWork();
auto [contentAddressMethod, hashAlgo_] = ContentAddressMethod::parseWithAlgo(camStr);
auto hashAlgo = hashAlgo_; // work around clang bug
auto [contentAddressMethod, hashAlgo] = ContentAddressMethod::parseWithAlgo(camStr);
FramedSource source(from);
FileSerialisationMethod dumpMethod;
switch (contentAddressMethod.getFileIngestionMethod()) {
case FileIngestionMethod::Flat:
dumpMethod = FileSerialisationMethod::Flat;
break;
case FileIngestionMethod::Recursive:
dumpMethod = FileSerialisationMethod::Recursive;
break;
case FileIngestionMethod::Git:
// Use NAR; Git is not a serialization method
dumpMethod = FileSerialisationMethod::Recursive;
break;
}
// TODO these two steps are essentially RemoteStore::addCAToStore. Move it up to Store.
auto path = store->addToStoreFromDump(source, name, contentAddressMethod, hashAlgo, refs, repair);
auto path = store->addToStoreFromDump(source, name, dumpMethod, contentAddressMethod, hashAlgo, refs, repair);
return store->queryPathInfo(path);
}();
logger->stopWork();
@@ -430,30 +443,23 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
hashAlgo = parseHashAlgo(hashAlgoRaw);
}

// Old protocol always sends NAR, regardless of hashing method
auto dumpSource = sinkToSource([&](Sink & saved) {
if (method == FileIngestionMethod::Recursive) {
/* We parse the NAR dump through into `saved` unmodified,
so why all this extra work? We still parse the NAR so
that we aren't sending arbitrary data to `saved`
unwittingly`, and we know when the NAR ends so we don't
consume the rest of `from` and can't parse another
command. (We don't trust `addToStoreFromDump` to not
eagerly consume the entire stream it's given, past the
length of the Nar. */
TeeSource savedNARSource(from, saved);
NullFileSystemObjectSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource);
} else {
/* Incrementally parse the NAR file, stripping the
metadata, and streaming the sole file we expect into
`saved`. */
RegularFileSink savedRegular { saved };
parseDump(savedRegular, from);
if (!savedRegular.regular) throw Error("regular file expected");
}
/* We parse the NAR dump through into `saved` unmodified,
so why all this extra work? We still parse the NAR so
that we aren't sending arbitrary data to `saved`
unwittingly`, and we know when the NAR ends so we don't
consume the rest of `from` and can't parse another
command. (We don't trust `addToStoreFromDump` to not
eagerly consume the entire stream it's given, past the
length of the Nar. */
TeeSource savedNARSource(from, saved);
NullFileSystemObjectSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource);
});
logger->startWork();
auto path = store->addToStoreFromDump(*dumpSource, baseName, method, hashAlgo);
auto path = store->addToStoreFromDump(
*dumpSource, baseName, FileSerialisationMethod::Recursive, method, hashAlgo);
logger->stopWork();

to << store->printStorePath(path);
@@ -485,7 +491,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork();
auto path = ({
StringSource source { s };
store->addToStoreFromDump(source, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair);
store->addToStoreFromDump(source, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair);
});
logger->stopWork();
to << store->printStorePath(path);
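The switch added to the worker-protocol handler above captures the conceptual split this PR introduces: how a path is serialised for transfer (`FileSerialisationMethod`) is now independent of how it is hashed and ingested (`ContentAddressMethod` / `FileIngestionMethod`), and Git hashing simply reuses the NAR wire format. A standalone sketch of that mapping (simplified enums, not the real nix definitions):

```cpp
// Git is a hashing scheme, not a wire format, so it piggybacks on NAR.
enum class FileSerialisationMethod { Flat, Recursive };       // wire formats
enum class FileIngestionMethod     { Flat, Recursive, Git };  // hashing schemes

FileSerialisationMethod wireFormatFor(FileIngestionMethod m)
{
    switch (m) {
    case FileIngestionMethod::Flat:
        return FileSerialisationMethod::Flat;
    case FileIngestionMethod::Recursive:
    case FileIngestionMethod::Git:  // no Git serialisation; ship a NAR instead
        return FileSerialisationMethod::Recursive;
    }
    return FileSerialisationMethod::Recursive; // unreachable
}
```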
2 changes: 1 addition & 1 deletion src/libstore/derivations.cc
@@ -150,7 +150,7 @@ StorePath writeDerivation(Store & store,
})
: ({
StringSource s { contents };
store.addToStoreFromDump(s, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair);
store.addToStoreFromDump(s, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair);
});
}
