Skip to content

Commit

Permalink
Merge pull request #8918 from obsidiansystems/git-objects
Browse files Browse the repository at this point in the history
Git object hashing in libstore
  • Loading branch information
Ericson2314 committed Feb 28, 2024
2 parents c3e9e3d + d4ad1fc commit f489a6e
Show file tree
Hide file tree
Showing 35 changed files with 545 additions and 150 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ ifeq ($(ENABLE_FUNCTIONAL_TESTS), yes)
makefiles += \
tests/functional/local.mk \
tests/functional/ca/local.mk \
tests/functional/git-hashing/local.mk \
tests/functional/dyn-drv/local.mk \
tests/functional/test-libstoreconsumer/local.mk \
tests/functional/plugins/local.mk
Expand Down
9 changes: 7 additions & 2 deletions doc/manual/src/protocols/store-path.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,15 +89,20 @@ where

- `rec` = one of:

- ```ebnf
| ""
```
(empty string) for hashes of the flat (single file) serialization

- ```ebnf
| "r:"
```
for hashes of the [Nix Archive (NAR)] (arbitrary file system object) serialization

- ```ebnf
| ""
| "git:"
```
(empty string) for hashes of the flat (single file) serialization
hashes of the [Git blob/tree](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) format

- ```ebnf
algo = "md5" | "sha1" | "sha256"
Expand Down
10 changes: 4 additions & 6 deletions package.nix
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ in {
(lib.getBin lowdown)
mdbook
mdbook-linkcheck
] ++ lib.optionals doInstallCheck [
git
mercurial
openssh
] ++ lib.optionals (doInstallCheck || enableManual) [
jq # Also for custom mdBook preprocessor.
] ++ lib.optional stdenv.hostPlatform.isLinux util-linux
Expand Down Expand Up @@ -249,12 +253,6 @@ in {
dontBuild = !attrs.doBuild;
doCheck = attrs.doCheck;

nativeCheckInputs = [
git
mercurial
openssh
];

disallowedReferences = [ boost ];

preConfigure = lib.optionalString (doBuild && ! stdenv.hostPlatform.isStatic) (
Expand Down
2 changes: 1 addition & 1 deletion perl/lib/Nix/Store.xs
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ hashPath(char * algo, int base32, char * path)
auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path);
Hash h = hashPath(
accessor, canonPath,
FileIngestionMethod::Recursive, parseHashAlgo(algo)).first;
FileIngestionMethod::Recursive, parseHashAlgo(algo));
auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false);
XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0)));
} catch (Error & e) {
Expand Down
7 changes: 5 additions & 2 deletions src/libexpr/primops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1138,7 +1138,10 @@ drvName, Bindings * attrs, Value & v)
auto handleHashMode = [&](const std::string_view s) {
if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive;
else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat;
else if (s == "text") {
else if (s == "git") {
experimentalFeatureSettings.require(Xp::GitHashing);
ingestionMethod = FileIngestionMethod::Git;
} else if (s == "text") {
experimentalFeatureSettings.require(Xp::DynamicDerivations);
ingestionMethod = TextIngestionMethod {};
} else
Expand Down Expand Up @@ -2089,7 +2092,7 @@ static void prim_toFile(EvalState & state, const PosIdx pos, Value * * args, Val
})
: ({
StringSource s { contents };
state.store->addToStoreFromDump(s, name, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair);
state.store->addToStoreFromDump(s, name, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair);
});

/* Note: we don't need to add `context' to the context of the
Expand Down
28 changes: 20 additions & 8 deletions src/libstore/binary-cache-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -305,36 +305,48 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource
StorePath BinaryCacheStore::addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair)
{
std::optional<Hash> caHash;
std::string nar;

// Calculating Git hash from NAR stream not yet implemented. May not
// be possible to implement in single-pass if the NAR is in an
// inconvenient order. Could fetch after uploading, however.
if (hashMethod.getFileIngestionMethod() == FileIngestionMethod::Git)
unsupported("addToStoreFromDump");

if (auto * dump2p = dynamic_cast<StringSource *>(&dump)) {
auto & dump2 = *dump2p;
// Hack, this gives us a "replayable" source so we can compute
// multiple hashes more easily.
caHash = hashString(HashAlgorithm::SHA256, dump2.s);
switch (method.getFileIngestionMethod()) {
case FileIngestionMethod::Recursive:
//
// Only calculate if the dump is in the right format, however.
if (static_cast<FileIngestionMethod>(dumpMethod) == hashMethod.getFileIngestionMethod())
caHash = hashString(HashAlgorithm::SHA256, dump2.s);
switch (dumpMethod) {
case FileSerialisationMethod::Recursive:
// The dump is already NAR in this case, just use it.
nar = dump2.s;
break;
case FileIngestionMethod::Flat:
case FileSerialisationMethod::Flat:
{
// The dump is Flat, so we need to convert it to NAR with a
// single file.
StringSink s;
dumpString(dump2.s, s);
nar = std::move(s.s);
break;
}
}
} else {
// Otherwise, we have to do the same hashing as NAR so our single
// hash will suffice for both purposes.
if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256)
if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256)
unsupported("addToStoreFromDump");
}
StringSource narDump { nar };
Expand All @@ -349,7 +361,7 @@ StorePath BinaryCacheStore::addToStoreFromDump(
*this,
name,
ContentAddressWithReferences::fromParts(
method,
hashMethod,
caHash ? *caHash : nar.first,
{
.others = references,
Expand Down Expand Up @@ -450,7 +462,7 @@ StorePath BinaryCacheStore::addToStore(
non-recursive+sha256 so we can just use the default
implementation of this method in terms of addToStoreFromDump. */

auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first;
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter);

auto source = sinkToSource([&](Sink & sink) {
accessor.dumpPath(path, sink, filter);
Expand Down
5 changes: 3 additions & 2 deletions src/libstore/binary-cache-store.hh
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ public:
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair) override;
Expand All @@ -147,7 +148,7 @@ public:

void narFromPath(const StorePath & path, Sink & sink) override;

ref<SourceAccessor> getFSAccessor(bool requireValidPath) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;

void addSignatures(const StorePath & storePath, const StringSet & sigs) override;

Expand Down
39 changes: 27 additions & 12 deletions src/libstore/build/local-derivation-goal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "finally.hh"
#include "util.hh"
#include "archive.hh"
#include "git.hh"
#include "compression.hh"
#include "daemon.hh"
#include "topo-sort.hh"
Expand Down Expand Up @@ -1311,12 +1312,13 @@ struct RestrictedStore : public virtual RestrictedStoreConfig, public virtual In
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair) override
{
auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, references, repair);
auto path = next->addToStoreFromDump(dump, name, dumpMethod, hashMethod, hashAlgo, references, repair);
goal.addDependency(path);
return path;
}
Expand Down Expand Up @@ -2457,15 +2459,28 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
rewriteOutput(outputRewrites);
/* FIXME optimize and deduplicate with addToStore */
std::string oldHashPart { scratchPath->hashPart() };
auto got = ({
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto got = [&]{
PosixSourceAccessor accessor;
dumpPath(
accessor, CanonPath { actualPath },
caSink,
outputHash.method.getFileIngestionMethod());
caSink.finish().first;
});
auto fim = outputHash.method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto fim = outputHash.method.getFileIngestionMethod();
dumpPath(
accessor, CanonPath { actualPath },
caSink,
(FileSerialisationMethod) fim);
return caSink.finish().first;
}
case FileIngestionMethod::Git: {
return git::dumpHash(
outputHash.hashAlgo, accessor,
CanonPath { tmpDir + "/tmp" }).hash;
}
}
}();

ValidPathInfo newInfo0 {
worker.store,
Expand All @@ -2491,7 +2506,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256);
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
newInfo0.narHash = narHashAndSize.first;
newInfo0.narSize = narHashAndSize.second;
}
Expand All @@ -2515,7 +2530,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256);
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first };
newInfo0.narSize = narHashAndSize.second;
auto refs = rewriteRefs();
Expand Down
4 changes: 2 additions & 2 deletions src/libstore/build/worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -529,11 +529,11 @@ bool Worker::pathContentsGood(const StorePath & path)
if (!pathExists(store.printStorePath(path)))
res = false;
else {
HashResult current = hashPath(
Hash current = hashPath(
*store.getFSAccessor(), CanonPath { store.printStorePath(path) },
FileIngestionMethod::Recursive, info->narHash.algo);
Hash nullHash(HashAlgorithm::SHA256);
res = info->narHash == nullHash || info->narHash == current.first;
res = info->narHash == nullHash || info->narHash == current;
}
pathContentsGoodCache.insert_or_assign(path, res);
if (!res)
Expand Down
11 changes: 11 additions & 0 deletions src/libstore/content-address.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ std::string_view makeFileIngestionPrefix(FileIngestionMethod m)
return "";
case FileIngestionMethod::Recursive:
return "r:";
case FileIngestionMethod::Git:
experimentalFeatureSettings.require(Xp::GitHashing);
return "git:";
default:
throw Error("impossible, caught both cases");
}
Expand Down Expand Up @@ -51,6 +54,10 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m)
if (splitPrefix(m, "r:")) {
return FileIngestionMethod::Recursive;
}
else if (splitPrefix(m, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
return FileIngestionMethod::Git;
}
else if (splitPrefix(m, "text:")) {
return TextIngestionMethod {};
}
Expand Down Expand Up @@ -131,6 +138,10 @@ static std::pair<ContentAddressMethod, HashAlgorithm> parseContentAddressMethodP
auto method = FileIngestionMethod::Flat;
if (splitPrefix(rest, "r:"))
method = FileIngestionMethod::Recursive;
else if (splitPrefix(rest, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
method = FileIngestionMethod::Git;
}
HashAlgorithm hashAlgo = parseHashAlgorithm_();
return {
std::move(method),
Expand Down
56 changes: 31 additions & 25 deletions src/libstore/daemon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "archive.hh"
#include "derivations.hh"
#include "args.hh"
#include "git.hh"

namespace nix::daemon {

Expand Down Expand Up @@ -400,11 +401,23 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork();
auto pathInfo = [&]() {
// NB: FramedSource must be out of scope before logger->stopWork();
auto [contentAddressMethod, hashAlgo_] = ContentAddressMethod::parseWithAlgo(camStr);
auto hashAlgo = hashAlgo_; // work around clang bug
auto [contentAddressMethod, hashAlgo] = ContentAddressMethod::parseWithAlgo(camStr);
FramedSource source(from);
FileSerialisationMethod dumpMethod;
switch (contentAddressMethod.getFileIngestionMethod()) {
case FileIngestionMethod::Flat:
dumpMethod = FileSerialisationMethod::Flat;
break;
case FileIngestionMethod::Recursive:
dumpMethod = FileSerialisationMethod::Recursive;
break;
case FileIngestionMethod::Git:
// Use NAR; Git is not a serialization method
dumpMethod = FileSerialisationMethod::Recursive;
break;
}
// TODO these two steps are essentially RemoteStore::addCAToStore. Move it up to Store.
auto path = store->addToStoreFromDump(source, name, contentAddressMethod, hashAlgo, refs, repair);
auto path = store->addToStoreFromDump(source, name, dumpMethod, contentAddressMethod, hashAlgo, refs, repair);
return store->queryPathInfo(path);
}();
logger->stopWork();
Expand All @@ -430,30 +443,23 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
hashAlgo = parseHashAlgo(hashAlgoRaw);
}

// Old protocol always sends NAR, regardless of hashing method
auto dumpSource = sinkToSource([&](Sink & saved) {
if (method == FileIngestionMethod::Recursive) {
/* We parse the NAR dump through into `saved` unmodified,
so why all this extra work? We still parse the NAR so
that we aren't sending arbitrary data to `saved`
unwittingly`, and we know when the NAR ends so we don't
consume the rest of `from` and can't parse another
command. (We don't trust `addToStoreFromDump` to not
eagerly consume the entire stream it's given, past the
length of the Nar. */
TeeSource savedNARSource(from, saved);
NullFileSystemObjectSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource);
} else {
/* Incrementally parse the NAR file, stripping the
metadata, and streaming the sole file we expect into
`saved`. */
RegularFileSink savedRegular { saved };
parseDump(savedRegular, from);
if (!savedRegular.regular) throw Error("regular file expected");
}
/* We parse the NAR dump through into `saved` unmodified,
so why all this extra work? We still parse the NAR so
that we aren't sending arbitrary data to `saved`
unwittingly, and we know when the NAR ends so we don't
consume the rest of `from` and can't parse another
command. (We don't trust `addToStoreFromDump` to not
eagerly consume the entire stream it's given, past the
length of the NAR.) */
TeeSource savedNARSource(from, saved);
NullFileSystemObjectSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource);
});
logger->startWork();
auto path = store->addToStoreFromDump(*dumpSource, baseName, method, hashAlgo);
auto path = store->addToStoreFromDump(
*dumpSource, baseName, FileSerialisationMethod::Recursive, method, hashAlgo);
logger->stopWork();

to << store->printStorePath(path);
Expand Down Expand Up @@ -485,7 +491,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork();
auto path = ({
StringSource source { s };
store->addToStoreFromDump(source, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair);
store->addToStoreFromDump(source, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair);
});
logger->stopWork();
to << store->printStorePath(path);
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/derivations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ StorePath writeDerivation(Store & store,
})
: ({
StringSource s { contents };
store.addToStoreFromDump(s, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair);
store.addToStoreFromDump(s, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair);
});
}

Expand Down
Loading

0 comments on commit f489a6e

Please sign in to comment.