diff --git a/Makefile b/Makefile index d3542c3e981a..b8bc942a88b4 100644 --- a/Makefile +++ b/Makefile @@ -42,6 +42,7 @@ ifeq ($(ENABLE_FUNCTIONAL_TESTS), yes) makefiles += \ tests/functional/local.mk \ tests/functional/ca/local.mk \ + tests/functional/git-hashing/local.mk \ tests/functional/dyn-drv/local.mk \ tests/functional/test-libstoreconsumer/local.mk \ tests/functional/plugins/local.mk diff --git a/doc/manual/src/protocols/store-path.md b/doc/manual/src/protocols/store-path.md index fcf8038fcc3f..565c4fa7505b 100644 --- a/doc/manual/src/protocols/store-path.md +++ b/doc/manual/src/protocols/store-path.md @@ -89,15 +89,20 @@ where - `rec` = one of: + - ```ebnf + | "" + ``` + (empty string) for hashes of the flat (single file) serialization + - ```ebnf | "r:" ``` hashes of the for [Nix Archive (NAR)] (arbitrary file system object) serialization - ```ebnf - | "" + | "git:" ``` - (empty string) for hashes of the flat (single file) serialization + hashes of the [Git blob/tree](https://git-scm.com/book/en/v2/Git-Internals-Git-Objects) [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) format - ```ebnf algo = "md5" | "sha1" | "sha256" diff --git a/perl/lib/Nix/Store.xs b/perl/lib/Nix/Store.xs index 4a928594bd5f..1c64cc66b14c 100644 --- a/perl/lib/Nix/Store.xs +++ b/perl/lib/Nix/Store.xs @@ -259,7 +259,7 @@ hashPath(char * algo, int base32, char * path) auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path); Hash h = hashPath( accessor, canonPath, - FileIngestionMethod::Recursive, parseHashAlgo(algo)).first; + FileIngestionMethod::Recursive, parseHashAlgo(algo)); auto s = h.to_string(base32 ? 
HashFormat::Nix32 : HashFormat::Base16, false); XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0))); } catch (Error & e) { diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 42cfa4917dc5..83164bea63dc 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -1123,7 +1123,10 @@ drvName, Bindings * attrs, Value & v) auto handleHashMode = [&](const std::string_view s) { if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive; else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat; - else if (s == "text") { + else if (s == "git") { + experimentalFeatureSettings.require(Xp::GitHashing); + ingestionMethod = FileIngestionMethod::Git; + } else if (s == "text") { experimentalFeatureSettings.require(Xp::DynamicDerivations); ingestionMethod = TextIngestionMethod {}; } else diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 189d1d305cf6..d6047dd7e67d 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -324,6 +324,7 @@ StorePath BinaryCacheStore::addToStoreFromDump( nar = dump2.s; break; case FileIngestionMethod::Flat: + { // The dump is Flat, so we need to convert it to NAR with a // single file. StringSink s; @@ -331,6 +332,10 @@ StorePath BinaryCacheStore::addToStoreFromDump( nar = std::move(s.s); break; } + case FileIngestionMethod::Git: + unsupported("addToStoreFromDump"); + break; + } } else { // Otherwise, we have to do th same hashing as NAR so our single // hash will suffice for both purposes. @@ -450,7 +455,7 @@ StorePath BinaryCacheStore::addToStore( non-recursive+sha256 so we can just use the default implementation of this method in terms of addToStoreFromDump. 
*/ - auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first; + auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter); auto source = sinkToSource([&](Sink & sink) { accessor.dumpPath(path, sink, filter); diff --git a/src/libstore/binary-cache-store.hh b/src/libstore/binary-cache-store.hh index 00ab73905e2d..76de2d11addf 100644 --- a/src/libstore/binary-cache-store.hh +++ b/src/libstore/binary-cache-store.hh @@ -147,7 +147,7 @@ public: void narFromPath(const StorePath & path, Sink & sink) override; - ref getFSAccessor(bool requireValidPath) override; + ref getFSAccessor(bool requireValidPath = true) override; void addSignatures(const StorePath & storePath, const StringSet & sigs) override; diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index b373c74b26a8..d92966a7432d 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -8,6 +8,7 @@ #include "finally.hh" #include "util.hh" #include "archive.hh" +#include "git.hh" #include "compression.hh" #include "daemon.hh" #include "topo-sort.hh" @@ -2457,15 +2458,28 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() rewriteOutput(outputRewrites); /* FIXME optimize and deduplicate with addToStore */ std::string oldHashPart { scratchPath->hashPart() }; - auto got = ({ - HashModuloSink caSink { outputHash.hashAlgo, oldHashPart }; + auto got = [&]{ PosixSourceAccessor accessor; - dumpPath( - accessor, CanonPath { actualPath }, - caSink, - outputHash.method.getFileIngestionMethod()); - caSink.finish().first; - }); + auto fim = outputHash.method.getFileIngestionMethod(); + switch (fim) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + { + HashModuloSink caSink { outputHash.hashAlgo, oldHashPart }; + auto fim = outputHash.method.getFileIngestionMethod(); + dumpPath( + accessor, CanonPath { actualPath }, + caSink, + 
(FileSerialisationMethod) fim); + return caSink.finish().first; + } + case FileIngestionMethod::Git: { + return git::dumpHash( + outputHash.hashAlgo, accessor, + CanonPath { tmpDir + "/tmp" }).hash; + } + } + }(); ValidPathInfo newInfo0 { worker.store, @@ -2491,7 +2505,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() PosixSourceAccessor accessor; HashResult narHashAndSize = hashPath( accessor, CanonPath { actualPath }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256); + FileSerialisationMethod::Recursive, HashAlgorithm::SHA256); newInfo0.narHash = narHashAndSize.first; newInfo0.narSize = narHashAndSize.second; } @@ -2515,7 +2529,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() PosixSourceAccessor accessor; HashResult narHashAndSize = hashPath( accessor, CanonPath { actualPath }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256); + FileSerialisationMethod::Recursive, HashAlgorithm::SHA256); ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first }; newInfo0.narSize = narHashAndSize.second; auto refs = rewriteRefs(); diff --git a/src/libstore/build/worker.cc b/src/libstore/build/worker.cc index 3a34f4006ebc..815ded3d5b01 100644 --- a/src/libstore/build/worker.cc +++ b/src/libstore/build/worker.cc @@ -529,11 +529,11 @@ bool Worker::pathContentsGood(const StorePath & path) if (!pathExists(store.printStorePath(path))) res = false; else { - HashResult current = hashPath( + Hash current = hashPath( *store.getFSAccessor(), CanonPath { store.printStorePath(path) }, FileIngestionMethod::Recursive, info->narHash.algo); Hash nullHash(HashAlgorithm::SHA256); - res = info->narHash == nullHash || info->narHash == current.first; + res = info->narHash == nullHash || info->narHash == current; } pathContentsGoodCache.insert_or_assign(path, res); if (!res) diff --git a/src/libstore/content-address.cc b/src/libstore/content-address.cc index 2091f8e0267b..f249b1dde787 100644 --- a/src/libstore/content-address.cc +++ 
b/src/libstore/content-address.cc @@ -11,6 +11,9 @@ std::string_view makeFileIngestionPrefix(FileIngestionMethod m) return ""; case FileIngestionMethod::Recursive: return "r:"; + case FileIngestionMethod::Git: + experimentalFeatureSettings.require(Xp::GitHashing); + return "git:"; default: throw Error("impossible, caught both cases"); } @@ -51,6 +54,10 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m) if (splitPrefix(m, "r:")) { return FileIngestionMethod::Recursive; } + else if (splitPrefix(m, "git:")) { + experimentalFeatureSettings.require(Xp::GitHashing); + return FileIngestionMethod::Git; + } else if (splitPrefix(m, "text:")) { return TextIngestionMethod {}; } @@ -131,6 +138,10 @@ static std::pair parseContentAddressMethodP auto method = FileIngestionMethod::Flat; if (splitPrefix(rest, "r:")) method = FileIngestionMethod::Recursive; + else if (splitPrefix(rest, "git:")) { + experimentalFeatureSettings.require(Xp::GitHashing); + method = FileIngestionMethod::Git; + } HashAlgorithm hashAlgo = parseHashAlgorithm_(); return { std::move(method), diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index cf5020dfe08e..873065e14744 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -13,6 +13,7 @@ #include "archive.hh" #include "derivations.hh" #include "args.hh" +#include "git.hh" namespace nix::daemon { @@ -443,13 +444,17 @@ static void performOp(TunnelLogger * logger, ref store, TeeSource savedNARSource(from, saved); NullFileSystemObjectSink sink; /* just parse the NAR */ parseDump(sink, savedNARSource); - } else { + } else if (method == FileIngestionMethod::Flat) { /* Incrementally parse the NAR file, stripping the metadata, and streaming the sole file we expect into `saved`. */ RegularFileSink savedRegular { saved }; parseDump(savedRegular, from); if (!savedRegular.regular) throw Error("regular file expected"); + } else { + /* Should have validated above that no other file ingestion + method was used. 
*/ + assert(false); } }); logger->startWork(); diff --git a/src/libstore/local-fs-store.hh b/src/libstore/local-fs-store.hh index bf855b67ecea..8fb08120012d 100644 --- a/src/libstore/local-fs-store.hh +++ b/src/libstore/local-fs-store.hh @@ -43,7 +43,7 @@ public: LocalFSStore(const Params & params); void narFromPath(const StorePath & path, Sink & sink) override; - ref getFSAccessor(bool requireValidPath) override; + ref getFSAccessor(bool requireValidPath = true) override; /** * Creates symlink from the `gcRoot` to the `storePath` and diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 2c22bfe319d9..5f35cf3a856b 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1,5 +1,6 @@ #include "local-store.hh" #include "globals.hh" +#include "git.hh" #include "archive.hh" #include "pathlocks.hh" #include "worker-protocol.hh" @@ -1097,19 +1098,29 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, if (info.ca) { auto & specified = *info.ca; auto actualHash = ({ - HashModuloSink caSink { - specified.hash.algo, - std::string { info.path.hashPart() }, - }; - PosixSourceAccessor accessor; - dumpPath( - *getFSAccessor(false), - CanonPath { printStorePath(info.path) }, - caSink, - specified.method.getFileIngestionMethod()); + auto accessor = getFSAccessor(false); + CanonPath path { printStorePath(info.path) }; + Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++ + auto fim = specified.method.getFileIngestionMethod(); + switch (fim) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + { + HashModuloSink caSink { + specified.hash.algo, + std::string { info.path.hashPart() }, + }; + dumpPath(*accessor, path, caSink, (FileSerialisationMethod) fim); + h = caSink.finish().first; + break; + } + case FileIngestionMethod::Git: + h = git::dumpHash(specified.hash.algo, *accessor, path).hash; + break; + } ContentAddress { .method = specified.method, - .hash = 
caSink.finish().first, + .hash = std::move(h), }; }); if (specified.hash != actualHash.hash) { @@ -1199,7 +1210,30 @@ StorePath LocalStore::addToStoreFromDump( delTempDir = std::make_unique(tempDir); tempPath = tempDir + "/x"; - restorePath(tempPath, bothSource, method.getFileIngestionMethod()); + auto fim = method.getFileIngestionMethod(); + switch (fim) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + restorePath(tempPath, bothSource, (FileSerialisationMethod) fim); + break; + case FileIngestionMethod::Git: { + RestoreSink sink; + sink.dstPath = tempPath; + auto accessor = getFSAccessor(); + git::restore(sink, bothSource, [&](Hash childHash) { + return std::pair { + &*accessor, + CanonPath { + printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo { + .method = FileIngestionMethod::Git, + .hash = childHash, + })) + }, + }; + }); + break; + } + } dumpBuffer.reset(); dump = {}; @@ -1238,7 +1272,30 @@ StorePath LocalStore::addToStoreFromDump( if (inMemory) { StringSource dumpSource { dump }; /* Restore from the buffer in memory. */ - restorePath(realPath, dumpSource, method.getFileIngestionMethod()); + auto fim = method.getFileIngestionMethod(); + switch (fim) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + restorePath(realPath, dumpSource, (FileSerialisationMethod) fim); + break; + case FileIngestionMethod::Git: { + RestoreSink sink; + sink.dstPath = realPath; + auto accessor = getFSAccessor(); + git::restore(sink, dumpSource, [&](Hash childHash) { + return std::pair { + &*accessor, + CanonPath { + printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo { + .method = FileIngestionMethod::Git, + .hash = childHash, + })) + }, + }; + }); + break; + } + } } else { /* Move the temporary path we restored above. 
*/ moveFile(tempPath, realPath); @@ -1367,7 +1424,7 @@ bool LocalStore::verifyStore(bool checkContents, RepairFlag repair) PosixSourceAccessor accessor; std::string hash = hashPath( accessor, CanonPath { linkPath }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first.to_string(HashFormat::Nix32, false); + FileIngestionMethod::Recursive, HashAlgorithm::SHA256).to_string(HashFormat::Nix32, false); if (hash != link.name) { printError("link '%s' was modified! expected hash '%s', got '%s'", linkPath, link.name, hash); diff --git a/src/libstore/optimise-store.cc b/src/libstore/optimise-store.cc index 78e4f6d86b2c..daaaaf0733e2 100644 --- a/src/libstore/optimise-store.cc +++ b/src/libstore/optimise-store.cc @@ -151,7 +151,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats, PosixSourceAccessor accessor; hashPath( accessor, CanonPath { path }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first; + FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first; }); debug("'%1%' has hash '%2%'", path, hash.to_string(HashFormat::Nix32, true)); @@ -166,7 +166,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats, PosixSourceAccessor accessor; hashPath( accessor, CanonPath { linkPath }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first; + FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first; }))) { // XXX: Consider overwriting linkPath with our valid version. 
diff --git a/src/libstore/remote-store.cc b/src/libstore/remote-store.cc index fadef45ffcca..0cae84828d37 100644 --- a/src/libstore/remote-store.cc +++ b/src/libstore/remote-store.cc @@ -13,6 +13,7 @@ #include "derivations.hh" #include "pool.hh" #include "finally.hh" +#include "git.hh" #include "logging.hh" #include "callback.hh" #include "filetransfer.hh" diff --git a/src/libstore/remote-store.hh b/src/libstore/remote-store.hh index 87704985b4dc..c51a21375628 100644 --- a/src/libstore/remote-store.hh +++ b/src/libstore/remote-store.hh @@ -184,7 +184,7 @@ protected: friend struct ConnectionHandle; - virtual ref getFSAccessor(bool requireValidPath) override; + virtual ref getFSAccessor(bool requireValidPath = true) override; virtual void narFromPath(const StorePath & path, Sink & sink) override; diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 4238cbbf5b5e..c44612ec51ac 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -12,7 +12,9 @@ #include "references.hh" #include "archive.hh" #include "callback.hh" +#include "git.hh" #include "remote-store.hh" +#include "posix-source-accessor.hh" // FIXME this should not be here, see TODO below on // `addMultipleToStore`. 
#include "worker-protocol.hh" @@ -119,6 +121,9 @@ static std::string makeType( StorePath StoreDirConfig::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const { + if (info.method == FileIngestionMethod::Git && info.hash.algo != HashAlgorithm::SHA1) + throw Error("Git file ingestion must use SHA-1 hash"); + if (info.hash.algo == HashAlgorithm::SHA256 && info.method == FileIngestionMethod::Recursive) { return makeStorePath(makeType(*this, "source", info.references), info.hash, name); } else { @@ -166,7 +171,7 @@ std::pair StoreDirConfig::computeStorePath( const StorePathSet & references, PathFilter & filter) const { - auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first; + auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter); return { makeFixedOutputPathFromCA( name, @@ -193,7 +198,37 @@ StorePath Store::addToStore( RepairFlag repair) { auto source = sinkToSource([&](Sink & sink) { - dumpPath(accessor, path, sink, method.getFileIngestionMethod(), filter); + auto fim = method.getFileIngestionMethod(); + switch (fim) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + { + dumpPath(accessor, path, sink, (FileSerialisationMethod) fim, filter); + break; + } + case FileIngestionMethod::Git: + { + git::dump( + accessor, path, + sink, + // recursively add to store if path is a directory + [&](const CanonPath & path) -> git::TreeEntry { + auto storePath = addToStore("git", accessor, path, method, hashAlgo, references, filter, repair); + auto info = queryPathInfo(storePath); + assert(info->ca); + assert(info->ca->method == FileIngestionMethod::Git); + auto stat = getFSAccessor()->lstat(CanonPath(printStorePath(storePath))); + auto gitModeOpt = git::convertMode(stat.type); + assert(gitModeOpt); + return { + .mode = *gitModeOpt, + .hash = info->ca->hash, + }; + }, + filter); + break; + } + } }); return addToStoreFromDump(*source, name, method, hashAlgo, 
references, repair); } @@ -355,9 +390,7 @@ ValidPathInfo Store::addToStoreSlow( NullFileSystemObjectSink blank; auto & parseSink = method.getFileIngestionMethod() == FileIngestionMethod::Flat ? (FileSystemObjectSink &) fileSink - : method.getFileIngestionMethod() == FileIngestionMethod::Recursive - ? (FileSystemObjectSink &) blank - : (abort(), (FileSystemObjectSink &)*(FileSystemObjectSink *)nullptr); // handled both cases + : (FileSystemObjectSink &) blank; // for recursive or git we do recursive /* The information that flows from tapped (besides being replicated in narSink), is now put in parseSink. */ @@ -369,6 +402,8 @@ ValidPathInfo Store::addToStoreSlow( auto hash = method == FileIngestionMethod::Recursive && hashAlgo == HashAlgorithm::SHA256 ? narHash + : method == FileIngestionMethod::Git + ? git::dumpHash(hashAlgo, accessor, srcPath).hash : caHashSink.finish().first; if (expectedCAHash && expectedCAHash != hash) diff --git a/src/libstore/uds-remote-store.hh b/src/libstore/uds-remote-store.hh index a5ac9080ad19..8bce8994a3a4 100644 --- a/src/libstore/uds-remote-store.hh +++ b/src/libstore/uds-remote-store.hh @@ -35,7 +35,7 @@ public: static std::set uriSchemes() { return {"unix"}; } - ref getFSAccessor(bool requireValidPath) override + ref getFSAccessor(bool requireValidPath = true) override { return LocalFSStore::getFSAccessor(requireValidPath); } void narFromPath(const StorePath & path, Sink & sink) override diff --git a/src/libutil/file-content-address.cc b/src/libutil/file-content-address.cc index 6753e0f490b0..a03668272686 100644 --- a/src/libutil/file-content-address.cc +++ b/src/libutil/file-content-address.cc @@ -1,16 +1,53 @@ #include "file-content-address.hh" #include "archive.hh" +#include "git.hh" namespace nix { -FileIngestionMethod parseFileIngestionMethod(std::string_view input) +static std::optional parseFileSerialisationMethodOpt(std::string_view input) { if (input == "flat") { - return FileIngestionMethod::Flat; + return 
FileSerialisationMethod::Flat; } else if (input == "nar") { - return FileIngestionMethod::Recursive; + return FileSerialisationMethod::Recursive; + } else { + return std::nullopt; + } +} + +FileSerialisationMethod parseFileSerialisationMethod(std::string_view input) +{ + auto ret = parseFileSerialisationMethodOpt(input); + if (ret) + return *ret; + else + throw UsageError("Unknown file serialisation method '%s', expect `flat` or `nar`", input); +} + + +FileIngestionMethod parseFileIngestionMethod(std::string_view input) +{ + if (input == "git") { + return FileIngestionMethod::Git; } else { - throw UsageError("Unknown file ingestion method '%s', expect `flat` or `nar`"); + auto ret = parseFileSerialisationMethodOpt(input); + if (ret) + return static_cast<FileIngestionMethod>(*ret); + else + throw UsageError("Unknown file ingestion method '%s', expect `flat`, `nar`, or `git`", input); + } +} + + +std::string_view renderFileSerialisationMethod(FileSerialisationMethod method) +{ + switch (method) { + case FileSerialisationMethod::Flat: + return "flat"; + case FileSerialisationMethod::Recursive: + return "nar"; + default: + assert(false); } } @@ -19,9 +56,11 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method) { switch (method) { case FileIngestionMethod::Flat: - return "flat"; case FileIngestionMethod::Recursive: - return "nar"; + return renderFileSerialisationMethod( + static_cast<FileSerialisationMethod>(method)); + case FileIngestionMethod::Git: + return "git"; default: abort(); } @@ -31,14 +70,14 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method) void dumpPath( SourceAccessor & accessor, const CanonPath & path, Sink & sink, - FileIngestionMethod method, + FileSerialisationMethod method, PathFilter & filter) { switch (method) { - case FileIngestionMethod::Flat: + case FileSerialisationMethod::Flat: accessor.readFile(path, sink); break; - case FileIngestionMethod::Recursive: + case FileSerialisationMethod::Recursive: accessor.dumpPath(path, sink, filter); break; } @@ -48,13 +87,13 @@ 
void dumpPath( void restorePath( const Path & path, Source & source, - FileIngestionMethod method) + FileSerialisationMethod method) { switch (method) { - case FileIngestionMethod::Flat: + case FileSerialisationMethod::Flat: writeFile(path, source); break; - case FileIngestionMethod::Recursive: + case FileSerialisationMethod::Recursive: restorePath(path, source); break; } @@ -63,7 +102,7 @@ void restorePath( HashResult hashPath( SourceAccessor & accessor, const CanonPath & path, - FileIngestionMethod method, HashAlgorithm ht, + FileSerialisationMethod method, HashAlgorithm ht, PathFilter & filter) { HashSink sink { ht }; @@ -71,4 +110,20 @@ HashResult hashPath( return sink.finish(); } + +Hash hashPath( + SourceAccessor & accessor, const CanonPath & path, + FileIngestionMethod method, HashAlgorithm ht, + PathFilter & filter) +{ + switch (method) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + return hashPath(accessor, path, (FileSerialisationMethod) method, ht, filter).first; + case FileIngestionMethod::Git: + return git::dumpHash(ht, accessor, path, filter).hash; + } + +} + } diff --git a/src/libutil/file-content-address.hh b/src/libutil/file-content-address.hh index 41f23f2af169..a88e4a0e6ffe 100644 --- a/src/libutil/file-content-address.hh +++ b/src/libutil/file-content-address.hh @@ -8,37 +8,38 @@ namespace nix { /** - * An enumeration of the main ways we can serialize file system + * An enumeration of the ways we can serialize file system * objects. */ -enum struct FileIngestionMethod : uint8_t { +enum struct FileSerialisationMethod : uint8_t { /** - * Flat-file hashing. Directly ingest the contents of a single file + * Flat-file. The contents of a single file exactly. */ - Flat = 0, + Flat, + /** - * Recursive (or NAR) hashing. Serializes the file-system object in - * Nix Archive format and ingest that. + * Nix Archive. Serializes the file-system object in + * Nix Archive format. 
*/ - Recursive = 1, + Recursive, }; /** - * Parse a `FileIngestionMethod` by name. Choice of: + * Parse a `FileSerialisationMethod` by name. Choice of: * - * - `flat`: `FileIngestionMethod::Flat` - * - `nar`: `FileIngestionMethod::Recursive` + * - `flat`: `FileSerialisationMethod::Flat` + * - `nar`: `FileSerialisationMethod::Recursive` * - * Oppostite of `renderFileIngestionMethod`. + * Opposite of `renderFileSerialisationMethod`. */ -FileIngestionMethod parseFileIngestionMethod(std::string_view input); +FileSerialisationMethod parseFileSerialisationMethod(std::string_view input); /** - * Render a `FileIngestionMethod` by name. + * Render a `FileSerialisationMethod` by name. * - * Oppostite of `parseFileIngestionMethod`. + * Opposite of `parseFileSerialisationMethod`. */ -std::string_view renderFileIngestionMethod(FileIngestionMethod method); +std::string_view renderFileSerialisationMethod(FileSerialisationMethod method); /** * Dump a serialization of the given file system object. @@ -46,26 +47,97 @@ std::string_view renderFileIngestionMethod(FileIngestionMethod method); void dumpPath( SourceAccessor & accessor, const CanonPath & path, Sink & sink, - FileIngestionMethod method, + FileSerialisationMethod method, PathFilter & filter = defaultPathFilter); /** - * Restore a serialization of the given file system object. + * Restore a serialisation of the given file system object. * * @TODO use an arbitrary `FileSystemObjectSink`. */ void restorePath( const Path & path, Source & source, - FileIngestionMethod method); + FileSerialisationMethod method); + /** * Compute the hash of the given file system object according to the * given method. * - * The hash is defined as (essentially) hashString(ht, dumpPath(path)). 
+ * the hash is defined as (in pseudocode): + * + * ``` + * hashString(ht, dumpPath(...)) + * ``` */ HashResult hashPath( + SourceAccessor & accessor, const CanonPath & path, + FileSerialisationMethod method, HashAlgorithm ht, + PathFilter & filter = defaultPathFilter); + +/** + * An enumeration of the ways we can ingest file system + * objects, producing a hash or digest. + */ +enum struct FileIngestionMethod : uint8_t { + /** + * Hash `FileSerialisationMethod::Flat` serialisation. + */ + Flat, + + /** + * Hash `FileSerialisationMethod::Recursive` serialisation. + */ + Recursive, + + /** + * Git hashing. In particular files are hashed as git "blobs", and + * directories are hashed as git "trees". + * + * Unlike `Flat` and `Recursive`, this is not a hash of a single + * serialisation but a [Merkle + * DAG](https://en.wikipedia.org/wiki/Merkle_tree) of multiple + * rounds of serialisation and hashing. + * + * @note Git's data model is slightly different, in that a plain + * file doesn't have an executable bit, directory entries do + * instead. We decide to treat a bare file as non-executable by fiat, + * as we do with `FileIngestionMethod::Flat` which also lacks this + * information. Thus, Git can encode some but not all of Nix's "File + * System Objects", and this sort of hashing is likewise partial. + */ + Git, +}; + +/** + * Parse a `FileIngestionMethod` by name. Choice of: + * + * - `flat`: `FileIngestionMethod::Flat` + * - `nar`: `FileIngestionMethod::Recursive` + * - `git`: `FileIngestionMethod::Git` + * + * Opposite of `renderFileIngestionMethod`. + */ +FileIngestionMethod parseFileIngestionMethod(std::string_view input); + +/** + * Render a `FileIngestionMethod` by name. + * + * Opposite of `parseFileIngestionMethod`. + */ +std::string_view renderFileIngestionMethod(FileIngestionMethod method); + +/** + * Compute the hash of the given file system object according to the + * given method. 
+ * + * Unlike the other `hashPath`, this works on an arbitrary + * `FileIngestionMethod` instead of `FileSerialisationMethod`, but + * doesn't return the size, as size is not both simply and + * usefully defined for a Merkle format. + */ +Hash hashPath( SourceAccessor & accessor, const CanonPath & path, FileIngestionMethod method, HashAlgorithm ht, PathFilter & filter = defaultPathFilter); diff --git a/src/nix-store/nix-store.cc b/src/nix-store/nix-store.cc index 99dbfe6e333d..7c8905da6360 100644 --- a/src/nix-store/nix-store.cc +++ b/src/nix-store/nix-store.cc @@ -555,7 +555,7 @@ static void registerValidity(bool reregister, bool hashGiven, bool canonicalise) HashResult hash = hashPath( *store->getFSAccessor(false), CanonPath { store->printStorePath(info->path) }, - FileIngestionMethod::Recursive, HashAlgorithm::SHA256); + FileSerialisationMethod::Recursive, HashAlgorithm::SHA256); info->narHash = hash.first; info->narSize = hash.second; } diff --git a/src/nix/add-to-store.cc b/src/nix/add-to-store.cc index ca2daecabad4..02154715f81b 100644 --- a/src/nix/add-to-store.cc +++ b/src/nix/add-to-store.cc @@ -2,6 +2,7 @@ #include "common-args.hh" #include "store-api.hh" #include "archive.hh" +#include "git.hh" #include "posix-source-accessor.hh" #include "misc-store-flags.hh" diff --git a/src/nix/hash.cc b/src/nix/hash.cc index 98d227f0e60d..f849bf0cfae6 100644 --- a/src/nix/hash.cc +++ b/src/nix/hash.cc @@ -5,6 +5,7 @@ #include "shared.hh" #include "references.hh" #include "archive.hh" +#include "git.hh" #include "posix-source-accessor.hh" #include "misc-store-flags.hh" @@ -66,9 +67,11 @@ struct CmdHashBase : Command { switch (mode) { case FileIngestionMethod::Flat: - return "print cryptographic hash of a regular file"; + return "print cryptographic hash of a regular file"; case FileIngestionMethod::Recursive: return "print cryptographic hash of the NAR serialisation of a path"; + case FileIngestionMethod::Git: + return "print cryptographic hash of the Git 
serialisation of a path"; default: assert(false); }; @@ -77,17 +80,41 @@ struct CmdHashBase : Command void run() override { for (auto path : paths) { + auto makeSink = [&]() -> std::unique_ptr { + if (modulus) + return std::make_unique(hashAlgo, *modulus); + else + return std::make_unique(hashAlgo); + }; + + auto [accessor_, canonPath] = PosixSourceAccessor::createAtRoot(path); + auto & accessor = accessor_; + Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++ + switch (mode) { + case FileIngestionMethod::Flat: + case FileIngestionMethod::Recursive: + { + auto hashSink = makeSink(); + dumpPath(accessor, canonPath, *hashSink, (FileSerialisationMethod) mode); + h = hashSink->finish().first; + break; + } + case FileIngestionMethod::Git: { + std::function hook; + hook = [&](const CanonPath & path) -> git::TreeEntry { + auto hashSink = makeSink(); + auto mode = dump(accessor, path, *hashSink, hook); + auto hash = hashSink->finish().first; + return { + .mode = mode, + .hash = hash, + }; + }; + h = hook(canonPath).hash; + break; + } + } - std::unique_ptr hashSink; - if (modulus) - hashSink = std::make_unique(hashAlgo, *modulus); - else - hashSink = std::make_unique(hashAlgo); - - auto [accessor, canonPath] = PosixSourceAccessor::createAtRoot(path); - dumpPath(accessor, canonPath, *hashSink, mode); - - Hash h = hashSink->finish().first; if (truncate && h.hashSize > 20) h = compressHash(h, 20); logger->cout(h.to_string(hashFormat, hashFormat == HashFormat::SRI)); } diff --git a/tests/functional/git-hashing/common.sh b/tests/functional/git-hashing/common.sh new file mode 100644 index 000000000000..5de96e74f160 --- /dev/null +++ b/tests/functional/git-hashing/common.sh @@ -0,0 +1,11 @@ +source ../common.sh + +clearStore +clearCache + +# Need backend to support git-hashing too +requireDaemonNewerThan "2.18.0pre20230908" + +enableFeatures "git-hashing" + +restartDaemon diff --git a/tests/functional/git-hashing/local.mk b/tests/functional/git-hashing/local.mk new 
file mode 100644 index 000000000000..ebec019402b9 --- /dev/null +++ b/tests/functional/git-hashing/local.mk @@ -0,0 +1,7 @@ +git-hashing-tests := \ + $(d)/simple.sh + +install-tests-groups += git-hashing + +clean-files += \ + $(d)/config.nix diff --git a/tests/functional/git-hashing/simple.sh b/tests/functional/git-hashing/simple.sh new file mode 100644 index 000000000000..74b0220f8213 --- /dev/null +++ b/tests/functional/git-hashing/simple.sh @@ -0,0 +1,58 @@ +source common.sh + +repo="$TEST_ROOT/scratch" +git init "$repo" + +git -C "$repo" config user.email "you@example.com" +git -C "$repo" config user.name "Your Name" + +try () { + hash=$(nix hash path --mode git --format base16 --algo sha1 $TEST_ROOT/hash-path) + [[ "$hash" == "$1" ]] + + git -C "$repo" rm -rf hash-path || true + cp -r "$TEST_ROOT/hash-path" "$TEST_ROOT/scratch/hash-path" + git -C "$repo" add hash-path + git -C "$repo" commit -m "x" + git -C "$repo" status + hash2=$(git -C "$TEST_ROOT/scratch" rev-parse HEAD:hash-path) + [[ "$hash2" = "$1" ]] +} + +# blob +rm -rf $TEST_ROOT/hash-path +echo "Hello World" > $TEST_ROOT/hash-path +try "557db03de997c86a4a028e1ebd3a1ceb225be238" + +# tree with children +rm -rf $TEST_ROOT/hash-path +mkdir $TEST_ROOT/hash-path +echo "Hello World" > $TEST_ROOT/hash-path/hello +echo "Run Hello World" > $TEST_ROOT/hash-path/executable +chmod +x $TEST_ROOT/hash-path/executable +try "e5c0a11a556801a5c9dcf330ca9d7e2c572697f4" + +rm -rf $TEST_ROOT/dummy1 +echo Hello World! > $TEST_ROOT/dummy1 +path1=$(nix store add --mode git --hash-algo sha1 $TEST_ROOT/dummy1) +hash1=$(nix-store -q --hash $path1) +test "$hash1" = "sha256:1brffhvj2c0z6x8qismd43m0iy8dsgfmy10bgg9w11szway2wp9v" + +rm -rf $TEST_ROOT/dummy2 +mkdir -p $TEST_ROOT/dummy2 +echo Hello World! 
> $TEST_ROOT/dummy2/hello +path2=$(nix store add --mode git --hash-algo sha1 $TEST_ROOT/dummy2) +hash2=$(nix-store -q --hash $path2) +test "$hash2" = "sha256:1vhv7zxam7x277q0y0jcypm7hwhccbzss81vkdgf0ww5sm2am4y0" + +rm -rf $TEST_ROOT/dummy3 +mkdir -p $TEST_ROOT/dummy3 +mkdir -p $TEST_ROOT/dummy3/dir +touch $TEST_ROOT/dummy3/dir/file +echo Hello World! > $TEST_ROOT/dummy3/dir/file +touch $TEST_ROOT/dummy3/dir/executable +chmod +x $TEST_ROOT/dummy3/dir/executable +echo Run Hello World! > $TEST_ROOT/dummy3/dir/executable +path3=$(nix store add --mode git --hash-algo sha1 $TEST_ROOT/dummy3) +hash3=$(nix-store -q --hash $path3) +test "$hash3" = "sha256:08y3nm3mvn9qvskqnf13lfgax5lh73krxz4fcjd5cp202ggpw9nv" diff --git a/tests/unit/libstore/content-address.cc b/tests/unit/libstore/content-address.cc index 98c1eace3684..cc1c7fcc69d4 100644 --- a/tests/unit/libstore/content-address.cc +++ b/tests/unit/libstore/content-address.cc @@ -13,6 +13,7 @@ TEST(ContentAddressMethod, testRoundTripPrintParse_1) { ContentAddressMethod { TextIngestionMethod {} }, ContentAddressMethod { FileIngestionMethod::Flat }, ContentAddressMethod { FileIngestionMethod::Recursive }, + ContentAddressMethod { FileIngestionMethod::Git }, }) { EXPECT_EQ(ContentAddressMethod::parse(cam.render()), cam); } @@ -23,6 +24,7 @@ TEST(ContentAddressMethod, testRoundTripPrintParse_2) { "text", "flat", "nar", + "git", }) { EXPECT_EQ(ContentAddressMethod::parse(camS).render(), camS); } diff --git a/tests/unit/libutil/file-content-address.cc b/tests/unit/libutil/file-content-address.cc index 2e819ce408dc..294e39806837 100644 --- a/tests/unit/libutil/file-content-address.cc +++ b/tests/unit/libutil/file-content-address.cc @@ -4,6 +4,32 @@ namespace nix { +/* ---------------------------------------------------------------------------- + * parseFileSerialisationMethod, renderFileSerialisationMethod + * --------------------------------------------------------------------------*/ + +TEST(FileSerialisationMethod, 
testRoundTripPrintParse_1) { + for (const FileSerialisationMethod fim : { + FileSerialisationMethod::Flat, + FileSerialisationMethod::Recursive, + }) { + EXPECT_EQ(parseFileSerialisationMethod(renderFileSerialisationMethod(fim)), fim); + } +} + +TEST(FileSerialisationMethod, testRoundTripPrintParse_2) { + for (const std::string_view fimS : { + "flat", + "nar", + }) { + EXPECT_EQ(renderFileSerialisationMethod(parseFileSerialisationMethod(fimS)), fimS); + } +} + +TEST(FileSerialisationMethod, testParseFileSerialisationMethodOptException) { + EXPECT_THROW(parseFileSerialisationMethod("narwhal"), UsageError); +} + /* ---------------------------------------------------------------------------- * parseFileIngestionMethod, renderFileIngestionMethod * --------------------------------------------------------------------------*/ @@ -12,6 +38,7 @@ TEST(FileIngestionMethod, testRoundTripPrintParse_1) { for (const FileIngestionMethod fim : { FileIngestionMethod::Flat, FileIngestionMethod::Recursive, + FileIngestionMethod::Git, }) { EXPECT_EQ(parseFileIngestionMethod(renderFileIngestionMethod(fim)), fim); } @@ -21,6 +48,7 @@ TEST(FileIngestionMethod, testRoundTripPrintParse_2) { for (const std::string_view fimS : { "flat", "nar", + "git", }) { EXPECT_EQ(renderFileIngestionMethod(parseFileIngestionMethod(fimS)), fimS); }