Skip to content

Commit

Permalink
Git object hashing
Browse files Browse the repository at this point in the history
Part of RFC 133

Extracted from our old IPFS branches.

Co-Authored-By: Matthew Bauer <mjbauer95@gmail.com>
Co-Authored-By: Carlo Nucera <carlo.nucera@protonmail.com>
  • Loading branch information
3 people committed Sep 21, 2023
1 parent c2bdcb9 commit ef57479
Show file tree
Hide file tree
Showing 26 changed files with 586 additions and 40 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ makefiles += \
tests/local.mk \
tests/ca/local.mk \
tests/dyn-drv/local.mk \
tests/git-hashing/local.mk \
tests/test-libstoreconsumer/local.mk \
tests/plugins/local.mk
else
Expand Down
5 changes: 4 additions & 1 deletion src/libexpr/primops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1111,7 +1111,10 @@ drvName, Bindings * attrs, Value & v)
auto handleHashMode = [&](const std::string_view s) {
if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive;
else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat;
else if (s == "text") {
else if (s == "git") {
experimentalFeatureSettings.require(Xp::GitHashing);
ingestionMethod = FileIngestionMethod::Flat;
} else if (s == "text") {
experimentalFeatureSettings.require(Xp::DynamicDerivations);
ingestionMethod = TextIngestionMethod {};
} else
Expand Down
9 changes: 7 additions & 2 deletions src/libstore/binary-cache-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,15 @@ StorePath BinaryCacheStore::addToStore(
implementation of this method in terms of addToStoreFromDump. */

HashSink sink { hashAlgo };
if (method == FileIngestionMethod::Recursive) {
switch (method) {
case FileIngestionMethod::Recursive:
dumpPath(srcPath, sink, filter);
} else {
break;
case FileIngestionMethod::Flat:
readFile(srcPath, sink);
break;
case FileIngestionMethod::Git:
throw Error("cannot add to binary cache store using the git file ingestion method");
}
auto h = sink.finish().first;

Expand Down
20 changes: 16 additions & 4 deletions src/libstore/build/local-derivation-goal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "finally.hh"
#include "util.hh"
#include "archive.hh"
#include "git.hh"
#include "compression.hh"
#include "daemon.hh"
#include "topo-sort.hh"
Expand Down Expand Up @@ -2480,23 +2481,34 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
rewriteOutput(outputRewrites);
/* FIXME optimize and deduplicate with addToStore */
std::string oldHashPart { scratchPath->hashPart() };
HashModuloSink caSink { outputHash.hashType, oldHashPart };
Hash got { outputHash.hashType }; // Dummy value
std::visit(overloaded {
[&](const TextIngestionMethod &) {
HashModuloSink caSink { outputHash.hashType, oldHashPart };
readFile(actualPath, caSink);
got = caSink.finish().first;
},
[&](const FileIngestionMethod & m2) {
switch (m2) {
case FileIngestionMethod::Recursive:
case FileIngestionMethod::Recursive: {
HashModuloSink caSink { outputHash.hashType, oldHashPart };
dumpPath(actualPath, caSink);
got = caSink.finish().first;
break;
case FileIngestionMethod::Flat:
}
case FileIngestionMethod::Flat: {
HashModuloSink caSink { outputHash.hashType, oldHashPart };
readFile(actualPath, caSink);
got = caSink.finish().first;
break;
}
case FileIngestionMethod::Git: {
got = dumpGitHash(outputHash.hashType, (Path) tmpDir + "/tmp");
break;
}
}
},
}, outputHash.method.raw);
auto got = caSink.finish().first;

auto optCA = ContentAddressWithReferences::fromPartsOpt(
outputHash.method,
Expand Down
11 changes: 11 additions & 0 deletions src/libstore/content-address.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ std::string makeFileIngestionPrefix(FileIngestionMethod m)
return "";
case FileIngestionMethod::Recursive:
return "r:";
case FileIngestionMethod::Git:
experimentalFeatureSettings.require(Xp::GitHashing);
return "git:";
default:
throw Error("impossible, caught both cases");
}
Expand All @@ -32,6 +35,10 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m)
ContentAddressMethod method = FileIngestionMethod::Flat;
if (splitPrefix(m, "r:"))
method = FileIngestionMethod::Recursive;
if (splitPrefix(m, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
method = FileIngestionMethod::Git;
}
else if (splitPrefix(m, "text:"))
method = TextIngestionMethod {};
return method;
Expand Down Expand Up @@ -99,6 +106,10 @@ static std::pair<ContentAddressMethod, HashType> parseContentAddressMethodPrefix
auto method = FileIngestionMethod::Flat;
if (splitPrefix(rest, "r:"))
method = FileIngestionMethod::Recursive;
if (splitPrefix(rest, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
method = FileIngestionMethod::Git;
}
HashType hashType = parseHashType_();
return {
std::move(method),
Expand Down
18 changes: 16 additions & 2 deletions src/libstore/content-address.hh
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,26 @@ enum struct FileIngestionMethod : uint8_t {
/**
* Flat-file hashing. Directly ingest the contents of a single file
*/
Flat = false,
Flat,

/**
* Recursive (or NAR) hashing. Serializes the file-system object in Nix
* Archive format and ingests that
*/
Recursive = true
Recursive,

/**
* Git hashing. In particular files are hashed as git "blobs", and
* directories are hashed as git "trees".
*
* @note Git's data model is slightly different, in that a plain
* file doesn't have an executable bit; directory entries do
* instead. We decide to treat a bare file as non-executable by fiat,
* as we do with `FileIngestionMethod::Flat`, which also lacks this
* information. Thus, Git can encode some but not all of Nix's "File
* System Objects", and this sort of hashing is likewise partial.
*/
Git,
};

/**
Expand Down
7 changes: 6 additions & 1 deletion src/libstore/daemon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "archive.hh"
#include "derivations.hh"
#include "args.hh"
#include "git.hh"

namespace nix::daemon {

Expand Down Expand Up @@ -462,13 +463,17 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
TeeSource savedNARSource(from, saved);
ParseSink sink; /* null sink; just parse the NAR */
parseDump(sink, savedNARSource);
} else {
} else if (method == FileIngestionMethod::Flat) {
/* Incrementally parse the NAR file, stripping the
metadata, and streaming the sole file we expect into
`saved`. */
RetrieveRegularNARSink savedRegular { saved };
parseDump(savedRegular, from);
if (!savedRegular.regular) throw Error("regular file expected");
} else {
/* Should have validated above that no other file ingestion
method was used. */
assert(false);
}
});
logger->startWork();
Expand Down
57 changes: 47 additions & 10 deletions src/libstore/local-store.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "local-store.hh"
#include "globals.hh"
#include "git.hh"
#include "archive.hh"
#include "pathlocks.hh"
#include "worker-protocol.hh"
Expand Down Expand Up @@ -1322,10 +1323,22 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name
delTempDir = std::make_unique<AutoDelete>(tempDir);
tempPath = tempDir + "/x";

if (method == FileIngestionMethod::Recursive)
restorePath(tempPath, bothSource);
else
switch (method) {
case FileIngestionMethod::Flat:
writeFile(tempPath, bothSource);
break;
case FileIngestionMethod::Recursive:
restorePath(tempPath, bothSource);
break;
case FileIngestionMethod::Git:
restoreGit(tempPath, bothSource, [&](Hash childHash) {
return this->Store::toRealPath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}));
});
break;
}

dump.clear();
}
Expand Down Expand Up @@ -1364,10 +1377,22 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name
if (inMemory) {
StringSource dumpSource { dump };
/* Restore from the NAR in memory. */
if (method == FileIngestionMethod::Recursive)
restorePath(realPath, dumpSource);
else
switch (method) {
case FileIngestionMethod::Flat:
writeFile(realPath, dumpSource);
break;
case FileIngestionMethod::Recursive:
restorePath(realPath, dumpSource);
break;
case FileIngestionMethod::Git:
restoreGit(realPath, dumpSource, [&](Hash childHash) {
return this->Store::toRealPath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}));
});
break;
}
} else {
/* Move the temporary path we restored above. */
moveFile(tempPath, realPath);
Expand Down Expand Up @@ -1866,25 +1891,37 @@ ContentAddress LocalStore::hashCAPath(
const std::string_view pathHash
)
{
HashModuloSink caSink ( hashType, std::string(pathHash) );
Hash hash { htSHA256 }; // throwaway def to appease C++
std::visit(overloaded {
[&](const TextIngestionMethod &) {
HashModuloSink caSink ( hashType, std::string(pathHash) );
readFile(path, caSink);
hash = caSink.finish().first;
},
[&](const FileIngestionMethod & m2) {
switch (m2) {
case FileIngestionMethod::Recursive:
case FileIngestionMethod::Recursive: {
HashModuloSink caSink ( hashType, std::string(pathHash) );
dumpPath(path, caSink);
hash = caSink.finish().first;
break;
case FileIngestionMethod::Flat:
}
case FileIngestionMethod::Flat: {
HashModuloSink caSink ( hashType, std::string(pathHash) );
readFile(path, caSink);
hash = caSink.finish().first;
break;
}
case FileIngestionMethod::Git: {
hash = dumpGitHash(hashType, path);
break;
}
}
},
}, method.raw);
return ContentAddress {
.method = method,
.hash = caSink.finish().first,
.hash = std::move(hash),
};
}

Expand Down
3 changes: 2 additions & 1 deletion src/libstore/nar-accessor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,10 @@ struct NarAccessor : public FSAccessor
createMember(path, {FSAccessor::Type::tDirectory, false, 0, 0});
}

void createRegularFile(const Path & path) override
void createRegularFile(const Path & path, bool executable = false) override
{
createMember(path, {FSAccessor::Type::tRegular, false, 0, 0});
if (executable) isExecutable();
}

void closeRegularFile() override
Expand Down
1 change: 1 addition & 0 deletions src/libstore/remote-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "derivations.hh"
#include "pool.hh"
#include "finally.hh"
#include "git.hh"
#include "logging.hh"
#include "callback.hh"
#include "filetransfer.hh"
Expand Down
52 changes: 45 additions & 7 deletions src/libstore/store-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "references.hh"
#include "archive.hh"
#include "callback.hh"
#include "git.hh"
#include "remote-store.hh"

#include <nlohmann/json.hpp>
Expand Down Expand Up @@ -114,8 +115,8 @@ StorePath Store::followLinksToStorePath(std::string_view path) const
for paths copied by addToStore() or produced by fixed-output
derivations:
the string "fixed:out:<rec><algo>:<hash>:", where
<rec> = "r:" for recursive (path) hashes, or "" for flat
(file) hashes
<rec> = "r:" for recursive (path) hashes, "git:" for git
paths, or "" for flat (file) hashes
<algo> = "md5", "sha1" or "sha256"
<hash> = base-16 representation of the path or flat hash of
the contents of the path (or expected contents of the
Expand Down Expand Up @@ -184,6 +185,9 @@ static std::string makeType(

StorePath Store::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const
{
if (info.method == FileIngestionMethod::Git && info.hash.type != htSHA1)
throw Error("Git file ingestion must use sha1 hash");

if (info.hash.type == htSHA256 && info.method == FileIngestionMethod::Recursive) {
return makeStorePath(makeType(*this, "source", info.references), info.hash, name);
} else {
Expand Down Expand Up @@ -228,9 +232,22 @@ StorePath Store::makeFixedOutputPathFromCA(std::string_view name, const ContentA
std::pair<StorePath, Hash> Store::computeStorePathForPath(std::string_view name,
const Path & srcPath, FileIngestionMethod method, HashType hashAlgo, PathFilter & filter) const
{
Hash h = method == FileIngestionMethod::Recursive
? hashPath(hashAlgo, srcPath, filter).first
: hashFile(hashAlgo, srcPath);
Hash h { htSHA256 }; // throwaway def to appease C++
switch (method) {
case FileIngestionMethod::Recursive: {
h = hashPath(hashAlgo, srcPath, filter).first;
break;
}
case FileIngestionMethod::Git: {
h = hashGit(hashAlgo, srcPath, filter).first;
break;
}
case FileIngestionMethod::Flat: {
h = hashFile(hashAlgo, srcPath);
break;
}
}

FixedOutputInfo caInfo {
.method = method,
.hash = h,
Expand Down Expand Up @@ -263,10 +280,29 @@ StorePath Store::addToStore(
{
Path srcPath(absPath(_srcPath));
auto source = sinkToSource([&](Sink & sink) {
if (method == FileIngestionMethod::Recursive)
switch (method) {
case FileIngestionMethod::Recursive: {
dumpPath(srcPath, sink, filter);
else
break;
}
case FileIngestionMethod::Git: {
// recursively add to store if path is a directory

struct stat st;
if (lstat(srcPath.c_str(), &st))
throw SysError("getting attributes of path '%1%'", srcPath);
if (S_ISDIR(st.st_mode))
for (auto & i : readDirectory(srcPath))
addToStore("git", srcPath + "/" + i.name, method, hashAlgo, filter, repair);

dumpGit(hashAlgo, srcPath, sink, filter);
break;
}
case FileIngestionMethod::Flat: {
readFile(srcPath, sink);
break;
}
}
});
return addToStoreFromDump(*source, name, method, hashAlgo, repair, references);
}
Expand Down Expand Up @@ -430,6 +466,8 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath,

auto hash = method == FileIngestionMethod::Recursive && hashAlgo == htSHA256
? narHash
: method == FileIngestionMethod::Git
? hashGit(hashAlgo, srcPath).first
: caHashSink.finish().first;

if (expectedCAHash && expectedCAHash != hash)
Expand Down
Loading

0 comments on commit ef57479

Please sign in to comment.