Skip to content

Commit

Permalink
Avoid creating temporary store object for git over the wire
Browse files Browse the repository at this point in the history
Instead, serialize as NAR and send that over, then rehash server side.
This is algorithmically simpler, but comes at the cost of a new
parameter to `Store::addToStoreFromDump`.
  • Loading branch information
Ericson2314 committed Jan 21, 2024
1 parent 70d3ed0 commit 36e5ff3
Show file tree
Hide file tree
Showing 16 changed files with 137 additions and 138 deletions.
2 changes: 1 addition & 1 deletion src/libexpr/primops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2087,7 +2087,7 @@ static void prim_toFile(EvalState & state, const PosIdx pos, Value * * args, Val
})
: ({
StringSource s { contents };
state.store->addToStoreFromDump(s, name, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair);
state.store->addToStoreFromDump(s, name, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair);
});

/* Note: we don't need to add `context' to the context of the
Expand Down
27 changes: 17 additions & 10 deletions src/libstore/binary-cache-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -305,25 +305,35 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource
StorePath BinaryCacheStore::addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair)
{
std::optional<Hash> caHash;
std::string nar;

// Calculating Git hash from NAR stream not yet implemented. May not
// be possible to implement in a single pass if the NAR is in an
// inconvenient order. Could fetch after uploading, however.
if (hashMethod.getFileIngestionMethod() == FileIngestionMethod::Git)
unsupported("addToStoreFromDump");

if (auto * dump2p = dynamic_cast<StringSource *>(&dump)) {
auto & dump2 = *dump2p;
// Hack, this gives us a "replayable" source so we can compute
// multiple hashes more easily.
caHash = hashString(HashAlgorithm::SHA256, dump2.s);
switch (method.getFileIngestionMethod()) {
case FileIngestionMethod::Recursive:
//
// Only calculate if the dump is in the right format, however.
if (static_cast<FileIngestionMethod>(dumpMethod) == hashMethod.getFileIngestionMethod())
caHash = hashString(HashAlgorithm::SHA256, dump2.s);
switch (dumpMethod) {
case FileSerialisationMethod::Recursive:
// The dump is already NAR in this case, just use it.
nar = dump2.s;
break;
case FileIngestionMethod::Flat:
case FileSerialisationMethod::Flat:
{
// The dump is Flat, so we need to convert it to NAR with a
// single file.
Expand All @@ -332,14 +342,11 @@ StorePath BinaryCacheStore::addToStoreFromDump(
nar = std::move(s.s);
break;
}
case FileIngestionMethod::Git:
unsupported("addToStoreFromDump");
break;
}
} else {
// Otherwise, we have to do the same hashing as NAR so our single
// hash will suffice for both purposes.
if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256)
if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256)
unsupported("addToStoreFromDump");
}
StringSource narDump { nar };
Expand All @@ -354,7 +361,7 @@ StorePath BinaryCacheStore::addToStoreFromDump(
*this,
name,
ContentAddressWithReferences::fromParts(
method,
hashMethod,
caHash ? *caHash : nar.first,
{
.others = references,
Expand Down
3 changes: 2 additions & 1 deletion src/libstore/binary-cache-store.hh
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ public:
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair) override;
Expand Down
5 changes: 3 additions & 2 deletions src/libstore/build/local-derivation-goal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1312,12 +1312,13 @@ struct RestrictedStore : public virtual RestrictedStoreConfig, public virtual In
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair) override
{
auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, references, repair);
auto path = next->addToStoreFromDump(dump, name, dumpMethod, hashMethod, hashAlgo, references, repair);
goal.addDependency(path);
return path;
}
Expand Down
59 changes: 30 additions & 29 deletions src/libstore/daemon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -401,11 +401,23 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork();
auto pathInfo = [&]() {
// NB: FramedSource must be out of scope before logger->stopWork();
auto [contentAddressMethod, hashAlgo_] = ContentAddressMethod::parseWithAlgo(camStr);
auto hashAlgo = hashAlgo_; // work around clang bug
auto [contentAddressMethod, hashAlgo] = ContentAddressMethod::parseWithAlgo(camStr);
FramedSource source(from);
FileSerialisationMethod dumpMethod;
switch (contentAddressMethod.getFileIngestionMethod()) {
case FileIngestionMethod::Flat:
dumpMethod = FileSerialisationMethod::Flat;
break;
case FileIngestionMethod::Recursive:
dumpMethod = FileSerialisationMethod::Recursive;
break;
case FileIngestionMethod::Git:
// Use NAR; Git is not a serialization method
dumpMethod = FileSerialisationMethod::Recursive;
break;
}
// TODO these two steps are essentially RemoteStore::addCAToStore. Move it up to Store.
auto path = store->addToStoreFromDump(source, name, contentAddressMethod, hashAlgo, refs, repair);
auto path = store->addToStoreFromDump(source, name, dumpMethod, contentAddressMethod, hashAlgo, refs, repair);
return store->queryPathInfo(path);
}();
logger->stopWork();
Expand All @@ -431,34 +443,23 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
hashAlgo = parseHashAlgo(hashAlgoRaw);
}

// Old protocol always sends NAR, regardless of hashing method
auto dumpSource = sinkToSource([&](Sink & saved) {
if (method == FileIngestionMethod::Recursive) {
/* We parse the NAR dump through into `saved` unmodified,
so why all this extra work? We still parse the NAR so
that we aren't sending arbitrary data to `saved`
unwittingly`, and we know when the NAR ends so we don't
consume the rest of `from` and can't parse another
command. (We don't trust `addToStoreFromDump` to not
eagerly consume the entire stream it's given, past the
length of the Nar. */
TeeSource savedNARSource(from, saved);
NullParseSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource);
} else if (method == FileIngestionMethod::Flat) {
/* Incrementally parse the NAR file, stripping the
metadata, and streaming the sole file we expect into
`saved`. */
RegularFileSink savedRegular { saved };
parseDump(savedRegular, from);
if (!savedRegular.regular) throw Error("regular file expected");
} else {
/* Should have validated above that no other file ingestion
method was used. */
assert(false);
}
/* We parse the NAR dump through into `saved` unmodified,
so why all this extra work? We still parse the NAR so
that we aren't sending arbitrary data to `saved`
unwittingly, and we know when the NAR ends so we don't
consume the rest of `from` and become unable to parse another
command. (We don't trust `addToStoreFromDump` to not
eagerly consume the entire stream it's given, past the
length of the NAR.) */
TeeSource savedNARSource(from, saved);
NullParseSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource);
});
logger->startWork();
auto path = store->addToStoreFromDump(*dumpSource, baseName, method, hashAlgo);
auto path = store->addToStoreFromDump(
*dumpSource, baseName, FileSerialisationMethod::Recursive, method, hashAlgo);
logger->stopWork();

to << store->printStorePath(path);
Expand Down Expand Up @@ -490,7 +491,7 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork();
auto path = ({
StringSource source { s };
store->addToStoreFromDump(source, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair);
store->addToStoreFromDump(source, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair);
});
logger->stopWork();
to << store->printStorePath(path);
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/derivations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ StorePath writeDerivation(Store & store,
})
: ({
StringSource s { contents };
store.addToStoreFromDump(s, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair);
store.addToStoreFromDump(s, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair);
});
}

Expand Down
3 changes: 2 additions & 1 deletion src/libstore/dummy-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ struct DummyStore : public virtual DummyStoreConfig, public virtual Store
virtual StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method = FileIngestionMethod::Recursive,
FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive,
ContentAddressMethod hashMethod = FileIngestionMethod::Recursive,
HashAlgorithm hashAlgo = HashAlgorithm::SHA256,
const StorePathSet & references = StorePathSet(),
RepairFlag repair = NoRepair) override
Expand Down
3 changes: 2 additions & 1 deletion src/libstore/legacy-ssh-store.hh
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,8 @@ struct LegacySSHStore : public virtual LegacySSHStoreConfig, public virtual Stor
virtual StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method = FileIngestionMethod::Recursive,
FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive,
ContentAddressMethod hashMethod = FileIngestionMethod::Recursive,
HashAlgorithm hashAlgo = HashAlgorithm::SHA256,
const StorePathSet & references = StorePathSet(),
RepairFlag repair = NoRepair) override
Expand Down
77 changes: 27 additions & 50 deletions src/libstore/local-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1148,7 +1148,8 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
StorePath LocalStore::addToStoreFromDump(
Source & source0,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair)
Expand Down Expand Up @@ -1201,7 +1202,13 @@ StorePath LocalStore::addToStoreFromDump(
Path tempDir;
AutoCloseFD tempDirFd;

if (!inMemory) {
bool methodsMatch = (FileIngestionMethod) dumpMethod == hashMethod;

/* If the methods don't match, our streaming hash of the dump is the
wrong sort, and we need to rehash. */
bool inMemoryAndDontNeedRestore = inMemory && methodsMatch;

if (!inMemoryAndDontNeedRestore) {
/* Drain what we pulled so far, and then keep on pulling */
StringSource dumpSource { dump };
ChainSource bothSource { dumpSource, source };
Expand All @@ -1210,40 +1217,23 @@ StorePath LocalStore::addToStoreFromDump(
delTempDir = std::make_unique<AutoDelete>(tempDir);
tempPath = tempDir + "/x";

auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(tempPath, bothSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = tempPath;
auto accessor = getFSAccessor();
git::restore(sink, bothSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
}
restorePath(tempPath, bothSource, dumpMethod);

dumpBuffer.reset();
dump = {};
}

auto [hash, size] = hashSink->finish();
auto [dumpHash, size] = hashSink->finish();

PosixSourceAccessor accessor;

auto desc = ContentAddressWithReferences::fromParts(
method,
hash,
hashMethod,
methodsMatch
? dumpHash
: hashPath(
accessor, CanonPath { tempPath },
hashMethod.getFileIngestionMethod(), hashAlgo),
{
.others = references,
// caller is not capable of creating a self-reference, because this is content-addressed without modulus
Expand All @@ -1269,32 +1259,19 @@ StorePath LocalStore::addToStoreFromDump(

autoGC();

if (inMemory) {
if (inMemoryAndDontNeedRestore) {
StringSource dumpSource { dump };
/* Restore from the buffer in memory. */
auto fim = method.getFileIngestionMethod();
auto fim = hashMethod.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(realPath, dumpSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = realPath;
auto accessor = getFSAccessor();
git::restore(sink, dumpSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
case FileIngestionMethod::Git:
// doesn't correspond to serialization method, so
// this should be unreachable
assert(false);
}
} else {
/* Move the temporary path we restored above. */
Expand All @@ -1303,8 +1280,8 @@ StorePath LocalStore::addToStoreFromDump(

/* For computing the nar hash. In recursive SHA-256 mode, this
is the same as the store hash, so no need to do it again. */
auto narHash = std::pair { hash, size };
if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) {
auto narHash = std::pair { dumpHash, size };
if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) {
HashSink narSink { HashAlgorithm::SHA256 };
dumpPath(realPath, narSink);
narHash = narSink.finish();
Expand Down
3 changes: 2 additions & 1 deletion src/libstore/local-store.hh
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,8 @@ public:
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair) override;
Expand Down
20 changes: 18 additions & 2 deletions src/libstore/remote-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -509,12 +509,28 @@ ref<const ValidPathInfo> RemoteStore::addCAToStore(
StorePath RemoteStore::addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method,
FileSerialisationMethod dumpMethod,
ContentAddressMethod hashMethod,
HashAlgorithm hashAlgo,
const StorePathSet & references,
RepairFlag repair)
{
return addCAToStore(dump, name, method, hashAlgo, references, repair)->path;
FileSerialisationMethod fsm;
switch (hashMethod.getFileIngestionMethod()) {
case FileIngestionMethod::Flat:
fsm = FileSerialisationMethod::Flat;
break;
case FileIngestionMethod::Recursive:
fsm = FileSerialisationMethod::Recursive;
break;
case FileIngestionMethod::Git:
// Use NAR; Git is not a serialization method
fsm = FileSerialisationMethod::Recursive;
break;
}
if (fsm != dumpMethod)
unsupported("RemoteStore::addToStoreFromDump doesn't support this `dumpMethod` `hashMethod` combination");
return addCAToStore(dump, name, hashMethod, hashAlgo, references, repair)->path;
}


Expand Down
3 changes: 2 additions & 1 deletion src/libstore/remote-store.hh
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ public:
StorePath addToStoreFromDump(
Source & dump,
std::string_view name,
ContentAddressMethod method = FileIngestionMethod::Recursive,
FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive,
ContentAddressMethod hashMethod = FileIngestionMethod::Recursive,
HashAlgorithm hashAlgo = HashAlgorithm::SHA256,
const StorePathSet & references = StorePathSet(),
RepairFlag repair = NoRepair) override;
Expand Down
Loading

0 comments on commit 36e5ff3

Please sign in to comment.