From 9ec10046e04a509cc982102f587cf06840f02327 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 11 Jul 2020 16:06:24 +0000 Subject: [PATCH 01/11] Narrow scope of temporary value --- src/libstore/daemon.cc | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index db7139374c1..6e0b290ed4d 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -375,21 +375,24 @@ static void performOp(TunnelLogger * logger, ref store, } case wopAddToStore: { - std::string s, baseName; + HashType hashAlgo; + std::string baseName; FileIngestionMethod method; { - bool fixed; uint8_t recursive; - from >> baseName >> fixed /* obsolete */ >> recursive >> s; + bool fixed; + uint8_t recursive; + std::string hashAlgoRaw; + from >> baseName >> fixed /* obsolete */ >> recursive >> hashAlgoRaw; if (recursive > (uint8_t) FileIngestionMethod::Recursive) throw Error("unsupported FileIngestionMethod with value of %i; you may need to upgrade nix-daemon", recursive); method = FileIngestionMethod { recursive }; /* Compatibility hack. */ if (!fixed) { - s = "sha256"; + hashAlgoRaw = "sha256"; method = FileIngestionMethod::Recursive; } + hashAlgo = parseHashType(hashAlgoRaw); } - HashType hashAlgo = parseHashType(s); StringSink savedNAR; TeeSource savedNARSource(from, savedNAR); From c86fc3a9657096b74fe967f2f0bbd120e46908f6 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 11 Jul 2020 15:55:04 +0000 Subject: [PATCH 02/11] Crudely make `addToStoreFromDump` take `Source` not string I just as little beyond the type as possible, so the implementation changes this enables can be reviewed separately. --- src/libstore/build.cc | 2 +- src/libstore/daemon.cc | 5 ++++- src/libstore/local-store.cc | 5 ++++- src/libstore/local-store.hh | 2 +- src/libstore/store-api.hh | 2 +- 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/libstore/build.cc b/src/libstore/build.cc index ac2e675740d..62294a08cdd 100644 --- a/src/libstore/build.cc +++ b/src/libstore/build.cc @@ -2774,7 +2774,7 @@ struct RestrictedStore : public LocalFSStore goal.addDependency(info.path); } - StorePath addToStoreFromDump(const string & dump, const string & name, + StorePath addToStoreFromDump(Source & dump, const string & name, FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override { auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, repair); diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index 6e0b290ed4d..69d7ef511e8 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -410,8 +410,11 @@ static void performOp(TunnelLogger * logger, ref store, logger->startWork(); if (!savedRegular.regular) throw Error("regular file expected"); + StringSource dumpSource { + method == FileIngestionMethod::Recursive ? *savedNAR.s : savedRegular.s + }; auto path = store->addToStoreFromDump( - method == FileIngestionMethod::Recursive ? *savedNAR.s : savedRegular.s, + dumpSource, baseName, method, hashAlgo); diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 26b226fe8d2..603f3635248 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1033,9 +1033,12 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, } -StorePath LocalStore::addToStoreFromDump(const string & dump, const string & name, +StorePath LocalStore::addToStoreFromDump(Source & dumpSource, const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) { + // FIXME: See if we can use the original source to reduce memory usage. + auto dump = dumpSource.drain(); + Hash h = hashString(hashAlgo, dump); auto dstPath = makeFixedOutputPath(method, h, name); diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index c0e5d028682..355c2814f2c 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -153,7 +153,7 @@ public: in `dump', which is either a NAR serialisation (if recursive == true) or simply the contents of a regular file (if recursive == false). */ - StorePath addToStoreFromDump(const string & dump, const string & name, + StorePath addToStoreFromDump(Source & dump, const string & name, FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override; StorePath addTextToStore(const string & name, const string & s, diff --git a/src/libstore/store-api.hh b/src/libstore/store-api.hh index a4be0411ee7..d1cb2035fe1 100644 --- a/src/libstore/store-api.hh +++ b/src/libstore/store-api.hh @@ -460,7 +460,7 @@ public: std::optional expectedCAHash = {}); // FIXME: remove? - virtual StorePath addToStoreFromDump(const string & dump, const string & name, + virtual StorePath addToStoreFromDump(Source & dump, const string & name, FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) { throw Error("addToStoreFromDump() is not supported by this store"); From 9de96ef7d409fedea092045c4dbae7177f88962a Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 11 Jul 2020 19:03:39 +0000 Subject: [PATCH 03/11] Dedup `LocalStore::addToStore*` The downsides is that the coroutine has byte-by-byte loop transfer. Will fix that next. --- src/libstore/local-store.cc | 83 ++++++++++--------------------------- src/libstore/local-store.hh | 4 ++ 2 files changed, 25 insertions(+), 62 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 603f3635248..925ac25bfc5 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1033,65 +1033,16 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, } -StorePath LocalStore::addToStoreFromDump(Source & dumpSource, const string & name, +StorePath LocalStore::addToStoreFromDump(Source & dump, const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) { - // FIXME: See if we can use the original source to reduce memory usage. - auto dump = dumpSource.drain(); - - Hash h = hashString(hashAlgo, dump); - - auto dstPath = makeFixedOutputPath(method, h, name); - - addTempRoot(dstPath); - - if (repair || !isValidPath(dstPath)) { - - /* The first check above is an optimisation to prevent - unnecessary lock acquisition. */ - - auto realPath = Store::toRealPath(dstPath); - - PathLocks outputLock({realPath}); - - if (repair || !isValidPath(dstPath)) { - - deletePath(realPath); - - autoGC(); - - if (method == FileIngestionMethod::Recursive) { - StringSource source(dump); - restorePath(realPath, source); - } else - writeFile(realPath, dump); - - canonicalisePathMetaData(realPath, -1); - - /* Register the SHA-256 hash of the NAR serialisation of - the path in the database. We may just have computed it - above (if called with recursive == true and hashAlgo == - sha256); otherwise, compute it here. */ - HashResult hash; - if (method == FileIngestionMethod::Recursive) { - hash.first = hashAlgo == htSHA256 ? h : hashString(htSHA256, dump); - hash.second = dump.size(); - } else - hash = hashPath(htSHA256, realPath); - - optimisePath(realPath); // FIXME: combine with hashPath() - - ValidPathInfo info(dstPath); - info.narHash = hash.first; - info.narSize = hash.second; - info.ca = FixedOutputHash { .method = method, .hash = h }; - registerValidPath(info); + return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) { + while (1) { + uint8_t buf[1]; + auto n = dump.read(buf, 1); + sink(buf, n); } - - outputLock.setDeletion(true); - } - - return dstPath; + }); } @@ -1100,6 +1051,19 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, { Path srcPath(absPath(_srcPath)); + return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) { + if (method == FileIngestionMethod::Recursive) + dumpPath(srcPath, sink, filter); + else + readFile(srcPath, sink); + }); +} + + +StorePath LocalStore::addToStoreCommon( + const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, + std::function demux) +{ /* For computing the NAR hash. */ auto sha256Sink = std::make_unique(htSHA256); @@ -1120,7 +1084,6 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, std::string nar; auto source = sinkToSource([&](Sink & sink) { - LambdaSink sink2([&](const unsigned char * buf, size_t len) { (*sha256Sink)(buf, len); if (hashSink) (*hashSink)(buf, len); @@ -1138,11 +1101,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, if (!inMemory) sink(buf, len); }); - - if (method == FileIngestionMethod::Recursive) - dumpPath(srcPath, sink2, filter); - else - readFile(srcPath, sink2); + demux(sink2); }); std::unique_ptr delTempDir; diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index 355c2814f2c..215731f87d4 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -290,6 +290,10 @@ private: specified by the ‘secret-key-files’ option. */ void signPathInfo(ValidPathInfo & info); + StorePath addToStoreCommon( + const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, + std::function demux); + Path getRealStoreDir() override { return realStoreDir; } void createUser(const std::string & userName, uid_t userId) override; From 592851fb67cd15807109d6f65fb81f6af89af966 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 11 Jul 2020 23:40:49 +0000 Subject: [PATCH 04/11] LocalStore::addToStoreFromDump copy in chunks Rather than copying byte-by-byte, we let the coroutine know how much data we would like it to send back to us. --- src/libstore/local-store.cc | 16 +++++++++------- src/libstore/local-store.hh | 2 +- src/libutil/serialise.cc | 33 ++++++++++++++++++++------------- src/libutil/serialise.hh | 11 ++++++++++- 4 files changed, 40 insertions(+), 22 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 925ac25bfc5..dac7a50c4e0 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1036,11 +1036,13 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, StorePath LocalStore::addToStoreFromDump(Source & dump, const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) { - return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) { + return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & wanted) { while (1) { - uint8_t buf[1]; - auto n = dump.read(buf, 1); + constexpr size_t bufSize = 1024; + uint8_t buf[bufSize]; + auto n = dump.read(buf, std::min(wanted, bufSize)); sink(buf, n); + // when control is yielded back to us wanted will be updated. } }); } @@ -1051,7 +1053,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, { Path srcPath(absPath(_srcPath)); - return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink) { + return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & _) { if (method == FileIngestionMethod::Recursive) dumpPath(srcPath, sink, filter); else @@ -1062,7 +1064,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, StorePath LocalStore::addToStoreCommon( const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, - std::function demux) + std::function demux) { /* For computing the NAR hash. */ auto sha256Sink = std::make_unique(htSHA256); @@ -1083,7 +1085,7 @@ StorePath LocalStore::addToStoreCommon( bool inMemory = true; std::string nar; - auto source = sinkToSource([&](Sink & sink) { + auto source = sinkToSource([&](Sink & sink, size_t & wanted) { LambdaSink sink2([&](const unsigned char * buf, size_t len) { (*sha256Sink)(buf, len); if (hashSink) (*hashSink)(buf, len); @@ -1101,7 +1103,7 @@ StorePath LocalStore::addToStoreCommon( if (!inMemory) sink(buf, len); }); - demux(sink2); + demux(sink2, wanted); }); std::unique_ptr delTempDir; diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index 215731f87d4..ae23004c474 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -292,7 +292,7 @@ private: StorePath addToStoreCommon( const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, - std::function demux); + std::function demux); Path getRealStoreDir() override { return realStoreDir; } diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index c8b71188fe0..141e9e9767f 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -165,35 +165,43 @@ size_t StringSource::read(unsigned char * data, size_t len) #endif std::unique_ptr sinkToSource( - std::function fun, + std::function fun, std::function eof) { struct SinkToSource : Source { - typedef boost::coroutines2::coroutine coro_t; + typedef boost::coroutines2::coroutine> coro_t; - std::function fun; + std::function fun; std::function eof; std::optional coro; bool started = false; - SinkToSource(std::function fun, std::function eof) + /* It would be nicer to have the co-routines have both args and a + return value, but unfortunately that was removed from Boost's + implementation for some reason, so we use some extra state instead. + */ + size_t wanted = 0; + + SinkToSource(std::function fun, std::function eof) : fun(fun), eof(eof) { } - std::string cur; + std::basic_string cur; size_t pos = 0; size_t read(unsigned char * data, size_t len) override { - if (!coro) + wanted = len < cur.size() ? 0 : len - cur.size(); + if (!coro) { coro = coro_t::pull_type([&](coro_t::push_type & yield) { - LambdaSink sink([&](const unsigned char * data, size_t len) { - if (len) yield(std::string((const char *) data, len)); + LambdaSink sink([&](const uint8_t * data, size_t len) { + if (len) yield(std::basic_string { data, len }); }); - fun(sink); + fun(sink, wanted); }); + } if (!*coro) { eof(); abort(); } @@ -203,11 +211,10 @@ std::unique_ptr sinkToSource( pos = 0; } - auto n = std::min(cur.size() - pos, len); - memcpy(data, (unsigned char *) cur.data() + pos, n); - pos += n; + auto numCopied = cur.copy(data, len, pos); + pos += numCopied; - return n; + return numCopied; } }; diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 8386a499124..6cb9d1bf506 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -260,11 +260,20 @@ struct LambdaSource : Source /* Convert a function that feeds data into a Sink into a Source. The Source executes the function as a coroutine. */ std::unique_ptr sinkToSource( - std::function fun, + std::function fun, std::function eof = []() { throw EndOfFile("coroutine has finished"); }); +static inline std::unique_ptr sinkToSource( + std::function fun, + std::function eof = []() { + throw EndOfFile("coroutine has finished"); + }) +{ + return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof); +} + void writePadding(size_t len, Sink & sink); void writeString(const unsigned char * buf, size_t len, Sink & sink); From 8173e7bfefc6a5771b2c9ec48bd6edd3b161dd90 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Tue, 14 Jul 2020 21:11:08 +0000 Subject: [PATCH 05/11] Fix localhost::addToStore(...Path...) We were calculating the nar hash wrong when the file ingestion method was flat. I don't think there's anything we can do in that case but dump the file again, so that's what I do. As an optomization, we again could reuse the original dump for just the recursive and non-sha256 case, but I rather do that after this fix, and after my other PRs which deduplicate this code. --- src/libstore/local-store.cc | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 26b226fe8d2..5827dfc589a 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1097,15 +1097,8 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, { Path srcPath(absPath(_srcPath)); - /* For computing the NAR hash. */ - auto sha256Sink = std::make_unique(htSHA256); - - /* For computing the store path. In recursive SHA-256 mode, this - is the same as the NAR hash, so no need to do it again. */ - std::unique_ptr hashSink = - method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 - ? nullptr - : std::make_unique(hashAlgo); + /* For computing the store path. */ + auto hashSink = std::make_unique(hashAlgo); /* Read the source path into memory, but only if it's up to narBufferSize bytes. If it's larger, write it to a temporary @@ -1114,13 +1107,12 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, temporary path. Otherwise, we move it to the destination store path. */ bool inMemory = true; - std::string nar; + std::string nar; // TODO rename from "nar" to "dump" auto source = sinkToSource([&](Sink & sink) { LambdaSink sink2([&](const unsigned char * buf, size_t len) { - (*sha256Sink)(buf, len); - if (hashSink) (*hashSink)(buf, len); + (*hashSink)(buf, len); if (inMemory) { if (nar.size() + len > settings.narBufferSize) { @@ -1165,9 +1157,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, /* The NAR fits in memory, so we didn't do restorePath(). */ } - auto sha256 = sha256Sink->finish(); - - Hash hash = hashSink ? hashSink->finish().first : sha256.first; + auto [hash, size] = hashSink->finish(); auto dstPath = makeFixedOutputPath(method, hash, name); @@ -1201,13 +1191,22 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, throw Error("renaming '%s' to '%s'", tempPath, realPath); } + /* For computing the nar hash. In recursive SHA-256 mode, this + is the same as the store hash, so no need to do it again. */ + auto narHash = std::pair { hash, size }; + if (method != FileIngestionMethod::Recursive || hashAlgo != htSHA256) { + HashSink narSink { htSHA256 }; + dumpPath(realPath, narSink); + narHash = narSink.finish(); + } + canonicalisePathMetaData(realPath, -1); // FIXME: merge into restorePath optimisePath(realPath); ValidPathInfo info(dstPath); - info.narHash = sha256.first; - info.narSize = sha256.second; + info.narHash = narHash.first; + info.narSize = narHash.second; info.ca = FixedOutputHash { .method = method, .hash = hash }; registerValidPath(info); } From 650c2c655810c375296b52997e2f85298c7c566a Mon Sep 17 00:00:00 2001 From: John Ericson Date: Tue, 14 Jul 2020 21:28:50 +0000 Subject: [PATCH 06/11] Rename variable `nar` -> `dump` according to TODO --- src/libstore/local-store.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 5827dfc589a..cd92f138c87 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1107,7 +1107,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, temporary path. Otherwise, we move it to the destination store path. */ bool inMemory = true; - std::string nar; // TODO rename from "nar" to "dump" + std::string dump; auto source = sinkToSource([&](Sink & sink) { @@ -1115,13 +1115,13 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, (*hashSink)(buf, len); if (inMemory) { - if (nar.size() + len > settings.narBufferSize) { + if (dump.size() + len > settings.narBufferSize) { inMemory = false; sink << 1; - sink((const unsigned char *) nar.data(), nar.size()); - nar.clear(); + sink((const unsigned char *) dump.data(), dump.size()); + dump.clear(); } else { - nar.append((const char *) buf, len); + dump.append((const char *) buf, len); } } @@ -1180,7 +1180,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, if (inMemory) { /* Restore from the NAR in memory. */ - StringSource source(nar); + StringSource source(dump); if (method == FileIngestionMethod::Recursive) restorePath(realPath, source); else From d087cf48552ee82e2bc78bb6c99854bab350ee00 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Wed, 15 Jul 2020 21:10:33 +0000 Subject: [PATCH 07/11] Revert "Revert "LocalStore::addToStore(srcPath): Handle the flat case"" This reverts commit cff2157185912025c24a1b9dc99056161634176c. --- src/libstore/local-store.cc | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index b3f4b3f7d55..26b226fe8d2 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1097,16 +1097,13 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, { Path srcPath(absPath(_srcPath)); - if (method != FileIngestionMethod::Recursive) - return addToStoreFromDump(readFile(srcPath), name, method, hashAlgo, repair); - /* For computing the NAR hash. */ auto sha256Sink = std::make_unique(htSHA256); /* For computing the store path. In recursive SHA-256 mode, this is the same as the NAR hash, so no need to do it again. */ std::unique_ptr hashSink = - hashAlgo == htSHA256 + method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 ? nullptr : std::make_unique(hashAlgo); @@ -1139,7 +1136,10 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, if (!inMemory) sink(buf, len); }); - dumpPath(srcPath, sink2, filter); + if (method == FileIngestionMethod::Recursive) + dumpPath(srcPath, sink2, filter); + else + readFile(srcPath, sink2); }); std::unique_ptr delTempDir; @@ -1155,7 +1155,10 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, delTempDir = std::make_unique(tempDir); tempPath = tempDir + "/x"; - restorePath(tempPath, *source); + if (method == FileIngestionMethod::Recursive) + restorePath(tempPath, *source); + else + writeFile(tempPath, *source); } catch (EndOfFile &) { if (!inMemory) throw; @@ -1188,7 +1191,10 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, if (inMemory) { /* Restore from the NAR in memory. */ StringSource source(nar); - restorePath(realPath, source); + if (method == FileIngestionMethod::Recursive) + restorePath(realPath, source); + else + writeFile(realPath, source); } else { /* Move the temporary path we restored above. */ if (rename(tempPath.c_str(), realPath.c_str())) From bc109648c41f8021707b55b815e68a890a09f2f6 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Wed, 15 Jul 2020 23:14:30 +0000 Subject: [PATCH 08/11] Get rid of `LocalStore::addToStoreCommon` I got it to just become `LocalStore::addToStoreFromDump`, cleanly taking a store and then doing nothing too fancy with it. `LocalStore::addToStore(...Path...)` is now just a simple wrapper with a bare-bones sinkToSource of the right dump command. --- src/libstore/local-store.cc | 93 ++++++++++++++++--------------------- src/libstore/local-store.hh | 4 -- src/libutil/serialise.cc | 13 ++++++ src/libutil/serialise.hh | 15 +++++- 4 files changed, 67 insertions(+), 58 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index b9fae608955..07e1679da03 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1033,38 +1033,22 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, } -StorePath LocalStore::addToStoreFromDump(Source & dump, const string & name, - FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) -{ - return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & wanted) { - while (1) { - constexpr size_t bufSize = 1024; - uint8_t buf[bufSize]; - auto n = dump.read(buf, std::min(wanted, bufSize)); - sink(buf, n); - // when control is yielded back to us wanted will be updated. - } - }); -} - - StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, FileIngestionMethod method, HashType hashAlgo, PathFilter & filter, RepairFlag repair) { Path srcPath(absPath(_srcPath)); - - return addToStoreCommon(name, method, hashAlgo, repair, [&](auto & sink, size_t & _) { + auto source = sinkToSource([&](Sink & sink, size_t & wanted) { if (method == FileIngestionMethod::Recursive) dumpPath(srcPath, sink, filter); else readFile(srcPath, sink); }); + return addToStoreFromDump(*source, name, method, hashAlgo, repair); } -StorePath LocalStore::addToStoreCommon( - const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, - std::function demux) +StorePath LocalStore::addToStoreFromDump(Source & source, const string & name, + FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) { /* For computing the store path. */ auto hashSink = std::make_unique(hashAlgo); @@ -1075,50 +1059,53 @@ StorePath LocalStore::addToStoreCommon( destination store path is already valid, we just delete the temporary path. Otherwise, we move it to the destination store path. */ - bool inMemory = true; - std::string dump; + bool inMemory = false; - auto source = sinkToSource([&](Sink & sink, size_t & wanted) { - LambdaSink sink2([&](const unsigned char * buf, size_t len) { - (*hashSink)(buf, len); - - if (inMemory) { - if (dump.size() + len > settings.narBufferSize) { - inMemory = false; - sink << 1; - sink((const unsigned char *) dump.data(), dump.size()); - dump.clear(); - } else { - dump.append((const char *) buf, len); - } - } + std::string dump; - if (!inMemory) sink(buf, len); - }); - demux(sink2, wanted); - }); + /* Fill out buffer, and decide whether we are working strictly in + memory based on whether we break out because the buffer is full + or the original source is empty */ + while (dump.size() < settings.narBufferSize) { + auto oldSize = dump.size(); + constexpr size_t chunkSize = 1024; + auto want = std::min(chunkSize, settings.narBufferSize - oldSize); + dump.resize(oldSize + want); + auto got = 0; + try { + got = source.read((uint8_t *) dump.data() + oldSize, want); + } catch (EndOfFile &) { + inMemory = true; + break; + } + /* Start hashing as we get data */ + (*hashSink)((const uint8_t *) dump.data() + oldSize, got); + dump.resize(oldSize + got); + } std::unique_ptr delTempDir; Path tempPath; - try { - /* Wait for the source coroutine to give us some dummy - data. This is so that we don't create the temporary - directory if the NAR fits in memory. */ - readInt(*source); + if (!inMemory) { + StringSource dumpSource { dump }; + TeeSource rest { source, *hashSink }; + ChainSource bothSource { + .source1 = dumpSource, + /* Continue hashing what's left, but don't rehash what we + already did. */ + .source2 = rest, + }; auto tempDir = createTempDir(realStoreDir, "add"); delTempDir = std::make_unique(tempDir); tempPath = tempDir + "/x"; if (method == FileIngestionMethod::Recursive) - restorePath(tempPath, *source); + restorePath(tempPath, bothSource); else - writeFile(tempPath, *source); + writeFile(tempPath, bothSource); - } catch (EndOfFile &) { - if (!inMemory) throw; - /* The NAR fits in memory, so we didn't do restorePath(). */ + dump.clear(); } auto [hash, size] = hashSink->finish(); @@ -1143,12 +1130,12 @@ StorePath LocalStore::addToStoreCommon( autoGC(); if (inMemory) { + StringSource dumpSource { dump }; /* Restore from the NAR in memory. */ - StringSource source(dump); if (method == FileIngestionMethod::Recursive) - restorePath(realPath, source); + restorePath(realPath, dumpSource); else - writeFile(realPath, source); + writeFile(realPath, dumpSource); } else { /* Move the temporary path we restored above. */ if (rename(tempPath.c_str(), realPath.c_str())) diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index ae23004c474..355c2814f2c 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -290,10 +290,6 @@ private: specified by the ‘secret-key-files’ option. */ void signPathInfo(ValidPathInfo & info); - StorePath addToStoreCommon( - const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair, - std::function demux); - Path getRealStoreDir() override { return realStoreDir; } void createUser(const std::string & userName, uid_t userId) override; diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 141e9e9767f..4c72dc9f267 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -329,5 +329,18 @@ void StringSink::operator () (const unsigned char * data, size_t len) s->append((const char *) data, len); } +size_t ChainSource::read(unsigned char * data, size_t len) +{ + if (useSecond) { + return source2.read(data, len); + } else { + try { + return source1.read(data, len); + } catch (EndOfFile &) { + useSecond = true; + return this->read(data, len); + } + } +} } diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 6cb9d1bf506..3e3735ca53e 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -256,6 +256,19 @@ struct LambdaSource : Source } }; +/* Chain two sources together so after the first is exhausted, the second is + used */ +struct ChainSource : Source +{ + Source & source1, & source2; + bool useSecond = false; + ChainSource(Source & s1, Source & s2) + : source1(s1), source2(s2) + { } + + size_t read(unsigned char * data, size_t len) override; +}; + /* Convert a function that feeds data into a Sink into a Source. The Source executes the function as a coroutine. */ @@ -271,7 +284,7 @@ static inline std::unique_ptr sinkToSource( throw EndOfFile("coroutine has finished"); }) { - return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof); + return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof); } From 5602637d9ea195784368e99a226718fc95e6b978 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Wed, 15 Jul 2020 23:19:41 +0000 Subject: [PATCH 09/11] Revert "LocalStore::addToStoreFromDump copy in chunks" This reverts commit 592851fb67cd15807109d6f65fb81f6af89af966. We don't need this extra feature anymore --- src/libstore/local-store.cc | 2 +- src/libutil/serialise.cc | 33 +++++++++++++-------------------- src/libutil/serialise.hh | 11 +---------- 3 files changed, 15 insertions(+), 31 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 07e1679da03..b2b5afaddfe 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1037,7 +1037,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, FileIngestionMethod method, HashType hashAlgo, PathFilter & filter, RepairFlag repair) { Path srcPath(absPath(_srcPath)); - auto source = sinkToSource([&](Sink & sink, size_t & wanted) { + auto source = sinkToSource([&](Sink & sink) { if (method == FileIngestionMethod::Recursive) dumpPath(srcPath, sink, filter); else diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 4c72dc9f267..00c94511375 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -165,43 +165,35 @@ size_t StringSource::read(unsigned char * data, size_t len) #endif std::unique_ptr sinkToSource( - std::function fun, + std::function fun, std::function eof) { struct SinkToSource : Source { - typedef boost::coroutines2::coroutine> coro_t; + typedef boost::coroutines2::coroutine coro_t; - std::function fun; + std::function fun; std::function eof; std::optional coro; bool started = false; - /* It would be nicer to have the co-routines have both args and a - return value, but unfortunately that was removed from Boost's - implementation for some reason, so we use some extra state instead. - */ - size_t wanted = 0; - - SinkToSource(std::function fun, std::function eof) + SinkToSource(std::function fun, std::function eof) : fun(fun), eof(eof) { } - std::basic_string cur; + std::string cur; size_t pos = 0; size_t read(unsigned char * data, size_t len) override { - wanted = len < cur.size() ? 0 : len - cur.size(); - if (!coro) { + if (!coro) coro = coro_t::pull_type([&](coro_t::push_type & yield) { - LambdaSink sink([&](const uint8_t * data, size_t len) { - if (len) yield(std::basic_string { data, len }); + LambdaSink sink([&](const unsigned char * data, size_t len) { + if (len) yield(std::string((const char *) data, len)); }); - fun(sink, wanted); + fun(sink); }); - } if (!*coro) { eof(); abort(); } @@ -211,10 +203,11 @@ std::unique_ptr sinkToSource( pos = 0; } - auto numCopied = cur.copy(data, len, pos); - pos += numCopied; + auto n = std::min(cur.size() - pos, len); + memcpy(data, (unsigned char *) cur.data() + pos, n); + pos += n; - return numCopied; + return n; } }; diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 3e3735ca53e..aa6b42597be 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -273,19 +273,10 @@ struct ChainSource : Source /* Convert a function that feeds data into a Sink into a Source. The Source executes the function as a coroutine. */ std::unique_ptr sinkToSource( - std::function fun, - std::function eof = []() { - throw EndOfFile("coroutine has finished"); - }); - -static inline std::unique_ptr sinkToSource( std::function fun, std::function eof = []() { throw EndOfFile("coroutine has finished"); - }) -{ - return sinkToSource([fun](Sink & s, size_t & _) { fun(s); }, eof); -} + }); void writePadding(size_t len, Sink & sink); From 3dcca18c30cbc09652f5ac644a9f8750f9ced0c9 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Thu, 16 Jul 2020 13:39:03 +0000 Subject: [PATCH 10/11] Fix bug in TeeSource We use this to simplify `LocalStore::addToStoreFromDump`. Also, hope I fixed build error with old clang (used in Darwin CI). --- src/libstore/local-store.cc | 14 ++++---------- src/libutil/serialise.hh | 2 +- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index b2b5afaddfe..96d10d9bac8 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1047,11 +1047,12 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, } -StorePath LocalStore::addToStoreFromDump(Source & source, const string & name, +StorePath LocalStore::addToStoreFromDump(Source & source0, const string & name, FileIngestionMethod method, HashType hashAlgo, RepairFlag repair) { /* For computing the store path. */ auto hashSink = std::make_unique(hashAlgo); + TeeSource source { source0, *hashSink }; /* Read the source path into memory, but only if it's up to narBufferSize bytes. If it's larger, write it to a temporary @@ -1078,8 +1079,6 @@ StorePath LocalStore::addToStoreFromDump(Source & source, const string & name, inMemory = true; break; } - /* Start hashing as we get data */ - (*hashSink)((const uint8_t *) dump.data() + oldSize, got); dump.resize(oldSize + got); } @@ -1087,14 +1086,9 @@ StorePath LocalStore::addToStoreFromDump(Source & source, const string & name, Path tempPath; if (!inMemory) { + /* Drain what we pulled so far, and then keep on pulling */ StringSource dumpSource { dump }; - TeeSource rest { source, *hashSink }; - ChainSource bothSource { - .source1 = dumpSource, - /* Continue hashing what's left, but don't rehash what we - already did. */ - .source2 = rest, - }; + ChainSource bothSource { dumpSource, source }; auto tempDir = createTempDir(realStoreDir, "add"); delTempDir = std::make_unique(tempDir); diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index aa6b42597be..5d9acf8878c 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -189,7 +189,7 @@ struct TeeSource : Source size_t read(unsigned char * data, size_t len) { size_t n = orig.read(data, len); - sink(data, len); + sink(data, n); return n; } }; From 9aae179f34ec2f38167585c07f18a8ab8acefafb Mon Sep 17 00:00:00 2001 From: Carlo Nucera Date: Mon, 20 Jul 2020 20:18:12 -0400 Subject: [PATCH 11/11] Correct bug, thoroughly document addToStoreSlow --- src/libstore/store-api.cc | 62 ++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 17 deletions(-) diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 6c0a61766e2..14661722d90 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -222,40 +222,68 @@ StorePath Store::computeStorePathForText(const string & name, const string & s, } +/* +The aim of this function is to compute in one pass the correct ValidPathInfo for +the files that we are trying to add to the store. To accomplish that in one +pass, given the different kind of inputs that we can take (normal nar archives, +nar archives with non SHA-256 hashes, and flat files), we set up a net of sinks +and aliases. Also, since the dataflow is obfuscated by this, we include here a +graphviz diagram: + +digraph graphname { + node [shape=box] + fileSource -> narSink + narSink [style=dashed] + narSink -> unsualHashTee [style = dashed, label = "Recursive && !SHA-256"] + narSink -> narHashSink [style = dashed, label = "else"] + unsualHashTee -> narHashSink + unsualHashTee -> caHashSink + fileSource -> parseSink + parseSink [style=dashed] + parseSink-> fileSink [style = dashed, label = "Flat"] + parseSink -> blank [style = dashed, label = "Recursive"] + fileSink -> caHashSink +} +*/ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath, FileIngestionMethod method, HashType hashAlgo, std::optional expectedCAHash) { - /* FIXME: inefficient: we're reading/hashing 'tmpFile' two - times. */ HashSink narHashSink { htSHA256 }; HashSink caHashSink { hashAlgo }; - RetrieveRegularNARSink fileSink { caHashSink }; - TeeSink sinkIfNar { narHashSink, caHashSink }; + /* Note that fileSink and unusualHashTee must be mutually exclusive, since + they both write to caHashSink. Note that that requisite is currently true + because the former is only used in the flat case. */ + RetrieveRegularNARSink fileSink { caHashSink }; + TeeSink unusualHashTee { narHashSink, caHashSink }; - /* We use the tee sink if we need to hash the nar twice */ - auto & sink = method == FileIngestionMethod::Recursive && hashAlgo != htSHA256 - ? static_cast(sinkIfNar) + auto & narSink = method == FileIngestionMethod::Recursive && hashAlgo != htSHA256 + ? static_cast(unusualHashTee) : narHashSink; - auto fileSource = sinkToSource([&](Sink & sink) { - dumpPath(srcPath, sink); + /* Functionally, this means that fileSource will yield the content of + srcPath. The fact that we use scratchpadSink as a temporary buffer here + is an implementation detail. */ + auto fileSource = sinkToSource([&](Sink & scratchpadSink) { + dumpPath(srcPath, scratchpadSink); }); - TeeSource tapped { *fileSource, sink }; + /* tapped provides the same data as fileSource, but we also write all the + information to narSink. */ + TeeSource tapped { *fileSource, narSink }; ParseSink blank; auto & parseSink = method == FileIngestionMethod::Flat ? fileSink : blank; - parseDump( - parseSink, - method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 - ? *fileSource // don't need to hash twice if we just can use the `narHash` twice - : tapped); + /* The information that flows from tapped (besides being replicated in + narSink), is now put in parseSink. */ + parseDump(parseSink, tapped); + /* We extract the result of the computation from the sink by calling + finish. */ auto [narHash, narSize] = narHashSink.finish(); auto hash = method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 @@ -271,8 +299,8 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath, info.ca = FixedOutputHash { .method = method, .hash = hash }; if (!isValidPath(info.path)) { - auto source = sinkToSource([&](Sink & sink) { - dumpPath(srcPath, sink); + auto source = sinkToSource([&](Sink & scratchpadSink) { + dumpPath(srcPath, scratchpadSink); }); addToStore(info, *source); }