Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Constant space addToStoreFromDump and deduplicate code #3801

Merged
merged 15 commits into from
Jul 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/libstore/build.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2774,7 +2774,7 @@ struct RestrictedStore : public LocalFSStore
goal.addDependency(info.path);
}

StorePath addToStoreFromDump(const string & dump, const string & name,
StorePath addToStoreFromDump(Source & dump, const string & name,
FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override
{
auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, repair);
Expand Down
17 changes: 11 additions & 6 deletions src/libstore/daemon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -350,21 +350,24 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
}

case wopAddToStore: {
std::string s, baseName;
HashType hashAlgo;
std::string baseName;
FileIngestionMethod method;
{
bool fixed; uint8_t recursive;
from >> baseName >> fixed /* obsolete */ >> recursive >> s;
bool fixed;
uint8_t recursive;
std::string hashAlgoRaw;
from >> baseName >> fixed /* obsolete */ >> recursive >> hashAlgoRaw;
if (recursive > (uint8_t) FileIngestionMethod::Recursive)
throw Error("unsupported FileIngestionMethod with value of %i; you may need to upgrade nix-daemon", recursive);
method = FileIngestionMethod { recursive };
/* Compatibility hack. */
if (!fixed) {
s = "sha256";
hashAlgoRaw = "sha256";
method = FileIngestionMethod::Recursive;
}
hashAlgo = parseHashType(hashAlgoRaw);
}
HashType hashAlgo = parseHashType(s);

StringSink saved;
TeeSource savedNARSource(from, saved);
Expand All @@ -382,7 +385,9 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
logger->startWork();
if (!savedRegular.regular) throw Error("regular file expected");

auto path = store->addToStoreFromDump(*saved.s, baseName, method, hashAlgo);
// FIXME: try to stream directly from `from`.
StringSource dumpSource { *saved.s };
auto path = store->addToStoreFromDump(dumpSource, baseName, method, hashAlgo);
logger->stopWork();

to << store->printStorePath(path);
Expand Down
172 changes: 61 additions & 111 deletions src/libstore/local-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1033,138 +1033,76 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
}


StorePath LocalStore::addToStoreFromDump(const string & dump, const string & name,
FileIngestionMethod method, HashType hashAlgo, RepairFlag repair)
{
Hash h = hashString(hashAlgo, dump);

auto dstPath = makeFixedOutputPath(method, h, name);

addTempRoot(dstPath);

if (repair || !isValidPath(dstPath)) {

/* The first check above is an optimisation to prevent
unnecessary lock acquisition. */

auto realPath = Store::toRealPath(dstPath);

PathLocks outputLock({realPath});

if (repair || !isValidPath(dstPath)) {

deletePath(realPath);

autoGC();

if (method == FileIngestionMethod::Recursive) {
StringSource source(dump);
restorePath(realPath, source);
} else
writeFile(realPath, dump);

canonicalisePathMetaData(realPath, -1);

/* Register the SHA-256 hash of the NAR serialisation of
the path in the database. We may just have computed it
above (if called with recursive == true and hashAlgo ==
sha256); otherwise, compute it here. */
HashResult hash;
if (method == FileIngestionMethod::Recursive) {
hash.first = hashAlgo == htSHA256 ? h : hashString(htSHA256, dump);
hash.second = dump.size();
} else
hash = hashPath(htSHA256, realPath);

optimisePath(realPath); // FIXME: combine with hashPath()

ValidPathInfo info(dstPath);
info.narHash = hash.first;
info.narSize = hash.second;
info.ca = FixedOutputHash { .method = method, .hash = h };
registerValidPath(info);
}

outputLock.setDeletion(true);
}

return dstPath;
}


StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
FileIngestionMethod method, HashType hashAlgo, PathFilter & filter, RepairFlag repair)
{
Path srcPath(absPath(_srcPath));
auto source = sinkToSource([&](Sink & sink) {
if (method == FileIngestionMethod::Recursive)
dumpPath(srcPath, sink, filter);
else
readFile(srcPath, sink);
});
return addToStoreFromDump(*source, name, method, hashAlgo, repair);
}

if (method != FileIngestionMethod::Recursive)
return addToStoreFromDump(readFile(srcPath), name, method, hashAlgo, repair);

/* For computing the NAR hash. */
auto sha256Sink = std::make_unique<HashSink>(htSHA256);

/* For computing the store path. In recursive SHA-256 mode, this
is the same as the NAR hash, so no need to do it again. */
std::unique_ptr<HashSink> hashSink =
hashAlgo == htSHA256
? nullptr
: std::make_unique<HashSink>(hashAlgo);
StorePath LocalStore::addToStoreFromDump(Source & source0, const string & name,
FileIngestionMethod method, HashType hashAlgo, RepairFlag repair)
{
/* For computing the store path. */
auto hashSink = std::make_unique<HashSink>(hashAlgo);
TeeSource source { source0, *hashSink };

/* Read the source path into memory, but only if it's up to
narBufferSize bytes. If it's larger, write it to a temporary
location in the Nix store. If the subsequently computed
destination store path is already valid, we just delete the
temporary path. Otherwise, we move it to the destination store
path. */
bool inMemory = true;
std::string nar;

auto source = sinkToSource([&](Sink & sink) {

LambdaSink sink2([&](const unsigned char * buf, size_t len) {
(*sha256Sink)(buf, len);
if (hashSink) (*hashSink)(buf, len);

if (inMemory) {
if (nar.size() + len > settings.narBufferSize) {
inMemory = false;
sink << 1;
sink((const unsigned char *) nar.data(), nar.size());
nar.clear();
} else {
nar.append((const char *) buf, len);
}
}

if (!inMemory) sink(buf, len);
});

dumpPath(srcPath, sink2, filter);
});
bool inMemory = false;

std::string dump;

/* Fill out buffer, and decide whether we are working strictly in
memory based on whether we break out because the buffer is full
or the original source is empty */
while (dump.size() < settings.narBufferSize) {
auto oldSize = dump.size();
constexpr size_t chunkSize = 1024;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
constexpr size_t chunkSize = 1024;
constexpr size_t chunkSize = 65536;

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, this is now corrected in #3844

auto want = std::min(chunkSize, settings.narBufferSize - oldSize);
dump.resize(oldSize + want);
auto got = 0;
try {
got = source.read((uint8_t *) dump.data() + oldSize, want);
} catch (EndOfFile &) {
inMemory = true;
break;
}
dump.resize(oldSize + got);
}

std::unique_ptr<AutoDelete> delTempDir;
Path tempPath;

try {
/* Wait for the source coroutine to give us some dummy
data. This is so that we don't create the temporary
directory if the NAR fits in memory. */
readInt(*source);
if (!inMemory) {
/* Drain what we pulled so far, and then keep on pulling */
StringSource dumpSource { dump };
ChainSource bothSource { dumpSource, source };

auto tempDir = createTempDir(realStoreDir, "add");
delTempDir = std::make_unique<AutoDelete>(tempDir);
tempPath = tempDir + "/x";

restorePath(tempPath, *source);
if (method == FileIngestionMethod::Recursive)
restorePath(tempPath, bothSource);
else
writeFile(tempPath, bothSource);

} catch (EndOfFile &) {
if (!inMemory) throw;
/* The NAR fits in memory, so we didn't do restorePath(). */
dump.clear();
}

auto sha256 = sha256Sink->finish();

Hash hash = hashSink ? hashSink->finish().first : sha256.first;
auto [hash, size] = hashSink->finish();

auto dstPath = makeFixedOutputPath(method, hash, name);

Expand All @@ -1186,22 +1124,34 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
autoGC();

if (inMemory) {
StringSource dumpSource { dump };
/* Restore from the NAR in memory. */
StringSource source(nar);
restorePath(realPath, source);
if (method == FileIngestionMethod::Recursive)
restorePath(realPath, dumpSource);
else
writeFile(realPath, dumpSource);
} else {
/* Move the temporary path we restored above. */
if (rename(tempPath.c_str(), realPath.c_str()))
throw Error("renaming '%s' to '%s'", tempPath, realPath);
}

/* For computing the nar hash. In recursive SHA-256 mode, this
is the same as the store hash, so no need to do it again. */
auto narHash = std::pair { hash, size };
if (method != FileIngestionMethod::Recursive || hashAlgo != htSHA256) {
HashSink narSink { htSHA256 };
dumpPath(realPath, narSink);
narHash = narSink.finish();
}

canonicalisePathMetaData(realPath, -1); // FIXME: merge into restorePath

optimisePath(realPath);

ValidPathInfo info(dstPath);
info.narHash = sha256.first;
info.narSize = sha256.second;
info.narHash = narHash.first;
info.narSize = narHash.second;
info.ca = FixedOutputHash { .method = method, .hash = hash };
registerValidPath(info);
}
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/local-store.hh
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ public:
in `dump', which is either a NAR serialisation (if recursive ==
true) or simply the contents of a regular file (if recursive ==
false). */
StorePath addToStoreFromDump(const string & dump, const string & name,
StorePath addToStoreFromDump(Source & dump, const string & name,
FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair) override;

StorePath addTextToStore(const string & name, const string & s,
Expand Down
62 changes: 45 additions & 17 deletions src/libstore/store-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -222,40 +222,68 @@ StorePath Store::computeStorePathForText(const string & name, const string & s,
}


/*
The aim of this function is to compute in one pass the correct ValidPathInfo for
the files that we are trying to add to the store. To accomplish that in one
pass, given the different kind of inputs that we can take (normal nar archives,
nar archives with non SHA-256 hashes, and flat files), we set up a net of sinks
and aliases. Also, since the dataflow is obfuscated by this, we include here a
graphviz diagram:

digraph graphname {
node [shape=box]
fileSource -> narSink
narSink [style=dashed]
narSink -> unsualHashTee [style = dashed, label = "Recursive && !SHA-256"]
narSink -> narHashSink [style = dashed, label = "else"]
unsualHashTee -> narHashSink
unsualHashTee -> caHashSink
fileSource -> parseSink
parseSink [style=dashed]
parseSink-> fileSink [style = dashed, label = "Flat"]
parseSink -> blank [style = dashed, label = "Recursive"]
fileSink -> caHashSink
}
*/
ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath,
FileIngestionMethod method, HashType hashAlgo,
std::optional<Hash> expectedCAHash)
{
/* FIXME: inefficient: we're reading/hashing 'tmpFile' two
times. */
HashSink narHashSink { htSHA256 };
HashSink caHashSink { hashAlgo };
RetrieveRegularNARSink fileSink { caHashSink };

TeeSink sinkIfNar { narHashSink, caHashSink };
/* Note that fileSink and unusualHashTee must be mutually exclusive, since
they both write to caHashSink. Note that that requisite is currently true
because the former is only used in the flat case. */
RetrieveRegularNARSink fileSink { caHashSink };
TeeSink unusualHashTee { narHashSink, caHashSink };

/* We use the tee sink if we need to hash the nar twice */
auto & sink = method == FileIngestionMethod::Recursive && hashAlgo != htSHA256
? static_cast<Sink &>(sinkIfNar)
auto & narSink = method == FileIngestionMethod::Recursive && hashAlgo != htSHA256
? static_cast<Sink &>(unusualHashTee)
: narHashSink;

auto fileSource = sinkToSource([&](Sink & sink) {
dumpPath(srcPath, sink);
/* Functionally, this means that fileSource will yield the content of
srcPath. The fact that we use scratchpadSink as a temporary buffer here
is an implementation detail. */
auto fileSource = sinkToSource([&](Sink & scratchpadSink) {
dumpPath(srcPath, scratchpadSink);
});

TeeSource tapped { *fileSource, sink };
/* tapped provides the same data as fileSource, but we also write all the
information to narSink. */
TeeSource tapped { *fileSource, narSink };

ParseSink blank;
auto & parseSink = method == FileIngestionMethod::Flat
? fileSink
: blank;

parseDump(
parseSink,
method == FileIngestionMethod::Recursive && hashAlgo == htSHA256
? *fileSource // don't need to hash twice if we just can use the `narHash` twice
: tapped);
/* The information that flows from tapped (besides being replicated in
narSink), is now put in parseSink. */
parseDump(parseSink, tapped);

/* We extract the result of the computation from the sink by calling
finish. */
auto [narHash, narSize] = narHashSink.finish();

auto hash = method == FileIngestionMethod::Recursive && hashAlgo == htSHA256
Expand All @@ -271,8 +299,8 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath,
info.ca = FixedOutputHash { .method = method, .hash = hash };

if (!isValidPath(info.path)) {
auto source = sinkToSource([&](Sink & sink) {
dumpPath(srcPath, sink);
auto source = sinkToSource([&](Sink & scratchpadSink) {
dumpPath(srcPath, scratchpadSink);
});
addToStore(info, *source);
}
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/store-api.hh
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ public:
std::optional<Hash> expectedCAHash = {});

// FIXME: remove?
virtual StorePath addToStoreFromDump(const string & dump, const string & name,
virtual StorePath addToStoreFromDump(Source & dump, const string & name,
FileIngestionMethod method = FileIngestionMethod::Recursive, HashType hashAlgo = htSHA256, RepairFlag repair = NoRepair)
{
throw Error("addToStoreFromDump() is not supported by this store");
Expand Down
Loading