diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index dcfec3b3872d..1afde74d89d4 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -2087,7 +2087,7 @@ static void prim_toFile(EvalState & state, const PosIdx pos, Value * * args, Val }) : ({ StringSource s { contents }; - state.store->addToStoreFromDump(s, name, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair); + state.store->addToStoreFromDump(s, name, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair); }); /* Note: we don't need to add `context' to the context of the diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index 4aa9f9a1c1f5..a38f8ff28b56 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -305,7 +305,8 @@ void BinaryCacheStore::addToStore(const ValidPathInfo & info, Source & narSource StorePath BinaryCacheStore::addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) @@ -313,17 +314,26 @@ StorePath BinaryCacheStore::addToStoreFromDump( std::optional caHash; std::string nar; + // Calculating Git hash from NAR stream not yet implemented. May not + // be possible to implement in single-pass in NAR is in an + // inconvenient order. Could fetch after uploading, however. + if (hashMethod.getFileIngestionMethod() == FileIngestionMethod::Git) + unsupported("addToStoreFromDump"); + if (auto * dump2p = dynamic_cast(&dump)) { auto & dump2 = *dump2p; // Hack, this gives us a "replayable" source so we can compute // multiple hashes more easily. - caHash = hashString(HashAlgorithm::SHA256, dump2.s); - switch (method.getFileIngestionMethod()) { - case FileIngestionMethod::Recursive: + // + // Only calculate if the dump is in the right format, however. + if (static_cast(dumpMethod) == hashMethod.getFileIngestionMethod()) + caHash = hashString(HashAlgorithm::SHA256, dump2.s); + switch (dumpMethod) { + case FileSerialisationMethod::Recursive: // The dump is already NAR in this case, just use it. nar = dump2.s; break; - case FileIngestionMethod::Flat: + case FileSerialisationMethod::Flat: { // The dump is Flat, so we need to convert it to NAR with a // single file. @@ -332,14 +342,11 @@ StorePath BinaryCacheStore::addToStoreFromDump( nar = std::move(s.s); break; } - case FileIngestionMethod::Git: - unsupported("addToStoreFromDump"); - break; } } else { // Otherwise, we have to do th same hashing as NAR so our single // hash will suffice for both purposes. - if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) + if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) unsupported("addToStoreFromDump"); } StringSource narDump { nar }; @@ -354,7 +361,7 @@ StorePath BinaryCacheStore::addToStoreFromDump( *this, name, ContentAddressWithReferences::fromParts( - method, + hashMethod, caHash ? *caHash : nar.first, { .others = references, diff --git a/src/libstore/binary-cache-store.hh b/src/libstore/binary-cache-store.hh index 76de2d11addf..7c282830933f 100644 --- a/src/libstore/binary-cache-store.hh +++ b/src/libstore/binary-cache-store.hh @@ -125,7 +125,8 @@ public: StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) override; diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 3e967a178196..08652a98bedc 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -1312,12 +1312,13 @@ struct RestrictedStore : public virtual RestrictedStoreConfig, public virtual In StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) override { - auto path = next->addToStoreFromDump(dump, name, method, hashAlgo, references, repair); + auto path = next->addToStoreFromDump(dump, name, dumpMethod, hashMethod, hashAlgo, references, repair); goal.addDependency(path); return path; } diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index 46eadeed61ab..0c47664f34ad 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -401,11 +401,23 @@ static void performOp(TunnelLogger * logger, ref store, logger->startWork(); auto pathInfo = [&]() { // NB: FramedSource must be out of scope before logger->stopWork(); - auto [contentAddressMethod, hashAlgo_] = ContentAddressMethod::parse(camStr); - auto hashAlgo = hashAlgo_; // work around clang bug + auto [contentAddressMethod, hashAlgo] = ContentAddressMethod::parse(camStr); FramedSource source(from); + FileSerialisationMethod dumpMethod; + switch (contentAddressMethod.getFileIngestionMethod()) { + case FileIngestionMethod::Flat: + dumpMethod = FileSerialisationMethod::Flat; + break; + case FileIngestionMethod::Recursive: + dumpMethod = FileSerialisationMethod::Recursive; + break; + case FileIngestionMethod::Git: + // Use NAR; Git is not a serialization method + dumpMethod = FileSerialisationMethod::Recursive; + break; + } // TODO these two steps are essentially RemoteStore::addCAToStore. Move it up to Store. - auto path = store->addToStoreFromDump(source, name, contentAddressMethod, hashAlgo, refs, repair); + auto path = store->addToStoreFromDump(source, name, dumpMethod, contentAddressMethod, hashAlgo, refs, repair); return store->queryPathInfo(path); }(); logger->stopWork(); @@ -431,34 +443,23 @@ static void performOp(TunnelLogger * logger, ref store, hashAlgo = parseHashAlgo(hashAlgoRaw); } + // Old protocol always sends NAR, regardless of hashing method auto dumpSource = sinkToSource([&](Sink & saved) { - if (method == FileIngestionMethod::Recursive) { - /* We parse the NAR dump through into `saved` unmodified, - so why all this extra work? We still parse the NAR so - that we aren't sending arbitrary data to `saved` - unwittingly`, and we know when the NAR ends so we don't - consume the rest of `from` and can't parse another - command. (We don't trust `addToStoreFromDump` to not - eagerly consume the entire stream it's given, past the - length of the Nar. */ - TeeSource savedNARSource(from, saved); - NullParseSink sink; /* just parse the NAR */ - parseDump(sink, savedNARSource); - } else if (method == FileIngestionMethod::Flat) { - /* Incrementally parse the NAR file, stripping the - metadata, and streaming the sole file we expect into - `saved`. */ - RegularFileSink savedRegular { saved }; - parseDump(savedRegular, from); - if (!savedRegular.regular) throw Error("regular file expected"); - } else { - /* Should have validated above that no other file ingestion - method was used. */ - assert(false); - } + /* We parse the NAR dump through into `saved` unmodified, + so why all this extra work? We still parse the NAR so + that we aren't sending arbitrary data to `saved` + unwittingly`, and we know when the NAR ends so we don't + consume the rest of `from` and can't parse another + command. (We don't trust `addToStoreFromDump` to not + eagerly consume the entire stream it's given, past the + length of the Nar. */ + TeeSource savedNARSource(from, saved); + NullParseSink sink; /* just parse the NAR */ + parseDump(sink, savedNARSource); }); logger->startWork(); - auto path = store->addToStoreFromDump(*dumpSource, baseName, method, hashAlgo); + auto path = store->addToStoreFromDump( + *dumpSource, baseName, FileSerialisationMethod::Recursive, method, hashAlgo); logger->stopWork(); to << store->printStorePath(path); @@ -490,7 +491,7 @@ static void performOp(TunnelLogger * logger, ref store, logger->startWork(); auto path = ({ StringSource source { s }; - store->addToStoreFromDump(source, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair); + store->addToStoreFromDump(source, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, NoRepair); }); logger->stopWork(); to << store->printStorePath(path); diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 2fafcb8e7dd7..ba2f8ff4f53d 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -150,7 +150,7 @@ StorePath writeDerivation(Store & store, }) : ({ StringSource s { contents }; - store.addToStoreFromDump(s, suffix, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair); + store.addToStoreFromDump(s, suffix, FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references, repair); }); } diff --git a/src/libstore/dummy-store.cc b/src/libstore/dummy-store.cc index e4f13b8f4100..30f23cff91a4 100644 --- a/src/libstore/dummy-store.cc +++ b/src/libstore/dummy-store.cc @@ -61,7 +61,8 @@ struct DummyStore : public virtual DummyStoreConfig, public virtual Store virtual StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) override diff --git a/src/libstore/legacy-ssh-store.hh b/src/libstore/legacy-ssh-store.hh index 7cee31d66d0e..f929e4b3e8e4 100644 --- a/src/libstore/legacy-ssh-store.hh +++ b/src/libstore/legacy-ssh-store.hh @@ -72,7 +72,8 @@ struct LegacySSHStore : public virtual LegacySSHStoreConfig, public virtual Stor virtual StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) override diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 0c9f9e333ffa..133303141bff 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1144,7 +1144,8 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source, StorePath LocalStore::addToStoreFromDump( Source & source0, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) @@ -1197,7 +1198,13 @@ StorePath LocalStore::addToStoreFromDump( Path tempDir; AutoCloseFD tempDirFd; - if (!inMemory) { + bool methodsMatch = (FileIngestionMethod) dumpMethod == hashMethod; + + /* If the methods don't match, our streaming hash of the dump is the + wrong sort, and we need to rehash. */ + bool inMemoryAndDontNeedRestore = inMemory && methodsMatch; + + if (!inMemoryAndDontNeedRestore) { /* Drain what we pulled so far, and then keep on pulling */ StringSource dumpSource { dump }; ChainSource bothSource { dumpSource, source }; @@ -1206,40 +1213,23 @@ StorePath LocalStore::addToStoreFromDump( delTempDir = std::make_unique(tempDir); tempPath = tempDir + "/x"; - auto fim = method.getFileIngestionMethod(); - switch (fim) { - case FileIngestionMethod::Flat: - case FileIngestionMethod::Recursive: - restorePath(tempPath, bothSource, (FileSerialisationMethod) fim); - break; - case FileIngestionMethod::Git: { - RestoreSink sink; - sink.dstPath = tempPath; - auto accessor = getFSAccessor(); - git::restore(sink, bothSource, [&](Hash childHash) { - return std::pair { - &*accessor, - CanonPath { - printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo { - .method = FileIngestionMethod::Git, - .hash = childHash, - })) - }, - }; - }); - break; - } - } + restorePath(tempPath, bothSource, dumpMethod); dumpBuffer.reset(); dump = {}; } - auto [hash, size] = hashSink->finish(); + auto [dumpHash, size] = hashSink->finish(); + + PosixSourceAccessor accessor; auto desc = ContentAddressWithReferences::fromParts( - method, - hash, + hashMethod, + methodsMatch + ? dumpHash + : hashPath( + accessor, CanonPath { tempPath }, + hashMethod.getFileIngestionMethod(), hashAlgo), { .others = references, // caller is not capable of creating a self-reference, because this is content-addressed without modulus @@ -1265,32 +1255,19 @@ StorePath LocalStore::addToStoreFromDump( autoGC(); - if (inMemory) { + if (inMemoryAndDontNeedRestore) { StringSource dumpSource { dump }; /* Restore from the buffer in memory. */ - auto fim = method.getFileIngestionMethod(); + auto fim = hashMethod.getFileIngestionMethod(); switch (fim) { case FileIngestionMethod::Flat: case FileIngestionMethod::Recursive: restorePath(realPath, dumpSource, (FileSerialisationMethod) fim); break; - case FileIngestionMethod::Git: { - RestoreSink sink; - sink.dstPath = realPath; - auto accessor = getFSAccessor(); - git::restore(sink, dumpSource, [&](Hash childHash) { - return std::pair { - &*accessor, - CanonPath { - printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo { - .method = FileIngestionMethod::Git, - .hash = childHash, - })) - }, - }; - }); - break; - } + case FileIngestionMethod::Git: + // doesn't correspond to serialization method, so + // this should be unreachable + assert(false); } } else { /* Move the temporary path we restored above. */ @@ -1299,8 +1276,8 @@ StorePath LocalStore::addToStoreFromDump( /* For computing the nar hash. In recursive SHA-256 mode, this is the same as the store hash, so no need to do it again. */ - auto narHash = std::pair { hash, size }; - if (method != FileIngestionMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) { + auto narHash = std::pair { dumpHash, size }; + if (dumpMethod != FileSerialisationMethod::Recursive || hashAlgo != HashAlgorithm::SHA256) { HashSink narSink { HashAlgorithm::SHA256 }; dumpPath(realPath, narSink); narHash = narSink.finish(); diff --git a/src/libstore/local-store.hh b/src/libstore/local-store.hh index ba56d3ead125..7eff1d6905ea 100644 --- a/src/libstore/local-store.hh +++ b/src/libstore/local-store.hh @@ -180,7 +180,8 @@ public: StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) override; diff --git a/src/libstore/remote-store.cc b/src/libstore/remote-store.cc index ef0685a2602e..f2cbf72179d2 100644 --- a/src/libstore/remote-store.cc +++ b/src/libstore/remote-store.cc @@ -509,12 +509,28 @@ ref RemoteStore::addCAToStore( StorePath RemoteStore::addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method, + FileSerialisationMethod dumpMethod, + ContentAddressMethod hashMethod, HashAlgorithm hashAlgo, const StorePathSet & references, RepairFlag repair) { - return addCAToStore(dump, name, method, hashAlgo, references, repair)->path; + FileSerialisationMethod fsm; + switch (hashMethod.getFileIngestionMethod()) { + case FileIngestionMethod::Flat: + fsm = FileSerialisationMethod::Flat; + break; + case FileIngestionMethod::Recursive: + fsm = FileSerialisationMethod::Recursive; + break; + case FileIngestionMethod::Git: + // Use NAR; Git is not a serialization method + fsm = FileSerialisationMethod::Recursive; + break; + } + if (fsm != dumpMethod) + unsupported("RemoteStore::addToStoreFromDump doesn't support this `dumpMethod` `hashMethod` combination"); + return addCAToStore(dump, name, hashMethod, hashAlgo, references, repair)->path; } diff --git a/src/libstore/remote-store.hh b/src/libstore/remote-store.hh index c51a21375628..d630adc08b79 100644 --- a/src/libstore/remote-store.hh +++ b/src/libstore/remote-store.hh @@ -87,7 +87,8 @@ public: StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) override; diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index fcb67b04e19f..8a6f0a80df98 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -269,40 +269,23 @@ StorePath Store::addToStore( PathFilter & filter, RepairFlag repair) { + FileSerialisationMethod fsm; + switch (method.getFileIngestionMethod()) { + case FileIngestionMethod::Flat: + fsm = FileSerialisationMethod::Flat; + break; + case FileIngestionMethod::Recursive: + fsm = FileSerialisationMethod::Recursive; + break; + case FileIngestionMethod::Git: + // Use NAR; Git is not a serialization method + fsm = FileSerialisationMethod::Recursive; + break; + } auto source = sinkToSource([&](Sink & sink) { - auto fim = method.getFileIngestionMethod(); - switch (fim) { - case FileIngestionMethod::Flat: - case FileIngestionMethod::Recursive: - { - dumpPath(accessor, path, sink, (FileSerialisationMethod) fim, filter); - break; - } - case FileIngestionMethod::Git: - { - git::dump( - accessor, path, - sink, - // recursively add to store if path is a directory - [&](const CanonPath & path) -> git::TreeEntry { - auto storePath = addToStore("git", accessor, path, method, hashAlgo, references, filter, repair); - auto info = queryPathInfo(storePath); - assert(info->ca); - assert(info->ca->method == FileIngestionMethod::Git); - auto stat = getFSAccessor()->lstat(CanonPath(printStorePath(storePath))); - auto gitModeOpt = git::convertMode(stat.type); - assert(gitModeOpt); - return { - .mode = *gitModeOpt, - .hash = info->ca->hash, - }; - }, - filter); - break; - } - } + dumpPath(accessor, path, sink, fsm, filter); }); - return addToStoreFromDump(*source, name, method, hashAlgo, references, repair); + return addToStoreFromDump(*source, name, fsm, method, hashAlgo, references, repair); } void Store::addMultipleToStore( diff --git a/src/libstore/store-api.hh b/src/libstore/store-api.hh index b3c935db104d..b15bfaea484b 100644 --- a/src/libstore/store-api.hh +++ b/src/libstore/store-api.hh @@ -456,14 +456,23 @@ public: * in `dump`, which is either a NAR serialisation (if recursive == * true) or simply the contents of a regular file (if recursive == * false). - * `dump` may be drained * - * \todo remove? + * `dump` may be drained. + * + * @param dumpMethod What serialisation format is `dump`, i.e. how + * to deserialize it. Must either match hashMethod or be + * `FileSerialisationMethod::Recursive`. + * + * @param hashMethod How content addressing? Need not match be the + * same as `dumpMethod`. + * + * @todo remove? */ virtual StorePath addToStoreFromDump( Source & dump, std::string_view name, - ContentAddressMethod method = FileIngestionMethod::Recursive, + FileSerialisationMethod dumpMethod = FileSerialisationMethod::Recursive, + ContentAddressMethod hashMethod = FileIngestionMethod::Recursive, HashAlgorithm hashAlgo = HashAlgorithm::SHA256, const StorePathSet & references = StorePathSet(), RepairFlag repair = NoRepair) = 0; @@ -762,7 +771,7 @@ protected: * Helper for methods that are not unsupported: this is used for * default definitions for virtual methods that are meant to be overriden. * - * \todo Using this should be a last resort. It is better to make + * @todo Using this should be a last resort. It is better to make * the method "virtual pure" and/or move it to a subclass. */ [[noreturn]] void unsupported(const std::string & op) diff --git a/src/libutil/file-content-address.hh b/src/libutil/file-content-address.hh index 838db64aa323..793a82b2d5cb 100644 --- a/src/libutil/file-content-address.hh +++ b/src/libutil/file-content-address.hh @@ -98,7 +98,7 @@ enum struct FileIngestionMethod : uint8_t { * given method. * * Unlike the other `hashPath`, this works on an arbitrary - * `FileIngestionMethod` instead of `FileSerializationMethod`, but + * `FileIngestionMethod` instead of `FileSerialisationMethod`, but * doesn't return the size as this is this is not a both simple and * useful defined for a merkle format. */ diff --git a/src/nix-env/user-env.cc b/src/nix-env/user-env.cc index 2f9c988d5f20..8bebe2b9eeee 100644 --- a/src/nix-env/user-env.cc +++ b/src/nix-env/user-env.cc @@ -113,7 +113,7 @@ bool createUserEnv(EvalState & state, PackageInfos & elems, std::string str2 = str.str(); StringSource source { str2 }; state.store->addToStoreFromDump( - source, "env-manifest.nix", TextIngestionMethod {}, HashAlgorithm::SHA256, references); + source, "env-manifest.nix", FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, references); }); /* Get the environment builder expression. */ diff --git a/src/nix/develop.cc b/src/nix/develop.cc index 1f2891378386..637689c36dbb 100644 --- a/src/nix/develop.cc +++ b/src/nix/develop.cc @@ -226,7 +226,7 @@ static StorePath getDerivationEnvironment(ref store, ref evalStore auto getEnvShPath = ({ StringSource source { getEnvSh }; evalStore->addToStoreFromDump( - source, "get-env.sh", TextIngestionMethod {}, HashAlgorithm::SHA256, {}); + source, "get-env.sh", FileSerialisationMethod::Flat, TextIngestionMethod {}, HashAlgorithm::SHA256, {}); }); drv.args = {store->printStorePath(getEnvShPath)};