Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Content addressing and adding to store cleanup #9325

Merged
merged 2 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions perl/lib/Nix/Store.xs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "globals.hh"
#include "store-api.hh"
#include "crypto.hh"
#include "posix-source-accessor.hh"

#include <sodium.h>
#include <nlohmann/json.hpp>
Expand Down Expand Up @@ -205,7 +206,10 @@ void importPaths(int fd, int dontCheckSigs)
SV * hashPath(char * algo, int base32, char * path)
PPCODE:
try {
Hash h = hashPath(parseHashAlgo(algo), path).first;
PosixSourceAccessor accessor;
Hash h = hashPath(
accessor, CanonPath::fromCwd(path),
FileIngestionMethod::Recursive, parseHashAlgo(algo)).first;
auto s = h.to_string(base32 ? HashFormat::Nix32 : HashFormat::Base16, false);
XPUSHs(sv_2mortal(newSVpv(s.c_str(), 0)));
} catch (Error & e) {
Expand Down Expand Up @@ -281,7 +285,11 @@ SV * addToStore(char * srcPath, int recursive, char * algo)
PPCODE:
try {
auto method = recursive ? FileIngestionMethod::Recursive : FileIngestionMethod::Flat;
auto path = store()->addToStore(std::string(baseNameOf(srcPath)), srcPath, method, parseHashAlgo(algo));
PosixSourceAccessor accessor;
auto path = store()->addToStore(
std::string(baseNameOf(srcPath)),
accessor, CanonPath::fromCwd(srcPath),
method, parseHashAlgo(algo));
XPUSHs(sv_2mortal(newSVpv(store()->printStorePath(path).c_str(), 0)));
} catch (Error & e) {
croak("%s", e.what());
Expand Down
2 changes: 1 addition & 1 deletion src/libcmd/installable-value.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ ref<InstallableValue> InstallableValue::require(ref<Installable> installable)
std::optional<DerivedPathWithInfo> InstallableValue::trySinglePathToDerivedPaths(Value & v, const PosIdx pos, std::string_view errorCtx)
{
if (v.type() == nPath) {
auto storePath = v.path().fetchToStore(state->store);
auto storePath = v.path().fetchToStore(*state->store);
return {{
.path = DerivedPath::Opaque {
.path = std::move(storePath),
Expand Down
2 changes: 1 addition & 1 deletion src/libexpr/eval.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2317,7 +2317,7 @@ StorePath EvalState::copyPathToStore(NixStringContext & context, const SourcePat
auto dstPath = i != srcToStore.end()
? i->second
: [&]() {
auto dstPath = path.fetchToStore(store, path.baseName(), FileIngestionMethod::Recursive, nullptr, repair);
auto dstPath = path.fetchToStore(*store, path.baseName(), FileIngestionMethod::Recursive, nullptr, repair);
allowPath(dstPath);
srcToStore.insert_or_assign(path, dstPath);
printMsg(lvlChatty, "copied source '%1%' -> '%2%'", path, store->printStorePath(dstPath));
Expand Down
12 changes: 9 additions & 3 deletions src/libexpr/primops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2072,8 +2072,14 @@ static void prim_toFile(EvalState & state, const PosIdx pos, Value * * args, Val
}

auto storePath = settings.readOnlyMode
? state.store->computeStorePathForText(name, contents, refs)
: state.store->addTextToStore(name, contents, refs, state.repair);
? state.store->makeFixedOutputPathFromCA(name, TextInfo {
.hash = hashString(HashAlgorithm::SHA256, contents),
.references = std::move(refs),
})
: ({
StringSource s { contents };
state.store->addToStoreFromDump(s, name, TextIngestionMethod {}, HashAlgorithm::SHA256, refs, state.repair);
});

/* Note: we don't need to add `context' to the context of the
result, since `storePath' itself has references to the paths
Expand Down Expand Up @@ -2229,7 +2235,7 @@ static void addPath(
});

if (!expectedHash || !state.store->isValidPath(*expectedStorePath)) {
auto dstPath = path.fetchToStore(state.store, name, method, filter.get(), state.repair);
auto dstPath = path.fetchToStore(*state.store, name, method, filter.get(), state.repair);
if (expectedHash && expectedStorePath != dstPath)
state.debugThrowLastTrace(Error("store path mismatch in (possibly filtered) path added from '%s'", path));
state.allowAndSetStorePathString(dstPath, v);
Expand Down
16 changes: 8 additions & 8 deletions src/libfetchers/cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ struct CacheImpl : Cache
}

void add(
ref<Store> store,
Store & store,
const Attrs & inAttrs,
const Attrs & infoAttrs,
const StorePath & storePath,
Expand All @@ -115,13 +115,13 @@ struct CacheImpl : Cache
_state.lock()->add.use()
(attrsToJSON(inAttrs).dump())
(attrsToJSON(infoAttrs).dump())
(store->printStorePath(storePath))
(store.printStorePath(storePath))
(locked)
(time(0)).exec();
}

std::optional<std::pair<Attrs, StorePath>> lookup(
ref<Store> store,
Store & store,
const Attrs & inAttrs) override
{
if (auto res = lookupExpired(store, inAttrs)) {
Expand All @@ -134,7 +134,7 @@ struct CacheImpl : Cache
}

std::optional<Result> lookupExpired(
ref<Store> store,
Store & store,
const Attrs & inAttrs) override
{
auto state(_state.lock());
Expand All @@ -148,19 +148,19 @@ struct CacheImpl : Cache
}

auto infoJSON = stmt.getStr(0);
auto storePath = store->parseStorePath(stmt.getStr(1));
auto storePath = store.parseStorePath(stmt.getStr(1));
auto locked = stmt.getInt(2) != 0;
auto timestamp = stmt.getInt(3);

store->addTempRoot(storePath);
if (!store->isValidPath(storePath)) {
store.addTempRoot(storePath);
if (!store.isValidPath(storePath)) {
// FIXME: we could try to substitute 'storePath'.
debug("ignoring disappeared cache entry '%s'", inAttrsJSON);
return {};
}

debug("using cache entry '%s' -> '%s', '%s'",
inAttrsJSON, infoJSON, store->printStorePath(storePath));
inAttrsJSON, infoJSON, store.printStorePath(storePath));

return Result {
.expired = !locked && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)),
Expand Down
6 changes: 3 additions & 3 deletions src/libfetchers/cache.hh
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ struct Cache

/* Old cache for things that have a store path. */
virtual void add(
ref<Store> store,
Store & store,
const Attrs & inAttrs,
const Attrs & infoAttrs,
const StorePath & storePath,
bool locked) = 0;

virtual std::optional<std::pair<Attrs, StorePath>> lookup(
ref<Store> store,
Store & store,
const Attrs & inAttrs) = 0;

struct Result
Expand All @@ -68,7 +68,7 @@ struct Cache
};

virtual std::optional<Result> lookupExpired(
ref<Store> store,
Store & store,
const Attrs & inAttrs) = 0;
};

Expand Down
2 changes: 1 addition & 1 deletion src/libfetchers/fetchers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ void InputScheme::clone(const Input & input, const Path & destDir) const
std::pair<StorePath, Input> InputScheme::fetch(ref<Store> store, const Input & input)
{
auto [accessor, input2] = getAccessor(store, input);
auto storePath = SourcePath(accessor).fetchToStore(store, input2.getName());
auto storePath = SourcePath(accessor).fetchToStore(*store, input2.getName());
return {storePath, input2};
}

Expand Down
4 changes: 2 additions & 2 deletions src/libfetchers/git.cc
Original file line number Diff line number Diff line change
Expand Up @@ -368,14 +368,14 @@ struct GitInputScheme : InputScheme

RepoInfo getRepoInfo(const Input & input) const
{
auto checkHashType = [&](const std::optional<Hash> & hash)
auto checkHashAlgorithm = [&](const std::optional<Hash> & hash)
{
if (hash.has_value() && !(hash->algo == HashAlgorithm::SHA1 || hash->algo == HashAlgorithm::SHA256))
throw Error("Hash '%s' is not supported by Git. Supported types are sha1 and sha256.", hash->to_string(HashFormat::Base16, true));
};

if (auto rev = input.getRev())
checkHashType(rev);
checkHashAlgorithm(rev);

RepoInfo repoInfo;

Expand Down
4 changes: 2 additions & 2 deletions src/libfetchers/github.cc
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ struct GitArchiveInputScheme : InputScheme
{"rev", rev->gitRev()},
});

if (auto res = getCache()->lookup(store, lockedAttrs)) {
if (auto res = getCache()->lookup(*store, lockedAttrs)) {
input.attrs.insert_or_assign("lastModified", getIntAttr(res->first, "lastModified"));
return {std::move(res->second), input};
}
Expand All @@ -213,7 +213,7 @@ struct GitArchiveInputScheme : InputScheme
input.attrs.insert_or_assign("lastModified", uint64_t(result.lastModified));

getCache()->add(
store,
*store,
lockedAttrs,
{
{"rev", rev->gitRev()},
Expand Down
39 changes: 25 additions & 14 deletions src/libfetchers/input-accessor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
namespace nix {

StorePath InputAccessor::fetchToStore(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to note but some of this code is here to minimize the diff with lazy-trees. So if we start changing this, it will increases the lazy-trees diff again.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@edolstra happy to fix that merge conflict :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now there is a conflict in lazy trees from #8848 that is very domain-specific on both side. I can't help with that so much, but I am happy to do all the other conflicts!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fetchToStore stays non-virtual on lazy-trees so I think there is no issue here. This PR makes Store able to consume SourceAccessors directly so we just use that. Anything in an InputAcessor beyond the underlying SourceAccessor Store::addToStore won't care about anyways.

Copy link
Member Author

@Ericson2314 Ericson2314 Nov 10, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the fingerprinting, Sure that method could go back, but we could also flip around the logic so the SourceAccesor itself has a maybeLstat-like method to try to cheaply content-address file system object, and the InputAccesor implementations do the getCache() to look this up.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On lazy-trees, InputAccessor::fetchToStore() implements a SQLite cache for SourcePath -> StorePath mappings that I want to backport to master. It optimizes stuff like src = <some big tree>. That's why it's important not to get rid of fetchToStore().

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK I'll put it back

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is now put back (not sure why this thread isn't marked outdated).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not on a red or green line, is probably why.

ref<Store> store,
Store & store,
const CanonPath & path,
std::string_view name,
FileIngestionMethod method,
ContentAddressMethod method,
PathFilter * filter,
RepairFlag repair)
{
Expand All @@ -20,10 +20,24 @@ StorePath InputAccessor::fetchToStore(
if (!filter && fingerprint) {
cacheKey = fetchers::Attrs{
{"_what", "fetchToStore"},
{"store", store->storeDir},
{"store", store.storeDir},
{"name", std::string(name)},
{"fingerprint", *fingerprint},
{"method", (uint8_t) method},
{
"method",
std::visit(overloaded {
[](const TextIngestionMethod &) {
return "text";
},
[](const FileIngestionMethod & fim) {
switch (fim) {
case FileIngestionMethod::Flat: return "flat";
case FileIngestionMethod::Recursive: return "nar";
default: assert(false);
}
},
}, method.raw),
},
{"path", path.abs()}
};
if (auto res = fetchers::getCache()->lookup(store, *cacheKey)) {
Expand All @@ -35,17 +49,14 @@ StorePath InputAccessor::fetchToStore(

Activity act(*logger, lvlChatty, actUnknown, fmt("copying '%s' to the store", showPath(path)));

auto source = sinkToSource([&](Sink & sink) {
if (method == FileIngestionMethod::Recursive)
dumpPath(path, sink, filter ? *filter : defaultPathFilter);
else
readFile(path, sink);
});
auto filter2 = filter ? *filter : defaultPathFilter;

auto storePath =
settings.readOnlyMode
? store->computeStorePathFromDump(*source, name, method, HashAlgorithm::SHA256).first
: store->addToStoreFromDump(*source, name, method, HashAlgorithm::SHA256, repair);
? store.computeStorePath(
name, *this, path, method, HashAlgorithm::SHA256, {}, filter2).first
: store.addToStore(
name, *this, path, method, HashAlgorithm::SHA256, {}, filter2, repair);

if (cacheKey)
fetchers::getCache()->add(store, *cacheKey, {}, storePath, true);
Expand All @@ -60,9 +71,9 @@ std::ostream & operator << (std::ostream & str, const SourcePath & path)
}

StorePath SourcePath::fetchToStore(
ref<Store> store,
Store & store,
std::string_view name,
FileIngestionMethod method,
ContentAddressMethod method,
PathFilter * filter,
RepairFlag repair) const
{
Expand Down
8 changes: 4 additions & 4 deletions src/libfetchers/input-accessor.hh
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ struct InputAccessor : virtual SourceAccessor, std::enable_shared_from_this<Inpu
}

StorePath fetchToStore(
ref<Store> store,
Store & store,
const CanonPath & path,
std::string_view name = "source",
FileIngestionMethod method = FileIngestionMethod::Recursive,
ContentAddressMethod method = FileIngestionMethod::Recursive,
PathFilter * filter = nullptr,
RepairFlag repair = NoRepair);
};
Expand Down Expand Up @@ -116,9 +116,9 @@ struct SourcePath
* Copy this `SourcePath` to the Nix store.
*/
StorePath fetchToStore(
ref<Store> store,
Store & store,
std::string_view name = "source",
FileIngestionMethod method = FileIngestionMethod::Recursive,
ContentAddressMethod method = FileIngestionMethod::Recursive,
PathFilter * filter = nullptr,
RepairFlag repair = NoRepair) const;

Expand Down
25 changes: 16 additions & 9 deletions src/libfetchers/mercurial.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "tarfile.hh"
#include "store-api.hh"
#include "url-parts.hh"
#include "posix-source-accessor.hh"

#include "fetch-settings.hh"

Expand Down Expand Up @@ -210,15 +211,20 @@ struct MercurialInputScheme : InputScheme
return files.count(file);
};

auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, HashAlgorithm::SHA256, filter);
PosixSourceAccessor accessor;
auto storePath = store->addToStore(
input.getName(),
accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256, {},
filter);

return {std::move(storePath), input};
}
}

if (!input.getRef()) input.attrs.insert_or_assign("ref", "default");

auto checkHashType = [&](const std::optional<Hash> & hash)
auto checkHashAlgorithm = [&](const std::optional<Hash> & hash)
{
if (hash.has_value() && hash->algo != HashAlgorithm::SHA1)
throw Error("Hash '%s' is not supported by Mercurial. Only sha1 is supported.", hash->to_string(HashFormat::Base16, true));
Expand All @@ -227,7 +233,7 @@ struct MercurialInputScheme : InputScheme

auto getLockedAttrs = [&]()
{
checkHashType(input.getRev());
checkHashAlgorithm(input.getRev());

return Attrs({
{"type", "hg"},
Expand All @@ -246,7 +252,7 @@ struct MercurialInputScheme : InputScheme
};

if (input.getRev()) {
if (auto res = getCache()->lookup(store, getLockedAttrs()))
if (auto res = getCache()->lookup(*store, getLockedAttrs()))
return makeResult(res->first, std::move(res->second));
}

Expand All @@ -259,7 +265,7 @@ struct MercurialInputScheme : InputScheme
{"ref", *input.getRef()},
});

if (auto res = getCache()->lookup(store, unlockedAttrs)) {
if (auto res = getCache()->lookup(*store, unlockedAttrs)) {
auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), HashAlgorithm::SHA1);
if (!input.getRev() || input.getRev() == rev2) {
input.attrs.insert_or_assign("rev", rev2.gitRev());
Expand Down Expand Up @@ -305,7 +311,7 @@ struct MercurialInputScheme : InputScheme
auto revCount = std::stoull(tokens[1]);
input.attrs.insert_or_assign("ref", tokens[2]);

if (auto res = getCache()->lookup(store, getLockedAttrs()))
if (auto res = getCache()->lookup(*store, getLockedAttrs()))
return makeResult(res->first, std::move(res->second));

Path tmpDir = createTempDir();
Expand All @@ -315,7 +321,8 @@ struct MercurialInputScheme : InputScheme

deletePath(tmpDir + "/.hg_archival.txt");

auto storePath = store->addToStore(name, tmpDir);
PosixSourceAccessor accessor;
auto storePath = store->addToStore(name, accessor, CanonPath { tmpDir });

Attrs infoAttrs({
{"rev", input.getRev()->gitRev()},
Expand All @@ -324,14 +331,14 @@ struct MercurialInputScheme : InputScheme

if (!_input.getRev())
getCache()->add(
store,
*store,
unlockedAttrs,
infoAttrs,
storePath,
false);

getCache()->add(
store,
*store,
getLockedAttrs(),
infoAttrs,
storePath,
Expand Down
Loading
Loading