From 88b5a98f7ac61dda9df10d006dfc07946ecb5a69 Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Tue, 2 Jun 2020 18:54:52 -0500 Subject: [PATCH 1/7] Handle treeHash in Git fetcher Handles the treeHash attr in the Git fetcher. This can be used like: builtins.fetchTree { type = "git"; url = "https://github.com/nixos/nix.git"; treeHash = "0674cab54e1226d8e1e1d04572f07c5296edc862"; } Nix stores tree hash as the content hash in the store so that fetched objects are deduped. This uses the Git file ingestion method, which converts the files into git objects for hashing. --- src/libfetchers/git.cc | 101 +++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 28 deletions(-) diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 17cc602285c..93fdd0ba8a8 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -3,6 +3,7 @@ #include "globals.hh" #include "tarfile.hh" #include "store-api.hh" +#include "git.hh" #include @@ -27,6 +28,7 @@ struct GitInput : Input ParsedURL url; std::optional ref; std::optional rev; + std::optional treeHash; bool shallow = false; bool submodules = false; @@ -42,12 +44,13 @@ struct GitInput : Input other2 && url == other2->url && rev == other2->rev + && treeHash == other2->treeHash && ref == other2->ref; } bool isImmutable() const override { - return (bool) rev || narHash; + return (bool) rev || treeHash || narHash; } std::optional getRef() const override { return ref; } @@ -59,6 +62,7 @@ struct GitInput : Input ParsedURL url2(url); if (url2.scheme != "git") url2.scheme = "git+" + url2.scheme; if (rev) url2.query.insert_or_assign("rev", rev->gitRev()); + if (treeHash) url2.query.insert_or_assign("treeHash", treeHash->gitRev()); if (ref) url2.query.insert_or_assign("ref", *ref); if (shallow) url2.query.insert_or_assign("shallow", "1"); return url2; @@ -72,6 +76,8 @@ struct GitInput : Input attrs.emplace("ref", *ref); if (rev) attrs.emplace("rev", rev->gitRev()); + if (treeHash) + attrs.emplace("treeHash", 
treeHash->gitRev()); if (shallow) attrs.emplace("shallow", true); if (submodules) @@ -96,6 +102,9 @@ struct GitInput : Input auto input = std::make_shared(*this); assert(!rev || rev->type == htSHA1); + assert(!treeHash || treeHash->type == htSHA1); + + auto ingestionMethod = treeHash ? FileIngestionMethod::Git : FileIngestionMethod::Recursive; std::string cacheType = "git"; if (shallow) cacheType += "-shallow"; @@ -103,18 +112,23 @@ struct GitInput : Input auto getImmutableAttrs = [&]() { - return Attrs({ + Attrs attrs({ {"type", cacheType}, {"name", name}, - {"rev", input->rev->gitRev()}, }); + if (input->treeHash) + attrs.insert_or_assign("treeHash", input->treeHash->gitRev()); + else + attrs.insert_or_assign("rev", input->rev->gitRev()); + return attrs; }; auto makeResult = [&](const Attrs & infoAttrs, StorePath && storePath) -> std::pair> { - assert(input->rev); + assert(input->rev || input->treeHash); assert(!rev || rev == input->rev); + assert(!treeHash || treeHash == input->treeHash); return { Tree { .actualPath = store->toRealPath(storePath), @@ -138,7 +152,7 @@ struct GitInput : Input // If this is a local directory and no ref or revision is // given, then allow the use of an unclean working tree. - if (!input->ref && !input->rev && isLocal) { + if (!input->ref && !input->rev && !input->treeHash && isLocal) { bool clean = false; /* Check whether this repo has any commits. 
There are @@ -195,7 +209,7 @@ struct GitInput : Input return files.count(file); }; - auto storePath = store->addToStore("source", actualUrl, FileIngestionMethod::Recursive, htSHA256, filter); + auto storePath = store->addToStore("source", actualUrl, ingestionMethod, htSHA256, filter); auto tree = Tree { .actualPath = store->printStorePath(storePath), @@ -224,7 +238,7 @@ struct GitInput : Input if (isLocal) { - if (!input->rev) + if (!input->rev && !input->treeHash) input->rev = Hash(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input->ref })), htSHA1); repoDir = actualUrl; @@ -233,12 +247,20 @@ struct GitInput : Input if (auto res = getCache()->lookup(store, mutableAttrs)) { auto rev2 = Hash(getStrAttr(res->first, "rev"), htSHA1); - if (!rev || rev == rev2) { + if (!input->rev || rev == rev2) { input->rev = rev2; return makeResult(res->first, std::move(res->second)); } } + if (auto res = getCache()->lookup(store, mutableAttrs)) { + auto treeHash2 = Hash(getStrAttr(res->first, "treeHash"), htSHA1); + if (!input->treeHash || treeHash == treeHash2) { + input->treeHash = treeHash2; + return makeResult(res->first, std::move(res->second)); + } + } + Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, actualUrl).to_string(Base32, false); repoDir = cacheDir; @@ -255,11 +277,12 @@ struct GitInput : Input bool doFetch; time_t now = time(0); - /* If a rev was specified, we need to fetch if it's not in the - repo. */ - if (input->rev) { + /* If a rev or treeHash is specified, we need to fetch if + it's not in the repo. */ + if (input->rev || input->treeHash) { try { - runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input->rev->gitRev() }); + auto gitHash = input->treeHash ? 
input->treeHash : input->rev; + runProgram("git", true, { "-C", repoDir, "cat-file", "-e", gitHash->gitRev() }); doFetch = false; } catch (ExecError & e) { if (WIFEXITED(e.status)) { @@ -297,7 +320,7 @@ struct GitInput : Input utimes(localRefFile.c_str(), times); } - if (!input->rev) + if (!input->rev && !input->treeHash) input->rev = Hash(chomp(readFile(localRefFile)), htSHA1); } @@ -308,7 +331,10 @@ struct GitInput : Input // FIXME: check whether rev is an ancestor of ref. - printTalkative("using revision %s of repo '%s'", input->rev->gitRev(), actualUrl); + if (input->rev) + printTalkative("using revision %s of repo '%s'", input->rev->gitRev(), actualUrl); + else if (input->treeHash) + printTalkative("using tree %s of repo '%s'", input->treeHash->gitRev(), actualUrl); /* Now that we know the ref, check again whether we have it in the store. */ @@ -319,6 +345,9 @@ struct GitInput : Input AutoDelete delTmpDir(tmpDir, true); PathFilter filter = defaultPathFilter; + if (submodules && treeHash) + throw Error("Cannot combine tree hashes with git submodules"); + if (submodules) { Path tmpGitDir = createTempDir(); AutoDelete delTmpGitDir(tmpGitDir, true); @@ -330,7 +359,7 @@ struct GitInput : Input runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force", "--update-head-ok", "--", repoDir, "refs/*:refs/*" }); - runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input->rev->gitRev() }); + runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input->treeHash ? input->treeHash->gitRev() : input->rev->gitRev() }); runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", actualUrl }); runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" }); @@ -339,7 +368,7 @@ struct GitInput : Input // FIXME: should pipe this, or find some better way to extract a // revision. 
auto source = sinkToSource([&](Sink & sink) { - RunOptions gitOptions("git", { "-C", repoDir, "archive", input->rev->gitRev() }); + RunOptions gitOptions("git", { "-C", repoDir, "archive", input->treeHash ? input->treeHash->gitRev() : input->rev->gitRev() }); gitOptions.standardOut = &sink; runProgram2(gitOptions); }); @@ -347,20 +376,33 @@ struct GitInput : Input unpackTarfile(*source, tmpDir); } - auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter); + auto storePath = store->addToStore(name, tmpDir, ingestionMethod, ingestionMethod == FileIngestionMethod::Git ? htSHA1 : htSHA256, filter); - auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", input->rev->gitRev() })); + // verify treeHash is what we actually obtained in the nix store + if (input->treeHash) { + auto path = store->toRealPath(store->printStorePath(storePath)); + auto gotHash = dumpGitHash(htSHA1, path); + if (gotHash != input->treeHash) + throw Error("Git hash mismatch in input '%s' (%s), expected '%s', got '%s'", + to_string(), path, input->treeHash->gitRev(), gotHash.gitRev()); + } - Attrs infoAttrs({ - {"rev", input->rev->gitRev()}, - {"lastModified", lastModified}, - }); + Attrs infoAttrs({}); + if (input->treeHash) { + infoAttrs.insert_or_assign("treeHash", input->treeHash->gitRev()); + infoAttrs.insert_or_assign("revCount", 0); + infoAttrs.insert_or_assign("lastModified", 0); + } else { + auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", input->rev->gitRev() })); + infoAttrs.insert_or_assign("lastModified", lastModified); + infoAttrs.insert_or_assign("rev", input->rev->gitRev()); - if (!shallow) - infoAttrs.insert_or_assign("revCount", - std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input->rev->gitRev() }))); + if (!shallow) + infoAttrs.insert_or_assign("revCount", + std::stoull(runProgram("git", true, { "-C", repoDir, 
"rev-list", "--count", input->rev->gitRev() }))); + } - if (!this->rev) + if (!this->rev && !this->treeHash) getCache()->add( store, mutableAttrs, @@ -397,7 +439,7 @@ struct GitInputScheme : InputScheme attrs.emplace("type", "git"); for (auto &[name, value] : url.query) { - if (name == "rev" || name == "ref") + if (name == "rev" || name == "ref" || name == "treeHash") attrs.emplace(name, value); else url2.query.emplace(name, value); @@ -413,7 +455,7 @@ struct GitInputScheme : InputScheme if (maybeGetStrAttr(attrs, "type") != "git") return {}; for (auto & [name, value] : attrs) - if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules") + if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "treeHash") throw Error("unsupported Git input attribute '%s'", name); auto input = std::make_unique(parseURL(getStrAttr(attrs, "url"))); @@ -425,6 +467,9 @@ struct GitInputScheme : InputScheme if (auto rev = maybeGetStrAttr(attrs, "rev")) input->rev = Hash(*rev, htSHA1); + if (auto treeHash = maybeGetStrAttr(attrs, "treeHash")) + input->treeHash = Hash(*treeHash, htSHA1); + input->shallow = maybeGetBoolAttr(attrs, "shallow").value_or(false); input->submodules = maybeGetBoolAttr(attrs, "submodules").value_or(false); From 7f77ba17d498a04a57f39d8fb984e1388f1ca6a2 Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Wed, 3 Jun 2020 16:08:58 -0500 Subject: [PATCH 2/7] Support null bytes in hashes for dumpGitTree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit we need to keep this in vector form so that we don’t accidentally skip strings starting with 0x00. 
--- src/libutil/git.cc | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/libutil/git.cc b/src/libutil/git.cc index 3e613949a26..dfc9b5de8e7 100644 --- a/src/libutil/git.cc +++ b/src/libutil/git.cc @@ -154,7 +154,8 @@ GitMode dumpGitBlob(const Path & path, const struct stat st, Sink & sink) GitMode dumpGitTree(const GitTree & entries, Sink & sink) { - std::string s1 = ""; + vector v1; + for (auto & i : entries) { unsigned int mode; switch (i.second.first) { @@ -162,14 +163,20 @@ GitMode dumpGitTree(const GitTree & entries, Sink & sink) case GitMode::Executable: mode = 100755; break; case GitMode::Regular: mode = 100644; break; } - s1 += (format("%06d %s\0%s"s) % mode % i.first % i.second.second.hash).str(); + auto s1 = (format("%06d %s") % mode % i.first).str(); + std::copy(s1.begin(), s1.end(), std::back_inserter(v1)); + v1.push_back(0); + std::copy(i.second.second.hash, i.second.second.hash + 20, std::back_inserter(v1)); } - std::string s2 = (format("tree %d\0%s"s) % s1.size() % s1).str(); + vector v2; + auto s2 = (format("tree %d"s) % v1.size()).str(); + std::copy(s2.begin(), s2.end(), std::back_inserter(v2)); + v2.push_back(0); + std::copy(v1.begin(), v1.end(), std::back_inserter(v2)); + + sink(v2.data(), v2.size()); - vector v; - std::copy(s2.begin(), s2.end(), std::back_inserter(v)); - sink(v.data(), v.size()); return GitMode::Directory; } From 6611e362caae2f0dc500457e526422b3f41c319c Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Wed, 3 Jun 2020 16:30:53 -0500 Subject: [PATCH 3/7] =?UTF-8?q?Use=20=E2=80=9Cgit=E2=80=9D=20for=20all=20g?= =?UTF-8?q?it=20object=20names?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Git trees can contain entries whose names start with “.” or are otherwise not valid Nix store names. To avoid having to create some escaping format, just use “git” as the store name everywhere. The tree object has the real name. 
--- src/libstore/local-store.cc | 2 +- src/libstore/remote-store.cc | 2 +- src/libutil/git.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 14c816e7282..d1079ca4c30 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1143,7 +1143,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath, throw SysError(format("getting attributes of path '%1%'") % srcPath); if (S_ISDIR(st.st_mode)) for (auto & i : readDirectory(srcPath)) - addToStore(i.name, srcPath + "/" + i.name, method, hashAlgo, filter, repair); + addToStore("git", srcPath + "/" + i.name, method, hashAlgo, filter, repair); dumpGit(hashAlgo, srcPath, sink, filter); break; diff --git a/src/libstore/remote-store.cc b/src/libstore/remote-store.cc index fbc161cc92c..31ede2f2fcf 100644 --- a/src/libstore/remote-store.cc +++ b/src/libstore/remote-store.cc @@ -501,7 +501,7 @@ StorePath RemoteStore::addToStore(const string & name, const Path & _srcPath, throw SysError(format("getting attributes of path '%1%'") % srcPath); if (S_ISDIR(st.st_mode)) for (auto & i : readDirectory(srcPath)) - addToStore(i.name, srcPath + "/" + i.name, method, hashAlgo, filter, repair); + addToStore("git", srcPath + "/" + i.name, method, hashAlgo, filter, repair); } auto conn(getConnection()); diff --git a/src/libutil/git.cc b/src/libutil/git.cc index dfc9b5de8e7..72ed922f92c 100644 --- a/src/libutil/git.cc +++ b/src/libutil/git.cc @@ -112,7 +112,7 @@ static void parse(ParseSink & sink, Source & source, const Path & path, const Pa Hash hash(htSHA1); std::copy(hashs.begin(), hashs.end(), hash.hash); - string entryName = getStoreEntry(storeDir, hash, name); + string entryName = getStoreEntry(storeDir, hash, "git"); Path entry = absPath(realStoreDir + "/" + entryName); struct stat st; From d07212c0565eacf247150b91a11ed53646eb4ff6 Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Wed, 3 Jun 2020 17:11:41 -0500 
Subject: [PATCH 4/7] Show error when git treeHash is not a tree --- src/libfetchers/git.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 93fdd0ba8a8..2d09197b03e 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -324,6 +324,12 @@ struct GitInput : Input input->rev = Hash(chomp(readFile(localRefFile)), htSHA1); } + if (input->treeHash) { + auto type = chomp(runProgram("git", true, { "-C", repoDir, "cat-file", "-t", input->treeHash->gitRev() })); + if (type != "tree") + throw Error(format("Need a tree object, found '%s' object in %s") % type % input->treeHash->gitRev()); + } + bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true"; if (isShallow && !shallow) From 3457b3eba50703a0d458e4644182a0a7ae74f345 Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Wed, 3 Jun 2020 17:31:21 -0500 Subject: [PATCH 5/7] Copy executable bit correctly in fs-sink --- src/libutil/fs-sink.hh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/libutil/fs-sink.hh b/src/libutil/fs-sink.hh index efd13685df3..e07695064d3 100644 --- a/src/libutil/fs-sink.hh +++ b/src/libutil/fs-sink.hh @@ -101,7 +101,10 @@ struct RestoreSink : ParseSink if (lstat(entry.c_str(), &st)) throw SysError(format("getting attributes of path '%1%'") % entry); if (S_ISREG(st.st_mode)) { - createRegularFile(destination + "/" + i.name); + if (st.st_mode & S_IXUSR) + createExecutableFile(destination + "/" + i.name); + else + createRegularFile(destination + "/" + i.name); copyFile(entry); } else if (S_ISDIR(st.st_mode)) copyDirectory(entry, destination + "/" + i.name); From 3e8ddacdb74b54e68733db6b6e90b353129ca647 Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Wed, 3 Jun 2020 18:07:39 -0500 Subject: [PATCH 6/7] =?UTF-8?q?Don=E2=80=99t=20add=200=20in=20front=20of?= =?UTF-8?q?=20dumpGitTree=20format?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit this should just be 40000, not 040000. --- src/libutil/git.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libutil/git.cc b/src/libutil/git.cc index 72ed922f92c..14dd0bf8d9d 100644 --- a/src/libutil/git.cc +++ b/src/libutil/git.cc @@ -163,7 +163,7 @@ GitMode dumpGitTree(const GitTree & entries, Sink & sink) case GitMode::Executable: mode = 100755; break; case GitMode::Regular: mode = 100644; break; } - auto s1 = (format("%06d %s") % mode % i.first).str(); + auto s1 = (format("%d %s") % mode % i.first).str(); std::copy(s1.begin(), s1.end(), std::back_inserter(v1)); v1.push_back(0); std::copy(i.second.second.hash, i.second.second.hash + 20, std::back_inserter(v1)); From 192ea2b99215ed0d9a1da1f90e19b072d4f21cf7 Mon Sep 17 00:00:00 2001 From: Matthew Bauer Date: Wed, 3 Jun 2020 18:25:26 -0500 Subject: [PATCH 7/7] Observe correct git sort order test/ should come *after* test.sh see http://git.661346.n2.nabble.com/In-tree-object-Must-the-td7446900.html#a7447657 --- src/libutil/git.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/libutil/git.cc b/src/libutil/git.cc index 14dd0bf8d9d..3c49530f435 100644 --- a/src/libutil/git.cc +++ b/src/libutil/git.cc @@ -163,7 +163,10 @@ GitMode dumpGitTree(const GitTree & entries, Sink & sink) case GitMode::Executable: mode = 100755; break; case GitMode::Regular: mode = 100644; break; } - auto s1 = (format("%d %s") % mode % i.first).str(); + auto name = i.first; + if (i.second.first == GitMode::Directory) + name.pop_back(); + auto s1 = (format("%d %s") % mode % name).str(); std::copy(s1.begin(), s1.end(), std::back_inserter(v1)); v1.push_back(0); std::copy(i.second.second.hash, i.second.second.hash + 20, std::back_inserter(v1)); @@ -194,8 +197,17 @@ static GitMode dumpGitInternal(HashType ht, const Path & path, Sink & sink, Path else if (S_ISDIR(st.st_mode)) { GitTree entries; for (auto & i : readDirectory(path)) - if (filter(path + "/" + 
i.name)) - entries[i.name] = dumpGitHashInternal(ht, path + "/" + i.name, filter); + if (filter(path + "/" + i.name)) { + auto result = dumpGitHashInternal(ht, path + "/" + i.name, filter); + + // correctly observe git order, see + // https://github.com/mirage/irmin/issues/352 + auto name = i.name; + if (result.first == GitMode::Directory) + name += "/"; + + entries[name] = result; + } perm = dumpGitTree(entries, sink); } else throw Error(format("file '%1%' has an unsupported type") % path);