Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make fetchGit/fetchTree use Git file ingestion #6

Merged
merged 8 commits into from
Jun 4, 2020
107 changes: 79 additions & 28 deletions src/libfetchers/git.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "globals.hh"
#include "tarfile.hh"
#include "store-api.hh"
#include "git.hh"

#include <sys/time.h>

Expand All @@ -27,6 +28,7 @@ struct GitInput : Input
ParsedURL url;
std::optional<std::string> ref;
std::optional<Hash> rev;
std::optional<Hash> treeHash;
bool shallow = false;
bool submodules = false;

Expand All @@ -42,12 +44,13 @@ struct GitInput : Input
other2
&& url == other2->url
&& rev == other2->rev
&& treeHash == other2->treeHash
&& ref == other2->ref;
}

bool isImmutable() const override
{
return (bool) rev || narHash;
return (bool) rev || treeHash || narHash;
}

std::optional<std::string> getRef() const override { return ref; }
Expand All @@ -59,6 +62,7 @@ struct GitInput : Input
ParsedURL url2(url);
if (url2.scheme != "git") url2.scheme = "git+" + url2.scheme;
if (rev) url2.query.insert_or_assign("rev", rev->gitRev());
if (treeHash) url2.query.insert_or_assign("treeHash", treeHash->gitRev());
if (ref) url2.query.insert_or_assign("ref", *ref);
if (shallow) url2.query.insert_or_assign("shallow", "1");
return url2;
Expand All @@ -72,6 +76,8 @@ struct GitInput : Input
attrs.emplace("ref", *ref);
if (rev)
attrs.emplace("rev", rev->gitRev());
if (treeHash)
attrs.emplace("treeHash", treeHash->gitRev());
if (shallow)
attrs.emplace("shallow", true);
if (submodules)
Expand All @@ -96,25 +102,33 @@ struct GitInput : Input
auto input = std::make_shared<GitInput>(*this);

assert(!rev || rev->type == HashType::SHA1);
assert(!treeHash || treeHash->type == HashType::SHA1);

auto ingestionMethod = treeHash ? FileIngestionMethod::Git : FileIngestionMethod::Recursive;

std::string cacheType = "git";
if (shallow) cacheType += "-shallow";
if (submodules) cacheType += "-submodules";

auto getImmutableAttrs = [&]()
{
return Attrs({
Attrs attrs({
{"type", cacheType},
{"name", name},
{"rev", input->rev->gitRev()},
});
if (input->treeHash)
attrs.insert_or_assign("treeHash", input->treeHash->gitRev());
else
attrs.insert_or_assign("rev", input->rev->gitRev());
return attrs;
};

auto makeResult = [&](const Attrs & infoAttrs, StorePath && storePath)
-> std::pair<Tree, std::shared_ptr<const Input>>
{
assert(input->rev);
assert(input->rev || input->treeHash);
assert(!rev || rev == input->rev);
assert(!treeHash || treeHash == input->treeHash);
return {
Tree {
.actualPath = store->toRealPath(storePath),
Expand All @@ -138,7 +152,7 @@ struct GitInput : Input

// If this is a local directory and no ref or revision is
// given, then allow the use of an unclean working tree.
if (!input->ref && !input->rev && isLocal) {
if (!input->ref && !input->rev && !input->treeHash && isLocal) {
bool clean = false;

/* Check whether this repo has any commits. There are
Expand Down Expand Up @@ -195,7 +209,7 @@ struct GitInput : Input
return files.count(file);
};

auto storePath = store->addToStore("source", actualUrl, FileIngestionMethod::Recursive, HashType::SHA256, filter);
auto storePath = store->addToStore("source", actualUrl, ingestionMethod, HashType::SHA256, filter);

auto tree = Tree {
.actualPath = store->printStorePath(storePath),
Expand Down Expand Up @@ -224,7 +238,7 @@ struct GitInput : Input

if (isLocal) {

if (!input->rev)
if (!input->rev && !input->treeHash)
input->rev = Hash(chomp(runProgram("git", true, { "-C", actualUrl, "rev-parse", *input->ref })), HashType::SHA1);

repoDir = actualUrl;
Expand All @@ -233,12 +247,20 @@ struct GitInput : Input

if (auto res = getCache()->lookup(store, mutableAttrs)) {
auto rev2 = Hash(getStrAttr(res->first, "rev"), HashType::SHA1);
if (!rev || rev == rev2) {
if (!input->rev || rev == rev2) {
input->rev = rev2;
return makeResult(res->first, std::move(res->second));
}
}

if (auto res = getCache()->lookup(store, mutableAttrs)) {
auto treeHash2 = Hash(getStrAttr(res->first, "treeHash"), HashType::SHA1);
if (!input->treeHash || treeHash == treeHash2) {
input->treeHash = treeHash2;
return makeResult(res->first, std::move(res->second));
}
}

Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(HashType::SHA256, actualUrl).to_string(Base::Base32, false);
repoDir = cacheDir;

Expand All @@ -255,11 +277,12 @@ struct GitInput : Input
bool doFetch;
time_t now = time(0);

/* If a rev was specified, we need to fetch if it's not in the
repo. */
if (input->rev) {
/* If a rev or treeHash is specified, we need to fetch if
it's not in the repo. */
if (input->rev || input->treeHash) {
try {
runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input->rev->gitRev() });
auto gitHash = input->treeHash ? input->treeHash : input->rev;
runProgram("git", true, { "-C", repoDir, "cat-file", "-e", gitHash->gitRev() });
doFetch = false;
} catch (ExecError & e) {
if (WIFEXITED(e.status)) {
Expand Down Expand Up @@ -300,18 +323,27 @@ struct GitInput : Input
utimes(localRefFile.c_str(), times);
}

if (!input->rev)
if (!input->rev && !input->treeHash)
input->rev = Hash(chomp(readFile(localRefFile)), HashType::SHA1);
}

if (input->treeHash) {
auto type = chomp(runProgram("git", true, { "-C", repoDir, "cat-file", "-t", input->treeHash->gitRev() }));
if (type != "tree")
throw Error(format("Need a tree object, found '%s' object in %s") % type % input->treeHash->gitRev());
}

bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true";

if (isShallow && !shallow)
throw Error("'%s' is a shallow Git repository, but a non-shallow repository is needed", actualUrl);

// FIXME: check whether rev is an ancestor of ref.

printTalkative("using revision %s of repo '%s'", input->rev->gitRev(), actualUrl);
if (input->rev)
printTalkative("using revision %s of repo '%s'", input->rev->gitRev(), actualUrl);
else if (input->treeHash)
printTalkative("using tree %s of repo '%s'", input->treeHash->gitRev(), actualUrl);

/* Now that we know the ref, check again whether we have it in
the store. */
Expand All @@ -322,6 +354,9 @@ struct GitInput : Input
AutoDelete delTmpDir(tmpDir, true);
PathFilter filter = defaultPathFilter;

if (submodules && treeHash)
throw Error("Cannot combine tree hashes with git submodules");

if (submodules) {
Path tmpGitDir = createTempDir();
AutoDelete delTmpGitDir(tmpGitDir, true);
Expand All @@ -333,7 +368,7 @@ struct GitInput : Input
runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force",
"--update-head-ok", "--", repoDir, "refs/*:refs/*" });

runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input->rev->gitRev() });
runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input->treeHash ? input->treeHash->gitRev() : input->rev->gitRev() });
runProgram("git", true, { "-C", tmpDir, "remote", "add", "origin", actualUrl });
runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" });

Expand All @@ -342,28 +377,41 @@ struct GitInput : Input
// FIXME: should pipe this, or find some better way to extract a
// revision.
auto source = sinkToSource([&](Sink & sink) {
RunOptions gitOptions("git", { "-C", repoDir, "archive", input->rev->gitRev() });
RunOptions gitOptions("git", { "-C", repoDir, "archive", input->treeHash ? input->treeHash->gitRev() : input->rev->gitRev() });
gitOptions.standardOut = &sink;
runProgram2(gitOptions);
});

unpackTarfile(*source, tmpDir);
}

auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, HashType::SHA256, filter);
auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, ingestionMethod == FileIngestionMethod::Git ? HashType::SHA1 : HashType::SHA256, filter);

auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", input->rev->gitRev() }));
// verify treeHash is what we actually obtained in the nix store
if (input->treeHash) {
auto path = store->toRealPath(store->printStorePath(storePath));
auto gotHash = dumpGitHash(HashType::SHA1, path);
if (gotHash != input->treeHash)
throw Error("Git hash mismatch in input '%s' (%s), expected '%s', got '%s'",
to_string(), path, input->treeHash->gitRev(), gotHash.gitRev());
}

Attrs infoAttrs({
{"rev", input->rev->gitRev()},
{"lastModified", lastModified},
});
Attrs infoAttrs({});
if (input->treeHash) {
infoAttrs.insert_or_assign("treeHash", input->treeHash->gitRev());
infoAttrs.insert_or_assign("revCount", 0);
infoAttrs.insert_or_assign("lastModified", 0);
} else {
auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", input->rev->gitRev() }));
infoAttrs.insert_or_assign("lastModified", lastModified);
infoAttrs.insert_or_assign("rev", input->rev->gitRev());

if (!shallow)
infoAttrs.insert_or_assign("revCount",
std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input->rev->gitRev() })));
if (!shallow)
infoAttrs.insert_or_assign("revCount",
std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input->rev->gitRev() })));
}

if (!this->rev)
if (!this->rev && !this->treeHash)
getCache()->add(
store,
mutableAttrs,
Expand Down Expand Up @@ -400,7 +448,7 @@ struct GitInputScheme : InputScheme
attrs.emplace("type", "git");

for (auto &[name, value] : url.query) {
if (name == "rev" || name == "ref")
if (name == "rev" || name == "ref" || name == "treeHash")
attrs.emplace(name, value);
else
url2.query.emplace(name, value);
Expand All @@ -416,7 +464,7 @@ struct GitInputScheme : InputScheme
if (maybeGetStrAttr(attrs, "type") != "git") return {};

for (auto & [name, value] : attrs)
if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules")
if (name != "type" && name != "url" && name != "ref" && name != "rev" && name != "shallow" && name != "submodules" && name != "treeHash")
throw Error("unsupported Git input attribute '%s'", name);

auto input = std::make_unique<GitInput>(parseURL(getStrAttr(attrs, "url")));
Expand All @@ -428,6 +476,9 @@ struct GitInputScheme : InputScheme
if (auto rev = maybeGetStrAttr(attrs, "rev"))
input->rev = Hash(*rev, HashType::SHA1);

if (auto treeHash = maybeGetStrAttr(attrs, "treeHash"))
input->treeHash = Hash(*treeHash, HashType::SHA1);

input->shallow = maybeGetBoolAttr(attrs, "shallow").value_or(false);

input->submodules = maybeGetBoolAttr(attrs, "submodules").value_or(false);
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/local-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1143,7 +1143,7 @@ StorePath LocalStore::addToStore(const string & name, const Path & _srcPath,
throw SysError(format("getting attributes of path '%1%'") % srcPath);
if (S_ISDIR(st.st_mode))
for (auto & i : readDirectory(srcPath))
addToStore(i.name, srcPath + "/" + i.name, method, hashAlgo, filter, repair);
addToStore("git", srcPath + "/" + i.name, method, hashAlgo, filter, repair);

dumpGit(hashAlgo, srcPath, sink, filter);
break;
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/remote-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -501,7 +501,7 @@ StorePath RemoteStore::addToStore(const string & name, const Path & _srcPath,
throw SysError(format("getting attributes of path '%1%'") % srcPath);
if (S_ISDIR(st.st_mode))
for (auto & i : readDirectory(srcPath))
addToStore(i.name, srcPath + "/" + i.name, method, hashAlgo, filter, repair);
addToStore("git", srcPath + "/" + i.name, method, hashAlgo, filter, repair);
}

auto conn(getConnection());
Expand Down
5 changes: 4 additions & 1 deletion src/libutil/fs-sink.hh
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,10 @@ struct RestoreSink : ParseSink
if (lstat(entry.c_str(), &st))
throw SysError(format("getting attributes of path '%1%'") % entry);
if (S_ISREG(st.st_mode)) {
createRegularFile(destination + "/" + i.name);
if (st.st_mode & S_IXUSR)
createExecutableFile(destination + "/" + i.name);
else
createRegularFile(destination + "/" + i.name);
copyFile(entry);
} else if (S_ISDIR(st.st_mode))
copyDirectory(entry, destination + "/" + i.name);
Expand Down
37 changes: 28 additions & 9 deletions src/libutil/git.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ static void parse(ParseSink & sink, Source & source, const Path & path, const Pa
Hash hash(HashType::SHA1);
std::copy(hashs.begin(), hashs.end(), hash.hash);

string entryName = getStoreEntry(storeDir, hash, name);
string entryName = getStoreEntry(storeDir, hash, "git");
Path entry = absPath(realStoreDir + "/" + entryName);

struct stat st;
Expand Down Expand Up @@ -154,22 +154,32 @@ GitMode dumpGitBlob(const Path & path, const struct stat st, Sink & sink)

GitMode dumpGitTree(const GitTree & entries, Sink & sink)
{
std::string s1 = "";
vector<uint8_t> v1;

for (auto & i : entries) {
unsigned int mode;
switch (i.second.first) {
case GitMode::Directory: mode = 40000; break;
case GitMode::Executable: mode = 100755; break;
case GitMode::Regular: mode = 100644; break;
}
s1 += (format("%06d %s\0%s"s) % mode % i.first % i.second.second.hash).str();
auto name = i.first;
if (i.second.first == GitMode::Directory)
name.pop_back();
auto s1 = (format("%d %s") % mode % name).str();
std::copy(s1.begin(), s1.end(), std::back_inserter(v1));
v1.push_back(0);
std::copy(i.second.second.hash, i.second.second.hash + 20, std::back_inserter(v1));
}

std::string s2 = (format("tree %d\0%s"s) % s1.size() % s1).str();
vector<uint8_t> v2;
auto s2 = (format("tree %d"s) % v1.size()).str();
std::copy(s2.begin(), s2.end(), std::back_inserter(v2));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a `std::front_inserter`? If so, we could use just one array.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like there is!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, that doesn't work: prepending to a vector shifts every existing element each time, so it would be quadratic.

v2.push_back(0);
std::copy(v1.begin(), v1.end(), std::back_inserter(v2));

sink(v2.data(), v2.size());

vector<uint8_t> v;
std::copy(s2.begin(), s2.end(), std::back_inserter(v));
sink(v.data(), v.size());
return GitMode::Directory;
}

Expand All @@ -187,8 +197,17 @@ static GitMode dumpGitInternal(HashType ht, const Path & path, Sink & sink, Path
else if (S_ISDIR(st.st_mode)) {
GitTree entries;
for (auto & i : readDirectory(path))
if (filter(path + "/" + i.name))
entries[i.name] = dumpGitHashInternal(ht, path + "/" + i.name, filter);
if (filter(path + "/" + i.name)) {
auto result = dumpGitHashInternal(ht, path + "/" + i.name, filter);

// correctly observe git order, see
// https://github.com/mirage/irmin/issues/352
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice catch!

auto name = i.name;
if (result.first == GitMode::Directory)
name += "/";

entries[name] = result;
}
perm = dumpGitTree(entries, sink);
} else throw Error(format("file '%1%' has an unsupported type") % path);

Expand Down