Skip to content

Commit

Permalink
Merge pull request #11077 from hercules-ci/support-hardlinks-in-tarballs
Browse files Browse the repository at this point in the history
Support hardlinks in tarballs
  • Loading branch information
Ericson2314 authored Jul 11, 2024
2 parents 142e566 + 4fd8f19 commit 426e2af
Show file tree
Hide file tree
Showing 11 changed files with 280 additions and 8 deletions.
59 changes: 57 additions & 2 deletions src/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,10 @@ git_oid hashToOID(const Hash & hash)
return oid;
}

Object lookupObject(git_repository * repo, const git_oid & oid)
Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t type = GIT_OBJECT_ANY)
{
Object obj;
if (git_object_lookup(Setter(obj), repo, &oid, GIT_OBJECT_ANY)) {
if (git_object_lookup(Setter(obj), repo, &oid, type)) {
auto err = git_error_last();
throw Error("getting Git object '%s': %s", oid, err->message);
}
Expand Down Expand Up @@ -909,6 +909,61 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
addToTree(*pathComponents.rbegin(), oid, GIT_FILEMODE_LINK);
}

void createHardlink(const CanonPath & path, const CanonPath & target) override
{
std::vector<std::string> pathComponents;
for (auto & c : path)
pathComponents.emplace_back(c);

if (!prepareDirs(pathComponents, false)) return;

// We can't just look up the path from the start of the root, since
// some parent directories may not have finished yet, so we compute
// a relative path that helps us find the right git_tree_builder or object.
auto relTarget = CanonPath(path).parent()->makeRelative(target);

auto dir = pendingDirs.rbegin();

// For each ../ component at the start, go up one directory.
// CanonPath::makeRelative() always puts all .. elements at the start,
// so they're all handled by this loop:
std::string_view relTargetLeft(relTarget);
while (hasPrefix(relTargetLeft, "../")) {
if (dir == pendingDirs.rend())
throw Error("invalid hard link target '%s' for path '%s'", target, path);
++dir;
relTargetLeft = relTargetLeft.substr(3);
}
if (dir == pendingDirs.rend())
throw Error("invalid hard link target '%s' for path '%s'", target, path);

// Look up the remainder of the target, starting at the
// top-most `git_treebuilder`.
std::variant<git_treebuilder *, git_oid> curDir{dir->builder.get()};
Object tree; // needed to keep `entry` alive
const git_tree_entry * entry = nullptr;

for (auto & c : CanonPath(relTargetLeft)) {
if (auto builder = std::get_if<git_treebuilder *>(&curDir)) {
assert(*builder);
if (!(entry = git_treebuilder_get(*builder, std::string(c).c_str())))
throw Error("cannot find hard link target '%s' for path '%s'", target, path);
curDir = *git_tree_entry_id(entry);
} else if (auto oid = std::get_if<git_oid>(&curDir)) {
tree = lookupObject(*repo, *oid, GIT_OBJECT_TREE);
if (!(entry = git_tree_entry_byname((const git_tree *) &*tree, std::string(c).c_str())))
throw Error("cannot find hard link target '%s' for path '%s'", target, path);
curDir = *git_tree_entry_id(entry);
}
}

assert(entry);

addToTree(*pathComponents.rbegin(),
*git_tree_entry_id(entry),
git_tree_entry_filemode(entry));
}

Hash sync() override {
updateBuilders({});

Expand Down
2 changes: 1 addition & 1 deletion src/libfetchers/git-utils.hh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ namespace nix {

namespace fetchers { struct PublicKey; }

struct GitFileSystemObjectSink : FileSystemObjectSink
struct GitFileSystemObjectSink : ExtendedFileSystemObjectSink
{
/**
* Flush builder and return a final Git hash.
Expand Down
13 changes: 13 additions & 0 deletions src/libutil/fs-sink.hh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,19 @@ struct FileSystemObjectSink
virtual void createSymlink(const CanonPath & path, const std::string & target) = 0;
};

/**
* An extension of `FileSystemObjectSink` that supports file types
* that are not supported by Nix's FSO model.
*/
struct ExtendedFileSystemObjectSink : virtual FileSystemObjectSink
{
/**
* Create a hard link. The target must be the path of a previously
* encountered file relative to the root of the FSO.
*/
virtual void createHardlink(const CanonPath & path, const CanonPath & target) = 0;
};

/**
* Recursively copy file system objects from the source into the sink.
*/
Expand Down
11 changes: 8 additions & 3 deletions src/libutil/tarfile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ void unpackTarfile(const Path & tarFile, const Path & destDir)
extract_archive(archive, destDir);
}

time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink)
time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & parseSink)
{
time_t lastModified = 0;

Expand All @@ -195,7 +195,12 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin

lastModified = std::max(lastModified, archive_entry_mtime(entry));

switch (archive_entry_filetype(entry)) {
if (auto target = archive_entry_hardlink(entry)) {
parseSink.createHardlink(cpath, CanonPath(target));
continue;
}

switch (auto type = archive_entry_filetype(entry)) {

case AE_IFDIR:
parseSink.createDirectory(cpath);
Expand Down Expand Up @@ -232,7 +237,7 @@ time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSin
}

default:
throw Error("file '%s' in tarball has unsupported file type", path);
throw Error("file '%s' in tarball has unsupported file type %d", path, type);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/libutil/tarfile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,6 @@ void unpackTarfile(Source & source, const Path & destDir);

void unpackTarfile(const Path & tarFile, const Path & destDir);

time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink);
time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & parseSink);

}
12 changes: 12 additions & 0 deletions tests/functional/tarball.sh
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,15 @@ test_tarball() {
test_tarball '' cat
test_tarball .xz xz
test_tarball .gz gzip

# Test hard links.
# All entries in tree.tar.gz refer to the same file, and all have the same inode when unpacked by GNU tar.
# We don't preserve the hard links, because that's an optimization we think is not worth the complexity,
# so we only make sure that the contents are copied correctly.
path="$(nix flake prefetch --json "tarball+file://$(pwd)/tree.tar.gz" | jq -r .storePath)"
[[ $(cat "$path/a/b/foo") = bar ]]
[[ $(cat "$path/a/b/xyzzy") = bar ]]
[[ $(cat "$path/a/yyy") = bar ]]
[[ $(cat "$path/a/zzz") = bar ]]
[[ $(cat "$path/c/aap") = bar ]]
[[ $(cat "$path/fnord") = bar ]]
Binary file added tests/functional/tree.tar.gz
Binary file not shown.
112 changes: 112 additions & 0 deletions tests/unit/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#include "git-utils.hh"
#include "file-system.hh"
#include "gmock/gmock.h"
#include <git2/global.h>
#include <git2/repository.h>
#include <git2/types.h>
#include <gtest/gtest.h>
#include "fs-sink.hh"
#include "serialise.hh"

namespace nix {

class GitUtilsTest : public ::testing::Test
{
// We use a single repository for all tests.
Path tmpDir;
std::unique_ptr<AutoDelete> delTmpDir;

public:
void SetUp() override
{
tmpDir = createTempDir();
delTmpDir = std::make_unique<AutoDelete>(tmpDir, true);

// Create the repo with libgit2
git_libgit2_init();
git_repository * repo = nullptr;
auto r = git_repository_init(&repo, tmpDir.c_str(), 0);
ASSERT_EQ(r, 0);
git_repository_free(repo);
}

void TearDown() override
{
// Destroy the AutoDelete, triggering removal
// not AutoDelete::reset(), which would cancel the deletion.
delTmpDir.reset();
}

ref<GitRepo> openRepo()
{
return GitRepo::openRepo(tmpDir, true, false);
}
};

void writeString(CreateRegularFileSink & fileSink, std::string contents, bool executable)
{
if (executable)
fileSink.isExecutable();
fileSink.preallocateContents(contents.size());
fileSink(contents);
}

TEST_F(GitUtilsTest, sink_basic)
{
auto repo = openRepo();
auto sink = repo->getFileSystemObjectSink();

// TODO/Question: It seems a little odd that we use the tarball-like convention of requiring a top-level directory
// here
// The sync method does not document this behavior, should probably renamed because it's not very
// general, and I can't imagine that "non-conventional" archives or any other source to be handled by
// this sink.

sink->createDirectory(CanonPath("foo-1.1"));

sink->createRegularFile(CanonPath("foo-1.1/hello"), [](CreateRegularFileSink & fileSink) {
writeString(fileSink, "hello world", false);
});
sink->createRegularFile(CanonPath("foo-1.1/bye"), [](CreateRegularFileSink & fileSink) {
writeString(fileSink, "thanks for all the fish", false);
});
sink->createSymlink(CanonPath("foo-1.1/bye-link"), "bye");
sink->createDirectory(CanonPath("foo-1.1/empty"));
sink->createDirectory(CanonPath("foo-1.1/links"));
sink->createHardlink(CanonPath("foo-1.1/links/foo"), CanonPath("foo-1.1/hello"));

// sink->createHardlink("foo-1.1/links/foo-2", CanonPath("foo-1.1/hello"));

auto result = sink->sync();
auto accessor = repo->getAccessor(result, false);
auto entries = accessor->readDirectory(CanonPath::root);
ASSERT_EQ(entries.size(), 5);
ASSERT_EQ(accessor->readFile(CanonPath("hello")), "hello world");
ASSERT_EQ(accessor->readFile(CanonPath("bye")), "thanks for all the fish");
ASSERT_EQ(accessor->readLink(CanonPath("bye-link")), "bye");
ASSERT_EQ(accessor->readDirectory(CanonPath("empty")).size(), 0);
ASSERT_EQ(accessor->readFile(CanonPath("links/foo")), "hello world");
};

TEST_F(GitUtilsTest, sink_hardlink)
{
auto repo = openRepo();
auto sink = repo->getFileSystemObjectSink();

sink->createDirectory(CanonPath("foo-1.1"));

sink->createRegularFile(CanonPath("foo-1.1/hello"), [](CreateRegularFileSink & fileSink) {
writeString(fileSink, "hello world", false);
});

try {
sink->createHardlink(CanonPath("foo-1.1/link"), CanonPath("hello"));
FAIL() << "Expected an exception";
} catch (const nix::Error & e) {
ASSERT_THAT(e.msg(), testing::HasSubstr("invalid hard link target"));
ASSERT_THAT(e.msg(), testing::HasSubstr("/hello"));
ASSERT_THAT(e.msg(), testing::HasSubstr("foo-1.1/link"));
}
};

} // namespace nix
2 changes: 1 addition & 1 deletion tests/unit/libfetchers/local.mk
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ libfetchers-tests_LIBS = \
libstore-test-support libutil-test-support \
libfetchers libstore libutil

libfetchers-tests_LDFLAGS := -lrapidcheck $(GTEST_LIBS)
libfetchers-tests_LDFLAGS := -lrapidcheck $(GTEST_LIBS) $(LIBGIT2_LIBS)

ifdef HOST_WINDOWS
# Increase the default reserved stack size to 65 MB so Nix doesn't run out of space
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#include <iostream>
#include "tracing-file-system-object-sink.hh"

namespace nix::test {

void TracingFileSystemObjectSink::createDirectory(const CanonPath & path)
{
std::cerr << "createDirectory(" << path << ")\n";
sink.createDirectory(path);
}

void TracingFileSystemObjectSink::createRegularFile(
const CanonPath & path, std::function<void(CreateRegularFileSink &)> fn)
{
std::cerr << "createRegularFile(" << path << ")\n";
sink.createRegularFile(path, [&](CreateRegularFileSink & crf) {
// We could wrap this and trace about the chunks of data and such
fn(crf);
});
}

void TracingFileSystemObjectSink::createSymlink(const CanonPath & path, const std::string & target)
{
std::cerr << "createSymlink(" << path << ", target: " << target << ")\n";
sink.createSymlink(path, target);
}

void TracingExtendedFileSystemObjectSink::createHardlink(const CanonPath & path, const CanonPath & target)
{
std::cerr << "createHardlink(" << path << ", target: " << target << ")\n";
sink.createHardlink(path, target);
}

} // namespace nix::test
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#pragma once
#include "fs-sink.hh"

namespace nix::test {

/**
* A `FileSystemObjectSink` that traces calls, writing to stderr.
*/
class TracingFileSystemObjectSink : public virtual FileSystemObjectSink
{
FileSystemObjectSink & sink;
public:
TracingFileSystemObjectSink(FileSystemObjectSink & sink)
: sink(sink)
{
}

void createDirectory(const CanonPath & path) override;

void createRegularFile(const CanonPath & path, std::function<void(CreateRegularFileSink &)> fn) override;

void createSymlink(const CanonPath & path, const std::string & target) override;
};

/**
* A `ExtendedFileSystemObjectSink` that traces calls, writing to stderr.
*/
class TracingExtendedFileSystemObjectSink : public TracingFileSystemObjectSink, public ExtendedFileSystemObjectSink
{
ExtendedFileSystemObjectSink & sink;
public:
TracingExtendedFileSystemObjectSink(ExtendedFileSystemObjectSink & sink)
: TracingFileSystemObjectSink(sink)
, sink(sink)
{
}

void createHardlink(const CanonPath & path, const CanonPath & target) override;
};

}

0 comments on commit 426e2af

Please sign in to comment.