Skip to content

Commit

Permalink
Merge pull request #9881 from obsidiansystems/purify-canon-path
Browse files Browse the repository at this point in the history
Purify `CanonPath`
  • Loading branch information
Ericson2314 committed Feb 16, 2024
2 parents d53c890 + d17e1d9 commit 60936f2
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 57 deletions.
15 changes: 12 additions & 3 deletions src/libutil/canon-path.cc
Original file line number Diff line number Diff line change
@@ -1,16 +1,25 @@
#include "canon-path.hh"
#include "file-system.hh"
#include "util.hh"
#include "file-path-impl.hh"

namespace nix {

CanonPath CanonPath::root = CanonPath("/");

static std::string absPathPure(std::string_view path)
{
return canonPathInner(path, [](auto &, auto &){});
}

CanonPath::CanonPath(std::string_view raw)
: path(absPath(raw, "/"))
: path(absPathPure(concatStrings("/", raw)))
{ }

CanonPath::CanonPath(std::string_view raw, const CanonPath & root)
: path(absPath(raw, root.abs()))
: path(absPathPure(
raw.size() > 0 && raw[0] == '/'
? raw
: concatStrings(root.abs(), "/", raw)))
{ }

CanonPath::CanonPath(const std::vector<std::string> & elems)
Expand Down
18 changes: 15 additions & 3 deletions src/libutil/canon-path.hh
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,21 @@ namespace nix {
*
* - There are no components equal to '.' or '..'.
*
* Note that the path does not need to correspond to an actually
* existing path, and there is no guarantee that symlinks are
* resolved.
* `CanonPath` are "virtual" Nix paths for abstract file system objects;
* they are always Unix-style paths, regardless of what OS Nix is
* running on. The `/` root doesn't denote the ambient host file system
* root, but some virtual FS root.
*
* @note It might be useful to compare `openat(some_fd, "foo/bar")` on
* Unix. `"foo/bar"` is a relative path because an absolute path would
* "override" the `some_fd` directory file descriptor and escape to the
* "system root". Conversely, Nix's abstract file operations *never* escape the
* designated virtual file system (i.e. `SourceAccessor` or
* `ParseSink`), so `CanonPath` does not need an absolute/relative
* distinction.
*
* @note The path does not need to correspond to an actually existing
* path, and the path may or may not have unresolved symlinks.
*/
class CanonPath
{
Expand Down
81 changes: 81 additions & 0 deletions src/libutil/file-path-impl.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#pragma once
/**
* @file
*
* Pure (no IO) infrastructure just for defining other path types;
* should not be used directly outside of utilities.
*/
#include <string>
#include <string_view>

namespace nix {

/**
* Core pure path canonicalization algorithm.
*
* @param hookComponent
* A callback which is passed two arguments,
* references to
*
* 1. the result so far
*
* 2. the remaining path to resolve
*
* This is a chance to modify those two paths in arbitrary way, e.g. if
* "result" points to a symlink.
*/
typename std::string canonPathInner(
std::string_view remaining,
auto && hookComponent)
{
assert(remaining != "");

std::string result;
result.reserve(256);

while (true) {

/* Skip slashes. */
while (!remaining.empty() && remaining[0] == '/')
remaining.remove_prefix(1);

if (remaining.empty()) break;

auto nextComp = ({
auto nextPathSep = remaining.find('/');
nextPathSep == remaining.npos ? remaining : remaining.substr(0, nextPathSep);
});

/* Ignore `.'. */
if (nextComp == ".")
remaining.remove_prefix(1);

/* If `..', delete the last component. */
else if (nextComp == "..")
{
if (!result.empty()) result.erase(result.rfind('/'));
remaining.remove_prefix(2);
}

/* Normal component; copy it. */
else {
result += '/';
if (const auto slash = remaining.find('/'); slash == result.npos) {
result += remaining;
remaining = {};
} else {
result += remaining.substr(0, slash);
remaining = remaining.substr(slash);
}

hookComponent(result, remaining);
}
}

if (result.empty())
result = "/";

return result;
}

}
80 changes: 29 additions & 51 deletions src/libutil/file-system.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "environment-variables.hh"
#include "file-system.hh"
#include "file-path-impl.hh"
#include "signals.hh"
#include "finally.hh"
#include "serialise.hh"
Expand All @@ -21,11 +22,18 @@ namespace fs = std::filesystem;

namespace nix {

/** Treat the string as possibly an absolute path, by inspecting the start of it. Return whether it was probably intended to be absolute. */
static bool isAbsolute(PathView path)
{
return !path.empty() && path[0] == '/';
}


Path absPath(PathView path, std::optional<PathView> dir, bool resolveSymlinks)
{
std::string scratch;

if (path.empty() || path[0] != '/') {
if (!isAbsolute(path)) {
// In this case we need to call `canonPath` on a newly-created
// string. We set `scratch` to that string first, and then set
// `path` to `scratch`. This ensures the newly-created string
Expand Down Expand Up @@ -58,69 +66,39 @@ Path canonPath(PathView path, bool resolveSymlinks)
{
assert(path != "");

std::string s;
s.reserve(256);

if (path[0] != '/')
if (!isAbsolute(path))
throw Error("not an absolute path: '%1%'", path);

/* This just exists because we cannot set the target of `remaining`
(the callback parameter) directly to a newly-constructed string,
since it is `std::string_view`. */
std::string temp;

/* Count the number of times we follow a symlink and stop at some
arbitrary (but high) limit to prevent infinite loops. */
unsigned int followCount = 0, maxFollow = 1024;

while (1) {

/* Skip slashes. */
while (!path.empty() && path[0] == '/') path.remove_prefix(1);
if (path.empty()) break;

/* Ignore `.'. */
if (path == "." || path.substr(0, 2) == "./")
path.remove_prefix(1);

/* If `..', delete the last component. */
else if (path == ".." || path.substr(0, 3) == "../")
{
if (!s.empty()) s.erase(s.rfind('/'));
path.remove_prefix(2);
}

/* Normal component; copy it. */
else {
s += '/';
if (const auto slash = path.find('/'); slash == path.npos) {
s += path;
path = {};
} else {
s += path.substr(0, slash);
path = path.substr(slash);
}

/* If s points to a symlink, resolve it and continue from there */
if (resolveSymlinks && isLink(s)) {
return canonPathInner(
path,
[&followCount, &temp, maxFollow, resolveSymlinks]
(std::string & result, std::string_view & remaining) {
if (resolveSymlinks && isLink(result)) {
if (++followCount >= maxFollow)
throw Error("infinite symlink recursion in path '%1%'", path);
temp = concatStrings(readLink(s), path);
path = temp;
if (!temp.empty() && temp[0] == '/') {
s.clear(); /* restart for symlinks pointing to absolute path */
throw Error("infinite symlink recursion in path '%0%'", remaining);
remaining = (temp = concatStrings(readLink(result), remaining));
if (isAbsolute(remaining)) {
/* restart for symlinks pointing to absolute path */
result.clear();
} else {
s = dirOf(s);
if (s == "/") { // we don’t want trailing slashes here, which dirOf only produces if s = /
s.clear();
result = dirOf(result);
if (result == "/") {
/* we don’t want trailing slashes here, which `dirOf`
only produces if `result = /` */
result.clear();
}
}
}
}
}

if (s.empty()) {
s = "/";
}

return s;
});
}


Expand Down
18 changes: 18 additions & 0 deletions tests/unit/libutil/canon-path.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,24 @@ namespace nix {
}
}

TEST(CanonPath, from_existing) {
CanonPath p0("foo//bar/");
{
CanonPath p("/baz//quux/", p0);
ASSERT_EQ(p.abs(), "/baz/quux");
ASSERT_EQ(p.rel(), "baz/quux");
ASSERT_EQ(*p.baseName(), "quux");
ASSERT_EQ(*p.dirOf(), "/baz");
}
{
CanonPath p("baz//quux/", p0);
ASSERT_EQ(p.abs(), "/foo/bar/baz/quux");
ASSERT_EQ(p.rel(), "foo/bar/baz/quux");
ASSERT_EQ(*p.baseName(), "quux");
ASSERT_EQ(*p.dirOf(), "/foo/bar/baz");
}
}

TEST(CanonPath, pop) {
CanonPath p("foo/bar/x");
ASSERT_EQ(p.abs(), "/foo/bar/x");
Expand Down

0 comments on commit 60936f2

Please sign in to comment.