diff --git a/src/libutil/canon-path.cc b/src/libutil/canon-path.cc
index 1223ba33c5b..fcd53862bf5 100644
--- a/src/libutil/canon-path.cc
+++ b/src/libutil/canon-path.cc
@@ -1,16 +1,25 @@
 #include "canon-path.hh"
-#include "file-system.hh"
+#include "util.hh"
+#include "file-path-impl.hh"
 
 namespace nix {
 
 CanonPath CanonPath::root = CanonPath("/");
 
+static std::string absPathPure(std::string_view path)
+{
+    return canonPathInner(path, [](auto &, auto &){});
+}
+
 CanonPath::CanonPath(std::string_view raw)
-    : path(absPath(raw, "/"))
+    : path(absPathPure(concatStrings("/", raw)))
 { }
 
 CanonPath::CanonPath(std::string_view raw, const CanonPath & root)
-    : path(absPath(raw, root.abs()))
+    : path(absPathPure(
+        raw.size() > 0 && raw[0] == '/'
+            ? raw
+            : concatStrings(root.abs(), "/", raw)))
 { }
 
 CanonPath::CanonPath(const std::vector<std::string> & elems)
diff --git a/src/libutil/canon-path.hh b/src/libutil/canon-path.hh
index 2f8ff381edf..8f5a1c2793d 100644
--- a/src/libutil/canon-path.hh
+++ b/src/libutil/canon-path.hh
@@ -21,9 +21,21 @@ namespace nix {
  *
  * - There are no components equal to '.' or '..'.
  *
- * Note that the path does not need to correspond to an actually
- * existing path, and there is no guarantee that symlinks are
- * resolved.
+ * `CanonPath`s are "virtual" Nix paths for abstract file system objects;
+ * they are always Unix-style paths, regardless of what OS Nix is
+ * running on. The `/` root doesn't denote the ambient host file system
+ * root, but some virtual FS root.
+ *
+ * @note It might be useful to compare `openat(some_fd, "foo/bar")` on
+ * Unix. `"foo/bar"` is a relative path because an absolute path would
+ * "override" the `some_fd` directory file descriptor and escape to the
+ * "system root". Conversely, Nix's abstract file operations *never* escape the
+ * designated virtual file system (i.e. `SourceAccessor` or
+ * `ParseSink`), so `CanonPath` does not need an absolute/relative
+ * distinction.
+ *
+ * @note The path does not need to correspond to an actually existing
+ * path, and the path may or may not have unresolved symlinks.
  */
 class CanonPath
 {
diff --git a/src/libutil/file-path-impl.hh b/src/libutil/file-path-impl.hh
new file mode 100644
index 00000000000..39159c7c29a
--- /dev/null
+++ b/src/libutil/file-path-impl.hh
@@ -0,0 +1,90 @@
+#pragma once
+/**
+ * @file
+ *
+ * Pure (no IO) infrastructure just for defining other path types;
+ * should not be used directly outside of utilities.
+ */
+#include <string>
+#include <string_view>
+
+namespace nix {
+
+/**
+ * Core pure path canonicalization algorithm.
+ *
+ * Runs of `/` are collapsed, `.` components are dropped, and `..`
+ * removes the most recently emitted component (it is a no-op when
+ * nothing has been emitted yet).
+ *
+ * @param remaining
+ *   The path still to be processed. Must not be empty.
+ *
+ * @param hookComponent
+ *   A callback which is passed two arguments,
+ *   references to
+ *
+ *   1. the result so far
+ *
+ *   2. the remaining path to resolve
+ *
+ *   This is a chance to modify those two paths in an arbitrary way, e.g. if
+ *   "result" points to a symlink.
+ *
+ * @return The accumulated result, or `/` if it ended up empty.
+ */
+typename std::string canonPathInner(
+    std::string_view remaining,
+    auto && hookComponent)
+{
+    assert(remaining != "");
+
+    std::string result;
+    result.reserve(256);
+
+    while (true) {
+
+        /* Skip slashes. */
+        while (!remaining.empty() && remaining[0] == '/')
+            remaining.remove_prefix(1);
+
+        if (remaining.empty()) break;
+
+        auto nextComp = ({
+            auto nextPathSep = remaining.find('/');
+            nextPathSep == remaining.npos ? remaining : remaining.substr(0, nextPathSep);
+        });
+
+        /* Ignore `.'. */
+        if (nextComp == ".")
+            remaining.remove_prefix(1);
+
+        /* If `..', delete the last component. */
+        else if (nextComp == "..")
+        {
+            if (!result.empty()) result.erase(result.rfind('/'));
+            remaining.remove_prefix(2);
+        }
+
+        /* Normal component; copy it. */
+        else {
+            result += '/';
+            if (const auto slash = remaining.find('/'); slash == remaining.npos) {
+                result += remaining;
+                remaining = {};
+            } else {
+                result += remaining.substr(0, slash);
+                remaining = remaining.substr(slash);
+            }
+
+            hookComponent(result, remaining);
+        }
+    }
+
+    if (result.empty())
+        result = "/";
+
+    return result;
+}
+
+}
diff --git a/src/libutil/file-system.cc b/src/libutil/file-system.cc
index 9fa1f62dfc0..3c019a9ed19 100644
--- a/src/libutil/file-system.cc
+++ b/src/libutil/file-system.cc
@@ -1,5 +1,6 @@
 #include "environment-variables.hh"
 #include "file-system.hh"
+#include "file-path-impl.hh"
 #include "signals.hh"
 #include "finally.hh"
 #include "serialise.hh"
@@ -21,11 +22,18 @@ namespace fs = std::filesystem;
 
 namespace nix {
 
+/** Treat the string as possibly an absolute path, by inspecting the start of it. Return whether it was probably intended to be absolute. */
+static bool isAbsolute(PathView path)
+{
+    return !path.empty() && path[0] == '/';
+}
+
+
 Path absPath(PathView path, std::optional<PathView> dir, bool resolveSymlinks)
 {
     std::string scratch;
 
-    if (path.empty() || path[0] != '/') {
+    if (!isAbsolute(path)) {
         // In this case we need to call `canonPath` on a newly-created
         // string. We set `scratch` to that string first, and then set
         // `path` to `scratch`. This ensures the newly-created string
@@ -58,69 +66,39 @@ Path canonPath(PathView path, bool resolveSymlinks)
 {
     assert(path != "");
 
-    std::string s;
-    s.reserve(256);
-
-    if (path[0] != '/')
+    if (!isAbsolute(path))
         throw Error("not an absolute path: '%1%'", path);
 
+    /* This just exists because we cannot set the target of `remaining`
+       (the callback parameter) directly to a newly-constructed string,
+       since it is `std::string_view`. */
     std::string temp;
 
     /* Count the number of times we follow a symlink and stop at some
        arbitrary (but high) limit to prevent infinite loops. */
     unsigned int followCount = 0, maxFollow = 1024;
 
-    while (1) {
-
-        /* Skip slashes. */
-        while (!path.empty() && path[0] == '/') path.remove_prefix(1);
-        if (path.empty()) break;
-
-        /* Ignore `.'. */
-        if (path == "." || path.substr(0, 2) == "./")
-            path.remove_prefix(1);
-
-        /* If `..', delete the last component. */
-        else if (path == ".." || path.substr(0, 3) == "../")
-        {
-            if (!s.empty()) s.erase(s.rfind('/'));
-            path.remove_prefix(2);
-        }
-
-        /* Normal component; copy it. */
-        else {
-            s += '/';
-            if (const auto slash = path.find('/'); slash == path.npos) {
-                s += path;
-                path = {};
-            } else {
-                s += path.substr(0, slash);
-                path = path.substr(slash);
-            }
-
-            /* If s points to a symlink, resolve it and continue from there */
-            if (resolveSymlinks && isLink(s)) {
+    return canonPathInner(
+        path,
+        [&followCount, &temp, maxFollow, resolveSymlinks]
+        (std::string & result, std::string_view & remaining) {
+            if (resolveSymlinks && isLink(result)) {
                 if (++followCount >= maxFollow)
-                    throw Error("infinite symlink recursion in path '%1%'", path);
-                temp = concatStrings(readLink(s), path);
-                path = temp;
-                if (!temp.empty() && temp[0] == '/') {
-                    s.clear(); /* restart for symlinks pointing to absolute path */
+                    throw Error("infinite symlink recursion in path '%1%'", remaining);
+                remaining = (temp = concatStrings(readLink(result), remaining));
+                if (isAbsolute(remaining)) {
+                    /* restart for symlinks pointing to absolute path */
+                    result.clear();
                 } else {
-                    s = dirOf(s);
-                    if (s == "/") { // we don’t want trailing slashes here, which dirOf only produces if s = /
-                        s.clear();
+                    result = dirOf(result);
+                    if (result == "/") {
+                        /* we don’t want trailing slashes here, which `dirOf`
+                           only produces if `result = /` */
+                        result.clear();
                     }
                 }
             }
-        }
-    }
-
-    if (s.empty()) {
-        s = "/";
-    }
-
-    return s;
+        });
 }
 
 
diff --git a/tests/unit/libutil/canon-path.cc b/tests/unit/libutil/canon-path.cc
index bf11abe3e9c..7f91308afe1 100644
--- a/tests/unit/libutil/canon-path.cc
+++ b/tests/unit/libutil/canon-path.cc
@@ -41,6 +41,24 @@ namespace nix {
         }
     }
 
+    TEST(CanonPath, from_existing) {
+        CanonPath p0("foo//bar/");
+        {
+            CanonPath p("/baz//quux/", p0);
+            ASSERT_EQ(p.abs(), "/baz/quux");
+            ASSERT_EQ(p.rel(), "baz/quux");
+            ASSERT_EQ(*p.baseName(), "quux");
+            ASSERT_EQ(*p.dirOf(), "/baz");
+        }
+        {
+            CanonPath p("baz//quux/", p0);
+            ASSERT_EQ(p.abs(), "/foo/bar/baz/quux");
+            ASSERT_EQ(p.rel(), "foo/bar/baz/quux");
+            ASSERT_EQ(*p.baseName(), "quux");
+            ASSERT_EQ(*p.dirOf(), "/foo/bar/baz");
+        }
+    }
+
     TEST(CanonPath, pop) {
         CanonPath p("foo/bar/x");
         ASSERT_EQ(p.abs(), "/foo/bar/x");