Skip to content

Commit

Permalink
Merge pull request #9767 from obsidiansystems/canon-path-split
Browse files Browse the repository at this point in the history
Support Windows paths in `canonPath` and `absPath`
  • Loading branch information
edolstra committed Feb 27, 2024
2 parents 354ba27 + 319ec6f commit c3e9e3d
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 24 deletions.
2 changes: 1 addition & 1 deletion src/libutil/canon-path.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ CanonPath CanonPath::root = CanonPath("/");

static std::string absPathPure(std::string_view path)
{
return canonPathInner(path, [](auto &, auto &){});
return canonPathInner<UnixPathTrait>(path, [](auto &, auto &){});
}

CanonPath::CanonPath(std::string_view raw)
Expand Down
113 changes: 104 additions & 9 deletions src/libutil/file-path-impl.hh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,100 @@

namespace nix {

/**
* Unix-style path primives.
*
* Nix'result own "logical" paths are always Unix-style. So this is always
* used for that, and additionally used for native paths on Unix.
*/
struct UnixPathTrait
{
using CharT = char;

using String = std::string;

using StringView = std::string_view;

constexpr static char preferredSep = '/';

static inline bool isPathSep(char c)
{
return c == '/';
}

static inline size_t findPathSep(StringView path, size_t from = 0)
{
return path.find('/', from);
}

static inline size_t rfindPathSep(StringView path, size_t from = StringView::npos)
{
return path.rfind('/', from);
}
};


/**
* Windows-style path primitives.
*
* The character type is a parameter because while windows paths rightly
* work over UTF-16 (*) using `wchar_t`, at the current time we are
* often manipulating them converted to UTF-8 (*) using `char`.
*
* (Actually neither are guaranteed to be valid unicode; both are
* arbitrary non-0 8- or 16-bit bytes. But for charcters with specifical
* meaning like '/', '\\', ':', etc., we refer to an encoding scheme,
* and also for sake of UIs that display paths a text.)
*/
template<class CharT0>
struct WindowsPathTrait
{
using CharT = CharT0;

using String = std::basic_string<CharT>;

using StringView = std::basic_string_view<CharT>;

constexpr static CharT preferredSep = '\\';

static inline bool isPathSep(CharT c)
{
return c == '/' || c == preferredSep;
}

static size_t findPathSep(StringView path, size_t from = 0)
{
size_t p1 = path.find('/', from);
size_t p2 = path.find(preferredSep, from);
return p1 == String::npos ? p2 :
p2 == String::npos ? p1 :
std::min(p1, p2);
}

static size_t rfindPathSep(StringView path, size_t from = String::npos)
{
size_t p1 = path.rfind('/', from);
size_t p2 = path.rfind(preferredSep, from);
return p1 == String::npos ? p2 :
p2 == String::npos ? p1 :
std::max(p1, p2);
}
};


/**
* @todo Revisit choice of `char` or `wchar_t` for `WindowsPathTrait`
* argument.
*/
using NativePathTrait =
#ifdef _WIN32
WindowsPathTrait<char>
#else
UnixPathTrait
#endif
;


/**
* Core pure path canonicalization algorithm.
*
Expand All @@ -24,25 +118,26 @@ namespace nix {
* This is a chance to modify those two paths in arbitrary way, e.g. if
* "result" points to a symlink.
*/
typename std::string canonPathInner(
std::string_view remaining,
template<class PathDict>
typename PathDict::String canonPathInner(
typename PathDict::StringView remaining,
auto && hookComponent)
{
assert(remaining != "");

std::string result;
typename PathDict::String result;
result.reserve(256);

while (true) {

/* Skip slashes. */
while (!remaining.empty() && remaining[0] == '/')
while (!remaining.empty() && PathDict::isPathSep(remaining[0]))
remaining.remove_prefix(1);

if (remaining.empty()) break;

auto nextComp = ({
auto nextPathSep = remaining.find('/');
auto nextPathSep = PathDict::findPathSep(remaining);
nextPathSep == remaining.npos ? remaining : remaining.substr(0, nextPathSep);
});

Expand All @@ -53,14 +148,14 @@ typename std::string canonPathInner(
/* If `..', delete the last component. */
else if (nextComp == "..")
{
if (!result.empty()) result.erase(result.rfind('/'));
if (!result.empty()) result.erase(PathDict::rfindPathSep(result));
remaining.remove_prefix(2);
}

/* Normal component; copy it. */
else {
result += '/';
if (const auto slash = remaining.find('/'); slash == result.npos) {
result += PathDict::preferredSep;
if (const auto slash = PathDict::findPathSep(remaining); slash == result.npos) {
result += remaining;
remaining = {};
} else {
Expand All @@ -73,7 +168,7 @@ typename std::string canonPathInner(
}

if (result.empty())
result = "/";
result = typename PathDict::String { PathDict::preferredSep };

return result;
}
Expand Down
17 changes: 14 additions & 3 deletions src/libutil/file-system.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,14 @@ namespace fs = std::filesystem;

namespace nix {

/** Treat the string as possibly an absolute path, by inspecting the start of it. Return whether it was probably intended to be absolute. */
/**
* Treat the string as possibly an absolute path, by inspecting the
* start of it. Return whether it was probably intended to be
* absolute.
*/
static bool isAbsolute(PathView path)
{
return !path.empty() && path[0] == '/';
return fs::path { path }.is_absolute();
}


Expand Down Expand Up @@ -69,6 +73,9 @@ Path canonPath(PathView path, bool resolveSymlinks)
if (!isAbsolute(path))
throw Error("not an absolute path: '%1%'", path);

// For Windows
auto rootName = fs::path { path }.root_name();

/* This just exists because we cannot set the target of `remaining`
(the callback parameter) directly to a newly-constructed string,
since it is `std::string_view`. */
Expand All @@ -78,7 +85,7 @@ Path canonPath(PathView path, bool resolveSymlinks)
arbitrary (but high) limit to prevent infinite loops. */
unsigned int followCount = 0, maxFollow = 1024;

return canonPathInner(
auto ret = canonPathInner<NativePathTrait>(
path,
[&followCount, &temp, maxFollow, resolveSymlinks]
(std::string & result, std::string_view & remaining) {
Expand All @@ -99,6 +106,10 @@ Path canonPath(PathView path, bool resolveSymlinks)
}
}
});

if (!rootName.empty())
ret = rootName.string() + std::move(ret);
return ret;
}


Expand Down
30 changes: 19 additions & 11 deletions tests/unit/libutil/tests.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@

#include <numeric>

#ifdef _WIN32
# define FS_SEP "\\"
# define FS_ROOT "C:" FS_SEP // Need a mounted one, C drive is likely
#else
# define FS_SEP "/"
# define FS_ROOT FS_SEP
#endif

namespace nix {

/* ----------- tests for util.hh ------------------------------------------------*/
Expand All @@ -18,9 +26,9 @@ namespace nix {
* --------------------------------------------------------------------------*/

TEST(absPath, doesntChangeRoot) {
auto p = absPath("/");
auto p = absPath(FS_ROOT);

ASSERT_EQ(p, "/");
ASSERT_EQ(p, FS_ROOT);
}


Expand Down Expand Up @@ -53,11 +61,11 @@ namespace nix {


TEST(absPath, pathIsCanonicalised) {
auto path = "/some/path/with/trailing/dot/.";
auto path = FS_ROOT "some/path/with/trailing/dot/.";
auto p1 = absPath(path);
auto p2 = absPath(p1);

ASSERT_EQ(p1, "/some/path/with/trailing/dot");
ASSERT_EQ(p1, FS_ROOT "some" FS_SEP "path" FS_SEP "with" FS_SEP "trailing" FS_SEP "dot");
ASSERT_EQ(p1, p2);
}

Expand All @@ -66,24 +74,24 @@ namespace nix {
* --------------------------------------------------------------------------*/

TEST(canonPath, removesTrailingSlashes) {
auto path = "/this/is/a/path//";
auto path = FS_ROOT "this/is/a/path//";
auto p = canonPath(path);

ASSERT_EQ(p, "/this/is/a/path");
ASSERT_EQ(p, FS_ROOT "this" FS_SEP "is" FS_SEP "a" FS_SEP "path");
}

TEST(canonPath, removesDots) {
auto path = "/this/./is/a/path/./";
auto path = FS_ROOT "this/./is/a/path/./";
auto p = canonPath(path);

ASSERT_EQ(p, "/this/is/a/path");
ASSERT_EQ(p, FS_ROOT "this" FS_SEP "is" FS_SEP "a" FS_SEP "path");
}

TEST(canonPath, removesDots2) {
auto path = "/this/a/../is/a////path/foo/..";
auto path = FS_ROOT "this/a/../is/a////path/foo/..";
auto p = canonPath(path);

ASSERT_EQ(p, "/this/is/a/path");
ASSERT_EQ(p, FS_ROOT "this" FS_SEP "is" FS_SEP "a" FS_SEP "path");
}

TEST(canonPath, requiresAbsolutePath) {
Expand Down Expand Up @@ -197,7 +205,7 @@ namespace nix {
* --------------------------------------------------------------------------*/

TEST(pathExists, rootExists) {
ASSERT_TRUE(pathExists("/"));
ASSERT_TRUE(pathExists(FS_ROOT));
}

TEST(pathExists, cwdExists) {
Expand Down

0 comments on commit c3e9e3d

Please sign in to comment.