From 4531585275254f13dae1ff61434e15865a1e796a Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 29 Jan 2024 17:16:18 -0500 Subject: [PATCH 1/2] Factor out the Unix-specific parts of `canonPathInner` This prepares the code to also support Windows paths in the next commit. --- src/libutil/canon-path.cc | 2 +- src/libutil/file-path-impl.hh | 52 +++++++++++++++++++++++++++++------ src/libutil/file-system.cc | 2 +- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/libutil/canon-path.cc b/src/libutil/canon-path.cc index fcd53862bf5..27f048697e5 100644 --- a/src/libutil/canon-path.cc +++ b/src/libutil/canon-path.cc @@ -8,7 +8,7 @@ CanonPath CanonPath::root = CanonPath("/"); static std::string absPathPure(std::string_view path) { - return canonPathInner(path, [](auto &, auto &){}); + return canonPathInner<UnixPathTrait>(path, [](auto &, auto &){}); } CanonPath::CanonPath(std::string_view raw) diff --git a/src/libutil/file-path-impl.hh b/src/libutil/file-path-impl.hh index 39159c7c29a..941d433e0a9 100644 --- a/src/libutil/file-path-impl.hh +++ b/src/libutil/file-path-impl.hh @@ -10,6 +10,39 @@ namespace nix { +/** + * Unix-style path primitives. + * + * Nix's own "logical" paths are always Unix-style. So this is always + * used for that, and additionally used for native paths on Unix. + */ +struct UnixPathTrait +{ + using CharT = char; + + using String = std::string; + + using StringView = std::string_view; + + constexpr static char preferredSep = '/'; + + static inline bool isPathSep(char c) + { + return c == '/'; + } + + static inline size_t findPathSep(StringView path, size_t from = 0) + { + return path.find('/', from); + } + + static inline size_t rfindPathSep(StringView path, size_t from = StringView::npos) + { + return path.rfind('/', from); + } +}; + + /** * Core pure path canonicalization algorithm. * @@ -24,25 +57,26 @@ namespace nix { * This is a chance to modify those two paths in arbitrary way, e.g. if * "result" points to a symlink.
*/ -typename std::string canonPathInner( - std::string_view remaining, +template<class PathDict> +typename PathDict::String canonPathInner( + typename PathDict::StringView remaining, auto && hookComponent) { assert(remaining != ""); - std::string result; + typename PathDict::String result; result.reserve(256); while (true) { /* Skip slashes. */ - while (!remaining.empty() && remaining[0] == '/') + while (!remaining.empty() && PathDict::isPathSep(remaining[0])) remaining.remove_prefix(1); if (remaining.empty()) break; auto nextComp = ({ - auto nextPathSep = remaining.find('/'); + auto nextPathSep = PathDict::findPathSep(remaining); nextPathSep == remaining.npos ? remaining : remaining.substr(0, nextPathSep); }); @@ -53,14 +87,14 @@ typename std::string canonPathInner( /* If `..', delete the last component. */ else if (nextComp == "..") { - if (!result.empty()) result.erase(result.rfind('/')); + if (!result.empty()) result.erase(PathDict::rfindPathSep(result)); remaining.remove_prefix(2); } /* Normal component; copy it. */ else { - result += '/'; - if (const auto slash = remaining.find('/'); slash == result.npos) { + result += PathDict::preferredSep; + if (const auto slash = PathDict::findPathSep(remaining); slash == result.npos) { result += remaining; remaining = {}; } else { @@ -73,7 +107,7 @@ typename std::string canonPathInner( } if (result.empty()) - result = "/"; + result = typename PathDict::String { PathDict::preferredSep }; return result; } diff --git a/src/libutil/file-system.cc b/src/libutil/file-system.cc index 3c019a9ed19..ff83bc4ea5d 100644 --- a/src/libutil/file-system.cc +++ b/src/libutil/file-system.cc @@ -78,7 +78,7 @@ Path canonPath(PathView path, bool resolveSymlinks) arbitrary (but high) limit to prevent infinite loops.
*/ unsigned int followCount = 0, maxFollow = 1024; - return canonPathInner( + return canonPathInner<UnixPathTrait>( path, [&followCount, &temp, maxFollow, resolveSymlinks] (std::string & result, std::string_view & remaining) { From 319ec6f84accb7342160b856185402dcdebbaba9 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 14 Jan 2024 14:30:25 -0500 Subject: [PATCH 2/2] Support Windows paths in `canonPath` and `absPath` `canonPath` and `absPath` work on native paths, and so should switch between supporting Unix paths and Windows paths accordingly. The templating is because `CanonPath`, which shares the implementation, should always be Unix style. It is the pure "nix-native" path type for virtual file operations --- it is part of Nix's "business logic", and should not vary with the host OS accordingly. --- src/libutil/file-path-impl.hh | 61 +++++++++++++++++++++++++++++++++++ src/libutil/file-system.cc | 17 ++++++++-- tests/unit/libutil/tests.cc | 30 ++++++++++------- 3 files changed, 94 insertions(+), 14 deletions(-) diff --git a/src/libutil/file-path-impl.hh b/src/libutil/file-path-impl.hh index 941d433e0a9..4c90150fdc9 100644 --- a/src/libutil/file-path-impl.hh +++ b/src/libutil/file-path-impl.hh @@ -43,6 +43,67 @@ struct UnixPathTrait }; +/** + * Windows-style path primitives. + * + * The character type is a parameter because while windows paths rightly + * work over UTF-16 (*) using `wchar_t`, at the current time we are + * often manipulating them converted to UTF-8 (*) using `char`. + * + * (Actually neither are guaranteed to be valid unicode; both are + * arbitrary non-0 8- or 16-bit bytes. But for characters with special + * meaning like '/', '\\', ':', etc., we refer to an encoding scheme, + * and also for sake of UIs that display paths as text.)
+ */ +template<class CharT0> +struct WindowsPathTrait +{ + using CharT = CharT0; + + using String = std::basic_string<CharT>; + + using StringView = std::basic_string_view<CharT>; + + constexpr static CharT preferredSep = '\\'; + + static inline bool isPathSep(CharT c) + { + return c == '/' || c == preferredSep; + } + + static size_t findPathSep(StringView path, size_t from = 0) + { + size_t p1 = path.find('/', from); + size_t p2 = path.find(preferredSep, from); + return p1 == String::npos ? p2 : + p2 == String::npos ? p1 : + std::min(p1, p2); + } + + static size_t rfindPathSep(StringView path, size_t from = String::npos) + { + size_t p1 = path.rfind('/', from); + size_t p2 = path.rfind(preferredSep, from); + return p1 == String::npos ? p2 : + p2 == String::npos ? p1 : + std::max(p1, p2); + } +}; + + +/** + * @todo Revisit choice of `char` or `wchar_t` for `WindowsPathTrait` + * argument. + */ +using NativePathTrait = +#ifdef _WIN32 + WindowsPathTrait<char> +#else + UnixPathTrait +#endif + ; + + /** * Core pure path canonicalization algorithm. * diff --git a/src/libutil/file-system.cc b/src/libutil/file-system.cc index ff83bc4ea5d..b0a3f0797b4 100644 --- a/src/libutil/file-system.cc +++ b/src/libutil/file-system.cc @@ -22,10 +22,14 @@ namespace fs = std::filesystem; namespace nix { -/** Treat the string as possibly an absolute path, by inspecting the start of it. Return whether it was probably intended to be absolute. */ +/** + * Treat the string as possibly an absolute path, by inspecting the + * start of it. Return whether it was probably intended to be + * absolute.
+ */ static bool isAbsolute(PathView path) { - return !path.empty() && path[0] == '/'; + return fs::path { path }.is_absolute(); } @@ -69,6 +73,9 @@ Path canonPath(PathView path, bool resolveSymlinks) if (!isAbsolute(path)) throw Error("not an absolute path: '%1%'", path); + // For Windows + auto rootName = fs::path { path }.root_name(); + /* This just exists because we cannot set the target of `remaining` (the callback parameter) directly to a newly-constructed string, since it is `std::string_view`. */ @@ -78,7 +85,7 @@ Path canonPath(PathView path, bool resolveSymlinks) arbitrary (but high) limit to prevent infinite loops. */ unsigned int followCount = 0, maxFollow = 1024; - return canonPathInner<UnixPathTrait>( + auto ret = canonPathInner<NativePathTrait>( path, [&followCount, &temp, maxFollow, resolveSymlinks] (std::string & result, std::string_view & remaining) { @@ -99,6 +106,10 @@ Path canonPath(PathView path, bool resolveSymlinks) } } }); + + if (!rootName.empty()) + ret = rootName.string() + std::move(ret); + return ret; } diff --git a/tests/unit/libutil/tests.cc b/tests/unit/libutil/tests.cc index 568f03f702d..4406fd18455 100644 --- a/tests/unit/libutil/tests.cc +++ b/tests/unit/libutil/tests.cc @@ -9,6 +9,14 @@ #include +#ifdef _WIN32 +# define FS_SEP "\\" +# define FS_ROOT "C:" FS_SEP // Need a mounted one, C drive is likely +#else +# define FS_SEP "/" +# define FS_ROOT FS_SEP +#endif + namespace nix { /* ----------- tests for util.hh ------------------------------------------------*/ @@ -18,9 +26,9 @@ namespace nix { * --------------------------------------------------------------------------*/ TEST(absPath, doesntChangeRoot) { - auto p = absPath("/"); + auto p = absPath(FS_ROOT); - ASSERT_EQ(p, "/"); + ASSERT_EQ(p, FS_ROOT); } @@ -53,11 +61,11 @@ namespace nix { TEST(absPath, pathIsCanonicalised) { - auto path = "/some/path/with/trailing/dot/."; + auto path = FS_ROOT "some/path/with/trailing/dot/."; auto p1 = absPath(path); auto p2 = absPath(p1); - ASSERT_EQ(p1,
"/some/path/with/trailing/dot"); + ASSERT_EQ(p1, FS_ROOT "some" FS_SEP "path" FS_SEP "with" FS_SEP "trailing" FS_SEP "dot"); ASSERT_EQ(p1, p2); } @@ -66,24 +74,24 @@ namespace nix { * --------------------------------------------------------------------------*/ TEST(canonPath, removesTrailingSlashes) { - auto path = "/this/is/a/path//"; + auto path = FS_ROOT "this/is/a/path//"; auto p = canonPath(path); - ASSERT_EQ(p, "/this/is/a/path"); + ASSERT_EQ(p, FS_ROOT "this" FS_SEP "is" FS_SEP "a" FS_SEP "path"); } TEST(canonPath, removesDots) { - auto path = "/this/./is/a/path/./"; + auto path = FS_ROOT "this/./is/a/path/./"; auto p = canonPath(path); - ASSERT_EQ(p, "/this/is/a/path"); + ASSERT_EQ(p, FS_ROOT "this" FS_SEP "is" FS_SEP "a" FS_SEP "path"); } TEST(canonPath, removesDots2) { - auto path = "/this/a/../is/a////path/foo/.."; + auto path = FS_ROOT "this/a/../is/a////path/foo/.."; auto p = canonPath(path); - ASSERT_EQ(p, "/this/is/a/path"); + ASSERT_EQ(p, FS_ROOT "this" FS_SEP "is" FS_SEP "a" FS_SEP "path"); } TEST(canonPath, requiresAbsolutePath) { @@ -197,7 +205,7 @@ namespace nix { * --------------------------------------------------------------------------*/ TEST(pathExists, rootExists) { - ASSERT_TRUE(pathExists("/")); + ASSERT_TRUE(pathExists(FS_ROOT)); } TEST(pathExists, cwdExists) {