diff --git a/userspace/libsinsp/test/sinsp_utils.ut.cpp b/userspace/libsinsp/test/sinsp_utils.ut.cpp index 2f7ff5ea76..b781953e63 100644 --- a/userspace/libsinsp/test/sinsp_utils.ut.cpp +++ b/userspace/libsinsp/test/sinsp_utils.ut.cpp @@ -24,6 +24,12 @@ TEST(sinsp_utils_test, concatenate_paths) // Some tests were motivated by this resource: // https://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap04.html#tag_04_11 + // PLEASE NOTE: + // * current impl does not support unicode. + // * current impl does not sanitize path1 + // * current impl expects path1 to end with '/' + // * current impl skips path1 altogether if path2 is absolute + std::string path1, path2, res; res = sinsp_utils::concatenate_paths("", ""); @@ -32,12 +38,57 @@ TEST(sinsp_utils_test, concatenate_paths) path1 = ""; path2 = "../"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("..", res); + EXPECT_EQ("", res); + + path1 = ""; + path2 = ".."; + res = sinsp_utils::concatenate_paths(path1, path2); + EXPECT_EQ("", res); + + path1 = "/"; + path2 = "../"; + res = sinsp_utils::concatenate_paths(path1, path2); + EXPECT_EQ("/", res); + + path1 = "a"; + path2 = "../"; + res = sinsp_utils::concatenate_paths(path1, path2); + EXPECT_EQ("a..", res); // since the helper does not add any "/" between path1 and path2, we end up with this. + + path1 = "a/"; + path2 = "../"; + res = sinsp_utils::concatenate_paths(path1, path2); + EXPECT_EQ("", res); + + path1 = ""; + path2 = "/foo"; + res = sinsp_utils::concatenate_paths(path1, path2); + EXPECT_EQ("/foo", res); + + path1 = "foo/"; + path2 = "..//a"; + res = sinsp_utils::concatenate_paths(path1, path2); + EXPECT_EQ("a", res); // path2 has been sanitized, plus we moved up a folder because of ".." + + path1 = "/foo/"; + path2 = "..//a"; + res = sinsp_utils::concatenate_paths(path1, path2); + EXPECT_EQ("/a", res); // path2 has been sanitized, plus we moved up a folder because of ".." + + path1 = "heolo"; + path2 = "w////////////..//////.////////r.|"; + res = sinsp_utils::concatenate_paths(path1, path2); + EXPECT_EQ("r.|", res); // since the helper does not add any "/" between path1 and path2, we end up with this. + + path1 = "heolo"; + path2 = "w/////////////..//"; // heolow/////////////..// > heolow/..// -> / + res = sinsp_utils::concatenate_paths(path1, path2); + EXPECT_EQ("", res); // since the helper does not add any "/" between path1 and path2, we end up with this, ie a folder up from "heolow/" path1 = ""; path2 = "./"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ(".", res); + EXPECT_EQ("", res); path1 = ""; path2 = "dir/term"; @@ -92,27 +143,28 @@ TEST(sinsp_utils_test, concatenate_paths) path1 = "./app"; path2 = "custom/term"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("app/custom/term", res); + EXPECT_EQ("./appcustom/term", res); // since path1 is not '/' terminated, we expect a string concat without further path fields path1 = "/app"; path2 = "custom/term"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("/app/custom/term", res); + EXPECT_EQ("/appcustom/term", res); // since path1 is not '/' terminated, we expect a string concat without further path fields path1 = "app"; path2 = "custom/term"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("app/custom/term", res); + EXPECT_EQ("appcustom/term", res); // since path1 is not '/' terminated, we expect a string concat without further path fields - path1 = "app//"; + path1 = "app/"; path2 = "custom/term"; res = sinsp_utils::concatenate_paths(path1, path2); EXPECT_EQ("app/custom/term", res); + // We don't support sanitizing path1 path1 = "app/////"; path2 = "custom////term"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("app/custom/term", res); + EXPECT_EQ("app/////custom/term", res); path1 = "/"; path2 = "/app/custom/dir/././././../../term/"; @@ -124,6 +176,7 @@ TEST(sinsp_utils_test, concatenate_paths) res = sinsp_utils::concatenate_paths(path1, path2); EXPECT_EQ("/app", res); + /* No unicode support path1 = "/root/"; path2 = "../😉"; res = sinsp_utils::concatenate_paths(path1, path2); @@ -142,5 +195,5 @@ TEST(sinsp_utils_test, concatenate_paths) path1 = "/root"; path2 = "c:/hello/world/"; res = sinsp_utils::concatenate_paths(path1, path2); - EXPECT_EQ("/root/c:/hello/world", res); + EXPECT_EQ("/root/c:/hello/world", res); */ } diff --git a/userspace/libsinsp/utils.cpp b/userspace/libsinsp/utils.cpp index aa9075c533..d3af60eea5 100644 --- a/userspace/libsinsp/utils.cpp +++ b/userspace/libsinsp/utils.cpp @@ -613,36 +613,184 @@ std::filesystem::path workaround_win_root_name(std::filesystem::path p) return std::filesystem::path("./" + p.string()); } -std::string sinsp_utils::concatenate_paths(std::string_view path1, std::string_view path2, size_t max_len) +// +// Helper function to move a directory up in a path string +// +static inline void rewind_to_parent_path(const char* targetbase, char** tc, const char** pc, uint32_t delta) { - auto p1 = std::filesystem::path(path1, std::filesystem::path::format::generic_format); - auto p2 = std::filesystem::path(path2, std::filesystem::path::format::generic_format); + if(*tc <= targetbase + 1) + { + (*pc) += delta; + return; + } -#ifdef _WIN32 - // This is an ugly workaround to make sure we will not try to interpret root names (e.g. "c:/", "//?/") on Windows - // since this function only deals with unix-like paths - p1 = workaround_win_root_name(p1); - p2 = workaround_win_root_name(p2); -#endif // _WIN32 + (*tc)--; + + while((*tc) >= targetbase + 1 && *((*tc) - 1) != '/') + { + (*tc)--; + } - // note: if p2 happens to be an absolute path, p1 / p2 == p2 - auto path_concat = (p1 / p2).lexically_normal(); - std::string result = path_concat.generic_string(); + (*pc) += delta; +} - // - // If the path ends with a separator, remove it, as the OS does. - // - if (result.length() > 1 && result.back() == '/') +// +// Args: +// - target: the string where we are supposed to start copying +// - targetbase: the base of the path, i.e. the furthest we can go back when +// following parent directories +// - path: the path to copy +// +static inline void copy_and_sanitize_path(char* target, char* targetbase, const char *path, char separator) +{ + char* tc = target; + const char* pc = path; + g_invalidchar ic; + const bool empty_base = target == targetbase; + + while(true) + { + if(*pc == 0) + { + *tc = 0; + + // + // If the path ends with a separator, remove it, as the OS does. + // Properly manage case where path is just "/". + // + if((tc > (targetbase + 1)) && (*(tc - 1) == separator)) + { + *(tc - 1) = 0; + } + + return; + } + + if(ic(*pc)) + { + // + // Invalid char, substitute with a '.' + // + *tc = '.'; + tc++; + pc++; + } + else + { + // + // If path begins with '.' or '.' is the first char after a '/' + // + if(*pc == '.' && (tc == targetbase || *(tc - 1) == separator)) + { + // + // '../', rewind to the previous separator + // + if(*(pc + 1) == '.' && *(pc + 2) == separator) + { + rewind_to_parent_path(targetbase, &tc, &pc, 3); + } + // + // '..', with no separator. + // This is valid if we are at the end of the string, and in that case we rewind. + // + else if(*(pc + 1) == '.' && *(pc + 2) == 0) + { + rewind_to_parent_path(targetbase, &tc, &pc, 2); + } + // + // './', just skip it + // + else if(*(pc + 1) == separator) + { + pc += 2; + } + // + // '.', with no separator. + // This is valid if we are at the end of the string, and in that case we rewind. + // + else if(*(pc + 1) == 0) + { + pc++; + } + // + // Otherwise, we leave the string intact. + // + else + { + *tc = *pc; + pc++; + tc++; + } + } + else if(*pc == separator) + { + // + // separator: + // * if the last char is already a separator, skip it + // * if we are back at targetbase but targetbase was not empty before, it means we + // fully rewinded back to targetbase and the string is now empty. Skip separator. + // Example: "/foo/../a" -> "/a" BUT "foo/../a" -> "a" + // -> Otherwise: "foo/../a" -> "/a" + // + if((tc > targetbase && *(tc - 1) == separator) || (tc == targetbase && !empty_base)) + { + pc++; + } + else + { + *tc = *pc; + tc++; + pc++; + } + } + else + { + // + // Normal char, copy it + // + *tc = *pc; + tc++; + pc++; + } + } + } +} + +/* + * Return false if path2 is an absolute path. + * path1 MUST be '/' terminated. + * path1 is not sanitized. + * If path2 is absolute, we only account for it. + */ +static inline bool concatenate_paths_(char* target, uint32_t targetlen, const char* path1, uint32_t len1, + const char* path2, uint32_t len2) +{ + if(targetlen < (len1 + len2 + 1)) { - result.pop_back(); + strlcpy(target, "/PATH_TOO_LONG", targetlen); + return false; } - if (result.length() > max_len) + if(len2 != 0 && path2[0] != '/') + { + memcpy(target, path1, len1); + copy_and_sanitize_path(target + len1, target, path2, '/'); + return true; + } + else { - return "/PATH_TOO_LONG"; + target[0] = 0; + copy_and_sanitize_path(target, target, path2, '/'); + return false; } +} - return result; +std::string sinsp_utils::concatenate_paths(std::string_view path1, std::string_view path2) +{ + char fullpath[SCAP_MAX_PATH_SIZE]; + concatenate_paths_(fullpath, SCAP_MAX_PATH_SIZE, path1.data(), (uint32_t)path1.length(), path2.data(), + path2.size()); + return std::string(fullpath); } diff --git a/userspace/libsinsp/utils.h b/userspace/libsinsp/utils.h index 85e4e5ff21..30d7347a61 100644 --- a/userspace/libsinsp/utils.h +++ b/userspace/libsinsp/utils.h @@ -95,10 +95,11 @@ class sinsp_utils // // Concatenate posix-style path1 and path2 up to max_len in size, normalizing the result. + // path1 MUST be '/' terminated and is not sanitized. // If path2 is absolute, the result will be equivalent to path2. // If the result would be too long, the output will contain the string "/PATH_TOO_LONG" instead. // - static std::string concatenate_paths(std::string_view path1, std::string_view path2, size_t max_len=SCAP_MAX_PATH_SIZE-1); + static std::string concatenate_paths(std::string_view path1, std::string_view path2); // // Determines if an IPv6 address is IPv4-mapped