Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consolidate redundant split_string_to_array, split_string_on & split_path methods #465

Merged
3 changes: 2 additions & 1 deletion ci/iwyu/mappings.imp
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@
{ "symbol": ["nlohmann::json", "private", "<nlohmann/json_fwd.hpp>", "public"] },

# pybind11
{ "include": [ "<pybind11/detail/common.h>", private, "<pybind11/pybind11.h>", "public" ] },
{ "include": [ "@<pybind11/detail/.*.h>", private, "<pybind11/pybind11.h>", "public" ] },


{ "symbol": ["pybind11", "private", "<pybind11/cast.h>", "public"] },
{ "symbol": ["pybind11", "private", "<pybind11/embed.h>", "public"] },
Expand Down
6 changes: 2 additions & 4 deletions cpp/mrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ add_library(libmrc
src/public/runnable/types.cpp
src/public/runtime/remote_descriptor.cpp
src/public/utils/bytes_to_string.cpp
src/public/utils/string_utils.cpp
src/public/utils/thread_utils.cpp
src/public/utils/type_utils.cpp
)
Expand Down Expand Up @@ -196,7 +197,7 @@ target_compile_definitions(libmrc
$<$<BOOL:${MRC_BUILD_BENCHMARKS}>:MRC_ENABLE_BENCHMARKING>
)

if (MRC_ENABLE_CODECOV)
if(MRC_ENABLE_CODECOV)
target_compile_definitions(libmrc INTERFACE "MRC_CODECOV_ENABLED")
endif()

Expand All @@ -206,7 +207,6 @@ set_target_properties(libmrc PROPERTIES OUTPUT_NAME ${PROJECT_NAME})

# ##################################################################################################
# - install targets --------------------------------------------------------------------------------

rapids_cmake_install_lib_dir(lib_dir)
include(CPack)
include(GNUInstallDirs)
Expand All @@ -226,7 +226,6 @@ install(

# ##################################################################################################
# - subdirectories ---------------------------------------------------------------------------------

if(MRC_BUILD_TESTS)
add_subdirectory(tests)

Expand All @@ -239,7 +238,6 @@ endif()

# ##################################################################################################
# - install export ---------------------------------------------------------------------------------

set(doc_string
[=[
Provide targets for mrc.
Expand Down
19 changes: 17 additions & 2 deletions cpp/mrc/include/mrc/utils/string_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -17,8 +17,23 @@

#pragma once

#include <sstream>
// for ostringstream
#include <sstream> // IWYU pragma: keep
#include <string>
#include <vector>

// Concats multiple strings together using ostringstream. Use with MRC_CONCAT_STR("Start [" << my_int << "]")
#define MRC_CONCAT_STR(strs) ((std::ostringstream&)(std::ostringstream() << strs)).str()

namespace mrc {

/**
* @brief Splits a string into an vector of strings based on a delimiter.
*
* @param str The string to split.
* @param delimiter The delimiter to split the string on.
* @return std::vector<std::string> vector array of strings.
*/
std::vector<std::string> split_string_to_vector(const std::string& str, const std::string& delimiter);

} // namespace mrc
28 changes: 7 additions & 21 deletions cpp/mrc/src/internal/utils/parse_config.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -19,29 +19,15 @@

#include "./parse_ints.hpp"

#include "mrc/utils/string_utils.hpp" // for split_string_to_array

#include <glog/logging.h>

#include <cstdint> // for uint32_t
#include <cstdlib> // for atoi
#include <iostream>
#include <stdexcept>
#include <utility> // for move

namespace {

std::vector<std::string> split_string_on(std::string str, char delim)
{
std::vector<std::string> tokens;
std::istringstream f(str);
std::string s;
while (std::getline(f, s, delim))
{
tokens.push_back(s);
}
return tokens;
}
} // namespace

namespace mrc {

ConfigurationMap parse_config(std::string config_str)
Expand All @@ -50,9 +36,9 @@

bool left_wildcard = false;

for (const auto& entry : split_string_on(config_str, ';'))
for (const auto& entry : split_string_to_vector(config_str, ";"))

Check warning on line 39 in cpp/mrc/src/internal/utils/parse_config.cpp

View check run for this annotation

Codecov / codecov/patch

cpp/mrc/src/internal/utils/parse_config.cpp#L39

Added line #L39 was not covered by tests
{
auto tokens = split_string_on(entry, ':');
auto tokens = split_string_to_vector(entry, ":");

Check warning on line 41 in cpp/mrc/src/internal/utils/parse_config.cpp

View check run for this annotation

Codecov / codecov/patch

cpp/mrc/src/internal/utils/parse_config.cpp#L41

Added line #L41 was not covered by tests

int concurrency = 1;
std::vector<std::string> s;
Expand All @@ -76,7 +62,7 @@
concurrency = std::atoi(tokens[1].c_str());
case 1:
// parse segments
s = split_string_on(tokens[0], ',');
s = split_string_to_vector(tokens[0], ",");

Check warning on line 65 in cpp/mrc/src/internal/utils/parse_config.cpp

View check run for this annotation

Codecov / codecov/patch

cpp/mrc/src/internal/utils/parse_config.cpp#L65

Added line #L65 was not covered by tests
segments.insert(s.begin(), s.end());
break;

Expand All @@ -86,7 +72,7 @@
"<segment_set>:<concurrency=1>:<group_set:*>;[repeated]");
}

config.push_back(std::make_tuple(std::move(segments), concurrency, std::move(groups)));
config.emplace_back(std::move(segments), concurrency, std::move(groups));

Check warning on line 75 in cpp/mrc/src/internal/utils/parse_config.cpp

View check run for this annotation

Codecov / codecov/patch

cpp/mrc/src/internal/utils/parse_config.cpp#L75

Added line #L75 was not covered by tests
}

return config;
Expand Down
19 changes: 5 additions & 14 deletions cpp/mrc/src/internal/utils/parse_ints.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -17,6 +17,8 @@

#include "./parse_ints.hpp"

#include "mrc/utils/string_utils.hpp" // for split_string_to_array

#include <glog/logging.h>

#include <iostream>
Expand All @@ -31,28 +33,17 @@ int convert_string2_int(const std::string& str)
return x;
}

std::vector<std::string> split_string_to_array(const std::string& str, char splitter)
{
std::vector<std::string> tokens;
std::stringstream ss(str);
std::string temp;
while (getline(ss, temp, splitter)) // split into new "lines" based on character
{
tokens.push_back(temp);
}
return tokens;
}
} // namespace

namespace mrc {

std::vector<int> parse_ints(const std::string& data)
{
std::vector<int> result;
std::vector<std::string> tokens = split_string_to_array(data, ',');
std::vector<std::string> tokens = split_string_to_vector(data, ",");
for (auto& token : tokens)
{
std::vector<std::string> range = split_string_to_array(token, '-');
std::vector<std::string> range = split_string_to_vector(token, "-");
if (range.size() == 1)
{
result.push_back(convert_string2_int(range[0]));
Expand Down
36 changes: 36 additions & 0 deletions cpp/mrc/src/public/utils/string_utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "mrc/utils/string_utils.hpp"

#include <boost/algorithm/string.hpp> // for split
// We already have <boost/algorithm/string.hpp> included we don't need these others, it is also the only public header
// with a definition for boost::is_any_of, so even if we replaced string.hpp with these others we would still need to
// include string.hpp or a detail/ header
// IWYU pragma: no_include <boost/algorithm/string/classification.hpp>
// IWYU pragma: no_include <boost/algorithm/string/split.hpp>
// IWYU pragma: no_include <boost/iterator/iterator_facade.hpp>

namespace mrc {
std::vector<std::string> split_string_to_vector(const std::string& str, const std::string& delimiter)
{
std::vector<std::string> results;
boost::split(results, str, boost::is_any_of(delimiter));
return results;
}

} // namespace mrc
1 change: 1 addition & 0 deletions cpp/mrc/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ add_executable(test_mrc
test_node.cpp
test_pipeline.cpp
test_segment.cpp
test_string_utils.cpp
test_thread.cpp
test_type_utils.cpp
)
Expand Down
58 changes: 58 additions & 0 deletions cpp/mrc/tests/test_string_utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "./test_mrc.hpp" // IWYU pragma: associated

#include "mrc/utils/string_utils.hpp"

#include <gtest/gtest.h> // for EXPECT_EQ

#include <string>
#include <vector>

namespace mrc {

TEST_CLASS(StringUtils);

TEST_F(TestStringUtils, TestSplitStringToVector)
{
struct TestValues
{
std::string str;
std::string delimiter;
std::vector<std::string> expected_result;
};

std::vector<TestValues> values = {
{"Hello,World,!", ",", {"Hello", "World", "!"}},
{"a/b/c", "/", {"a", "b", "c"}},
{"/a/b/c", "/", {"", "a", "b", "c"}}, // leading delimeter
{"a/b/c/", "/", {"a", "b", "c", ""}}, // trailing delimeter
{"abcd", "/", {"abcd"}}, // no delimeter
{"", "/", {""}}, // empty string
{"/", "/", {"", ""}}, // single delimeter
{"//", "/", {"", "", ""}}, // duplicate delimeter
};

for (const auto& value : values)
{
auto result = mrc::split_string_to_vector(value.str, value.delimiter);
EXPECT_EQ(result, value.expected_result);
}
}

} // namespace mrc
8 changes: 5 additions & 3 deletions python/mrc/_pymrc/include/pymrc/utilities/json_values.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
#include <pybind11/pybind11.h> // for PYBIND11_EXPORT, pybind11::object, type_caster

#include <cstddef> // for size_t
#include <string>
// IWYU wants us to use the pybind11.h for the PYBIND11_EXPORT macro, but we already have it in pytypes.h
// IWYU pragma: no_include <pybind11/pybind11.h>
#include <map> // for map
#include <string> // for string
#include <utility> // for move
// IWYU pragma: no_include <pybind11/cast.h>
// IWYU pragma: no_include <pybind11/pytypes.h>

namespace mrc::pymrc {

Expand Down
14 changes: 2 additions & 12 deletions python/mrc/_pymrc/src/utilities/json_values.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,8 @@
#include "pymrc/utilities/acquire_gil.hpp"
#include "pymrc/utils.hpp"

#include "mrc/utils/string_utils.hpp" // for MRC_CONCAT_STR
#include "mrc/utils/string_utils.hpp" // for MRC_CONCAT_STR, split_string_to_array

#include <boost/algorithm/string.hpp> // for split
#include <glog/logging.h>
#include <pybind11/cast.h>

Expand All @@ -34,23 +33,14 @@
#include <utility> // for move
#include <vector> // for vector

// We already have <boost/algorithm/string.hpp> included we don't need these others, it is also the only public header
// with a definition for boost::is_any_of, so even if we replaced string.hpp with these others we would still need to
// include string.hpp or a detail/ header
// IWYU pragma: no_include <boost/algorithm/string/classification.hpp>
// IWYU pragma: no_include <boost/algorithm/string/split.hpp>
// IWYU pragma: no_include <boost/iterator/iterator_facade.hpp>

namespace py = pybind11;
using namespace std::string_literals;

namespace {

std::vector<std::string> split_path(const std::string& path)
{
std::vector<std::string> path_parts;
boost::split(path_parts, path, boost::is_any_of("/"));
return path_parts;
return mrc::split_string_to_vector(path, "/"s);
}

struct PyFoundObject
Expand Down
Loading