Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replaced std::string with std::string_view and removed excessive copies in cudf::io #17734

Merged
merged 19 commits into from
Feb 4, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions cpp/benchmarks/json/json.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -190,10 +190,10 @@ static void bench_query(nvbench::state& state)
{
srand(5236);

auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const desired_bytes = static_cast<cudf::size_type>(state.get_int64("bytes"));
auto const query = state.get_int64("query");
auto const json_path = queries[query];
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const desired_bytes = static_cast<cudf::size_type>(state.get_int64("bytes"));
auto const query = state.get_int64("query");
std::string_view const json_path = queries[query];

auto const stream = cudf::get_default_stream();
auto input = build_json_string_column(desired_bytes, num_rows);
Expand Down
6 changes: 3 additions & 3 deletions cpp/benchmarks/string/join_strings.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -41,8 +41,8 @@ static void bench_join(nvbench::state& state)
state.add_global_memory_reads<nvbench::int8_t>(chars_size); // all bytes are read;
state.add_global_memory_writes<nvbench::int8_t>(chars_size); // all bytes are written

std::string separator(":");
std::string narep("null");
std::string_view separator(":");
std::string_view narep("null");
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto result = cudf::strings::join_strings(input, separator, narep);
});
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/like.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,7 @@ static void bench_like(nvbench::state& state)
auto input = cudf::strings_column_view(col->view());

// This pattern forces reading the entire target string (when matched expected)
auto pattern = std::string("% 5W4_"); // regex equivalent: ".* 5W4.$"
auto pattern = std::string_view("% 5W4_"); // regex equivalent: ".* 5W4.$"

state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
// gather some throughput statistics as well
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/string/replace_re.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -49,7 +49,7 @@ static void bench_replace(nvbench::state& state)
cudf::strings::replace_with_backrefs(input, *program, replacement);
});
} else {
auto replacement = std::string("77");
auto replacement = std::string_view("77");
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
cudf::strings::replace_re(input, *program, replacement);
});
Expand Down
4 changes: 2 additions & 2 deletions cpp/examples/strings/libcudf_apis.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -53,7 +53,7 @@ std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,

auto const last_initial_first = cudf::table_view({last_initial->view(), first});

auto result = cudf::strings::concatenate(last_initial_first, std::string(" "));
auto result = cudf::strings::concatenate(last_initial_first, std::string_view(" "));

cudaStreamSynchronize(0);

Expand Down
20 changes: 10 additions & 10 deletions cpp/include/cudf/io/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -916,7 +916,7 @@ class csv_reader_options_builder {
*/
csv_reader_options_builder& prefix(std::string pfx)
{
options._prefix = pfx;
options._prefix = std::move(pfx);
return *this;
}

Expand Down Expand Up @@ -1450,7 +1450,7 @@ class csv_writer_options {
*
* @return string to used for null entries
*/
[[nodiscard]] std::string get_na_rep() const { return _na_rep; }
[[nodiscard]] std::string const& get_na_rep() const { return _na_rep; }

/**
* @brief Whether to write headers to csv.
Expand All @@ -1471,7 +1471,7 @@ class csv_writer_options {
*
* @return Character used for separating lines
*/
[[nodiscard]] std::string get_line_terminator() const { return _line_terminator; }
[[nodiscard]] std::string const& get_line_terminator() const { return _line_terminator; }

/**
* @brief Returns character used for separating column values.
Expand All @@ -1485,14 +1485,14 @@ class csv_writer_options {
*
* @return string used for values != 0 in INT8 types
*/
[[nodiscard]] std::string get_true_value() const { return _true_value; }
[[nodiscard]] std::string const& get_true_value() const { return _true_value; }

/**
* @brief Returns string used for values == 0 in INT8 types.
*
* @return string used for values == 0 in INT8 types
*/
[[nodiscard]] std::string get_false_value() const { return _false_value; }
[[nodiscard]] std::string const& get_false_value() const { return _false_value; }

/**
* @brief Returns the quote style for the writer.
Expand All @@ -1519,7 +1519,7 @@ class csv_writer_options {
*
* @param val String to represent null value
*/
void set_na_rep(std::string val) { _na_rep = val; }
void set_na_rep(std::string val) { _na_rep = std::move(val); }

/**
* @brief Enables/Disables headers being written to csv.
Expand All @@ -1540,7 +1540,7 @@ class csv_writer_options {
*
* @param term Character to represent line termination
*/
void set_line_terminator(std::string term) { _line_terminator = term; }
void set_line_terminator(std::string term) { _line_terminator = std::move(term); }

/**
* @brief Sets character used for separating column values.
Expand All @@ -1554,14 +1554,14 @@ class csv_writer_options {
*
* @param val String to represent values != 0 in INT8 types
*/
void set_true_value(std::string val) { _true_value = val; }
void set_true_value(std::string val) { _true_value = std::move(val); }

/**
* @brief Sets string used for values == 0 in INT8 types.
*
* @param val String to represent values == 0 in INT8 types
*/
void set_false_value(std::string val) { _false_value = val; }
void set_false_value(std::string val) { _false_value = std::move(val); }

/**
* @brief (Re)sets the table being written.
Expand Down
9 changes: 5 additions & 4 deletions cpp/include/cudf/io/text/detail/trie.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,6 +27,7 @@
#include <algorithm>
#include <queue>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

Expand Down Expand Up @@ -128,7 +129,7 @@ struct trie {
/**
* @brief Insert the string in to the trie tree, growing the trie as necessary
*/
void insert(std::string s) { insert(s.c_str(), s.size(), 0); }
void insert(std::string_view s) { insert(s.data(), s.size(), 0); }

private:
trie_builder_node& insert(char const* s, uint16_t size, uint8_t depth)
Expand Down Expand Up @@ -164,12 +165,12 @@ struct trie {
* @param mr Memory resource to use for the device memory allocation
* @return The trie.
*/
static trie create(std::string const& pattern,
static trie create(std::string pattern,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)

{
return create(std::vector<std::string>{pattern}, stream, mr);
return create(std::vector<std::string>{std::move(pattern)}, stream, mr);
}

/**
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/cudf/io/text/multibyte_split.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,6 +25,7 @@

#include <memory>
#include <optional>
#include <string_view>

namespace CUDF_EXPORT cudf {
namespace io {
Expand Down Expand Up @@ -90,7 +91,7 @@ struct parse_options {
*/
std::unique_ptr<cudf::column> multibyte_split(
data_chunk_source const& source,
std::string const& delimiter,
std::string_view delimiter,
parse_options options = {},
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
Expand Down
28 changes: 15 additions & 13 deletions cpp/include/cudf/io/types.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -342,8 +342,8 @@ struct source_info {
*
* @param file_paths Input files paths
*/
explicit source_info(std::vector<std::string> const& file_paths)
: _type(io_type::FILEPATH), _filepaths(file_paths)
explicit source_info(std::vector<std::string> file_paths)
: _type(io_type::FILEPATH), _filepaths(std::move(file_paths))
lamarrr marked this conversation as resolved.
Show resolved Hide resolved
{
}

Expand All @@ -352,8 +352,8 @@ struct source_info {
*
* @param file_path Single input file
*/
explicit source_info(std::string const& file_path)
: _type(io_type::FILEPATH), _filepaths({file_path})
explicit source_info(std::string file_path)
: _type(io_type::FILEPATH), _filepaths({std::move(file_path)})
{
}

Expand Down Expand Up @@ -523,8 +523,8 @@ struct sink_info {
*
* @param file_paths Output files paths
*/
explicit sink_info(std::vector<std::string> const& file_paths)
: _type(io_type::FILEPATH), _num_sinks(file_paths.size()), _filepaths(file_paths)
explicit sink_info(std::vector<std::string> file_paths)
: _type(io_type::FILEPATH), _num_sinks(file_paths.size()), _filepaths(std::move(file_paths))
{
}

Expand All @@ -533,8 +533,8 @@ struct sink_info {
*
* @param file_path Single output file path
*/
explicit sink_info(std::string const& file_path)
: _type(io_type::FILEPATH), _filepaths({file_path})
explicit sink_info(std::string file_path)
: _type(io_type::FILEPATH), _filepaths({std::move(file_path)})
{
}

Expand All @@ -543,8 +543,8 @@ struct sink_info {
*
* @param buffers Output host buffers
*/
explicit sink_info(std::vector<std::vector<char>*> const& buffers)
: _type(io_type::HOST_BUFFER), _num_sinks(buffers.size()), _buffers(buffers)
explicit sink_info(std::vector<std::vector<char>*> buffers)
: _type(io_type::HOST_BUFFER), _num_sinks(buffers.size()), _buffers(std::move(buffers))
{
}
/**
Expand All @@ -560,7 +560,9 @@ struct sink_info {
* @param user_sinks Output user-implemented sinks
*/
explicit sink_info(std::vector<cudf::io::data_sink*> const& user_sinks)
: _type(io_type::USER_IMPLEMENTED), _num_sinks(user_sinks.size()), _user_sinks(user_sinks)
: _type(io_type::USER_IMPLEMENTED),
_num_sinks(user_sinks.size()),
_user_sinks(std::move(user_sinks))
{
}

Expand Down Expand Up @@ -810,7 +812,7 @@ class column_in_metadata {
*
* @return The name of this column
*/
[[nodiscard]] std::string get_name() const noexcept { return _name; }
[[nodiscard]] std::string const& get_name() const noexcept { return _name; }

/**
* @brief Get whether nullability has been explicitly set for this column.
Expand Down
6 changes: 4 additions & 2 deletions cpp/include/cudf/scalar/scalar.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,6 +27,8 @@
#include <rmm/device_buffer.hpp>
#include <rmm/device_scalar.hpp>

#include <string_view>

/**
* @file
* @brief Class definitions for cudf::scalar
Expand Down Expand Up @@ -464,7 +466,7 @@ class string_scalar : public scalar {
* @param stream CUDA stream used for device memory operations.
* @param mr Device memory resource to use for device memory allocation.
*/
string_scalar(std::string const& string,
string_scalar(std::string_view string,
bool is_valid = true,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
Expand Down
15 changes: 8 additions & 7 deletions cpp/src/io/avro/avro.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
* Copyright (c) 2019-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -297,7 +297,7 @@ enum attrtype_e {
*
* @returns true if successful, false if error
*/
bool schema_parser::parse(std::vector<schema_entry>& schema, std::string const& json_str)
bool schema_parser::parse(std::vector<schema_entry>& schema, std::string_view json_str)
{
// Empty schema
if (json_str == "[]") return true;
Expand All @@ -306,7 +306,7 @@ bool schema_parser::parse(std::vector<schema_entry>& schema, std::string const&
int depth = 0, parent_idx = -1, entry_idx = -1;
json_state_e state = state_attrname;
std::string str;
std::unordered_map<std::string, type_kind_e> const typenames = {
std::unordered_map<std::string_view, type_kind_e> const typenames = {
lamarrr marked this conversation as resolved.
Show resolved Hide resolved
{"null", type_null},
{"boolean", type_boolean},
{"int", type_int},
Expand All @@ -329,17 +329,17 @@ bool schema_parser::parse(std::vector<schema_entry>& schema, std::string const&
{"local-timestamp-millis", type_local_timestamp_millis},
{"local-timestamp-micros", type_local_timestamp_micros},
{"duration", type_duration}};
std::unordered_map<std::string, attrtype_e> const attrnames = {
std::unordered_map<std::string_view, attrtype_e> const attrnames = {
{"type", attrtype_type},
{"name", attrtype_name},
{"fields", attrtype_fields},
{"symbols", attrtype_symbols},
{"items", attrtype_items},
{"logicalType", attrtype_logicaltype}};
attrtype_e cur_attr = attrtype_none;
m_base = json_str.c_str();
m_base = json_str.begin();
m_cur = m_base;
m_end = m_base + json_str.length();
m_end = json_str.end();
while (more_data()) {
int const c = *m_cur++;
switch (c) {
Expand Down Expand Up @@ -487,7 +487,8 @@ std::string schema_parser::get_str()
;
auto len = static_cast<int32_t>(cur - start - 1);
m_cur = cur;
return s.assign(start, std::max(len, 0));
s.assign(start, std::max(len, 0));
return s;
}

} // namespace avro
Expand Down
Loading
Loading