Skip to content

Commit

Permalink
is_supported_remote_url
Browse files Browse the repository at this point in the history
  • Loading branch information
madsbk committed Oct 22, 2024
1 parent 36c551b commit 50f30c2
Showing 1 changed file with 14 additions and 6 deletions.
20 changes: 14 additions & 6 deletions cpp/src/io/utilities/datasource.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <sys/mman.h>
#include <unistd.h>

#include <regex>
#include <unordered_map>
#include <vector>

Expand Down Expand Up @@ -452,6 +453,16 @@ class remote_file_source : public datasource {
return datasource::buffer::create(std::move(h_data));
}

/**
 * @brief Is `url` referring to a remote file using a protocol KvikIO supports?
 *
 * The check is purely syntactic: `url` must begin with one of the schemes
 * `s3`, `http` or `https` (matched case-insensitively) immediately followed
 * by `://`. Nothing is done to verify that the remote resource exists.
 *
 * @param url The path or URL to inspect.
 * @return `true` if `url` starts with `s3://`, `http://` or `https://`,
 *         `false` otherwise (including for an empty string).
 */
static bool is_supported_remote_url(std::string const& url)
{
  // Regular expression to match "<s3|http|https>://" at the start of `url`.
  // The alternation must be grouped: without the parentheses, `|` binds looser
  // than both `^` and the `://` suffix, so the pattern would also accept any
  // string that merely starts with "s3" or contains "http" anywhere (e.g. the
  // local path "/data/http_dump/file.parquet").
  // Function-local static so the pattern is compiled once, not on every call.
  static std::regex const pattern{R"(^(s3|http|https)://)", std::regex_constants::icase};
  return std::regex_search(url, pattern);
}

private:
kvikio::RemoteHandle _kvikio_file;
};
Expand All @@ -470,14 +481,11 @@ std::unique_ptr<datasource> datasource::create(std::string const& filepath,

CUDF_FAIL("Invalid LIBCUDF_MMAP_ENABLED value: " + policy);
}();

if (use_memory_mapping) {
if (remote_file_source::is_supported_remote_url(filepath)) {
return std::make_unique<remote_file_source>(filepath.c_str());
} else if (use_memory_mapping) {
return std::make_unique<memory_mapped_source>(filepath.c_str(), offset, max_size_estimate);
} else {
// If this is a S3 filepath (i.e. "s3://<bucket>/<object>"), we create a remote file source
if (filepath.size() > 5 && filepath.substr(0, 5) == "s3://") {
return std::make_unique<remote_file_source>(filepath.c_str());
}
// `file_source` reads the file directly, without memory mapping
return std::make_unique<file_source>(filepath.c_str());
}
Expand Down

0 comments on commit 50f30c2

Please sign in to comment.