Skip to content

Commit

Permalink
Support arbitrary strings in MAP_FLAT_COLS_STRUCT_KEYS (#2228)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #2228

Let's escape `,` and `;` characters in strings.

This is a follow-up to D38183826 (417c00b).

Reviewed By: abhash09

Differential Revision: D38511317

fbshipit-source-id: 573d64fff127819cdf0828c2b6ca7a45d51857e6
  • Loading branch information
Daniel Munoz authored and facebook-github-bot committed Aug 10, 2022
1 parent f81308d commit f0cbb46
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 11 deletions.
31 changes: 20 additions & 11 deletions velox/dwio/dwrf/common/Config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "velox/dwio/dwrf/common/Config.h"

#include "folly/String.h"
#include "folly/dynamic.h"

namespace facebook::velox::dwrf {

Expand Down Expand Up @@ -146,27 +147,35 @@ Config::Entry<const std::vector<std::vector<std::string>>>
"orc.map.flat.cols.struct.keys",
{},
[](const std::vector<std::vector<std::string>>& val) {
std::vector<std::string> columns;
std::vector<folly::dynamic> columns;
columns.reserve(val.size());
std::transform(
val.cbegin(),
val.cend(),
std::back_inserter(columns),
[](const auto& v) { return folly::join(",", v); });
return folly::join(";", columns);
[](const auto& v) {
return folly::dynamic::array(v.cbegin(), v.cend());
});
return folly::toJson(
folly::dynamic::array(columns.cbegin(), columns.cend()));
},
[](const std::string& val) {
std::vector<std::string> partialResult;
folly::split(";", val, partialResult);
folly::dynamic columns = folly::parseJson(val);
std::vector<std::vector<std::string>> result;
result.reserve(columns.size());
std::transform(
partialResult.cbegin(),
partialResult.cend(),
columns.begin(),
columns.end(),
std::back_inserter(result),
[](const auto& str) {
std::vector<std::string> res;
folly::split(",", str, res);
return res;
[](const auto& keys) {
std::vector<std::string> intermediateResult;
intermediateResult.reserve(keys.size());
std::transform(
keys.begin(),
keys.end(),
std::back_inserter(intermediateResult),
[](const auto& str) { return str.asString(); });
return intermediateResult;
});
return result;
});
Expand Down
11 changes: 11 additions & 0 deletions velox/dwio/dwrf/test/ColumnWriterTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,17 @@ TEST(ColumnWriterTests, TestMapWriterBigBatch) {
/* useFlatMap */ true);
}

// Stores a MAP_FLAT_COLS_STRUCT_KEYS value in a Config and reads it back,
// expecting the value returned by get() to equal the one passed to set().
// The sample keys deliberately contain ',', ';', quote characters and an
// empty key list.
TEST(ColumnWriterTests, TestStructKeysConfigSerializationDeserialization) {
  using StructKeys = std::vector<std::vector<std::string>>;

  const StructKeys expectedKeys{
      {"1.45", "hi, you;", "29102819", "1e-4"},
      {"291", "world"},
      {},
      {"one", ", more", "\"two'three$"}};

  const auto writerConfig = std::make_shared<Config>();
  // The entry's value type is const-qualified, so pass the const type
  // explicitly as the template argument.
  writerConfig->set<const StructKeys>(
      Config::MAP_FLAT_COLS_STRUCT_KEYS, expectedKeys);

  EXPECT_EQ(
      writerConfig->get(Config::MAP_FLAT_COLS_STRUCT_KEYS), expectedKeys);
}

std::unique_ptr<DwrfReader> getDwrfReader(
MemoryPool& pool,
const std::shared_ptr<const RowType> type,
Expand Down

0 comments on commit f0cbb46

Please sign in to comment.