Skip to content

Commit

Permalink
revert binary_like, review comments
Browse files Browse the repository at this point in the history
  • Loading branch information
bkietz committed Oct 25, 2023
1 parent 6f2e851 commit 2798197
Show file tree
Hide file tree
Showing 9 changed files with 55 additions and 41 deletions.
3 changes: 1 addition & 2 deletions cpp/src/arrow/array/builder_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,7 @@ struct AppendScalarImpl {
Status Visit(const Decimal256Type& t) { return HandleFixedWidth(t); }

template <typename T>
enable_if_t<is_binary_like_type<T>::value || is_string_like_type<T>::value, Status>
Visit(const T&) {
enable_if_has_string_view<T, Status> Visit(const T&) {
int64_t data_size = 0;
for (auto it = scalars_begin_; it != scalars_end_; ++it) {
const auto& scalar = checked_cast<const typename TypeTraits<T>::ScalarType&>(*it);
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/arrow/array/builder_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -583,8 +583,16 @@ class ARROW_EXPORT BinaryViewBuilder : public ArrayBuilder {
data_builder_(pool, alignment),
data_heap_builder_(pool, alignment) {}

/// Set the size for future preallocated data buffers.
///
/// The default size is 32KB, so after each 32KB of string data appended to the builder
/// a new data buffer will be allocated. Adjust this to a larger value to decrease the
/// frequency of allocation, or to a smaller value to lower the overhead of each
/// allocation.
void SetBlockSize(int64_t blocksize) { data_heap_builder_.SetBlockSize(blocksize); }

/// The number of bytes which can be appended to this builder without allocating another
/// data buffer.
int64_t current_block_bytes_remaining() const {
return data_heap_builder_.current_remaining_bytes();
}
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/array/concatenate_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ TEST_F(ConcatenateTest, StringType) {

TEST_F(ConcatenateTest, StringViewType) {
Check([this](int32_t size, double null_probability, std::shared_ptr<Array>* out) {
*out = rng_.StringView(size, /*min_length =*/0, /*max_length =*/40, null_probability);
*out = rng_.StringView(size, /*min_length =*/0, /*max_length =*/40, null_probability,
/*max_buffer_length=*/200);
ASSERT_OK((**out).ValidateFull());
});
}
Expand Down
31 changes: 16 additions & 15 deletions cpp/src/arrow/array/diff.cc
Original file line number Diff line number Diff line change
Expand Up @@ -476,30 +476,31 @@ class MakeFormatterImpl {
return Status::OK();
}

// format Binary, LargeBinary and FixedSizeBinary in hexadecimal
template <typename T>
enable_if_binary_like<T, Status> Visit(const T&) {
enable_if_has_string_view<T, Status> Visit(const T&) {
using ArrayType = typename TypeTraits<T>::ArrayType;
impl_ = [](const Array& array, int64_t index, std::ostream* os) {
*os << HexEncode(checked_cast<const ArrayType&>(array).GetView(index));
std::string_view view = checked_cast<const ArrayType&>(array).GetView(index);
if constexpr (T::is_utf8) {
// format String and StringView with \"\n\r\t\\ escaped
*os << '"' << Escape(view) << '"';
} else {
// format Binary, LargeBinary, BinaryView, and FixedSizeBinary in hexadecimal
*os << HexEncode(view);
}
};
return Status::OK();
}

// format Strings with \"\n\r\t\\ escaped
// format Decimals with Decimal___Array::FormatValue
template <typename T>
enable_if_string_like<T, Status> Visit(const T&) {
using ArrayType = typename TypeTraits<T>::ArrayType;
impl_ = [](const Array& array, int64_t index, std::ostream* os) {
*os << "\"" << Escape(checked_cast<const ArrayType&>(array).GetView(index)) << "\"";
};
return Status::OK();
}

// format Decimals with Decimal128Array::FormatValue
Status Visit(const Decimal128Type&) {
enable_if_decimal<T, Status> Visit(const T&) {
impl_ = [](const Array& array, int64_t index, std::ostream* os) {
*os << checked_cast<const Decimal128Array&>(array).FormatValue(index);
if constexpr (T::type_id == Type::DECIMAL128) {
*os << checked_cast<const Decimal128Array&>(array).FormatValue(index);
} else {
*os << checked_cast<const Decimal256Array&>(array).FormatValue(index);
}
};
return Status::OK();
}
Expand Down
13 changes: 13 additions & 0 deletions cpp/src/arrow/array/diff_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,19 @@ TEST_F(DiffTest, UnifiedDiffFormatter) {
@@ -5, +8 @@
+79
+11
)");
}

for (const auto& type : {
decimal128(10, 4),
decimal256(10, 4),
}) {
base_ = ArrayFromJSON(type, R"(["123.4567", "-78.9000"])");
target_ = ArrayFromJSON(type, R"(["123.4567", "-123.4567"])");
AssertDiffAndFormat(R"(
@@ -1, +1 @@
--78.9000
+-123.4567
)");
}
}
Expand Down
7 changes: 2 additions & 5 deletions cpp/src/arrow/integration/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ std::string GetTimeUnitName(TimeUnit::type unit) {

Result<std::string_view> GetStringView(const rj::Value& str) {
if (!str.IsString()) {
return Status::Invalid("field was not a string line ", __LINE__);
return Status::Invalid("field was not a string");
}
return std::string_view{str.GetString(), str.GetStringLength()};
}
Expand Down Expand Up @@ -1364,10 +1364,7 @@ class ArrayReader {
RETURN_NOT_OK(builder.AppendNull());
continue;
}

DCHECK(json_val.IsString());
std::string_view val{
json_val.GetString()}; // XXX can we use json_val.GetStringLength()?
ARROW_ASSIGN_OR_RAISE(auto val, GetStringView(json_val));

int64_t offset_start = ParseOffset(json_offsets[i]);
int64_t offset_end = ParseOffset(json_offsets[i + 1]);
Expand Down
17 changes: 6 additions & 11 deletions cpp/src/arrow/pretty_print.cc
Original file line number Diff line number Diff line change
Expand Up @@ -229,18 +229,13 @@ class ArrayPrinter : public PrettyPrinter {
}

template <typename ArrayType, typename T = typename ArrayType::TypeClass>
enable_if_string_like<T, Status> WriteDataValues(const ArrayType& array) {
enable_if_has_string_view<T, Status> WriteDataValues(const ArrayType& array) {
return WriteValues(array, [&](int64_t i) {
(*sink_) << "\"" << array.GetView(i) << "\"";
return Status::OK();
});
}

template <typename ArrayType, typename T = typename ArrayType::TypeClass>
enable_if_t<is_binary_like_type<T>::value && !is_decimal_type<T>::value, Status>
WriteDataValues(const ArrayType& array) {
return WriteValues(array, [&](int64_t i) {
(*sink_) << HexEncode(array.GetView(i));
if constexpr (T::is_utf8) {
(*sink_) << "\"" << array.GetView(i) << "\"";
} else {
(*sink_) << HexEncode(array.GetView(i));
}
return Status::OK();
});
}
Expand Down
10 changes: 5 additions & 5 deletions cpp/src/arrow/type_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -659,8 +659,7 @@ using enable_if_string_view = enable_if_t<is_string_view_type<T>::value, R>;

template <typename T>
using is_string_like_type =
std::integral_constant<bool, (is_base_binary_type<T>::value && T::is_utf8) ||
is_string_view_type<T>::value>;
std::integral_constant<bool, is_base_binary_type<T>::value && T::is_utf8>;

template <typename T, typename R = void>
using enable_if_string_like = enable_if_t<is_string_like_type<T>::value, R>;
Expand All @@ -683,9 +682,10 @@ template <typename T, typename R = void>
using enable_if_fixed_width_type = enable_if_t<is_fixed_width_type<T>::value, R>;

template <typename T>
using is_binary_like_type = std::integral_constant<
bool, (is_base_binary_type<T>::value && !is_string_like_type<T>::value) ||
is_binary_view_type<T>::value || is_fixed_size_binary_type<T>::value>;
using is_binary_like_type =
std::integral_constant<bool, (is_base_binary_type<T>::value &&
!is_string_like_type<T>::value) ||
is_fixed_size_binary_type<T>::value>;

template <typename T, typename R = void>
using enable_if_binary_like = enable_if_t<is_binary_like_type<T>::value, R>;
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/util/binary_view_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ bool EqualBinaryView(BinaryViewType::c_type l, BinaryViewType::c_type r,
if (l_size_and_prefix != r_size_and_prefix) return false;

if (l.is_inline()) {
// The inline part is zeroed at construction, so we can compare
// a word at a time if data extends past 'prefix_'.
// The columnar spec mandates that the inlined part be zero-padded, so we can compare
// a word at a time regardless of the exact size.
int64_t l_inlined, r_inlined;
memcpy(&l_inlined, l.inline_data() + BinaryViewType::kPrefixSize, sizeof(l_inlined));
memcpy(&r_inlined, r.inline_data() + BinaryViewType::kPrefixSize, sizeof(r_inlined));
Expand Down

0 comments on commit 2798197

Please sign in to comment.