From dadb24468c4da4fc70b1cef7256173e9f9a38b7b Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Fri, 20 May 2016 07:29:13 +0000 Subject: [PATCH 1/6] wip expose range equality to to allow for nested comparisons --- cpp/src/arrow/array-test.cc | 28 ++++++++++++++++++++++++++++ cpp/src/arrow/array.cc | 14 ++++++++++++++ cpp/src/arrow/array.h | 10 +++++++++- cpp/src/arrow/types/list.cc | 15 +++++++++++++++ cpp/src/arrow/types/list.h | 3 +++ cpp/src/arrow/types/primitive.cc | 17 ++++++++++++++++- cpp/src/arrow/types/primitive.h | 19 ++++++++++++++++++- 7 files changed, 103 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index b4c727997ee7e..47c971beea3f5 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -56,6 +56,34 @@ TEST_F(TestArray, TestLength) { ASSERT_EQ(arr->length(), 100); } +ArrayPtr MakeArrayFromValidBytes(const std::vector& v, MemoryPool* pool) { + int32_t null_count = v.size() - std::accumulate(v.begin(), v.end(), 0); + std::shared_ptr null_buf = test::bytes_to_null_buffer(v); + + BufferBuilder value_builder(pool); + for (size_t i = 0; i < v.size(); ++i) { + value_builder.Append(0); + } + + ArrayPtr arr(new Int32Array(v.size(), value_builder.Finish(), null_count, null_buf)); + return arr; +} + +TEST_F(TestArray, TestEquality) { + auto array = MakeArrayFromValidBytes({1, 0, 1, 1, 0, 1, 0, 0}, pool_); + auto equal_array = MakeArrayFromValidBytes({1, 0, 1, 1, 0, 1, 0, 0}, pool_); + auto unequal_array = MakeArrayFromValidBytes({1, 1, 1, 1, 0, 1, 0, 0}, pool_); + + EXPECT_TRUE(array->Equals(array)); + EXPECT_TRUE(array->Equals(equal_array)); + EXPECT_TRUE(equal_array->Equals(array)); + EXPECT_FALSE(equal_array->Equals(unequal_array)); + EXPECT_TRUE(array->RangeEquals(4, 8, unequal_array)); + EXPECT_FALSE(array->RangeEquals(0, 4, unequal_array)); + EXPECT_FALSE(array->RangeEquals(0, 8, unequal_array)); + EXPECT_FALSE(array->RangeEquals(1, 2, unequal_array)); +} + TEST_F(TestArray, TestIsNull) { // clang-format off std::vector null_bitmap = {1, 0, 1, 1, 0, 1, 0, 0, diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index c6b9b1599cdd2..ddc49128771c0 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -48,6 +48,14 @@ bool Array::EqualsExact(const Array& other) const { return true; } +bool Array::RangeEqualsExact(int32_t start_idx, int32_t end_idx, const Array& arr) const { + if (this == &arr) { return true; } + for (int i = start_idx; i < end_idx; ++i) { + if (IsNull(i) != arr.IsNull(i)) { return false; } + } + return true; +} + Status Array::Validate() const { return Status::OK(); } @@ -58,4 +66,10 @@ bool NullArray::Equals(const std::shared_ptr& arr) const { return arr->length() == length_; } +bool NullArray::RangeEquals( + int32_t start_idx, int32_t end_idx, const std::shared_ptr& arr) const { + if (Type::NA != arr->type_enum()) { return false; } + return true; +} + } // namespace arrow diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index f98c4c28310f8..4b143be96e60d 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -59,6 +59,13 @@ class Array { bool EqualsExact(const Array& arr) const; virtual bool Equals(const std::shared_ptr& arr) const = 0; + + // Compare if the range of slots specified are equal for the given array and + // this array. end_idx exclusive. These methods do not bounds check. + bool RangeEqualsExact(int32_t start_idx, int32_t end_idx, const Array& arr) const; + virtual bool RangeEquals( + int32_t start_idx, int32_t end_idx, const std::shared_ptr& arr) const = 0; + // Determines if the array is internally consistent. Defaults to always // returning Status::OK. This can be an expensive check. virtual Status Validate() const; @@ -85,10 +92,11 @@ class NullArray : public Array { explicit NullArray(int32_t length) : NullArray(std::make_shared(), length) {} bool Equals(const std::shared_ptr& arr) const override; + bool RangeEquals(int32_t start_idx, int32_t end_idx, + const std::shared_ptr& arr) const override; }; typedef std::shared_ptr ArrayPtr; - } // namespace arrow #endif diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc index 76e7fe5f4d429..483519c0bc5f0 100644 --- a/cpp/src/arrow/types/list.cc +++ b/cpp/src/arrow/types/list.cc @@ -44,6 +44,21 @@ bool ListArray::Equals(const std::shared_ptr& arr) const { return EqualsExact(*static_cast(arr.get())); } +bool ListArray::RangeEquals( + int32_t start_idx, int32_t end_idx, const std::shared_ptr& arr) const { + if (this == arr.get()) { return true; } + if (this->type_enum() != arr->type_enum()) { return false; } + auto other = static_cast(arr.get()); + for (int i = start_idx; i < end_idx; ++i) { + const bool is_null = IsNull(i); + if ((is_null != arr->IsNull(i)) || + (!is_null && !values_->RangeEquals(offset(i), offset(i + 1), other->values()))) { + return false; + } + } + return true; +} + Status ListArray::Validate() const { if (length_ < 0) { return Status::Invalid("Length was negative"); } if (!offset_buf_) { return Status::Invalid("offset_buf_ was null"); } diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index a020b8ad22668..53480c36b1c42 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -72,6 +72,9 @@ class ListArray : public Array { bool EqualsExact(const ListArray& other) const; bool Equals(const std::shared_ptr& arr) const override; + bool RangeEquals( + int32_t start_idx, int32_t end_idx, const ArrayPtr& arr) const override; + protected: std::shared_ptr offset_buf_; const int32_t* offsets_; diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index 57a3f1e4e150b..1c217bd136f82 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -185,10 +185,25 @@ bool BooleanArray::EqualsExact(const BooleanArray& other) const { } } -bool BooleanArray::Equals(const std::shared_ptr& arr) const { +bool BooleanArray::Equals(const ArrayPtr& arr) const { if (this == arr.get()) return true; if (Type::BOOL != arr->type_enum()) { return false; } return EqualsExact(*static_cast(arr.get())); } +bool BooleanArray::RangeEquals( + int32_t start_idx, int32_t end_idx, const ArrayPtr& arr) const { + if (this == arr.get()) { return true; } + if (!arr) { return false; } + if (this->type_enum() != arr->type_enum()) { return false; } + auto other = static_cast(arr.get()); + for (int i = start_idx; i < end_idx; ++i) { + bool is_null = IsNull(i); + if (is_null != arr->IsNull(i) || (!is_null && Value(i) != other->Value(i))) { + return false; + } + } + return true; +} + } // namespace arrow diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index fc45f6c5b0568..1e135096cd777 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -66,6 +66,21 @@ class PrimitiveArray : public Array { return PrimitiveArray::EqualsExact(*static_cast(&other)); \ } \ \ + bool RangeEquals( \ + int32_t start_idx, int32_t end_idx, const ArrayPtr& arr) const override { \ + if (this == arr.get()) { return true; } \ + if (!arr) { return false; } \ + if (this->type_enum() != arr->type_enum()) { return false; } \ + auto other = static_cast(arr.get()); \ + for (int i = start_idx; i < end_idx; ++i) { \ + bool is_null = IsNull(i); \ + if (is_null != arr->IsNull(i) || (!is_null && Value(i) != other->Value(i))) { \ + return false; \ + } \ + } \ + return true; \ + } \ + \ const T* raw_data() const { return reinterpret_cast(raw_data_); } \ \ T Value(int i) const { return raw_data()[i]; } \ @@ -248,7 +263,9 @@ class BooleanArray : public PrimitiveArray { int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr); bool EqualsExact(const BooleanArray& other) const; - bool Equals(const std::shared_ptr& arr) const override; + bool Equals(const ArrayPtr& arr) const override; + bool RangeEquals( + int32_t start_idx, int32_t end_idx, const ArrayPtr& arr) const override; const uint8_t* raw_data() const { return reinterpret_cast(raw_data_); } From 318855d5af48f25533b5c31712d05bc3ca94e7d6 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Mon, 23 May 2016 04:45:05 +0000 Subject: [PATCH 2/6] working primitive tests --- cpp/src/arrow/array-test.cc | 1 + cpp/src/arrow/types/primitive-test.cc | 56 ++++++++++++++++++++++++++- cpp/src/arrow/types/primitive.h | 16 +++++--- 3 files changed, 67 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index 47c971beea3f5..e755cdee434aa 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -78,6 +78,7 @@ TEST_F(TestArray, TestEquality) { EXPECT_TRUE(array->Equals(equal_array)); EXPECT_TRUE(equal_array->Equals(array)); EXPECT_FALSE(equal_array->Equals(unequal_array)); + EXPECT_FALSE(unequal_array->Equals(equal_array)); EXPECT_TRUE(array->RangeEquals(4, 8, unequal_array)); EXPECT_FALSE(array->RangeEquals(0, 4, unequal_array)); EXPECT_FALSE(array->RangeEquals(0, 8, unequal_array)); diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc index 2b4c0879a28f4..a45121d72398e 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/types/primitive-test.cc @@ -50,7 +50,7 @@ class Array; \ KLASS tp_copy = tp; \ ASSERT_EQ(tp_copy.type, Type::ENUM); \ - } + } \ PRIMITIVE_TEST(Int8Type, INT8, "int8"); PRIMITIVE_TEST(Int16Type, INT16, "int16"); @@ -248,6 +248,7 @@ TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives); #define DECL_ARRAYTYPE() typedef typename TestFixture::ArrayType ArrayType; + TYPED_TEST(TestPrimitiveBuilder, TestInit) { DECL_TYPE(); @@ -304,6 +305,59 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) { ASSERT_EQ(memory_before, this->pool_->bytes_allocated()); } +template +Status MakeArray(const vector& valid_bytes, + const vector& draws, + int size, + Builder* builder, + ArrayPtr* out) { + // Append the first 1000 + for (int i = 0; i < size; ++i) { + if (valid_bytes[i] > 0) { + RETURN_NOT_OK(builder->Append(draws[i])); + } else { + RETURN_NOT_OK(builder->AppendNull()); + } + } + *out = builder->Finish(); + return Status::OK(); +} + +TYPED_TEST(TestPrimitiveBuilder, Equality) { + DECL_T(); + + const int size = 1000; + this->RandomData(size); + vector& draws = this->draws_; + vector& valid_bytes = this->valid_bytes_; + ArrayPtr array, equal_array, unequal_array; + auto builder = this->builder_.get(); + ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &array)); + ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &equal_array)); + + // Make the not equal array by negating the first valid element with itself. + const auto first_valid = std::find_if(valid_bytes.begin(), valid_bytes.end(), + [](uint8_t valid){ return valid > 0;}); + const int first_valid_idx = std::distance(valid_bytes.begin(), first_valid); + // This should be true with a very high probability, but might introduce flakiness + ASSERT_LT(first_valid_idx, size-1); + draws[first_valid_idx] = ~*reinterpret_cast(&draws[first_valid_idx]); + ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &unequal_array)); + + // test normal equality + EXPECT_TRUE(array->Equals(array)); + EXPECT_TRUE(array->Equals(equal_array)); + EXPECT_TRUE(equal_array->Equals(array)); + EXPECT_FALSE(equal_array->Equals(unequal_array)); + EXPECT_FALSE(unequal_array->Equals(equal_array)); + + // Test range equality + EXPECT_FALSE(array->RangeEquals(0, first_valid_idx+1, unequal_array)); + EXPECT_FALSE(array->RangeEquals(first_valid_idx, size, unequal_array)); + EXPECT_TRUE(array->RangeEquals(0, first_valid_idx, unequal_array)); + EXPECT_TRUE(array->RangeEquals(first_valid_idx+1, size, unequal_array)); +} + TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) { DECL_T(); diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index 1e135096cd777..a765536ec0278 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -110,8 +110,10 @@ class PrimitiveBuilder : public ArrayBuilder { using ArrayBuilder::Advance; // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory - void AppendNulls(const uint8_t* valid_bytes, int32_t length) { + Status AppendNulls(const uint8_t* valid_bytes, int32_t length) { + RETURN_NOT_OK(Reserve(length)); UnsafeAppendToBitmap(valid_bytes, length); + return Status::OK(); } Status AppendNull() { @@ -154,9 +156,10 @@ class NumericBuilder : public PrimitiveBuilder { using PrimitiveBuilder::Reserve; // Scalar append. - void Append(value_type val) { - ArrayBuilder::Reserve(1); + Status Append(value_type val) { + RETURN_NOT_OK(ArrayBuilder::Reserve(1)); UnsafeAppend(val); + return Status::OK(); } // Does not capacity-check; make sure to call Reserve beforehand @@ -291,7 +294,9 @@ class BooleanBuilder : public PrimitiveBuilder { using PrimitiveBuilder::Append; // Scalar append - void Append(bool val) { + Status Append(bool val) { + // TODO(emkornfield) fix reserve/resize for BooleanBuilder + Reserve(1); util::set_bit(null_bitmap_data_, length_); if (val) { util::set_bit(raw_data_, length_); @@ -299,9 +304,10 @@ class BooleanBuilder : public PrimitiveBuilder { util::clear_bit(raw_data_, length_); } ++length_; + return Status::OK(); } - void Append(uint8_t val) { Append(static_cast(val)); } + Status Append(uint8_t val) { return Append(static_cast(val)); } }; } // namespace arrow From dcbaad4a5522080d71c562a187661ee0b2ba1953 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Mon, 23 May 2016 06:10:16 +0000 Subject: [PATCH 3/6] unittests passing --- cpp/src/arrow/array-test.cc | 8 +++--- cpp/src/arrow/array.cc | 11 ++------ cpp/src/arrow/array.h | 8 +++--- cpp/src/arrow/types/list-test.cc | 36 +++++++++++++++++++++++++++ cpp/src/arrow/types/list.cc | 21 ++++++++++++---- cpp/src/arrow/types/list.h | 2 +- cpp/src/arrow/types/primitive-test.cc | 8 +++--- cpp/src/arrow/types/primitive.cc | 10 ++++---- cpp/src/arrow/types/primitive.h | 15 ++++++----- 9 files changed, 81 insertions(+), 38 deletions(-) diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index e755cdee434aa..3b4736327b47c 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -79,10 +79,10 @@ TEST_F(TestArray, TestEquality) { EXPECT_TRUE(equal_array->Equals(array)); EXPECT_FALSE(equal_array->Equals(unequal_array)); EXPECT_FALSE(unequal_array->Equals(equal_array)); - EXPECT_TRUE(array->RangeEquals(4, 8, unequal_array)); - EXPECT_FALSE(array->RangeEquals(0, 4, unequal_array)); - EXPECT_FALSE(array->RangeEquals(0, 8, unequal_array)); - EXPECT_FALSE(array->RangeEquals(1, 2, unequal_array)); + EXPECT_TRUE(array->RangeEquals(4, 8, 4, unequal_array)); + EXPECT_FALSE(array->RangeEquals(0, 4, 0, unequal_array)); + EXPECT_FALSE(array->RangeEquals(0, 8, 0, unequal_array)); + EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array)); } TEST_F(TestArray, TestIsNull) { diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index ddc49128771c0..ab169fc65fb47 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -48,14 +48,6 @@ bool Array::EqualsExact(const Array& other) const { return true; } -bool Array::RangeEqualsExact(int32_t start_idx, int32_t end_idx, const Array& arr) const { - if (this == &arr) { return true; } - for (int i = start_idx; i < end_idx; ++i) { - if (IsNull(i) != arr.IsNull(i)) { return false; } - } - return true; -} - Status Array::Validate() const { return Status::OK(); } @@ -67,7 +59,8 @@ bool NullArray::Equals(const std::shared_ptr& arr) const { } bool NullArray::RangeEquals( - int32_t start_idx, int32_t end_idx, const std::shared_ptr& arr) const { + int32_t start_idx, int32_t end_idx, int32_t other_start_index, + const std::shared_ptr& arr) const { if (Type::NA != arr->type_enum()) { return false; } return true; } diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index 4b143be96e60d..c4c466920f0d7 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -61,10 +61,10 @@ class Array { virtual bool Equals(const std::shared_ptr& arr) const = 0; // Compare if the range of slots specified are equal for the given array and - // this array. end_idx exclusive. These methods do not bounds check. - bool RangeEqualsExact(int32_t start_idx, int32_t end_idx, const Array& arr) const; + // this array. end_idx exclusive. This methods does not bounds check. virtual bool RangeEquals( - int32_t start_idx, int32_t end_idx, const std::shared_ptr& arr) const = 0; + int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const std::shared_ptr& arr) const = 0; // Determines if the array is internally consistent. Defaults to always // returning Status::OK. This can be an expensive check. @@ -92,7 +92,7 @@ class NullArray : public Array { explicit NullArray(int32_t length) : NullArray(std::make_shared(), length) {} bool Equals(const std::shared_ptr& arr) const override; - bool RangeEquals(int32_t start_idx, int32_t end_idx, + bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_index, const std::shared_ptr& arr) const override; }; diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc index 6a8ad9aa59ead..c95e4aab2618b 100644 --- a/cpp/src/arrow/types/list-test.cc +++ b/cpp/src/arrow/types/list-test.cc @@ -86,6 +86,42 @@ class TestListBuilder : public TestBuilder { shared_ptr result_; }; +TEST_F(TestListBuilder, Equality) { + Int32Builder* vb = static_cast(builder_->value_builder().get()); + + ArrayPtr array, equal_array, unequal_array; + vector equal_offsets = {0, 1, 2, 5}; + vector equal_values = {1,2,3,4,5,2,2,2}; + vector unequal_offsets = {0, 1, 4}; + vector unequal_values = {1,2,2,2,3,4,5}; + + // setup two equal arrays + ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size())); + ASSERT_OK(vb->Append(equal_values.data(), equal_values.size())); + array = builder_->Finish(); + ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size())); + ASSERT_OK(vb->Append(equal_values.data(), equal_values.size())); + equal_array = builder_->Finish(); + // now an unequal one + ASSERT_OK(builder_->Append(unequal_offsets.data(), unequal_offsets.size())); + ASSERT_OK(vb->Append(unequal_values.data(), unequal_values.size())); + unequal_array = builder_->Finish(); + + // Test array equality + EXPECT_TRUE(array->Equals(array)); + EXPECT_TRUE(array->Equals(equal_array)); + EXPECT_TRUE(equal_array->Equals(array)); + EXPECT_FALSE(equal_array->Equals(unequal_array)); + EXPECT_FALSE(unequal_array->Equals(equal_array)); + + // Test range equality + EXPECT_TRUE(array->RangeEquals(0, 1, 0, unequal_array)); + EXPECT_FALSE(array->RangeEquals(0, 2, 0, unequal_array)); + EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array)); + EXPECT_TRUE(array->RangeEquals(2, 3, 2, unequal_array)); + EXPECT_TRUE(array->RangeEquals(3, 4, 1, unequal_array)); +} + TEST_F(TestListBuilder, TestResize) {} TEST_F(TestListBuilder, TestAppendNull) { diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc index 483519c0bc5f0..0f08b2e28359e 100644 --- a/cpp/src/arrow/types/list.cc +++ b/cpp/src/arrow/types/list.cc @@ -45,14 +45,25 @@ bool ListArray::Equals(const std::shared_ptr& arr) const { } bool ListArray::RangeEquals( - int32_t start_idx, int32_t end_idx, const std::shared_ptr& arr) const { + int32_t start_idx, int32_t end_idx, + int32_t other_start_idx, + const std::shared_ptr& arr) const { if (this == arr.get()) { return true; } if (this->type_enum() != arr->type_enum()) { return false; } - auto other = static_cast(arr.get()); - for (int i = start_idx; i < end_idx; ++i) { + const auto other = static_cast(arr.get()); + for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { const bool is_null = IsNull(i); - if ((is_null != arr->IsNull(i)) || - (!is_null && !values_->RangeEquals(offset(i), offset(i + 1), other->values()))) { + if (is_null != arr->IsNull(o_i)) { return false; } + if (is_null) continue; + const int32_t begin_offset = offset(i); + const int32_t end_offset = offset(i+1); + const int32_t other_begin_offset = other->offset(o_i); + const int32_t other_end_offset = other->offset(o_i+1); + // Underlying can't be equal if the size isn't equal + if (end_offset - begin_offset != other_end_offset - other_begin_offset) { + return false; + } + if (!values_->RangeEquals(begin_offset, end_offset, other_begin_offset, other->values())) { return false; } } diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index 53480c36b1c42..894e1bba67fb2 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -73,7 +73,7 @@ class ListArray : public Array { bool Equals(const std::shared_ptr& arr) const override; bool RangeEquals( - int32_t start_idx, int32_t end_idx, const ArrayPtr& arr) const override; + int32_t start_idx, int32_t end_idx, int32_t other_start_idx, const ArrayPtr& arr) const override; protected: std::shared_ptr offset_buf_; diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc index a45121d72398e..a62cd39e6e1f9 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/types/primitive-test.cc @@ -352,10 +352,10 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) { EXPECT_FALSE(unequal_array->Equals(equal_array)); // Test range equality - EXPECT_FALSE(array->RangeEquals(0, first_valid_idx+1, unequal_array)); - EXPECT_FALSE(array->RangeEquals(first_valid_idx, size, unequal_array)); - EXPECT_TRUE(array->RangeEquals(0, first_valid_idx, unequal_array)); - EXPECT_TRUE(array->RangeEquals(first_valid_idx+1, size, unequal_array)); + EXPECT_FALSE(array->RangeEquals(0, first_valid_idx+1, 0, unequal_array)); + EXPECT_FALSE(array->RangeEquals(first_valid_idx, size, first_valid_idx, unequal_array)); + EXPECT_TRUE(array->RangeEquals(0, first_valid_idx, 0, unequal_array)); + EXPECT_TRUE(array->RangeEquals(first_valid_idx+1, size, first_valid_idx+1, unequal_array)); } TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) { diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index 1c217bd136f82..9d0947f8195bc 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -192,14 +192,14 @@ bool BooleanArray::Equals(const ArrayPtr& arr) const { } bool BooleanArray::RangeEquals( - int32_t start_idx, int32_t end_idx, const ArrayPtr& arr) const { + int32_t start_idx, int32_t end_idx, int32_t other_start_idx, const ArrayPtr& arr) const { if (this == arr.get()) { return true; } if (!arr) { return false; } if (this->type_enum() != arr->type_enum()) { return false; } - auto other = static_cast(arr.get()); - for (int i = start_idx; i < end_idx; ++i) { - bool is_null = IsNull(i); - if (is_null != arr->IsNull(i) || (!is_null && Value(i) != other->Value(i))) { + const auto other = static_cast(arr.get()); + for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { + const bool is_null = IsNull(i); + if (is_null != arr->IsNull(o_i) || (!is_null && Value(i) != other->Value(o_i))) { return false; } } diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index a765536ec0278..f6785cd5e156a 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -67,14 +67,16 @@ class PrimitiveArray : public Array { } \ \ bool RangeEquals( \ - int32_t start_idx, int32_t end_idx, const ArrayPtr& arr) const override { \ + int32_t start_idx, int32_t end_idx, \ + int32_t other_start_idx, \ + const ArrayPtr& arr) const override { \ if (this == arr.get()) { return true; } \ if (!arr) { return false; } \ if (this->type_enum() != arr->type_enum()) { return false; } \ - auto other = static_cast(arr.get()); \ - for (int i = start_idx; i < end_idx; ++i) { \ - bool is_null = IsNull(i); \ - if (is_null != arr->IsNull(i) || (!is_null && Value(i) != other->Value(i))) { \ + const auto other = static_cast(arr.get()); \ + for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { \ + const bool is_null = IsNull(i); \ + if (is_null != arr->IsNull(o_i) || (!is_null && Value(i) != other->Value(o_i))) { \ return false; \ } \ } \ @@ -268,7 +270,8 @@ class BooleanArray : public PrimitiveArray { bool EqualsExact(const BooleanArray& other) const; bool Equals(const ArrayPtr& arr) const override; bool RangeEquals( - int32_t start_idx, int32_t end_idx, const ArrayPtr& arr) const override; + int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const ArrayPtr& arr) const override; const uint8_t* raw_data() const { return reinterpret_cast(raw_data_); } From f5c6bd5c7a5b759c74ace7165102d0b13aa19fa7 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Mon, 23 May 2016 06:10:37 +0000 Subject: [PATCH 4/6] make format/lint check --- cpp/src/arrow/array.cc | 3 +-- cpp/src/arrow/array.h | 3 +-- cpp/src/arrow/types/list-test.cc | 8 ++++---- cpp/src/arrow/types/list.cc | 13 ++++++------- cpp/src/arrow/types/list.h | 4 ++-- cpp/src/arrow/types/primitive-test.cc | 25 +++++++++++-------------- cpp/src/arrow/types/primitive.cc | 4 ++-- cpp/src/arrow/types/primitive.h | 10 ++++------ 8 files changed, 31 insertions(+), 39 deletions(-) diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index ab169fc65fb47..3829a8c278c5a 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -58,8 +58,7 @@ bool NullArray::Equals(const std::shared_ptr& arr) const { return arr->length() == length_; } -bool NullArray::RangeEquals( - int32_t start_idx, int32_t end_idx, int32_t other_start_index, +bool NullArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_index, const std::shared_ptr& arr) const { if (Type::NA != arr->type_enum()) { return false; } return true; diff --git a/cpp/src/arrow/array.h b/cpp/src/arrow/array.h index c4c466920f0d7..76dc0f598141f 100644 --- a/cpp/src/arrow/array.h +++ b/cpp/src/arrow/array.h @@ -62,8 +62,7 @@ class Array { // Compare if the range of slots specified are equal for the given array and // this array. end_idx exclusive. This methods does not bounds check. - virtual bool RangeEquals( - int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + virtual bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, const std::shared_ptr& arr) const = 0; // Determines if the array is internally consistent. Defaults to always diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc index c95e4aab2618b..2e41b4a61caf2 100644 --- a/cpp/src/arrow/types/list-test.cc +++ b/cpp/src/arrow/types/list-test.cc @@ -90,10 +90,10 @@ TEST_F(TestListBuilder, Equality) { Int32Builder* vb = static_cast(builder_->value_builder().get()); ArrayPtr array, equal_array, unequal_array; - vector equal_offsets = {0, 1, 2, 5}; - vector equal_values = {1,2,3,4,5,2,2,2}; + vector equal_offsets = {0, 1, 2, 5}; + vector equal_values = {1, 2, 3, 4, 5, 2, 2, 2}; vector unequal_offsets = {0, 1, 4}; - vector unequal_values = {1,2,2,2,3,4,5}; + vector unequal_values = {1, 2, 2, 2, 3, 4, 5}; // setup two equal arrays ASSERT_OK(builder_->Append(equal_offsets.data(), equal_offsets.size())); @@ -105,7 +105,7 @@ TEST_F(TestListBuilder, Equality) { // now an unequal one ASSERT_OK(builder_->Append(unequal_offsets.data(), unequal_offsets.size())); ASSERT_OK(vb->Append(unequal_values.data(), unequal_values.size())); - unequal_array = builder_->Finish(); + unequal_array = builder_->Finish(); // Test array equality EXPECT_TRUE(array->Equals(array)); diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc index 0f08b2e28359e..f86055a83d1d9 100644 --- a/cpp/src/arrow/types/list.cc +++ b/cpp/src/arrow/types/list.cc @@ -44,9 +44,7 @@ bool ListArray::Equals(const std::shared_ptr& arr) const { return EqualsExact(*static_cast(arr.get())); } -bool ListArray::RangeEquals( - int32_t start_idx, int32_t end_idx, - int32_t other_start_idx, +bool ListArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, const std::shared_ptr& arr) const { if (this == arr.get()) { return true; } if (this->type_enum() != arr->type_enum()) { return false; } @@ -55,15 +53,16 @@ bool ListArray::RangeEquals( const bool is_null = IsNull(i); if (is_null != arr->IsNull(o_i)) { return false; } if (is_null) continue; - const int32_t begin_offset = offset(i); - const int32_t end_offset = offset(i+1); + const int32_t begin_offset = offset(i); + const int32_t end_offset = offset(i + 1); const int32_t other_begin_offset = other->offset(o_i); - const int32_t other_end_offset = other->offset(o_i+1); + const int32_t other_end_offset = other->offset(o_i + 1); // Underlying can't be equal if the size isn't equal if (end_offset - begin_offset != other_end_offset - other_begin_offset) { return false; } - if (!values_->RangeEquals(begin_offset, end_offset, other_begin_offset, other->values())) { + if (!values_->RangeEquals( + begin_offset, end_offset, other_begin_offset, other->values())) { return false; } } diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index 894e1bba67fb2..0a3941633eb83 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -72,8 +72,8 @@ class ListArray : public Array { bool EqualsExact(const ListArray& other) const; bool Equals(const std::shared_ptr& arr) const override; - bool RangeEquals( - int32_t start_idx, int32_t end_idx, int32_t other_start_idx, const ArrayPtr& arr) const override; + bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + const ArrayPtr& arr) const override; protected: std::shared_ptr offset_buf_; diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc index a62cd39e6e1f9..87eb0fe3a8bf7 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/types/primitive-test.cc @@ -50,7 +50,7 @@ class Array; \ KLASS tp_copy = tp; \ ASSERT_EQ(tp_copy.type, Type::ENUM); \ - } \ + } PRIMITIVE_TEST(Int8Type, INT8, "int8"); PRIMITIVE_TEST(Int16Type, INT16, "int16"); @@ -248,7 +248,6 @@ TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives); #define DECL_ARRAYTYPE() typedef typename TestFixture::ArrayType ArrayType; - TYPED_TEST(TestPrimitiveBuilder, TestInit) { DECL_TYPE(); @@ -305,12 +304,9 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) { ASSERT_EQ(memory_before, this->pool_->bytes_allocated()); } -template -Status MakeArray(const vector& valid_bytes, - const vector& draws, - int size, - Builder* builder, - ArrayPtr* out) { +template +Status MakeArray(const vector& valid_bytes, const vector& draws, int size, + Builder* builder, ArrayPtr* out) { // Append the first 1000 for (int i = 0; i < size; ++i) { if (valid_bytes[i] > 0) { @@ -331,16 +327,16 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) { vector& draws = this->draws_; vector& valid_bytes = this->valid_bytes_; ArrayPtr array, equal_array, unequal_array; - auto builder = this->builder_.get(); + auto builder = this->builder_.get(); ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &array)); ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &equal_array)); // Make the not equal array by negating the first valid element with itself. - const auto first_valid = std::find_if(valid_bytes.begin(), valid_bytes.end(), - [](uint8_t valid){ return valid > 0;}); + const auto first_valid = std::find_if( + valid_bytes.begin(), valid_bytes.end(), [](uint8_t valid) { return valid > 0; }); const int first_valid_idx = std::distance(valid_bytes.begin(), first_valid); // This should be true with a very high probability, but might introduce flakiness - ASSERT_LT(first_valid_idx, size-1); + ASSERT_LT(first_valid_idx, size - 1); draws[first_valid_idx] = ~*reinterpret_cast(&draws[first_valid_idx]); ASSERT_OK(MakeArray(valid_bytes, draws, size, builder, &unequal_array)); @@ -352,10 +348,11 @@ TYPED_TEST(TestPrimitiveBuilder, Equality) { EXPECT_FALSE(unequal_array->Equals(equal_array)); // Test range equality - EXPECT_FALSE(array->RangeEquals(0, first_valid_idx+1, 0, unequal_array)); + EXPECT_FALSE(array->RangeEquals(0, first_valid_idx + 1, 0, unequal_array)); EXPECT_FALSE(array->RangeEquals(first_valid_idx, size, first_valid_idx, unequal_array)); EXPECT_TRUE(array->RangeEquals(0, first_valid_idx, 0, unequal_array)); - EXPECT_TRUE(array->RangeEquals(first_valid_idx+1, size, first_valid_idx+1, unequal_array)); + EXPECT_TRUE( + array->RangeEquals(first_valid_idx + 1, size, first_valid_idx + 1, unequal_array)); } TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) { diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index 9d0947f8195bc..8e6c0f809ca44 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -191,8 +191,8 @@ bool BooleanArray::Equals(const ArrayPtr& arr) const { return EqualsExact(*static_cast(arr.get())); } -bool BooleanArray::RangeEquals( - int32_t start_idx, int32_t end_idx, int32_t other_start_idx, const ArrayPtr& arr) const { +bool BooleanArray::RangeEquals(int32_t start_idx, int32_t end_idx, + int32_t other_start_idx, const ArrayPtr& arr) const { if (this == arr.get()) { return true; } if (!arr) { return false; } if (this->type_enum() != arr->type_enum()) { return false; } diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index f6785cd5e156a..c41c6a9a285b4 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -66,9 +66,7 @@ class PrimitiveArray : public Array { return PrimitiveArray::EqualsExact(*static_cast(&other)); \ } \ \ - bool RangeEquals( \ - int32_t start_idx, int32_t end_idx, \ - int32_t other_start_idx, \ + bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, \ const ArrayPtr& arr) const override { \ if (this == arr.get()) { return true; } \ if (!arr) { return false; } \ @@ -76,7 +74,8 @@ class PrimitiveArray : public Array { const auto other = static_cast(arr.get()); \ for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { \ const bool is_null = IsNull(i); \ - if (is_null != arr->IsNull(o_i) || (!is_null && Value(i) != other->Value(o_i))) { \ + if (is_null != arr->IsNull(o_i) || \ + (!is_null && Value(i) != other->Value(o_i))) { \ return false; \ } \ } \ @@ -269,8 +268,7 @@ class BooleanArray : public PrimitiveArray { bool EqualsExact(const BooleanArray& other) const; bool Equals(const ArrayPtr& arr) const override; - bool RangeEquals( - int32_t start_idx, int32_t end_idx, int32_t other_start_idx, + bool RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, const ArrayPtr& arr) const override; const uint8_t* raw_data() const { return reinterpret_cast(raw_data_); } From f96363961ed23ffe04aaa2d3b4d65fb3b4dbd85d Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Mon, 23 May 2016 06:15:12 +0000 Subject: [PATCH 5/6] add in check for null arrays --- cpp/src/arrow/array.cc | 1 + cpp/src/arrow/types/list.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/cpp/src/arrow/array.cc b/cpp/src/arrow/array.cc index 3829a8c278c5a..d6b081f315532 100644 --- a/cpp/src/arrow/array.cc +++ b/cpp/src/arrow/array.cc @@ -60,6 +60,7 @@ bool NullArray::Equals(const std::shared_ptr& arr) const { bool NullArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_index, const std::shared_ptr& arr) const { + if (!arr) { return false; } if (Type::NA != arr->type_enum()) { return false; } return true; } diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc index f86055a83d1d9..6334054caf84a 100644 --- a/cpp/src/arrow/types/list.cc +++ b/cpp/src/arrow/types/list.cc @@ -47,6 +47,7 @@ bool ListArray::Equals(const std::shared_ptr& arr) const { bool ListArray::RangeEquals(int32_t start_idx, int32_t end_idx, int32_t other_start_idx, const std::shared_ptr& arr) const { if (this == arr.get()) { return true; } + if (!arr) { return false; } if (this->type_enum() != arr->type_enum()) { return false; } const auto other = static_cast(arr.get()); for (int32_t i = start_idx, o_i = other_start_idx; i < end_idx; ++i, ++o_i) { From d5ae777474e05fe8c3bc02ee871ae40d5d76bbf1 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Mon, 23 May 2016 06:22:49 +0000 Subject: [PATCH 6/6] remove todo, its handled by type_traits --- cpp/src/arrow/types/primitive.h | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index c41c6a9a285b4..9597fc8363138 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -296,7 +296,6 @@ class BooleanBuilder : public PrimitiveBuilder { // Scalar append Status Append(bool val) { - // TODO(emkornfield) fix reserve/resize for BooleanBuilder Reserve(1); util::set_bit(null_bitmap_data_, length_); if (val) {