From c98c74989c083576d746a646492d3e4dde15bd5c Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Fri, 13 Sep 2024 21:20:53 +0800 Subject: [PATCH 1/7] Make sliceBuffer a public api --- velox/vector/BaseVector.h | 24 ++++++++++++------------ velox/vector/tests/VectorTest.cpp | 16 ++++++++++++++++ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h index 74478e2f7030..bcb53a4ad793 100644 --- a/velox/vector/BaseVector.h +++ b/velox/vector/BaseVector.h @@ -863,6 +863,18 @@ class BaseVector { storageByteCount_ = std::nullopt; } + /// Slice a buffer with specific type. + /// For boolean type and if the 'offset' is not multiple of 8, return a + /// shifted copy, new buffer is allocated from 'pool'. + /// Otherwise return a BufferView into the original buffer (with shared + /// ownership of original buffer). + static BufferPtr sliceBuffer( + const Type& type, + const BufferPtr& buf, + vector_size_t offset, + vector_size_t length, + memory::MemoryPool* pool); + protected: // Returns a brief summary of the vector. The default implementation includes // encoding, type, number of rows and number of nulls. @@ -882,18 +894,6 @@ class BaseVector { ensureNullsCapacity(length_, true); } - // Slice a buffer with specific type. - // - // For boolean type and if the offset is not multiple of 8, return a shifted - // copy; otherwise return a BufferView into the original buffer (with shared - // ownership of original buffer). - static BufferPtr sliceBuffer( - const Type&, - const BufferPtr&, - vector_size_t offset, - vector_size_t length, - memory::MemoryPool*); - BufferPtr sliceNulls(vector_size_t offset, vector_size_t length) const { return sliceBuffer(*BOOLEAN(), nulls_, offset, length, pool_); } diff --git a/velox/vector/tests/VectorTest.cpp b/velox/vector/tests/VectorTest.cpp index 3a5525412b02..cdc9b1727ec6 100644 --- a/velox/vector/tests/VectorTest.cpp +++ b/velox/vector/tests/VectorTest.cpp @@ -3933,5 +3933,21 @@ TEST_F(VectorTest, hasOverlappingRanges) { test(2, {false, false}, {1, 0}, {1, 3}, true); } +TEST_F(VectorTest, testSliceBuffer) { + auto bufferPtr = AlignedBuffer::allocate(10, pool()); + auto sliceBufferPtr = + BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 1, 5, pool()); + ASSERT_TRUE(sliceBufferPtr->isView()); + ASSERT_EQ(sliceBufferPtr->as(), bufferPtr->as() + 1); + + bufferPtr = AlignedBuffer::allocate(16, pool()); + sliceBufferPtr = BaseVector::sliceBuffer(*BOOLEAN(), bufferPtr, 8, 8, pool()); + ASSERT_TRUE(sliceBufferPtr->isView()); + ASSERT_EQ(sliceBufferPtr->as(), bufferPtr->as() + 1); + + sliceBufferPtr = BaseVector::sliceBuffer(*BOOLEAN(), bufferPtr, 5, 5, pool()); + ASSERT_FALSE(sliceBufferPtr->isView()); +} + } // namespace } // namespace facebook::velox From f8e09aab3f9b60b298f4f4dab0298ca3741e5696 Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Sat, 14 Sep 2024 10:48:00 +0800 Subject: [PATCH 2/7] address comments --- velox/vector/BaseVector.cpp | 12 ++++++++++++ velox/vector/BaseVector.h | 1 + velox/vector/tests/VectorTest.cpp | 31 ++++++++++++++++++++++++++++--- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/velox/vector/BaseVector.cpp b/velox/vector/BaseVector.cpp index 2ae0d69212ff..75ae31dd9c64 100644 --- a/velox/vector/BaseVector.cpp +++ b/velox/vector/BaseVector.cpp @@ -947,6 +947,16 @@ BufferPtr sliceBufferZeroCopy( const BufferPtr& buf, vector_size_t offset, vector_size_t length) { + VELOX_USER_CHECK_LE( + offset * typeSize, + buf->size(), + "Offset must be less than or equal to {}.", + buf->size() / typeSize); + VELOX_USER_CHECK_LE( + (offset + length) * typeSize, + buf->size(), + "Length must be less than or equal to {}.", + buf->size() / typeSize - offset); // Cannot use `Buffer::as()` here because Buffer::podType_ is false // when type is OPAQUE. auto data = @@ -967,6 +977,8 @@ BufferPtr BaseVector::sliceBuffer( if (!buf) { return nullptr; } + VELOX_USER_CHECK_GE(offset, 0, "Offset must be non-negative."); + VELOX_USER_CHECK_GE(length, 0, "Length must be non-negative."); if (type.kind() != TypeKind::BOOLEAN) { return sliceBufferZeroCopy( typeSize(type), type.isPrimitiveType(), buf, offset, length); diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h index bcb53a4ad793..f32a2aeb1809 100644 --- a/velox/vector/BaseVector.h +++ b/velox/vector/BaseVector.h @@ -868,6 +868,7 @@ class BaseVector { /// shifted copy, new buffer is allocated from 'pool'. /// Otherwise return a BufferView into the original buffer (with shared /// ownership of original buffer). + /// @param type Must be primative type. static BufferPtr sliceBuffer( const Type& type, const BufferPtr& buf, diff --git a/velox/vector/tests/VectorTest.cpp b/velox/vector/tests/VectorTest.cpp index cdc9b1727ec6..d8e8b6fbb69b 100644 --- a/velox/vector/tests/VectorTest.cpp +++ b/velox/vector/tests/VectorTest.cpp @@ -3933,20 +3933,45 @@ TEST_F(VectorTest, hasOverlappingRanges) { test(2, {false, false}, {1, 0}, {1, 3}, true); } -TEST_F(VectorTest, testSliceBuffer) { +TEST_F(VectorTest, sliceBigintBuffer) { auto bufferPtr = AlignedBuffer::allocate(10, pool()); auto sliceBufferPtr = BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 1, 5, pool()); ASSERT_TRUE(sliceBufferPtr->isView()); + ASSERT_EQ(sliceBufferPtr->size(), 40); // 5 * type size of int64_t. ASSERT_EQ(sliceBufferPtr->as(), bufferPtr->as() + 1); - bufferPtr = AlignedBuffer::allocate(16, pool()); - sliceBufferPtr = BaseVector::sliceBuffer(*BOOLEAN(), bufferPtr, 8, 8, pool()); + VELOX_ASSERT_USER_THROW( + BaseVector::sliceBuffer(*BIGINT(), bufferPtr, -1, 5, pool()), + "Offset must be non-negative."); + VELOX_ASSERT_USER_THROW( + BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 0, -1, pool()), + "Length must be non-negative."); + VELOX_ASSERT_USER_THROW( + BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 11, 1, pool()), + "Offset must be less than or equal to 10."); + VELOX_ASSERT_USER_THROW( + BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 5, 6, pool()), + "Length must be less than or equal to 5."); +} + +TEST_F(VectorTest, sliceBooleanBuffer) { + auto bufferPtr = AlignedBuffer::allocate(16, pool()); + auto data = bufferPtr->asMutableRange(); + for (int i = 0; i < 16; ++i) { + data[i] = (i % 2 != 0); + } + auto sliceBufferPtr = + BaseVector::sliceBuffer(*BOOLEAN(), bufferPtr, 8, 8, pool()); ASSERT_TRUE(sliceBufferPtr->isView()); ASSERT_EQ(sliceBufferPtr->as(), bufferPtr->as() + 1); sliceBufferPtr = BaseVector::sliceBuffer(*BOOLEAN(), bufferPtr, 5, 5, pool()); ASSERT_FALSE(sliceBufferPtr->isView()); + auto sliceData = sliceBufferPtr->asRange(); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(sliceData[i], i % 2 == 0); + } } } // namespace From 77df06d971cff4aab76719940d67926a3a1ac50b Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Sat, 14 Sep 2024 11:40:40 +0800 Subject: [PATCH 3/7] minor change fix ut t --- velox/vector/BaseVector.cpp | 6 +++--- velox/vector/BaseVector.h | 2 +- velox/vector/tests/VectorTest.cpp | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/velox/vector/BaseVector.cpp b/velox/vector/BaseVector.cpp index 75ae31dd9c64..67d43f42d9c2 100644 --- a/velox/vector/BaseVector.cpp +++ b/velox/vector/BaseVector.cpp @@ -974,11 +974,11 @@ BufferPtr BaseVector::sliceBuffer( vector_size_t offset, vector_size_t length, memory::MemoryPool* pool) { + VELOX_USER_CHECK_GE(offset, 0, "Offset must be non-negative."); + VELOX_USER_CHECK_GE(length, 0, "Length must be non-negative."); if (!buf) { return nullptr; } - VELOX_USER_CHECK_GE(offset, 0, "Offset must be non-negative."); - VELOX_USER_CHECK_GE(length, 0, "Length must be non-negative."); if (type.kind() != TypeKind::BOOLEAN) { return sliceBufferZeroCopy( typeSize(type), type.isPrimitiveType(), buf, offset, length); @@ -986,7 +986,7 @@ BufferPtr BaseVector::sliceBuffer( if (offset % 8 == 0) { return sliceBufferZeroCopy(1, true, buf, offset / 8, (length + 7) / 8); } - VELOX_DCHECK_NOT_NULL(pool); + VELOX_CHECK_NOT_NULL(pool, "Pool must not be null."); auto ans = AlignedBuffer::allocate(length, pool); bits::copyBits( buf->as(), offset, ans->asMutable(), 0, length); diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h index f32a2aeb1809..c74300760ba1 100644 --- a/velox/vector/BaseVector.h +++ b/velox/vector/BaseVector.h @@ -868,7 +868,7 @@ class BaseVector { /// shifted copy, new buffer is allocated from 'pool'. /// Otherwise return a BufferView into the original buffer (with shared /// ownership of original buffer). - /// @param type Must be primative type. + /// @param type Must be primitive type or OPAQUE type. static BufferPtr sliceBuffer( const Type& type, const BufferPtr& buf, diff --git a/velox/vector/tests/VectorTest.cpp b/velox/vector/tests/VectorTest.cpp index d8e8b6fbb69b..33321d2a7d7d 100644 --- a/velox/vector/tests/VectorTest.cpp +++ b/velox/vector/tests/VectorTest.cpp @@ -3972,6 +3972,9 @@ TEST_F(VectorTest, sliceBooleanBuffer) { for (int i = 0; i < 5; ++i) { ASSERT_EQ(sliceData[i], i % 2 == 0); } + VELOX_ASSERT_THROW( + BaseVector::sliceBuffer(*BOOLEAN(), bufferPtr, 5, 6, nullptr), + "Pool must not be null."); } } // namespace From 4a477b220177204e9131fb756c7d187dd9a307d7 Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Fri, 20 Sep 2024 12:41:28 +0800 Subject: [PATCH 4/7] address comments minor change --- velox/vector/BaseVector.cpp | 8 ++++---- velox/vector/BaseVector.h | 12 +++++++++++- velox/vector/tests/VectorTest.cpp | 18 ++++++++++++++---- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/velox/vector/BaseVector.cpp b/velox/vector/BaseVector.cpp index 67d43f42d9c2..f2deefce4345 100644 --- a/velox/vector/BaseVector.cpp +++ b/velox/vector/BaseVector.cpp @@ -947,12 +947,12 @@ BufferPtr sliceBufferZeroCopy( const BufferPtr& buf, vector_size_t offset, vector_size_t length) { - VELOX_USER_CHECK_LE( + VELOX_CHECK_LE( offset * typeSize, buf->size(), "Offset must be less than or equal to {}.", buf->size() / typeSize); - VELOX_USER_CHECK_LE( + VELOX_CHECK_LE( (offset + length) * typeSize, buf->size(), "Length must be less than or equal to {}.", @@ -974,8 +974,8 @@ BufferPtr BaseVector::sliceBuffer( vector_size_t offset, vector_size_t length, memory::MemoryPool* pool) { - VELOX_USER_CHECK_GE(offset, 0, "Offset must be non-negative."); - VELOX_USER_CHECK_GE(length, 0, "Length must be non-negative."); + VELOX_CHECK_GE(offset, 0, "Offset must be non-negative."); + VELOX_CHECK_GE(length, 0, "Length must be non-negative."); if (!buf) { return nullptr; } diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h index c74300760ba1..3d50569c2882 100644 --- a/velox/vector/BaseVector.h +++ b/velox/vector/BaseVector.h @@ -868,7 +868,17 @@ class BaseVector { /// shifted copy, new buffer is allocated from 'pool'. /// Otherwise return a BufferView into the original buffer (with shared /// ownership of original buffer). - /// @param type Must be primitive type or OPAQUE type. + /// + /// @param type The data type of the elements in new buffer. Must be primitive + /// type or OPAQUE type. + /// @param buf A pointer to the buffer to be sliced. If this is null, the + /// function returns a null pointer. + /// @param offset The starting point in the buffer from which the slice + /// begins. Must be non-negative. + /// @param length The number of elements to include in the slice. Must be + /// non-negative. + /// @param pool A pointer to a memory pool for allocating new buffers, + /// required if a new buffer needs to be created. static BufferPtr sliceBuffer( const Type& type, const BufferPtr& buf, diff --git a/velox/vector/tests/VectorTest.cpp b/velox/vector/tests/VectorTest.cpp index 33321d2a7d7d..3f9c962f66bd 100644 --- a/velox/vector/tests/VectorTest.cpp +++ b/velox/vector/tests/VectorTest.cpp @@ -3941,16 +3941,16 @@ TEST_F(VectorTest, sliceBigintBuffer) { ASSERT_EQ(sliceBufferPtr->size(), 40); // 5 * type size of int64_t. ASSERT_EQ(sliceBufferPtr->as(), bufferPtr->as() + 1); - VELOX_ASSERT_USER_THROW( + VELOX_ASSERT_THROW( BaseVector::sliceBuffer(*BIGINT(), bufferPtr, -1, 5, pool()), "Offset must be non-negative."); - VELOX_ASSERT_USER_THROW( + VELOX_ASSERT_THROW( BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 0, -1, pool()), "Length must be non-negative."); - VELOX_ASSERT_USER_THROW( + VELOX_ASSERT_THROW( BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 11, 1, pool()), "Offset must be less than or equal to 10."); - VELOX_ASSERT_USER_THROW( + VELOX_ASSERT_THROW( BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 5, 6, pool()), "Length must be less than or equal to 5."); } @@ -3977,5 +3977,15 @@ TEST_F(VectorTest, sliceBooleanBuffer) { "Pool must not be null."); } +TEST_F(VectorTest, sliceArrayBuffer) { + auto bufferPtr = AlignedBuffer::allocate(10, pool()); + try { + BaseVector::sliceBuffer(*ARRAY(BIGINT()), bufferPtr, 1, 1, pool()); + FAIL() << "Expected an exception"; + } catch (const std::invalid_argument& e) { + ASSERT_TRUE(strstr(e.what(), "Not a fixed width type: ARRAY") != nullptr); + } +} + } // namespace } // namespace facebook::velox From 3a4b8906c8eb2f23d72b0d0bf2cffbda6c91de31 Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Fri, 20 Sep 2024 21:04:01 +0800 Subject: [PATCH 5/7] address comments --- velox/buffer/Buffer.cpp | 77 +++++++++++++++++++++++++++++ velox/buffer/Buffer.h | 45 ++++++++++++++++- velox/buffer/CMakeLists.txt | 2 +- velox/buffer/tests/BufferTest.cpp | 35 +++++++++++++ velox/vector/BaseVector.cpp | 76 ---------------------------- velox/vector/BaseVector.h | 25 +--------- velox/vector/ComplexVector.cpp | 8 +-- velox/vector/DictionaryVector-inl.h | 3 +- velox/vector/FlatVector-inl.h | 3 +- velox/vector/tests/VectorTest.cpp | 54 -------------------- 10 files changed, 163 insertions(+), 165 deletions(-) create mode 100644 velox/buffer/Buffer.cpp diff --git a/velox/buffer/Buffer.cpp b/velox/buffer/Buffer.cpp new file mode 100644 index 000000000000..f606383b40cc --- /dev/null +++ b/velox/buffer/Buffer.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/buffer/Buffer.h" + +namespace facebook::velox { + +namespace { +struct BufferReleaser { + explicit BufferReleaser(const BufferPtr& parent) : parent_(parent) {} + void addRef() const {} + void release() const {} + + private: + BufferPtr parent_; +}; +} // namespace + +BufferPtr Buffer::sliceBufferZeroCopy( + size_t typeSize, + bool podType, + const BufferPtr& buf, + size_t offset, + size_t length) { + auto bytesOffset = checkedMultiply(offset, typeSize); + auto bytesLength = checkedMultiply(length, typeSize); + VELOX_CHECK_LE( + bytesOffset, + buf->size(), + "Offset must be less than or equal to {}.", + buf->size() / typeSize); + VELOX_CHECK_LE( + bytesLength, + buf->size() - bytesOffset, + "Length must be less than or equal to {}.", + buf->size() / typeSize - offset); + // Cannot use `Buffer::as()` here because Buffer::podType_ is false + // when type is OPAQUE. + auto data = reinterpret_cast(buf->as()) + bytesOffset; + return BufferView::create( + data, bytesLength, BufferReleaser(buf), podType); +} + +template <> +BufferPtr Buffer::slice( + const BufferPtr& buf, + size_t offset, + size_t length, + memory::MemoryPool* pool) { + if (!buf) { + return nullptr; + } + + if (offset % 8 == 0) { + return sliceBufferZeroCopy( + 1, true, buf, bits::nbytes(offset), bits::nbytes(length)); + } + VELOX_CHECK_NOT_NULL(pool, "Pool must not be null."); + auto ans = AlignedBuffer::allocate(length, pool); + bits::copyBits( + buf->as(), offset, ans->asMutable(), 0, length); + return ans; +} +} // namespace facebook::velox diff --git a/velox/buffer/Buffer.h b/velox/buffer/Buffer.h index b7aad5d07530..6a2569d0f262 100644 --- a/velox/buffer/Buffer.h +++ b/velox/buffer/Buffer.h @@ -29,8 +29,11 @@ namespace facebook { namespace velox { +class Buffer; class AlignedBuffer; +using BufferPtr = boost::intrusive_ptr; + // Represents vector payloads, like arrays of numbers or strings or // associated null flags. Buffers are reference counted and must be // held by BufferPtr. Buffers can either own their memory or can be @@ -172,6 +175,32 @@ class Buffer { return os; } + /// Slice a buffer with specific type T. + /// For boolean type and if the 'offset' is not multiple of 8, return a + /// shifted copy, new buffer is allocated from 'pool'. + /// Otherwise return a BufferView into the original buffer (with shared + /// ownership of original buffer). + /// + /// @param buf A pointer to the buffer to be sliced. If this is null, the + /// function returns a null pointer. + /// @param offset The starting point in the buffer from which the slice + /// begins. + /// @param length The number of elements to include in the slice. + /// @param pool A pointer to a memory pool for allocating new buffers, + /// required if a new buffer needs to be created. + template + static BufferPtr slice( + const BufferPtr& buf, + size_t offset, + size_t length, + memory::MemoryPool* pool) { + if (!buf) { + return nullptr; + } + return sliceBufferZeroCopy( + sizeof(T), is_pod_like_v, buf, offset, length); + } + protected: // Writes a magic word at 'capacity_'. No-op for a BufferView. The actual // logic is inside a separate virtual function, allowing override by derived @@ -246,6 +275,14 @@ class Buffer { uint64_t padding_[2] = {static_cast(-1), static_cast(-1)}; // Needs to use setCapacity() from static method reallocate(). friend class AlignedBuffer; + + private: + static BufferPtr sliceBufferZeroCopy( + size_t typeSize, + bool podType, + const BufferPtr& buf, + size_t offset, + size_t length); }; static_assert( @@ -262,7 +299,12 @@ inline MutableRange Buffer::asMutableRange() { return MutableRange(asMutable(), 0, size() * 8); } -using BufferPtr = boost::intrusive_ptr; +template <> +BufferPtr Buffer::slice( + const BufferPtr& buf, + size_t offset, + size_t length, + memory::MemoryPool* pool); static inline void intrusive_ptr_add_ref(Buffer* buffer) { buffer->addRef(); @@ -655,6 +697,5 @@ class BufferView : public Buffer { Releaser const releaser_; }; - } // namespace velox } // namespace facebook diff --git a/velox/buffer/CMakeLists.txt b/velox/buffer/CMakeLists.txt index 2b1e8ea65a78..ec15197d4f5a 100644 --- a/velox/buffer/CMakeLists.txt +++ b/velox/buffer/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -velox_add_library(velox_buffer StringViewBufferHolder.cpp) +velox_add_library(velox_buffer Buffer.cpp StringViewBufferHolder.cpp) velox_link_libraries(velox_buffer velox_memory velox_common_base Folly::folly) diff --git a/velox/buffer/tests/BufferTest.cpp b/velox/buffer/tests/BufferTest.cpp index 2f6107f551ed..39c84533d1d1 100644 --- a/velox/buffer/tests/BufferTest.cpp +++ b/velox/buffer/tests/BufferTest.cpp @@ -453,5 +453,40 @@ TEST_F(BufferTest, testAllocateSizeOverflow) { AlignedBuffer::reallocate(&buf, 1ull << 62), VeloxException); } +TEST_F(BufferTest, sliceBigintBuffer) { + auto bufferPtr = AlignedBuffer::allocate(10, pool_.get()); + auto sliceBufferPtr = Buffer::slice(bufferPtr, 1, 5, pool_.get()); + ASSERT_TRUE(sliceBufferPtr->isView()); + ASSERT_EQ(sliceBufferPtr->size(), 40); // 5 * type size of int64_t. + ASSERT_EQ(sliceBufferPtr->as(), bufferPtr->as() + 1); + + VELOX_ASSERT_THROW( + Buffer::slice(bufferPtr, 11, 1, pool_.get()), + "Offset must be less than or equal to 10."); + VELOX_ASSERT_THROW( + Buffer::slice(bufferPtr, 5, 6, pool_.get()), + "Length must be less than or equal to 5."); +} + +TEST_F(BufferTest, sliceBooleanBuffer) { + auto bufferPtr = AlignedBuffer::allocate(16, pool_.get()); + auto data = bufferPtr->asMutableRange(); + for (int i = 0; i < 16; ++i) { + data[i] = (i % 2 != 0); + } + auto sliceBufferPtr = Buffer::slice(bufferPtr, 8, 8, pool_.get()); + ASSERT_TRUE(sliceBufferPtr->isView()); + ASSERT_EQ(sliceBufferPtr->as(), bufferPtr->as() + 1); + + sliceBufferPtr = Buffer::slice(bufferPtr, 5, 5, pool_.get()); + ASSERT_FALSE(sliceBufferPtr->isView()); + auto sliceData = sliceBufferPtr->asRange(); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(sliceData[i], i % 2 == 0); + } + VELOX_ASSERT_THROW( + Buffer::slice(bufferPtr, 5, 6, nullptr), "Pool must not be null."); +} + } // namespace velox } // namespace facebook diff --git a/velox/vector/BaseVector.cpp b/velox/vector/BaseVector.cpp index f2deefce4345..c07d541463b7 100644 --- a/velox/vector/BaseVector.cpp +++ b/velox/vector/BaseVector.cpp @@ -917,82 +917,6 @@ void BaseVector::validate(const VectorValidateOptions& options) const { } } -namespace { - -size_t typeSize(const Type& type) { - switch (type.kind()) { - case TypeKind::VARCHAR: - case TypeKind::VARBINARY: - return sizeof(StringView); - case TypeKind::OPAQUE: - return sizeof(std::shared_ptr); - default: - VELOX_DCHECK(type.isPrimitiveType(), type.toString()); - return type.cppSizeInBytes(); - } -} - -struct BufferReleaser { - explicit BufferReleaser(const BufferPtr& parent) : parent_(parent) {} - void addRef() const {} - void release() const {} - - private: - BufferPtr parent_; -}; - -BufferPtr sliceBufferZeroCopy( - size_t typeSize, - bool podType, - const BufferPtr& buf, - vector_size_t offset, - vector_size_t length) { - VELOX_CHECK_LE( - offset * typeSize, - buf->size(), - "Offset must be less than or equal to {}.", - buf->size() / typeSize); - VELOX_CHECK_LE( - (offset + length) * typeSize, - buf->size(), - "Length must be less than or equal to {}.", - buf->size() / typeSize - offset); - // Cannot use `Buffer::as()` here because Buffer::podType_ is false - // when type is OPAQUE. - auto data = - reinterpret_cast(buf->as()) + offset * typeSize; - return BufferView::create( - data, length * typeSize, BufferReleaser(buf), podType); -} - -} // namespace - -// static -BufferPtr BaseVector::sliceBuffer( - const Type& type, - const BufferPtr& buf, - vector_size_t offset, - vector_size_t length, - memory::MemoryPool* pool) { - VELOX_CHECK_GE(offset, 0, "Offset must be non-negative."); - VELOX_CHECK_GE(length, 0, "Length must be non-negative."); - if (!buf) { - return nullptr; - } - if (type.kind() != TypeKind::BOOLEAN) { - return sliceBufferZeroCopy( - typeSize(type), type.isPrimitiveType(), buf, offset, length); - } - if (offset % 8 == 0) { - return sliceBufferZeroCopy(1, true, buf, offset / 8, (length + 7) / 8); - } - VELOX_CHECK_NOT_NULL(pool, "Pool must not be null."); - auto ans = AlignedBuffer::allocate(length, pool); - bits::copyBits( - buf->as(), offset, ans->asMutable(), 0, length); - return ans; -} - std::optional BaseVector::findDuplicateValue( vector_size_t start, vector_size_t size, diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h index 3d50569c2882..8420a1e3ba79 100644 --- a/velox/vector/BaseVector.h +++ b/velox/vector/BaseVector.h @@ -863,29 +863,6 @@ class BaseVector { storageByteCount_ = std::nullopt; } - /// Slice a buffer with specific type. - /// For boolean type and if the 'offset' is not multiple of 8, return a - /// shifted copy, new buffer is allocated from 'pool'. - /// Otherwise return a BufferView into the original buffer (with shared - /// ownership of original buffer). - /// - /// @param type The data type of the elements in new buffer. Must be primitive - /// type or OPAQUE type. - /// @param buf A pointer to the buffer to be sliced. If this is null, the - /// function returns a null pointer. - /// @param offset The starting point in the buffer from which the slice - /// begins. Must be non-negative. - /// @param length The number of elements to include in the slice. Must be - /// non-negative. - /// @param pool A pointer to a memory pool for allocating new buffers, - /// required if a new buffer needs to be created. - static BufferPtr sliceBuffer( - const Type& type, - const BufferPtr& buf, - vector_size_t offset, - vector_size_t length, - memory::MemoryPool* pool); - protected: // Returns a brief summary of the vector. The default implementation includes // encoding, type, number of rows and number of nulls. @@ -906,7 +883,7 @@ class BaseVector { } BufferPtr sliceNulls(vector_size_t offset, vector_size_t length) const { - return sliceBuffer(*BOOLEAN(), nulls_, offset, length, pool_); + return Buffer::slice(nulls_, offset, length, pool_); } TypePtr type_; diff --git a/velox/vector/ComplexVector.cpp b/velox/vector/ComplexVector.cpp index 041d67cb1458..65b750fade2e 100644 --- a/velox/vector/ComplexVector.cpp +++ b/velox/vector/ComplexVector.cpp @@ -1186,8 +1186,8 @@ VectorPtr ArrayVector::slice(vector_size_t offset, vector_size_t length) const { type_, sliceNulls(offset, length), length, - sliceBuffer(*INTEGER(), offsets_, offset, length, pool_), - sliceBuffer(*INTEGER(), sizes_, offset, length, pool_), + Buffer::slice(offsets_, offset, length, pool_), + Buffer::slice(sizes_, offset, length, pool_), elements_); } @@ -1485,8 +1485,8 @@ VectorPtr MapVector::slice(vector_size_t offset, vector_size_t length) const { type_, sliceNulls(offset, length), length, - sliceBuffer(*INTEGER(), offsets_, offset, length, pool_), - sliceBuffer(*INTEGER(), sizes_, offset, length, pool_), + Buffer::slice(offsets_, offset, length, pool_), + Buffer::slice(sizes_, offset, length, pool_), keys_, values_); } diff --git a/velox/vector/DictionaryVector-inl.h b/velox/vector/DictionaryVector-inl.h index add6dcf5a551..e5a006895191 100644 --- a/velox/vector/DictionaryVector-inl.h +++ b/velox/vector/DictionaryVector-inl.h @@ -185,8 +185,7 @@ VectorPtr DictionaryVector::slice(vector_size_t offset, vector_size_t length) this->sliceNulls(offset, length), length, valueVector(), - BaseVector::sliceBuffer( - *INTEGER(), indices_, offset, length, this->pool_)); + Buffer::slice(indices_, offset, length, this->pool_)); } template diff --git a/velox/vector/FlatVector-inl.h b/velox/vector/FlatVector-inl.h index d56037114e7d..3ab6f1ced798 100644 --- a/velox/vector/FlatVector-inl.h +++ b/velox/vector/FlatVector-inl.h @@ -423,8 +423,7 @@ VectorPtr FlatVector::slice(vector_size_t offset, vector_size_t length) this->type_, this->sliceNulls(offset, length), length, - BaseVector::sliceBuffer( - *this->type_, values_, offset, length, this->pool_), + Buffer::slice(values_, offset, length, this->pool_), std::vector(stringBuffers_)); } diff --git a/velox/vector/tests/VectorTest.cpp b/velox/vector/tests/VectorTest.cpp index 3f9c962f66bd..3a5525412b02 100644 --- a/velox/vector/tests/VectorTest.cpp +++ b/velox/vector/tests/VectorTest.cpp @@ -3933,59 +3933,5 @@ TEST_F(VectorTest, hasOverlappingRanges) { test(2, {false, false}, {1, 0}, {1, 3}, true); } -TEST_F(VectorTest, sliceBigintBuffer) { - auto bufferPtr = AlignedBuffer::allocate(10, pool()); - auto sliceBufferPtr = - BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 1, 5, pool()); - ASSERT_TRUE(sliceBufferPtr->isView()); - ASSERT_EQ(sliceBufferPtr->size(), 40); // 5 * type size of int64_t. - ASSERT_EQ(sliceBufferPtr->as(), bufferPtr->as() + 1); - - VELOX_ASSERT_THROW( - BaseVector::sliceBuffer(*BIGINT(), bufferPtr, -1, 5, pool()), - "Offset must be non-negative."); - VELOX_ASSERT_THROW( - BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 0, -1, pool()), - "Length must be non-negative."); - VELOX_ASSERT_THROW( - BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 11, 1, pool()), - "Offset must be less than or equal to 10."); - VELOX_ASSERT_THROW( - BaseVector::sliceBuffer(*BIGINT(), bufferPtr, 5, 6, pool()), - "Length must be less than or equal to 5."); -} - -TEST_F(VectorTest, sliceBooleanBuffer) { - auto bufferPtr = AlignedBuffer::allocate(16, pool()); - auto data = bufferPtr->asMutableRange(); - for (int i = 0; i < 16; ++i) { - data[i] = (i % 2 != 0); - } - auto sliceBufferPtr = - BaseVector::sliceBuffer(*BOOLEAN(), bufferPtr, 8, 8, pool()); - ASSERT_TRUE(sliceBufferPtr->isView()); - ASSERT_EQ(sliceBufferPtr->as(), bufferPtr->as() + 1); - - sliceBufferPtr = BaseVector::sliceBuffer(*BOOLEAN(), bufferPtr, 5, 5, pool()); - ASSERT_FALSE(sliceBufferPtr->isView()); - auto sliceData = sliceBufferPtr->asRange(); - for (int i = 0; i < 5; ++i) { - ASSERT_EQ(sliceData[i], i % 2 == 0); - } - VELOX_ASSERT_THROW( - BaseVector::sliceBuffer(*BOOLEAN(), bufferPtr, 5, 6, nullptr), - "Pool must not be null."); -} - -TEST_F(VectorTest, sliceArrayBuffer) { - auto bufferPtr = AlignedBuffer::allocate(10, pool()); - try { - BaseVector::sliceBuffer(*ARRAY(BIGINT()), bufferPtr, 1, 1, pool()); - FAIL() << "Expected an exception"; - } catch (const std::invalid_argument& e) { - ASSERT_TRUE(strstr(e.what(), "Not a fixed width type: ARRAY") != nullptr); - } -} - } // namespace } // namespace facebook::velox From 12442119e3b6572263384fb13418d843b14f3786 Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Sat, 21 Sep 2024 08:36:03 +0800 Subject: [PATCH 6/7] address comments --- velox/buffer/Buffer.cpp | 23 ++++++++++++----------- velox/buffer/Buffer.h | 19 ++++++++++--------- velox/vector/ComplexVector.cpp | 8 ++++---- velox/vector/DictionaryVector-inl.h | 2 +- 4 files changed, 27 insertions(+), 25 deletions(-) diff --git a/velox/buffer/Buffer.cpp b/velox/buffer/Buffer.cpp index f606383b40cc..2152d3f80d9f 100644 --- a/velox/buffer/Buffer.cpp +++ b/velox/buffer/Buffer.cpp @@ -32,46 +32,47 @@ struct BufferReleaser { BufferPtr Buffer::sliceBufferZeroCopy( size_t typeSize, bool podType, - const BufferPtr& buf, + const BufferPtr& buffer, size_t offset, size_t length) { auto bytesOffset = checkedMultiply(offset, typeSize); auto bytesLength = checkedMultiply(length, typeSize); VELOX_CHECK_LE( bytesOffset, - buf->size(), + buffer->size(), "Offset must be less than or equal to {}.", - buf->size() / typeSize); + buffer->size() / typeSize); VELOX_CHECK_LE( bytesLength, - buf->size() - bytesOffset, + buffer->size() - bytesOffset, "Length must be less than or equal to {}.", - buf->size() / typeSize - offset); + buffer->size() / typeSize - offset); // Cannot use `Buffer::as()` here because Buffer::podType_ is false // when type is OPAQUE. - auto data = reinterpret_cast(buf->as()) + bytesOffset; + auto data = + reinterpret_cast(buffer->as()) + bytesOffset; return BufferView::create( - data, bytesLength, BufferReleaser(buf), podType); + data, bytesLength, BufferReleaser(buffer), podType); } template <> BufferPtr Buffer::slice( - const BufferPtr& buf, + const BufferPtr& buffer, size_t offset, size_t length, memory::MemoryPool* pool) { - if (!buf) { + if (!buffer) { return nullptr; } if (offset % 8 == 0) { return sliceBufferZeroCopy( - 1, true, buf, bits::nbytes(offset), bits::nbytes(length)); + 1, true, buffer, bits::nbytes(offset), bits::nbytes(length)); } VELOX_CHECK_NOT_NULL(pool, "Pool must not be null."); auto ans = AlignedBuffer::allocate(length, pool); bits::copyBits( - buf->as(), offset, ans->asMutable(), 0, length); + buffer->as(), offset, ans->asMutable(), 0, length); return ans; } } // namespace facebook::velox diff --git a/velox/buffer/Buffer.h b/velox/buffer/Buffer.h index 6a2569d0f262..e880b9c4b5c4 100644 --- a/velox/buffer/Buffer.h +++ b/velox/buffer/Buffer.h @@ -181,24 +181,25 @@ class Buffer { /// Otherwise return a BufferView into the original buffer (with shared /// ownership of original buffer). /// - /// @param buf A pointer to the buffer to be sliced. If this is null, the + /// @param buffer A pointer to the buffer to be sliced. If this is null, the /// function returns a null pointer. - /// @param offset The starting point in the buffer from which the slice - /// begins. - /// @param length The number of elements to include in the slice. + /// @param offset The element position in the buffer where the slice begins. + /// Must be less or equal than the buffer size. + /// @param length The number of elements to include in the slice. Must be + /// less or equal than the buffer size - 'offset'. /// @param pool A pointer to a memory pool for allocating new buffers, /// required if a new buffer needs to be created. template static BufferPtr slice( - const BufferPtr& buf, + const BufferPtr& buffer, size_t offset, size_t length, memory::MemoryPool* pool) { - if (!buf) { + if (!buffer) { return nullptr; } return sliceBufferZeroCopy( - sizeof(T), is_pod_like_v, buf, offset, length); + sizeof(T), is_pod_like_v, buffer, offset, length); } protected: @@ -280,7 +281,7 @@ class Buffer { static BufferPtr sliceBufferZeroCopy( size_t typeSize, bool podType, - const BufferPtr& buf, + const BufferPtr& buffer, size_t offset, size_t length); }; @@ -301,7 +302,7 @@ inline MutableRange Buffer::asMutableRange() { template <> BufferPtr Buffer::slice( - const BufferPtr& buf, + const BufferPtr& buffer, size_t offset, size_t length, memory::MemoryPool* pool); diff --git a/velox/vector/ComplexVector.cpp b/velox/vector/ComplexVector.cpp index 65b750fade2e..3d0ef74c721d 100644 --- a/velox/vector/ComplexVector.cpp +++ b/velox/vector/ComplexVector.cpp @@ -1186,8 +1186,8 @@ VectorPtr ArrayVector::slice(vector_size_t offset, vector_size_t length) const { type_, sliceNulls(offset, length), length, - Buffer::slice(offsets_, offset, length, pool_), - Buffer::slice(sizes_, offset, length, pool_), + Buffer::slice(offsets_, offset, length, pool_), + Buffer::slice(sizes_, offset, length, pool_), elements_); } @@ -1485,8 +1485,8 @@ VectorPtr MapVector::slice(vector_size_t offset, vector_size_t length) const { type_, sliceNulls(offset, length), length, - Buffer::slice(offsets_, offset, length, pool_), - Buffer::slice(sizes_, offset, length, pool_), + Buffer::slice(offsets_, offset, length, pool_), + Buffer::slice(sizes_, offset, length, pool_), keys_, values_); } diff --git a/velox/vector/DictionaryVector-inl.h b/velox/vector/DictionaryVector-inl.h index e5a006895191..562ee06a6a4f 100644 --- a/velox/vector/DictionaryVector-inl.h +++ b/velox/vector/DictionaryVector-inl.h @@ -185,7 +185,7 @@ VectorPtr DictionaryVector::slice(vector_size_t offset, vector_size_t length) this->sliceNulls(offset, length), length, valueVector(), - Buffer::slice(indices_, offset, length, this->pool_)); + Buffer::slice(indices_, offset, length, this->pool_)); } template From 9999345fbe98978d1a2e7c2fa6b710f079aa1c4e Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Sun, 22 Sep 2024 17:00:13 +0800 Subject: [PATCH 7/7] address comments --- velox/buffer/Buffer.cpp | 4 +--- velox/buffer/Buffer.h | 8 +++----- velox/buffer/tests/BufferTest.cpp | 3 +++ velox/vector/BaseVector.h | 2 +- velox/vector/ComplexVector.cpp | 12 ++++++++---- velox/vector/DictionaryVector-inl.h | 4 +++- velox/vector/FlatVector-inl.h | 3 ++- 7 files changed, 21 insertions(+), 15 deletions(-) diff --git a/velox/buffer/Buffer.cpp b/velox/buffer/Buffer.cpp index 2152d3f80d9f..806959295401 100644 --- a/velox/buffer/Buffer.cpp +++ b/velox/buffer/Buffer.cpp @@ -61,9 +61,7 @@ BufferPtr Buffer::slice( size_t offset, size_t length, memory::MemoryPool* pool) { - if (!buffer) { - return nullptr; - } + VELOX_CHECK_NOT_NULL(buffer, "Buffer must not be null."); if (offset % 8 == 0) { return sliceBufferZeroCopy( diff --git a/velox/buffer/Buffer.h b/velox/buffer/Buffer.h index e880b9c4b5c4..09778f4c0d14 100644 --- a/velox/buffer/Buffer.h +++ b/velox/buffer/Buffer.h @@ -181,8 +181,7 @@ class Buffer { /// Otherwise return a BufferView into the original buffer (with shared /// ownership of original buffer). /// - /// @param buffer A pointer to the buffer to be sliced. If this is null, the - /// function returns a null pointer. + /// @param buffer A pointer to the buffer to be sliced. Must not be null. /// @param offset The element position in the buffer where the slice begins. /// Must be less or equal than the buffer size. /// @param length The number of elements to include in the slice. Must be @@ -195,9 +194,7 @@ class Buffer { size_t offset, size_t length, memory::MemoryPool* pool) { - if (!buffer) { - return nullptr; - } + VELOX_CHECK_NOT_NULL(buffer, "Buffer must not be null."); return sliceBufferZeroCopy( sizeof(T), is_pod_like_v, buffer, offset, length); } @@ -698,5 +695,6 @@ class BufferView : public Buffer { Releaser const releaser_; }; + } // namespace velox } // namespace facebook diff --git a/velox/buffer/tests/BufferTest.cpp b/velox/buffer/tests/BufferTest.cpp index 39c84533d1d1..902d578c1ceb 100644 --- a/velox/buffer/tests/BufferTest.cpp +++ b/velox/buffer/tests/BufferTest.cpp @@ -466,6 +466,9 @@ TEST_F(BufferTest, sliceBigintBuffer) { VELOX_ASSERT_THROW( Buffer::slice(bufferPtr, 5, 6, pool_.get()), "Length must be less than or equal to 5."); + VELOX_ASSERT_THROW( + Buffer::slice(nullptr, 5, 6, pool_.get()), + "Buffer must not be null."); } TEST_F(BufferTest, sliceBooleanBuffer) { diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h index 8420a1e3ba79..705c11dc9cb4 100644 --- a/velox/vector/BaseVector.h +++ b/velox/vector/BaseVector.h @@ -883,7 +883,7 @@ class BaseVector { } BufferPtr sliceNulls(vector_size_t offset, vector_size_t length) const { - return Buffer::slice(nulls_, offset, length, pool_); + return nulls_ ? Buffer::slice(nulls_, offset, length, pool_) : nulls_; } TypePtr type_; diff --git a/velox/vector/ComplexVector.cpp b/velox/vector/ComplexVector.cpp index 3d0ef74c721d..1ef5f5d29c0f 100644 --- a/velox/vector/ComplexVector.cpp +++ b/velox/vector/ComplexVector.cpp @@ -1186,8 +1186,10 @@ VectorPtr ArrayVector::slice(vector_size_t offset, vector_size_t length) const { type_, sliceNulls(offset, length), length, - Buffer::slice(offsets_, offset, length, pool_), - Buffer::slice(sizes_, offset, length, pool_), + offsets_ ? Buffer::slice(offsets_, offset, length, pool_) + : offsets_, + sizes_ ? Buffer::slice(sizes_, offset, length, pool_) + : sizes_, elements_); } @@ -1485,8 +1487,10 @@ VectorPtr MapVector::slice(vector_size_t offset, vector_size_t length) const { type_, sliceNulls(offset, length), length, - Buffer::slice(offsets_, offset, length, pool_), - Buffer::slice(sizes_, offset, length, pool_), + offsets_ ? Buffer::slice(offsets_, offset, length, pool_) + : offsets_, + sizes_ ? Buffer::slice(sizes_, offset, length, pool_) + : sizes_, keys_, values_); } diff --git a/velox/vector/DictionaryVector-inl.h b/velox/vector/DictionaryVector-inl.h index 562ee06a6a4f..fe1a6fed756e 100644 --- a/velox/vector/DictionaryVector-inl.h +++ b/velox/vector/DictionaryVector-inl.h @@ -185,7 +185,9 @@ VectorPtr DictionaryVector::slice(vector_size_t offset, vector_size_t length) this->sliceNulls(offset, length), length, valueVector(), - Buffer::slice(indices_, offset, length, this->pool_)); + indices_ + ? Buffer::slice(indices_, offset, length, this->pool_) + : indices_); } template diff --git a/velox/vector/FlatVector-inl.h b/velox/vector/FlatVector-inl.h index 3ab6f1ced798..8551d8d8839b 100644 --- a/velox/vector/FlatVector-inl.h +++ b/velox/vector/FlatVector-inl.h @@ -423,7 +423,8 @@ VectorPtr FlatVector::slice(vector_size_t offset, vector_size_t length) this->type_, this->sliceNulls(offset, length), length, - Buffer::slice(values_, offset, length, this->pool_), + values_ ? Buffer::slice(values_, offset, length, this->pool_) + : values_, std::vector(stringBuffers_)); }