diff --git a/velox/buffer/Buffer.cpp b/velox/buffer/Buffer.cpp new file mode 100644 index 000000000000..806959295401 --- /dev/null +++ b/velox/buffer/Buffer.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "velox/buffer/Buffer.h" + +namespace facebook::velox { + +namespace { +struct BufferReleaser { + explicit BufferReleaser(const BufferPtr& parent) : parent_(parent) {} + void addRef() const {} + void release() const {} + + private: + BufferPtr parent_; +}; +} // namespace + +BufferPtr Buffer::sliceBufferZeroCopy( + size_t typeSize, + bool podType, + const BufferPtr& buffer, + size_t offset, + size_t length) { + auto bytesOffset = checkedMultiply(offset, typeSize); + auto bytesLength = checkedMultiply(length, typeSize); + VELOX_CHECK_LE( + bytesOffset, + buffer->size(), + "Offset must be less than or equal to {}.", + buffer->size() / typeSize); + VELOX_CHECK_LE( + bytesLength, + buffer->size() - bytesOffset, + "Length must be less than or equal to {}.", + buffer->size() / typeSize - offset); + // Cannot use `Buffer::as()` here because Buffer::podType_ is false + // when type is OPAQUE. + auto data = + reinterpret_cast(buffer->as()) + bytesOffset; + return BufferView::create( + data, bytesLength, BufferReleaser(buffer), podType); +} + +template <> +BufferPtr Buffer::slice( + const BufferPtr& buffer, + size_t offset, + size_t length, + memory::MemoryPool* pool) { + VELOX_CHECK_NOT_NULL(buffer, "Buffer must not be null."); + + if (offset % 8 == 0) { + return sliceBufferZeroCopy( + 1, true, buffer, bits::nbytes(offset), bits::nbytes(length)); + } + VELOX_CHECK_NOT_NULL(pool, "Pool must not be null."); + auto ans = AlignedBuffer::allocate(length, pool); + bits::copyBits( + buffer->as(), offset, ans->asMutable(), 0, length); + return ans; +} +} // namespace facebook::velox diff --git a/velox/buffer/Buffer.h b/velox/buffer/Buffer.h index b7aad5d07530..09778f4c0d14 100644 --- a/velox/buffer/Buffer.h +++ b/velox/buffer/Buffer.h @@ -29,8 +29,11 @@ namespace facebook { namespace velox { +class Buffer; class AlignedBuffer; +using BufferPtr = boost::intrusive_ptr; + // Represents vector payloads, like arrays of numbers or strings or // associated null flags. Buffers are reference counted and must be // held by BufferPtr. Buffers can either own their memory or can be @@ -172,6 +175,30 @@ class Buffer { return os; } + /// Slice a buffer with specific type T. + /// For boolean type and if the 'offset' is not multiple of 8, return a + /// shifted copy, new buffer is allocated from 'pool'. + /// Otherwise return a BufferView into the original buffer (with shared + /// ownership of original buffer). + /// + /// @param buffer A pointer to the buffer to be sliced. Must not be null. + /// @param offset The element position in the buffer where the slice begins. + /// Must be less or equal than the buffer size. + /// @param length The number of elements to include in the slice. Must be + /// less or equal than the buffer size - 'offset'. + /// @param pool A pointer to a memory pool for allocating new buffers, + /// required if a new buffer needs to be created. + template + static BufferPtr slice( + const BufferPtr& buffer, + size_t offset, + size_t length, + memory::MemoryPool* pool) { + VELOX_CHECK_NOT_NULL(buffer, "Buffer must not be null."); + return sliceBufferZeroCopy( + sizeof(T), is_pod_like_v, buffer, offset, length); + } + protected: // Writes a magic word at 'capacity_'. No-op for a BufferView. The actual // logic is inside a separate virtual function, allowing override by derived @@ -246,6 +273,14 @@ class Buffer { uint64_t padding_[2] = {static_cast(-1), static_cast(-1)}; // Needs to use setCapacity() from static method reallocate(). friend class AlignedBuffer; + + private: + static BufferPtr sliceBufferZeroCopy( + size_t typeSize, + bool podType, + const BufferPtr& buffer, + size_t offset, + size_t length); }; static_assert( @@ -262,7 +297,12 @@ inline MutableRange Buffer::asMutableRange() { return MutableRange(asMutable(), 0, size() * 8); } -using BufferPtr = boost::intrusive_ptr; +template <> +BufferPtr Buffer::slice( + const BufferPtr& buffer, + size_t offset, + size_t length, + memory::MemoryPool* pool); static inline void intrusive_ptr_add_ref(Buffer* buffer) { buffer->addRef(); diff --git a/velox/buffer/CMakeLists.txt b/velox/buffer/CMakeLists.txt index 2b1e8ea65a78..ec15197d4f5a 100644 --- a/velox/buffer/CMakeLists.txt +++ b/velox/buffer/CMakeLists.txt @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -velox_add_library(velox_buffer StringViewBufferHolder.cpp) +velox_add_library(velox_buffer Buffer.cpp StringViewBufferHolder.cpp) velox_link_libraries(velox_buffer velox_memory velox_common_base Folly::folly) diff --git a/velox/buffer/tests/BufferTest.cpp b/velox/buffer/tests/BufferTest.cpp index 2f6107f551ed..902d578c1ceb 100644 --- a/velox/buffer/tests/BufferTest.cpp +++ b/velox/buffer/tests/BufferTest.cpp @@ -453,5 +453,43 @@ TEST_F(BufferTest, testAllocateSizeOverflow) { AlignedBuffer::reallocate(&buf, 1ull << 62), VeloxException); } +TEST_F(BufferTest, sliceBigintBuffer) { + auto bufferPtr = AlignedBuffer::allocate(10, pool_.get()); + auto sliceBufferPtr = Buffer::slice(bufferPtr, 1, 5, pool_.get()); + ASSERT_TRUE(sliceBufferPtr->isView()); + ASSERT_EQ(sliceBufferPtr->size(), 40); // 5 * type size of int64_t. + ASSERT_EQ(sliceBufferPtr->as(), bufferPtr->as() + 1); + + VELOX_ASSERT_THROW( + Buffer::slice(bufferPtr, 11, 1, pool_.get()), + "Offset must be less than or equal to 10."); + VELOX_ASSERT_THROW( + Buffer::slice(bufferPtr, 5, 6, pool_.get()), + "Length must be less than or equal to 5."); + VELOX_ASSERT_THROW( + Buffer::slice(nullptr, 5, 6, pool_.get()), + "Buffer must not be null."); +} + +TEST_F(BufferTest, sliceBooleanBuffer) { + auto bufferPtr = AlignedBuffer::allocate(16, pool_.get()); + auto data = bufferPtr->asMutableRange(); + for (int i = 0; i < 16; ++i) { + data[i] = (i % 2 != 0); + } + auto sliceBufferPtr = Buffer::slice(bufferPtr, 8, 8, pool_.get()); + ASSERT_TRUE(sliceBufferPtr->isView()); + ASSERT_EQ(sliceBufferPtr->as(), bufferPtr->as() + 1); + + sliceBufferPtr = Buffer::slice(bufferPtr, 5, 5, pool_.get()); + ASSERT_FALSE(sliceBufferPtr->isView()); + auto sliceData = sliceBufferPtr->asRange(); + for (int i = 0; i < 5; ++i) { + ASSERT_EQ(sliceData[i], i % 2 == 0); + } + VELOX_ASSERT_THROW( + Buffer::slice(bufferPtr, 5, 6, nullptr), "Pool must not be null."); +} + } // namespace velox } // namespace facebook diff --git a/velox/vector/BaseVector.cpp b/velox/vector/BaseVector.cpp index 2ae0d69212ff..c07d541463b7 100644 --- a/velox/vector/BaseVector.cpp +++ b/velox/vector/BaseVector.cpp @@ -917,70 +917,6 @@ void BaseVector::validate(const VectorValidateOptions& options) const { } } -namespace { - -size_t typeSize(const Type& type) { - switch (type.kind()) { - case TypeKind::VARCHAR: - case TypeKind::VARBINARY: - return sizeof(StringView); - case TypeKind::OPAQUE: - return sizeof(std::shared_ptr); - default: - VELOX_DCHECK(type.isPrimitiveType(), type.toString()); - return type.cppSizeInBytes(); - } -} - -struct BufferReleaser { - explicit BufferReleaser(const BufferPtr& parent) : parent_(parent) {} - void addRef() const {} - void release() const {} - - private: - BufferPtr parent_; -}; - -BufferPtr sliceBufferZeroCopy( - size_t typeSize, - bool podType, - const BufferPtr& buf, - vector_size_t offset, - vector_size_t length) { - // Cannot use `Buffer::as()` here because Buffer::podType_ is false - // when type is OPAQUE. - auto data = - reinterpret_cast(buf->as()) + offset * typeSize; - return BufferView::create( - data, length * typeSize, BufferReleaser(buf), podType); -} - -} // namespace - -// static -BufferPtr BaseVector::sliceBuffer( - const Type& type, - const BufferPtr& buf, - vector_size_t offset, - vector_size_t length, - memory::MemoryPool* pool) { - if (!buf) { - return nullptr; - } - if (type.kind() != TypeKind::BOOLEAN) { - return sliceBufferZeroCopy( - typeSize(type), type.isPrimitiveType(), buf, offset, length); - } - if (offset % 8 == 0) { - return sliceBufferZeroCopy(1, true, buf, offset / 8, (length + 7) / 8); - } - VELOX_DCHECK_NOT_NULL(pool); - auto ans = AlignedBuffer::allocate(length, pool); - bits::copyBits( - buf->as(), offset, ans->asMutable(), 0, length); - return ans; -} - std::optional BaseVector::findDuplicateValue( vector_size_t start, vector_size_t size, diff --git a/velox/vector/BaseVector.h b/velox/vector/BaseVector.h index 74478e2f7030..705c11dc9cb4 100644 --- a/velox/vector/BaseVector.h +++ b/velox/vector/BaseVector.h @@ -882,20 +882,8 @@ class BaseVector { ensureNullsCapacity(length_, true); } - // Slice a buffer with specific type. - // - // For boolean type and if the offset is not multiple of 8, return a shifted - // copy; otherwise return a BufferView into the original buffer (with shared - // ownership of original buffer). - static BufferPtr sliceBuffer( - const Type&, - const BufferPtr&, - vector_size_t offset, - vector_size_t length, - memory::MemoryPool*); - BufferPtr sliceNulls(vector_size_t offset, vector_size_t length) const { - return sliceBuffer(*BOOLEAN(), nulls_, offset, length, pool_); + return nulls_ ? Buffer::slice(nulls_, offset, length, pool_) : nulls_; } TypePtr type_; diff --git a/velox/vector/ComplexVector.cpp b/velox/vector/ComplexVector.cpp index 041d67cb1458..1ef5f5d29c0f 100644 --- a/velox/vector/ComplexVector.cpp +++ b/velox/vector/ComplexVector.cpp @@ -1186,8 +1186,10 @@ VectorPtr ArrayVector::slice(vector_size_t offset, vector_size_t length) const { type_, sliceNulls(offset, length), length, - sliceBuffer(*INTEGER(), offsets_, offset, length, pool_), - sliceBuffer(*INTEGER(), sizes_, offset, length, pool_), + offsets_ ? Buffer::slice(offsets_, offset, length, pool_) + : offsets_, + sizes_ ? Buffer::slice(sizes_, offset, length, pool_) + : sizes_, elements_); } @@ -1485,8 +1487,10 @@ VectorPtr MapVector::slice(vector_size_t offset, vector_size_t length) const { type_, sliceNulls(offset, length), length, - sliceBuffer(*INTEGER(), offsets_, offset, length, pool_), - sliceBuffer(*INTEGER(), sizes_, offset, length, pool_), + offsets_ ? Buffer::slice(offsets_, offset, length, pool_) + : offsets_, + sizes_ ? Buffer::slice(sizes_, offset, length, pool_) + : sizes_, keys_, values_); } diff --git a/velox/vector/DictionaryVector-inl.h b/velox/vector/DictionaryVector-inl.h index add6dcf5a551..fe1a6fed756e 100644 --- a/velox/vector/DictionaryVector-inl.h +++ b/velox/vector/DictionaryVector-inl.h @@ -185,8 +185,9 @@ VectorPtr DictionaryVector::slice(vector_size_t offset, vector_size_t length) this->sliceNulls(offset, length), length, valueVector(), - BaseVector::sliceBuffer( - *INTEGER(), indices_, offset, length, this->pool_)); + indices_ + ? Buffer::slice(indices_, offset, length, this->pool_) + : indices_); } template diff --git a/velox/vector/FlatVector-inl.h b/velox/vector/FlatVector-inl.h index d56037114e7d..8551d8d8839b 100644 --- a/velox/vector/FlatVector-inl.h +++ b/velox/vector/FlatVector-inl.h @@ -423,8 +423,8 @@ VectorPtr FlatVector::slice(vector_size_t offset, vector_size_t length) this->type_, this->sliceNulls(offset, length), length, - BaseVector::sliceBuffer( - *this->type_, values_, offset, length, this->pool_), + values_ ? Buffer::slice(values_, offset, length, this->pool_) + : values_, std::vector(stringBuffers_)); }