Skip to content

Commit

Permalink
[Opt](limit) Avoid copy in set_num_rows implementation (#41062)
Browse files Browse the repository at this point in the history
## Proposed changes

Issue Number: close #xxx

If the column has no extra references, we can call `assume_mutable` and simply
resize it in place; there is no need to copy.

perf:
```
Run on (96 X 3054.64 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x48)
  L1 Instruction 32 KiB (x48)
  L2 Unified 1024 KiB (x48)
  L3 Unified 36608 KiB (x2)
Load Average: 14.09, 16.44, 35.13
-----------------------------------------------------
Benchmark           Time             CPU   Iterations
-----------------------------------------------------
SHRINK     7.3495e+13 ns     44527988 ns            1
CUT        7.3496e+13 ns   1638929204 ns            1
```
  • Loading branch information
zclllyybb authored Sep 24, 2024
1 parent 47ce9f9 commit 990f18f
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 2 deletions.
11 changes: 10 additions & 1 deletion be/src/vec/columns/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,21 @@ class IColumn : public COW<IColumn> {

/// Removes all elements outside of specified range.
/// Is used in LIMIT operation, for example.
///
/// Returns a new column built from clone_empty() and then filled through
/// insert_range_from() with `length` elements of this column starting at index
/// `start`. This column itself is not changed (the method is const).
/// NOTE(review): the two declaration lines below are the removed/added pair shown by
/// the diff view; the only difference between them is the added `final`.
virtual Ptr cut(size_t start, size_t length) const {
virtual Ptr cut(size_t start, size_t length) const final {
// Start from the column returned by clone_empty().
MutablePtr res = clone_empty();
// Copy the requested range out of this column into the new one.
res->insert_range_from(*this, start, length);
return res;
}

/// Cut or expand in place. `this` would be moved; only the return value is available.
///
/// Resizes the column to `length` rows and returns it. According to the commit that
/// introduced this method, the purpose is to avoid the copy made by cut() when the
/// column has no extra reference.
/// NOTE(review): presumably mutate() still falls back to a copy when the column is
/// shared with another owner -- confirm against the COW implementation.
virtual Ptr shrink(size_t length) const final {
// The NOLINT pair silences clang-tidy's performance-move-const-arg check for the
// std::move below, which is applied to `*this` inside a const member function.
// NOLINTBEGIN(performance-move-const-arg)
// Obtain a mutable handle by calling mutate() on an rvalue of this column.
MutablePtr res = std::move(*this).mutate();
// `length` may exceed the current size; the unit tests grow a 3-row column to 10 rows.
res->resize(length);
// NOLINTEND(performance-move-const-arg)
return res->get_ptr();
}

/// Appends new value at the end of column (column's size is increased by 1).
/// Is used to transform raw strings to Blocks (for example, inside input format parsers)
virtual void insert(const Field& x) = 0;
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/core/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ void Block::set_num_rows(size_t length) {
if (rows() > length) {
for (auto& elem : data) {
if (elem.column) {
elem.column = elem.column->cut(0, length);
elem.column = elem.column->shrink(length);
}
}
if (length < row_same_bit.size()) {
Expand Down
121 changes: 121 additions & 0 deletions be/test/vec/core/column_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>

#include "vec/columns/column_string.h"
#include "vec/columns/columns_number.h"
#include "vec/common/string_ref.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_string.h"

namespace doris::vectorized {

class ColumnTest : public ::testing::Test {
protected:
void SetUp() override {
col_str = ColumnString::create();
col_str->insert_data("aaa", 3);
col_str->insert_data("bb", 2);
col_str->insert_data("cccc", 4);

col_int = ColumnInt64::create();
col_int->insert_value(1);
col_int->insert_value(2);
col_int->insert_value(3);

col_dcm = ColumnDecimal64::create(0, 3);
col_dcm->insert_value(1.23);
col_dcm->insert_value(4.56);
col_dcm->insert_value(7.89);
}

ColumnString::MutablePtr col_str;
ColumnInt64::MutablePtr col_int;
ColumnDecimal64::MutablePtr col_dcm;
};

// cut() on the string column: the first two rows are returned, and a range longer
// than the column is expected to throw.
TEST_F(ColumnTest, CutColumnString) {
    const StringRef expected_rows[] = {StringRef("aaa"), StringRef("bb")};

    const auto prefix = col_str->cut(0, 2);
    EXPECT_EQ(prefix->size(), 2);
    for (size_t row = 0; row < 2; ++row) {
        EXPECT_EQ(prefix->get_data_at(row), expected_rows[row]);
    }

    // The fixture column holds three rows, so ten cannot be served.
    EXPECT_THROW({ col_str->cut(0, 10); }, doris::Exception);
}

// cut() on the Int64 column: the first two rows are returned, and a range longer
// than the column is expected to throw.
TEST_F(ColumnTest, CutColumnInt64) {
    const int expected_rows[] = {1, 2};

    const auto prefix = col_int->cut(0, 2);
    EXPECT_EQ(prefix->size(), 2);

    // View the generic column through its concrete type to read the elements.
    const auto* ints = static_cast<const ColumnInt64*>(prefix.get());
    for (size_t row = 0; row < 2; ++row) {
        EXPECT_EQ(ints->get_element(row), expected_rows[row]);
    }

    // The fixture column holds three rows, so ten cannot be served.
    EXPECT_THROW({ col_int->cut(0, 10); }, doris::Exception);
}

// cut() on the Decimal64 column: the first two rows are returned, and a range longer
// than the column is expected to throw.
TEST_F(ColumnTest, CutColumnDecimal64) {
    const Decimal64 expected_rows[] = {Decimal64(1.23), Decimal64(4.56)};

    const auto prefix = col_dcm->cut(0, 2);
    EXPECT_EQ(prefix->size(), 2);

    // View the generic column through its concrete type to read the elements.
    const auto* decimals = static_cast<const ColumnDecimal64*>(prefix.get());
    for (size_t row = 0; row < 2; ++row) {
        EXPECT_EQ(decimals->get_element(row), expected_rows[row]);
    }

    // The fixture column holds three rows, so ten cannot be served.
    EXPECT_THROW({ col_dcm->cut(0, 10); }, doris::Exception);
}

// shrink() on the string column: first down to two rows, then up to ten rows on the
// column returned by the first call.
TEST_F(ColumnTest, ShrinkColumnString) {
    // Verifies that rows 0 and 1 still hold the fixture values.
    const auto expect_leading_rows = [](const auto& column) {
        EXPECT_EQ(column->get_data_at(0), StringRef("aaa"));
        EXPECT_EQ(column->get_data_at(1), StringRef("bb"));
    };

    auto resized = col_str->shrink(2);
    EXPECT_EQ(resized->size(), 2);
    expect_leading_rows(resized);

    resized = resized->shrink(10);
    EXPECT_EQ(resized->size(), 10);
    EXPECT_EQ(resized->use_count(), 1);
    expect_leading_rows(resized);
    // Original note: column string scale in will not clear.
}

// shrink() on the Int64 column: first down to two rows, then up to ten rows.
TEST_F(ColumnTest, ShrinkColumnInt64) {
    // Verifies that rows 0 and 1 still hold the fixture values.
    const auto expect_leading_rows = [](const auto& column) {
        const auto* ints = static_cast<const ColumnInt64*>(column.get());
        EXPECT_EQ(ints->get_element(0), 1);
        EXPECT_EQ(ints->get_element(1), 2);
    };

    auto resized = col_int->shrink(2);
    EXPECT_EQ(resized->size(), 2);
    expect_leading_rows(resized);

    resized = col_int->shrink(10);
    EXPECT_EQ(resized->size(), 10);
    EXPECT_EQ(resized->use_count(), 1);
    expect_leading_rows(resized);
    // Original note: a column vector that scales out does not empty-initialise the
    // added rows, which is why only rows 0 and 1 are inspected.
}

// shrink() on the Decimal64 column: first down to two rows, then up to ten rows.
TEST_F(ColumnTest, ShrinkColumnDecimal64) {
    // Verifies that rows 0 and 1 still hold the fixture values.
    const auto expect_leading_rows = [](const auto& column) {
        const auto* decimals = static_cast<const ColumnDecimal64*>(column.get());
        EXPECT_EQ(decimals->get_element(0), Decimal64(1.23));
        EXPECT_EQ(decimals->get_element(1), Decimal64(4.56));
    };

    auto resized = col_dcm->shrink(2);
    EXPECT_EQ(resized->size(), 2);
    expect_leading_rows(resized);

    resized = col_dcm->shrink(10);
    EXPECT_EQ(resized->size(), 10);
    EXPECT_EQ(resized->use_count(), 1);
    expect_leading_rows(resized);
    // Original note: a decimal column that scales out does not empty-initialise the
    // added rows, which is why only rows 0 and 1 are inspected.
}
} // namespace doris::vectorized

0 comments on commit 990f18f

Please sign in to comment.