Skip to content

Commit

Permalink
[Bug](join) avoid overflow on bucket_size+1 (#37493)
Browse files Browse the repository at this point in the history
## Proposed changes
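Avoid overflow on `bucket_size + 1`: `JoinHashTable::calc_bucket_size` now caps its result at `INT32_MAX + 1` (2^31) instead of `UINT32_MAX`. The crash below was observed before this change: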

```text
*** Query id: 7371b4516f5b475f-8c060d33a27ffde0 ***
*** is nereids: 1 ***
*** tablet id: 0 ***
*** Aborted at 1720441228 (unix time) try "date -d @1720441228" if you are using GNU date ***
*** Current BE git commitID: 2c9d3af ***
*** SIGSEGV address not mapped to object (@0x7fec499d5be8) received by PID 466196 (TID 470466 OR 0x7fe5411fc700) from PID 1235049448; stack trace: ***
 0# 0x0000561B81AFD533 in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be
 1# 0x00007FEC26D7FB50 in /lib64/libc.so.6
 2# doris::Status doris::pipeline::ProcessHashTableBuild<doris::vectorized::MethodOneNumber<unsigned char, doris::JoinHashTable<unsigned char, HashCRC32<unsigned char> > > >::run<0, false, false, false>(doris::vectorized::MethodOneNumber<unsigned char, doris::JoinHashTable<unsigned char, HashCRC32<unsigned char> > >&, doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, false>, 16ul, 15ul> const*, bool*) in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be
 3# 0x0000561B8AAE8B90 in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be
 4# doris::pipeline::HashJoinBuildSinkLocalState::process_build_block(doris::RuntimeState*, doris::vectorized::Block&) in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be
 5# doris::pipeline::HashJoinBuildSinkOperatorX::sink(doris::RuntimeState*, doris::vectorized::Block*, bool) in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be
 6# doris::pipeline::PipelineTask::execute(bool*) in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be
 7# doris::pipeline::TaskScheduler::_do_work(unsigned long) in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be
 8# doris::ThreadPool::dispatch_thread() in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be
 9# doris::Thread::supervise_thread(void*) in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be
10# start_thread in /lib64/libpthread.so.0
11# __clone in /lib64/libc.so.6

```
  • Loading branch information
BiteTheDDDDt committed Jul 15, 2024
1 parent 8360e3f commit 2848067
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 1 deletion.
2 changes: 1 addition & 1 deletion be/src/vec/common/hash_table/join_hash_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class JoinHashTable {
// Computes the bucket count to use for a table that will hold `num_elem` elements.
// The element count is padded by (num_elem - 1) / 7, passed through
// phmap::priv::NormalizeCapacity, incremented by one, and finally clamped to an upper bound
// before being returned as uint32_t.
// NOTE(review): num_elem is unsigned, so num_elem == 0 makes (num_elem - 1) wrap around to a
// huge value before the division — confirm callers never pass 0.
static uint32_t calc_bucket_size(size_t num_elem) {
size_t expect_bucket_size = num_elem + (num_elem - 1) / 7;
return std::min(phmap::priv::NormalizeCapacity(expect_bucket_size) + 1,
// Diff view: the next line is the pre-commit upper bound (UINT32_MAX), deleted by this commit.
static_cast<size_t>(std::numeric_limits<uint32_t>::max()));
// Diff view: the next line is its replacement. The bound becomes INT32_MAX + 1 (2^31); per the
// commit description this is to avoid overflow when bucket_size + 1 is computed.
static_cast<size_t>(std::numeric_limits<int32_t>::max()) + 1);
}

size_t get_byte_size() const {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
97656250

Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression case for a join against a very large table: p_table receives 5 rows, b_table is
// filled by five 1000000000-row inserts, and the row count of their equi-join is checked.
// SQL text is kept flush-left inside the literals so the statements sent are unchanged.
suite("big_join_build") {

    // Remove any tables left over from a previous run.
    for (String dropStmt : [""" DROP TABLE IF EXISTS b_table; """,
                            """ DROP TABLE IF EXISTS p_table; """]) {
        sql(dropStmt)
    }

    // Both tables share the same single-column tinyint layout.
    sql("""
create table b_table (
k1 tinyint not null,
)
duplicate key (k1)
distributed BY hash(k1) buckets 64
properties("replication_num" = "1");
""")
    sql("""
create table p_table (
k1 tinyint not null,
)
duplicate key (k1)
distributed BY hash(k1) buckets 64
properties("replication_num" = "1");
""")

    // Small side of the join.
    sql("""
insert into p_table select * from numbers("number" = "5");
""")

    // Large side of the join: the same bulk insert is issued five times in a row.
    def bulkInsert = """
insert into b_table select * from numbers("number" = "1000000000");
"""
    5.times {
        sql(bulkInsert)
    }

    // The result is compared against the recorded output for the `sql` tag.
    qt_sql("""select /*+ leading(p_table b_table) */ count(*) from p_table,b_table where p_table.k1=b_table.k1 and b_table.k1<91;""")
}

0 comments on commit 2848067

Please sign in to comment.