Skip to content

Commit

Permalink
[Fix](multi-catalog) Fix column mutate() crash by replacing it with assume_mutable(). (apache#46151)
Browse files Browse the repository at this point in the history

### What problem does this PR solve?

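Problem Summary:

The BE crashes with SIGSEGV while releasing a column reference (`COW<IColumn>::release_ref`, reached from `ScalarColumnReader::_read_nested_column` in the Parquet reader), as shown in the backtrace below. The crash is tied to calling `mutate()` on columns; this change replaces those calls with `assume_mutable()`.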

```
warning: Unable to find libthread_db matching inferior's thread library, thread debugging will not be available.
Core was generated by `/mnt/doris/be/lib/doris_be'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x0000000000000000 in ?? ()
[Current thread is 1 (LWP 3923404)]
(gdb) bt
#0  0x0000000000000000 in ?? ()
#1  0x000055f44f97dda7 in COW<doris::vectorized::IColumn>::release_ref (this=0x7f6bf7d07cc8) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/common/cow.h:99
#2  COW<doris::vectorized::IColumn>::intrusive_ptr<doris::vectorized::IColumn>::~intrusive_ptr (this=0x7f6b792f9670) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/common/cow.h:133
#3  doris::vectorized::ScalarColumnReader::_read_nested_column (this=this@entry=0x7f6be31f8900, doris_column=..., type=
    std::shared_ptr<const doris::vectorized::IDataType> (use count 1, weak count 0) = {...}, select_vector=..., batch_size=<optimized out>, batch_size@entry=4064, read_rows=0x7f6b792f9ad8, 
    eof=0x7f6b792f9af0, is_dict_filter=<optimized out>, align_rows=<optimized out>) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:447
#4  0x000055f44f97e1fc in doris::vectorized::ScalarColumnReader::read_column_data (this=0x7f6be31f8900, doris_column=..., 
    type=std::shared_ptr<const doris::vectorized::IDataType> (use count 1, weak count 0) = {...}, select_vector=..., batch_size=4064, read_rows=0x7f6b792f9ad8, eof=0x7f6b792f9af0, 
    is_dict_filter=<optimized out>) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:504
#5  0x000055f44f97ecbe in doris::vectorized::ArrayColumnReader::read_column_data (this=0x7f69a922ce00, doris_column=..., type=..., select_vector=..., batch_size=4064, read_rows=0x7f6b792f9ad8, 
    eof=0x7f6b792f9af0, is_dict_filter=<optimized out>) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp:616
#6  0x000055f44f975460 in doris::vectorized::RowGroupReader::_read_column_data (this=this@entry=0x7f6cf83dd180, block=block@entry=0x7f6bbcc66938, columns=..., batch_size=4064, 
    read_rows=read_rows@entry=0x7f6b792f9ee0, batch_eof=0x7f6cf83d71f0, select_vector=...) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp:426
#7  0x000055f44f972682 in doris::vectorized::RowGroupReader::next_batch (this=0x7f6cf83dd180, block=0x7f6bbcc66938, batch_size=140099571418880, read_rows=0x7f6b792f9ee0, batch_eof=0x7f6f06724610)
    at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp:321
#8  0x000055f44f9141f8 in doris::vectorized::ParquetReader::get_next_block (this=0x7f6cf83d7000, block=0x7f6bbcc66938, read_rows=0x7f6b792f9ee0, eof=0x7f6bbcc66f88)
    at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/parquet/vparquet_reader.cpp:576
#9  0x000055f450aba6f4 in doris::vectorized::IcebergTableReader::get_next_block (this=0x7f6bb5611180, block=0x7f6bbcc66938, read_rows=0x7f6b792f9ee0, eof=0x7f6bbcc66f88)
    at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/format/table/iceberg_reader.cpp:138
#10 0x000055f450aaa351 in doris::vectorized::VFileScanner::_get_block_wrapped (this=0x7f6bbcc66800, state=<optimized out>, block=0x7f6bbcc66938, eof=0x7f6b792fa2f7)
    at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/vfile_scanner.cpp:359
#11 0x000055f450aa9ecc in doris::vectorized::VFileScanner::_get_block_impl (this=0x0, state=0xffffffffffffa770, block=0x8c3de0, eof=0x7f6b79300700)
    at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/vfile_scanner.cpp:301
#12 0x000055f450b41e9c in doris::vectorized::VScanner::get_block (this=this@entry=0x7f6bbcc66800, state=state@entry=0x7f6f06724000, block=block@entry=0x7f6bbcc66938, eof=eof@entry=0x7f6b792fa2f7)
    at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/vscanner.cpp:133
#13 0x000055f450b41977 in doris::vectorized::VScanner::get_block_after_projects (this=0x7f6bbcc66800, state=0x7f6f06724000, block=0x7f6cf8394b80, eos=0x7f6b792fa2f7)
    at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/vscanner.cpp:96
#14 0x000055f450a941ff in doris::vectorized::ScannerScheduler::_scanner_scan (ctx=std::shared_ptr<doris::vectorized::ScannerContext> (use count 10, weak count 1) = {...}, 
    scan_task=std::shared_ptr<doris::vectorized::ScanTask> (use count 2, weak count 0) = {...}) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:289
#15 0x000055f450a94b73 in doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const (this=<optimized out>) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:180
#16 doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::{lambda()#1}::operator()() const (this=0x7f70ccc56ee0) at /home/zcp/repo_center/doris_enterprise/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:179
#17 std::__invoke_impl<void, doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::{lambda()#1}&>(std::__invoke_other, doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::{lambda()#1}&) (__f=...) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61
#18 std::__invoke_r<void, doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::{lambda()#1}&>(doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::{lambda()#1}&) (__fn=...)
    at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111
#19 std::_Function_handler<void (), doris::vectorized::ScannerScheduler::submit(std::shared_ptr<doris::vectorized::ScannerContext>, std::shared_ptr<doris::vectorized::ScanTask>)::$_1::operator()() const::{lambda()#1}>::_M_invoke(std::_Any_data const&) (__functor=...) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291
#20 0x000055f44ca858c8 in doris::ThreadPool::dispatch_thread (this=0x7f70ba259200) at /home/zcp/repo_center/doris_enterprise/doris/be/src/util/threadpool.cpp:543
#21 0x000055f44ca7ad91 in std::function<void ()>::operator()() const (this=0x7f6bf7d07cc0)
    at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560
#22 doris::Thread::supervise_thread (arg=0x7f6fa185a020) at /home/zcp/repo_center/doris_enterprise/doris/be/src/util/thread.cpp:498
#23 0x00007f70ebf78e25 in ?? ()
#24 0x0000000000000000 in ?? ()
```
  • Loading branch information
kaka11chen authored Dec 31, 2024
1 parent 8670378 commit ed61d66
Show file tree
Hide file tree
Showing 6 changed files with 22 additions and 21 deletions.
7 changes: 4 additions & 3 deletions be/src/vec/exec/format/orc/vorc_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1211,8 +1211,8 @@ Status OrcReader::_fill_missing_columns(
for (auto& kv : missing_columns) {
if (kv.second == nullptr) {
// no default column, fill with null
auto nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(
(*std::move(block->get_by_name(kv.first).column)).mutate().get());
auto mutable_column = block->get_by_name(kv.first).column->assume_mutable();
auto* nullable_column = static_cast<vectorized::ColumnNullable*>(mutable_column.get());
nullable_column->insert_many_defaults(rows);
} else {
// fill with default value
Expand All @@ -1226,8 +1226,9 @@ Status OrcReader::_fill_missing_columns(
// call resize because the first column of _src_block_ptr may not be filled by reader,
// so _src_block_ptr->rows() may return wrong result, cause the column created by `ctx->execute()`
// has only one row.
std::move(*block->get_by_position(result_column_id).column).mutate()->resize(rows);
auto result_column_ptr = block->get_by_position(result_column_id).column;
auto mutable_column = result_column_ptr->assume_mutable();
mutable_column->resize(rows);
// result_column_ptr maybe a ColumnConst, convert it to a normal column
result_column_ptr = result_column_ptr->convert_to_full_column_if_const();
auto origin_column_type = block->get_by_name(kv.first).type;
Expand Down
16 changes: 8 additions & 8 deletions be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -679,8 +679,8 @@ Status ArrayColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr&
MutableColumnPtr data_column;
NullMap* null_map_ptr = nullptr;
if (doris_column->is_nullable()) {
auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(
(*std::move(doris_column)).mutate().get());
auto mutable_column = doris_column->assume_mutable();
auto* nullable_column = static_cast<vectorized::ColumnNullable*>(mutable_column.get());
null_map_ptr = &nullable_column->get_null_map_data();
data_column = nullable_column->get_nested_column_ptr();
} else {
Expand Down Expand Up @@ -730,8 +730,8 @@ Status MapColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr& t
MutableColumnPtr data_column;
NullMap* null_map_ptr = nullptr;
if (doris_column->is_nullable()) {
auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(
(*std::move(doris_column)).mutate().get());
auto mutable_column = doris_column->assume_mutable();
auto* nullable_column = static_cast<vectorized::ColumnNullable*>(mutable_column.get());
null_map_ptr = &nullable_column->get_null_map_data();
data_column = nullable_column->get_nested_column_ptr();
} else {
Expand Down Expand Up @@ -799,8 +799,8 @@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr
MutableColumnPtr data_column;
NullMap* null_map_ptr = nullptr;
if (doris_column->is_nullable()) {
auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(
(*std::move(doris_column)).mutate().get());
auto mutable_column = doris_column->assume_mutable();
auto* nullable_column = static_cast<vectorized::ColumnNullable*>(mutable_column.get());
null_map_ptr = &nullable_column->get_null_map_data();
data_column = nullable_column->get_nested_column_ptr();
} else {
Expand Down Expand Up @@ -880,8 +880,8 @@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr
auto& doris_field = doris_struct.get_column_ptr(idx);
auto& doris_type = const_cast<DataTypePtr&>(doris_struct_type->get_element(idx));
DCHECK(doris_type->is_nullable());
auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(
(*std::move(doris_field)).mutate().get());
auto mutable_column = doris_field->assume_mutable();
auto* nullable_column = static_cast<vectorized::ColumnNullable*>(mutable_column.get());
nullable_column->insert_null_elements(missing_column_sz);
}

Expand Down
7 changes: 4 additions & 3 deletions be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -684,8 +684,8 @@ Status RowGroupReader::_fill_missing_columns(
for (auto& kv : missing_columns) {
if (kv.second == nullptr) {
// no default column, fill with null
auto nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(
(*std::move(block->get_by_name(kv.first).column)).mutate().get());
auto mutable_column = block->get_by_name(kv.first).column->assume_mutable();
auto* nullable_column = static_cast<vectorized::ColumnNullable*>(mutable_column.get());
nullable_column->insert_many_defaults(rows);
} else {
// fill with default value
Expand All @@ -699,8 +699,9 @@ Status RowGroupReader::_fill_missing_columns(
// call resize because the first column of _src_block_ptr may not be filled by reader,
// so _src_block_ptr->rows() may return wrong result, cause the column created by `ctx->execute()`
// has only one row.
std::move(*block->get_by_position(result_column_id).column).mutate()->resize(rows);
auto result_column_ptr = block->get_by_position(result_column_id).column;
auto mutable_column = result_column_ptr->assume_mutable();
mutable_column->resize(rows);
// result_column_ptr maybe a ColumnConst, convert it to a normal column
result_column_ptr = result_column_ptr->convert_to_full_column_if_const();
auto origin_column_type = block->get_by_name(kv.first).type;
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/exec/scan/new_es_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ Status NewEsScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eo
columns.resize(column_size);
for (auto i = 0; i < column_size; i++) {
if (mem_reuse) {
columns[i] = std::move(*block->get_by_position(i).column).mutate();
columns[i] = block->get_by_position(i).column->assume_mutable();
} else {
columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column();
}
Expand Down
9 changes: 4 additions & 5 deletions be/src/vec/exec/scan/vfile_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,8 +492,8 @@ Status VFileScanner::_fill_missing_columns(size_t rows) {
for (auto& kv : _missing_col_descs) {
if (kv.second == nullptr) {
// no default column, fill with null
auto nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(
(*std::move(_src_block_ptr->get_by_name(kv.first).column)).mutate().get());
auto mutable_column = _src_block_ptr->get_by_name(kv.first).column->assume_mutable();
auto* nullable_column = static_cast<vectorized::ColumnNullable*>(mutable_column.get());
nullable_column->insert_many_defaults(rows);
} else {
// fill with default value
Expand All @@ -507,10 +507,9 @@ Status VFileScanner::_fill_missing_columns(size_t rows) {
// call resize because the first column of _src_block_ptr may not be filled by reader,
// so _src_block_ptr->rows() may return wrong result, cause the column created by `ctx->execute()`
// has only one row.
std::move(*_src_block_ptr->get_by_position(result_column_id).column)
.mutate()
->resize(rows);
auto result_column_ptr = _src_block_ptr->get_by_position(result_column_id).column;
auto mutable_column = result_column_ptr->assume_mutable();
mutable_column->resize(rows);
// result_column_ptr maybe a ColumnConst, convert it to a normal column
result_column_ptr = result_column_ptr->convert_to_full_column_if_const();
auto origin_column_type = _src_block_ptr->get_by_name(kv.first).type;
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/exec/scan/vmeta_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ Status VMetaScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eo
columns.resize(column_size);
for (auto i = 0; i < column_size; i++) {
if (mem_reuse) {
columns[i] = std::move(*block->get_by_position(i).column).mutate();
columns[i] = block->get_by_position(i).column->assume_mutable();
} else {
columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column();
}
Expand Down

0 comments on commit ed61d66

Please sign in to comment.