Skip to content

Commit

Permalink
[pick](array-range)improve array_range func for large param (#38707)
Browse files Browse the repository at this point in the history
## Proposed changes
backport: #38284
Issue Number: close #xxx

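`array_range` / `sequence` now return an `InvalidArgument` error ("Array size exceeds the limit ...") when the requested range would produce more elements than `max_array_size_as_field`, instead of trying to build the oversized array. `RangeImplUtil::vector` returns a `Status` so the error can be propagated, and regression tests are added for the failing cases.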
  • Loading branch information
amorynan authored Aug 2, 2024
1 parent b7e1588 commit f5bc659
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 8 deletions.
31 changes: 23 additions & 8 deletions be/src/vec/functions/array/function_array_range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,21 +168,22 @@ struct RangeImplUtil {
dest_nested_column->reserve(input_rows_count);
dest_nested_null_map.reserve(input_rows_count);

vector(start_column->get_data(), end_column->get_data(), step_column->get_data(),
args_null_map->get_data(), nested_column->get_data(), dest_nested_null_map,
dest_offsets);
RETURN_IF_ERROR(vector(start_column->get_data(), end_column->get_data(),
step_column->get_data(), args_null_map->get_data(),
nested_column->get_data(), dest_nested_null_map, dest_offsets));

block.get_by_position(result).column =
ColumnNullable::create(std::move(dest_array_column_ptr), std::move(args_null_map));
return Status::OK();
}

private:
static void vector(const PaddedPODArray<SourceDataType>& start,
const PaddedPODArray<SourceDataType>& end, const PaddedPODArray<Int32>& step,
NullMap& args_null_map, PaddedPODArray<SourceDataType>& nested_column,
PaddedPODArray<UInt8>& dest_nested_null_map,
ColumnArray::Offsets64& dest_offsets) {
static Status vector(const PaddedPODArray<SourceDataType>& start,
const PaddedPODArray<SourceDataType>& end,
const PaddedPODArray<Int32>& step, NullMap& args_null_map,
PaddedPODArray<SourceDataType>& nested_column,
PaddedPODArray<UInt8>& dest_nested_null_map,
ColumnArray::Offsets64& dest_offsets) {
int rows = start.size();
for (auto row = 0; row < rows; ++row) {
auto idx = start[row];
Expand All @@ -195,6 +196,13 @@ struct RangeImplUtil {
dest_offsets.push_back(dest_offsets.back());
continue;
} else {
if (idx < end_row && step_row > 0 &&
((static_cast<__int128_t>(end_row) - static_cast<__int128_t>(step_row) -
1) / static_cast<__int128_t>(step_row) +
1) > max_array_size_as_field) {
return Status::InvalidArgument("Array size exceeds the limit {}",
max_array_size_as_field);
}
int offset = dest_offsets.back();
while (idx < end[row]) {
nested_column.push_back(idx);
Expand All @@ -219,11 +227,17 @@ struct RangeImplUtil {
using UNIT = std::conditional_t<std::is_same_v<TimeUnitOrVoid, void>,
std::integral_constant<TimeUnit, TimeUnit::DAY>,
TimeUnitOrVoid>;
int move = 0;
while (doris::datetime_diff<UNIT::value, DateTimeV2ValueType,
DateTimeV2ValueType>(idx, end_row) > 0) {
if (move > max_array_size_as_field) {
return Status::InvalidArgument("Array size exceeds the limit {}",
max_array_size_as_field);
}
nested_column.push_back(idx);
dest_nested_null_map.push_back(0);
offset++;
move++;
idx = doris::vectorized::date_time_add<
UNIT::value, DateV2Value<DateTimeV2ValueType>,
DateV2Value<DateTimeV2ValueType>, DateTimeV2>(idx, step_row,
Expand All @@ -233,6 +247,7 @@ struct RangeImplUtil {
}
}
}
return Status::OK();
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,11 @@ suite("nereids_scalar_fn_Array") {
order_qt_sql_array_range_two_param_notnull "select array_range(kint, 1000) from fn_test_not_nullable order by id"
order_qt_sql_array_range_three_param "select array_range(kint, 10000, ktint) from fn_test order by id"
order_qt_sql_array_range_three_param_notnull "select array_range(kint, 10000, ktint) from fn_test_not_nullable order by id"
// array_range with an end bound of 1000000000: the query is expected to fail,
// and the error is checked against the size-limit message passed to exception().
test {
sql "select array_range(kint, 1000000000) from fn_test"
exception ('Array size exceeds the limit 1000000')
}

// array_remove
order_qt_sql_array_remove_Double "select array_remove(kadbl, kdbl) from fn_test"
Expand Down Expand Up @@ -1276,4 +1281,29 @@ suite("nereids_scalar_fn_Array") {
qt_sequence_datetime_hour """select sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint-3 hour), interval kint hour) from fn_test order by kdtmv2s1;"""
qt_sequence_datetime_minute """select sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint+1 minute), interval kint minute) from fn_test order by kdtmv2s1;"""
qt_sequence_datetime_second """select sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint second), interval kint-1 second) from fn_test order by kdtmv2s1;"""
// sequence() from kdtmv2s1 to kdtmv2s1 + (kint+1000) years with a step of
// `interval kint hour`: the query is expected to fail.
test {
sql "select array_size(sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint+1000 year), interval kint hour)) from fn_test order by kdtmv2s1;"
check{result, exception, startTime, endTime ->
// Only asserts that some exception was raised; its message is logged, not matched.
assertTrue(exception != null)
logger.info(exception.message)
}
}

// sequence() from kdtmv2s1 to kdtmv2s1 + (kint+10000) months with a step of
// `interval kint hour`: the query is expected to fail.
test {
sql "select array_size(sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint+10000 month), interval kint hour)) from fn_test order by kdtmv2s1;"
check{result, exception, startTime, endTime ->
// Only asserts that some exception was raised; its message is logged, not matched.
assertTrue(exception != null)
logger.info(exception.message)
}
}

// sequence() from kdtmv2s1 to kdtmv2s1 + (kint+1000001) days with a step of
// `interval kint day`: the query is expected to fail.
test {
sql "select array_size(sequence(kdtmv2s1, date_add(kdtmv2s1, interval kint+1000001 day), interval kint day)) from fn_test order by kdtmv2s1;"
check{result, exception, startTime, endTime ->
// Only asserts that some exception was raised; its message is logged, not matched.
assertTrue(exception != null)
logger.info(exception.message)
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -422,4 +422,26 @@ suite("test_array_functions") {
qt_const_select "select sequence(cast('2022-35-38 12:00:10' as datetimev2(0)), cast('2022-05-18 22:00:30' as datetimev2(0))); "
qt_const_select "select sequence(1, 10, 0); "
qt_const_select "select sequence(cast('2022-05-15 12:00:00' as datetimev2(0)), cast('2022-05-17 12:00:00' as datetimev2(0)), interval 0 day); "
// Constant datetime sequence() with a step of 10000000000 weeks: expected to fail.
// NOTE(review): the start/end span here is only 16 days, so the failure presumably
// comes from the oversized step value rather than from the result size — confirm.
test {
sql """ select sequence(cast('2022-05-01 12:00:00' as datetimev2(0)), cast('2022-05-17 12:00:00' as datetimev2(0)), interval 10000000000 week); """
check{result, exception, startTime, endTime ->
// Only asserts that some exception was raised; its message is logged, not matched.
assertTrue(exception != null)
logger.info(exception.message)
}
}
// Constant integer sequence() from 1 to 10000000000 with no explicit step:
// the query is expected to fail.
test {
sql """ select sequence(1, 10000000000); """
check{result, exception, startTime, endTime ->
// Only asserts that some exception was raised; its message is logged, not matched.
assertTrue(exception != null)
logger.info(exception.message)
}
}
// Constant integer sequence() from 1 to 10000000000 with an explicit step of 2:
// the query is expected to fail.
test {
sql """ select sequence(1, 10000000000, 2); """
check{result, exception, startTime, endTime ->
// Only asserts that some exception was raised; its message is logged, not matched.
assertTrue(exception != null)
logger.info(exception.message)
}
}
}

0 comments on commit f5bc659

Please sign in to comment.