From 76bf780396e2fa7759beb157df2a0a0d4d4010f8 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Wed, 18 Jan 2023 20:57:58 -0800 Subject: [PATCH] Fix median for void columns/gby --- docs/api/dt/median.rst | 5 +++-- docs/releases/v1.1.0.rst | 3 +++ src/core/expr/head_reduce_unary.cc | 2 +- src/core/sort.cc | 1 + tests/test-reduce.py | 12 +++++++++--- 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/docs/api/dt/median.rst b/docs/api/dt/median.rst index 61eac12c42..b322082c85 100644 --- a/docs/api/dt/median.rst +++ b/docs/api/dt/median.rst @@ -14,8 +14,9 @@ Input columns. return: Expr - f-expression having one row, and the same names, stypes and - number of columns as in `cols`. + f-expression having one row, and the same names and number of columns + as in `cols`. The column stypes are `float32` for + `float32` columns, and `float64` for all the other numeric types. except: TypeError The exception is raised when one of the columns from `cols` diff --git a/docs/releases/v1.1.0.rst b/docs/releases/v1.1.0.rst index 7346afa9d2..78e8189125 100644 --- a/docs/releases/v1.1.0.rst +++ b/docs/releases/v1.1.0.rst @@ -139,6 +139,9 @@ -[fix] Reducers and row-wise functions now support :attr:`void ` columns. [#3284] + -[fix] Fixed :func:`dt.median()` when used in a groupby context with + :attr:`void ` columns. [#3411] + fread ----- diff --git a/src/core/expr/head_reduce_unary.cc b/src/core/expr/head_reduce_unary.cc index 8267be937b..9fedaa5de7 100644 --- a/src/core/expr/head_reduce_unary.cc +++ b/src/core/expr/head_reduce_unary.cc @@ -830,7 +830,7 @@ static Column compute_median(Column&& arg, const Groupby& gby) { return Column::new_na_column(1, arg.stype()); } switch (arg.stype()) { - case SType::VOID: return Column(new ConstNa_ColumnImpl(1)); + case SType::VOID: return Column(new ConstNa_ColumnImpl(gby.size())); case SType::BOOL: case SType::INT8: return _median (std::move(arg), gby); case SType::INT16: return _median(std::move(arg), gby); diff --git a/src/core/sort.cc b/src/core/sort.cc index 8c763dd9b3..ef8ff90926 100644 --- a/src/core/sort.cc +++ b/src/core/sort.cc @@ -1497,6 +1497,7 @@ RiGb group(const std::vector& columns, void dt::ColumnImpl::sort_grouped(const Groupby& grps, Column& out) { + out.materialize(); // `SortContext::continue_sort()` requires material column (void) out.stats(); SortContext sc(nrows(), RowIndex(), grps, /* make_groups = */ false); sc.continue_sort(out, /* desc = */ false, /* make_groups = */ false); diff --git a/tests/test-reduce.py b/tests/test-reduce.py index 9d9cc5da64..221d990bae 100644 --- a/tests/test-reduce.py +++ b/tests/test-reduce.py @@ -458,19 +458,25 @@ def test_mean_empty_frame(): def test_median_void(): DT = dt.Frame([None] * 10) - DT_median = DT[:, mean(f.C0)] + DT_median = DT[:, median(f.C0)] assert_equals(DT_median, dt.Frame([None])) def test_median_void_per_group(): DT = dt.Frame([[None, None, None, None, None], [1, 2, 1, 2, 2]]) - DT_median = DT[:, mean(f.C0), by(f.C1)] + DT_median = DT[:, median(f.C0), by(f.C1)] assert_equals(DT_median, dt.Frame(C1=[1, 2]/dt.int32, C0=[None, None])) +def test_median_nonvoid_per_void_group(): + DT = dt.Frame([[None, None, None, None, None], [1, 2, 1, 2, 2]]) + DT_median = DT[:, median(f.C1), by(f.C0)] + assert_equals(DT_median, dt.Frame([[None], [2]/dt.float64])) + + def test_median_void_grouped(): DT = dt.Frame([[None, None, None, None, None], [1, 2, 1, 2, 2]]) - DT_median = DT[:, mean(f.C0), by(f.C0)] + DT_median = DT[:, median(f.C0), by(f.C0)] assert_equals(DT_median, dt.Frame([[None], [None]/dt.float64]))