From 1b9c71393cdd64d199ad8112cb306f2890f88cd4 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Mon, 17 Aug 2020 18:02:18 -0700 Subject: [PATCH 1/7] Adjust qcut docs --- src/core/column/qcut.h | 7 ++++--- src/core/expr/fexpr_qcut.cc | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/core/column/qcut.h b/src/core/column/qcut.h index 7558ae22cd..7c9c5dfafe 100644 --- a/src/core/column/qcut.h +++ b/src/core/column/qcut.h @@ -32,9 +32,10 @@ namespace dt { /** - * Virtual column to bin input data into equal-population - * discrete intervals, i.e. quantiles. In reality, for some data - * these quantiles won't have exactly the same population. + * Virtual column to bin input data into intervals with approximately + * equal populations. If there are duplicate values in the + * data, they will all be placed into the same bin. In extreme cases + * this may cause the bins to be highly unbalanced. * * Quantiles are generated based on the element/group information * obtained from the groupby operation, i.e. rowindex and offsets. diff --git a/src/core/expr/fexpr_qcut.cc b/src/core/expr/fexpr_qcut.cc index 0f5894a1c2..e54501025d 100644 --- a/src/core/expr/fexpr_qcut.cc +++ b/src/core/expr/fexpr_qcut.cc @@ -145,7 +145,7 @@ equal populations. Thus, the intervals are chosen according to the sample quantiles of the data. If there are duplicate values in the data, they will all be placed -into the same been. In extreme cases this may cause the bins to be +into the same bin. In extreme cases this may cause the bins to be highly unbalanced. Parameters From 51f0efcaf152f711aabcd1ab51332baca59bf971 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Tue, 18 Aug 2020 12:04:06 -0700 Subject: [PATCH 2/7] Convert cut to FExpr --- src/core/expr/head_func.cc | 1 - src/core/expr/head_func.h | 15 --- src/core/expr/head_func_cut.cc | 219 --------------------------------- src/core/expr/op.h | 1 - src/datatable/expr/expr.py | 1 - tests/expr/test-cut.py | 60 +++++---- 6 files changed, 35 insertions(+), 262 deletions(-) delete mode 100644 src/core/expr/head_func_cut.cc diff --git a/src/core/expr/head_func.cc b/src/core/expr/head_func.cc index 5944efd5f1..91ffe7a457 100644 --- a/src/core/expr/head_func.cc +++ b/src/core/expr/head_func.cc @@ -169,7 +169,6 @@ void Head_Func::init() { factory[static_cast(Op::SETPLUS)] = make_colsetop; factory[static_cast(Op::SETMINUS)] = make_colsetop; factory[static_cast(Op::SHIFTFN)] = &Head_Func_Shift::make; - factory[static_cast(Op::CUT)] = &Head_Func_Cut::make; factory[static_cast(Op::COUNT0)] = make_reduce0; factory[static_cast(Op::COV)] = make_reduce2; factory[static_cast(Op::CORR)] = make_reduce2; diff --git a/src/core/expr/head_func.h b/src/core/expr/head_func.h index ed7f4542b3..22d2e25560 100644 --- a/src/core/expr/head_func.h +++ b/src/core/expr/head_func.h @@ -142,21 +142,6 @@ class Head_Func_IsClose : public Head_Func { -class Head_Func_Cut : public Head_Func { - private: - py::oobj py_nbins_; - bool right_closed_; - size_t: 56; - - public: - Head_Func_Cut(py::oobj py_nbins, py::oobj right_closed); - static ptrHead make(Op, const py::otuple& params); - - Workframe evaluate_n(const vecExpr&, EvalContext&) const override; -}; - - - }} // namespace dt::expr #endif diff --git a/src/core/expr/head_func_cut.cc b/src/core/expr/head_func_cut.cc deleted file mode 100644 index 8c0df433ba..0000000000 --- a/src/core/expr/head_func_cut.cc +++ /dev/null @@ -1,219 +0,0 @@ -//------------------------------------------------------------------------------ -// Copyright 2020 H2O.ai -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. -//------------------------------------------------------------------------------ -#include "_dt.h" -#include "column/cut.h" -#include "datatablemodule.h" -#include "expr/eval_context.h" -#include "expr/fexpr_column.h" -#include "expr/head_func.h" -#include "frame/py_frame.h" -#include "parallel/api.h" -namespace dt { -namespace expr { - - - -//------------------------------------------------------------------------------ -// Head_Func_Cut -//------------------------------------------------------------------------------ - -Head_Func_Cut::Head_Func_Cut(py::oobj py_nbins, py::oobj right_closed) - : py_nbins_(py_nbins), right_closed_(right_closed.to_bool()) {} - - -ptrHead Head_Func_Cut::make(Op, const py::otuple& params) { - xassert(params.size() == 2); - return ptrHead(new Head_Func_Cut(params[0], params[1])); -} - - - -Workframe Head_Func_Cut::evaluate_n( - const vecExpr& args, EvalContext& ctx) const -{ - - if (ctx.has_groupby()) { - throw NotImplError() << "cut() cannot be used in a groupby context"; - } - - int32_t nbins_default = 10; - Workframe wf = args[0]->evaluate_n(ctx); - const size_t ncols = wf.ncols(); - - int32vec nbins(ncols); - bool defined_nbins = !py_nbins_.is_none(); - bool nbins_list_or_tuple = py_nbins_.is_list_or_tuple(); - - if (nbins_list_or_tuple) { - py::oiter py_nbins = py_nbins_.to_oiter(); - if (py_nbins.size() != ncols) { - throw ValueError() << "When `nbins` is a list or a tuple, its length must be " - << "the same as the number of columns in the frame/expression, i.e. `" - << ncols << "`, instead got: `" << py_nbins.size() << "`"; - - } - - size_t i = 0; - for (auto py_nbin : py_nbins) { - int32_t nbin = py_nbin.to_int32_strict(); - if (nbin <= 0) { - throw ValueError() << "All elements in `nbins` must be positive, " - "got `nbins[" << i << "`]: `" << nbin << "`"; - } - - nbins[i++] = nbin; - } - xassert(i == ncols); - - } else { - if (defined_nbins) { - nbins_default = py_nbins_.to_int32_strict(); - if (nbins_default <= 0) { - throw ValueError() << "Number of bins must be positive, " - "instead got: `" << nbins_default << "`"; - } - } - - for (size_t i = 0; i < ncols; ++i) { - nbins[i] = nbins_default; - } - } - - // Cut workframe in-place - for (size_t i = 0; i < ncols; ++i) { - Column coli = wf.retrieve_column(i); - coli = Column(Cut_ColumnImpl::make( - std::move(coli), i, nbins[i], right_closed_ - )); - wf.replace_column(i, std::move(coli)); - } - - return wf; -} - -}} // dt::expr - - -namespace py { - -static oobj make_pyexpr(dt::expr::Op opcode, otuple targs, otuple tparams) { - size_t op = static_cast(opcode); - return robj(Expr_Type).call({ oint(op), targs, tparams }); -} - - -static oobj cut_frame(oobj arg0, oobj arg1, oobj arg2) { - using namespace dt::expr; - auto slice_all = oslice(oslice::NA, oslice::NA, oslice::NA); - auto f_all = PyFExpr::make(new FExpr_ColumnAsArg(0, slice_all)); - auto cutexpr = make_pyexpr(Op::CUT, - otuple{ f_all }, - otuple{ arg1, arg2 }); - auto frame = static_cast(arg0.to_borrowed_ref()); - return frame->m__getitem__(otuple{ slice_all, cutexpr }); -} - - -static const char* doc_cut = -R"(cut(cols, nbins=10, right_closed=True) --- - -Cut all the columns in a Frame/f-expression by binning -their values into equal-width discrete intervals. - -Parameters ----------- -cols: Frame | f-expression - Frame or f-expression consisting of numeric columns. -nbins: int | list of ints | tuple of ints - When a single number is specified, this number of bins - will be used to bin each column of `cols`. - When a list or a tuple is provided, each column will be binned - by using its own number of bins. In the latter case, - the list/tuple length must be equal to the number of columns - in `cols`. -right_closed: bool - Each binning interval is `half-open`_. This flag indicates which - side of the interval is closed. - -return: Frame | Expr - The return type matches the type of the `cols` argument. - If the function is applied to a frame, then the result is a frame where - each column from the original frame has been cut into the specified bins. - If the `cols` argument is an f-expression, then the result is a new - f-expression that transforms every column into its cut version. - -See also --------- -:func:`qcut()` -- function for quantile binning. - -.. _`half-open`: https://en.wikipedia.org/wiki/Interval_(mathematics)#Terminology - -)"; - - -static PKArgs args_cut( - 1, 0, 2, false, false, - { - "cols", "nbins", "right_closed" - }, - "cut", doc_cut -); - - -/** - * Python-facing function that can take as an argument either a Frame or - * an f-expression. - */ -static oobj pyfn_cut(const PKArgs& args) -{ - if (args[0].is_none_or_undefined()) { - throw TypeError() << "Function `cut()` requires one positional argument, " - "but none were given"; - } - oobj arg0 = args[0].to_oobj(); - oobj arg1 = args[1].is_none_or_undefined()? py::None() : args[1].to_oobj(); - oobj arg2 = args[2].is_none_or_undefined()? py::True() : args[2].to_oobj(); - - if (arg0.is_frame()) { - return cut_frame(arg0, arg1, arg2); - } - if (arg0.is_dtexpr() || arg0.is_fexpr()) { - return make_pyexpr( - dt::expr::Op::CUT, - otuple{ arg0 }, - otuple{ arg1, arg2 } - ); - } - throw TypeError() << "The first argument to `cut()` must be a column " - "expression or a Frame, instead got " << arg0.typeobj(); -} - - - -void DatatableModule::init_methods_cut() { - ADD_FN(&py::pyfn_cut, py::args_cut); -} - -} // namespace py - - diff --git a/src/core/expr/op.h b/src/core/expr/op.h index a2039b83f3..2db729f9e9 100644 --- a/src/core/expr/op.h +++ b/src/core/expr/op.h @@ -52,7 +52,6 @@ enum class Op : size_t { SETPLUS = 3, SETMINUS = 4, SHIFTFN = 5, // head_func_shift.cc - CUT = 7, // Unary UPLUS = UNOP_FIRST, // funary/basic.cc diff --git a/src/datatable/expr/expr.py b/src/datatable/expr/expr.py index 7937f0a75d..512deaedd1 100644 --- a/src/datatable/expr/expr.py +++ b/src/datatable/expr/expr.py @@ -38,7 +38,6 @@ class OpCodes(enum.Enum): SETPLUS = 3 SETMINUS = 4 SHIFTFN = 5 - CUT = 7 # Unary UPLUS = 101 diff --git a/tests/expr/test-cut.py b/tests/expr/test-cut.py index b0f2d257e0..ab272f9336 100644 --- a/tests/expr/test-cut.py +++ b/tests/expr/test-cut.py @@ -21,15 +21,15 @@ # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS # IN THE SOFTWARE. #------------------------------------------------------------------------------- -from datatable import dt, stype, f, cut -from tests import assert_equals +import math import pytest import random -import math +from datatable import dt, stype, f, cut, FExpr +from tests import assert_equals #------------------------------------------------------------------------------- -# cut() +# Errors #------------------------------------------------------------------------------- def test_cut_error_noargs(): @@ -43,7 +43,7 @@ def test_cut_error_wrong_column_type(): msg = r"cut\(\) can only be applied to numeric columns, instead column 1 " \ "has an stype: str32" with pytest.raises(TypeError, match=msg): - cut(DT) + DT[:, cut(DT)] def test_cut_error_wrong_column_type_zero_rows(): @@ -51,35 +51,35 @@ def test_cut_error_wrong_column_type_zero_rows(): msg = r"cut\(\) can only be applied to numeric columns, instead column 0 " \ "has an stype: str32" with pytest.raises(TypeError, match=msg): - cut(DT) + DT[:, cut(DT)] def test_cut_error_float_nbins(): msg = "Expected an integer, instead got " DT = dt.Frame(range(10)) with pytest.raises(TypeError, match=msg): - cut(DT, nbins = 1.5) + DT[:, cut(DT, nbins = 1.5)] def test_cut_error_zero_nbins(): msg = "Number of bins must be positive, instead got: 0" DT = dt.Frame(range(10)) with pytest.raises(ValueError, match=msg): - cut(DT, nbins = 0) + DT[:, cut(DT, nbins = 0)] def test_cut_error_negative_nbins(): msg = "Number of bins must be positive, instead got: -10" DT = dt.Frame(range(10)) with pytest.raises(ValueError, match=msg): - cut(DT, nbins = -10) + DT[:, cut(DT, nbins = -10)] def test_cut_error_negative_nbins_list(): msg = r"All elements in nbins must be positive, got nbins\[0\]: 0" DT = dt.Frame([[3, 1, 4], [1, 5, 9]]) with pytest.raises(ValueError, match=msg): - cut(DT, nbins = [0, -1]) + DT[:, cut(DT, nbins = [0, -1])] def test_cut_error_inconsistent_nbins(): @@ -87,14 +87,15 @@ def test_cut_error_inconsistent_nbins(): "the number of columns in the frame/expression, i.e. 2, instead got: 1") DT = dt.Frame([[3, 1, 4], [1, 5, 9]]) with pytest.raises(ValueError, match=msg): - cut(DT, nbins = [10]) + DT[:, cut(DT, nbins = [10])] def test_cut_error_wrong_right(): - msg = "Expected a boolean, instead got " + msg = r"Argument right_closed in cut\(\) should be a boolean, instead got " \ + "" DT = dt.Frame(range(10)) with pytest.raises(TypeError, match=msg): - cut(DT, right_closed = 1492) + DT[:, cut(DT, right_closed = 1492)] def test_cut_error_groupby(): @@ -104,14 +105,23 @@ def test_cut_error_groupby(): DT[:, cut(f[0]), f[0]] + +#------------------------------------------------------------------------------- +# Normal +#------------------------------------------------------------------------------- + def test_cut_empty_frame(): - DT_cut = cut(dt.Frame()) - assert_equals(DT_cut, dt.Frame()) + DT = dt.Frame() + expr_cut = cut(DT) + assert isinstance(expr_cut, FExpr) + assert_equals(DT[:, f[:]], DT) def test_cut_trivial(): DT = dt.Frame({"trivial": range(10)}) - DT_cut = cut(DT) + DT_cut = DT[:, cut(f[:])] + expr_cut = cut(DT) + assert isinstance(expr_cut, FExpr) assert_equals(DT, DT_cut) @@ -124,8 +134,8 @@ def test_cut_expr(): def test_cut_one_row(): nbins = [1, 2, 3, 4] DT = dt.Frame([[True], [404], [3.1415926], [None]]) - DT_cut_right = cut(DT, nbins = nbins) - DT_cut_left = cut(DT, nbins = nbins, right_closed = False) + DT_cut_right = DT[:, cut(DT, nbins = nbins)] + DT_cut_left = DT[:, cut(DT, nbins = nbins, right_closed = False)] assert DT_cut_right.to_list() == [[0], [0], [1], [None]] assert DT_cut_left.to_list() == [[0], [1], [1], [None]] @@ -172,9 +182,9 @@ def test_cut_small(): stypes = [stype.int32] * DT.ncols ) - DT_cut_list = cut(DT, nbins = nbins) - DT_cut_tuple = cut(DT, nbins = tuple(nbins)) - DT_cut_list_left = cut(DT, nbins = nbins, right_closed = False) + DT_cut_list = DT[:, cut(DT, nbins = nbins)] + DT_cut_tuple = DT[:, cut(DT, nbins = tuple(nbins))] + DT_cut_list_left = DT[:, cut(DT, nbins = nbins, right_closed = False)] assert_equals(DT_ref_right, DT_cut_list) assert_equals(DT_ref_right, DT_cut_tuple) assert_equals(DT_ref_left, DT_cut_list_left) @@ -192,7 +202,7 @@ def test_cut_vs_pandas_random(pandas, seed): n = random.randint(1, max_size) nbins = [random.randint(1, max_size) for _ in range(3)] - right_closed = random.randint(0, 1) + right_closed = bool(random.getrandbits(1)) data = [[] for _ in range(3)] for _ in range(n): @@ -201,7 +211,7 @@ def test_cut_vs_pandas_random(pandas, seed): data[2].append(random.random() * 2 * max_value - max_value) DT = dt.Frame(data, stypes = [stype.bool8, stype.int32, stype.float64]) - DT_cut = cut(DT, nbins = nbins, right_closed = right_closed) + DT_cut = DT[:, cut(DT, nbins = nbins, right_closed = right_closed)] PD_cut = [pandas.cut(data[i], nbins[i], labels=False, right=right_closed) for i in range(3)] @@ -222,8 +232,8 @@ def test_cut_pandas_issue_35126(pandas): nbins = 42 data = [-97, 0, 97] DT = dt.Frame(data) - DT_cut_right = cut(DT, nbins = nbins) - DT_cut_left = cut(DT, nbins = nbins, right_closed = False) + DT_cut_right = DT[:, cut(DT, nbins = nbins)] + DT_cut_left = DT[:, cut(DT, nbins = nbins, right_closed = False)] assert DT_cut_right.to_list() == [[0, 20, 41]] assert DT_cut_left.to_list() == [[0, 21, 41]] From 521193ba60d0ee9d5e342b06d513f5cfd2805e2d Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Tue, 18 Aug 2020 12:06:06 -0700 Subject: [PATCH 3/7] Add cut FExpr file --- src/core/expr/fexpr_cut.cc | 198 +++++++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 src/core/expr/fexpr_cut.cc diff --git a/src/core/expr/fexpr_cut.cc b/src/core/expr/fexpr_cut.cc new file mode 100644 index 0000000000..5b30a87ad1 --- /dev/null +++ b/src/core/expr/fexpr_cut.cc @@ -0,0 +1,198 @@ +//------------------------------------------------------------------------------ +// Copyright 2020 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +//------------------------------------------------------------------------------ +#include "_dt.h" +#include "column/latent.h" +#include "column/cut.h" +#include "datatablemodule.h" +#include "expr/eval_context.h" +#include "expr/fexpr_func.h" +#include "frame/py_frame.h" +namespace dt { +namespace expr { + + + +//------------------------------------------------------------------------------ +// FExpr_Cut +//------------------------------------------------------------------------------ + +class FExpr_Cut : public FExpr_Func { + private: + ptrExpr arg_; + py::oobj py_nbins_; + bool right_closed_; + size_t: 56; + + public: + FExpr_Cut(py::oobj arg, py::robj py_nbins, bool right_closed) + : arg_(as_fexpr(arg)), + py_nbins_(py_nbins), + right_closed_(right_closed) + {} + + std::string repr() const override { + std::string out = "cut("; + out += arg_->repr(); + if (!py_nbins_.is_none()) { + out += ", nbins="; + out += py_nbins_.repr().to_string(); + out += ", right_closed="; + out += right_closed_? "true" : "false"; + } + out += ")"; + return out; + } + + + Workframe evaluate_n(EvalContext& ctx) const override { + if (ctx.has_groupby()) { + throw NotImplError() << "cut() cannot be used in a groupby context"; + } + + int32_t nbins_default = 10; + Workframe wf = arg_->evaluate_n(ctx); + const size_t ncols = wf.ncols(); + + int32vec nbins(ncols); + bool defined_nbins = !py_nbins_.is_none(); + bool nbins_list_or_tuple = py_nbins_.is_list_or_tuple(); + + if (nbins_list_or_tuple) { + py::oiter py_nbins = py_nbins_.to_oiter(); + if (py_nbins.size() != ncols) { + throw ValueError() << "When `nbins` is a list or a tuple, its length must be " + << "the same as the number of columns in the frame/expression, i.e. `" + << ncols << "`, instead got: `" << py_nbins.size() << "`"; + + } + + size_t i = 0; + for (auto py_nbin : py_nbins) { + int32_t nbin = py_nbin.to_int32_strict(); + if (nbin <= 0) { + throw ValueError() << "All elements in `nbins` must be positive, " + "got `nbins[" << i << "`]: `" << nbin << "`"; + } + + nbins[i++] = nbin; + } + xassert(i == ncols); + + } else { + if (defined_nbins) { + nbins_default = py_nbins_.to_int32_strict(); + if (nbins_default <= 0) { + throw ValueError() << "Number of bins must be positive, " + "instead got: `" << nbins_default << "`"; + } + } + + for (size_t i = 0; i < ncols; ++i) { + nbins[i] = nbins_default; + } + } + + // Cut workframe in-place + for (size_t i = 0; i < ncols; ++i) { + Column coli = wf.retrieve_column(i); + coli = Column(Cut_ColumnImpl::make( + std::move(coli), i, nbins[i], right_closed_ + )); + wf.replace_column(i, std::move(coli)); + } + + return wf; + } +}; + + + + +//------------------------------------------------------------------------------ +// Python-facing `cut()` function +//------------------------------------------------------------------------------ + +static const char* doc_cut = +R"(cut(cols, nbins=10, right_closed=True) +-- + +Cut all the columns in a Frame/f-expression by binning +their values into equal-width discrete intervals. + +Parameters +---------- +cols: FExpr + Frame or f-expression consisting of numeric columns. +nbins: int | List[int] + When a single number is specified, this number of bins + will be used to bin each column of `cols`. + When a list or a tuple is provided, each column will be binned + by using its own number of bins. In the latter case, + the list/tuple length must be equal to the number of columns + in `cols`. +right_closed: bool + Each binning interval is `half-open`_. This flag indicates which + side of the interval is closed. + +return: FExpr + The return type matches the type of the `cols` argument. + If the function is applied to a frame, then the result is a frame where + each column from the original frame has been cut into the specified bins. + If the `cols` argument is an f-expression, then the result is a new + f-expression that transforms every column into its cut version. + +See also +-------- +:func:`qcut()` -- function for quantile binning. + +.. _`half-open`: https://en.wikipedia.org/wiki/Interval_(mathematics)#Terminology + +)"; + +static py::PKArgs args_cut( + 1, 0, 2, false, false, + { + "cols", "nbins", "right_closed" + }, + "cut", doc_cut +); + +static py::oobj pyfn_cut(const py::PKArgs& args) { + if (args[0].is_none_or_undefined()) { + throw TypeError() << "Function `cut()` requires one positional argument, " + "but none were given"; + } + py::oobj arg0 = args[0].to_oobj(); + py::oobj arg1 = args[1].is_none_or_undefined()? py::None() : args[1].to_oobj(); + bool arg2 = args[2].is_none_or_undefined()? true : args[2].to_bool_strict(); + + return PyFExpr::make(new FExpr_Cut(arg0, arg1, arg2)); +} + + + +}} // dt::expr + + +void py::DatatableModule::init_methods_cut() { + ADD_FN(&dt::expr::pyfn_cut, dt::expr::args_cut); +} From b32ca0d4a20583dfac211e138e1be2bc49e70ab3 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Tue, 18 Aug 2020 12:07:32 -0700 Subject: [PATCH 4/7] Remove obsolete include --- src/core/expr/fexpr_cut.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/expr/fexpr_cut.cc b/src/core/expr/fexpr_cut.cc index 5b30a87ad1..a206989fcf 100644 --- a/src/core/expr/fexpr_cut.cc +++ b/src/core/expr/fexpr_cut.cc @@ -20,9 +20,8 @@ // IN THE SOFTWARE. //------------------------------------------------------------------------------ #include "_dt.h" -#include "column/latent.h" -#include "column/cut.h" #include "datatablemodule.h" +#include "column/cut.h" #include "expr/eval_context.h" #include "expr/fexpr_func.h" #include "frame/py_frame.h" From bb40573a650460db74001d80f16e5f1e5c73ebcf Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Tue, 18 Aug 2020 12:12:06 -0700 Subject: [PATCH 5/7] true -> True, false -> False in repr --- src/core/expr/fexpr_cut.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/expr/fexpr_cut.cc b/src/core/expr/fexpr_cut.cc index a206989fcf..8198937810 100644 --- a/src/core/expr/fexpr_cut.cc +++ b/src/core/expr/fexpr_cut.cc @@ -55,7 +55,7 @@ class FExpr_Cut : public FExpr_Func { out += ", nbins="; out += py_nbins_.repr().to_string(); out += ", right_closed="; - out += right_closed_? "true" : "false"; + out += right_closed_? "True" : "False"; } out += ")"; return out; From 3bed523b8eaaf5c6831248bbd4324ae1e4ee41c3 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Tue, 18 Aug 2020 12:15:39 -0700 Subject: [PATCH 6/7] Improve docs --- src/core/expr/fexpr_cut.cc | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/core/expr/fexpr_cut.cc b/src/core/expr/fexpr_cut.cc index 8198937810..36172be4cb 100644 --- a/src/core/expr/fexpr_cut.cc +++ b/src/core/expr/fexpr_cut.cc @@ -134,13 +134,13 @@ static const char* doc_cut = R"(cut(cols, nbins=10, right_closed=True) -- -Cut all the columns in a Frame/f-expression by binning -their values into equal-width discrete intervals. +Cut all the columns from `cols` by binning their values into +equal-width discrete intervals. Parameters ---------- cols: FExpr - Frame or f-expression consisting of numeric columns. + Input data for equal-width interval binning. nbins: int | List[int] When a single number is specified, this number of bins will be used to bin each column of `cols`. @@ -153,11 +153,8 @@ right_closed: bool side of the interval is closed. return: FExpr - The return type matches the type of the `cols` argument. - If the function is applied to a frame, then the result is a frame where - each column from the original frame has been cut into the specified bins. - If the `cols` argument is an f-expression, then the result is a new - f-expression that transforms every column into its cut version. + f-expression that converts input columns into the columns filled + with the respective bin ids. See also -------- From 23ce4be4d3fb92d027bc39068184520e310ce603 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Tue, 18 Aug 2020 12:33:01 -0700 Subject: [PATCH 7/7] Fix docs --- docs/api/dt/cut.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api/dt/cut.rst b/docs/api/dt/cut.rst index 5f7686e759..8c46425789 100644 --- a/docs/api/dt/cut.rst +++ b/docs/api/dt/cut.rst @@ -1,6 +1,6 @@ .. py:currentmodule:: datatable .. xfunction:: datatable.cut - :src: src/core/expr/head_func_cut.cc pyfn_cut - :doc: src/core/expr/head_func_cut.cc doc_cut + :src: src/core/expr/fexpr_cut.cc pyfn_cut + :doc: src/core/expr/fexpr_cut.cc doc_cut :tests: tests/expr/test-cut.py