From 2bd171fbfbbabe74b02c7ad705baa9386e935892 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 25 Dec 2019 12:23:18 +0800 Subject: [PATCH 01/11] TST: added test for groupby.quantile with multiple qs for int columns (GH30289) --- pandas/tests/groupby/test_function.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 4ca23c61ba920..a2423374a4e8f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1398,6 +1398,22 @@ def test_quantile_array_multiple_levels(): tm.assert_frame_equal(result, expected) +def test_groupby_quantile_with_arraylike_q_and_int_columns(): + # GH30289 + df = pd.DataFrame(np.array([2 * [_ % 4] for _ in range(10)]), columns=[0, 1]) + + quantiles = [0.5, 0.6] + expected_index = pd.MultiIndex.from_product( + [[0, 1, 2, 3], [0.5, 0.6]], names=[0, None] + ) + + expected_values = [float(x) for x in [0, 0, 1, 1, 2, 2, 3, 3]] + expected = pd.DataFrame(expected_values, index=expected_index, columns=[1]) + result = df.groupby(0).quantile(quantiles) + + tm.assert_frame_equal(result, expected) + + def test_quantile_raises(): df = pd.DataFrame( [["foo", "a"], ["foo", "b"], ["foo", "c"]], columns=["key", "val"] From 9381150d8b8b89ed54f501702335a526aded6217 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 25 Dec 2019 12:27:16 +0800 Subject: [PATCH 02/11] CLN: refactored MultiIndex._get_level_number (GH30289) --- pandas/core/indexes/multi.py | 49 +++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 05a4da28eb0a1..fcdcbce1279e3 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1282,6 +1282,34 @@ def _constructor(self): def inferred_type(self) -> str: return "mixed" + def _get_level_number_by_label(self, label) -> int: + try: + level = self.names.index(label) + except ValueError: + raise KeyError(f"Level {label} not found") + return level + + def _get_level_number_by_position(self, pos) -> int: + """Returns level number at given position + The pos should be given in python list index style which may be negative. + Raises IndexError if pos is out of range + """ + if pos < 0: + pos += self.nlevels + if pos < 0: + orig_pos = pos - self.nlevels + raise IndexError( + f"Too many levels: Index has only {self.nlevels} levels," + f" {orig_pos} is not a valid level number" + ) + # Note: levels are zero-based + elif pos >= self.nlevels: + raise IndexError( + f"Too many levels: Index has only {self.nlevels} levels, " + f"not {pos + 1}" + ) + return pos + def _get_level_number(self, level) -> int: count = self.names.count(level) if (count > 1) and not is_integer(level): @@ -1289,24 +1317,11 @@ def _get_level_number(self, level) -> int: f"The name {level} occurs multiple times, use a level number" ) try: - level = self.names.index(level) - except ValueError: + level = self._get_level_number_by_label(level) + except KeyError: if not is_integer(level): - raise KeyError(f"Level {level} not found") - elif level < 0: - level += self.nlevels - if level < 0: - orig_level = level - self.nlevels - raise IndexError( - f"Too many levels: Index has only {self.nlevels} levels," - f" {orig_level} is not a valid level number" - ) - # Note: levels are zero-based - elif level >= self.nlevels: - raise IndexError( - f"Too many levels: Index has only {self.nlevels} levels, " - f"not {level + 1}" - ) + raise + level = self._get_level_number_by_position(level) return level _tuples = None From 6d1f801a8b9c86995f0befe506f0707bcef873f6 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 25 Dec 2019 12:30:45 +0800 Subject: [PATCH 03/11] ENH: added "positional" parameter in MultiIndex.reorder_levels (GH30289) --- pandas/core/indexes/multi.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index fcdcbce1279e3..4b7de6e0fa52c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2186,18 +2186,34 @@ def swaplevel(self, i=-2, j=-1): levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False ) - def reorder_levels(self, order): + def reorder_levels(self, order, positional=None): """ Rearrange levels using input order. May not drop or duplicate levels. Parameters ---------- + order: list + the order of index levels after reorder, could be level labels or positions + positional : bool, optional + How to interpret integer values in `order`. + + * None (default): prefer treating the values as labels, + but fall back to positional if no label with that + value is value. + * True : only treat integer values as positions. + * False : only treat integer values as labels. Returns ------- MultiIndex """ - order = [self._get_level_number(i) for i in order] + if positional is None: + order = [self._get_level_number(i) for i in order] + elif positional: + order = [self._get_level_number_by_position(i) for i in order] + else: + order = [self._get_level_number_by_label(i) for i in order] + if len(order) != self.nlevels: raise AssertionError( f"Length of order must be same as number of levels ({self.nlevels})," From 3386f58ad140dac8edeb58f8c63c6973fb6abe46 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 25 Dec 2019 12:32:47 +0800 Subject: [PATCH 04/11] BUG: fixed groupby.quantile with multiple qs for int columns issue (GH30289) --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dfda1470413b7..94433b1ef4e63 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5063,9 +5063,9 @@ def reorder_levels(self, order, axis=0): result = self.copy() if axis == 0: - result.index = result.index.reorder_levels(order) + result.index = result.index.reorder_levels(order, positional=True) else: - result.columns = result.columns.reorder_levels(order) + result.columns = result.columns.reorder_levels(order, positional=True) return result # ---------------------------------------------------------------------- From 42561919e3d05e6fd8b38c093cad56faa5f7e4ec Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 25 Dec 2019 13:32:29 +0800 Subject: [PATCH 05/11] DOC: added bug fix and enhancements in whatsnew doc(GH30289) --- doc/source/whatsnew/v1.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index faca744a8f92c..92b8dec2cdab2 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -207,6 +207,7 @@ Other enhancements - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`) - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) +- Added `positional` optional parameter in :meth:`MultiIndex.reorder_levels` to specify the type of levels to use, labels or positions or both (:issue:`30289`) Build Changes @@ -845,6 +846,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) - Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`) - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) +- Bug in :meth:`GroupBy.quantile` with multiple q values when columns are integers (:issue:`30289`) Reshaping ^^^^^^^^^ From 54991b2d380ae5a68858dc37b25071b41ed6a89b Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 25 Dec 2019 14:15:54 +0800 Subject: [PATCH 06/11] DOC: reformatted docstring (GH30289) --- pandas/core/indexes/multi.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4b7de6e0fa52c..87f41afff15d8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1290,10 +1290,17 @@ def _get_level_number_by_label(self, label) -> int: return level def _get_level_number_by_position(self, pos) -> int: - """Returns level number at given position - The pos should be given in python list index style which may be negative. - Raises IndexError if pos is out of range - """ + """ + Returns level number at given position + + Parameters + ---------- + pos : level position given in python list index style which may be negative + + Raises + ------ + IndexError if pos is out of range + """ if pos < 0: pos += self.nlevels if pos < 0: @@ -2192,7 +2199,7 @@ def reorder_levels(self, order, positional=None): Parameters ---------- - order: list + order : list the order of index levels after reorder, could be level labels or positions positional : bool, optional How to interpret integer values in `order`. @@ -2207,6 +2214,7 @@ def reorder_levels(self, order, positional=None): ------- MultiIndex """ + # GH30289 if positional is None: order = [self._get_level_number(i) for i in order] elif positional: From 665032bda712ac87279d4ae93f51bb94374aef09 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 25 Dec 2019 14:47:43 +0800 Subject: [PATCH 07/11] ENH: added parameter "positional" in DataFrame.reorder_levels (GH30289) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/frame.py | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 92b8dec2cdab2..723482abb9c6b 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -207,6 +207,7 @@ Other enhancements - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`) - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) +- Added `positional` optional parameter in :meth:`DataFrame.reorder_levels` to specify the type of levels to use, labels or positions or both (:issue:`30289`) - Added `positional` optional parameter in :meth:`MultiIndex.reorder_levels` to specify the type of levels to use, labels or positions or both (:issue:`30289`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 94433b1ef4e63..0d9ec5162ebee 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5040,7 +5040,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): result.columns = result.columns.swaplevel(i, j) return result - def reorder_levels(self, order, axis=0): + def reorder_levels(self, order, axis=0, positional=None): """ Rearrange index levels using input order. May not drop or duplicate levels. @@ -5051,6 +5051,14 @@ def reorder_levels(self, order, axis=0): (position) or by key (label). axis : int Where to reorder levels. + positional : bool, optional + How to interpret integer values in `order`. + + * None (default): prefer treating the values as labels, + but fall back to positional if no label with that + value is value. + * True : only treat integer values as positions. + * False : only treat integer values as labels. Returns ------- @@ -5063,9 +5071,9 @@ def reorder_levels(self, order, axis=0): result = self.copy() if axis == 0: - result.index = result.index.reorder_levels(order, positional=True) + result.index = result.index.reorder_levels(order, positional=positional) else: - result.columns = result.columns.reorder_levels(order, positional=True) + result.columns = result.columns.reorder_levels(order, positional=positional) return result # ---------------------------------------------------------------------- From 64e9176850561ac98d4f04caf8b5e6d6f7964786 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Wed, 25 Dec 2019 15:00:37 +0800 Subject: [PATCH 08/11] BUG: GroupBy.quantile error with integer columns and arraylike q (GH30289) --- pandas/core/groupby/groupby.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b5325d8305249..ff96b7555773d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1938,7 +1938,9 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: # but this hits https://github.com/pandas-dev/pandas/issues/10710 # which doesn't reorder the list-like `q` on the inner level. order = np.roll(list(range(result.index.nlevels)), -1) - result = result.reorder_levels(order) + result = result.reorder_levels( + order, positional=True + ) # GH30289: reorder based on position, not labels result = result.reindex(q, level=-1) # fix order. From d207a3e69558792adbc9aa03ec0f7b64b127ced4 Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Thu, 26 Dec 2019 19:58:58 +0800 Subject: [PATCH 09/11] reverted most of changes as they are not the right solution to the issue --- doc/source/whatsnew/v1.0.0.rst | 5 -- pandas/core/frame.py | 14 +---- pandas/core/groupby/groupby.py | 4 +- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/multi.py | 77 ++++++------------------- pandas/tests/indexes/multi/test_drop.py | 16 ----- 6 files changed, 24 insertions(+), 94 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index c8edf88a38462..f52ce897c2e12 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -207,8 +207,6 @@ Other enhancements - The ``partition_cols`` argument in :meth:`DataFrame.to_parquet` now accepts a string (:issue:`27117`) - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`) - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) -- Added `positional` optional parameter in :meth:`DataFrame.reorder_levels` to specify the type of levels to use, labels or positions or both (:issue:`30289`) -- Added `positional` optional parameter in :meth:`MultiIndex.reorder_levels` to specify the type of levels to use, labels or positions or both (:issue:`30289`) Build Changes @@ -717,10 +715,8 @@ Datetimelike - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) - Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) -- Bug in :meth:`DataFrame.drop` where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`) - Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`) - Timedelta ^^^^^^^^^ - Bug in subtracting a :class:`TimedeltaIndex` or :class:`TimedeltaArray` from a ``np.datetime64`` object (:issue:`29558`) @@ -850,7 +846,6 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) - Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`) - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) -- Bug in :meth:`GroupBy.quantile` with multiple q values when columns are integers (:issue:`30289`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0d9ec5162ebee..dfda1470413b7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5040,7 +5040,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): result.columns = result.columns.swaplevel(i, j) return result - def reorder_levels(self, order, axis=0, positional=None): + def reorder_levels(self, order, axis=0): """ Rearrange index levels using input order. May not drop or duplicate levels. @@ -5051,14 +5051,6 @@ def reorder_levels(self, order, axis=0, positional=None): (position) or by key (label). axis : int Where to reorder levels. - positional : bool, optional - How to interpret integer values in `order`. - - * None (default): prefer treating the values as labels, - but fall back to positional if no label with that - value is value. - * True : only treat integer values as positions. - * False : only treat integer values as labels. Returns ------- @@ -5071,9 +5063,9 @@ def reorder_levels(self, order, axis=0, positional=None): result = self.copy() if axis == 0: - result.index = result.index.reorder_levels(order, positional=positional) + result.index = result.index.reorder_levels(order) else: - result.columns = result.columns.reorder_levels(order, positional=positional) + result.columns = result.columns.reorder_levels(order) return result # ---------------------------------------------------------------------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a95a7bc397909..529d123d256e8 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1938,9 +1938,7 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: # but this hits https://github.com/pandas-dev/pandas/issues/10710 # which doesn't reorder the list-like `q` on the inner level. order = np.roll(list(range(result.index.nlevels)), -1) - result = result.reorder_levels( - order, positional=True - ) # GH30289: reorder based on position, not labels + result = result.reorder_levels(order) result = result.reindex(q, level=-1) # fix order. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 272e97481a723..ce7a238daeca9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4551,7 +4551,7 @@ def get_indexer_non_unique(self, target): if is_categorical(target): tgt_values = np.asarray(target) - elif self.is_all_dates and target.is_all_dates: # GH 30399 + elif self.is_all_dates: tgt_values = target.asi8 else: tgt_values = target._ndarray_values diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 87f41afff15d8..05a4da28eb0a1 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1282,41 +1282,6 @@ def _constructor(self): def inferred_type(self) -> str: return "mixed" - def _get_level_number_by_label(self, label) -> int: - try: - level = self.names.index(label) - except ValueError: - raise KeyError(f"Level {label} not found") - return level - - def _get_level_number_by_position(self, pos) -> int: - """ - Returns level number at given position - - Parameters - ---------- - pos : level position given in python list index style which may be negative - - Raises - ------ - IndexError if pos is out of range - """ - if pos < 0: - pos += self.nlevels - if pos < 0: - orig_pos = pos - self.nlevels - raise IndexError( - f"Too many levels: Index has only {self.nlevels} levels," - f" {orig_pos} is not a valid level number" - ) - # Note: levels are zero-based - elif pos >= self.nlevels: - raise IndexError( - f"Too many levels: Index has only {self.nlevels} levels, " - f"not {pos + 1}" - ) - return pos - def _get_level_number(self, level) -> int: count = self.names.count(level) if (count > 1) and not is_integer(level): @@ -1324,11 +1289,24 @@ def _get_level_number(self, level) -> int: f"The name {level} occurs multiple times, use a level number" ) try: - level = self._get_level_number_by_label(level) - except KeyError: + level = self.names.index(level) + except ValueError: if not is_integer(level): - raise - level = self._get_level_number_by_position(level) + raise KeyError(f"Level {level} not found") + elif level < 0: + level += self.nlevels + if level < 0: + orig_level = level - self.nlevels + raise IndexError( + f"Too many levels: Index has only {self.nlevels} levels," + f" {orig_level} is not a valid level number" + ) + # Note: levels are zero-based + elif level >= self.nlevels: + raise IndexError( + f"Too many levels: Index has only {self.nlevels} levels, " + f"not {level + 1}" + ) return level _tuples = None @@ -2193,35 +2171,18 @@ def swaplevel(self, i=-2, j=-1): levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False ) - def reorder_levels(self, order, positional=None): + def reorder_levels(self, order): """ Rearrange levels using input order. May not drop or duplicate levels. Parameters ---------- - order : list - the order of index levels after reorder, could be level labels or positions - positional : bool, optional - How to interpret integer values in `order`. - - * None (default): prefer treating the values as labels, - but fall back to positional if no label with that - value is value. - * True : only treat integer values as positions. - * False : only treat integer values as labels. Returns ------- MultiIndex """ - # GH30289 - if positional is None: - order = [self._get_level_number(i) for i in order] - elif positional: - order = [self._get_level_number_by_position(i) for i in order] - else: - order = [self._get_level_number_by_label(i) for i in order] - + order = [self._get_level_number(i) for i in order] if len(order) != self.nlevels: raise AssertionError( f"Length of order must be same as number of levels ({self.nlevels})," diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index ee60f4537ade3..2c24c5bd57085 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -139,19 +139,3 @@ def test_drop_not_lexsorted(): tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) with tm.assert_produces_warning(PerformanceWarning): tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a")) - - -def test_drop_with_non_unique_datetime_index_and_invalid_keys(): - # GH 30399 - - # define dataframe with unique datetime index - df = pd.DataFrame( - np.random.randn(5, 3), - columns=["a", "b", "c"], - index=pd.date_range("2012", freq="H", periods=5), - ) - # create dataframe with non-unique datetime index - df = df.iloc[[0, 2, 2, 3]].copy() - - with pytest.raises(KeyError, match="not found in axis"): - df.drop(["a", "b"]) # Dropping with labels not exist in the index From 5fea18d80a0ff6f8f6e1b73a11bcc6ba8ab6c1bb Mon Sep 17 00:00:00 2001 From: fujiaxiang Date: Thu, 26 Dec 2019 20:25:59 +0800 Subject: [PATCH 10/11] BUG: GroupBy.quantile error with integer columns and arraylike q (GH30289) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/groupby/groupby.py | 23 ++++++++++------------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index f52ce897c2e12..bfa03d33d4e7e 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -846,6 +846,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) - Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`) - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) +- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 529d123d256e8..d8b5a9ed67959 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1937,21 +1937,18 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: # >>> result.stack(0).loc[pd.IndexSlice[:, ..., q], :] # but this hits https://github.com/pandas-dev/pandas/issues/10710 # which doesn't reorder the list-like `q` on the inner level. - order = np.roll(list(range(result.index.nlevels)), -1) + order = np.append(np.arange(1, result.index.nlevels), 0) + # temporarily saves the index names + index_names = np.array(result.index.names) + # set index names to positions to avoid confusion + result.index.names = np.arange(len(index_names)) + # place quantiles on the inside result = result.reorder_levels(order) - result = result.reindex(q, level=-1) + # restore the index names in order + result.index.names = index_names[order] - # fix order. - hi = len(q) * self.ngroups - arr = np.arange(0, hi, self.ngroups) - arrays = [] - - for i in range(self.ngroups): - arr2 = arr + i - arrays.append(arr2) - - indices = np.concatenate(arrays) - assert len(indices) == len(result) + # reorder rows to keep things sorted + indices = np.arange(len(result)).reshape([len(q), self.ngroups]).T.flatten() return result.take(indices) @Substitution(name="groupby") From 3a5d5e1ba20be1443a20ec33069f76d168086b0a Mon Sep 17 00:00:00 2001 From: Jiaxiang Date: Thu, 26 Dec 2019 08:32:29 +0800 Subject: [PATCH 11/11] BUG: Fix wrong error in df drop with non unique datetime index and invalid keys (#30446) --- doc/source/whatsnew/v1.0.0.rst | 2 ++ pandas/core/indexes/base.py | 2 +- pandas/tests/indexes/multi/test_drop.py | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index bfa03d33d4e7e..6ef43142fa386 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -715,8 +715,10 @@ Datetimelike - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) - Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) +- Bug in :meth:`DataFrame.drop` where attempting to drop non-existent values from a DatetimeIndex would yield a confusing error message (:issue:`30399`) - Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`) + Timedelta ^^^^^^^^^ - Bug in subtracting a :class:`TimedeltaIndex` or :class:`TimedeltaArray` from a ``np.datetime64`` object (:issue:`29558`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ce7a238daeca9..272e97481a723 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4551,7 +4551,7 @@ def get_indexer_non_unique(self, target): if is_categorical(target): tgt_values = np.asarray(target) - elif self.is_all_dates: + elif self.is_all_dates and target.is_all_dates: # GH 30399 tgt_values = target.asi8 else: tgt_values = target._ndarray_values diff --git a/pandas/tests/indexes/multi/test_drop.py b/pandas/tests/indexes/multi/test_drop.py index 2c24c5bd57085..ee60f4537ade3 100644 --- a/pandas/tests/indexes/multi/test_drop.py +++ b/pandas/tests/indexes/multi/test_drop.py @@ -139,3 +139,19 @@ def test_drop_not_lexsorted(): tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi) with tm.assert_produces_warning(PerformanceWarning): tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a")) + + +def test_drop_with_non_unique_datetime_index_and_invalid_keys(): + # GH 30399 + + # define dataframe with unique datetime index + df = pd.DataFrame( + np.random.randn(5, 3), + columns=["a", "b", "c"], + index=pd.date_range("2012", freq="H", periods=5), + ) + # create dataframe with non-unique datetime index + df = df.iloc[[0, 2, 2, 3]].copy() + + with pytest.raises(KeyError, match="not found in axis"): + df.drop(["a", "b"]) # Dropping with labels not exist in the index