From 65d05f750edbafbaec5273a7128d4e4bb6aa3d6e Mon Sep 17 00:00:00 2001
From: cgangwar11 <chandan.gangwar0411@gmail.com>
Date: Mon, 21 Jan 2019 02:24:31 +0530
Subject: [PATCH 1/7] BUG: Drop NaN code (-1) from observed categorical grouper codes (GH24740)

---
 doc/source/whatsnew/v0.24.0.rst          |  1 +
 pandas/core/groupby/grouper.py           |  1 +
 pandas/tests/groupby/test_categorical.py | 12 ++++++++++++
 3 files changed, 14 insertions(+)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 3268575c7064d..52c4a3e1b229e 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1782,6 +1782,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`DataFrame.groupby` did not respect the ``observed`` argument when selecting a column and instead always used ``observed=False`` (:issue:`23970`)
 - Bug in :func:`pandas.core.groupby.SeriesGroupBy.pct_change` or :func:`pandas.core.groupby.DataFrameGroupBy.pct_change` would previously work across groups when calculating the percent change, where it now correctly works per group (:issue:`21200`, :issue:`21235`).
 - Bug preventing hash table creation with very large number (2^32) of rows (:issue:`22805`)
+- Bug in :meth: `pandas.core.groupby.groups` which casues ``ValueError`` if ``observed=True`` and ``nan`` is present in group column (:issue:`22805`) 
 
 Reshaping
 ^^^^^^^^^
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index d8df227d4911a..16c7ea687237c 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -299,6 +299,7 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
                 self._labels = self.grouper.codes
                 if observed:
                     codes = algorithms.unique1d(self.grouper.codes)
+                    codes = codes[codes != -1]
                 else:
                     codes = np.arange(len(categories))
 
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 144b64025e1c0..199002cfc294c 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -420,6 +420,18 @@ def test_observed_groups(observed):
     tm.assert_dict_equal(result, expected)
 
 
+def test_observed_groups_with_nan(observed=True):
+    # GH 24740
+    df = pd.DataFrame({'cat': pd.Categorical(['a', 'c', 'a'],
+                       categories=['a', 'b', 'd', 'e', 'f']),
+                       'vals': [1, 2, 3]})
+
+    g = df.groupby('cat', observed=observed)
+    result = g.groups
+    expected = {'a': Index([0, 2], dtype='int64')}
+    tm.assert_dict_equal(result, expected)
+
+
 def test_datetime():
     # GH9049: ensure backward compatibility
     levels = pd.date_range('2014-01-01', periods=4)

From ee1c9af72d2490c5ee342fd96a9b0937b015f1a0 Mon Sep 17 00:00:00 2001
From: cgangwar11 <chandan.gangwar0411@gmail.com>
Date: Mon, 21 Jan 2019 08:05:09 +0530
Subject: [PATCH 2/7] DOC: Add trailing period to whatsnew entry and strip trailing whitespace

---
 doc/source/whatsnew/v0.24.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 52c4a3e1b229e..785e7a4a0053f 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1782,7 +1782,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`DataFrame.groupby` did not respect the ``observed`` argument when selecting a column and instead always used ``observed=False`` (:issue:`23970`)
 - Bug in :func:`pandas.core.groupby.SeriesGroupBy.pct_change` or :func:`pandas.core.groupby.DataFrameGroupBy.pct_change` would previously work across groups when calculating the percent change, where it now correctly works per group (:issue:`21200`, :issue:`21235`).
 - Bug preventing hash table creation with very large number (2^32) of rows (:issue:`22805`)
-- Bug in :meth: `pandas.core.groupby.groups` which casues ``ValueError`` if ``observed=True`` and ``nan`` is present in group column (:issue:`22805`) 
+- Bug in :meth: `pandas.core.groupby.groups` which casues ``ValueError`` if ``observed=True`` and ``nan`` is present in group column (:issue:`22805`).
 
 Reshaping
 ^^^^^^^^^

From 16531ec0d5ce28101223ba2113987e7e4cd6d780 Mon Sep 17 00:00:00 2001
From: cgangwar11 <chandan.gangwar0411@gmail.com>
Date: Mon, 21 Jan 2019 22:36:45 +0530
Subject: [PATCH 3/7] Using literal observed value

---
 pandas/tests/groupby/test_categorical.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 199002cfc294c..0283e077d231a 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -420,13 +420,13 @@ def test_observed_groups(observed):
     tm.assert_dict_equal(result, expected)
 
 
-def test_observed_groups_with_nan(observed=True):
+def test_observed_groups_with_nan():
     # GH 24740
     df = pd.DataFrame({'cat': pd.Categorical(['a', 'c', 'a'],
                        categories=['a', 'b', 'd', 'e', 'f']),
                        'vals': [1, 2, 3]})
 
-    g = df.groupby('cat', observed=observed)
+    g = df.groupby('cat', observed=True)
     result = g.groups
     expected = {'a': Index([0, 2], dtype='int64')}
     tm.assert_dict_equal(result, expected)

From 0fe8678cd6bee28f06285e0ccc3f1244fb052b23 Mon Sep 17 00:00:00 2001
From: cgangwar11 <chandan.gangwar0411@gmail.com>
Date: Tue, 22 Jan 2019 20:08:42 +0530
Subject: [PATCH 4/7] Modified previous test case and added new one for 21151

---
 pandas/tests/groupby/test_categorical.py | 33 +++++++++++++++++++-----
 1 file changed, 27 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 0283e077d231a..e118135ccc75d 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -420,18 +420,39 @@ def test_observed_groups(observed):
     tm.assert_dict_equal(result, expected)
 
 
-def test_observed_groups_with_nan():
+def test_observed_groups_with_nan(observed):
     # GH 24740
-    df = pd.DataFrame({'cat': pd.Categorical(['a', 'c', 'a'],
-                       categories=['a', 'b', 'd', 'e', 'f']),
+    df = pd.DataFrame({'cat': pd.Categorical(['a', np.nan, 'a'],
+                       categories=['a', 'b', 'd']),
                        'vals': [1, 2, 3]})
-
-    g = df.groupby('cat', observed=True)
+    g = df.groupby('cat', observed=observed)
     result = g.groups
-    expected = {'a': Index([0, 2], dtype='int64')}
+    if observed:  # the NaN row gets no key; only the seen category does
+        expected = {'a': Index([0, 2], dtype='int64')}
+    else:  # every declared category is a key; unseen ones map to empty
+        expected = {'a': Index([0, 2], dtype='int64'),
+                    'b': Index([], dtype='int64'),
+                    'd': Index([], dtype='int64')}
     tm.assert_dict_equal(result, expected)
 
 
+def test_dataframe_categorical_with_nan(observed):
+    # GH 21151: NaN rows in a categorical key must not form a group
+    s1 = pd.Categorical([np.nan, 'a', np.nan, 'a'],
+                        categories=['a', 'b', 'c'])
+    s2 = pd.Series([1, 2, 3, 4])
+    df = pd.DataFrame({'s1': s1, 's2': s2})
+    result = df.groupby('s1', observed=observed).first().reset_index()
+    if observed:  # only the category present in the data is expected
+        expected = DataFrame({'s1': pd.Categorical(['a'],
+                              categories=['a', 'b', 'c']), 's2': [2]})
+    else:  # unused categories 'b' and 'c' are expected with NaN values
+        expected = DataFrame({'s1': pd.Categorical(['a', 'b', 'c'],
+                              categories=['a', 'b', 'c']),
+                              's2': [2, np.nan, np.nan]})
+    tm.assert_frame_equal(result, expected)
+
+
 def test_datetime():
     # GH9049: ensure backward compatibility
     levels = pd.date_range('2014-01-01', periods=4)

From e397a5ab6abe8e10f8149d57e7862587a63578b1 Mon Sep 17 00:00:00 2001
From: cgangwar11 <chandan.gangwar0411@gmail.com>
Date: Tue, 22 Jan 2019 20:13:20 +0530
Subject: [PATCH 5/7] whatsnew changes

---
 doc/source/whatsnew/v0.24.0.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 785e7a4a0053f..f890046ef4fc8 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1782,7 +1782,8 @@ Groupby/Resample/Rolling
 - Bug in :meth:`DataFrame.groupby` did not respect the ``observed`` argument when selecting a column and instead always used ``observed=False`` (:issue:`23970`)
 - Bug in :func:`pandas.core.groupby.SeriesGroupBy.pct_change` or :func:`pandas.core.groupby.DataFrameGroupBy.pct_change` would previously work across groups when calculating the percent change, where it now correctly works per group (:issue:`21200`, :issue:`21235`).
 - Bug preventing hash table creation with very large number (2^32) of rows (:issue:`22805`)
-- Bug in :meth: `pandas.core.groupby.groups` which casues ``ValueError`` if ``observed=True`` and ``nan`` is present in group column (:issue:`22805`).
+- Bug in groupby which casues ``ValueError`` if ``observed=True`` and ``nan`` is present in group column (:issue:`22805`).
+- Bug in groupby which casues wrong grouping of categorical if ``observed=True`` and ``nan`` is present in group column (:issue:`21151`).
 
 Reshaping
 ^^^^^^^^^

From e2baddaf4b4f6c9019a8f284126875a5067a7049 Mon Sep 17 00:00:00 2001
From: cgangwar11 <chandan.gangwar0411@gmail.com>
Date: Tue, 22 Jan 2019 20:26:53 +0530
Subject: [PATCH 6/7] DOC: Merge the two groupby whatsnew entries into one

---
 doc/source/whatsnew/v0.24.0.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index f890046ef4fc8..a1d8593a19d0f 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1782,8 +1782,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`DataFrame.groupby` did not respect the ``observed`` argument when selecting a column and instead always used ``observed=False`` (:issue:`23970`)
 - Bug in :func:`pandas.core.groupby.SeriesGroupBy.pct_change` or :func:`pandas.core.groupby.DataFrameGroupBy.pct_change` would previously work across groups when calculating the percent change, where it now correctly works per group (:issue:`21200`, :issue:`21235`).
 - Bug preventing hash table creation with very large number (2^32) of rows (:issue:`22805`)
-- Bug in groupby which casues ``ValueError`` if ``observed=True`` and ``nan`` is present in group column (:issue:`22805`).
-- Bug in groupby which casues wrong grouping of categorical if ``observed=True`` and ``nan`` is present in group column (:issue:`21151`).
+- Bug in groupby when grouping on categorical causes ``ValueError`` and incorrect grouping if ``observed=True`` and ``nan`` is present in categorical column (:issue:`24850`, :issue:`21151`).
 
 Reshaping
 ^^^^^^^^^

From 8d5b8968acaac684f61e3acbf4df47f2f905696c Mon Sep 17 00:00:00 2001
From: cgangwar11 <chandan.gangwar0411@gmail.com>
Date: Tue, 22 Jan 2019 20:27:49 +0530
Subject: [PATCH 7/7] DOC: Correct issue number in whatsnew entry (24850 -> 24740)

---
 doc/source/whatsnew/v0.24.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index a1d8593a19d0f..c2e5f3b3c9aa2 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1782,7 +1782,7 @@ Groupby/Resample/Rolling
 - Bug in :meth:`DataFrame.groupby` did not respect the ``observed`` argument when selecting a column and instead always used ``observed=False`` (:issue:`23970`)
 - Bug in :func:`pandas.core.groupby.SeriesGroupBy.pct_change` or :func:`pandas.core.groupby.DataFrameGroupBy.pct_change` would previously work across groups when calculating the percent change, where it now correctly works per group (:issue:`21200`, :issue:`21235`).
 - Bug preventing hash table creation with very large number (2^32) of rows (:issue:`22805`)
-- Bug in groupby when grouping on categorical causes ``ValueError`` and incorrect grouping if ``observed=True`` and ``nan`` is present in categorical column (:issue:`24850`, :issue:`21151`).
+- Bug in :meth:`DataFrame.groupby` where grouping on a categorical column raised ``ValueError`` or grouped incorrectly if ``observed=True`` and ``nan`` was present in the column (:issue:`24740`, :issue:`21151`).
 
 Reshaping
 ^^^^^^^^^