diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst
index 69bb700c97b15..4e284fe7b5968 100644
--- a/doc/source/getting_started/comparison/comparison_with_sas.rst
+++ b/doc/source/getting_started/comparison/comparison_with_sas.rst
@@ -629,7 +629,7 @@ for more details and examples.
 
 .. ipython:: python
 
-   tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
+   tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
    tips_summed.head()
 
 
diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst
index db687386329bb..fec6bae1e0330 100644
--- a/doc/source/getting_started/comparison/comparison_with_stata.rst
+++ b/doc/source/getting_started/comparison/comparison_with_stata.rst
@@ -617,7 +617,7 @@ for more details and examples.
 
 .. ipython:: python
 
-   tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
+   tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
    tips_summed.head()
 
 
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index e524b8d2fbf8c..2404e60323294 100755
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -584,6 +584,37 @@ Deprecations
 - :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
 - The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)
 
+**Selecting Columns from a Grouped DataFrame**
+
+When selecting columns from a :class:`DataFrameGroupBy` object, passing multiple keys (or a tuple of keys) inside single brackets is deprecated;
+a list of items should be used instead (:issue:`23566`). For example:
+
+.. code-block:: ipython
+
+    df = pd.DataFrame({
+        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+        "B": np.random.randn(8),
+        "C": np.random.randn(8),
+    })
+    g = df.groupby('A')
+
+    # single key, returns SeriesGroupBy
+    g['B']
+
+    # tuple of single key, returns SeriesGroupBy
+    g[('B',)]
+
+    # tuple of multiple keys, returns DataFrameGroupBy, emits FutureWarning
+    g[('B', 'C')]
+
+    # multiple keys passed directly, returns DataFrameGroupBy, emits FutureWarning
+    # (Python implicitly packs the comma-separated keys into a single tuple)
+    g['B', 'C']
+
+    # proper way, returns DataFrameGroupBy
+    g[['B', 'C']]
+
+
 .. _whatsnew_1000.prior_deprecations:
 
 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 27afd8ca018ac..c49677fa27a31 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -25,6 +25,7 @@
     Union,
     cast,
 )
+import warnings
 
 import numpy as np
 
@@ -326,7 +327,7 @@ def _aggregate_multiple_funcs(self, arg):
         return DataFrame(results, columns=columns)
 
     def _wrap_series_output(
-        self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index,
+        self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index
     ) -> Union[Series, DataFrame]:
         """
         Wraps the output of a SeriesGroupBy operation into the expected result.
@@ -1578,6 +1579,19 @@ def filter(self, func, dropna=True, *args, **kwargs):
 
         return self._apply_filter(indices, dropna)
 
+    def __getitem__(self, key):
+        # GH 23566: warn when several columns are selected via a bare tuple
+        if isinstance(key, tuple) and len(key) > 1:
+            # a length-1 tuple is exempt: per the original note it selects a
+            # single column (SeriesGroupBy) and is valid syntax, so no warning
+            warnings.warn(
+                "Indexing with multiple keys (implicitly converted to a tuple "
+                "of keys) will be deprecated, use a list instead.",
+                FutureWarning,
+                stacklevel=2,  # attribute the warning to the caller's line
+            )
+        return super().__getitem__(key)  # selection is delegated unchanged
+
     def _gotitem(self, key, ndim: int, subset=None):
         """
         sub-classes to define
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index f2af397357e4f..04c707acafab2 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -71,14 +71,12 @@ def test_getitem_list_of_columns(self):
         )
 
         result = df.groupby("A")[["C", "D"]].mean()
-        result2 = df.groupby("A")["C", "D"].mean()
-        result3 = df.groupby("A")[df.columns[2:4]].mean()
+        result2 = df.groupby("A")[df.columns[2:4]].mean()
 
         expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean()
 
         tm.assert_frame_equal(result, expected)
         tm.assert_frame_equal(result2, expected)
-        tm.assert_frame_equal(result3, expected)
 
     def test_getitem_numeric_column_names(self):
         # GH #13731
@@ -91,14 +89,40 @@ def test_getitem_numeric_column_names(self):
             }
         )
         result = df.groupby(0)[df.columns[1:3]].mean()
-        result2 = df.groupby(0)[2, 4].mean()
-        result3 = df.groupby(0)[[2, 4]].mean()
+        result2 = df.groupby(0)[[2, 4]].mean()
 
         expected = df.loc[:, [0, 2, 4]].groupby(0).mean()
 
         tm.assert_frame_equal(result, expected)
         tm.assert_frame_equal(result2, expected)
-        tm.assert_frame_equal(result3, expected)
+
+        # per GH 23566 this should raise a FutureWarning
+        with tm.assert_produces_warning(FutureWarning):
+            df.groupby(0)[2, 4].mean()
+
+    def test_getitem_single_list_of_columns(self, df):
+        # GH 23566: multiple keys in single brackets must emit a FutureWarning
+        with tm.assert_produces_warning(FutureWarning):
+            df.groupby("A")["C", "D"].mean()
+
+    def test_getitem_single_column(self):
+        # a single string key selects one column and yields a Series result
+        frame = DataFrame(
+            {
+                "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
+                "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
+                "C": np.random.randn(8),
+                "D": np.random.randn(8),
+                "E": np.random.randn(8),
+            }
+        )
+        grouped = frame.groupby("A")
+        result = grouped["C"].mean()
+
+        # reference: aggregate the one-column frame, then take that column
+        expected = frame.loc[:, ["A", "C"]].groupby("A").mean().iloc[:, 0]
+
+        tm.assert_series_equal(result, expected)
 
 
 # grouping
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
index 2a82b39b646c0..27dd314f0df8e 100644
--- a/pandas/tests/groupby/test_transform.py
+++ b/pandas/tests/groupby/test_transform.py
@@ -319,7 +319,7 @@ def test_dispatch_transform(tsframe):
 
 def test_transform_select_columns(df):
     f = lambda x: x.mean()
-    result = df.groupby("A")["C", "D"].transform(f)
+    result = df.groupby("A")[["C", "D"]].transform(f)
 
     selection = df[["C", "D"]]
     expected = selection.groupby(df["A"]).transform(f)