
Revert set_index inspection/error handling for 0.24.1 #25085

Merged: 34 commits, Feb 3, 2019. The diff shown below reflects changes from 22 of the 34 commits.

Commits
31dcbb7  DOC: Minor what's new fix (#24933)  (rth, Jan 26, 2019)
84056c5  Backport PR #24916: BUG-24212 fix regression in #24897 (#24951)  (meeseeksmachine, Jan 26, 2019)
e22a6c8  Revert "Backport PR #24916: BUG-24212 fix regression in #24897 (#24951)"  (jorisvandenbossche, Jan 28, 2019)
638ac19  Backport PR #24965: Fixed itertuples usage in to_dict (#24978)  (meeseeksmachine, Jan 28, 2019)
72dc33f  Backport PR #24989: DOC: Document breaking change to read_csv (#24996)  (meeseeksmachine, Jan 29, 2019)
fd1c66c  Backport PR #24964: DEPR: Fixed warning for implicit registration (#2…  (meeseeksmachine, Jan 29, 2019)
d54c3a5  Backport PR #24973: fix for BUG: grouping with tz-aware: Values falls…  (TomAugspurger, Jan 29, 2019)
e3cc0b1  Backport PR #24967: REGR: Preserve order by default in Index.differen…  (meeseeksmachine, Jan 30, 2019)
c228597  Backport PR #24961: fix+test to_timedelta('NaT', box=False) (#25025)  (meeseeksmachine, Jan 30, 2019)
7956533  Backport PR #25033: BUG: Fixed merging on tz-aware (#25041)  (meeseeksmachine, Jan 30, 2019)
722bb79  Backport PR #24993: Test nested PandasArray (#25042)  (meeseeksmachine, Jan 30, 2019)
e3634b1  Backport PR #25039: BUG: avoid usage in_qtconsole for recent IPython …  (meeseeksmachine, Jan 31, 2019)
4f865c5  Backport PR #25024: REGR: fix read_sql delegation for queries on MySQ…  (meeseeksmachine, Jan 31, 2019)
c21d32f  Backport PR #25069: REGR: rename_axis with None should remove axis na…  (meeseeksmachine, Feb 1, 2019)
5cb622a  DOC: 0.24.1 whatsnew (#25027)  (TomAugspurger, Feb 1, 2019)
c397839  Revert "DOC: update DF.set_index (#24762)"  (h-vetinari, Feb 1, 2019)
4a211e9  Revert "API: better error-handling for df.set_index (#22486)"  (h-vetinari, Feb 1, 2019)
103a092  Replace deprecated assert_raises_regex  (h-vetinari, Feb 1, 2019)
8086f39  Re-migrate 0.24.0 extension (.txt -> .rst)  (h-vetinari, Feb 1, 2019)
999295e  Re-add docstring clarifications  (h-vetinari, Feb 1, 2019)
c24df00  Backport PR #25063: API: change Index set ops sort=True -> sort=None …  (meeseeksmachine, Feb 1, 2019)
627b17a  trigger azure  (TomAugspurger, Feb 1, 2019)
bc405ce  Backport PR #25084: DOC: Cleanup 0.24.1 whatsnew (#25086)  (meeseeksmachine, Feb 2, 2019)
02db6ec  Backport PR #25026: DOC: Start 0.24.2.rst (#25073)  (meeseeksmachine, Feb 2, 2019)
ff34d2e  trigger azure  (TomAugspurger, Feb 2, 2019)
2aa800c  Merge remote-tracking branch 'upstream/0.24.x' into revert_set_index  (h-vetinari, Feb 3, 2019)
330b343  Keep all tests from #24984; xfail where necessary  (h-vetinari, Feb 3, 2019)
24a4df4  Merge remote-tracking branch 'origin/revert_set_index' into revert_se…  (h-vetinari, Feb 3, 2019)
963a813  Remove stray debugging line  (h-vetinari, Feb 3, 2019)
4db4849  Add whatsnew  (h-vetinari, Feb 3, 2019)
8c913c2  Merge remote-tracking branch 'upstream/master' into h-vetinari-revert…  (jorisvandenbossche, Feb 3, 2019)
5a6cc73  Merge remote-tracking branch 'upstream/master' into revert_set_index  (h-vetinari, Feb 3, 2019)
ff62753  Re-add reverted 0.24.0 whatsnew  (h-vetinari, Feb 3, 2019)
65c7880  Re-add handling for duplicate drops  (h-vetinari, Feb 3, 2019)
2 changes: 1 addition & 1 deletion doc/source/index.rst.template
@@ -39,7 +39,7 @@ See the :ref:`overview` for more detail about what's in the library.
 {% endif %}

 {% if not single_doc -%}
-    What's New in 0.24.0 <whatsnew/v0.24.0>
+    What's New in 0.24.1 <whatsnew/v0.24.1>
     install
     getting_started/index
     user_guide/index
30 changes: 30 additions & 0 deletions doc/source/user_guide/io.rst
@@ -989,6 +989,36 @@ a single date rather than the entire array.

os.remove('tmp.csv')


.. _io.csv.mixed_timezones:

Parsing a CSV with mixed timezones
++++++++++++++++++++++++++++++++++

Pandas cannot natively represent a column or index with mixed timezones. If your CSV
file contains columns with a mixture of timezones, the default result will be
an object-dtype column with strings, even with ``parse_dates``.


.. ipython:: python

content = """\
a
2000-01-01T00:00:00+05:00
2000-01-01T00:00:00+06:00"""
df = pd.read_csv(StringIO(content), parse_dates=['a'])
df['a']

To parse the mixed-timezone values as a datetime column, pass a partially-applied
:func:`to_datetime` with ``utc=True`` as the ``date_parser``.

.. ipython:: python

df = pd.read_csv(StringIO(content), parse_dates=['a'],
date_parser=lambda col: pd.to_datetime(col, utc=True))
df['a']


.. _io.dayfirst:


53 changes: 49 additions & 4 deletions doc/source/whatsnew/v0.24.0.rst
@@ -6,7 +6,8 @@ What's New in 0.24.0 (January 25, 2019)
.. warning::

   The 0.24.x series of releases will be the last to support Python 2. Future feature
-  releases will support Python 3 only. See :ref:`install.dropping-27` for more.
+  releases will support Python 3 only. See :ref:`install.dropping-27` for more
+  details.

{{ header }}

@@ -244,7 +245,7 @@ the new extension arrays that back interval and period data.
Joining with two multi-indexes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-:func:`DataFrame.merge` and :func:`DataFrame.join` can now be used to join multi-indexed ``Dataframe`` instances on the overlaping index levels (:issue:`6360`)
+:func:`DataFrame.merge` and :func:`DataFrame.join` can now be used to join multi-indexed ``Dataframe`` instances on the overlapping index levels (:issue:`6360`)

See the :ref:`Merge, join, and concatenate
<merging.Join_with_two_multi_indexes>` documentation section.
@@ -647,6 +648,52 @@ that the dates have been converted to UTC
pd.to_datetime(["2015-11-18 15:30:00+05:30",
"2015-11-18 16:30:00+06:30"], utc=True)


.. _whatsnew_0240.api_breaking.read_csv_mixed_tz:

Parsing mixed-timezones with :func:`read_csv`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

:func:`read_csv` no longer silently converts mixed-timezone columns to UTC (:issue:`24987`).

*Previous Behavior*

.. code-block:: python

>>> import io
>>> content = """\
... a
... 2000-01-01T00:00:00+05:00
... 2000-01-01T00:00:00+06:00"""
>>> df = pd.read_csv(io.StringIO(content), parse_dates=['a'])
>>> df.a
0 1999-12-31 19:00:00
1 1999-12-31 18:00:00
Name: a, dtype: datetime64[ns]

*New Behavior*

.. ipython:: python

import io
content = """\
a
2000-01-01T00:00:00+05:00
2000-01-01T00:00:00+06:00"""
df = pd.read_csv(io.StringIO(content), parse_dates=['a'])
df.a

As can be seen, the ``dtype`` is object; each value in the column is a string.
To convert the strings to an array of datetimes, the ``date_parser`` argument
can be used, as shown below.

.. ipython:: python

df = pd.read_csv(io.StringIO(content), parse_dates=['a'],
date_parser=lambda col: pd.to_datetime(col, utc=True))
df.a

See :ref:`whatsnew_0240.api.timezone_offset_parsing` for more.

.. _whatsnew_0240.api_breaking.period_end_time:

Time values in ``dt.end_time`` and ``to_timestamp(how='end')``
@@ -1148,8 +1195,6 @@ Other API Changes
- :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`)
- :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`)
- :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`)
-- :meth:`DataFrame.set_index` now gives a better (and less frequent) KeyError, raises a ``ValueError`` for incorrect types,
-  and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
- :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`)
- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`).
23 changes: 21 additions & 2 deletions doc/source/whatsnew/v0.24.1.rst
@@ -15,6 +15,17 @@ Whats New in 0.24.1 (February XX, 2019)
These are the changes in pandas 0.24.1. See :ref:`release` for a full changelog
including other versions of pandas.

.. _whatsnew_0241.regressions:

Fixed Regressions
^^^^^^^^^^^^^^^^^

- Fixed regression in :meth:`DataFrame.to_dict` with ``orient='records'`` raising an ``AttributeError`` when the ``DataFrame`` contained more than 255 columns (:issue:`24939`)
- Fixed regression in :meth:`DataFrame.to_dict` with ``orient='records'`` converting integer column names to strings prepended with an underscore (:issue:`24940`)
- Fixed regression in :func:`read_sql` when passing certain queries with MySQL/pymysql (:issue:`24988`).
- Fixed regression in :meth:`Index.intersection` incorrectly sorting the values by default (:issue:`24959`).
- Fixed regression in :func:`merge` when merging an empty ``DataFrame`` with multiple timezone-aware columns on one of the timezone-aware columns (:issue:`25014`).
- Fixed regression in :meth:`Series.rename_axis` and :meth:`DataFrame.rename_axis` where passing ``None`` failed to remove the axis name (:issue:`25034`); see the sketch after this list
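
A minimal sketch of the ``rename_axis`` fix above (assuming pandas 0.24.1):

```python
import pandas as pd

s = pd.Series([1, 2, 3]).rename_axis('idx')
assert s.index.name == 'idx'

# The regression left the old name in place; with the fix,
# passing None removes the axis name again.
s = s.rename_axis(None)
assert s.index.name is None
```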

.. _whatsnew_0241.enhancements:

@@ -58,15 +69,23 @@ Bug Fixes
-

**Timedelta**

- Bug in :func:`to_timedelta` with ``box=False`` incorrectly returning a ``datetime64`` object instead of a ``timedelta64`` object (:issue:`24961`); see the sketch below
-
-
-
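
A quick sketch of the ``box=False`` fix (assuming pandas 0.24.x, where the ``box`` keyword still exists):

```python
import numpy as np
import pandas as pd

# box=False requests a plain numpy value instead of a pandas Timedelta.
result = pd.to_timedelta('NaT', box=False)

# The regression returned numpy.datetime64('NaT'); the fix restores
# a timedelta64 value, matching the requested type.
assert result.dtype == np.dtype('timedelta64[ns]')
```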

**Reshaping**

- Bug in :meth:`DataFrame.groupby` with :class:`Grouper` when there is a time change (DST) and grouping frequency is ``'1d'`` (:issue:`24972`); see the sketch below
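
An illustrative reconstruction of the grouping in :issue:`24972`; the reporter's exact data may differ:

```python
import pandas as pd

# Nov 4, 2018 is a DST transition in Brazil, so one "day" is only 23 hours.
idx = pd.to_datetime(['2018-11-03 12:00', '2018-11-04 12:00',
                      '2018-11-05 12:00']).tz_localize('America/Sao_Paulo')
df = pd.DataFrame({'x': [1, 2, 3]}, index=idx)

# Before the fix, daily binning over the transition could raise
# "ValueError: Values falls after last bin".
print(df.groupby(pd.Grouper(freq='1d')).sum())
```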

**Visualization**

- Fixed the warning for implicitly registered matplotlib converters not showing. See :ref:`whatsnew_0211.converters` for more (:issue:`24963`).


**Other**

-
- Fixed AttributeError when printing a DataFrame's HTML repr after accessing the IPython config object (:issue:`25036`)
-

.. _whatsnew_0.241.contributors:
2 changes: 1 addition & 1 deletion pandas/core/arrays/numpy_.py
@@ -222,7 +222,7 @@ def __getitem__(self, item):
             item = item._ndarray

         result = self._ndarray[item]
-        if not lib.is_scalar(result):
+        if not lib.is_scalar(item):
             result = type(self)(result)
         return result
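The one-character fix above keys the wrapping decision off the indexer rather than the result; otherwise scalar indexing of a nested object array re-wraps the returned element. A sketch, assuming pandas 0.24.x:

```python
import numpy as np
import pandas as pd

# An object-dtype ndarray whose elements are themselves arrays.
nested = np.empty(2, dtype=object)
nested[0] = np.array([1, 2])
nested[1] = np.array([3, 4, 5])
arr = pd.arrays.PandasArray(nested)

# Scalar indexing returns the element itself; checking is_scalar(result)
# wrongly re-wrapped this ndarray element in a PandasArray.
assert isinstance(arr[0], np.ndarray)

# Sequence indexing still wraps the result.
assert isinstance(arr[[0, 1]], pd.arrays.PandasArray)
```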
86 changes: 38 additions & 48 deletions pandas/core/frame.py
@@ -17,6 +17,7 @@
 import itertools
 import sys
 import warnings
+from distutils.version import LooseVersion
 from textwrap import dedent

 import numpy as np
@@ -70,7 +71,7 @@
     is_iterator,
     is_sequence,
     is_named_tuple)
-from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
+from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
 from pandas.core.dtypes.missing import isna, notna

 from pandas.core import algorithms
@@ -646,9 +647,15 @@ def _repr_html_(self):
         # XXX: In IPython 3.x and above, the Qt console will not attempt to
         # display HTML, so this check can be removed when support for
         # IPython 2.x is no longer needed.
-        if console.in_qtconsole():
-            # 'HTML output is disabled in QtConsole'
-            return None
+        try:
+            import IPython
+        except ImportError:
+            pass
+        else:
+            if LooseVersion(IPython.__version__) < LooseVersion('3.0'):
+                if console.in_qtconsole():
+                    # 'HTML output is disabled in QtConsole'
+                    return None

         if self._info_repr():
             buf = StringIO(u(""))
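
The guard above probes ``console.in_qtconsole()`` only on IPython older than 3.0, because the probe touches the IPython config object and triggered the AttributeError in :issue:`25036` on newer IPython. A sketch of the version gate, assuming IPython is installed:

```python
from distutils.version import LooseVersion
import IPython

# Only legacy IPython (< 3.0) needs the QtConsole HTML workaround.
if LooseVersion(IPython.__version__) < LooseVersion('3.0'):
    print('old IPython: skip the HTML repr in QtConsole')
else:
    print('modern IPython: the HTML repr is safe')
```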
@@ -847,7 +854,7 @@ def itertuples(self, index=True, name="Pandas"):
         ----------
         index : bool, default True
             If True, return the index as the first element of the tuple.
-        name : str, default "Pandas"
+        name : str or None, default "Pandas"
             The name of the returned namedtuples or None to return regular
             tuples.
@@ -1290,23 +1297,26 @@ def to_dict(self, orient='dict', into=dict):
                 ('columns', self.columns.tolist()),
                 ('data', [
                     list(map(com.maybe_box_datetimelike, t))
-                    for t in self.itertuples(index=False)]
-                )))
+                    for t in self.itertuples(index=False, name=None)
+                ])))
         elif orient.lower().startswith('s'):
             return into_c((k, com.maybe_box_datetimelike(v))
                           for k, v in compat.iteritems(self))
         elif orient.lower().startswith('r'):
+            columns = self.columns.tolist()
+            rows = (dict(zip(columns, row))
+                    for row in self.itertuples(index=False, name=None))
             return [
                 into_c((k, com.maybe_box_datetimelike(v))
-                       for k, v in compat.iteritems(row._asdict()))
-                for row in self.itertuples(index=False)]
+                       for k, v in compat.iteritems(row))
+                for row in rows]
         elif orient.lower().startswith('i'):
             if not self.index.is_unique:
                 raise ValueError(
                     "DataFrame index must be unique for orient='index'."
                 )
             return into_c((t[0], dict(zip(self.columns, t[1:])))
-                          for t in self.itertuples())
+                          for t in self.itertuples(name=None))
         else:
             raise ValueError("orient '{o}' not understood".format(o=orient))

@@ -4127,33 +4137,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         4  16    10  2014    31
         """
         inplace = validate_bool_kwarg(inplace, 'inplace')
-
-        err_msg = ('The parameter "keys" may be a column key, one-dimensional '
-                   'array, or a list containing only valid column keys and '
-                   'one-dimensional arrays.')
-
-        if (is_scalar(keys) or isinstance(keys, tuple)
-                or isinstance(keys, (ABCIndexClass, ABCSeries, np.ndarray))):
-            # make sure we have a container of keys/arrays we can iterate over
-            # tuples can appear as valid column keys!
-            if not isinstance(keys, list):
-                keys = [keys]
-        elif not isinstance(keys, list):
-            raise ValueError(err_msg)
-
-        missing = []
-        for col in keys:
-            if (is_scalar(col) or isinstance(col, tuple)):
-                # if col is a valid column key, everything is fine
-                # tuples are always considered keys, never as list-likes
-                if col not in self:
-                    missing.append(col)
-            elif (not isinstance(col, (ABCIndexClass, ABCSeries,
-                                       np.ndarray, list))
-                  or getattr(col, 'ndim', 1) > 1):
-                raise ValueError(err_msg)
-
-        if missing:
-            raise KeyError('{}'.format(missing))

         if inplace:
             frame = self
@@ -4164,31 +4149,37 @@
         names = []
         if append:
             names = [x for x in self.index.names]
-            if isinstance(self.index, ABCMultiIndex):
+            if isinstance(self.index, MultiIndex):
                 for i in range(self.index.nlevels):
                     arrays.append(self.index._get_level_values(i))
             else:
                 arrays.append(self.index)

         to_remove = []
         for col in keys:
-            if isinstance(col, ABCMultiIndex):
-                for n in range(col.nlevels):
+            if isinstance(col, MultiIndex):
+                # append all but the last column so we don't have to modify
+                # the end of this loop
+                for n in range(col.nlevels - 1):
                     arrays.append(col._get_level_values(n))
+
+                level = col._get_level_values(col.nlevels - 1)
                 names.extend(col.names)
-            elif isinstance(col, (ABCIndexClass, ABCSeries)):
-                # if Index then not MultiIndex (treated above)
-                arrays.append(col)
+            elif isinstance(col, Series):
+                level = col._values
                 names.append(col.name)
+            elif isinstance(col, Index):
+                level = col
+                names.append(col.name)
-            elif isinstance(col, (list, np.ndarray)):
-                arrays.append(col)
+            elif isinstance(col, (list, np.ndarray, Index)):
+                level = col
                 names.append(None)
+            # from here, col can only be a column label
             else:
-                arrays.append(frame[col]._values)
+                level = frame[col]._values
                 names.append(col)
                 if drop:
                     to_remove.append(col)
+            arrays.append(level)

         index = ensure_index_from_sequences(arrays, names)
@@ -4197,8 +4188,7 @@
             raise ValueError('Index has duplicate keys: {dup}'.format(
                 dup=duplicates))

-        # use set to handle duplicate column names gracefully in case of drop
-        for c in set(to_remove):
+        for c in to_remove:
             del frame[c]

         # clear up memory usage
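For reference, the reverted ``set_index`` keeps accepting the same kinds of keys; a sketch of inputs that work both before and after this PR:

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})

df.set_index('a')                          # a single column label
df.set_index(['a', 'b'])                   # a list of labels -> MultiIndex
df.set_index(np.array([7, 8, 9]))          # an array becomes the index directly
df.set_index([pd.Index([7, 8, 9]), 'a'])   # arrays and labels can be mixed
```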