pandas-dev · jreback · Jun 10, 2019 · Jan 13, 2019 · Jan 14, 2019 · Jan 14, 2019
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -25,6 +25,7 @@ Other Enhancements
 - ``Series.str`` has gained :meth:`Series.str.casefold` method to removes all case distinctions present in a string (:issue:`25405`)
 - :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`)
 - :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`)
+- :class:`DatetimeIndex` and :class:`TimedeltaIndex` now have a ``mean`` method (:issue:`24757`)
 -
 
 .. _whatsnew_0250.api_breaking:

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -1435,6 +1435,50 @@ def max(self, axis=None, skipna=True, *args, **kwargs):
         # Don't have to worry about NA `result`, since no NA went in.
         return self._box_func(result)
 
+    def mean(self, axis=None, skipna=True):
+        """
+        Return the mean value of the Array, or NaT if it has no valid values.
+
+        .. versionadded:: 0.25.0
+
+        Parameters
+        ----------
+        axis : None
+            Dummy parameter to match NumPy signature
+        skipna : bool, default True
+            Whether to ignore NaT elements; if False, any NaT yields NaT
+
+        See Also
+        --------
+        numpy.ndarray.mean
+        Series.mean : Return the mean value in a Series.
+        """
+        if is_period_dtype(self):
+            # See discussion in GH#24757
+            raise NotImplementedError(
+                "mean is not implemented for {cls} since the meaning is "
+                "ambiguous.  An alternative is "
+                "obj.to_timestamp(how='start').mean()"
+                .format(cls=type(self).__name__))
+
+        nv.validate_minmax_axis(axis)
+
+        mask = self.isna()
+        if skipna:
+            values = self[~mask]
+        elif mask.any():
+            return NaT
+        else:
+            values = self
+
+        if not len(values):
+            # short-circuit: nothing to average in an empty selection
+            return NaT
+
+        result = nanops.nanmean(values.view('i8'), skipna=skipna)
+        # Don't have to worry about NA `result`, since no NA went in.
+        return self._box_func(result)
+
 
 # -------------------------------------------------------------------
 # Shared Constructor Helpers

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -72,6 +72,7 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin):
     _maybe_mask_results = ea_passthrough(
         DatetimeLikeArrayMixin._maybe_mask_results)
     __iter__ = ea_passthrough(DatetimeLikeArrayMixin.__iter__)
+    mean = ea_passthrough(DatetimeLikeArrayMixin.mean)
 
     @property
     def freq(self):

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -3686,6 +3686,10 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
         elif is_datetime64_dtype(delegate):
             # use DatetimeIndex implementation to handle skipna correctly
             delegate = DatetimeIndex(delegate)
+        elif is_timedelta64_dtype(delegate) and hasattr(TimedeltaIndex, name):
+            # use TimedeltaIndex to handle skipna correctly
+            # TODO: remove hasattr check after TimedeltaIndex has `std` method
+            delegate = TimedeltaIndex(delegate)
 
         # dispatch to numpy arrays
         elif isinstance(delegate, np.ndarray):

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -1194,6 +1194,47 @@ def test_mean_corner(self, float_frame, float_string_frame):
         means = float_frame.mean(0)
         assert means['bool'] == float_frame['bool'].values.mean()
 
+    def test_mean_datetimelike(self):
+        # GH#24757 numeric_only=True keeps only 'A'; the default keeps 'A' and
+        #  timedelta 'C' but drops datetime 'B' and period 'D'
+
+        df = pd.DataFrame({
+            'A': np.arange(3),
+            'B': pd.date_range('2016-01-01', periods=3),
+            'C': pd.timedelta_range('1D', periods=3),
+            'D': pd.period_range('2016', periods=3, freq='A')
+        })
+        result = df.mean(numeric_only=True)
+        expected = pd.Series({'A': 1.})
+        tm.assert_series_equal(result, expected)
+
+        result = df.mean()
+        expected = pd.Series({
+            'A': 1.,
+            'C': df.loc[1, 'C']
+        })
+        tm.assert_series_equal(result, expected)
+
+    @pytest.mark.xfail(reason="casts to object-dtype and then tries to "
+                              "add timestamps",
+                       raises=TypeError, strict=True)
+    def test_mean_datetimelike_numeric_only_false(self):
+        df = pd.DataFrame({
+            'A': np.arange(3),
+            'B': pd.date_range('2016-01-01', periods=3),
+            'C': pd.timedelta_range('1D', periods=3),
+            'D': pd.period_range('2016', periods=3, freq='A')
+        })
+
+        result = df.mean(numeric_only=False)  # xfail: TypeError expected
+        expected = pd.Series({
+            'A': 1,
+            'B': df.loc[1, 'B'],
+            'C': df.loc[1, 'C'],
+            'D': df.loc[1, 'D']
+        })
+        tm.assert_series_equal(result, expected)
+
     def test_stats_mixed_type(self, float_string_frame):
         # don't blow up
         float_string_frame.std(1)

diff --git a/pandas/tests/reductions/test_stat_reductions.py b/pandas/tests/reductions/test_stat_reductions.py
@@ -11,9 +11,78 @@
 
 import pandas as pd
 from pandas import DataFrame, Series, compat
+from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
 import pandas.util.testing as tm
 
 
+class TestDatetimeLikeStatReductions(object):
+
+    @pytest.mark.parametrize('box', [Series, pd.Index, DatetimeArray])
+    def test_dt64_mean(self, tz_naive_fixture, box):
+        tz = tz_naive_fixture
+
+        dti = pd.date_range('2001-01-01', periods=11, tz=tz)
+        # shuffle so that we are not just working with monotone-increasing
+        dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6])
+        dtarr = dti._data
+
+        obj = box(dtarr)
+        assert obj.mean() == pd.Timestamp('2001-01-06', tz=tz)
+        assert obj.mean(skipna=False) == pd.Timestamp('2001-01-06', tz=tz)
+
+        # dtarr[-2] is 2001-01-03 (take index 2); dropping it shifts the mean
+        dtarr[-2] = pd.NaT
+
+        obj = box(dtarr)
+        assert obj.mean() == pd.Timestamp('2001-01-06 07:12:00', tz=tz)
+        assert obj.mean(skipna=False) is pd.NaT
+
+    @pytest.mark.parametrize('box', [Series, pd.Index, PeriodArray])
+    def test_period_mean(self, box):
+        # GH#24757
+        dti = pd.date_range('2001-01-01', periods=11)
+        # shuffle so that we are not just working with monotone-increasing
+        dti = dti.take([4, 1, 3, 10, 9, 7, 8, 5, 0, 2, 6])
+
+        # use hourly frequency to avoid rounding errors in expected results
+        #  TODO: flesh this out with different frequencies
+        parr = dti._data.to_period('H')
+        obj = box(parr)
+        with pytest.raises(NotImplementedError, match="ambiguous"):
+            obj.mean()
+        with pytest.raises(NotImplementedError, match="ambiguous"):
+            obj.mean(skipna=True)
+
+        # parr[-2] is the hourly period starting 2001-01-03 (take index 2)
+        parr[-2] = pd.NaT
+
+        with pytest.raises(NotImplementedError, match="ambiguous"):
+            obj.mean()
+        with pytest.raises(NotImplementedError, match="ambiguous"):
+            obj.mean(skipna=True)
+
+    @pytest.mark.parametrize('box', [Series, pd.Index, TimedeltaArray])
+    def test_td64_mean(self, box):
+        tdi = pd.TimedeltaIndex([0, 3, -2, -7, 1, 2, -1, 3, 5, -2, 4],
+                                unit='D')
+
+        tdarr = tdi._data
+        obj = box(tdarr)
+
+        result = obj.mean()
+        expected = np.array(tdarr).mean()
+        assert result == expected
+
+        tdarr[0] = pd.NaT
+        assert obj.mean(skipna=False) is pd.NaT
+
+        result2 = obj.mean(skipna=True)
+        assert result2 == tdi[1:].mean()
+
+        # exact equality fails by 1 nanosecond
+        assert result2.round('us') == (result * 11. / 10).round('us')
+
+
 class TestSeriesStatReductions(object):
     # Note: the name TestSeriesStatReductions indicates these tests
     #  were moved from a series-specific test file, _not_ that these tests are