Skip to content

Commit

Permalink
ENH: select_dtypes now allows 'datetimetz' for generically selecting …
Browse files Browse the repository at this point in the history
…datetimes with timezones (pandas-dev#14910)
  • Loading branch information
jreback authored and ShaharBental committed Dec 26, 2016
1 parent bd04002 commit 808f386
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 3 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ Other enhancements
- ``pd.Series.interpolate`` now supports timedelta as an index type with ``method='time'`` (:issue:`6424`)
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)

- ``.select_dtypes()`` now allows the string ``'datetimetz'`` to generically select datetimes with a timezone (:issue:`14910`)

.. _whatsnew_0200.api_breaking:

Expand Down Expand Up @@ -249,5 +250,4 @@ Bug Fixes




- Require at least version 0.23 of Cython to avoid problems with character encodings (:issue:`14699`)
5 changes: 5 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2257,7 +2257,12 @@ def select_dtypes(self, include=None, exclude=None):
this will return *all* object dtype columns
* See the `numpy dtype hierarchy
<http://docs.scipy.org/doc/numpy/reference/arrays.scalars.html>`__
* To select datetimes, use np.datetime64, 'datetime' or 'datetime64'
* To select timedeltas, use np.timedelta64, 'timedelta' or
'timedelta64'
* To select Pandas categorical dtypes, use 'category'
* To select Pandas datetimetz dtypes, use 'datetimetz' (new in 0.20.0),
or a 'datetime64[ns, tz]' string
Examples
--------
Expand Down
37 changes: 35 additions & 2 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,48 @@ def test_select_dtypes_include(self):
'c': np.arange(3, 6).astype('u1'),
'd': np.arange(4.0, 7.0, dtype='float64'),
'e': [True, False, True],
'f': pd.Categorical(list('abc'))})
'f': pd.Categorical(list('abc')),
'g': pd.date_range('20130101', periods=3),
'h': pd.date_range('20130101', periods=3,
tz='US/Eastern'),
'i': pd.date_range('20130101', periods=3,
tz='CET'),
'j': pd.period_range('2013-01', periods=3,
freq='M'),
'k': pd.timedelta_range('1 day', periods=3)})

ri = df.select_dtypes(include=[np.number])
ei = df[['b', 'c', 'd', 'k']]
assert_frame_equal(ri, ei)

ri = df.select_dtypes(include=[np.number], exclude=['timedelta'])
ei = df[['b', 'c', 'd']]
assert_frame_equal(ri, ei)

ri = df.select_dtypes(include=[np.number, 'category'])
ri = df.select_dtypes(include=[np.number, 'category'],
exclude=['timedelta'])
ei = df[['b', 'c', 'd', 'f']]
assert_frame_equal(ri, ei)

ri = df.select_dtypes(include=['datetime'])
ei = df[['g']]
assert_frame_equal(ri, ei)

ri = df.select_dtypes(include=['datetime64'])
ei = df[['g']]
assert_frame_equal(ri, ei)

ri = df.select_dtypes(include=['datetimetz'])
ei = df[['h', 'i']]
assert_frame_equal(ri, ei)

ri = df.select_dtypes(include=['timedelta'])
ei = df[['k']]
assert_frame_equal(ri, ei)

self.assertRaises(NotImplementedError,
lambda: df.select_dtypes(include=['period']))

def test_select_dtypes_exclude(self):
df = DataFrame({'a': list('abc'),
'b': list(range(1, 4)),
Expand Down
5 changes: 5 additions & 0 deletions pandas/types/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,11 @@ def _get_dtype_from_object(dtype):
pass
return dtype.type
elif isinstance(dtype, string_types):
if dtype in ['datetimetz', 'datetime64tz']:
return DatetimeTZDtype.type
elif dtype in ['period']:
raise NotImplementedError

if dtype == 'datetime' or dtype == 'timedelta':
dtype += '64'

Expand Down

0 comments on commit 808f386

Please sign in to comment.