Skip to content

Commit

Permalink
Merge pull request #3516 from jreback/GH3416
Browse files Browse the repository at this point in the history
BUG/CLN: datetime64/timedelta64
  • Loading branch information
jreback committed May 8, 2013
2 parents dc84742 + d54c6a7 commit ba19ff9
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 25 deletions.
8 changes: 8 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ pandas 0.11.1
Note: The default value will change in 0.12 to the "no mangle" behaviour.
If your code relies on this behaviour, explicitly specify mangle_dupe_cols=True
in your calls.
- Do not allow ``astype`` on ``datetime64[ns]`` except to ``object``, or on
``timedelta64[ns]`` except to ``object``/``int`` (GH3425_)
- Do not allow datetimelike/timedeltalike creation except with valid types
(e.g. cannot pass ``datetime64[ms]``) (GH3423_)

**Bug Fixes**

Expand Down Expand Up @@ -88,11 +92,15 @@ pandas 0.11.1
- Fixed bug in mixed-frame assignment with aligned series (GH3492_)
- Fixed bug where selecting month/quarter/year from a series would not select the time element
on the last day (GH3546_)
- Properly convert np.datetime64 objects in a Series (GH3416_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
.. _GH2786: https://github.com/pydata/pandas/issues/2786
.. _GH2194: https://github.com/pydata/pandas/issues/2194
.. _GH3230: https://github.com/pydata/pandas/issues/3230
.. _GH3425: https://github.com/pydata/pandas/issues/3425
.. _GH3416: https://github.com/pydata/pandas/issues/3416
.. _GH3423: https://github.com/pydata/pandas/issues/3423
.. _GH3251: https://github.com/pydata/pandas/issues/3251
.. _GH3379: https://github.com/pydata/pandas/issues/3379
.. _GH3480: https://github.com/pydata/pandas/issues/3480
Expand Down
37 changes: 29 additions & 8 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class AmbiguousIndexError(PandasError, KeyError):


_POSSIBLY_CAST_DTYPES = set([ np.dtype(t) for t in ['M8[ns]','m8[ns]','O','int8','uint8','int16','uint16','int32','uint32','int64','uint64'] ])
_NS_DTYPE = np.dtype('M8[ns]')
_TD_DTYPE = np.dtype('m8[ns]')
_INT64_DTYPE = np.dtype(np.int64)

def isnull(obj):
'''
Expand Down Expand Up @@ -1084,6 +1087,12 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):

if is_datetime64 or is_timedelta64:

# force the dtype if needed
if is_datetime64 and dtype != _NS_DTYPE:
raise TypeError("cannot convert datetimelike to dtype [%s]" % dtype)
elif is_timedelta64 and dtype != _TD_DTYPE:
raise TypeError("cannot convert timedeltalike to dtype [%s]" % dtype)

if np.isscalar(value):
if value == tslib.iNaT or isnull(value):
value = tslib.iNaT
Expand All @@ -1098,7 +1107,8 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
elif np.prod(value.shape) and value.dtype != dtype:
try:
if is_datetime64:
value = tslib.array_to_datetime(value, coerce = coerce)
from pandas.tseries.tools import to_datetime
value = to_datetime(value, coerce=coerce).values
elif is_timedelta64:
value = _possibly_cast_to_timedelta(value)
except:
Expand All @@ -1119,12 +1129,12 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False):
v = [ v ]
if len(v):
inferred_type = lib.infer_dtype(v)
if inferred_type == 'datetime':
if inferred_type in ['datetime','datetime64']:
try:
value = tslib.array_to_datetime(np.array(v))
except:
pass
elif inferred_type == 'timedelta':
elif inferred_type in ['timedelta','timedelta64']:
value = _possibly_cast_to_timedelta(value)

return value
Expand Down Expand Up @@ -1515,9 +1525,24 @@ def _astype_nansafe(arr, dtype, copy = True):
if not isinstance(dtype, np.dtype):
dtype = np.dtype(dtype)

if issubclass(arr.dtype.type, np.datetime64):
if is_datetime64_dtype(arr):
if dtype == object:
return tslib.ints_to_pydatetime(arr.view(np.int64))
elif issubclass(dtype.type, np.int):
return arr.view(dtype)
elif dtype != _NS_DTYPE:
raise TypeError("cannot astype a datetimelike from [%s] to [%s]" % (arr.dtype,dtype))
return arr.astype(_NS_DTYPE)
elif is_timedelta64_dtype(arr):
if issubclass(dtype.type, np.int):
return arr.view(dtype)
elif dtype == object:
return arr.astype(object)

# in py3, timedelta64[ns] are int64
elif (py3compat.PY3 and dtype not in [_INT64_DTYPE,_TD_DTYPE]) or (not py3compat.PY3 and dtype != _TD_DTYPE):
raise TypeError("cannot astype a timedelta from [%s] to [%s]" % (arr.dtype,dtype))
return arr.astype(_TD_DTYPE)
elif (np.issubdtype(arr.dtype, np.floating) and
np.issubdtype(dtype, np.integer)):

Expand Down Expand Up @@ -1721,9 +1746,6 @@ def _check_as_is(x):
self.queue.truncate(0)


_NS_DTYPE = np.dtype('M8[ns]')


def _concat_compat(to_concat, axis=0):
# filter empty arrays
to_concat = [x for x in to_concat if x.shape[axis] > 0]
Expand Down Expand Up @@ -1751,7 +1773,6 @@ def _to_pydatetime(x):

return x


def _where_compat(mask, arr1, arr2):
if arr1.dtype == _NS_DTYPE and arr2.dtype == _NS_DTYPE:
new_vals = np.where(mask, arr1.view(np.int64), arr2.view(np.int64))
Expand Down
6 changes: 1 addition & 5 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from numpy import nan
import numpy as np

from pandas.core.common import _possibly_downcast_to_dtype, isnull
from pandas.core.common import _possibly_downcast_to_dtype, isnull, _NS_DTYPE, _TD_DTYPE
from pandas.core.index import Index, MultiIndex, _ensure_index, _handle_legacy_indexes
from pandas.core.indexing import _check_slice_bounds, _maybe_convert_indices
import pandas.core.common as com
Expand Down Expand Up @@ -740,10 +740,6 @@ def should_store(self, value):
(np.integer, np.floating, np.complexfloating,
np.datetime64, np.bool_))

_NS_DTYPE = np.dtype('M8[ns]')
_TD_DTYPE = np.dtype('m8[ns]')


class DatetimeBlock(Block):
_can_hold_na = True

Expand Down
28 changes: 28 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,27 @@ def test_constructor_dtype_datetime64(self):
self.assert_(isnull(s[1]) == True)
self.assert_(s.dtype == 'M8[ns]')

# GH3416
dates = [
np.datetime64(datetime(2013, 1, 1)),
np.datetime64(datetime(2013, 1, 2)),
np.datetime64(datetime(2013, 1, 3)),
]

s = Series(dates)
self.assert_(s.dtype == 'M8[ns]')

s.ix[0] = np.nan
self.assert_(s.dtype == 'M8[ns]')

# invalid astypes
for t in ['s','D','us','ms']:
self.assertRaises(TypeError, s.astype, 'M8[%s]' % t)

# GH3414 related
self.assertRaises(TypeError, lambda x: Series(Series(dates).astype('int')/1000000,dtype='M8[ms]'))
self.assertRaises(TypeError, lambda x: Series(dates, dtype='datetime64'))

def test_constructor_dict(self):
d = {'a': 0., 'b': 1., 'c': 2.}
result = Series(d, index=['b', 'c', 'd', 'a'])
Expand Down Expand Up @@ -1809,6 +1830,13 @@ def test_constructor_dtype_timedelta64(self):
td = Series([ timedelta(days=i) for i in range(3) ] + [ np.nan ], dtype='m8[ns]' )
self.assert_(td.dtype=='timedelta64[ns]')

# invalid astypes
for t in ['s','D','us','ms']:
self.assertRaises(TypeError, td.astype, 'm8[%s]' % t)

# valid astype
td.astype('int')

# this is an invalid casting
self.assertRaises(Exception, Series, [ timedelta(days=i) for i in range(3) ] + [ 'foo' ], dtype='m8[ns]' )

Expand Down
5 changes: 1 addition & 4 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import numpy as np

from pandas.core.common import isnull
from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE
from pandas.core.index import Index, Int64Index
from pandas.tseries.frequencies import (
infer_freq, to_offset, get_period_alias,
Expand Down Expand Up @@ -92,9 +92,6 @@ class TimeSeriesError(Exception):


_midnight = time(0, 0)
_NS_DTYPE = np.dtype('M8[ns]')
_INT64_DTYPE = np.dtype(np.int64)


class DatetimeIndex(Int64Index):
"""
Expand Down
6 changes: 1 addition & 5 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import pandas.tseries.frequencies as _freq_mod

import pandas.core.common as com
from pandas.core.common import isnull
from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE
from pandas.util import py3compat

from pandas.lib import Timestamp
Expand Down Expand Up @@ -516,10 +516,6 @@ def wrapper(self, other):
return result
return wrapper

_INT64_DTYPE = np.dtype(np.int64)
_NS_DTYPE = np.dtype('M8[ns]')


class PeriodIndex(Int64Index):
"""
Immutable ndarray holding ordinal values indicating regular periods in
Expand Down
2 changes: 1 addition & 1 deletion pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1470,7 +1470,7 @@ def test_frame_datetime64_handling_groupby(self):
(3, np.datetime64('2012-07-04'))],
columns=['a', 'date'])
result = df.groupby('a').first()
self.assertEqual(result['date'][3], np.datetime64('2012-07-03'))
self.assertEqual(result['date'][3], datetime(2012,7,3))

def test_series_interpolate_intraday(self):
# #1698
Expand Down
6 changes: 4 additions & 2 deletions pandas/tseries/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def _maybe_get_tz(tz):


def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
format=None):
format=None, coerce=False):
"""
Convert argument to datetime
Expand All @@ -68,6 +68,7 @@ def to_datetime(arg, errors='ignore', dayfirst=False, utc=None, box=True,
If True returns a DatetimeIndex, if False returns ndarray of values
format : string, default None
strftime to parse time, eg "%d/%m/%Y"
coerce : boolean, default False
If True, force errors to NaT
Returns
-------
Expand All @@ -84,7 +85,8 @@ def _convert_f(arg):
result = tslib.array_strptime(arg, format)
else:
result = tslib.array_to_datetime(arg, raise_=errors == 'raise',
utc=utc, dayfirst=dayfirst)
utc=utc, dayfirst=dayfirst,
coerce=coerce)
if com.is_datetime64_dtype(result) and box:
result = DatetimeIndex(result, tz='utc' if utc else None)
return result
Expand Down

0 comments on commit ba19ff9

Please sign in to comment.