Skip to content

Commit

Permalink
BUG: provide for automatic conversion of object -> datetime64[ns] typ…
Browse files Browse the repository at this point in the history
…es upon creation (in make_block)

     This (mostly) obviates the need to call convert_objects.
     In addition, this enables setting NaT in datetime64[ns] columns via np.nan (converted on the fly).
  • Loading branch information
jreback authored and wesm committed Dec 28, 2012
1 parent d5b7907 commit e06334a
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 9 deletions.
24 changes: 19 additions & 5 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,25 @@ def _setitem_with_indexer(self, indexer, value):
if np.prod(values.shape):
values[plane_indexer] = value
except ValueError:
for item, v in zip(item_labels[het_idx], value):
data = self.obj[item]
values = data.values
if np.prod(values.shape):
values[plane_indexer] = v

# convert nan to iNaT if possible
if data.dtype == 'M8[ns]':
mask = com._isnull(value)
if np.isscalar(value) and mask:
from pandas import tslib
value = tslib.iNaT
values[plane_indexer] = value
elif isinstance(value, np.array) and mask.any():
from pandas import tslib
value = value.copy()
value.putmask(iNat,mask)
values[plane_indexer] = value
else:
for item, v in zip(item_labels[het_idx], value):
data = self.obj[item]
values = data.values
if np.prod(values.shape):
values[plane_indexer] = v
else:
if isinstance(indexer, tuple):
indexer = _maybe_convert_ix(*indexer)
Expand Down
16 changes: 15 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ def get_values(self, dtype):
def make_block(values, items, ref_items):
dtype = values.dtype
vtype = dtype.type
klass = None

if issubclass(vtype, np.floating):
klass = FloatBlock
Expand All @@ -459,7 +460,20 @@ def make_block(values, items, ref_items):
klass = IntBlock
elif dtype == np.bool_:
klass = BoolBlock
else:

# try to infer a datetimeblock
if klass is None and np.prod(values.shape):
inferred_type = lib.infer_dtype(values.flatten())
if inferred_type == 'datetime':

# we have an object array that has been inferred as datetime, so convert it
try:
values = tslib.array_to_datetime(values.flatten()).reshape(values.shape)
klass = DatetimeBlock
except: # it already object, so leave it
pass

if klass is None:
klass = ObjectBlock

return klass(values, items, ref_items, ndim=values.ndim)
Expand Down
23 changes: 22 additions & 1 deletion pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import pandas.core.format as fmt
import pandas.core.datetools as datetools
from pandas.core.api import (DataFrame, Index, Series, notnull, isnull,
MultiIndex, DatetimeIndex)
MultiIndex, DatetimeIndex, Timestamp)
from pandas.io.parsers import read_csv

from pandas.util.testing import (assert_almost_equal,
Expand Down Expand Up @@ -1073,6 +1073,27 @@ def test_setitem_single_column_mixed(self):
expected = [nan, 'qux', nan, 'qux', nan]
assert_almost_equal(df['str'].values, expected)

def test_setitem_single_column_mixed_datetime(self):
    # Exercise scalar assignment into the datetime64[ns] column of a frame
    # that otherwise holds only float64 columns.
    from pandas import tslib

    row_labels = ['a', 'b', 'c', 'd', 'e']
    float_columns = ['foo', 'bar', 'baz']
    frame = DataFrame(randn(5, 3), index=row_labels, columns=float_columns)

    # broadcasting a single Timestamp adds one datetime64[ns] column
    frame['timestamp'] = Timestamp('20010102')

    # three float columns plus the new timestamp column
    expected_counts = Series({'float64': 3, 'datetime64[ns]': 1})
    assert_series_equal(frame.get_dtype_counts(), expected_counts)

    # iNaT is the native missing-value marker for datetime64[ns]
    frame.ix['b', 'timestamp'] = tslib.iNaT

    # nan is a float, yet this assignment must not raise; prior to 0.10.1
    # it failed (a TypeError was expected from df.ix.__setitem__ with
    # ('c', 'timestamp') and nan)
    frame.ix['b', 'timestamp'] = nan

def test_setitem_frame(self):
piece = self.frame.ix[:2, ['A', 'B']]
self.frame.ix[-2:, ['A', 'B']] = piece.values
Expand Down
5 changes: 3 additions & 2 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1244,9 +1244,9 @@ def test_append_concat(self):

def test_set_dataframe_column_ns_dtype(self):
x = DataFrame([datetime.now(), datetime.now()])
self.assert_(x[0].dtype == object)
#self.assert_(x[0].dtype == object)

x[0] = to_datetime(x[0])
#x[0] = to_datetime(x[0])
self.assert_(x[0].dtype == np.dtype('M8[ns]'))

def test_groupby_count_dateparseerror(self):
Expand Down Expand Up @@ -2075,6 +2075,7 @@ def test_get_level_values_box(self):
def test_frame_apply_dont_convert_datetime64(self):
from pandas.tseries.offsets import BDay
df = DataFrame({'x1': [datetime(1996,1,1)]})

df = df.applymap(lambda x: x+BDay())
df = df.applymap(lambda x: x+BDay())

Expand Down

0 comments on commit e06334a

Please sign in to comment.