Skip to content

Commit

Permalink
Merge pull request #2846 from stephenwlin/fix-maybe-convert-objects
Browse files Browse the repository at this point in the history
BUG: Various issues with maybe_convert_objects (GH #2845)
thanks!
  • Loading branch information
jreback committed Feb 14, 2013
2 parents fc8de6d + a3bfa7f commit d44e9c7
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 53 deletions.
106 changes: 53 additions & 53 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
bint seen_bool = 0
bint seen_object = 0
bint seen_null = 0
bint seen_numeric = 0
object val, onan
float64_t fval, fnan

Expand Down Expand Up @@ -437,12 +438,17 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
else:
seen_object = 1
# objects[i] = val.astype('O')
break
elif util.is_integer_object(val):
seen_int = 1
floats[i] = <float64_t> val
complexes[i] = <double complex> val
if not seen_null:
ints[i] = val
try:
ints[i] = val
except OverflowError:
seen_object = 1
break
elif util.is_complex_object(val):
complexes[i] = val
seen_complex = 1
Expand All @@ -452,6 +458,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
idatetimes[i] = convert_to_tsobject(val, None).value
else:
seen_object = 1
break
elif try_float and not util.is_string_object(val):
# this will convert Decimal objects
try:
Expand All @@ -460,72 +467,65 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
seen_float = 1
except Exception:
seen_object = 1
break
else:
seen_object = 1
break

if not safe:
if seen_null:
if (seen_float or seen_int) and not seen_object:
if seen_complex:
return complexes
else:
return floats
else:
return objects
else:
if seen_object:
return objects
elif not seen_bool:
if seen_datetime:
if seen_complex or seen_float or seen_int:
return objects
else:
return datetimes
else:
seen_numeric = seen_complex or seen_float or seen_int

if not seen_object:

if not safe:
if seen_null:
if not seen_bool and not seen_datetime:
if seen_complex:
return complexes
elif seen_float:
elif seen_float or seen_int:
return floats
elif seen_int:
return ints
else:
if not seen_float and not seen_int:
if not seen_bool:
if seen_datetime:
if not seen_numeric:
return datetimes
else:
if seen_complex:
return complexes
elif seen_float:
return floats
elif seen_int:
return ints
elif not seen_datetime and not seen_numeric:
return bools.view(np.bool_)

return objects
else:
# don't cast int to float, etc.
if seen_null:
if (seen_float or seen_int) and not seen_object:
if seen_complex:
return complexes
else:
return floats
else:
return objects
else:
if seen_object:
return objects
elif not seen_bool:
if seen_datetime:
if seen_complex or seen_float or seen_int:
return objects
else:
return datetimes
else:
if seen_int and seen_float:
return objects
elif seen_complex:
return complexes
# don't cast int to float, etc.
if seen_null:
if not seen_bool and not seen_datetime:
if seen_complex:
if not seen_int:
return complexes
elif seen_float:
return floats
elif seen_int:
return ints
if not seen_int:
return floats
else:
if not seen_float and not seen_int:
if not seen_bool:
if seen_datetime:
if not seen_numeric:
return datetimes
else:
if seen_complex:
if not seen_int:
return complexes
elif seen_float:
if not seen_int:
return floats
elif seen_int:
return ints
elif not seen_datetime and not seen_numeric:
return bools.view(np.bool_)

return objects
return objects


def convert_sql_column(x):
Expand Down
63 changes: 63 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8099,6 +8099,69 @@ def test_as_matrix_lcd(self):
values = self.mixed_int.as_matrix(['C'])
self.assert_(values.dtype == np.uint8)

def test_constructor_with_convert(self):
    # Mostly a test of lib.maybe_convert_objects (GH #2845): each case
    # builds a one-column DataFrame from a plain list and compares the
    # resulting column against a Series with the expected values/dtype.

    def check(raw, wanted, dtype):
        # raw: list handed to the DataFrame constructor
        # wanted/dtype: contents and dtype of the expected Series
        column = DataFrame({'A': raw})['A']
        assert_series_equal(column, Series(np.asarray(wanted, dtype)))

    # largest int64 stays int64; one past it falls back to object
    check([2**63 - 1], [2**63 - 1], np.int64)
    check([2**63], [2**63], np.object_)

    # datetime mixed with bool is left as object
    check([datetime(2005, 1, 1), True],
          [datetime(2005, 1, 1), True], np.object_)

    # None next to an int is expected to come back as NaN in a float column
    check([None, 1], [np.nan, 1], np.float_)

    # purely numeric mixes are expected to take the wider numeric dtype
    check([1.0, 2], [1.0, 2], np.float_)
    check([1.0 + 2.0j, 3], [1.0 + 2.0j, 3], np.complex_)
    check([1.0 + 2.0j, 3.0], [1.0 + 2.0j, 3.0], np.complex_)

    # complex mixed with bool is left as object
    check([1.0 + 2.0j, True], [1.0 + 2.0j, True], np.object_)

    # None next to float/complex is expected to come back as NaN
    check([1.0, None], [1.0, np.nan], np.float_)
    check([1.0 + 2.0j, None], [1.0 + 2.0j, np.nan], np.complex_)

    # numeric values mixed with bool or datetime (plus None) stay object
    check([2.0, 1, True, None], [2.0, 1, True, None], np.object_)
    check([2.0, 1, datetime(2006, 1, 1), None],
          [2.0, 1, datetime(2006, 1, 1), None], np.object_)

def test_constructor_with_datetimes(self):
intname = np.dtype(np.int_).name
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,17 @@ def test_format(self):
expected = [str(index[0])]
self.assertEquals(formatted, expected)

# 2845
index = Index([1, 2.0+3.0j, np.nan])
formatted = index.format()
expected = [str(index[0]), str(index[1]), str(index[2])]
self.assertEquals(formatted, expected)

index = Index([1, 2.0+3.0j, None])
formatted = index.format()
expected = [str(index[0]), str(index[1]), '']
self.assertEquals(formatted, expected)

self.strIndex[:0].format()

def test_format_with_name_time_info(self):
Expand Down

0 comments on commit d44e9c7

Please sign in to comment.