diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 6044955d8188f..082ebf4f5b3c2 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -403,6 +403,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, bint seen_bool = 0 bint seen_object = 0 bint seen_null = 0 + bint seen_numeric = 0 object val, onan float64_t fval, fnan @@ -437,12 +438,17 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, else: seen_object = 1 # objects[i] = val.astype('O') + break elif util.is_integer_object(val): seen_int = 1 floats[i] = val complexes[i] = val if not seen_null: - ints[i] = val + try: + ints[i] = val + except OverflowError: + seen_object = 1 + break elif util.is_complex_object(val): complexes[i] = val seen_complex = 1 @@ -452,6 +458,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, idatetimes[i] = convert_to_tsobject(val, None).value else: seen_object = 1 + break elif try_float and not util.is_string_object(val): # this will convert Decimal objects try: @@ -460,72 +467,65 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, seen_float = 1 except Exception: seen_object = 1 + break else: seen_object = 1 + break - if not safe: - if seen_null: - if (seen_float or seen_int) and not seen_object: - if seen_complex: - return complexes - else: - return floats - else: - return objects - else: - if seen_object: - return objects - elif not seen_bool: - if seen_datetime: - if seen_complex or seen_float or seen_int: - return objects - else: - return datetimes - else: + seen_numeric = seen_complex or seen_float or seen_int + + if not seen_object: + + if not safe: + if seen_null: + if not seen_bool and not seen_datetime: if seen_complex: return complexes - elif seen_float: + elif seen_float or seen_int: return floats - elif seen_int: - return ints else: - if not seen_float and not seen_int: + if not seen_bool: + if seen_datetime: + if not seen_numeric: + return datetimes + else: + if seen_complex: + return complexes + elif seen_float: + return floats + elif seen_int: + return ints + elif not seen_datetime and not seen_numeric: return bools.view(np.bool_) - return objects - else: - # don't cast int to float, etc. - if seen_null: - if (seen_float or seen_int) and not seen_object: - if seen_complex: - return complexes - else: - return floats - else: - return objects else: - if seen_object: - return objects - elif not seen_bool: - if seen_datetime: - if seen_complex or seen_float or seen_int: - return objects - else: - return datetimes - else: - if seen_int and seen_float: - return objects - elif seen_complex: - return complexes + # don't cast int to float, etc. + if seen_null: + if not seen_bool and not seen_datetime: + if seen_complex: + if not seen_int: + return complexes elif seen_float: - return floats - elif seen_int: - return ints + if not seen_int: + return floats else: - if not seen_float and not seen_int: + if not seen_bool: + if seen_datetime: + if not seen_numeric: + return datetimes + else: + if seen_complex: + if not seen_int: + return complexes + elif seen_float: + if not seen_int: + return floats + elif seen_int: + return ints + elif not seen_datetime and not seen_numeric: return bools.view(np.bool_) - return objects + return objects def convert_sql_column(x): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index c628bf3f0df97..77bf23ea946e1 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -8099,6 +8099,69 @@ def test_as_matrix_lcd(self): values = self.mixed_int.as_matrix(['C']) self.assert_(values.dtype == np.uint8) + def test_constructor_with_convert(self): + # this is actually mostly a test of lib.maybe_convert_objects + # #2845 + df = DataFrame({'A' : [2**63-1] }) + result = df['A'] + expected = Series(np.asarray([2**63-1], np.int64)) + assert_series_equal(result, expected) + + df = DataFrame({'A' : [2**63] }) + result = df['A'] + expected = Series(np.asarray([2**63], np.object_)) + assert_series_equal(result, expected) + + df = DataFrame({'A' : [datetime(2005, 1, 1), True] }) + result = df['A'] + expected = Series(np.asarray([datetime(2005, 1, 1), True], np.object_)) + assert_series_equal(result, expected) + + df = DataFrame({'A' : [None, 1] }) + result = df['A'] + expected = Series(np.asarray([np.nan, 1], np.float_)) + assert_series_equal(result, expected) + + df = DataFrame({'A' : [1.0, 2] }) + result = df['A'] + expected = Series(np.asarray([1.0, 2], np.float_)) + assert_series_equal(result, expected) + + df = DataFrame({'A' : [1.0+2.0j, 3] }) + result = df['A'] + expected = Series(np.asarray([1.0+2.0j, 3], np.complex_)) + assert_series_equal(result, expected) + + df = DataFrame({'A' : [1.0+2.0j, 3.0] }) + result = df['A'] + expected = Series(np.asarray([1.0+2.0j, 3.0], np.complex_)) + assert_series_equal(result, expected) + + df = DataFrame({'A' : [1.0+2.0j, True] }) + result = df['A'] + expected = Series(np.asarray([1.0+2.0j, True], np.object_)) + assert_series_equal(result, expected) + + df = DataFrame({'A' : [1.0, None] }) + result = df['A'] + expected = Series(np.asarray([1.0, np.nan], np.float_)) + assert_series_equal(result, expected) + + df = DataFrame({'A' : [1.0+2.0j, None] }) + result = df['A'] + expected = Series(np.asarray([1.0+2.0j, np.nan], np.complex_)) + assert_series_equal(result, expected) + + df = DataFrame({'A' : [2.0, 1, True, None] }) + result = df['A'] + expected = Series(np.asarray([2.0, 1, True, None], np.object_)) + assert_series_equal(result, expected) + + df = DataFrame({'A' : [2.0, 1, datetime(2006, 1, 1), None] }) + result = df['A'] + expected = Series(np.asarray([2.0, 1, datetime(2006, 1, 1), + None], np.object_)) + assert_series_equal(result, expected) def test_constructor_with_datetimes(self): intname = np.dtype(np.int_).name diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 95d85d44f2ceb..a5732f252d617 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -348,6 +348,17 @@ def test_format(self): expected = [str(index[0])] self.assertEquals(formatted, expected) + # 2845 + index = Index([1, 2.0+3.0j, np.nan]) + formatted = index.format() + expected = [str(index[0]), str(index[1]), str(index[2])] + self.assertEquals(formatted, expected) + + index = Index([1, 2.0+3.0j, None]) + formatted = index.format() + expected = [str(index[0]), str(index[1]), ''] + self.assertEquals(formatted, expected) + self.strIndex[:0].format() def test_format_with_name_time_info(self):