diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index e06085c4c5c26..b8c28bb8daadd 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -233,7 +233,7 @@ Other enhancements - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`) - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`) - :meth:`Series.replace` will now cast results to ``PeriodDtype`` where possible instead of ``object`` dtype (:issue:`41526`) -- Improved error message in ``corr` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`) +- Improved error message in ``corr`` and ``cov`` methods on :class:`.Rolling`, :class:`.Expanding`, and :class:`.ExponentialMovingWindow` when ``other`` is not a :class:`DataFrame` or :class:`Series` (:issue:`41741`) .. --------------------------------------------------------------------------- @@ -686,6 +686,7 @@ Deprecations - Deprecated passing arguments (apart from ``cond`` and ``other``) as positional in :meth:`DataFrame.mask` and :meth:`Series.mask` (:issue:`41485`) - Deprecated passing arguments as positional in :meth:`DataFrame.clip` and :meth:`Series.clip` (other than ``"upper"`` and ``"lower"``) (:issue:`41485`) - Deprecated special treatment of lists with first element a Categorical in the :class:`DataFrame` constructor; pass as ``pd.DataFrame({col: categorical, ...})`` instead (:issue:`38845`) +- Deprecated behavior of :class:`DataFrame` constructor when a ``dtype`` is passed and the data cannot be cast to that dtype. In a future version, this will raise instead of being silently ignored (:issue:`24435`) - Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`) - Deprecated passing arguments as positional in :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, and :meth:`Series.bfill` (:issue:`41485`) - Deprecated passing arguments as positional in :meth:`DataFrame.sort_values` (other than ``"by"``) and :meth:`Series.sort_values` (:issue:`41485`) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 0c299056075c1..ff73bc227fdb2 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -12,6 +12,7 @@ Sequence, cast, ) +import warnings import numpy as np import numpy.ma as ma @@ -745,6 +746,17 @@ def _try_cast( if raise_cast_failure: raise else: + # we only get here with raise_cast_failure False, which means + # called via the DataFrame constructor + # GH#24435 + warnings.warn( + f"Could not cast to {dtype}, falling back to object. This " + "behavior is deprecated. In a future version, when a dtype is " + "passed to 'DataFrame', either all columns will be cast to that " + "dtype, or a TypeError will be raised", + FutureWarning, + stacklevel=7, + ) subarr = np.array(arr, dtype=object, copy=copy) return subarr diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index ba0acdc4f947b..34854be29ad1f 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -258,8 +258,11 @@ def f(dtype): f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")]) # these work (though results may be unexpected) - f("int64") - f("float64") + depr_msg = "either all columns will be cast to that dtype, or a TypeError will" + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + f("int64") + with tm.assert_produces_warning(FutureWarning, match=depr_msg): + f("float64") # 10822 # invalid error message on dt inference diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d118a376b56ec..784969c199c9f 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -207,7 +207,9 @@ def test_constructor_mixed(self, float_string_frame): assert float_string_frame["foo"].dtype == np.object_ def test_constructor_cast_failure(self): - foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64) + msg = "either all columns will be cast to that dtype, or a TypeError will" + with tm.assert_produces_warning(FutureWarning, match=msg): + foo = DataFrame({"a": ["a", "b", "c"]}, dtype=np.float64) assert foo["a"].dtype == object # GH 3010, constructing with odd arrays @@ -683,7 +685,10 @@ def test_constructor_dict_cast2(self): "A": dict(zip(range(20), tm.makeStringIndex(20))), "B": dict(zip(range(15), np.random.randn(15))), } - frame = DataFrame(test_data, dtype=float) + msg = "either all columns will be cast to that dtype, or a TypeError will" + with tm.assert_produces_warning(FutureWarning, match=msg): + frame = DataFrame(test_data, dtype=float) + assert len(frame) == 20 assert frame["A"].dtype == np.object_ assert frame["B"].dtype == np.float64 diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index f07bf3464b74c..f1fbe0c5a6b9c 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -206,27 +206,26 @@ def test_frame_getitem_nan_multiindex(nulls_fixture): df = DataFrame( [[11, n, 13], [21, n, 23], [31, n, 33], [41, n, 43]], columns=cols, - dtype="int64", ).set_index(["a", "b"]) + df["c"] = df["c"].astype("int64") idx = (21, n) result = df.loc[:idx] - expected = DataFrame( - [[11, n, 13], [21, n, 23]], columns=cols, dtype="int64" - ).set_index(["a", "b"]) + expected = DataFrame([[11, n, 13], [21, n, 23]], columns=cols).set_index(["a", "b"]) + expected["c"] = expected["c"].astype("int64") tm.assert_frame_equal(result, expected) result = df.loc[idx:] expected = DataFrame( - [[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols, dtype="int64" + [[21, n, 23], [31, n, 33], [41, n, 43]], columns=cols ).set_index(["a", "b"]) + expected["c"] = expected["c"].astype("int64") tm.assert_frame_equal(result, expected) idx1, idx2 = (21, n), (31, n) result = df.loc[idx1:idx2] - expected = DataFrame( - [[21, n, 23], [31, n, 33]], columns=cols, dtype="int64" - ).set_index(["a", "b"]) + expected = DataFrame([[21, n, 23], [31, n, 33]], columns=cols).set_index(["a", "b"]) + expected["c"] = expected["c"].astype("int64") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index c1a096ed06efc..ab868a3d3713d 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -284,7 +284,12 @@ def test_loc_setitem_dtype(self): df.loc[:, cols] = df.loc[:, cols].astype("float32") expected = DataFrame( - {"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}, dtype="float32" + { + "id": ["A"], + "a": np.array([1.2], dtype="float32"), + "b": np.array([0.0], dtype="float32"), + "c": np.array([-2.5], dtype="float32"), + } ) # id is inferred as object tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/reshape/test_get_dummies.py b/pandas/tests/reshape/test_get_dummies.py index 8af49ac20987a..653ea88ed62ac 100644 --- a/pandas/tests/reshape/test_get_dummies.py +++ b/pandas/tests/reshape/test_get_dummies.py @@ -272,8 +272,9 @@ def test_dataframe_dummies_subset(self, df, sparse): "from_A_a": [1, 0, 1], "from_A_b": [0, 1, 0], }, - dtype=np.uint8, ) + cols = expected.columns + expected[cols[1:]] = expected[cols[1:]].astype(np.uint8) expected[["C"]] = df[["C"]] if sparse: cols = ["from_A_a", "from_A_b"]