From aefdf5fb7196d63d6945f68b62200f8b26337273 Mon Sep 17 00:00:00 2001 From: Matthew Owen Date: Wed, 17 Jul 2024 13:41:26 -0700 Subject: [PATCH] [py312] Fix `test_tensor.py` (#46669) ## Why are these changes needed? To upgrade to py312 we need to upgrade to `pandas>=2.0.0`. This upgrade introduced a breaking behavior change that affects some of our code and tests: > Changed behavior in setting values with df.loc[:, foo] = bar or df.iloc[:, foo] = bar, these now always attempt to set values inplace before falling back to casting ([GH 45333](https://github.com/pandas-dev/pandas/issues/45333)) As a result, we needed to replace these `df.loc[:, col] = ...` assignments with direct column assignment (`df[col] = ...`), which replaces the column instead of trying to write into it in place. ## Related issue number ## Checks - [ ] I've signed off every commit (by using the -s flag, i.e., `git commit -s`) in this PR. - [ ] I've run `scripts/format.sh` to lint the changes in this PR. - [ ] I've included any doc changes needed for https://docs.ray.io/en/master/. - [ ] I've added any new APIs to the API Reference. For example, if I added a method in Tune, I've added it in `doc/source/tune/api/` under the corresponding `.rst` file. - [ ] I've made sure the tests are passing. 
Note that there might be a few flaky tests, see the recent failures at https://flakey-tests.ray.io/ - Testing Strategy - [ ] Unit tests - [ ] Release tests - [ ] This PR is not tested :( Signed-off-by: Matthew Owen --- python/ray/air/util/data_batch_conversion.py | 4 ++-- python/ray/data/tests/test_tensor.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/ray/air/util/data_batch_conversion.py b/python/ray/air/util/data_batch_conversion.py index 426814d72f1d..e134b5b1d31f 100644 --- a/python/ray/air/util/data_batch_conversion.py +++ b/python/ray/air/util/data_batch_conversion.py @@ -319,7 +319,7 @@ def _cast_ndarray_columns_to_tensor_extension(df: "pd.DataFrame") -> "pd.DataFra with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) warnings.simplefilter("ignore", category=SettingWithCopyWarning) - df.loc[:, col_name] = TensorArray(col) + df[col_name] = TensorArray(col) except Exception as e: raise ValueError( f"Tried to cast column {col_name} to the TensorArray tensor " @@ -354,5 +354,5 @@ def _cast_tensor_columns_to_ndarrays(df: "pd.DataFrame") -> "pd.DataFrame": with warnings.catch_warnings(): warnings.simplefilter("ignore", category=FutureWarning) warnings.simplefilter("ignore", category=SettingWithCopyWarning) - df.loc[:, col_name] = pd.Series(list(col.to_numpy())) + df[col_name] = list(col.to_numpy()) return df diff --git a/python/ray/data/tests/test_tensor.py b/python/ray/data/tests/test_tensor.py index 852142bf4150..3932d5b76ac0 100644 --- a/python/ray/data/tests/test_tensor.py +++ b/python/ray/data/tests/test_tensor.py @@ -564,7 +564,7 @@ def test_tensors_in_tables_pandas_roundtrip( ds_df = ds.to_pandas() expected_df = df + 1 if enable_automatic_tensor_extension_cast: - expected_df.loc[:, "two"] = list(expected_df["two"].to_numpy()) + expected_df["two"] = list(expected_df["two"].to_numpy()) pd.testing.assert_frame_equal(ds_df, expected_df) @@ -585,7 +585,7 @@ def 
test_tensors_in_tables_pandas_roundtrip_variable_shaped( ds_df = ds.to_pandas() expected_df = df + 1 if enable_automatic_tensor_extension_cast: - expected_df.loc[:, "two"] = _create_possibly_ragged_ndarray( + expected_df["two"] = _create_possibly_ragged_ndarray( expected_df["two"].to_numpy() ) pd.testing.assert_frame_equal(ds_df, expected_df) @@ -873,8 +873,8 @@ def test_tensors_in_tables_iter_batches( ) df = pd.concat([df1, df2], ignore_index=True) if enable_automatic_tensor_extension_cast: - df.loc[:, "one"] = list(df["one"].to_numpy()) - df.loc[:, "two"] = list(df["two"].to_numpy()) + df["one"] = list(df["one"].to_numpy()) + df["two"] = list(df["two"].to_numpy()) ds = ray.data.from_pandas([df1, df2]) batches = list(ds.iter_batches(batch_size=2, batch_format="pandas")) assert len(batches) == 3