Skip to content

Commit

Permalink
feat(python): Respect index order in DataFrame.to_numpy also for non-numeric frames (#16390)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored May 22, 2024
1 parent c75dba9 commit 0a46613
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 13 deletions.
4 changes: 1 addition & 3 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1518,9 +1518,7 @@ def to_numpy(
Fortran-like. In general, using the Fortran-like index order is faster.
However, the C-like order might be more appropriate to use for downstream
applications to prevent cloning data, e.g. when reshaping into a
one-dimensional array. Note that this option only takes effect if
`structured` is set to `False` and the DataFrame dtypes allow a
global dtype for all columns.
one-dimensional array.
allow_copy
Allow memory to be copied to perform the conversion. If set to `False`,
causes conversions that are not zero-copy to fail.
Expand Down
28 changes: 20 additions & 8 deletions py-polars/src/interop/numpy/to_numpy_df.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ fn df_to_numpy_with_copy(
if let Some(arr) = try_df_to_numpy_numeric_supertype(py, df, order) {
Ok(arr)
} else {
df_columns_to_numpy(py, df, writable)
df_columns_to_numpy(py, df, order, writable)
}
}
fn try_df_to_numpy_numeric_supertype(
Expand All @@ -163,7 +163,12 @@ fn try_df_to_numpy_numeric_supertype(
};
Some(np_array)
}
fn df_columns_to_numpy(py: Python, df: &DataFrame, writable: bool) -> PyResult<PyObject> {
fn df_columns_to_numpy(
py: Python,
df: &DataFrame,
order: IndexOrder,
writable: bool,
) -> PyResult<PyObject> {
let np_arrays = df.iter().map(|s| {
let mut arr = series_to_numpy(py, s, writable, true).unwrap();

Expand All @@ -184,10 +189,17 @@ fn df_columns_to_numpy(py: Python, df: &DataFrame, writable: bool) -> PyResult<P
});

let numpy = PyModule::import_bound(py, "numpy")?;
let arr = numpy
.getattr(intern!(py, "vstack"))
.unwrap()
.call1((PyList::new_bound(py, np_arrays),))?
.getattr(intern!(py, "T"))?;
Ok(arr.into())
let np_array = match order {
IndexOrder::C => numpy
.getattr(intern!(py, "column_stack"))
.unwrap()
.call1((PyList::new_bound(py, np_arrays),))?,
IndexOrder::Fortran => numpy
.getattr(intern!(py, "vstack"))
.unwrap()
.call1((PyList::new_bound(py, np_arrays),))?
.getattr(intern!(py, "T"))?,
};

Ok(np_array.into())
}
9 changes: 7 additions & 2 deletions py-polars/tests/unit/interop/numpy/test_to_numpy_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,13 +220,18 @@ def test_df_to_numpy_stacking_array() -> None:
assert_array_equal(result[0][0], expected[0][0])


def test_df_to_numpy_stacking_string() -> None:
@pytest.mark.parametrize("order", ["c", "fortran"])
def test_df_to_numpy_stacking_string(order: IndexOrder) -> None:
df = pl.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
result = df.to_numpy()
result = df.to_numpy(order=order)

expected = np.array([[1, "x"], [2, "y"], [3, "z"]], dtype=np.object_)

assert_array_equal(result, expected)
if order == "c":
assert result.flags.c_contiguous is True
else:
assert result.flags.f_contiguous is True


def test_to_numpy_chunked_16375() -> None:
Expand Down

0 comments on commit 0a46613

Please sign in to comment.