Skip to content

Commit

Permalink
test and better error messages
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite committed Sep 18, 2024
1 parent 6208079 commit d3cb162
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 2 deletions.
5 changes: 3 additions & 2 deletions crates/polars-core/src/datatypes/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -793,10 +793,11 @@ pub fn merge_dtypes(left: &DataType, right: &DataType) -> PolarsResult<DataType>
let merged = merge_dtypes(inner_l, inner_r)?;
List(Box::new(merged))
},
#[cfg(feature = "dtype-struct")]
(Struct(inner_l), Struct(inner_r)) => {
polars_ensure!(inner_l.len() == inner_r.len(), ComputeError: "cannot combine different structs");
polars_ensure!(inner_l.len() == inner_r.len(), ComputeError: "cannot combine structs with differing amounts of fields ({} != {})", inner_l.len(), inner_r.len());
let fields = inner_l.iter().zip(inner_r.iter()).map(|(l, r)| {
polars_ensure!(l.name() == r.name(), ComputeError: "cannot combine different structs");
polars_ensure!(l.name() == r.name(), ComputeError: "cannot combine structs with different fields ({} != {})", l.name(), r.name());
let merged = merge_dtypes(l.dtype(), r.dtype())?;
Ok(Field::new(l.name().clone(), merged))
}).collect::<PolarsResult<Vec<_>>>()?;
Expand Down
18 changes: 18 additions & 0 deletions py-polars/tests/unit/datatypes/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import pytest

from hypothesis import given

import polars as pl
from polars import StringCache
from polars.exceptions import (
Expand Down Expand Up @@ -845,3 +847,19 @@ def test_get_cat_categories_multiple_chunks() -> None:
)
df_cat = df.lazy().select(pl.col("e").cat.get_categories()).collect()
assert len(df_cat) == 2


@pytest.mark.parametrize(
    "f",
    [
        # Each factory maps a string to (nested dtype, one value of that dtype).
        lambda x: (pl.List(pl.Categorical), [x]),
        lambda x: (pl.Struct({"a": pl.Categorical}), {"a": x}),
    ],
)
def test_nested_categorical_concat(
    f: Callable[[str], tuple[pl.DataType, list[str] | dict[str, str]]],
) -> None:
    """Concatenating nested-Categorical frames with different categories must raise.

    Two single-row frames are built with the same nested dtype (a List or a
    Struct wrapping ``pl.Categorical``) but different category strings
    (``"a"`` and ``"b"``). ``pl.concat`` on them is expected to raise
    ``StringCacheMismatchError``.
    """
    dt, va = f("a")
    _, vb = f("b")
    a = pl.DataFrame({"x": [va]}, schema={"x": dt})
    b = pl.DataFrame({"x": [vb]}, schema={"x": dt})

    with pytest.raises(pl.exceptions.StringCacheMismatchError):
        # The call is expected to raise, so its result is intentionally not bound.
        pl.concat([a, b])

0 comments on commit d3cb162

Please sign in to comment.