# Patch summary (commentary only; text before the first `diff --git` header
# is skipped by patch tools).
#
# 1. crates/polars-row/src/decode.rs
#    In the `match data_type` of `decode`, `ArrowDataType::BinaryView` is added
#    to the arm that previously matched only `ArrowDataType::LargeBinary`, so
#    both variants are decoded via `decode_binview(rows, field).to_boxed()`.
#
# 2. py-polars/tests/unit/streaming/test_streaming_group_by.py
#    Adds `test_streaming_groupby_binary_15116`: a string column is cast to
#    `pl.Binary`, grouped, counted with `pl.len()`, sorted, and collected with
#    `streaming=True`; the expected result has five distinct binary keys with
#    counts [3, 2, 2, 2, 1].
#    NOTE(review): "15116" is presumably the upstream issue number - confirm.
#
# NOTE(review): the original text had lost its line breaks; the leading
# whitespace of context/added lines below is reconstructed from the usual
# rustfmt/black layout - verify against the target tree before applying.
diff --git a/crates/polars-row/src/decode.rs b/crates/polars-row/src/decode.rs
index cbb9de7bfcc4..5d9e514b9bbb 100644
--- a/crates/polars-row/src/decode.rs
+++ b/crates/polars-row/src/decode.rs
@@ -44,7 +44,9 @@ unsafe fn decode(rows: &mut [&[u8]], field: &SortField, data_type: &ArrowDataTyp
     match data_type {
         ArrowDataType::Null => NullArray::new(ArrowDataType::Null, rows.len()).to_boxed(),
         ArrowDataType::Boolean => decode_bool(rows, field).to_boxed(),
-        ArrowDataType::LargeBinary => decode_binview(rows, field).to_boxed(),
+        ArrowDataType::BinaryView | ArrowDataType::LargeBinary => {
+            decode_binview(rows, field).to_boxed()
+        },
         ArrowDataType::Utf8View => {
             let arr = decode_binview(rows, field);
             arr.to_utf8view_unchecked().boxed()
diff --git a/py-polars/tests/unit/streaming/test_streaming_group_by.py b/py-polars/tests/unit/streaming/test_streaming_group_by.py
index 6fa9f079f8d3..506422dc38dd 100644
--- a/py-polars/tests/unit/streaming/test_streaming_group_by.py
+++ b/py-polars/tests/unit/streaming/test_streaming_group_by.py
@@ -453,3 +453,30 @@ def test_streaming_group_null_count() -> None:
     assert df.group_by("g").agg(pl.col("a").count()).collect(streaming=True).to_dict(
         as_series=False
     ) == {"g": [1], "a": [3]}
+
+
+def test_streaming_groupby_binary_15116() -> None:
+    assert (
+        pl.LazyFrame(
+            {
+                "str": [
+                    "A",
+                    "A",
+                    "BB",
+                    "BB",
+                    "CCCC",
+                    "CCCC",
+                    "DDDDDDDD",
+                    "DDDDDDDD",
+                    "EEEEEEEEEEEEEEEE",
+                    "A",
+                ]
+            }
+        )
+        .select([pl.col("str").cast(pl.Binary)])
+        .group_by(["str"])
+        .agg([pl.len().alias("count")])
+    ).sort("str").collect(streaming=True).to_dict(as_series=False) == {
+        "str": [b"A", b"BB", b"CCCC", b"DDDDDDDD", b"EEEEEEEEEEEEEEEE"],
+        "count": [3, 2, 2, 2, 1],
+    }