From cb260f6b3364cfc7fe987405f880db6e9f296a5b Mon Sep 17 00:00:00 2001
From: ritchie
Date: Tue, 16 Jul 2024 09:36:47 +0200
Subject: [PATCH 1/2] fix: Fix explode invalid check

---
 crates/polars-core/src/frame/explode.rs | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs
index 77edc1c94f89..9bad32981f1f 100644
--- a/crates/polars-core/src/frame/explode.rs
+++ b/crates/polars-core/src/frame/explode.rs
@@ -80,15 +80,6 @@ impl DataFrame {
             Ok(())
         }
 
-        let check_offsets = || {
-            let first_offsets = exploded_columns[0].1.as_slice();
-            for (_, offsets) in &exploded_columns[1..] {
-                polars_ensure!(first_offsets == offsets.as_slice(),
-                    ShapeMismatch: "exploded columns must have matching element counts"
-                )
-            }
-            Ok(())
-        };
         let process_first = || {
             let (exploded, offsets) = &exploded_columns[0];
 
@@ -102,9 +93,8 @@ impl DataFrame {
             process_column(self, &mut df, exploded.clone())?;
             PolarsResult::Ok(df)
         };
-        let (df, result) = POOL.join(process_first, check_offsets);
-        let mut df = df?;
-        result?;
+
+        let mut df = process_first()?;
 
         for (exploded, _) in exploded_columns.into_iter().skip(1) {
             process_column(self, &mut df, exploded)?

From 84e5e1a113620280efe75097c3d66b666c0a4209 Mon Sep 17 00:00:00 2001
From: ritchie
Date: Tue, 16 Jul 2024 10:26:41 +0200
Subject: [PATCH 2/2] better

---
Review notes (this section is not part of the commit message):

The length guard in check_offsets was written as
`first_offsets.len() != offsets.len() || { ... }`, which evaluates to true,
and therefore passes polars_ensure!, whenever the two offset slices have
different lengths. It now reads `first_offsets.len() == offsets.len() && { ... }`.
Three explanatory comment lines were added to the closure; the hunk headers
and diffstat were adjusted accordingly. The `index` blob ids below were
generated for the pre-review content and no longer match the result.

 crates/polars-core/src/frame/explode.rs       | 28 +++++++++++++++++--
 .../tests/unit/operations/test_explode.py     |  9 +++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs
index 9bad32981f1f..774b30f17390 100644
--- a/crates/polars-core/src/frame/explode.rs
+++ b/crates/polars-core/src/frame/explode.rs
@@ -80,6 +80,29 @@ impl DataFrame {
             Ok(())
         }
 
+        let check_offsets = || {
+            let first_offsets = exploded_columns[0].1.as_slice();
+            for (_, offsets) in &exploded_columns[1..] {
+                let offsets = offsets.as_slice();
+
+                // Compare offsets relative to each column's own first offset, so columns
+                // whose offsets start at different values (see test_explode_17648) match.
+                let offset_l = first_offsets[0];
+                let offset_r = offsets[0];
+                // `zip` stops at the shorter slice, so the offset counts must be equal too.
+                let all_equal_len = first_offsets.len() == offsets.len() && {
+                    first_offsets
+                        .iter()
+                        .zip(offsets.iter())
+                        .all(|(l, r)| (*l - offset_l) == (*r - offset_r))
+                };
+
+                polars_ensure!(all_equal_len,
+                    ShapeMismatch: "exploded columns must have matching element counts"
+                )
+            }
+            Ok(())
+        };
         let process_first = || {
             let (exploded, offsets) = &exploded_columns[0];
 
@@ -93,8 +116,9 @@ impl DataFrame {
             process_column(self, &mut df, exploded.clone())?;
             PolarsResult::Ok(df)
         };
-
-        let mut df = process_first()?;
+        let (df, result) = POOL.join(process_first, check_offsets);
+        let mut df = df?;
+        result?;
 
         for (exploded, _) in exploded_columns.into_iter().skip(1) {
             process_column(self, &mut df, exploded)?
diff --git a/py-polars/tests/unit/operations/test_explode.py b/py-polars/tests/unit/operations/test_explode.py
index fa3e51efabce..c6a5049319a3 100644
--- a/py-polars/tests/unit/operations/test_explode.py
+++ b/py-polars/tests/unit/operations/test_explode.py
@@ -438,3 +438,12 @@ def test_undefined_col_15852() -> None:
 
     with pytest.raises(pl.exceptions.ColumnNotFoundError):
         lf.explode("bar").join(lf, on="foo").collect()
+
+
+def test_explode_17648() -> None:
+    df = pl.DataFrame({"a": [[1, 3], [2, 6, 7], [3, 9, 2], [4], [5, 1, 2, 3, 4]]})
+    assert (
+        df.slice(1, 2)
+        .with_columns(pl.int_ranges(pl.col("a").list.len()).alias("count"))
+        .explode("a", "count")
+    ).to_dict(as_series=False) == {"a": [2, 6, 7, 3, 9, 2], "count": [0, 1, 2, 0, 1, 2]}