Skip to content

Commit

Permalink
fix: Don't panic on hashing nested list types (#16555)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored May 28, 2024
1 parent 52c919f commit cb40bbd
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 6 deletions.
11 changes: 8 additions & 3 deletions crates/polars-core/src/series/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,14 @@ impl Hash for Wrap<Series> {
fn hash<H: Hasher>(&self, state: &mut H) {
let rs = RandomState::with_seeds(0, 0, 0, 0);
let mut h = vec![];
self.0.vec_hash(rs, &mut h).unwrap();
let h = h.into_iter().fold(0, |a: u64, b| a.wrapping_add(b));
h.hash(state)
// Preferred path: `vec_hash` fills `h` with hashes of the wrapped series; they are
// folded into one u64 with a wrapping sum and that value is fed into `state`.
if self.0.vec_hash(rs, &mut h).is_ok() {
let h = h.into_iter().fold(0, |a: u64, b| a.wrapping_add(b));
h.hash(state)
} else {
// `vec_hash` returned an error (this change targets nested list types), so
// instead of panicking hash only the length, null count and dtype. This is a
// coarse fallback: series that differ only in their values hash identically here.
self.len().hash(state);
self.null_count().hash(state);
self.dtype().hash(state);
}
}
}

Expand Down
10 changes: 7 additions & 3 deletions crates/polars-plan/src/logical_plan/lit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::hash::{Hash, Hasher};
use polars_core::export::chrono::{Duration as ChronoDuration, NaiveDate, NaiveDateTime};
use polars_core::prelude::*;
use polars_core::utils::materialize_dyn_int;
use polars_utils::hashing::hash_to_partition;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -453,9 +454,12 @@ impl Hash for LiteralValue {
let len = s.len();
len.hash(state);
s.null_count().hash(state);
// Hash 5 first values. Still a poor hash, but it removes the pathological clashes.
for i in 0..std::cmp::min(5, len) {
s.get(i).unwrap().hash(state);
// Hash a small sample of values at pseudo-randomly chosen positions instead of the
// leading ones, so series that share a common prefix do not all collide.
// The index sequence is derived only from `len`, so it is deterministic and equal
// series always sample the same positions.
const RANDOM: u64 = 0x2c194fa5df32a367;
let mut rng = (len as u64) ^ RANDOM;
// Bound the sample count by `len`: with an unconditional 5 iterations an empty
// series would still be indexed, `s.get(idx)` could not yield a value and the
// `unwrap` would panic. For `len == 0` the loop now simply does not run.
for _ in 0..std::cmp::min(5, len) {
    // NOTE(review): assumes `hash_to_partition(rng, len)` returns an index in
    // `0..len` for `len > 0` — confirm against polars_utils::hashing.
    let idx = hash_to_partition(rng, len);
    s.get(idx).unwrap().hash(state);
    rng = rng.rotate_right(17).wrapping_add(RANDOM);
}
},
LiteralValue::Range {
Expand Down
6 changes: 6 additions & 0 deletions py-polars/tests/unit/test_cse.py
Original file line number Diff line number Diff line change
Expand Up @@ -768,3 +768,9 @@ def test_cse_series_collision_16138(capfd: Any, monkeypatch: Any) -> None:
}
captured = capfd.readouterr().err
assert "3 CSE" in captured


def test_nested_cache_no_panic_16553() -> None:
    """Regression test for issue 16553.

    Selects a nested-list literal, collects it with common-subexpression
    elimination enabled, and checks the resulting dictionary.
    """
    lazy_frame = pl.LazyFrame().select(a=[[[1]]])
    collected = lazy_frame.collect(comm_subexpr_elim=True)
    result = collected.to_dict(as_series=False)
    expected = {"a": [[[[1]]]]}
    assert result == expected

0 comments on commit cb40bbd

Please sign in to comment.