Skip to content

Commit

Permalink
perf: improve group_tuples of high cardinality data ~10% (#7938)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Apr 2, 2023
1 parent a7d3895 commit ae8698b
Showing 1 changed file with 18 additions and 1 deletion.
19 changes: 18 additions & 1 deletion polars/polars-core/src/frame/groupby/proxy.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::mem::ManuallyDrop;
use std::ops::Deref;

use polars_arrow::trusted_len::PushUnchecked;
use polars_arrow::utils::CustomIterTools;
use rayon::iter::plumbing::UnindexedConsumer;
use rayon::prelude::*;
Expand Down Expand Up @@ -46,7 +47,23 @@ impl From<Vec<IdxItem>> for GroupsIdx {

impl From<Vec<Vec<IdxItem>>> for GroupsIdx {
    /// Flattens nested vectors of group items into a single `GroupsIdx`.
    ///
    /// Each item is destructured into its first value and its remaining values,
    /// which are appended (in iteration order) to the `first` and `all` vectors
    /// of the result respectively. The result is always flagged `sorted: false`.
    fn from(v: Vec<Vec<IdxItem>>) -> Self {
        // About 10% faster than `v.into_iter().flatten().collect()`: the total
        // length is computed up front so both output vectors are allocated once
        // and can be filled without per-push capacity checks.
        let cap = v.iter().map(|inner| inner.len()).sum::<usize>();
        let mut first = Vec::with_capacity(cap);
        let mut all = Vec::with_capacity(cap);
        for inner in v {
            for (first_val, vals) in inner {
                // SAFETY: `cap` is the total number of items across all inner
                // vectors, and exactly one element is pushed to each of `first`
                // and `all` per item, so neither vector can grow past the
                // capacity reserved above.
                unsafe {
                    first.push_unchecked(first_val);
                    all.push_unchecked(vals);
                }
            }
        }
        GroupsIdx {
            sorted: false,
            first,
            all,
        }
    }
}

Expand Down

0 comments on commit ae8698b

Please sign in to comment.