diff --git a/polars/polars-core/src/frame/mod.rs b/polars/polars-core/src/frame/mod.rs index 44fbb6ab6342..34ceedae4d6b 100644 --- a/polars/polars-core/src/frame/mod.rs +++ b/polars/polars-core/src/frame/mod.rs @@ -2378,6 +2378,21 @@ impl DataFrame { } } + /// Iterator over the rows in this `DataFrame` as Arrow RecordBatches as physical values. + /// + /// # Panics + /// + /// Panics if the `DataFrame` that is passed is not rechunked. + /// + /// This responsibility is left to the caller as we don't want to take mutable references here, + /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller + /// as well. + pub fn iter_chunks_physical(&self) -> PhysRecordBatchIter<'_> { + PhysRecordBatchIter { + iters: self.columns.iter().map(|s| s.chunks().iter()).collect(), + } + } + /// Get a `DataFrame` with all the columns in reversed order. #[must_use] pub fn reverse(&self) -> Self { @@ -3346,6 +3361,22 @@ impl<'a> Iterator for RecordBatchIter<'a> { } } +pub struct PhysRecordBatchIter<'a> { + iters: Vec>, +} + +impl Iterator for PhysRecordBatchIter<'_> { + type Item = ArrowChunk; + + fn next(&mut self) -> Option { + self.iters + .iter_mut() + .map(|phys_iter| phys_iter.next().cloned()) + .collect::>>() + .map(ArrowChunk::new) + } +} + impl Default for DataFrame { fn default() -> Self { DataFrame::new_no_checks(vec![]) diff --git a/polars/polars-core/src/series/iterator.rs b/polars/polars-core/src/series/iterator.rs index 5dce58b56d0c..b8609758f733 100644 --- a/polars/polars-core/src/series/iterator.rs +++ b/polars/polars-core/src/series/iterator.rs @@ -57,6 +57,8 @@ impl FromIterator for Series { } } +pub type SeriesPhysIter<'a> = Box> + 'a>; + #[cfg(any(feature = "rows", feature = "dtype-struct"))] impl Series { /// iterate over [`Series`] as [`AnyValue`]. @@ -76,7 +78,7 @@ impl Series { } } - pub fn phys_iter(&self) -> Box> + '_> { + pub fn phys_iter(&self) -> SeriesPhysIter<'_> { let dtype = self.dtype(); let phys_dtype = dtype.to_physical(); diff --git a/polars/polars-core/src/utils/mod.rs b/polars/polars-core/src/utils/mod.rs index c39d55f2f665..1d49f489e4f2 100644 --- a/polars/polars-core/src/utils/mod.rs +++ b/polars/polars-core/src/utils/mod.rs @@ -141,7 +141,7 @@ pub fn split_series(s: &Series, n: usize) -> PolarsResult> { } fn flatten_df(df: &DataFrame) -> impl Iterator + '_ { - df.iter_chunks().flat_map(|chunk| { + df.iter_chunks_physical().flat_map(|chunk| { let df = DataFrame::new_no_checks( df.iter() .zip(chunk.into_arrays())