Skip to content

Commit

Permalink
perf: Reduce copy in MemSlice (#17983)
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion committed Aug 1, 2024
1 parent bb762f3 commit b5afc8f
Show file tree
Hide file tree
Showing 8 changed files with 214 additions and 185 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ avro-schema = { version = "0.3" }
base64 = "0.22.0"
bitflags = "2"
bytemuck = { version = "1.11", features = ["derive", "extern_crate_alloc"] }
bytes = { version = "1.3" }
chrono = { version = "0.4.31", default-features = false, features = ["std"] }
chrono-tz = "0.8.1"
ciborium = "0.2"
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-io/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ arrow = { workspace = true }
async-trait = { version = "0.1.59", optional = true }
atoi_simd = { workspace = true, optional = true }
blake3 = { version = "1.5.1", optional = true }
bytes = { version = "1.3" }
bytes = { workspace = true }
chrono = { workspace = true, optional = true }
chrono-tz = { workspace = true, optional = true }
fast-float = { workspace = true, optional = true }
Expand Down
10 changes: 6 additions & 4 deletions crates/polars-io/src/mmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ use std::collections::btree_map::Entry;
use std::collections::BTreeMap;
use std::fs::File;
use std::io::{BufReader, Cursor, Read, Seek};
use std::sync::Mutex;
use std::sync::{Arc, Mutex};

use memmap::Mmap;
use once_cell::sync::Lazy;
use polars_core::config::verbose;
use polars_error::{polars_bail, PolarsResult};
use polars_utils::mmap::{MemSlice, MmapSlice};
use polars_utils::mmap::MemSlice;

// Keep track of memory mapped files so we don't write to them while reading
// Use a btree as it uses less memory than a hashmap and this thing never shrinks.
Expand Down Expand Up @@ -144,12 +144,14 @@ impl std::ops::Deref for ReaderBytes<'_> {
}
}

impl<'a> ReaderBytes<'a> {
/// Requires `'static` so that any lifetime transmute has to happen at the call site,
/// where it is usually much clearer to see whether it is sound.
impl ReaderBytes<'static> {
pub fn into_mem_slice(self) -> MemSlice {
match self {
ReaderBytes::Borrowed(v) => MemSlice::from_slice(v),
ReaderBytes::Owned(v) => MemSlice::from_vec(v),
ReaderBytes::Mapped(v, _) => MemSlice::from_mmap(MmapSlice::new(v)),
ReaderBytes::Mapped(v, _) => MemSlice::from_mmap(Arc::new(v)),
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-io/src/parquet/read/mmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,15 @@ fn _mmap_single_column<'a>(
) -> (&'a ColumnChunkMetaData, MemSlice) {
let (start, len) = meta.byte_range();
let chunk = match store {
ColumnStore::Local(mem_slice) => mem_slice.slice(start as usize, (start + len) as usize),
ColumnStore::Local(mem_slice) => mem_slice.slice((start as usize)..(start + len) as usize),
#[cfg(all(feature = "async", feature = "parquet"))]
ColumnStore::Fetched(fetched) => {
let entry = fetched.get(&start).unwrap_or_else(|| {
panic!(
"mmap_columns: column with start {start} must be prefetched in ColumnStore.\n"
)
});
MemSlice::from_slice(entry.as_ref())
MemSlice::from_bytes(entry.clone())
},
};
(meta, chunk)
Expand Down
9 changes: 6 additions & 3 deletions crates/polars-io/src/parquet/read/read_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,10 @@ pub fn read_parquet<R: MmapBytesReader>(
}

let reader = ReaderBytes::from(&mut reader);
let store = mmap::ColumnStore::Local(reader.into_mem_slice());
let store = mmap::ColumnStore::Local(
unsafe { std::mem::transmute::<ReaderBytes<'_>, ReaderBytes<'static>>(reader) }
.into_mem_slice(),
);

let dfs = rg_to_dfs(
&store,
Expand Down Expand Up @@ -471,8 +474,8 @@ impl FetchRowGroupsFromMmapReader {

fn fetch_row_groups(&mut self, _row_groups: Range<usize>) -> PolarsResult<ColumnStore> {
// @TODO: we can do something smarter here with mmap
Ok(mmap::ColumnStore::Local(MemSlice::from_slice(
self.0.deref(),
Ok(mmap::ColumnStore::Local(MemSlice::from_vec(
self.0.deref().to_vec(),
)))
}
}
Expand Down
1 change: 1 addition & 0 deletions crates/polars-utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ polars-error = { workspace = true }

ahash = { workspace = true }
bytemuck = { workspace = true }
bytes = { workspace = true }
hashbrown = { workspace = true }
indexmap = { workspace = true }
memmap = { workspace = true, optional = true }
Expand Down
Loading

0 comments on commit b5afc8f

Please sign in to comment.