Skip to content

Commit

Permalink
Do not fetch metadata bytes if path_index == 0 (reuse the already-loaded first_metadata instead)
Browse files (browse the repository at this point in the history)
  • Loading branch information
nameexhaustion committed Sep 10, 2024
1 parent e00e56d commit ccabe98
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pub use crate::parquet::thrift_format::KeyValue;
/// Metadata for a Parquet file.
// This is almost equal to [`parquet_format_safe::FileMetaData`] but contains the descriptors,
// which are crucial to deserialize pages.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct FileMetadata {
/// version of this file.
pub version: i32,
Expand Down
20 changes: 16 additions & 4 deletions crates/polars-stream/src/nodes/parquet_source/metadata_fetch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ impl ParquetSourceNode {
)
.await?,
);

if path_idx == 0 {
return Ok((0, byte_source, MemSlice::from_slice(&[])));
}

let (metadata_bytes, maybe_full_bytes) =
read_parquet_metadata_bytes(byte_source.as_ref(), verbose).await?;

Expand All @@ -109,20 +114,27 @@ impl ParquetSourceNode {
}
};

let first_metadata = self.first_metadata.clone();

let process_metadata_bytes = {
move |handle: task_handles_ext::AbortOnDropHandle<
PolarsResult<(usize, Arc<DynByteSource>, MemSlice)>,
>| {
let projected_arrow_fields = projected_arrow_fields.clone();
let first_metadata = first_metadata.clone();
// Run on CPU runtime - metadata deserialization is expensive, especially
// for very wide tables.
let handle = async_executor::spawn(TaskPriority::Low, async move {
let (path_index, byte_source, metadata_bytes) = handle.await.unwrap()?;

-let metadata = polars_parquet::parquet::read::deserialize_metadata(
-metadata_bytes.as_ref(),
-metadata_bytes.len() * 2 + 1024,
-)?;
+let metadata = if path_index == 0 {
+Arc::unwrap_or_clone(first_metadata)
+} else {
+polars_parquet::parquet::read::deserialize_metadata(
+metadata_bytes.as_ref(),
+metadata_bytes.len() * 2 + 1024,
+)?
+};

ensure_metadata_has_projected_fields(
projected_arrow_fields.as_ref(),
Expand Down

0 comments on commit ccabe98

Please sign in to comment.