From 01a1f132355efb74ce53c24d811e7954b460ad34 Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Thu, 22 Feb 2024 14:53:55 +1100 Subject: [PATCH] refactor(internal): simplify compressed_chunk_size calculation and leave comments to explain --- .../src/parquet/encoding/hybrid_rle/encoder.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs index e2e381bc4b90..1c4dd67ccec7 100644 --- a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs +++ b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs @@ -40,7 +40,8 @@ fn bitpacked_encode_u32<W: Write, I: Iterator<Item = u32>>( let remainder = length - chunks * U32_BLOCK_LEN; let mut buffer = [0u32; U32_BLOCK_LEN]; - let compressed_chunk_size = ceil8(U32_BLOCK_LEN * num_bits); + // simplified from ceil8(U32_BLOCK_LEN * num_bits) since U32_BLOCK_LEN = 32 + let compressed_chunk_size = 4 * num_bits; for _ in 0..chunks { iterator @@ -58,6 +59,9 @@ fn bitpacked_encode_u32<W: Write, I: Iterator<Item = u32>>( // Must be careful here to ensure we write a multiple of `num_bits` // (the bit width) to align with the spec. Some readers also rely on // this - see https://github.com/pola-rs/polars/pull/13883. + + // this is ceil8(remainder * num_bits), but we ensure the output is a + // multiple of num_bits by rewriting it as ceil8(remainder) * num_bits let compressed_remainder_size = ceil8(remainder) * num_bits; iterator .by_ref()