From 01a1f132355efb74ce53c24d811e7954b460ad34 Mon Sep 17 00:00:00 2001
From: nameexhaustion <simonlin.rqmmw@slmail.me>
Date: Thu, 22 Feb 2024 14:53:55 +1100
Subject: [PATCH] refactor(internal): simplify compressed_chunk_size
 calculation and leave comments to explain

---
 .../src/parquet/encoding/hybrid_rle/encoder.rs              | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs
index e2e381bc4b90..1c4dd67ccec7 100644
--- a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs
+++ b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs
@@ -40,7 +40,8 @@ fn bitpacked_encode_u32<W: Write, I: Iterator<Item = u32>>(
     let remainder = length - chunks * U32_BLOCK_LEN;
     let mut buffer = [0u32; U32_BLOCK_LEN];
 
-    let compressed_chunk_size = ceil8(U32_BLOCK_LEN * num_bits);
+    // simplified from ceil8(U32_BLOCK_LEN * num_bits) since U32_BLOCK_LEN = 32
+    let compressed_chunk_size = 4 * num_bits;
 
     for _ in 0..chunks {
         iterator
@@ -58,6 +59,9 @@ fn bitpacked_encode_u32<W: Write, I: Iterator<Item = u32>>(
         // Must be careful here to ensure we write a multiple of `num_bits`
         // (the bit width) to align with the spec. Some readers also rely on
         // this - see https://github.com/pola-rs/polars/pull/13883.
+
+        // The minimal size would be ceil8(remainder * num_bits), but that need not be a
+        // multiple of num_bits, so we round up to ceil8(remainder) * num_bits instead.
         let compressed_remainder_size = ceil8(remainder) * num_bits;
         iterator
             .by_ref()