From 818472c5c6fc2e0cb3a15b5b1244dab114451957 Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Sun, 21 Jan 2024 20:44:47 +1100 Subject: [PATCH 1/3] c --- .../polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs index c4523a7da53b..4db31ee91817 100644 --- a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs +++ b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs @@ -55,7 +55,7 @@ fn bitpacked_encode_u32<W: Write, I: Iterator<Item = u32>>( } if remainder != 0 { - let compressed_remainder_size = ceil8(remainder * num_bits); + let compressed_remainder_size = ceil8(remainder) * num_bits; iterator .by_ref() .take(remainder) From dfa1767a853e358d6a30a5bf6469f5073b096f0d Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Sun, 21 Jan 2024 22:13:56 +1100 Subject: [PATCH 2/3] update test case --- .../src/parquet/encoding/hybrid_rle/encoder.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs index 4db31ee91817..5b86df893a94 100644 --- a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs +++ b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs @@ -129,7 +129,13 @@ mod tests { assert_eq!( vec, - vec![(2 << 1 | 1), 0b01_10_01_00, 0b00_01_01_10, 0b_00_00_00_11] + vec![ + (2 << 1 | 1), + 0b01_10_01_00, + 0b00_01_01_10, + 0b_00_00_00_11, + 0b0 + ] ); Ok(()) } From 7db9eeed19ab9b5cc53e82f16304f1de2b02c26e Mon Sep 17 00:00:00 2001 From: nameexhaustion Date: Sun, 21 Jan 2024 23:33:09 +1100 Subject: [PATCH 3/3] add comment --- .../polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs 
b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs index 5b86df893a94..e2e381bc4b90 100644 --- a/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs +++ b/crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs @@ -55,6 +55,9 @@ fn bitpacked_encode_u32<W: Write, I: Iterator<Item = u32>>( } if remainder != 0 { + // Must be careful here to ensure the number of bytes we write is a + // multiple of `num_bits` (the bit width) to align with the spec. Some + // readers also rely on this - see https://github.com/pola-rs/polars/pull/13883. let compressed_remainder_size = ceil8(remainder) * num_bits; iterator .by_ref()