Skip to content

Commit

Permalink
fix: parquet hybrid RLE encoding did not always align to bit width (p…
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion authored and r-brink committed Jan 24, 2024
1 parent a383b22 commit 4b10b43
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions crates/polars-parquet/src/parquet/encoding/hybrid_rle/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ fn bitpacked_encode_u32<W: Write, I: Iterator<Item = u32>>(
}

if remainder != 0 {
let compressed_remainder_size = ceil8(remainder * num_bits);
// Must be careful here to ensure the padded remainder occupies a whole
// multiple of `num_bits` bytes (`num_bits` is the bit width): the spec
// bit-packs values in groups of 8, and some readers also rely on this -
// see https://github.com/pola-rs/polars/pull/13883.
let compressed_remainder_size = ceil8(remainder) * num_bits;
iterator
.by_ref()
.take(remainder)
Expand Down Expand Up @@ -129,7 +132,13 @@ mod tests {

assert_eq!(
vec,
vec![(2 << 1 | 1), 0b01_10_01_00, 0b00_01_01_10, 0b_00_00_00_11]
vec![
(2 << 1 | 1),
0b01_10_01_00,
0b00_01_01_10,
0b_00_00_00_11,
0b0
]
);
Ok(())
}
Expand Down

0 comments on commit 4b10b43

Please sign in to comment.