Skip to content

Commit

Permalink
fix: add snappy compression on checkpoint files (#2365)
Browse files Browse the repository at this point in the history
# Description
Noticed that we didn't set any compression, this should bring down the
file size a bit for users that have large checkpoints
  • Loading branch information
ion-elgreco authored Apr 1, 2024
1 parent 7568b57 commit e34767d
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion crates/core/src/protocol/checkpoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ use futures::{StreamExt, TryStreamExt};
use lazy_static::lazy_static;
use object_store::{Error, ObjectStore};
use parquet::arrow::ArrowWriter;
use parquet::basic::Compression;
use parquet::errors::ParquetError;
use parquet::file::properties::WriterProperties;
use regex::Regex;
use serde_json::Value;
use tracing::{debug, error};
Expand Down Expand Up @@ -333,7 +335,15 @@ fn parquet_bytes_from_state(
debug!("Writing to checkpoint parquet buffer...");
// Write the Checkpoint parquet file.
let mut bytes = vec![];
let mut writer = ArrowWriter::try_new(&mut bytes, arrow_schema.clone(), None)?;
let mut writer = ArrowWriter::try_new(
&mut bytes,
arrow_schema.clone(),
Some(
WriterProperties::builder()
.set_compression(Compression::SNAPPY)
.build(),
),
)?;
let mut decoder = ReaderBuilder::new(arrow_schema)
.with_batch_size(CHECKPOINT_RECORD_BATCH_SIZE)
.build_decoder()?;
Expand Down

0 comments on commit e34767d

Please sign in to comment.