From 23c08d26a43e8ad7a0729ff430f51e4857d1327d Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Mon, 3 May 2021 19:16:52 -0700 Subject: [PATCH 1/2] Support zstd compression Add initial support for compressing tarballs with zstd compression. This provides comparable or in some cases better compression, while substantially improving decompression performance. This doesn't change any of the defaults (which still use gz and xz), just introduces initial off-by-default support for zst to allow us to experiment. --- Cargo.toml | 1 + src/compression.rs | 28 ++++++++++++++++++++++++++++ src/tarballer.rs | 4 ++-- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 7979528..40e125d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ walkdir = "2" xz2 = "0.1.4" num_cpus = "1" remove_dir_all = "0.5" +zstd = { version = "0.8.0", features = ["zstdmt"] } [dependencies.clap] features = ["yaml"] diff --git a/src/compression.rs b/src/compression.rs index b3010cb..49eb413 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -8,6 +8,7 @@ use xz2::{read::XzDecoder, write::XzEncoder}; pub enum CompressionFormat { Gz, Xz, + Zstd, } impl CompressionFormat { @@ -15,6 +16,7 @@ impl CompressionFormat { match path.as_ref().extension().and_then(|e| e.to_str()) { Some("gz") => Some(CompressionFormat::Gz), Some("xz") => Some(CompressionFormat::Xz), + Some("zst") => Some(CompressionFormat::Zstd), _ => None, } } @@ -23,6 +25,7 @@ impl CompressionFormat { match self { CompressionFormat::Gz => "gz", CompressionFormat::Xz => "xz", + CompressionFormat::Zstd => "zst", } } @@ -48,6 +51,18 @@ impl CompressionFormat { .encoder()?; Box::new(XzEncoder::new_stream(file, stream)) } + CompressionFormat::Zstd => { + // Level 19 provides a good balance between compression time and file size. 
+ let mut encoder = zstd::Encoder::new(file, 19)?; + // Long-distance matching provides a substantial benefit for our tarballs, and + // actually makes compression *faster*. + encoder.long_distance_matching(true).context("zst long_distance_matching")?; + // Long-distance matching uses a 128MB window, and currently needs about that much + // memory per thread, so limit the number of threads to be friendlier to 32-bit + // systems. + encoder.multithread(Ord::min(num_cpus::get(), 12) as u32).context("zst multithread")?; + Box::new(encoder) + } }) } @@ -56,6 +71,7 @@ impl CompressionFormat { Ok(match self { CompressionFormat::Gz => Box::new(GzDecoder::new(file)), CompressionFormat::Xz => Box::new(XzDecoder::new(file)), + CompressionFormat::Zstd => Box::new(zstd::Decoder::new(file)?), }) } } @@ -73,6 +89,7 @@ impl TryFrom<&'_ str> for CompressionFormats { match format.trim() { "gz" => parsed.push(CompressionFormat::Gz), "xz" => parsed.push(CompressionFormat::Xz), + "zst" => parsed.push(CompressionFormat::Zstd), other => anyhow::bail!("unknown compression format: {}", other), } } @@ -90,6 +107,10 @@ impl CompressionFormats { pub(crate) fn iter(&self) -> impl Iterator<Item = CompressionFormat> + '_ { self.0.iter().map(|i| *i) } + + pub(crate) fn len(&self) -> usize { + self.0.len() + } } pub(crate) trait Encoder: Send + Write { @@ -110,6 +131,13 @@ impl<W: Send + Write> Encoder for XzEncoder<W> { } } +impl<W: Send + Write> Encoder for zstd::Encoder<'_, W> { + fn finish(mut self: Box<Self>) -> Result<(), Error> { + zstd::Encoder::do_finish(self.as_mut()).context("failed to finish .zst file")?; + Ok(()) + } +} + pub(crate) struct CombinedEncoder { encoders: Vec<Box<dyn Encoder>>, } diff --git a/src/tarballer.rs b/src/tarballer.rs index 4ac8cf7..faf7ac9 100644 --- a/src/tarballer.rs +++ b/src/tarballer.rs @@ -45,13 +45,13 @@ impl Tarballer { .context("failed to collect file paths")?; files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev())); - // Write the tar into both encoded files. We write all directories + // Write the tar into the encoded files. 
We write all directories // first, so files may be directly created. (See rust-lang/rustup.rs#1092.) let buf = BufWriter::with_capacity(1024 * 1024, encoder); let mut builder = Builder::new(buf); let pool = rayon::ThreadPoolBuilder::new() - .num_threads(2) + .num_threads(self.compression_formats.len()) .build() .unwrap(); pool.install(move || { From 9a829d9700e2312110a1847249c2b84add8772af Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Mon, 3 May 2021 19:29:30 -0700 Subject: [PATCH 2/2] Add a test for zst --- test.sh | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test.sh b/test.sh index bf6de4c..949d296 100755 --- a/test.sh +++ b/test.sh @@ -1218,6 +1218,21 @@ generate_compression_formats_multiple() { } runtest generate_compression_formats_multiple +generate_compression_formats_multiple_zst() { + try sh "$S/gen-installer.sh" \ + --image-dir="$TEST_DIR/image1" \ + --work-dir="$WORK_DIR" \ + --output-dir="$OUT_DIR" \ + --package-name="rustc" \ + --component-name="rustc" \ + --compression-formats="gz,zst" + + try test -e "${OUT_DIR}/rustc.tar.gz" + try test ! -e "${OUT_DIR}/rustc.tar.xz" + try test -e "${OUT_DIR}/rustc.tar.zst" +} +runtest generate_compression_formats_multiple_zst + generate_compression_formats_error() { expect_fail sh "$S/gen-installer.sh" \ --image-dir="$TEST_DIR/image1" \