Skip to content

Commit

Permalink
zstd support
Browse files Browse the repository at this point in the history
Based on [1], but using compression level 3 for speed rather than 19 for
competitiveness with xz.

[1] rust-lang#109
  • Loading branch information
aswild committed Jul 17, 2022
1 parent 300b5ec commit 29fc71d
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 2 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ walkdir = "2"
xz2 = "0.1.4"
num_cpus = "1"
remove_dir_all = "0.5"
zstd = { version = "0.10.0", features = ["zstdmt"] }

[dependencies.clap]
features = ["derive"]
Expand Down
29 changes: 29 additions & 0 deletions src/compression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,21 @@ use flate2::{read::GzDecoder, write::GzEncoder};
use rayon::prelude::*;
use std::{convert::TryFrom, fmt, io::Read, io::Write, path::Path, str::FromStr};
use xz2::{read::XzDecoder, write::XzEncoder};
use zstd::stream::{read::Decoder as ZstdDecoder, write::Encoder as ZstdEncoder};

/// A compression format this module knows how to encode and decode.
///
/// The enum is a plain, fieldless tag, so it is cheap to copy and can be
/// compared, hashed, and used as a map or set key.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub enum CompressionFormat {
    /// gzip, spelled `gz` in file extensions and format lists.
    Gz,
    /// xz, spelled `xz` in file extensions and format lists.
    Xz,
    /// Zstandard, spelled `zst` in file extensions and format lists.
    Zstd,
}

impl CompressionFormat {
/// Guesses the compression format from the final extension of `path`.
///
/// Returns `None` when the path has no extension, when the extension is not
/// valid UTF-8, or when it is not exactly `gz`, `xz` or `zst`.
pub(crate) fn detect_from_path(path: impl AsRef<Path>) -> Option<Self> {
    // Either step bails out with `None` before we ever look at the text.
    let extension = path.as_ref().extension()?.to_str()?;
    let format = match extension {
        "gz" => CompressionFormat::Gz,
        "xz" => CompressionFormat::Xz,
        "zst" => CompressionFormat::Zstd,
        _ => return None,
    };
    Some(format)
}
Expand All @@ -23,6 +26,7 @@ impl CompressionFormat {
match self {
CompressionFormat::Gz => "gz",
CompressionFormat::Xz => "xz",
CompressionFormat::Zstd => "zst",
}
}

Expand All @@ -48,6 +52,17 @@ impl CompressionFormat {
.encoder()?;
Box::new(XzEncoder::new_stream(file, stream))
}
CompressionFormat::Zstd => {
// zstd's default compression level is 3, which is on par with gzip but much faster
let mut enc = ZstdEncoder::new(file, 3).context("failed to initialize zstd encoder")?;
// Long-distance matching provides a substantial benefit for our tarballs, and
// actually makes compression *faster*.
enc.long_distance_matching(true).context("zst long_distance_matching")?;
// Enable multithreaded mode. zstd seems to be faster when using the number of
// physical CPU cores rather than logical/SMT threads.
enc.multithread(num_cpus::get_physical() as u32).context("zst multithreaded")?;
Box::new(enc)
}
})
}

Expand All @@ -56,6 +71,7 @@ impl CompressionFormat {
Ok(match self {
CompressionFormat::Gz => Box::new(GzDecoder::new(file)),
CompressionFormat::Xz => Box::new(XzDecoder::new(file)),
CompressionFormat::Zstd => Box::new(ZstdDecoder::new(file)?),
})
}
}
Expand All @@ -73,6 +89,7 @@ impl TryFrom<&'_ str> for CompressionFormats {
match format.trim() {
"gz" => parsed.push(CompressionFormat::Gz),
"xz" => parsed.push(CompressionFormat::Xz),
"zst" => parsed.push(CompressionFormat::Zstd),
other => anyhow::bail!("unknown compression format: {}", other),
}
}
Expand All @@ -97,6 +114,7 @@ impl fmt::Display for CompressionFormats {
fmt::Display::fmt(match format {
CompressionFormat::Xz => "xz",
CompressionFormat::Gz => "gz",
CompressionFormat::Zstd => "zst",
}, f)?;
}
Ok(())
Expand All @@ -113,6 +131,10 @@ impl CompressionFormats {
/// Returns an iterator that yields each stored format by value, in the
/// order the formats are held in the underlying list.
pub(crate) fn iter(&self) -> impl Iterator<Item = CompressionFormat> + '_ {
    // `CompressionFormat` is `Copy`, so `copied()` replaces the manual deref closure.
    self.0.iter().copied()
}

/// Returns how many compression formats are held in this list.
pub(crate) fn len(&self) -> usize {
    let formats = &self.0;
    formats.len()
}
}

pub(crate) trait Encoder: Send + Write {
Expand All @@ -133,6 +155,13 @@ impl<W: Send + Write> Encoder for XzEncoder<W> {
}
}

impl<W: Send + Write> Encoder for ZstdEncoder<'_, W> {
    /// Finalizes the zstd stream held by this boxed encoder.
    ///
    /// # Errors
    ///
    /// Returns the underlying failure, annotated with
    /// "failed to finish .zst file", if the stream cannot be finished.
    fn finish(mut self: Box<Self>) -> Result<(), Error> {
        // `do_finish` works through `&mut self`, so it can be called on the box
        // without moving the encoder out of it.
        let finished = self.do_finish();
        finished.context("failed to finish .zst file")?;
        Ok(())
    }
}

/// Owns a list of boxed [`Encoder`] trait objects.
///
/// NOTE(review): the name suggests each write is forwarded to every inner
/// encoder, but the impls are outside this view — confirm before relying on it.
pub(crate) struct CombinedEncoder {
// The wrapped encoders, stored as trait objects so different formats can be mixed.
encoders: Vec<Box<dyn Encoder>>,
}
Expand Down
4 changes: 2 additions & 2 deletions src/tarballer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,13 @@ impl Tarballer {
.context("failed to collect file paths")?;
files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev()));

// Write the tar into both encoded files. We write all directories
// Write the tar into all encoded files. We write all directories
// first, so files may be directly created. (See rust-lang/rustup.rs#1092.)
let buf = BufWriter::with_capacity(1024 * 1024, encoder);
let mut builder = Builder::new(buf);

let pool = rayon::ThreadPoolBuilder::new()
.num_threads(2)
.num_threads(self.compression_formats.len())
.build()
.unwrap();
pool.install(move || {
Expand Down
15 changes: 15 additions & 0 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1218,6 +1218,21 @@ generate_compression_formats_multiple() {
}
runtest generate_compression_formats_multiple

# Runs gen-installer.sh with --compression-formats="gz,zst" and checks which
# tarballs end up in the output directory.
generate_compression_formats_multiple_zst() {
try sh "$S/gen-installer.sh" \
--image-dir="$TEST_DIR/image1" \
--work-dir="$WORK_DIR" \
--output-dir="$OUT_DIR" \
--package-name="rustc" \
--component-name="rustc" \
--compression-formats="gz,zst"

# The requested .gz and .zst tarballs must exist; xz was not requested, so
# no .tar.xz may be present.
try test -e "${OUT_DIR}/rustc.tar.gz"
try test ! -e "${OUT_DIR}/rustc.tar.xz"
try test -e "${OUT_DIR}/rustc.tar.zst"
}
runtest generate_compression_formats_multiple_zst

generate_compression_formats_error() {
expect_fail sh "$S/gen-installer.sh" \
--image-dir="$TEST_DIR/image1" \
Expand Down

0 comments on commit 29fc71d

Please sign in to comment.