Skip to content

Commit

Permalink
ok we win in parallel but lol
Browse files Browse the repository at this point in the history
  • Loading branch information
cosmicexplorer committed Aug 24, 2023
1 parent 037f3c7 commit 7f887fa
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 8 deletions.
3 changes: 2 additions & 1 deletion crawl-zip.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ def main(zip_src: str, out_zip: str, zipfile_zip: str | None = None) -> None:
print(f"zipped with parallelism {parallelism}", file=sys.stderr)

if zipfile_zip is not None:
with zipfile.ZipFile(zipfile_zip, mode="w", compression=zipfile.ZIP_DEFLATED) as zipfile_zf:
with zipfile.ZipFile(zipfile_zip, mode="w",
compresslevel=6, compression=zipfile.ZIP_DEFLATED) as zipfile_zf:
for rp in crawl_result.real_file_paths:
# FIXME: no intermediate directories written!
with Path(rp.resolved_path).open(mode="rb") as in_f,\
Expand Down
40 changes: 34 additions & 6 deletions lib/benches/my_benchmark.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ mod parallel_merge {
use tokio::runtime::Runtime;
use zip::{self, result::ZipError};

use std::{env, fs, path::Path, time::Duration};
use std::{env, fs, io, path::Path, time::Duration};

fn extract_example_zip(
target: &Path,
Expand Down Expand Up @@ -53,13 +53,16 @@ mod parallel_merge {
Ok((input_files, extract_dir))
}

async fn create_basic_zip(
async fn execute_medusa_zip(
input_files: Vec<lib::FileSource>,
parallelism: lib::zip::Parallelism,
) -> Result<zip::ZipArchive<fs::File>, lib::zip::MedusaZipError> {
let zip_spec = lib::zip::MedusaZip {
input_files,
zip_options: lib::zip::ZipOutputOptions::default(),
zip_options: lib::zip::ZipOutputOptions {
mtime_behavior: lib::zip::ModifiedTimeBehavior::Reproducible,
compression_options: lib::zip::CompressionStrategy::Deflated(Some(6)),
},
modifications: lib::zip::EntryModifications::default(),
parallelism,
};
Expand All @@ -69,6 +72,23 @@ mod parallel_merge {
Ok(output_zip.finish_into_readable()?)
}

/// Build a zip archive from `input_files` using only the `zip` crate, writing
/// the entries one after another in the order given.
///
/// Each entry is stored under its `name` with the Deflated method at
/// compression level 6. The archive is written to a file obtained from
/// `tempfile::tempfile()` and then reopened for reading.
///
/// # Errors
///
/// Returns a [`ZipError`] if the temporary file cannot be created, if a source
/// file cannot be opened or copied, or if the zip writer reports a failure.
fn execute_basic_zip(
    input_files: Vec<lib::FileSource>,
) -> Result<zip::ZipArchive<fs::File>, ZipError> {
    let backing_file = tempfile::tempfile()?;
    let mut writer = zip::ZipWriter::new(backing_file);

    /* The same options are applied to every entry. */
    let entry_options = zip::write::FileOptions::default()
        .compression_method(zip::CompressionMethod::Deflated)
        .compression_level(Some(6));

    for lib::FileSource {
        name: entry_name,
        source: source_path,
    } in input_files
    {
        let mut reader = fs::File::open(source_path)?;
        writer.start_file(entry_name.into_string(), entry_options)?;
        io::copy(&mut reader, &mut writer)?;
    }

    let archive = writer.finish_into_readable()?;
    Ok(archive)
}

pub fn bench_zips(c: &mut Criterion) {
let rt = Runtime::new().unwrap();

Expand Down Expand Up @@ -106,7 +126,8 @@ mod parallel_merge {
"Babel-2.12.1-py3-none-any.whl",
(80, 10),
(Duration::from_secs(35), Duration::from_secs(35)),
(0.2, 0.3),
/* 50% variation is within noise given our low sample size for the sync tests. */
(0.2, 0.5),
SamplingMode::Flat,
),
/* ( */
Expand Down Expand Up @@ -143,7 +164,7 @@ mod parallel_merge {
.noise_threshold(*noise_p);
group.bench_with_input(BenchmarkId::new(&id, parallelism), &parallelism, |b, p| {
b.to_async(&rt)
.iter(|| create_basic_zip(input_files.clone(), *p));
.iter(|| execute_medusa_zip(input_files.clone(), *p));
});

/* Run the sync implementation. */
Expand All @@ -155,7 +176,14 @@ mod parallel_merge {
.noise_threshold(*noise_sync);
group.bench_with_input(BenchmarkId::new(&id, parallelism), &parallelism, |b, p| {
b.to_async(&rt)
.iter(|| create_basic_zip(input_files.clone(), *p));
.iter(|| execute_medusa_zip(input_files.clone(), *p));
});

/* Run the implementation that uses only the zip crate. We reuse the same
* sampling presets on the assumption that its runtime will be very similar
* to the synchronous run above. */
group.bench_function(BenchmarkId::new(&id, "<sync zip crate>"), |b| {
b.iter(|| execute_basic_zip(input_files.clone()));
});
}
}
Expand Down
2 changes: 1 addition & 1 deletion lib/src/zip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ pub enum Parallelism {
///
/// Read source files and copy them to the output zip in order.
Synchronous,
/// <PARALLEL>
/// <PARALLEL-MERGE>
///
/// Parallelize creation by splitting up the input into chunks.
#[default]
Expand Down

0 comments on commit 7f887fa

Please sign in to comment.