From b995f3fc9d96f2ab471676e5a2f2eb7065926ad1 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 23 Aug 2023 19:08:08 -0400 Subject: [PATCH] fix waiting to link binaries, but tensorflow example is still huge! --- lib/benches/my_benchmark.rs | 52 ++++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/lib/benches/my_benchmark.rs b/lib/benches/my_benchmark.rs index 2103a50..e3755d0 100644 --- a/lib/benches/my_benchmark.rs +++ b/lib/benches/my_benchmark.rs @@ -18,32 +18,24 @@ mod parallel_merge { use tokio::runtime::Runtime; use zip::{self, result::ZipError}; - use std::{fs, io, time::Duration}; + use std::{fs, path::Path, time::Duration}; - /* This file is 461M, or about half a gigabyte, with multiple individual very - * large binary files. */ - /* const LARGE_ZIP_CONTENTS: &'static [u8] = */ - /* include_bytes!("tensorflow_gpu-2.5.3-cp38-cp38-manylinux2010_x86_64.whl"); */ - - /* This file is 37K. */ - const SMALLER_ZIP_CONTENTS: &'static [u8] = include_bytes!("Keras-2.4.3-py2.py3-none-any.whl"); - - fn prepare_memory_zip( - zip_contents: &[u8], + fn extract_example_zip( + target: &Path, ) -> Result<(Vec, tempfile::TempDir), ZipError> { /* Create the temp dir to extract into. */ let extract_dir = tempfile::tempdir()?; - /* Load the zip archive from memory. */ - let reader = io::Cursor::new(zip_contents); - let mut large_zip = zip::ZipArchive::new(reader)?; + /* Load the zip archive from file. */ + let handle = fs::OpenOptions::new().read(true).open(target)?; + let mut zip_archive = zip::ZipArchive::new(handle)?; /* Extract the zip's contents. */ - large_zip.extract(extract_dir.path())?; + zip_archive.extract(extract_dir.path())?; /* Generate the input to a MedusaZip by associating the (relative) file names * from the zip to their (absolute) extracted output paths. */ - let input_files: Vec = large_zip.file_names() + let input_files: Vec = zip_archive.file_names() /* Ignore any directories, which are not represented in FileSource structs. */ .filter(|f| !f.ends_with('/')) .map(|f| { @@ -75,7 +67,6 @@ mod parallel_merge { Ok(output_zip.finish_into_readable()?) } - pub fn bench_zips(c: &mut Criterion) { let rt = Runtime::new().unwrap(); @@ -89,29 +80,42 @@ mod parallel_merge { * some small true changes. */ .significance_level(0.01); - for (id, zip_contents, n, t) in [ + for (filename, n, t) in [ ( + /* This file is 37K. */ "Keras-2.4.3-py2.py3-none-any.whl", - SMALLER_ZIP_CONTENTS, 1000, Duration::from_secs(7), ), - /* ("tensorflow_gpu-2.5.3-cp38-cp38-manylinux2010_x86_64.whl", LARGE_ZIP_CONTENTS), */ + ( + /* This file is 461M, or about half a gigabyte, with multiple individual very + * large binary files. */ + "tensorflow_gpu-2.5.3-cp38-cp38-manylinux2010_x86_64.whl", + 10, + Duration::from_secs(330), + ), ] .iter() { + let target = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("benches") + .join(filename); + let zip_len = target.metadata().unwrap().len(); + + let id = format!("{}({} bytes)", filename, zip_len); + group .sample_size(*n) .measurement_time(*t) - .throughput(Throughput::Bytes(zip_contents.len() as u64)); + .throughput(Throughput::Bytes(zip_len as u64)); /* FIXME: assigning `_` to the second arg of this tuple will destroy the * extract dir, which is only a silent error producing an empty file!!! * AWFUL UX!!! */ - let (input_files, _tmp_extract_dir) = prepare_memory_zip(zip_contents).unwrap(); + let (input_files, _tmp_extract_dir) = extract_example_zip(&target).unwrap(); group.noise_threshold(0.03); group.bench_with_input( - BenchmarkId::new(*id, "ParallelMerge"), + BenchmarkId::new(&id, "ParallelMerge"), &lib::zip::Parallelism::ParallelMerge, |b, p| { b.to_async(&rt) @@ -124,7 +128,7 @@ mod parallel_merge { * improvement immediately after the last bench. */ group.noise_threshold(0.05); group.bench_with_input( - BenchmarkId::new(*id, "Synchronous"), + BenchmarkId::new(&id, "Synchronous"), &lib::zip::Parallelism::Synchronous, |b, p| { b.to_async(&rt)