fix waiting to link binaries, but tensorflow example is still huge!
cosmicexplorer committed Aug 23, 2023
1 parent e2346b2 commit b995f3f
Showing 1 changed file with 28 additions and 24 deletions.
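The link-time fix is to stop baking the test archives into the benchmark executable with include_bytes! and instead open them from disk at runtime, which is why the diff below swaps prepare_memory_zip for extract_example_zip. A minimal sketch of the difference (open_fixture is an illustrative name, not part of the diff):

/* Before: include_bytes! copies the entire archive into the compiled binary,
 * so the linker has to chew through the 461M tensorflow wheel on every
 * rebuild of the benchmark. */
/* const ZIP: &[u8] = */
/*   include_bytes!("tensorflow_gpu-2.5.3-cp38-cp38-manylinux2010_x86_64.whl"); */

/* After: open the archive at runtime; the binary stays small and link time no
 * longer scales with the size of the fixture files. */
use std::{fs, io, path::Path};

fn open_fixture(path: &Path) -> io::Result<fs::File> {
  fs::OpenOptions::new().read(true).open(path)
}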
52 changes: 28 additions & 24 deletions lib/benches/my_benchmark.rs
@@ -18,32 +18,24 @@ mod parallel_merge {
   use tokio::runtime::Runtime;
   use zip::{self, result::ZipError};
 
-  use std::{fs, io, time::Duration};
+  use std::{fs, path::Path, time::Duration};
 
-  /* This file is 461M, or about half a gigabyte, with multiple individual very
-   * large binary files. */
-  /* const LARGE_ZIP_CONTENTS: &'static [u8] = */
-  /*   include_bytes!("tensorflow_gpu-2.5.3-cp38-cp38-manylinux2010_x86_64.whl"); */
-
-  /* This file is 37K. */
-  const SMALLER_ZIP_CONTENTS: &'static [u8] = include_bytes!("Keras-2.4.3-py2.py3-none-any.whl");
-
-  fn prepare_memory_zip(
-    zip_contents: &[u8],
+  fn extract_example_zip(
+    target: &Path,
   ) -> Result<(Vec<lib::FileSource>, tempfile::TempDir), ZipError> {
     /* Create the temp dir to extract into. */
     let extract_dir = tempfile::tempdir()?;
 
-    /* Load the zip archive from memory. */
-    let reader = io::Cursor::new(zip_contents);
-    let mut large_zip = zip::ZipArchive::new(reader)?;
+    /* Load the zip archive from file. */
+    let handle = fs::OpenOptions::new().read(true).open(target)?;
+    let mut zip_archive = zip::ZipArchive::new(handle)?;
 
     /* Extract the zip's contents. */
-    large_zip.extract(extract_dir.path())?;
+    zip_archive.extract(extract_dir.path())?;
 
     /* Generate the input to a MedusaZip by associating the (relative) file names
      * from the zip to their (absolute) extracted output paths. */
-    let input_files: Vec<lib::FileSource> = large_zip.file_names()
+    let input_files: Vec<lib::FileSource> = zip_archive.file_names()
       /* Ignore any directories, which are not represented in FileSource structs. */
       .filter(|f| !f.ends_with('/'))
       .map(|f| {
@@ -75,7 +67,6 @@ mod parallel_merge {
     Ok(output_zip.finish_into_readable()?)
   }
 
-
   pub fn bench_zips(c: &mut Criterion) {
     let rt = Runtime::new().unwrap();
 
@@ -89,29 +80,42 @@ mod parallel_merge {
        * some small true changes. */
       .significance_level(0.01);
 
-    for (id, zip_contents, n, t) in [
+    for (filename, n, t) in [
       (
+        /* This file is 37K. */
         "Keras-2.4.3-py2.py3-none-any.whl",
-        SMALLER_ZIP_CONTENTS,
         1000,
         Duration::from_secs(7),
       ),
-      /* ("tensorflow_gpu-2.5.3-cp38-cp38-manylinux2010_x86_64.whl", LARGE_ZIP_CONTENTS), */
+      (
+        /* This file is 461M, or about half a gigabyte, with multiple individual very
+         * large binary files. */
+        "tensorflow_gpu-2.5.3-cp38-cp38-manylinux2010_x86_64.whl",
+        10,
+        Duration::from_secs(330),
+      ),
     ]
     .iter()
     {
+      let target = Path::new(env!("CARGO_MANIFEST_DIR"))
+        .join("benches")
+        .join(filename);
+      let zip_len = target.metadata().unwrap().len();
+
+      let id = format!("{}({} bytes)", filename, zip_len);
+
       group
         .sample_size(*n)
         .measurement_time(*t)
-        .throughput(Throughput::Bytes(zip_contents.len() as u64));
+        .throughput(Throughput::Bytes(zip_len as u64));
 
       /* FIXME: assigning `_` to the second arg of this tuple will destroy the
        * extract dir, which is only a silent error producing an empty file!!!
        * AWFUL UX!!! */
-      let (input_files, _tmp_extract_dir) = prepare_memory_zip(zip_contents).unwrap();
+      let (input_files, _tmp_extract_dir) = extract_example_zip(&target).unwrap();
       group.noise_threshold(0.03);
       group.bench_with_input(
-        BenchmarkId::new(*id, "ParallelMerge"),
+        BenchmarkId::new(&id, "ParallelMerge"),
         &lib::zip::Parallelism::ParallelMerge,
         |b, p| {
           b.to_async(&rt)
@@ -124,7 +128,7 @@ mod parallel_merge {
        * improvement immediately after the last bench. */
       group.noise_threshold(0.05);
       group.bench_with_input(
-        BenchmarkId::new(*id, "Synchronous"),
+        BenchmarkId::new(&id, "Synchronous"),
         &lib::zip::Parallelism::Synchronous,
         |b, p| {
           b.to_async(&rt)
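The FIXME in this diff deserves emphasis: tempfile::TempDir deletes its directory when dropped, and a bare `_` pattern never binds a value, so writing `let (input_files, _) = ...` would drop the TempDir immediately and silently delete the freshly extracted files. A self-contained sketch of the gotcha (make_fixture is illustrative, standing in for extract_example_zip's path-plus-guard return shape):

use std::{fs, path::PathBuf};

/* Returns a file path together with the TempDir guard that keeps it alive. */
fn make_fixture() -> std::io::Result<(PathBuf, tempfile::TempDir)> {
  let dir = tempfile::tempdir()?;
  let file = dir.path().join("data.txt");
  fs::write(&file, b"hello")?;
  Ok((file, dir))
}

fn main() -> std::io::Result<()> {
  /* Correct: `_tmp` is a named binding, so the guard lives to end of scope. */
  let (file, _tmp) = make_fixture()?;
  assert!(file.exists());

  /* Buggy: `_` never binds, so the TempDir is dropped (and its directory
   * deleted) as soon as this statement finishes. */
  let (gone, _) = make_fixture()?;
  assert!(!gone.exists());
  Ok(())
}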

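For context on the Criterion knobs tuned in this diff: sample_size and measurement_time trade benchmark runtime against statistical power (hence 1000 samples for the 37K Keras wheel but only 10 for the 461M tensorflow wheel), throughput makes Criterion report bytes per second, and noise_threshold/significance_level control when a change is reported as real. A minimal standalone group, with illustrative names and numbers:

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::time::Duration;

fn bench_example(c: &mut Criterion) {
  let mut group = c.benchmark_group("zips");
  group
    .sample_size(10)                             /* few samples: each run is slow */
    .measurement_time(Duration::from_secs(30))
    .noise_threshold(0.05)                       /* treat <5% swings as noise */
    .throughput(Throughput::Bytes(461_000_000)); /* report bytes/sec */
  group.bench_with_input(BenchmarkId::new("input", "variant"), &(), |b, _input| {
    b.iter(|| { /* the work being measured */ });
  });
  group.finish();
}

criterion_group!(benches, bench_example);
criterion_main!(benches);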