fix waiting to link binaries, but tensorflow example is still huge!
cosmicexplorer committed Aug 23, 2023
1 parent e2346b2 commit b995f3f
Showing 1 changed file with 28 additions and 24 deletions.
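The link-time fix is to stop baking the test archives into the benchmark executable with include_bytes! and instead open them from disk at runtime, which is why the diff below swaps prepare_memory_zip for extract_example_zip. A minimal sketch of the difference (open_fixture is an illustrative name, not part of the diff):

/* Before: include_bytes! copies the entire archive into the compiled binary,
 * so the linker has to chew through the 461M tensorflow wheel on every
 * rebuild of the benchmark. */
/* const ZIP: &[u8] = */
/*   include_bytes!("tensorflow_gpu-2.5.3-cp38-cp38-manylinux2010_x86_64.whl"); */

/* After: open the archive at runtime; the binary stays small and link time no
 * longer scales with the size of the fixture files. */
use std::{fs, io, path::Path};

fn open_fixture(path: &Path) -> io::Result<fs::File> {
  fs::OpenOptions::new().read(true).open(path)
}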
52 changes: 28 additions & 24 deletions lib/benches/my_benchmark.rs
@@ -18,32 +18,24 @@ mod parallel_merge {
   use tokio::runtime::Runtime;
   use zip::{self, result::ZipError};
 
-  use std::{fs, io, time::Duration};
+  use std::{fs, path::Path, time::Duration};
 
-  /* This file is 461M, or about half a gigabyte, with multiple individual very
-   * large binary files. */
-  /* const LARGE_ZIP_CONTENTS: &'static [u8] = */
-  /*   include_bytes!("tensorflow_gpu-2.5.3-cp38-cp38-manylinux2010_x86_64.whl"); */
-
-  /* This file is 37K. */
-  const SMALLER_ZIP_CONTENTS: &'static [u8] = include_bytes!("Keras-2.4.3-py2.py3-none-any.whl");
-
-  fn prepare_memory_zip(
-    zip_contents: &[u8],
+  fn extract_example_zip(
+    target: &Path,
   ) -> Result<(Vec<lib::FileSource>, tempfile::TempDir), ZipError> {
     /* Create the temp dir to extract into. */
     let extract_dir = tempfile::tempdir()?;
 
-    /* Load the zip archive from memory. */
-    let reader = io::Cursor::new(zip_contents);
-    let mut large_zip = zip::ZipArchive::new(reader)?;
+    /* Load the zip archive from file. */
+    let handle = fs::OpenOptions::new().read(true).open(target)?;
+    let mut zip_archive = zip::ZipArchive::new(handle)?;
 
     /* Extract the zip's contents. */
-    large_zip.extract(extract_dir.path())?;
+    zip_archive.extract(extract_dir.path())?;
 
     /* Generate the input to a MedusaZip by associating the (relative) file names
      * from the zip to their (absolute) extracted output paths. */
-    let input_files: Vec<lib::FileSource> = large_zip.file_names()
+    let input_files: Vec<lib::FileSource> = zip_archive.file_names()
       /* Ignore any directories, which are not represented in FileSource structs. */
       .filter(|f| !f.ends_with('/'))
       .map(|f| {
@@ -75,7 +67,6 @@ mod parallel_merge {
     Ok(output_zip.finish_into_readable()?)
   }
 
-
   pub fn bench_zips(c: &mut Criterion) {
     let rt = Runtime::new().unwrap();
 
@@ -89,29 +80,42 @@ mod parallel_merge {
        * some small true changes. */
       .significance_level(0.01);
 
-    for (id, zip_contents, n, t) in [
+    for (filename, n, t) in [
       (
+        /* This file is 37K. */
         "Keras-2.4.3-py2.py3-none-any.whl",
-        SMALLER_ZIP_CONTENTS,
         1000,
         Duration::from_secs(7),
       ),
-      /* ("tensorflow_gpu-2.5.3-cp38-cp38-manylinux2010_x86_64.whl", LARGE_ZIP_CONTENTS), */
+      (
+        /* This file is 461M, or about half a gigabyte, with multiple individual very
+         * large binary files. */
+        "tensorflow_gpu-2.5.3-cp38-cp38-manylinux2010_x86_64.whl",
+        10,
+        Duration::from_secs(330),
+      ),
     ]
     .iter()
     {
+      let target = Path::new(env!("CARGO_MANIFEST_DIR"))
+        .join("benches")
+        .join(filename);
+      let zip_len = target.metadata().unwrap().len();
+
+      let id = format!("{}({} bytes)", filename, zip_len);
+
       group
         .sample_size(*n)
         .measurement_time(*t)
-        .throughput(Throughput::Bytes(zip_contents.len() as u64));
+        .throughput(Throughput::Bytes(zip_len as u64));
 
       /* FIXME: assigning `_` to the second arg of this tuple will destroy the
        * extract dir, which is only a silent error producing an empty file!!!
        * AWFUL UX!!! */
-      let (input_files, _tmp_extract_dir) = prepare_memory_zip(zip_contents).unwrap();
+      let (input_files, _tmp_extract_dir) = extract_example_zip(&target).unwrap();
       group.noise_threshold(0.03);
       group.bench_with_input(
-        BenchmarkId::new(*id, "ParallelMerge"),
+        BenchmarkId::new(&id, "ParallelMerge"),
         &lib::zip::Parallelism::ParallelMerge,
         |b, p| {
           b.to_async(&rt)
@@ -124,7 +128,7 @@ mod parallel_merge {
        * improvement immediately after the last bench. */
       group.noise_threshold(0.05);
       group.bench_with_input(
-        BenchmarkId::new(*id, "Synchronous"),
+        BenchmarkId::new(&id, "Synchronous"),
         &lib::zip::Parallelism::Synchronous,
         |b, p| {
           b.to_async(&rt)
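The FIXME in this diff deserves emphasis: tempfile::TempDir deletes its directory when dropped, and a bare `_` pattern never binds a value, so writing `let (input_files, _) = ...` would drop the TempDir immediately and silently delete the freshly extracted files. A self-contained sketch of the gotcha (make_fixture is illustrative, standing in for extract_example_zip's path-plus-guard return shape):

use std::{fs, path::PathBuf};

/* Returns a file path together with the TempDir guard that keeps it alive. */
fn make_fixture() -> std::io::Result<(PathBuf, tempfile::TempDir)> {
  let dir = tempfile::tempdir()?;
  let file = dir.path().join("data.txt");
  fs::write(&file, b"hello")?;
  Ok((file, dir))
}

fn main() -> std::io::Result<()> {
  /* Correct: `_tmp` is a named binding, so the guard lives to end of scope. */
  let (file, _tmp) = make_fixture()?;
  assert!(file.exists());

  /* Buggy: `_` never binds, so the TempDir is dropped (and its directory
   * deleted) as soon as this statement finishes. */
  let (gone, _) = make_fixture()?;
  assert!(!gone.exists());
  Ok(())
}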

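For context on the Criterion knobs tuned in this diff: sample_size and measurement_time trade benchmark runtime against statistical power (hence 1000 samples for the 37K Keras wheel but only 10 for the 461M tensorflow wheel), throughput makes Criterion report bytes per second, and noise_threshold/significance_level control when a change is reported as real. A minimal standalone group, with illustrative names and numbers:

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use std::time::Duration;

fn bench_example(c: &mut Criterion) {
  let mut group = c.benchmark_group("zips");
  group
    .sample_size(10)                             /* few samples: each run is slow */
    .measurement_time(Duration::from_secs(30))
    .noise_threshold(0.05)                       /* treat <5% swings as noise */
    .throughput(Throughput::Bytes(461_000_000)); /* report bytes/sec */
  group.bench_with_input(BenchmarkId::new("input", "variant"), &(), |b, _input| {
    b.iter(|| { /* the work being measured */ });
  });
  group.finish();
}

criterion_group!(benches, bench_example);
criterion_main!(benches);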