tokio-rs · caspermeijn · Jul 19, 2024 · Apr 22, 2024
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,5 +1,6 @@
 [workspace]
 members = [
+  "benchmarks",
   "conformance",
   "prost",
   "prost-build",

diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml
@@ -0,0 +1,22 @@
+[package]
+name = "benchmarks"
+version = "0.0.0"
+edition = "2021"
+publish = false
+
+[dependencies]
+
+[build-dependencies]
+prost-build = { path = "../prost-build" }
+
+[dev-dependencies]
+prost = { path = "../prost" }
+criterion = { version = "0.5", default-features = false }
+
+[lib]
+# https://bheisler.github.io/criterion.rs/book/faq.html#cargo-bench-gives-unrecognized-option-errors-for-valid-command-line-options
+bench = false
+
+[[bench]]
+name = "dataset"
+harness = false
diff --git a/protobuf/benches/dataset.rs → benchmarks/benches/dataset.rs b/protobuf/benches/dataset.rs → benchmarks/benches/dataset.rs
@@ -1,21 +1,39 @@
-use std::error::Error;
-use std::fs::File;
-use std::io::Read;
-use std::path::Path;
-
 use criterion::{criterion_group, criterion_main, Criterion};
 use prost::Message;
+use std::error::Error;
+
+pub mod benchmarks {
+    include!(concat!(env!("OUT_DIR"), "/benchmarks.rs"));
+
+    pub mod dataset {
+        pub fn google_message1_proto2() -> &'static [u8] {
+            include_bytes!("../../third_party/old_protobuf_benchmarks/datasets/google_message1/proto2/dataset.google_message1_proto2.pb")
+        }
+
+        pub fn google_message1_proto3() -> &'static [u8] {
+            include_bytes!("../../third_party/old_protobuf_benchmarks/datasets/google_message1/proto3/dataset.google_message1_proto3.pb")
+        }
 
-use protobuf::benchmarks::{dataset, proto2, proto3, BenchmarkDataset};
+        pub fn google_message2() -> &'static [u8] {
+            include_bytes!("../../third_party/old_protobuf_benchmarks/datasets/google_message2/dataset.google_message2.pb")
+        }
+    }
+
+    pub mod proto2 {
+        include!(concat!(env!("OUT_DIR"), "/benchmarks.proto2.rs"));
+    }
+    pub mod proto3 {
+        include!(concat!(env!("OUT_DIR"), "/benchmarks.proto3.rs"));
+    }
+}
+
+use crate::benchmarks::BenchmarkDataset;
 
-fn load_dataset(dataset: &Path) -> Result<BenchmarkDataset, Box<dyn Error>> {
-    let mut f = File::open(dataset)?;
-    let mut buf = Vec::new();
-    f.read_to_end(&mut buf)?;
-    Ok(BenchmarkDataset::decode(buf.as_slice())?)
+fn load_dataset(dataset: &[u8]) -> Result<BenchmarkDataset, Box<dyn Error>> {
+    Ok(BenchmarkDataset::decode(dataset)?)
 }
 
-fn benchmark_dataset<M>(criterion: &mut Criterion, name: &str, dataset: &'static Path)
+fn benchmark_dataset<M>(criterion: &mut Criterion, name: &str, dataset: &'static [u8])
 where
     M: prost::Message + Default + 'static,
 {
@@ -71,14 +89,24 @@ where
 macro_rules! dataset {
     ($name: ident, $ty: ty) => {
         fn $name(criterion: &mut Criterion) {
-            benchmark_dataset::<$ty>(criterion, stringify!($name), dataset::$name());
+            benchmark_dataset::<$ty>(
+                criterion,
+                stringify!($name),
+                crate::benchmarks::dataset::$name(),
+            );
         }
     };
 }
 
-dataset!(google_message1_proto2, proto2::GoogleMessage1);
-dataset!(google_message1_proto3, proto3::GoogleMessage1);
-dataset!(google_message2, proto2::GoogleMessage2);
+dataset!(
+    google_message1_proto2,
+    crate::benchmarks::proto2::GoogleMessage1
+);
+dataset!(
+    google_message1_proto3,
+    crate::benchmarks::proto3::GoogleMessage1
+);
+dataset!(google_message2, crate::benchmarks::proto2::GoogleMessage2);
 
 criterion_group!(
     dataset,

diff --git a/benchmarks/build.rs b/benchmarks/build.rs
@@ -0,0 +1,19 @@
+use std::path::PathBuf;
+
+static DATASET_PROTOS: &[&str] = &[
+    "google_message1/proto2/benchmark_message1_proto2.proto",
+    "google_message1/proto3/benchmark_message1_proto3.proto",
+    "google_message2/benchmark_message2.proto",
+];
+
+fn main() {
+    let old_protobuf_benchmarks = PathBuf::from("../third_party/old_protobuf_benchmarks");
+
+    let mut benchmark_protos = vec![old_protobuf_benchmarks.join("benchmarks.proto")];
+    benchmark_protos.extend(
+        DATASET_PROTOS
+            .iter()
+            .map(|proto| old_protobuf_benchmarks.join("datasets").join(proto)),
+    );
+    prost_build::compile_protos(&benchmark_protos, &[old_protobuf_benchmarks]).unwrap();
+}
diff --git a/benchmarks/src/lib.rs b/benchmarks/src/lib.rs
@@ -0,0 +1 @@
+// This library is intentionally empty; the crate exists only to host the benchmarks in `benches/`.
diff --git a/protobuf/Cargo.toml b/protobuf/Cargo.toml
@@ -14,16 +14,5 @@ anyhow = "1.0.1"
 prost-build = { path = "../prost-build" }
 tempfile = "3"
 
-[dev-dependencies]
-criterion = { version = "0.5", default-features = false }
-
-[lib]
-# https://bheisler.github.io/criterion.rs/book/faq.html#cargo-bench-gives-unrecognized-option-errors-for-valid-command-line-options
-bench = false
-
-[[bench]]
-name = "dataset"
-harness = false
-
 [package.metadata.cargo-machete]
-ignored = ["prost-types"]
+ignored = ["prost", "prost-types"]
diff --git a/protobuf/README.md b/protobuf/README.md
@@ -1,8 +1,8 @@
 # `protobuf`
 
-`protobuf` is an internal library used by `prost` conformance tests, benchmarks,
+`protobuf` is an internal library used by `prost` conformance tests
 and integration-tests. `protobuf` downloads, compiles, and installs the
-[Protobuf][1] project, including the conformance test runner, `libprotobuf`,
-benchmark data and test and benchmark .protos into the Cargo target directory.
+[Protobuf][1] project, including the conformance test runner and `libprotobuf`
+into the Cargo target directory.
 
 [1]: https://github.com/google/protobuf/
diff --git a/protobuf/build.rs b/protobuf/build.rs
@@ -13,12 +13,6 @@ static TEST_PROTOS: &[&str] = &[
     "unittest_import_public.proto",
 ];
 
-static DATASET_PROTOS: &[&str] = &[
-    "google_message1/proto2/benchmark_message1_proto2.proto",
-    "google_message1/proto3/benchmark_message1_proto3.proto",
-    "google_message2/benchmark_message2.proto",
-];
-
 fn main() -> Result<()> {
     let out_dir =
         &PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR environment variable not set"));
@@ -44,20 +38,10 @@ fn main() -> Result<()> {
         fs::create_dir(prefix_dir).expect("failed to create prefix directory");
         install_conformance_test_runner(&src_dir, prefix_dir)?;
         install_protos(&src_dir, prefix_dir)?;
-        install_datasets(&src_dir, prefix_dir)?;
         fs::rename(prefix_dir, protobuf_dir).context("failed to move protobuf dir")?;
     }
 
     let include_dir = &protobuf_dir.join("include");
-    let benchmarks_include_dir = &include_dir.join("benchmarks");
-    let datasets_include_dir = &benchmarks_include_dir.join("datasets");
-    let mut benchmark_protos = vec![benchmarks_include_dir.join("benchmarks.proto")];
-    benchmark_protos.extend(
-        DATASET_PROTOS
-            .iter()
-            .map(|proto| datasets_include_dir.join(proto)),
-    );
-    prost_build::compile_protos(&benchmark_protos, &[benchmarks_include_dir]).unwrap();
 
     let conformance_include_dir = include_dir.join("conformance");
     prost_build::compile_protos(
@@ -198,49 +182,5 @@ fn install_protos(src_dir: &Path, prefix_dir: &Path) -> Result<()> {
     )
     .expect("failed to move conformance.proto");
 
-    // Move the benchmark datasets to the install directory.
-    let benchmarks_src_dir = &src_dir.join("benchmarks");
-    let benchmarks_include_dir = &include_dir.join("benchmarks");
-    let datasets_src_dir = &benchmarks_src_dir.join("datasets");
-    let datasets_include_dir = &benchmarks_include_dir.join("datasets");
-    fs::create_dir(benchmarks_include_dir).expect("failed to create benchmarks include directory");
-    fs::copy(
-        benchmarks_src_dir.join("benchmarks.proto"),
-        benchmarks_include_dir.join("benchmarks.proto"),
-    )
-    .expect("failed to move benchmarks.proto");
-    for proto in DATASET_PROTOS.iter().map(Path::new) {
-        let dir = &datasets_include_dir.join(proto.parent().unwrap());
-        fs::create_dir_all(dir)
-            .with_context(|| format!("unable to create directory {}", dir.display()))?;
-        fs::copy(
-            datasets_src_dir.join(proto),
-            datasets_include_dir.join(proto),
-        )
-        .with_context(|| format!("failed to move {}", proto.display()))?;
-    }
-
-    Ok(())
-}
-
-fn install_datasets(src_dir: &Path, prefix_dir: &Path) -> Result<()> {
-    let share_dir = &prefix_dir.join("share");
-    fs::create_dir(share_dir).expect("failed to create share directory");
-    for dataset in &[
-        Path::new("google_message1")
-            .join("proto2")
-            .join("dataset.google_message1_proto2.pb"),
-        Path::new("google_message1")
-            .join("proto3")
-            .join("dataset.google_message1_proto3.pb"),
-        Path::new("google_message2").join("dataset.google_message2.pb"),
-    ] {
-        fs::copy(
-            src_dir.join("benchmarks").join("datasets").join(dataset),
-            share_dir.join(dataset.file_name().unwrap()),
-        )
-        .with_context(|| format!("failed to move {}", dataset.display()))?;
-    }
-
     Ok(())
 }
diff --git a/protobuf/src/lib.rs b/protobuf/src/lib.rs
@@ -1,41 +1,5 @@
 #![allow(clippy::large_enum_variant, clippy::unreadable_literal)]
 
-pub mod benchmarks {
-    include!(concat!(env!("OUT_DIR"), "/benchmarks.rs"));
-
-    pub mod dataset {
-        use std::path::Path;
-
-        pub fn google_message1_proto2() -> &'static Path {
-            Path::new(concat!(
-                env!("PROTOBUF"),
-                "/share/dataset.google_message1_proto2.pb"
-            ))
-        }
-
-        pub fn google_message1_proto3() -> &'static Path {
-            Path::new(concat!(
-                env!("PROTOBUF"),
-                "/share/dataset.google_message1_proto3.pb"
-            ))
-        }
-
-        pub fn google_message2() -> &'static Path {
-            Path::new(concat!(
-                env!("PROTOBUF"),
-                "/share/dataset.google_message2.pb"
-            ))
-        }
-    }
-
-    pub mod proto2 {
-        include!(concat!(env!("OUT_DIR"), "/benchmarks.proto2.rs"));
-    }
-    pub mod proto3 {
-        include!(concat!(env!("OUT_DIR"), "/benchmarks.proto3.rs"));
-    }
-}
-
 pub mod conformance {
     use std::path::Path;
 

diff --git a/third_party/old_protobuf_benchmarks/README.md b/third_party/old_protobuf_benchmarks/README.md
@@ -0,0 +1,5 @@
+# Old protobuf benchmarks
+
+This directory contains the benchmark protos and datasets from protobuf v3.14. Upstream changed
+(and later removed) these benchmarks in newer versions of the library. This copy ensures that
+they stay available for prost.
diff --git a/third_party/old_protobuf_benchmarks/benchmarks.proto b/third_party/old_protobuf_benchmarks/benchmarks.proto
@@ -0,0 +1,63 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc.  All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+package benchmarks;
+option java_package = "com.google.protobuf.benchmarks";
+
+message BenchmarkDataset {
+  // Name of the benchmark dataset.  This should be unique across all datasets.
+  // Should only contain word characters: [a-zA-Z0-9_]
+  string name = 1;
+
+  // Fully-qualified name of the protobuf message for this dataset.
+  // It will be one of the messages defined in benchmark_messages_proto2.proto
+  // or benchmark_messages_proto3.proto.
+  //
+  // Implementations that do not support reflection can implement this with
+  // an explicit "if/else" chain that lists every known message defined
+  // in those files.
+  string message_name = 2;
+
+  // The payload(s) for this dataset.  They should be parsed or serialized
+  // in sequence, in a loop, i.e.
+  //
+  //  while (!benchmarkDone) {  // Benchmark runner decides when to exit.
+  //    for (i = 0; i < benchmark.payload.length; i++) {
+  //      parse(benchmark.payload[i])
+  //    }
+  //  }
+  //
+  // This is intended to let datasets include a variety of data to provide
+  // potentially more realistic results than just parsing the same message
+  // over and over.  A single message parsed repeatedly could yield unusually
+  // good branch prediction performance.
+  repeated bytes payload = 3;
+}