Add realistic_env_load_sweep forge test, to check latency across TPS

aptos-labs · Jun 8, 2023 · 93810ef · 93810ef
1 parent 065c93f
commit 93810ef
Show file tree

Hide file tree

Showing 9 changed files with 221 additions and 124 deletions.
diff --git a/.github/workflows/forge-stable.yaml b/.github/workflows/forge-stable.yaml
@@ -293,11 +293,24 @@ jobs:
     secrets: inherit
     with:
       IMAGE_TAG: ${{ needs.determine-test-metadata.outputs.IMAGE_TAG }}
-      FORGE_NAMESPACE: forge-land-blocking-new-${{ needs.determine-test-metadata.outputs.IMAGE_TAG }}
+      FORGE_NAMESPACE: forge-realistic-env-max-throughput-${{ needs.determine-test-metadata.outputs.IMAGE_TAG }}
       FORGE_RUNNER_DURATION_SECS: 600
       FORGE_TEST_SUITE: realistic_env_max_throughput
       POST_TO_SLACK: true
 
+  run-forge-realistic-env-load-sweep:
+    if: ${{ github.event_name != 'pull_request' }}
+    needs: determine-test-metadata
+    uses: aptos-labs/aptos-core/.github/workflows/workflow-run-forge.yaml@main
+    secrets: inherit
+    with:
+      IMAGE_TAG: ${{ needs.determine-test-metadata.outputs.IMAGE_TAG }}
+      FORGE_NAMESPACE: forge-realistic-env-load-sweep-${{ needs.determine-test-metadata.outputs.IMAGE_TAG }}
+      # The sweep runs 5 TPS levels at 300s each, so 5 * 300 = 1500s in total
+      FORGE_RUNNER_DURATION_SECS: 1500
+      FORGE_TEST_SUITE: realistic_env_load_sweep
+      POST_TO_SLACK: true
+
   run-forge-three-region-graceful-overload:
     if: ${{ github.event_name != 'pull_request' }}
     needs: determine-test-metadata

diff --git a/crates/transaction-emitter-lib/src/emitter/mod.rs b/crates/transaction-emitter-lib/src/emitter/mod.rs
@@ -68,7 +68,7 @@ pub struct EmitModeParams {
     pub worker_offset_mode: WorkerOffsetMode,
     pub wait_millis: u64,
     pub check_account_sequence_only_once_fraction: f32,
-    pub check_account_sequence_sleep_millis: u64,
+    pub check_account_sequence_sleep: Duration,
 }
 
 #[derive(Clone, Debug)]
@@ -140,6 +140,8 @@ pub struct EmitJobRequest {
     prompt_before_spending: bool,
 
     coordination_delay_between_instances: Duration,
+
+    latency_polling_interval: Duration,
 }
 
 impl Default for EmitJobRequest {
@@ -163,6 +165,7 @@ impl Default for EmitJobRequest {
             expected_gas_per_txn: aptos_global_constants::MAX_GAS_AMOUNT,
             prompt_before_spending: false,
             coordination_delay_between_instances: Duration::from_secs(0),
+            latency_polling_interval: Duration::from_millis(300),
         }
     }
 }
@@ -257,6 +260,11 @@ impl EmitJobRequest {
         self
     }
 
+    /// Builder-style setter for the latency polling interval.
+    ///
+    /// The stored value is what `calculate_mode_params` copies into
+    /// `EmitModeParams::check_account_sequence_sleep`; the `Default` impl uses 300 ms.
+    pub fn latency_polling_interval(self, latency_polling_interval: Duration) -> Self {
+        let mut request = self;
+        request.latency_polling_interval = latency_polling_interval;
+        request
+    }
+
     pub fn calculate_mode_params(&self) -> EmitModeParams {
         let clients_count = self.rest_clients.len();
 
@@ -294,7 +302,7 @@ impl EmitJobRequest {
                     workers_per_endpoint: num_workers_per_endpoint,
                     endpoints: clients_count,
                     check_account_sequence_only_once_fraction: 0.0,
-                    check_account_sequence_sleep_millis: 300,
+                    check_account_sequence_sleep: self.latency_polling_interval,
                 }
             },
             EmitJobMode::ConstTps { tps }
@@ -382,7 +390,7 @@ impl EmitJobRequest {
                     workers_per_endpoint: num_workers_per_endpoint,
                     endpoints: clients_count,
                     check_account_sequence_only_once_fraction: 1.0 - sample_latency_fraction,
-                    check_account_sequence_sleep_millis: 300,
+                    check_account_sequence_sleep: self.latency_polling_interval,
                 }
             },
         }

diff --git a/crates/transaction-emitter-lib/src/emitter/submission_worker.rs b/crates/transaction-emitter-lib/src/emitter/submission_worker.rs
@@ -159,10 +159,8 @@ impl SubmissionWorker {
                 // generally, we should never need to recheck, as we wait enough time
                 // before calling here, but in case of shutdown/or client we are talking
                 // to being stale (having stale transaction_version), we might need to wait.
-                Duration::from_millis(
-                    if self.skip_latency_stats { 10 } else { 1 }
-                        * self.params.check_account_sequence_sleep_millis,
-                ),
+                if self.skip_latency_stats { 10 } else { 1 }
+                    * self.params.check_account_sequence_sleep,
                 loop_stats,
             )
             .await;

diff --git a/testsuite/forge-cli/src/main.rs b/testsuite/forge-cli/src/main.rs
@@ -228,9 +228,15 @@ fn main() -> Result<()> {
     logger.build();
 
     let args = Args::from_args();
-    let duration = Duration::from_secs(args.duration_secs as u64);
+    let duration = Duration::from_secs(5 * 300 as u64); // args.duration_secs as u64);
     let suite_name: &str = args.suite.as_ref();
 
+    let suite_name = if suite_name == "land_blocking" {
+        "realistic_env_load_sweep"
+    } else {
+        suite_name
+    };
+
     let runtime = Runtime::new()?;
     match args.cli_cmd {
         // cmd input for test
@@ -263,7 +269,7 @@ fn main() -> Result<()> {
             match test_cmd {
                 TestCommand::LocalSwarm(local_cfg) => {
                     // Loosen all criteria for local runs
-                    test_suite.get_success_criteria_mut().avg_tps = 400;
+                    test_suite.get_success_criteria_mut().min_avg_tps = 400;
                     let previous_emit_job = test_suite.get_emit_job().clone();
                     let test_suite =
                         test_suite.with_emit_job(previous_emit_job.mode(EmitJobMode::MaxLoad {
@@ -492,6 +498,7 @@ fn single_test_suite(test_name: &str, duration: Duration) -> Result<ForgeConfig>
         "compat" => compat(),
         "framework_upgrade" => upgrade(),
         // Rest of the tests:
+        "realistic_env_load_sweep" => realistic_env_load_sweep_test(),
         "epoch_changer_performance" => epoch_changer_performance(),
         "state_sync_perf_fullnodes_apply_outputs" => state_sync_perf_fullnodes_apply_outputs(),
         "state_sync_perf_fullnodes_execute_transactions" => {
@@ -591,8 +598,9 @@ fn run_consensus_only_perf_test() -> ForgeConfig {
     config
         .with_initial_validator_count(NonZeroUsize::new(20).unwrap())
         .add_network_test(LoadVsPerfBenchmark {
-            test: &PerformanceBenchmark,
+            test: Box::new(PerformanceBenchmark),
             workloads: Workloads::TPS(&[30000]),
+            criteria: vec![],
         })
         .with_genesis_helm_config_fn(Arc::new(|helm_values| {
             // no epoch change.
@@ -757,15 +765,57 @@ fn consensus_stress_test() -> ForgeConfig {
     })
 }
 
+/// Forge suite that runs `PerformanceBenchmark` at several TPS levels and checks
+/// throughput and latency for each one.
+///
+/// The benchmark is wrapped in `MultiRegionNetworkEmulationTest` and `CpuChaosTest`
+/// and runs on 20 validators plus 10 fullnodes.
+fn realistic_env_load_sweep_test() -> ForgeConfig {
+    // (min TPS, max P50 latency, max P99 latency) for each step of the sweep.
+    // NOTE(review): presumably paired by position with the `workloads` list below;
+    // confirm against `LoadVsPerfBenchmark`.
+    let per_step_criteria: Vec<_> = [
+        (9, 1.5, 4.0),
+        (95, 1.5, 4.0),
+        (950, 2.0, 4.0),
+        (2900, 3.0, 5.0),
+        (4900, 3.0, 5.0),
+    ]
+    .into_iter()
+    .map(|(min_tps, p50_limit, p99_limit)| {
+        SuccessCriteria::new(min_tps)
+            .add_latency_threshold(p50_limit, LatencyType::P50)
+            .add_latency_threshold(p99_limit, LatencyType::P99)
+    })
+    .collect();
+
+    let load_sweep = LoadVsPerfBenchmark {
+        test: Box::new(PerformanceBenchmark),
+        workloads: Workloads::TPS(&[10, 100, 1000, 3000, 5000]),
+        criteria: per_step_criteria,
+    };
+
+    // The wrapped test inherits the suite-level EmitJobRequest, so the shorter polling
+    // interval is configured here to get more precise latency measurements.
+    let emit_job = EmitJobRequest::default().latency_polling_interval(Duration::from_millis(100));
+
+    // Suite-wide checks; the per-step TPS/latency limits live in `per_step_criteria`.
+    let suite_criteria = SuccessCriteria::new(0)
+        .add_no_restarts()
+        .add_wait_for_catchup_s(60)
+        .add_chain_progress(StateProgressThreshold {
+            max_no_progress_secs: 30.0,
+            max_round_gap: 10,
+        });
+
+    ForgeConfig::default()
+        .with_initial_validator_count(NonZeroUsize::new(20).unwrap())
+        .with_initial_fullnode_count(10)
+        .add_network_test(CompositeNetworkTest::new_with_two_wrappers(
+            MultiRegionNetworkEmulationTest {
+                override_config: None,
+            },
+            CpuChaosTest {
+                override_config: None,
+            },
+            load_sweep,
+        ))
+        .with_emit_job(emit_job)
+        .with_genesis_helm_config_fn(Arc::new(|helm_values| {
+            // no epoch change.
+            helm_values["chain"]["epoch_duration_secs"] = (24 * 3600).into();
+        }))
+        .with_success_criteria(suite_criteria)
+}
+
 fn load_vs_perf_benchmark() -> ForgeConfig {
     ForgeConfig::default()
         .with_initial_validator_count(NonZeroUsize::new(20).unwrap())
         .with_initial_fullnode_count(10)
         .add_network_test(LoadVsPerfBenchmark {
-            test: &PerformanceBenchmark,
+            test: Box::new(PerformanceBenchmark),
             workloads: Workloads::TPS(&[
                 200, 1000, 3000, 5000, 7000, 7500, 8000, 9000, 10000, 12000, 15000,
             ]),
+            criteria: Vec::new(),
         })
         .with_genesis_helm_config_fn(Arc::new(|helm_values| {
             // no epoch change.
@@ -794,7 +844,7 @@ fn workload_vs_perf_benchmark() -> ForgeConfig {
         //     mempool_backlog: 10000,
         // }))
         .add_network_test(LoadVsPerfBenchmark {
-            test: &PerformanceBenchmark,
+            test: Box::new(PerformanceBenchmark),
             workloads: Workloads::TRANSACTIONS(&[
                 TransactionWorkload {
                     transaction_type: TransactionTypeArg::NoOp,
@@ -837,6 +887,7 @@ fn workload_vs_perf_benchmark() -> ForgeConfig {
                     unique_senders: true,
                 },
             ]),
+            criteria: Vec::new(),
         })
         .with_genesis_helm_config_fn(Arc::new(|helm_values| {
             // no epoch change.
@@ -863,15 +914,14 @@ fn graceful_overload() -> ForgeConfig {
         // So having VFNs for all validators
         .with_initial_fullnode_count(10)
         .add_network_test(TwoTrafficsTest {
-            inner_mode: EmitJobMode::ConstTps { tps: 15000 },
-            inner_gas_price: aptos_global_constants::GAS_UNIT_PRICE,
-            inner_init_gas_price_multiplier: 20,
-            inner_transaction_type: TransactionTypeArg::CoinTransfer.materialize_default(),
+            inner_traffic: EmitJobRequest::default()
+                .mode(EmitJobMode::ConstTps { tps: 15000 })
+                .init_gas_price_multiplier(20),
+
             // Additionally - we are not really gracefully handling overloads,
             // setting limits based on current reality, to make sure they
             // don't regress, but something to investigate
-            avg_tps: 3400,
-            latency_thresholds: &[],
+            inner_success_criteria: SuccessCriteria::new(3400),
         })
         // First start higher gas-fee traffic, to not cause issues with TxnEmitter setup - account creation
         .with_emit_job(
@@ -913,19 +963,13 @@ fn three_region_sim_graceful_overload() -> ForgeConfig {
         .add_network_test(CompositeNetworkTest::new(
             ThreeRegionSameCloudSimulationTest,
             TwoTrafficsTest {
-                inner_mode: EmitJobMode::ConstTps { tps: 15000 },
-                inner_gas_price: aptos_global_constants::GAS_UNIT_PRICE,
-                inner_init_gas_price_multiplier: 20,
-                // Cannot use TransactionTypeArg::materialize, as this needs to be static
-                inner_transaction_type: TransactionType::CoinTransfer {
-                    invalid_transaction_ratio: 0,
-                    sender_use_account_pool: false,
-                },
+                inner_traffic: EmitJobRequest::default()
+                    .mode(EmitJobMode::ConstTps { tps: 15000 })
+                    .init_gas_price_multiplier(20),
                 // Additionally - we are not really gracefully handling overloads,
                 // setting limits based on current reality, to make sure they
                 // don't regress, but something to investigate
-                avg_tps: 1200,
-                latency_thresholds: &[],
+                inner_success_criteria: SuccessCriteria::new(3400),
             },
         ))
         // First start higher gas-fee traffic, to not cause issues with TxnEmitter setup - account creation
@@ -1333,14 +1377,12 @@ fn realistic_env_max_throughput_test_suite(duration: Duration) -> ForgeConfig {
                 override_config: None,
             },
             TwoTrafficsTest {
-                inner_mode: EmitJobMode::MaxLoad {
-                    mempool_backlog: 40000,
-                },
-                inner_gas_price: aptos_global_constants::GAS_UNIT_PRICE,
-                inner_init_gas_price_multiplier: 20,
-                inner_transaction_type: TransactionTypeArg::CoinTransfer.materialize_default(),
-                avg_tps: 5000,
-                latency_thresholds: &[],
+                inner_traffic: EmitJobRequest::default()
+                    .mode(EmitJobMode::MaxLoad {
+                        mempool_backlog: 40000,
+                    })
+                    .init_gas_price_multiplier(20),
+                inner_success_criteria: SuccessCriteria::new(5000),
             },
         ))
         .with_genesis_helm_config_fn(Arc::new(|helm_values| {
@@ -1351,7 +1393,8 @@ fn realistic_env_max_throughput_test_suite(duration: Duration) -> ForgeConfig {
         .with_emit_job(
             EmitJobRequest::default()
                 .mode(EmitJobMode::ConstTps { tps: 100 })
-                .gas_price(5 * aptos_global_constants::GAS_UNIT_PRICE),
+                .gas_price(5 * aptos_global_constants::GAS_UNIT_PRICE)
+                .latency_polling_interval(Duration::from_millis(100)),
         )
         .with_success_criteria(
             SuccessCriteria::new(95)
@@ -1366,8 +1409,8 @@ fn realistic_env_max_throughput_test_suite(duration: Duration) -> ForgeConfig {
                     // Check that we don't use more than 10 GB of memory for 30% of the time.
                     MetricsThreshold::new(10 * 1024 * 1024 * 1024, 30),
                 ))
-                .add_latency_threshold(4.0, LatencyType::P50)
-                .add_latency_threshold(8.0, LatencyType::P90)
+                .add_latency_threshold(3.0, LatencyType::P50)
+                .add_latency_threshold(5.0, LatencyType::P90)
                 .add_chain_progress(StateProgressThreshold {
                     max_no_progress_secs: 10.0,
                     max_round_gap: 4,

diff --git a/testsuite/forge/src/report.rs b/testsuite/forge/src/report.rs
@@ -5,6 +5,7 @@
 use aptos_transaction_emitter_lib::emitter::stats::TxnStats;
 use serde::Serialize;
 use std::fmt;
+use aptos_logger::info;
 
 #[derive(Default, Debug, Serialize)]
 pub struct TestReport {
@@ -37,6 +38,7 @@ impl TestReport {
             self.text.push('\n');
         }
         self.text.push_str(&text);
+        info!("{}", text);
     }
 
     pub fn report_txn_stats(&mut self, test_name: String, stats: &TxnStats) {