test(perf): add mv write test that increases latency of regular reads
The idea is to test the hardest case: modifying a column that is a regular column in the base table
but is one of the primary key columns in the materialized view.
temichus committed Sep 24, 2024
1 parent bbd4845 commit 081e6a0
Showing 3 changed files with 112 additions and 0 deletions.
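
For reference, a minimal sketch of the hard case, using the exact schema the test creates in the diff below (assumptions: the python cassandra-driver, a node on 127.0.0.1, and the scylla_bench keyspace already created by the scylla-bench prepare step):

# Sketch only: the schema mirrors wait_mv_sync() in the diff below.
from cassandra.cluster import Cluster  # pip install cassandra-driver

session = Cluster(["127.0.0.1"]).connect()
session.execute("CREATE TABLE IF NOT EXISTS scylla_bench.test "
                "(pk bigint, ck bigint, v blob, PRIMARY KEY (pk, ck))")
session.execute("CREATE MATERIALIZED VIEW IF NOT EXISTS scylla_bench.view_test AS "
                "SELECT * FROM scylla_bench.test "
                "WHERE v IS NOT NULL AND ck IS NOT NULL AND pk IS NOT NULL "
                "PRIMARY KEY (v, pk, ck)")
# v is a regular column in the base table but a view key column, so every
# overwrite triggers a read-before-write on the base replica: the view row
# keyed by the old v must be deleted and a row keyed by the new v inserted.
session.execute("INSERT INTO scylla_bench.test (pk, ck, v) VALUES (1, 1, 0xaa)")
session.execute("UPDATE scylla_bench.test SET v = 0xbb WHERE pk = 1 AND ck = 1")

That per-write view delete-plus-insert (and the read of the old value it requires) is the extra work expected to push up the latency of the unrelated reads measured by the test.
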
@@ -0,0 +1,12 @@
#!groovy

// trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43
def lib = library identifier: 'sct@snapshot', retriever: legacySCM(scm)

perfRegressionParallelPipeline(
    backend: "aws",
    test_name: "performance_regression_test.PerformanceRegressionMaterializedViewLatencyTest",
    test_config: """["test-cases/performance/perf-regression-latency-mv-read-concurrency.yaml"]""",
    sub_tests: ["test_read_mv_latency"],
    email_recipients: 'wojciech.mitros@scylladb.com,artsiom.mishuta@scylladb.com,piodul@scylladb.com'
)
69 changes: 69 additions & 0 deletions performance_regression_test.py
@@ -30,6 +30,7 @@
from sdcm.sct_events.system import HWPerforanceEvent, InfoEvent
from sdcm.utils.decorators import log_run_info, latency_calculator_decorator
from sdcm.utils.csrangehistogram import CSHistogramTagTypes
from sdcm.utils.nemesis_utils.indexes import wait_for_view_to_be_built

KB = 1024

@@ -891,3 +892,71 @@ def test_latency_write_with_upgrade(self):
    def test_latency_mixed_with_upgrade(self):
        self._prepare_latency_with_upgrade()
        self.run_workload_and_upgrade(stress_cmd=self.params.get('stress_cmd_m'))


class PerformanceRegressionMaterializedViewLatencyTest(PerformanceRegressionTest):
    """
    The idea is to reproduce the hardest scenario for MV,
    based on the internal doc "Consistency problems in materialized views":
    modifying a column that is a regular column in the base table
    but is one of the primary key columns in the materialized view.
    Other types of materialized view updates are easier to handle;
    once we figure out how to do the hardest case correctly, all of the
    other cases will be solved as well. Currently this problem is not
    solved, so the test is just a reproducer of it and should not be
    used in regular runs.

    test steps:
    1 - set up a 3-node cluster with 2 tables
    2 - run the special prepare command, populating table 1 (the future MV base)
        and table 2, which is used for the latency measurement (prepare_write_cmd)
    3 - start the read workload for table 2 and measure its baseline latency
        for 15 min (stress_cmd_r)
    4 - run a special rewrite workload against table 1 and measure table 2
        latency while the changes to table 1 are applied (stress_cmd_no_mv)
    5 - create the MV on table 1 and wait for it to sync, measuring table 2
        latency while the view is built
    6 - run the special rewrite workload against table 1 again and measure
        table 2 latency while the changes are applied (stress_cmd_mv)
    """

    def test_read_mv_latency(self):
        self.run_fstrim_on_all_db_nodes()
        self.preload_data()  # prepare_write_cmd
        self.wait_no_compactions_running()
        self.run_fstrim_on_all_db_nodes()

        self.create_test_stats(sub_type="read", append_sub_test_to_name=False,
                               test_index="mv-overloading-latency-read")
        self.run_stress_thread(stress_cmd=self.params.get('stress_cmd_r'), stress_num=1,
                               stats_aggregate_cmds=False)

        self.steady_state_read_workload_latency()  # stress_cmd_r only
        self.do_rewrite_workload()                 # stress_cmd_no_mv + stress_cmd_r
        self.wait_mv_sync()                        # stress_cmd_r only
        self.do_rewrite_workload_with_mv()         # stress_cmd_mv + stress_cmd_r
        self.loaders.kill_stress_thread()
        self.check_latency_during_ops()

    @latency_calculator_decorator
    def steady_state_read_workload_latency(self):
        InfoEvent(message='start_read_workload_latency begin').publish()
        time.sleep(15 * 60)  # measure the read baseline with no concurrent writes
        InfoEvent(message='start_read_workload_latency ended').publish()

    @latency_calculator_decorator
    def do_rewrite_workload(self):
        # overwrite table 1 while the MV does not exist yet
        base_cmd = self.params.get('stress_cmd_no_mv')
        stress_queue = self.run_stress_thread(stress_cmd=base_cmd, stress_num=1, stats_aggregate_cmds=False)
        results = self.get_stress_results(queue=stress_queue, store_results=False)
        self.display_results(results, test_name='do_rewrite_workload')

    @latency_calculator_decorator
    def wait_mv_sync(self):
        node1 = self.db_cluster.nodes[0]
        # the base table already exists (created by the scylla-bench prepare);
        # CREATE TABLE IF NOT EXISTS is only a safety net for reruns
        node1.run_cqlsh(
            "CREATE TABLE IF NOT EXISTS scylla_bench.test (pk bigint,ck bigint,v blob,PRIMARY KEY(pk, ck)) WITH compression = { }")
        node1.run_cqlsh(
            "CREATE MATERIALIZED VIEW IF NOT EXISTS scylla_bench.view_test AS SELECT * FROM scylla_bench.test "
            "WHERE v IS NOT NULL AND ck IS NOT NULL AND pk IS NOT NULL PRIMARY KEY (v, pk, ck)")
        wait_for_view_to_be_built(node1, 'scylla_bench', 'view_test', timeout=1000)

    @latency_calculator_decorator
    def do_rewrite_workload_with_mv(self):
        # the same overwrite workload, now with the view in place
        # (throttled to a lower rate; see stress_cmd_mv in the test config)
        base_cmd = self.params.get('stress_cmd_mv')
        stress_queue = self.run_stress_thread(stress_cmd=base_cmd, stress_num=1, stats_aggregate_cmds=False)
        results = self.get_stress_results(queue=stress_queue, store_results=False)
        self.display_results(results, test_name='do_rewrite_workload_with_mv')
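
Each phase above is wrapped in latency_calculator_decorator (imported from sdcm.utils.decorators); conceptually it only brackets a time window, and the latency for that window comes from the stress_cmd_r reader that keeps running across all phases. A rough sketch of the pattern, not SCT's actual implementation; phase_windows is a hypothetical list on the test instance:

import functools
import time

def latency_phase(func):
    """Hypothetical stand-in: record the wall-clock window of a test phase so
    the steady reader's latency histograms can later be sliced to it."""
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        start = time.time()
        try:
            return func(self, *args, **kwargs)
        finally:
            # e.g. ('do_rewrite_workload', start, end); check_latency_during_ops()
            # would compare these windows against the steady-state baseline
            self.phase_windows.append((func.__name__, start, time.time()))
    return wrapper
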
31 changes: 31 additions & 0 deletions test-cases/performance/perf-regression-latency-mv-read-concurrency.yaml
@@ -0,0 +1,31 @@
test_duration: 680
prepare_write_cmd: ["cassandra-stress write no-warmup cl=ALL n=100000 -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=1000 -col 'size=FIXED(128) n=FIXED(8)' -pop seq=1..2000000",
"scylla-bench -workload=sequential -mode=write -replication-factor=2 -partition-count=10 -partition-offset=0 -clustering-row-count=1000000 -clustering-row-size=uniform:100..5120 -concurrency=1000 -rows-per-request=10 -timeout=30s -connection-count 1000 -consistency-level=all",
"scylla-bench -workload=sequential -mode=write -replication-factor=2 -partition-count=10 -partition-offset=10 -clustering-row-count=1000000 -clustering-row-size=uniform:100..5120 -concurrency=1000 -rows-per-request=10 -timeout=30s -connection-count 1000 -consistency-level=all",
"scylla-bench -workload=sequential -mode=write -replication-factor=2 -partition-count=10 -partition-offset=20 -clustering-row-count=1000000 -clustering-row-size=uniform:100..5120 -concurrency=1000 -rows-per-request=10 -timeout=30s -connection-count 1000 -consistency-level=all"]

stress_cmd_r: "cassandra-stress read cl=ALL duration=600m -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate 'threads=10 throttle=100/s' -col 'size=FIXED(128) n=FIXED(8)' -pop 'dist=gauss(1..100000,50000,50000)' "
stress_cmd_no_mv: "scylla-bench -workload=uniform -mode=write -replication-factor=2 -partition-count=30 -clustering-row-count=1000000 -clustering-row-size=uniform:100..5120 -concurrency=500 -max-rate=16000 -rows-per-request=1 -timeout=30s -connection-count 500 -consistency-level=one -iterations=0 -duration=15m"
stress_cmd_mv: "scylla-bench -workload=uniform -mode=write -replication-factor=2 -partition-count=30 -clustering-row-count=1000000 -clustering-row-size=uniform:100..5120 -concurrency=500 -max-rate=4000 -rows-per-request=1 -timeout=30s -connection-count 500 -consistency-level=one -iterations=0 -duration=15m"

n_db_nodes: 3
n_loaders: 2
n_monitor_nodes: 1

instance_type_loader: 'c6i.2xlarge'
instance_type_monitor: 't3.large'
instance_type_db: 'i4i.2xlarge'

user_prefix: 'perf-latency-mv-overloaded'
space_node_threshold: 644245094
ami_id_db_scylla_desc: 'VERSION_DESC'

round_robin: true
append_scylla_args: '--blocked-reactor-notify-ms 5 --abort-on-lsa-bad-alloc 1 --abort-on-seastar-bad-alloc --abort-on-internal-error 1 --abort-on-ebadf 1'
backtrace_decoding: false
print_kernel_callstack: true

store_perf_results: true
use_prepared_loaders: true
use_hdr_cs_histogram: true
custom_es_index: 'mv-overloading-latency-read'
