Skip to content

Commit

Permalink
fix(nemesis): fix 'disable_binary_gossip_execute_major_compaction'
Browse files Browse the repository at this point in the history
Check the gossip status and CQL workability at the end of the
'disrupt_disable_binary_gossip_execute_major_compaction' nemesis
instead of looking for 'gate closed' messages in the DB logs.

Fixes: scylladb#6819
  • Loading branch information
vponomaryov authored and fruch committed Jan 23, 2024
1 parent 2c2369d commit 6f48e10
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 12 deletions.
3 changes: 2 additions & 1 deletion sdcm/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -2747,7 +2747,7 @@ def _gen_cqlsh_cmd(self, command, keyspace, timeout, connect_timeout):
return f'{cqlsh_cmd} {options} -e {command} {host}'

def run_cqlsh(self, cmd, keyspace=None, timeout=120, verbose=True, split=False, connect_timeout=60,
num_retry_on_failure=1):
num_retry_on_failure=1, retry_interval=3):
"""Runs CQL command using cqlsh utility"""
cmd = self._gen_cqlsh_cmd(command=cmd, keyspace=keyspace, timeout=timeout,
connect_timeout=connect_timeout)
Expand All @@ -2760,6 +2760,7 @@ def run_cqlsh(self, cmd, keyspace=None, timeout=120, verbose=True, split=False,
num_retry_on_failure -= 1
if not num_retry_on_failure:
raise
time.sleep(retry_interval)

# stdout of cqlsh example:
# pk
Expand Down
20 changes: 9 additions & 11 deletions sdcm/nemesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -4929,8 +4929,6 @@ def disrupt_bootstrap_streaming_error(self):
self.cluster.decommission(new_node, timeout=7200)

def disrupt_disable_binary_gossip_execute_major_compaction(self):
def are_gate_closed_messages_raised(log_reader):
return bool(list(log_reader))
with nodetool_context(node=self.target_node, start_command="disablebinary", end_command="enablebinary"):
self.target_node.run_nodetool("statusbinary")
self.target_node.run_nodetool("status")
Expand All @@ -4943,15 +4941,15 @@ def are_gate_closed_messages_raised(log_reader):
self.target_node.run_nodetool("statusgossip")
self.target_node.run_nodetool("statusbinary")
time.sleep(30)
gate_closed_log_reader = self.target_node.follow_system_log(patterns=['gate closed'])
gate_closed_appearing = bool(wait_for(func=are_gate_closed_messages_raised,
log_reader=gate_closed_log_reader,
timeout=100,
step=5,
text="Waiting for 'gate closed' exceptions",
throw_exc=False))
assert not gate_closed_appearing, \
"After re-enabling binary and gossip, 'gate closed' messages continue to appear"
try:
self.cluster.wait_for_nodes_up_and_normal(nodes=[self.target_node])
self.target_node.run_cqlsh(
"SELECT * FROM system_schema.keyspaces;", num_retry_on_failure=20, retry_interval=3)
except Exception: # pylint: disable=broad-except
# NOTE: restart the target node because it was the remedy for the problems with CQL workability
self.log.warning("'%s' node will be restarted to make the CQL work again", self.target_node)
self.target_node.restart_scylla_server()
raise


def disrupt_method_wrapper(method, is_exclusive=False): # pylint: disable=too-many-statements
Expand Down

0 comments on commit 6f48e10

Please sign in to comment.