Skip to content

Commit

Permalink
tests: improve manifest upload check
Browse files Browse the repository at this point in the history
Manifests of all partitions are now checked, so that if something goes
wrong it is possible to see whether any progress has been made at all.

Timeout for the check is made adaptive to the number of partitions.
  • Loading branch information
dlex authored and andrewhsu committed Aug 17, 2023
1 parent 8771e4d commit a5fc9ca
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions tests/rptest/services/redpanda.py
Original file line number Diff line number Diff line change
Expand Up @@ -3686,6 +3686,7 @@ def stop_and_scrub_object_storage(self, run_timeout=60):
# can be set to None for no timeout

def all_partitions_uploaded_manifest():
manifest_not_uploaded = []
for p in self.partitions():
try:
status = self._admin.get_partition_cloud_storage_status(
Expand All @@ -3705,17 +3706,23 @@ def all_partitions_uploaded_manifest():
"metadata_update_pending"] is False or status.get(
'ms_since_last_manifest_upload', None) is not None
if remote_write and not has_uploaded_manifest:
self.logger.info(f"Partition {p} hasn't yet uploaded")
return False
manifest_not_uploaded.append(p)

if len(manifest_not_uploaded) != 0:
self.logger.info(
f"Partitions that haven't yet uploaded: {manifest_not_uploaded}"
)
return False

return True

# If any nodes are up, then we expect to be able to talk to the cluster and
# check tiered storage status to wait for uploads to complete.
if self._started:
# Aggressive retry because almost always this should already be done
# Every 1000 partitions adds 30s to the timeout
wait_until(all_partitions_uploaded_manifest,
timeout_sec=30,
timeout_sec=30 + len(self.partitions()) // 33,
backoff_sec=1)

# We stop because the scrubbing routine would otherwise interpret
Expand Down

0 comments on commit a5fc9ca

Please sign in to comment.