diff --git a/release/nightly_tests/dataset/multi_node_train_benchmark.py b/release/nightly_tests/dataset/multi_node_train_benchmark.py index 679261931ec6..69828258fdea 100644 --- a/release/nightly_tests/dataset/multi_node_train_benchmark.py +++ b/release/nightly_tests/dataset/multi_node_train_benchmark.py @@ -571,8 +571,13 @@ def __iter__(self): def benchmark_code( args, ): + ctx = ray.data.DataContext.get_current() + # This release test runs into ACCESS_DENIED errors fairly often. + # We add ACCESS_DENIED as a retryable exception type to avoid flakiness. + # See for more details: https://github.com/ray-project/ray/issues/47230 + ctx.retried_io_errors.append("AWS Error ACCESS_DENIED") + if args.target_max_block_size_mb is not None: - ctx = ray.data.DataContext.get_current() ctx.target_max_block_size = args.target_max_block_size_mb * 1024 * 1024 cache_input_ds = args.cache_input_ds