From 9495e7272005760dc7644ca700f7301928cdb617 Mon Sep 17 00:00:00 2001 From: Balaji Veeramani Date: Tue, 17 Sep 2024 03:34:16 +0900 Subject: [PATCH] [Data] Add `SERVICE_UNAVAILABLE` to list of retried transient errors (#47673) While reading or writing files with Ray Data, S3 might raise a transient SERVICE_UNAVAILABLE error. This PR adds the error to the list of retried transient errors. Signed-off-by: Balaji Veeramani --- python/ray/data/context.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/ray/data/context.py b/python/ray/data/context.py index 2280a18f9a44..3d9e745e148d 100644 --- a/python/ray/data/context.py +++ b/python/ray/data/context.py @@ -102,6 +102,8 @@ DEFAULT_ENABLE_GET_OBJECT_LOCATIONS_FOR_METRICS = False +# `write_file_retry_on_errors` is deprecated in favor of `retried_io_errors`. You +# shouldn't need to modify `DEFAULT_WRITE_FILE_RETRY_ON_ERRORS`. DEFAULT_WRITE_FILE_RETRY_ON_ERRORS = ( "AWS Error INTERNAL_FAILURE", "AWS Error NETWORK_CONNECTION", @@ -114,6 +116,7 @@ "AWS Error NETWORK_CONNECTION", "AWS Error SLOW_DOWN", "AWS Error UNKNOWN (HTTP status 503)", + "AWS Error SERVICE_UNAVAILABLE", ) DEFAULT_WARN_ON_DRIVER_MEMORY_USAGE_BYTES = 2 * 1024 * 1024 * 1024