From 700d08badaeb7ed3039fc70398298c697cabf6c3 Mon Sep 17 00:00:00 2001
From: Barbara Korycki <barbara@mlcommons.org>
Date: Thu, 6 Feb 2025 14:51:58 -0800
Subject: [PATCH 01/11] New backoff numbers

---
 plugins/mistral/modelgauge/suts/mistral_client.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/plugins/mistral/modelgauge/suts/mistral_client.py b/plugins/mistral/modelgauge/suts/mistral_client.py
index a456e6b8..3c0adc06 100644
--- a/plugins/mistral/modelgauge/suts/mistral_client.py
+++ b/plugins/mistral/modelgauge/suts/mistral_client.py
@@ -4,9 +4,9 @@
 
 from modelgauge.secret_values import RequiredSecret, SecretDescription
 
-BACKOFF_INITIAL_MILLIS = 500
-BACKOFF_MAX_INTERVAL_MILLIS = 10_000
-BACKOFF_EXPONENT = 1.1
+BACKOFF_INITIAL_MILLIS = 1000
+BACKOFF_MAX_INTERVAL_MILLIS = 20_000
+BACKOFF_EXPONENT = 1.9
 BACKOFF_MAX_ELAPSED_MILLIS = 120_000
 
 

From 329d8f2423b9a484b97bd3a90fe6e03a1cedb828 Mon Sep 17 00:00:00 2001
From: Barbara Korycki <barbara@mlcommons.org>
Date: Thu, 6 Feb 2025 17:25:02 -0800
Subject: [PATCH 02/11] change numbers

---
 plugins/mistral/modelgauge/suts/mistral_client.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/plugins/mistral/modelgauge/suts/mistral_client.py b/plugins/mistral/modelgauge/suts/mistral_client.py
index 3c0adc06..a528c11d 100644
--- a/plugins/mistral/modelgauge/suts/mistral_client.py
+++ b/plugins/mistral/modelgauge/suts/mistral_client.py
@@ -5,9 +5,9 @@
 from modelgauge.secret_values import RequiredSecret, SecretDescription
 
 BACKOFF_INITIAL_MILLIS = 1000
-BACKOFF_MAX_INTERVAL_MILLIS = 20_000
+BACKOFF_MAX_INTERVAL_MILLIS = 100_000
 BACKOFF_EXPONENT = 1.9
-BACKOFF_MAX_ELAPSED_MILLIS = 120_000
+BACKOFF_MAX_ELAPSED_MILLIS = 86_400_000  # 1 day
 
 
 class MistralAIAPIKey(RequiredSecret):

From 18f93c010f1e7dceddd6036f0e37a56a84990bc8 Mon Sep 17 00:00:00 2001
From: Barbara Korycki <barbara@mlcommons.org>
Date: Thu, 13 Feb 2025 12:28:35 -0800
Subject: [PATCH 03/11] Retry decorator + unit tests

---
 src/modelgauge/retry_decorator.py             | 41 +++++++++++++
 .../modelgauge_tests/test_retry_decorator.py  | 60 +++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 src/modelgauge/retry_decorator.py
 create mode 100644 tests/modelgauge_tests/test_retry_decorator.py

diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py
new file mode 100644
index 00000000..4e79ae2d
--- /dev/null
+++ b/src/modelgauge/retry_decorator.py
@@ -0,0 +1,41 @@
+import time
+import functools
+
+BASE_RETRY_COUNT = 3
+MAX_RETRY_DURATION = 86400  # 1 day in seconds
+MAX_BACKOFF = 60  # 1 minute in seconds
+
+
+def retry(unacceptable_exceptions=None):
+    """
+    A decorator that retries a function at least BASE_RETRY_COUNT times.
+    If unacceptable_exceptions are specified, it will retry for up to 1 day if any of those exceptions occur.
+    """
+    unacceptable_exceptions = tuple(unacceptable_exceptions) if unacceptable_exceptions else ()
+
+    # TODO: Is functools.wraps necessary? Test if journal works without it.
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            attempt = 0
+            start_time = time.time()
+
+            while True:
+                try:
+                    return func(*args, **kwargs)
+                except unacceptable_exceptions as e:
+                    # Keep retrying "unacceptable" exceptions for 1 day.
+                    elapsed_time = time.time() - start_time
+                    if elapsed_time >= MAX_RETRY_DURATION:
+                        raise
+                except:
+                    # Retry all other exceptions BASE_RETRY_COUNT times.
+                    attempt += 1
+                    if attempt >= BASE_RETRY_COUNT:
+                        raise
+                sleep_time = min(2**attempt, MAX_BACKOFF)  # Exponential backoff with cap
+                time.sleep(sleep_time)
+
+        return wrapper
+
+    return decorator
diff --git a/tests/modelgauge_tests/test_retry_decorator.py b/tests/modelgauge_tests/test_retry_decorator.py
new file mode 100644
index 00000000..2372c93d
--- /dev/null
+++ b/tests/modelgauge_tests/test_retry_decorator.py
@@ -0,0 +1,60 @@
+import pytest
+import time
+
+from modelgauge.retry_decorator import retry, BASE_RETRY_COUNT
+
+
+def test_retry_success():
+    @retry()
+    def always_succeed():
+        return "success"
+
+    assert always_succeed() == "success"
+
+
+@pytest.mark.parametrize("exceptions", [None, [ValueError]])
+def test_retry_fails_after_base_retries(exceptions):
+    attempt_counter = 0
+
+    @retry(unacceptable_exceptions=exceptions)
+    def always_fail():
+        nonlocal attempt_counter
+        attempt_counter += 1
+        raise KeyError("Intentional failure")
+
+    with pytest.raises(KeyError):
+        always_fail()
+
+    assert attempt_counter == BASE_RETRY_COUNT
+
+
+def test_retry_eventually_succeeds():
+    attempt_counter = 0
+
+    @retry(unacceptable_exceptions=[ValueError])
+    def succeed_after_two_attempts():
+        nonlocal attempt_counter
+        attempt_counter += 1
+        if attempt_counter < BASE_RETRY_COUNT:
+            raise ValueError("Intentional failure")
+        return "success"
+
+    assert succeed_after_two_attempts() == "success"
+    assert attempt_counter == BASE_RETRY_COUNT
+
+
+def test_retry_unacceptable_eventually_succeeds():
+    attempt_counter = 0
+    start_time = time.time()
+
+    @retry(unacceptable_exceptions=[ValueError])
+    def succeed_after_twenty_seconds():
+        nonlocal attempt_counter
+        attempt_counter += 1
+        elapsed_time = time.time() - start_time
+        if elapsed_time < 20:
+            raise ValueError("Intentional failure")
+        return "success"
+
+    assert succeed_after_twenty_seconds() == "success"
+    assert attempt_counter > BASE_RETRY_COUNT

From bd621e2b618f73575db53f58889731b7f0e83c85 Mon Sep 17 00:00:00 2001
From: Barbara Korycki <barbara@mlcommons.org>
Date: Thu, 13 Feb 2025 13:28:05 -0800
Subject: [PATCH 04/11] Add retry decorator to SUTs without retry logic

---
 plugins/google/modelgauge/suts/google_genai_client.py         | 3 +++
 .../modelgauge/suts/huggingface_chat_completion.py            | 4 +++-
 plugins/mistral/modelgauge/suts/mistral_sut.py                | 4 +++-
 plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py       | 3 +++
 plugins/openai/modelgauge/suts/openai_client.py               | 3 +++
 src/modelgauge/retry_decorator.py                             | 2 +-
 6 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/plugins/google/modelgauge/suts/google_genai_client.py b/plugins/google/modelgauge/suts/google_genai_client.py
index 1e35dc37..f255b323 100644
--- a/plugins/google/modelgauge/suts/google_genai_client.py
+++ b/plugins/google/modelgauge/suts/google_genai_client.py
@@ -2,11 +2,13 @@
 from typing import Dict, List, Optional
 
 import google.generativeai as genai  # type: ignore
+from google.api_core.exceptions import InternalServerError, ResourceExhausted, RetryError, TooManyRequests
 from google.generativeai.types import HarmCategory, HarmBlockThreshold  # type: ignore
 from pydantic import BaseModel
 
 from modelgauge.general import APIException
 from modelgauge.prompt import TextPrompt
+from modelgauge.retry_decorator import retry
 from modelgauge.secret_values import InjectSecret, RequiredSecret, SecretDescription
 from modelgauge.sut import REFUSAL_RESPONSE, PromptResponseSUT, SUTCompletion, SUTResponse
 from modelgauge.sut_capabilities import AcceptsTextPrompt
@@ -108,6 +110,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> GoogleGenAiRequest:
             contents=prompt.text, generation_config=generation_config, safety_settings=self.safety_settings
         )
 
+    @retry(unacceptable_exceptions=[InternalServerError, ResourceExhausted, RetryError, TooManyRequests])
     def evaluate(self, request: GoogleGenAiRequest) -> GoogleGenAiResponse:
         if self.model is None:
             # Handle lazy init.
diff --git a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py
index 12e1add4..8a58a79b 100644
--- a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py
+++ b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py
@@ -2,11 +2,12 @@
 from typing import Dict, List, Optional
 
 from huggingface_hub import get_inference_endpoint, InferenceClient, InferenceEndpointStatus  # type: ignore
-from huggingface_hub.utils import HfHubHTTPError  # type: ignore
+from huggingface_hub.utils import HfHubHTTPError, InferenceTimeoutError  # type: ignore
 from pydantic import BaseModel
 
 from modelgauge.auth.huggingface_inference_token import HuggingFaceInferenceToken
 from modelgauge.prompt import TextPrompt
+from modelgauge.retry_decorator import retry
 from modelgauge.secret_values import InjectSecret
 from modelgauge.sut import PromptResponseSUT, SUTCompletion, SUTResponse, TokenProbability, TopTokens
 from modelgauge.sut_capabilities import AcceptsTextPrompt, ProducesPerTokenLogProbabilities
@@ -86,6 +87,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> HuggingFaceChatCompletion
             **prompt.options.model_dump(),
         )
 
+    @retry(unacceptable_exceptions=[InferenceTimeoutError])  # Raised when model is unavailable or the request times out
     def evaluate(self, request: HuggingFaceChatCompletionRequest) -> HuggingFaceChatCompletionOutput:
         if self.client is None:
             self._create_client()
diff --git a/plugins/mistral/modelgauge/suts/mistral_sut.py b/plugins/mistral/modelgauge/suts/mistral_sut.py
index cc708134..aa9abfb2 100644
--- a/plugins/mistral/modelgauge/suts/mistral_sut.py
+++ b/plugins/mistral/modelgauge/suts/mistral_sut.py
@@ -1,8 +1,9 @@
 import warnings
 from typing import Optional
 
-from mistralai.models import ChatCompletionResponse, ClassificationResponse
+from mistralai.models import ChatCompletionResponse, ClassificationResponse, SDKError
 from modelgauge.prompt import TextPrompt
+from modelgauge.retry_decorator import retry
 from modelgauge.secret_values import InjectSecret
 from modelgauge.sut import PromptResponseSUT, SUTCompletion, SUTResponse
 from modelgauge.sut_capabilities import AcceptsTextPrompt
@@ -60,6 +61,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> MistralAIRequest:
             args["max_tokens"] = prompt.options.max_tokens
         return MistralAIRequest(**args)
 
+    @retry(unacceptable_exceptions=[SDKError])
     def evaluate(self, request: MistralAIRequest) -> ChatCompletionResponse:
         response = self.client.request(request.model_dump(exclude_none=True))  # type: ignore
         return response
diff --git a/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py b/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py
index 24d9a75e..9b993d25 100644
--- a/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py
+++ b/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py
@@ -1,10 +1,12 @@
 from typing import Any, Dict, List, Optional, Union
 
 from openai import OpenAI
+from openai import APITimeoutError, ConflictError, InternalServerError, RateLimitError
 from openai.types.chat import ChatCompletion
 from pydantic import BaseModel
 
 from modelgauge.prompt import ChatPrompt, ChatRole, SUTOptions, TextPrompt
+from modelgauge.retry_decorator import retry
 from modelgauge.secret_values import (
     InjectSecret,
     RequiredSecret,
@@ -115,6 +117,7 @@ def _translate_request(self, messages: List[OpenAIChatMessage], options: SUTOpti
             **optional_kwargs,
         )
 
+    @retry(unacceptable_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError])
     def evaluate(self, request: OpenAIChatRequest) -> ChatCompletion:
         if self.client is None:
             # Handle lazy init.
diff --git a/plugins/openai/modelgauge/suts/openai_client.py b/plugins/openai/modelgauge/suts/openai_client.py
index ac591416..132e2952 100644
--- a/plugins/openai/modelgauge/suts/openai_client.py
+++ b/plugins/openai/modelgauge/suts/openai_client.py
@@ -1,10 +1,12 @@
 from typing import Any, Dict, List, Optional, Union
 
 from openai import OpenAI
+from openai import APITimeoutError, ConflictError, InternalServerError, RateLimitError
 from openai.types.chat import ChatCompletion
 from pydantic import BaseModel
 
 from modelgauge.prompt import ChatPrompt, ChatRole, SUTOptions, TextPrompt
+from modelgauge.retry_decorator import retry
 from modelgauge.secret_values import (
     InjectSecret,
     OptionalSecret,
@@ -139,6 +141,7 @@ def _translate_request(self, messages: List[OpenAIChatMessage], options: SUTOpti
             **optional_kwargs,
         )
 
+    @retry(unacceptable_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError])
     def evaluate(self, request: OpenAIChatRequest) -> ChatCompletion:
         if self.client is None:
             # Handle lazy init.
diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py
index 4e79ae2d..f243f90e 100644
--- a/src/modelgauge/retry_decorator.py
+++ b/src/modelgauge/retry_decorator.py
@@ -1,5 +1,5 @@
-import time
 import functools
+import time
 
 BASE_RETRY_COUNT = 3
 MAX_RETRY_DURATION = 86400  # 1 day in seconds

From 00b8e5325c4577f7d1e9dce848839f6a7ec03ed1 Mon Sep 17 00:00:00 2001
From: Barbara Korycki <barbara@mlcommons.org>
Date: Thu, 13 Feb 2025 13:36:32 -0800
Subject: [PATCH 05/11] remove todo

---
 src/modelgauge/retry_decorator.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py
index f243f90e..1d2a6577 100644
--- a/src/modelgauge/retry_decorator.py
+++ b/src/modelgauge/retry_decorator.py
@@ -13,7 +13,6 @@ def retry(unacceptable_exceptions=None):
     """
     unacceptable_exceptions = tuple(unacceptable_exceptions) if unacceptable_exceptions else ()
 
-    # TODO: Is functools.wraps necessary? Test if journal works without it.
     def decorator(func):
         @functools.wraps(func)
         def wrapper(*args, **kwargs):

From c2fa9918c98b93ccb349bb4e5fdd8d56d347d730 Mon Sep 17 00:00:00 2001
From: Barbara Korycki <barbara@mlcommons.org>
Date: Fri, 14 Feb 2025 09:22:41 -0800
Subject: [PATCH 06/11] Make unit test faster

---
 tests/modelgauge_tests/test_retry_decorator.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/modelgauge_tests/test_retry_decorator.py b/tests/modelgauge_tests/test_retry_decorator.py
index 2372c93d..367ad476 100644
--- a/tests/modelgauge_tests/test_retry_decorator.py
+++ b/tests/modelgauge_tests/test_retry_decorator.py
@@ -48,13 +48,13 @@ def test_retry_unacceptable_eventually_succeeds():
     start_time = time.time()
 
     @retry(unacceptable_exceptions=[ValueError])
-    def succeed_after_twenty_seconds():
+    def succeed_eventually():
         nonlocal attempt_counter
         attempt_counter += 1
         elapsed_time = time.time() - start_time
-        if elapsed_time < 20:
+        if elapsed_time < 5:
             raise ValueError("Intentional failure")
         return "success"
 
-    assert succeed_after_twenty_seconds() == "success"
+    assert succeed_eventually() == "success"
     assert attempt_counter > BASE_RETRY_COUNT

From 17cb816aa80a20093508c1d7b18d6720a114119a Mon Sep 17 00:00:00 2001
From: Barbara Korycki <barbara@mlcommons.org>
Date: Fri, 14 Feb 2025 09:24:56 -0800
Subject: [PATCH 07/11] Change param name to be less confusing

---
 src/modelgauge/retry_decorator.py              | 10 +++++-----
 tests/modelgauge_tests/test_retry_decorator.py |  8 ++++----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py
index 1d2a6577..79da5f36 100644
--- a/src/modelgauge/retry_decorator.py
+++ b/src/modelgauge/retry_decorator.py
@@ -6,12 +6,12 @@
 MAX_BACKOFF = 60  # 1 minute in seconds
 
 
-def retry(unacceptable_exceptions=None):
+def retry(transient_exceptions=None):
     """
     A decorator that retries a function at least BASE_RETRY_COUNT times.
-    If unacceptable_exceptions are specified, it will retry for up to 1 day if any of those exceptions occur.
+    If transient_exceptions are specified, it will retry for up to 1 day if any of those exceptions occur.
     """
-    unacceptable_exceptions = tuple(unacceptable_exceptions) if unacceptable_exceptions else ()
+    transient_exceptions = tuple(transient_exceptions) if transient_exceptions else ()
 
     def decorator(func):
         @functools.wraps(func)
@@ -22,8 +22,8 @@ def wrapper(*args, **kwargs):
             while True:
                 try:
                     return func(*args, **kwargs)
-                except unacceptable_exceptions as e:
-                    # Keep retrying "unacceptable" exceptions for 1 day.
+                except transient_exceptions as e:
+                    # Keep retrying transient exceptions for 1 day.
                     elapsed_time = time.time() - start_time
                     if elapsed_time >= MAX_RETRY_DURATION:
                         raise
diff --git a/tests/modelgauge_tests/test_retry_decorator.py b/tests/modelgauge_tests/test_retry_decorator.py
index 367ad476..cf80bad9 100644
--- a/tests/modelgauge_tests/test_retry_decorator.py
+++ b/tests/modelgauge_tests/test_retry_decorator.py
@@ -16,7 +16,7 @@ def always_succeed():
 def test_retry_fails_after_base_retries(exceptions):
     attempt_counter = 0
 
-    @retry(unacceptable_exceptions=exceptions)
+    @retry(transient_exceptions=exceptions)
     def always_fail():
         nonlocal attempt_counter
         attempt_counter += 1
@@ -31,7 +31,7 @@ def always_fail():
 def test_retry_eventually_succeeds():
     attempt_counter = 0
 
-    @retry(unacceptable_exceptions=[ValueError])
+    @retry(transient_exceptions=[ValueError])
     def succeed_after_two_attempts():
         nonlocal attempt_counter
         attempt_counter += 1
@@ -43,11 +43,11 @@ def succeed_after_two_attempts():
     assert attempt_counter == BASE_RETRY_COUNT
 
 
-def test_retry_unacceptable_eventually_succeeds():
+def test_retry_transient_eventually_succeeds():
     attempt_counter = 0
     start_time = time.time()
 
-    @retry(unacceptable_exceptions=[ValueError])
+    @retry(transient_exceptions=[ValueError])
     def succeed_eventually():
         nonlocal attempt_counter
         attempt_counter += 1

From 73758bba89e34b74ec34c5053ec25cf682ddb0f7 Mon Sep 17 00:00:00 2001
From: Barbara Korycki <barbara@mlcommons.org>
Date: Fri, 14 Feb 2025 10:06:50 -0800
Subject: [PATCH 08/11] oops. update all param names

---
 plugins/google/modelgauge/suts/google_genai_client.py          | 2 +-
 .../huggingface/modelgauge/suts/huggingface_chat_completion.py | 2 +-
 plugins/mistral/modelgauge/suts/mistral_sut.py                 | 3 ++-
 plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py        | 2 +-
 plugins/openai/modelgauge/suts/openai_client.py                | 2 +-
 5 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/plugins/google/modelgauge/suts/google_genai_client.py b/plugins/google/modelgauge/suts/google_genai_client.py
index f255b323..cc5f5a0f 100644
--- a/plugins/google/modelgauge/suts/google_genai_client.py
+++ b/plugins/google/modelgauge/suts/google_genai_client.py
@@ -110,7 +110,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> GoogleGenAiRequest:
             contents=prompt.text, generation_config=generation_config, safety_settings=self.safety_settings
         )
 
-    @retry(unacceptable_exceptions=[InternalServerError, ResourceExhausted, RetryError, TooManyRequests])
+    @retry(transient_exceptions=[InternalServerError, ResourceExhausted, RetryError, TooManyRequests])
     def evaluate(self, request: GoogleGenAiRequest) -> GoogleGenAiResponse:
         if self.model is None:
             # Handle lazy init.
diff --git a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py
index 8a58a79b..23e1aa03 100644
--- a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py
+++ b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py
@@ -87,7 +87,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> HuggingFaceChatCompletion
             **prompt.options.model_dump(),
         )
 
-    @retry(unacceptable_exceptions=[InferenceTimeoutError])  # Raised when model is unavailable or the request times out
+    @retry(transient_exceptions=[InferenceTimeoutError])  # Raised when model is unavailable or the request times out
     def evaluate(self, request: HuggingFaceChatCompletionRequest) -> HuggingFaceChatCompletionOutput:
         if self.client is None:
             self._create_client()
diff --git a/plugins/mistral/modelgauge/suts/mistral_sut.py b/plugins/mistral/modelgauge/suts/mistral_sut.py
index aa9abfb2..a1380516 100644
--- a/plugins/mistral/modelgauge/suts/mistral_sut.py
+++ b/plugins/mistral/modelgauge/suts/mistral_sut.py
@@ -61,7 +61,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> MistralAIRequest:
             args["max_tokens"] = prompt.options.max_tokens
         return MistralAIRequest(**args)
 
-    @retry(unacceptable_exceptions=[SDKError])
+    @retry(transient_exceptions=[SDKError])
     def evaluate(self, request: MistralAIRequest) -> ChatCompletionResponse:
         response = self.client.request(request.model_dump(exclude_none=True))  # type: ignore
         return response
@@ -132,6 +132,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> MistralAIRequest:
             args["max_tokens"] = prompt.options.max_tokens
         return MistralAIRequest(temperature=self.temperature, n=self.num_generations, **args)
 
+    @retry(transient_exceptions=[SDKError])
     def evaluate(self, request: MistralAIRequest) -> MistralAIResponseWithModerations:
         response = self.client.request(request.model_dump(exclude_none=True))  # type: ignore
         assert (
diff --git a/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py b/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py
index 9b993d25..1bb5a0a2 100644
--- a/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py
+++ b/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py
@@ -117,7 +117,7 @@ def _translate_request(self, messages: List[OpenAIChatMessage], options: SUTOpti
             **optional_kwargs,
         )
 
-    @retry(unacceptable_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError])
+    @retry(transient_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError])
     def evaluate(self, request: OpenAIChatRequest) -> ChatCompletion:
         if self.client is None:
             # Handle lazy init.
diff --git a/plugins/openai/modelgauge/suts/openai_client.py b/plugins/openai/modelgauge/suts/openai_client.py
index 132e2952..f403fc66 100644
--- a/plugins/openai/modelgauge/suts/openai_client.py
+++ b/plugins/openai/modelgauge/suts/openai_client.py
@@ -141,7 +141,7 @@ def _translate_request(self, messages: List[OpenAIChatMessage], options: SUTOpti
             **optional_kwargs,
         )
 
-    @retry(unacceptable_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError])
+    @retry(transient_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError])
     def evaluate(self, request: OpenAIChatRequest) -> ChatCompletion:
         if self.client is None:
             # Handle lazy init.

From 84e840edd4970d7c87cef5961af73f3eb843cb14 Mon Sep 17 00:00:00 2001
From: Barbara Korycki <barbara@mlcommons.org>
Date: Fri, 14 Feb 2025 10:10:58 -0800
Subject: [PATCH 09/11] Remove retry from hf chat_completion (no relevant
 exceptions to retry?)

---
 .../modelgauge/suts/huggingface_chat_completion.py            | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py
index 23e1aa03..12e1add4 100644
--- a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py
+++ b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py
@@ -2,12 +2,11 @@
 from typing import Dict, List, Optional
 
 from huggingface_hub import get_inference_endpoint, InferenceClient, InferenceEndpointStatus  # type: ignore
-from huggingface_hub.utils import HfHubHTTPError, InferenceTimeoutError  # type: ignore
+from huggingface_hub.utils import HfHubHTTPError  # type: ignore
 from pydantic import BaseModel
 
 from modelgauge.auth.huggingface_inference_token import HuggingFaceInferenceToken
 from modelgauge.prompt import TextPrompt
-from modelgauge.retry_decorator import retry
 from modelgauge.secret_values import InjectSecret
 from modelgauge.sut import PromptResponseSUT, SUTCompletion, SUTResponse, TokenProbability, TopTokens
 from modelgauge.sut_capabilities import AcceptsTextPrompt, ProducesPerTokenLogProbabilities
@@ -87,7 +86,6 @@ def translate_text_prompt(self, prompt: TextPrompt) -> HuggingFaceChatCompletion
             **prompt.options.model_dump(),
         )
 
-    @retry(transient_exceptions=[InferenceTimeoutError])  # Raised when model is unavailable or the request times out
     def evaluate(self, request: HuggingFaceChatCompletionRequest) -> HuggingFaceChatCompletionOutput:
         if self.client is None:
             self._create_client()

From 8797205a3afffbf6494b1eb0fc1f5f6d0a4a875e Mon Sep 17 00:00:00 2001
From: Barbara Korycki <barbara@mlcommons.org>
Date: Fri, 14 Feb 2025 10:16:00 -0800
Subject: [PATCH 10/11] parameterize retry variables

---
 src/modelgauge/retry_decorator.py              | 15 ++++++++++-----
 tests/modelgauge_tests/test_retry_decorator.py |  9 ++++-----
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py
index 79da5f36..54761faa 100644
--- a/src/modelgauge/retry_decorator.py
+++ b/src/modelgauge/retry_decorator.py
@@ -6,9 +6,14 @@
 MAX_BACKOFF = 60  # 1 minute in seconds
 
 
-def retry(transient_exceptions=None):
+def retry(
+    transient_exceptions=None,
+    base_retry_count=BASE_RETRY_COUNT,
+    max_retry_duration=MAX_RETRY_DURATION,
+    max_backoff=MAX_BACKOFF,
+):
     """
-    A decorator that retries a function at least BASE_RETRY_COUNT times.
+    A decorator that retries a function at least base_retry_count times.
     If transient_exceptions are specified, it will retry for up to 1 day if any of those exceptions occur.
     """
     transient_exceptions = tuple(transient_exceptions) if transient_exceptions else ()
@@ -25,14 +30,14 @@ def wrapper(*args, **kwargs):
                 except transient_exceptions as e:
                     # Keep retrying transient exceptions for 1 day.
                     elapsed_time = time.time() - start_time
-                    if elapsed_time >= MAX_RETRY_DURATION:
+                    if elapsed_time >= max_retry_duration:
                         raise
                 except:
                     # Retry all other exceptions BASE_RETRY_COUNT times.
                     attempt += 1
-                    if attempt >= BASE_RETRY_COUNT:
+                    if attempt >= base_retry_count:
                         raise
-                sleep_time = min(2**attempt, MAX_BACKOFF)  # Exponential backoff with cap
+                sleep_time = min(2**attempt, max_backoff)  # Exponential backoff with cap
                 time.sleep(sleep_time)
 
         return wrapper
diff --git a/tests/modelgauge_tests/test_retry_decorator.py b/tests/modelgauge_tests/test_retry_decorator.py
index cf80bad9..44f3e947 100644
--- a/tests/modelgauge_tests/test_retry_decorator.py
+++ b/tests/modelgauge_tests/test_retry_decorator.py
@@ -32,14 +32,14 @@ def test_retry_eventually_succeeds():
     attempt_counter = 0
 
     @retry(transient_exceptions=[ValueError])
-    def succeed_after_two_attempts():
+    def succeed_before_base_retry_total():
         nonlocal attempt_counter
         attempt_counter += 1
         if attempt_counter < BASE_RETRY_COUNT:
             raise ValueError("Intentional failure")
         return "success"
 
-    assert succeed_after_two_attempts() == "success"
+    assert succeed_before_base_retry_total() == "success"
     assert attempt_counter == BASE_RETRY_COUNT
 
 
@@ -47,14 +47,13 @@ def test_retry_transient_eventually_succeeds():
     attempt_counter = 0
     start_time = time.time()
 
-    @retry(transient_exceptions=[ValueError])
+    @retry(transient_exceptions=[ValueError], max_retry_duration=3, base_retry_count=1)
     def succeed_eventually():
         nonlocal attempt_counter
         attempt_counter += 1
         elapsed_time = time.time() - start_time
-        if elapsed_time < 5:
+        if elapsed_time < 1:
             raise ValueError("Intentional failure")
         return "success"
 
     assert succeed_eventually() == "success"
-    assert attempt_counter > BASE_RETRY_COUNT

From 9dfd2455b38fd0550c7d5db8781f13f0cbc2a6a1 Mon Sep 17 00:00:00 2001
From: Barbara Korycki <barbara@mlcommons.org>
Date: Fri, 14 Feb 2025 10:24:18 -0800
Subject: [PATCH 11/11] logging

---
 src/modelgauge/retry_decorator.py              | 7 ++++++-
 tests/modelgauge_tests/test_retry_decorator.py | 5 +++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py
index 54761faa..fe21067c 100644
--- a/src/modelgauge/retry_decorator.py
+++ b/src/modelgauge/retry_decorator.py
@@ -1,10 +1,13 @@
 import functools
+import logging
 import time
 
 BASE_RETRY_COUNT = 3
 MAX_RETRY_DURATION = 86400  # 1 day in seconds
 MAX_BACKOFF = 60  # 1 minute in seconds
 
+logger = logging.getLogger(__name__)
+
 
 def retry(
     transient_exceptions=None,
@@ -32,11 +35,13 @@ def wrapper(*args, **kwargs):
                     elapsed_time = time.time() - start_time
                     if elapsed_time >= max_retry_duration:
                         raise
-                except:
+                    logger.warning(f"Transient exception occurred: {e}. Retrying...")
+                except Exception as e:
                     # Retry all other exceptions BASE_RETRY_COUNT times.
                     attempt += 1
                     if attempt >= base_retry_count:
                         raise
+                    logger.warning(f"Exception occurred after {attempt}/{base_retry_count} attempts: {e}. Retrying...")
                 sleep_time = min(2**attempt, max_backoff)  # Exponential backoff with cap
                 time.sleep(sleep_time)
 
diff --git a/tests/modelgauge_tests/test_retry_decorator.py b/tests/modelgauge_tests/test_retry_decorator.py
index 44f3e947..7e5437da 100644
--- a/tests/modelgauge_tests/test_retry_decorator.py
+++ b/tests/modelgauge_tests/test_retry_decorator.py
@@ -5,11 +5,16 @@
 
 
 def test_retry_success():
+    attempt_counter = 0
+
     @retry()
     def always_succeed():
+        nonlocal attempt_counter
+        attempt_counter += 1
         return "success"
 
     assert always_succeed() == "success"
+    assert attempt_counter == 1
 
 
 @pytest.mark.parametrize("exceptions", [None, [ValueError]])