From 700d08badaeb7ed3039fc70398298c697cabf6c3 Mon Sep 17 00:00:00 2001 From: Barbara Korycki Date: Thu, 6 Feb 2025 14:51:58 -0800 Subject: [PATCH 01/11] New backoff numbers --- plugins/mistral/modelgauge/suts/mistral_client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/mistral/modelgauge/suts/mistral_client.py b/plugins/mistral/modelgauge/suts/mistral_client.py index a456e6b8..3c0adc06 100644 --- a/plugins/mistral/modelgauge/suts/mistral_client.py +++ b/plugins/mistral/modelgauge/suts/mistral_client.py @@ -4,9 +4,9 @@ from modelgauge.secret_values import RequiredSecret, SecretDescription -BACKOFF_INITIAL_MILLIS = 500 -BACKOFF_MAX_INTERVAL_MILLIS = 10_000 -BACKOFF_EXPONENT = 1.1 +BACKOFF_INITIAL_MILLIS = 1000 +BACKOFF_MAX_INTERVAL_MILLIS = 20_000 +BACKOFF_EXPONENT = 1.9 BACKOFF_MAX_ELAPSED_MILLIS = 120_000 From 329d8f2423b9a484b97bd3a90fe6e03a1cedb828 Mon Sep 17 00:00:00 2001 From: Barbara Korycki Date: Thu, 6 Feb 2025 17:25:02 -0800 Subject: [PATCH 02/11] change numbers --- plugins/mistral/modelgauge/suts/mistral_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/mistral/modelgauge/suts/mistral_client.py b/plugins/mistral/modelgauge/suts/mistral_client.py index 3c0adc06..a528c11d 100644 --- a/plugins/mistral/modelgauge/suts/mistral_client.py +++ b/plugins/mistral/modelgauge/suts/mistral_client.py @@ -5,9 +5,9 @@ from modelgauge.secret_values import RequiredSecret, SecretDescription BACKOFF_INITIAL_MILLIS = 1000 -BACKOFF_MAX_INTERVAL_MILLIS = 20_000 +BACKOFF_MAX_INTERVAL_MILLIS = 100_000 BACKOFF_EXPONENT = 1.9 -BACKOFF_MAX_ELAPSED_MILLIS = 120_000 +BACKOFF_MAX_ELAPSED_MILLIS = 86_400_000 # 1 day class MistralAIAPIKey(RequiredSecret): From 18f93c010f1e7dceddd6036f0e37a56a84990bc8 Mon Sep 17 00:00:00 2001 From: Barbara Korycki Date: Thu, 13 Feb 2025 12:28:35 -0800 Subject: [PATCH 03/11] Retry decorator + unit tests --- src/modelgauge/retry_decorator.py | 41 +++++++++++++ .../modelgauge_tests/test_retry_decorator.py | 60 +++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 src/modelgauge/retry_decorator.py create mode 100644 tests/modelgauge_tests/test_retry_decorator.py diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py new file mode 100644 index 00000000..4e79ae2d --- /dev/null +++ b/src/modelgauge/retry_decorator.py @@ -0,0 +1,41 @@ +import time +import functools + +BASE_RETRY_COUNT = 3 +MAX_RETRY_DURATION = 86400 # 1 day in seconds +MAX_BACKOFF = 60 # 1 minute in seconds + + +def retry(unacceptable_exceptions=None): + """ + A decorator that retries a function at least BASE_RETRY_COUNT times. + If unacceptable_exceptions are specified, it will retry for up to 1 day if any of those exceptions occur. + """ + unacceptable_exceptions = tuple(unacceptable_exceptions) if unacceptable_exceptions else () + + # TODO: Is functools.wraps necessary? Test if journal works without it. + def decorator(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + attempt = 0 + start_time = time.time() + + while True: + try: + return func(*args, **kwargs) + except unacceptable_exceptions as e: + # Keep retrying "unacceptable" exceptions for 1 day. + elapsed_time = time.time() - start_time + if elapsed_time >= MAX_RETRY_DURATION: + raise + except: + # Retry all other exceptions BASE_RETRY_COUNT times. + attempt += 1 + if attempt >= BASE_RETRY_COUNT: + raise + sleep_time = min(2**attempt, MAX_BACKOFF) # Exponential backoff with cap + time.sleep(sleep_time) + + return wrapper + + return decorator diff --git a/tests/modelgauge_tests/test_retry_decorator.py b/tests/modelgauge_tests/test_retry_decorator.py new file mode 100644 index 00000000..2372c93d --- /dev/null +++ b/tests/modelgauge_tests/test_retry_decorator.py @@ -0,0 +1,60 @@ +import pytest +import time + +from modelgauge.retry_decorator import retry, BASE_RETRY_COUNT + + +def test_retry_success(): + @retry() + def always_succeed(): + return "success" + + assert always_succeed() == "success" + + +@pytest.mark.parametrize("exceptions", [None, [ValueError]]) +def test_retry_fails_after_base_retries(exceptions): + attempt_counter = 0 + + @retry(unacceptable_exceptions=exceptions) + def always_fail(): + nonlocal attempt_counter + attempt_counter += 1 + raise KeyError("Intentional failure") + + with pytest.raises(KeyError): + always_fail() + + assert attempt_counter == BASE_RETRY_COUNT + + +def test_retry_eventually_succeeds(): + attempt_counter = 0 + + @retry(unacceptable_exceptions=[ValueError]) + def succeed_after_two_attempts(): + nonlocal attempt_counter + attempt_counter += 1 + if attempt_counter < BASE_RETRY_COUNT: + raise ValueError("Intentional failure") + return "success" + + assert succeed_after_two_attempts() == "success" + assert attempt_counter == BASE_RETRY_COUNT + + +def test_retry_unacceptable_eventually_succeeds(): + attempt_counter = 0 + start_time = time.time() + + @retry(unacceptable_exceptions=[ValueError]) + def succeed_after_twenty_seconds(): + nonlocal attempt_counter + attempt_counter += 1 + elapsed_time = time.time() - start_time + if elapsed_time < 20: + raise ValueError("Intentional failure") + return "success" + + assert succeed_after_twenty_seconds() == "success" + assert attempt_counter > BASE_RETRY_COUNT From bd621e2b618f73575db53f58889731b7f0e83c85 Mon Sep 17 00:00:00 2001 From: Barbara Korycki Date: Thu, 13 Feb 2025 13:28:05 -0800 Subject: [PATCH 04/11] Add retry decorator to SUTs without retry logic --- plugins/google/modelgauge/suts/google_genai_client.py | 3 +++ .../modelgauge/suts/huggingface_chat_completion.py | 4 +++- plugins/mistral/modelgauge/suts/mistral_sut.py | 4 +++- plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py | 3 +++ plugins/openai/modelgauge/suts/openai_client.py | 3 +++ src/modelgauge/retry_decorator.py | 2 +- 6 files changed, 16 insertions(+), 3 deletions(-) diff --git a/plugins/google/modelgauge/suts/google_genai_client.py b/plugins/google/modelgauge/suts/google_genai_client.py index 1e35dc37..f255b323 100644 --- a/plugins/google/modelgauge/suts/google_genai_client.py +++ b/plugins/google/modelgauge/suts/google_genai_client.py @@ -2,11 +2,13 @@ from typing import Dict, List, Optional import google.generativeai as genai # type: ignore +from google.api_core.exceptions import InternalServerError, ResourceExhausted, RetryError, TooManyRequests from google.generativeai.types import HarmCategory, HarmBlockThreshold # type: ignore from pydantic import BaseModel from modelgauge.general import APIException from modelgauge.prompt import TextPrompt +from modelgauge.retry_decorator import retry from modelgauge.secret_values import InjectSecret, RequiredSecret, SecretDescription from modelgauge.sut import REFUSAL_RESPONSE, PromptResponseSUT, SUTCompletion, SUTResponse from modelgauge.sut_capabilities import AcceptsTextPrompt @@ -108,6 +110,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> GoogleGenAiRequest: contents=prompt.text, generation_config=generation_config, safety_settings=self.safety_settings ) + @retry(unacceptable_exceptions=[InternalServerError, ResourceExhausted, RetryError, TooManyRequests]) def evaluate(self, request: GoogleGenAiRequest) -> GoogleGenAiResponse: if self.model is None: # Handle lazy init. diff --git a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py index 12e1add4..8a58a79b 100644 --- a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py +++ b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py @@ -2,11 +2,12 @@ from typing import Dict, List, Optional from huggingface_hub import get_inference_endpoint, InferenceClient, InferenceEndpointStatus # type: ignore -from huggingface_hub.utils import HfHubHTTPError # type: ignore +from huggingface_hub.utils import HfHubHTTPError, InferenceTimeoutError # type: ignore from pydantic import BaseModel from modelgauge.auth.huggingface_inference_token import HuggingFaceInferenceToken from modelgauge.prompt import TextPrompt +from modelgauge.retry_decorator import retry from modelgauge.secret_values import InjectSecret from modelgauge.sut import PromptResponseSUT, SUTCompletion, SUTResponse, TokenProbability, TopTokens from modelgauge.sut_capabilities import AcceptsTextPrompt, ProducesPerTokenLogProbabilities @@ -86,6 +87,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> HuggingFaceChatCompletion **prompt.options.model_dump(), ) + @retry(unacceptable_exceptions=[InferenceTimeoutError]) # Raised when model is unavailable or the request times out def evaluate(self, request: HuggingFaceChatCompletionRequest) -> HuggingFaceChatCompletionOutput: if self.client is None: self._create_client() diff --git a/plugins/mistral/modelgauge/suts/mistral_sut.py b/plugins/mistral/modelgauge/suts/mistral_sut.py index cc708134..aa9abfb2 100644 --- a/plugins/mistral/modelgauge/suts/mistral_sut.py +++ b/plugins/mistral/modelgauge/suts/mistral_sut.py @@ -1,8 +1,9 @@ import warnings from typing import Optional -from mistralai.models import ChatCompletionResponse, ClassificationResponse +from mistralai.models import ChatCompletionResponse, ClassificationResponse, SDKError from modelgauge.prompt import TextPrompt +from modelgauge.retry_decorator import retry from modelgauge.secret_values import InjectSecret from modelgauge.sut import PromptResponseSUT, SUTCompletion, SUTResponse from modelgauge.sut_capabilities import AcceptsTextPrompt @@ -60,6 +61,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> MistralAIRequest: args["max_tokens"] = prompt.options.max_tokens return MistralAIRequest(**args) + @retry(unacceptable_exceptions=[SDKError]) def evaluate(self, request: MistralAIRequest) -> ChatCompletionResponse: response = self.client.request(request.model_dump(exclude_none=True)) # type: ignore return response diff --git a/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py b/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py index 24d9a75e..9b993d25 100644 --- a/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py +++ b/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py @@ -1,10 +1,12 @@ from typing import Any, Dict, List, Optional, Union from openai import OpenAI +from openai import APITimeoutError, ConflictError, InternalServerError, RateLimitError from openai.types.chat import ChatCompletion from pydantic import BaseModel from modelgauge.prompt import ChatPrompt, ChatRole, SUTOptions, TextPrompt +from modelgauge.retry_decorator import retry from modelgauge.secret_values import ( InjectSecret, RequiredSecret, @@ -115,6 +117,7 @@ def _translate_request(self, messages: List[OpenAIChatMessage], options: SUTOpti **optional_kwargs, ) + @retry(unacceptable_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError]) def evaluate(self, request: OpenAIChatRequest) -> ChatCompletion: if self.client is None: # Handle lazy init. diff --git a/plugins/openai/modelgauge/suts/openai_client.py b/plugins/openai/modelgauge/suts/openai_client.py index ac591416..132e2952 100644 --- a/plugins/openai/modelgauge/suts/openai_client.py +++ b/plugins/openai/modelgauge/suts/openai_client.py @@ -1,10 +1,12 @@ from typing import Any, Dict, List, Optional, Union from openai import OpenAI +from openai import APITimeoutError, ConflictError, InternalServerError, RateLimitError from openai.types.chat import ChatCompletion from pydantic import BaseModel from modelgauge.prompt import ChatPrompt, ChatRole, SUTOptions, TextPrompt +from modelgauge.retry_decorator import retry from modelgauge.secret_values import ( InjectSecret, OptionalSecret, @@ -139,6 +141,7 @@ def _translate_request(self, messages: List[OpenAIChatMessage], options: SUTOpti **optional_kwargs, ) + @retry(unacceptable_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError]) def evaluate(self, request: OpenAIChatRequest) -> ChatCompletion: if self.client is None: # Handle lazy init. diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py index 4e79ae2d..f243f90e 100644 --- a/src/modelgauge/retry_decorator.py +++ b/src/modelgauge/retry_decorator.py @@ -1,5 +1,5 @@ -import time import functools +import time BASE_RETRY_COUNT = 3 MAX_RETRY_DURATION = 86400 # 1 day in seconds From 00b8e5325c4577f7d1e9dce848839f6a7ec03ed1 Mon Sep 17 00:00:00 2001 From: Barbara Korycki Date: Thu, 13 Feb 2025 13:36:32 -0800 Subject: [PATCH 05/11] remove todo --- src/modelgauge/retry_decorator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py index f243f90e..1d2a6577 100644 --- a/src/modelgauge/retry_decorator.py +++ b/src/modelgauge/retry_decorator.py @@ -13,7 +13,6 @@ def retry(unacceptable_exceptions=None): """ unacceptable_exceptions = tuple(unacceptable_exceptions) if unacceptable_exceptions else () - # TODO: Is functools.wraps necessary? Test if journal works without it. def decorator(func): @functools.wraps(func) def wrapper(*args, **kwargs): From c2fa9918c98b93ccb349bb4e5fdd8d56d347d730 Mon Sep 17 00:00:00 2001 From: Barbara Korycki Date: Fri, 14 Feb 2025 09:22:41 -0800 Subject: [PATCH 06/11] Make unit test faster --- tests/modelgauge_tests/test_retry_decorator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/modelgauge_tests/test_retry_decorator.py b/tests/modelgauge_tests/test_retry_decorator.py index 2372c93d..367ad476 100644 --- a/tests/modelgauge_tests/test_retry_decorator.py +++ b/tests/modelgauge_tests/test_retry_decorator.py @@ -48,13 +48,13 @@ def test_retry_unacceptable_eventually_succeeds(): start_time = time.time() @retry(unacceptable_exceptions=[ValueError]) - def succeed_after_twenty_seconds(): + def succeed_eventually(): nonlocal attempt_counter attempt_counter += 1 elapsed_time = time.time() - start_time - if elapsed_time < 20: + if elapsed_time < 5: raise ValueError("Intentional failure") return "success" - assert succeed_after_twenty_seconds() == "success" + assert succeed_eventually() == "success" assert attempt_counter > BASE_RETRY_COUNT From 17cb816aa80a20093508c1d7b18d6720a114119a Mon Sep 17 00:00:00 2001 From: Barbara Korycki Date: Fri, 14 Feb 2025 09:24:56 -0800 Subject: [PATCH 07/11] Change param name to be less confusing --- src/modelgauge/retry_decorator.py | 10 +++++----- tests/modelgauge_tests/test_retry_decorator.py | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py index 1d2a6577..79da5f36 100644 --- a/src/modelgauge/retry_decorator.py +++ b/src/modelgauge/retry_decorator.py @@ -6,12 +6,12 @@ MAX_BACKOFF = 60 # 1 minute in seconds -def retry(unacceptable_exceptions=None): +def retry(transient_exceptions=None): """ A decorator that retries a function at least BASE_RETRY_COUNT times. - If unacceptable_exceptions are specified, it will retry for up to 1 day if any of those exceptions occur. + If transient_exceptions are specified, it will retry for up to 1 day if any of those exceptions occur. """ - unacceptable_exceptions = tuple(unacceptable_exceptions) if unacceptable_exceptions else () + transient_exceptions = tuple(transient_exceptions) if transient_exceptions else () def decorator(func): @functools.wraps(func) @@ -22,8 +22,8 @@ def wrapper(*args, **kwargs): while True: try: return func(*args, **kwargs) - except unacceptable_exceptions as e: - # Keep retrying "unacceptable" exceptions for 1 day. + except transient_exceptions as e: + # Keep retrying transient exceptions for 1 day. elapsed_time = time.time() - start_time if elapsed_time >= MAX_RETRY_DURATION: raise diff --git a/tests/modelgauge_tests/test_retry_decorator.py b/tests/modelgauge_tests/test_retry_decorator.py index 367ad476..cf80bad9 100644 --- a/tests/modelgauge_tests/test_retry_decorator.py +++ b/tests/modelgauge_tests/test_retry_decorator.py @@ -16,7 +16,7 @@ def always_succeed(): def test_retry_fails_after_base_retries(exceptions): attempt_counter = 0 - @retry(unacceptable_exceptions=exceptions) + @retry(transient_exceptions=exceptions) def always_fail(): nonlocal attempt_counter attempt_counter += 1 @@ -31,7 +31,7 @@ def always_fail(): def test_retry_eventually_succeeds(): attempt_counter = 0 - @retry(unacceptable_exceptions=[ValueError]) + @retry(transient_exceptions=[ValueError]) def succeed_after_two_attempts(): nonlocal attempt_counter attempt_counter += 1 @@ -43,11 +43,11 @@ def succeed_after_two_attempts(): assert attempt_counter == BASE_RETRY_COUNT -def test_retry_unacceptable_eventually_succeeds(): +def test_retry_transient_eventually_succeeds(): attempt_counter = 0 start_time = time.time() - @retry(unacceptable_exceptions=[ValueError]) + @retry(transient_exceptions=[ValueError]) def succeed_eventually(): nonlocal attempt_counter attempt_counter += 1 From 73758bba89e34b74ec34c5053ec25cf682ddb0f7 Mon Sep 17 00:00:00 2001 From: Barbara Korycki Date: Fri, 14 Feb 2025 10:06:50 -0800 Subject: [PATCH 08/11] oops. update all param names --- plugins/google/modelgauge/suts/google_genai_client.py | 2 +- .../huggingface/modelgauge/suts/huggingface_chat_completion.py | 2 +- plugins/mistral/modelgauge/suts/mistral_sut.py | 3 ++- plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py | 2 +- plugins/openai/modelgauge/suts/openai_client.py | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) diff --git a/plugins/google/modelgauge/suts/google_genai_client.py b/plugins/google/modelgauge/suts/google_genai_client.py index f255b323..cc5f5a0f 100644 --- a/plugins/google/modelgauge/suts/google_genai_client.py +++ b/plugins/google/modelgauge/suts/google_genai_client.py @@ -110,7 +110,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> GoogleGenAiRequest: contents=prompt.text, generation_config=generation_config, safety_settings=self.safety_settings ) - @retry(unacceptable_exceptions=[InternalServerError, ResourceExhausted, RetryError, TooManyRequests]) + @retry(transient_exceptions=[InternalServerError, ResourceExhausted, RetryError, TooManyRequests]) def evaluate(self, request: GoogleGenAiRequest) -> GoogleGenAiResponse: if self.model is None: # Handle lazy init. diff --git a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py index 8a58a79b..23e1aa03 100644 --- a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py +++ b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py @@ -87,7 +87,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> HuggingFaceChatCompletion **prompt.options.model_dump(), ) - @retry(unacceptable_exceptions=[InferenceTimeoutError]) # Raised when model is unavailable or the request times out + @retry(transient_exceptions=[InferenceTimeoutError]) # Raised when model is unavailable or the request times out def evaluate(self, request: HuggingFaceChatCompletionRequest) -> HuggingFaceChatCompletionOutput: if self.client is None: self._create_client() diff --git a/plugins/mistral/modelgauge/suts/mistral_sut.py b/plugins/mistral/modelgauge/suts/mistral_sut.py index aa9abfb2..a1380516 100644 --- a/plugins/mistral/modelgauge/suts/mistral_sut.py +++ b/plugins/mistral/modelgauge/suts/mistral_sut.py @@ -61,7 +61,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> MistralAIRequest: args["max_tokens"] = prompt.options.max_tokens return MistralAIRequest(**args) - @retry(unacceptable_exceptions=[SDKError]) + @retry(transient_exceptions=[SDKError]) def evaluate(self, request: MistralAIRequest) -> ChatCompletionResponse: response = self.client.request(request.model_dump(exclude_none=True)) # type: ignore return response @@ -132,6 +132,7 @@ def translate_text_prompt(self, prompt: TextPrompt) -> MistralAIRequest: args["max_tokens"] = prompt.options.max_tokens return MistralAIRequest(temperature=self.temperature, n=self.num_generations, **args) + @retry(transient_exceptions=[SDKError]) def evaluate(self, request: MistralAIRequest) -> MistralAIResponseWithModerations: response = self.client.request(request.model_dump(exclude_none=True)) # type: ignore assert ( diff --git a/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py b/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py index 9b993d25..1bb5a0a2 100644 --- a/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py +++ b/plugins/nvidia/modelgauge/suts/nvidia_nim_api_client.py @@ -117,7 +117,7 @@ def _translate_request(self, messages: List[OpenAIChatMessage], options: SUTOpti **optional_kwargs, ) - @retry(unacceptable_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError]) + @retry(transient_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError]) def evaluate(self, request: OpenAIChatRequest) -> ChatCompletion: if self.client is None: # Handle lazy init. diff --git a/plugins/openai/modelgauge/suts/openai_client.py b/plugins/openai/modelgauge/suts/openai_client.py index 132e2952..f403fc66 100644 --- a/plugins/openai/modelgauge/suts/openai_client.py +++ b/plugins/openai/modelgauge/suts/openai_client.py @@ -141,7 +141,7 @@ def _translate_request(self, messages: List[OpenAIChatMessage], options: SUTOpti **optional_kwargs, ) - @retry(unacceptable_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError]) + @retry(transient_exceptions=[APITimeoutError, ConflictError, InternalServerError, RateLimitError]) def evaluate(self, request: OpenAIChatRequest) -> ChatCompletion: if self.client is None: # Handle lazy init. From 84e840edd4970d7c87cef5961af73f3eb843cb14 Mon Sep 17 00:00:00 2001 From: Barbara Korycki Date: Fri, 14 Feb 2025 10:10:58 -0800 Subject: [PATCH 09/11] Remove retry from hf chat_completion (no relevant exceptions to retry?) --- .../modelgauge/suts/huggingface_chat_completion.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py index 23e1aa03..12e1add4 100644 --- a/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py +++ b/plugins/huggingface/modelgauge/suts/huggingface_chat_completion.py @@ -2,12 +2,11 @@ from typing import Dict, List, Optional from huggingface_hub import get_inference_endpoint, InferenceClient, InferenceEndpointStatus # type: ignore -from huggingface_hub.utils import HfHubHTTPError, InferenceTimeoutError # type: ignore +from huggingface_hub.utils import HfHubHTTPError # type: ignore from pydantic import BaseModel from modelgauge.auth.huggingface_inference_token import HuggingFaceInferenceToken from modelgauge.prompt import TextPrompt -from modelgauge.retry_decorator import retry from modelgauge.secret_values import InjectSecret from modelgauge.sut import PromptResponseSUT, SUTCompletion, SUTResponse, TokenProbability, TopTokens from modelgauge.sut_capabilities import AcceptsTextPrompt, ProducesPerTokenLogProbabilities @@ -87,7 +86,6 @@ def translate_text_prompt(self, prompt: TextPrompt) -> HuggingFaceChatCompletion **prompt.options.model_dump(), ) - @retry(transient_exceptions=[InferenceTimeoutError]) # Raised when model is unavailable or the request times out def evaluate(self, request: HuggingFaceChatCompletionRequest) -> HuggingFaceChatCompletionOutput: if self.client is None: self._create_client() From 8797205a3afffbf6494b1eb0fc1f5f6d0a4a875e Mon Sep 17 00:00:00 2001 From: Barbara Korycki Date: Fri, 14 Feb 2025 10:16:00 -0800 Subject: [PATCH 10/11] parameterize retry variables --- src/modelgauge/retry_decorator.py | 15 ++++++++++----- tests/modelgauge_tests/test_retry_decorator.py | 9 ++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py index 79da5f36..54761faa 100644 --- a/src/modelgauge/retry_decorator.py +++ b/src/modelgauge/retry_decorator.py @@ -6,9 +6,14 @@ MAX_BACKOFF = 60 # 1 minute in seconds -def retry(transient_exceptions=None): +def retry( + transient_exceptions=None, + base_retry_count=BASE_RETRY_COUNT, + max_retry_duration=MAX_RETRY_DURATION, + max_backoff=MAX_BACKOFF, +): """ - A decorator that retries a function at least BASE_RETRY_COUNT times. + A decorator that retries a function at least base_retry_count times. If transient_exceptions are specified, it will retry for up to 1 day if any of those exceptions occur. """ transient_exceptions = tuple(transient_exceptions) if transient_exceptions else () @@ -25,14 +30,14 @@ def wrapper(*args, **kwargs): except transient_exceptions as e: # Keep retrying transient exceptions for 1 day. elapsed_time = time.time() - start_time - if elapsed_time >= MAX_RETRY_DURATION: + if elapsed_time >= max_retry_duration: raise except: # Retry all other exceptions BASE_RETRY_COUNT times. attempt += 1 - if attempt >= BASE_RETRY_COUNT: + if attempt >= base_retry_count: raise - sleep_time = min(2**attempt, MAX_BACKOFF) # Exponential backoff with cap + sleep_time = min(2**attempt, max_backoff) # Exponential backoff with cap time.sleep(sleep_time) return wrapper diff --git a/tests/modelgauge_tests/test_retry_decorator.py b/tests/modelgauge_tests/test_retry_decorator.py index cf80bad9..44f3e947 100644 --- a/tests/modelgauge_tests/test_retry_decorator.py +++ b/tests/modelgauge_tests/test_retry_decorator.py @@ -32,14 +32,14 @@ def test_retry_eventually_succeeds(): attempt_counter = 0 @retry(transient_exceptions=[ValueError]) - def succeed_after_two_attempts(): + def succeed_before_base_retry_total(): nonlocal attempt_counter attempt_counter += 1 if attempt_counter < BASE_RETRY_COUNT: raise ValueError("Intentional failure") return "success" - assert succeed_after_two_attempts() == "success" + assert succeed_before_base_retry_total() == "success" assert attempt_counter == BASE_RETRY_COUNT @@ -47,14 +47,13 @@ def test_retry_transient_eventually_succeeds(): attempt_counter = 0 start_time = time.time() - @retry(transient_exceptions=[ValueError]) + @retry(transient_exceptions=[ValueError], max_retry_duration=3, base_retry_count=1) def succeed_eventually(): nonlocal attempt_counter attempt_counter += 1 elapsed_time = time.time() - start_time - if elapsed_time < 5: + if elapsed_time < 1: raise ValueError("Intentional failure") return "success" assert succeed_eventually() == "success" - assert attempt_counter > BASE_RETRY_COUNT From 9dfd2455b38fd0550c7d5db8781f13f0cbc2a6a1 Mon Sep 17 00:00:00 2001 From: Barbara Korycki Date: Fri, 14 Feb 2025 10:24:18 -0800 Subject: [PATCH 11/11] logging --- src/modelgauge/retry_decorator.py | 7 ++++++- tests/modelgauge_tests/test_retry_decorator.py | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/modelgauge/retry_decorator.py b/src/modelgauge/retry_decorator.py index 54761faa..fe21067c 100644 --- a/src/modelgauge/retry_decorator.py +++ b/src/modelgauge/retry_decorator.py @@ -1,10 +1,13 @@ import functools +import logging import time BASE_RETRY_COUNT = 3 MAX_RETRY_DURATION = 86400 # 1 day in seconds MAX_BACKOFF = 60 # 1 minute in seconds +logger = logging.getLogger(__name__) + def retry( transient_exceptions=None, @@ -32,11 +35,13 @@ def wrapper(*args, **kwargs): elapsed_time = time.time() - start_time if elapsed_time >= max_retry_duration: raise - except: + logger.warning(f"Transient exception occurred: {e}. Retrying...") + except Exception as e: # Retry all other exceptions BASE_RETRY_COUNT times. attempt += 1 if attempt >= base_retry_count: raise + logger.warning(f"Exception occurred after {attempt}/{base_retry_count} attempts: {e}. Retrying...") sleep_time = min(2**attempt, max_backoff) # Exponential backoff with cap time.sleep(sleep_time) diff --git a/tests/modelgauge_tests/test_retry_decorator.py b/tests/modelgauge_tests/test_retry_decorator.py index 44f3e947..7e5437da 100644 --- a/tests/modelgauge_tests/test_retry_decorator.py +++ b/tests/modelgauge_tests/test_retry_decorator.py @@ -5,11 +5,16 @@ def test_retry_success(): + attempt_counter = 0 + @retry() def always_succeed(): + nonlocal attempt_counter + attempt_counter += 1 return "success" assert always_succeed() == "success" + assert attempt_counter == 1 @pytest.mark.parametrize("exceptions", [None, [ValueError]])