diff --git a/python/ray/serve/_private/replica.py b/python/ray/serve/_private/replica.py index 61d486c82cf2..607322c5b9ac 100644 --- a/python/ray/serve/_private/replica.py +++ b/python/ray/serve/_private/replica.py @@ -727,7 +727,19 @@ async def perform_graceful_shutdown(self): # can skip the wait period. if self._user_callable_initialized: await self._drain_ongoing_requests() + + try: await self._user_callable_wrapper.call_destructor() + except: # noqa: E722 + # We catch a blanket exception since the constructor may still be + # running, so instance variables used by the destructor may not exist. + if self._user_callable_initialized: + logger.exception( + "__del__ ran before replica finished initializing, and " + "raised an exception." + ) + else: + logger.exception("__del__ raised an exception.") await self._metrics_manager.shutdown() @@ -1159,10 +1171,15 @@ async def call_user_method( async def call_destructor(self): """Explicitly call the `__del__` method of the user callable. - Calling this multiple times has no effect; only the first call will actually - call the destructor. + Calling this multiple times has no effect; only the first call will + actually call the destructor. """ - self._raise_if_not_initialized("call_destructor") + if self._callable is None: + logger.info( + "This replica has not yet started running user code. " + "Skipping __del__." + ) + return # Only run the destructor once. This is safe because there is no `await` between # checking the flag here and flipping it to `True` below. diff --git a/python/ray/serve/tests/test_api.py b/python/ray/serve/tests/test_api.py index c246dcfd8436..9fb36047bf7b 100644 --- a/python/ray/serve/tests/test_api.py +++ b/python/ray/serve/tests/test_api.py @@ -412,6 +412,52 @@ def g(): assert requests.get("http://127.0.0.1:8000/app_g").text == "got g" +@pytest.mark.asyncio +async def test_delete_while_initializing(serve_instance): + """Test that __del__ runs when a replica terminates while initializing.""" + + @ray.remote + class Counter: + def __init__(self): + self.count = 0 + + def incr(self): + self.count += 1 + + def get_count(self) -> int: + return self.count + + signal = SignalActor.remote() + counter = Counter.remote() + + @serve.deployment(graceful_shutdown_timeout_s=0.01) + class HangingStart: + async def __init__( + self, signal: ray.actor.ActorHandle, counter: ray.actor.ActorHandle + ): + self.signal = signal + self.counter = counter + await signal.send.remote() + print("HangingStart set the EventHolder.") + await asyncio.sleep(10000) + + async def __del__(self): + print("Running __del__") + await self.counter.incr.remote() + + serve._run(HangingStart.bind(signal, counter), _blocking=False) + + print("Waiting for the deployment to start initialization.") + await signal.wait.remote() + + print("Calling serve.delete().") + serve.delete(name=SERVE_DEFAULT_APP_NAME) + + # Ensure that __del__ ran once, even though the deployment terminated + # during initialization. + assert (await counter.get_count.remote()) == 1 + + def test_deployment_name_with_app_name(serve_instance): """Test replica name with app name as prefix""" diff --git a/python/ray/serve/tests/unit/test_user_callable_wrapper.py b/python/ray/serve/tests/unit/test_user_callable_wrapper.py index a9c49a3d1c0b..fb83acf59b81 100644 --- a/python/ray/serve/tests/unit/test_user_callable_wrapper.py +++ b/python/ray/serve/tests/unit/test_user_callable_wrapper.py @@ -146,9 +146,6 @@ async def test_calling_methods_before_initialize(): with pytest.raises(RuntimeError): await user_callable_wrapper.call_reconfigure(None) - with pytest.raises(RuntimeError): - await user_callable_wrapper.call_destructor() - @pytest.mark.asyncio async def test_basic_class_callable():