From c6c16344561292082a9886ca75fab19fb97b49af Mon Sep 17 00:00:00 2001 From: Kevin Bates Date: Fri, 2 Dec 2022 14:29:10 -0800 Subject: [PATCH 1/2] Remove redundant list kernels request during session poll --- jupyter_server/gateway/managers.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/jupyter_server/gateway/managers.py b/jupyter_server/gateway/managers.py index e826670b08..5ed767db61 100644 --- a/jupyter_server/gateway/managers.py +++ b/jupyter_server/gateway/managers.py @@ -297,18 +297,21 @@ async def get_kernel_spec_resource(self, kernel_name, path): class GatewaySessionManager(SessionManager): kernel_manager = Instance("jupyter_server.gateway.managers.GatewayMappingKernelManager") - async def kernel_culled(self, kernel_id): + async def kernel_culled(self, kernel_id: str) -> bool: """Checks if the kernel is still considered alive and returns true if it's not found.""" - kernel = None + km: Optional[GatewayKernelManager] = None try: + # Since we keep the models up-to-date via client polling, use that state to determine + # if this kernel no longer exists on the gateway server rather than perform a redundant + # fetch operation - especially since this is called at approximately the same interval. + # This has the effect of reducing GET /api/kernels requests against the gateway server + # by 50%! + # Note that should the redundant polling be consolidated, or replaced with an event-based + # notification model, this will need to be revisited. km = self.kernel_manager.get_kernel(kernel_id) - kernel = await km.refresh_model() except Exception: # Let exceptions here reflect culled kernel pass - return kernel is None - - -"""KernelManager class to manage a kernel running on a Gateway Server via the REST API""" + return km is None class GatewayKernelManager(AsyncKernelManager): From 8233da1a3ae686a45d27d981a773f263969f06b8 Mon Sep 17 00:00:00 2001 From: Kevin Bates Date: Fri, 2 Dec 2022 16:50:58 -0800 Subject: [PATCH 2/2] Cull kernel in gateway kernel and session lifecycle tests --- tests/test_gateway.py | 66 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/tests/test_gateway.py b/tests/test_gateway.py index ff9370d766..bfef11c8fe 100644 --- a/tests/test_gateway.py +++ b/tests/test_gateway.py @@ -427,33 +427,49 @@ async def test_gateway_get_named_kernelspec(init_gateway, jp_fetch): assert expected_http_error(e, 404) -async def test_gateway_session_lifecycle(init_gateway, jp_root_dir, jp_fetch): +@pytest.mark.parametrize("cull_kernel", [False, True]) +async def test_gateway_session_lifecycle(init_gateway, jp_root_dir, jp_fetch, cull_kernel): # Validate session lifecycle functions; create and delete. # create session_id, kernel_id = await create_session(jp_root_dir, jp_fetch, "kspec_foo") # ensure kernel still considered running - assert await is_kernel_running(jp_fetch, kernel_id) is True + assert await is_session_active(jp_fetch, session_id) is True # interrupt await interrupt_kernel(jp_fetch, kernel_id) # ensure kernel still considered running - assert await is_kernel_running(jp_fetch, kernel_id) is True + assert await is_session_active(jp_fetch, session_id) is True # restart await restart_kernel(jp_fetch, kernel_id) - # ensure kernel still considered running - assert await is_kernel_running(jp_fetch, kernel_id) is True + assert await is_session_active(jp_fetch, session_id) is True - # delete - await delete_session(jp_fetch, session_id) - assert await is_kernel_running(jp_fetch, kernel_id) is False + if cull_kernel: + running_kernels.pop(kernel_id) + + # fetch kernel and session and ensure not considered running + assert await is_kernel_running(jp_fetch, kernel_id) is not cull_kernel + assert await is_session_active(jp_fetch, session_id) is not cull_kernel + + # delete session. If culled, ensure 404 is raised + if cull_kernel: + with pytest.raises(tornado.httpclient.HTTPClientError) as e: + await delete_session(jp_fetch, session_id) + assert expected_http_error(e, 404) + else: + await delete_session(jp_fetch, session_id) + + assert await is_session_active(jp_fetch, session_id) is False -async def test_gateway_kernel_lifecycle(init_gateway, jp_serverapp, jp_ws_fetch, jp_fetch): +@pytest.mark.parametrize("cull_kernel", [False, True]) +async def test_gateway_kernel_lifecycle( + init_gateway, jp_serverapp, jp_ws_fetch, jp_fetch, cull_kernel +): # Validate kernel lifecycle functions; create, interrupt, restart and delete. # create @@ -480,8 +496,20 @@ async def test_gateway_kernel_lifecycle(init_gateway, jp_serverapp, jp_ws_fetch, # ensure kernel still considered running assert await is_kernel_running(jp_fetch, kernel_id) is True - # delete - await delete_kernel(jp_fetch, kernel_id) + if cull_kernel: + running_kernels.pop(kernel_id) + + # fetch kernel and session and ensure not considered running + assert await is_kernel_running(jp_fetch, kernel_id) is not cull_kernel + + # delete kernel. If culled, ensure 404 is raised + if cull_kernel: + with pytest.raises(tornado.httpclient.HTTPClientError) as e: + await delete_kernel(jp_fetch, kernel_id) + assert expected_http_error(e, 404) + else: + await delete_kernel(jp_fetch, kernel_id) + assert await is_kernel_running(jp_fetch, kernel_id) is False @@ -583,6 +611,22 @@ async def test_channel_queue_get_msg_when_response_router_had_finished(): # # Test methods below... # + + +async def is_session_active(jp_fetch, session_id): + """Issues request to get the set of running kernels""" + with mocked_gateway: + # Get list of running kernels + r = await jp_fetch("api", "sessions", method="GET") + assert r.code == 200 + sessions = json.loads(r.body.decode("utf-8")) + assert len(sessions) == len(running_kernels) # Use running_kernels as truth + for model in sessions: + if model.get("id") == session_id: + return True + return False + + async def create_session(root_dir, jp_fetch, kernel_name): """Creates a session for a kernel. The session is created against the server which then uses the gateway for kernel management.