Skip to content

Commit

Permalink
[Gateway] Remove redundant list kernels request during session poll (#…
Browse files Browse the repository at this point in the history
…1112)

* Remove redundant list kernels request during session poll

* Cull kernel in gateway kernel and session lifecycle tests
  • Loading branch information
kevin-bates authored Dec 7, 2022
1 parent 770cd3e commit ed99ddc
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 18 deletions.
17 changes: 10 additions & 7 deletions jupyter_server/gateway/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,18 +297,21 @@ async def get_kernel_spec_resource(self, kernel_name, path):
class GatewaySessionManager(SessionManager):
kernel_manager = Instance("jupyter_server.gateway.managers.GatewayMappingKernelManager")

async def kernel_culled(self, kernel_id):
async def kernel_culled(self, kernel_id: str) -> bool:
"""Checks if the kernel is still considered alive and returns true if it's not found."""
kernel = None
km: Optional[GatewayKernelManager] = None
try:
# Since we keep the models up-to-date via client polling, use that state to determine
# if this kernel no longer exists on the gateway server rather than perform a redundant
# fetch operation - especially since this is called at approximately the same interval.
# This has the effect of reducing GET /api/kernels requests against the gateway server
# by 50%!
# Note that should the redundant polling be consolidated, or replaced with an event-based
# notification model, this will need to be revisited.
km = self.kernel_manager.get_kernel(kernel_id)
kernel = await km.refresh_model()
except Exception: # Let exceptions here reflect culled kernel
pass
return kernel is None


"""KernelManager class to manage a kernel running on a Gateway Server via the REST API"""
return km is None


class GatewayKernelManager(AsyncKernelManager):
Expand Down
66 changes: 55 additions & 11 deletions tests/test_gateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,33 +422,49 @@ async def test_gateway_get_named_kernelspec(init_gateway, jp_fetch):
assert expected_http_error(e, 404)


async def test_gateway_session_lifecycle(init_gateway, jp_root_dir, jp_fetch):
@pytest.mark.parametrize("cull_kernel", [False, True])
async def test_gateway_session_lifecycle(init_gateway, jp_root_dir, jp_fetch, cull_kernel):
# Validate session lifecycle functions; create and delete.

# create
session_id, kernel_id = await create_session(jp_root_dir, jp_fetch, "kspec_foo")

# ensure kernel still considered running
assert await is_kernel_running(jp_fetch, kernel_id) is True
assert await is_session_active(jp_fetch, session_id) is True

# interrupt
await interrupt_kernel(jp_fetch, kernel_id)

# ensure kernel still considered running
assert await is_kernel_running(jp_fetch, kernel_id) is True
assert await is_session_active(jp_fetch, session_id) is True

# restart
await restart_kernel(jp_fetch, kernel_id)

# ensure kernel still considered running
assert await is_kernel_running(jp_fetch, kernel_id) is True
assert await is_session_active(jp_fetch, session_id) is True

# delete
await delete_session(jp_fetch, session_id)
assert await is_kernel_running(jp_fetch, kernel_id) is False
if cull_kernel:
running_kernels.pop(kernel_id)

# fetch kernel and session and ensure not considered running
assert await is_kernel_running(jp_fetch, kernel_id) is not cull_kernel
assert await is_session_active(jp_fetch, session_id) is not cull_kernel

# delete session. If culled, ensure 404 is raised
if cull_kernel:
with pytest.raises(tornado.httpclient.HTTPClientError) as e:
await delete_session(jp_fetch, session_id)
assert expected_http_error(e, 404)
else:
await delete_session(jp_fetch, session_id)

assert await is_session_active(jp_fetch, session_id) is False


async def test_gateway_kernel_lifecycle(init_gateway, jp_serverapp, jp_ws_fetch, jp_fetch):
@pytest.mark.parametrize("cull_kernel", [False, True])
async def test_gateway_kernel_lifecycle(
init_gateway, jp_serverapp, jp_ws_fetch, jp_fetch, cull_kernel
):
# Validate kernel lifecycle functions; create, interrupt, restart and delete.

# create
Expand All @@ -475,8 +491,20 @@ async def test_gateway_kernel_lifecycle(init_gateway, jp_serverapp, jp_ws_fetch,
# ensure kernel still considered running
assert await is_kernel_running(jp_fetch, kernel_id) is True

# delete
await delete_kernel(jp_fetch, kernel_id)
if cull_kernel:
running_kernels.pop(kernel_id)

# fetch kernel and session and ensure not considered running
assert await is_kernel_running(jp_fetch, kernel_id) is not cull_kernel

# delete kernel. If culled, ensure 404 is raised
if cull_kernel:
with pytest.raises(tornado.httpclient.HTTPClientError) as e:
await delete_kernel(jp_fetch, kernel_id)
assert expected_http_error(e, 404)
else:
await delete_kernel(jp_fetch, kernel_id)

assert await is_kernel_running(jp_fetch, kernel_id) is False


Expand Down Expand Up @@ -578,6 +606,22 @@ async def test_channel_queue_get_msg_when_response_router_had_finished():
#
# Test methods below...
#


async def is_session_active(jp_fetch, session_id):
"""Issues request to get the set of running kernels"""
with mocked_gateway:
# Get list of running kernels
r = await jp_fetch("api", "sessions", method="GET")
assert r.code == 200
sessions = json.loads(r.body.decode("utf-8"))
assert len(sessions) == len(running_kernels) # Use running_kernels as truth
for model in sessions:
if model.get("id") == session_id:
return True
return False


async def create_session(root_dir, jp_fetch, kernel_name):
"""Creates a session for a kernel. The session is created against the server
which then uses the gateway for kernel management.
Expand Down

0 comments on commit ed99ddc

Please sign in to comment.