Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Possible solution to propagate informative spawn failure messages from the spawner to the BinderHub UI #819

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions binderhub/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import docker
from tornado.concurrent import chain_future, Future
from tornado import gen
from tornado.web import Finish, authenticated
from tornado.web import Finish, authenticated, HTTPError
from tornado.queues import Queue
from tornado.iostream import StreamClosedError
from tornado.ioloop import IOLoop
Expand Down Expand Up @@ -518,8 +518,8 @@ async def launch(self, kube):
status=status, **self.repo_metric_labels,
).inc()

if i + 1 == launcher.retries:
# last attempt failed, let it raise
if i + 1 == launcher.retries or (isinstance(e, HTTPError) and e.status_code == 409):
# last attempt failed or 409 client error, let it raise
raise

# not the last attempt, try again
Expand Down
15 changes: 8 additions & 7 deletions binderhub/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,6 @@ async def api_request(self, url, *args, **kwargs):
try:
return await AsyncHTTPClient().fetch(req)
except HTTPError as e:
# swallow 409 errors on retry only (not first attempt)
if i > 1 and e.code == 409 and e.response:
self.log.warning("Treating 409 conflict on retry as success")
return e.response
# retry requests that fail with error codes greater than 500
# because they are likely intermittent issues in the cluster
# e.g. 502,504 due to ingress issues or Hub relocating,
Expand Down Expand Up @@ -189,12 +185,17 @@ async def launch(self, image, username, server_name='', repo_url=''):
except HTTPError as e:
if e.response:
body = e.response.body
message = json.loads(body.decode('utf-8')).get('message', '')
else:
body = ''
message = ''

app_log.error("Error starting server{} for user {}: {}\n{}".
format(_server_name, username, e, body))
raise web.HTTPError(500, "Failed to launch image %s" % image)
if e.code == 409:
raise web.HTTPError(409, message)
else:
app_log.error("Error starting server{} for user {}: {}\n{}".
format(_server_name, username, e, body))
raise web.HTTPError(500, "Failed to launch image %s" % image)

data['url'] = self.hub_url + 'user/%s/%s' % (username, server_name)
return data
3 changes: 3 additions & 0 deletions helm-chart/binderhub/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ jupyterhub:
rbac:
enabled: true
hub:
consecutiveFailureLimit: 0
extraConfig:
hub: |
c.JupyterHub.tornado_settings['slow_spawn_timeout']= 10
binder: |
import os
import sys
Expand Down