diff --git a/docs/source/conf.py b/docs/source/conf.py index c78443f012..3e28778284 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -73,6 +73,7 @@ "sphinx.ext.intersphinx", "sphinx.ext.autosummary", "sphinx.ext.mathjax", + "sphinx.ext.napoleon", "IPython.sphinxext.ipython_console_highlighting", "sphinxcontrib_github_alt", "sphinxcontrib.openapi", @@ -131,7 +132,7 @@ # The reST default role (used for this markup: `text`) to use for all # documents. -# default_role = None +default_role = "literal" # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True @@ -360,6 +361,7 @@ "nbconvert": ("https://nbconvert.readthedocs.io/en/latest/", None), "nbformat": ("https://nbformat.readthedocs.io/en/latest/", None), "jupyter": ("https://jupyter.readthedocs.io/en/latest/", None), + "tornado": ("https://www.tornadoweb.org/en/stable/", None), } spelling_lang = "en_US" diff --git a/docs/source/operators/security.rst b/docs/source/operators/security.rst index 87148cbe41..04568cb80f 100644 --- a/docs/source/operators/security.rst +++ b/docs/source/operators/security.rst @@ -77,17 +77,129 @@ but this is **NOT RECOMMENDED**, unless authentication or access restrictions ar c.ServerApp.token = '' c.ServerApp.password = '' -Authorization -------------- + +Authentication and Authorization +-------------------------------- .. versionadded:: 2.0 +There are two steps to deciding whether to allow a given request to happen. + +The first step is "Authentication" (identifying who is making the request). +This is handled by the :class:`.IdentityProvider`. + +Whether a given user is allowed to take a specific action is called "Authorization", +and is handled separately, by an :class:`.Authorizer`. + +These two classes may work together, +as the information returned by the IdentityProvider is given to the Authorizer when it makes its decisions. 
+ +Authentication always takes precedence because if no user is authenticated, +no authorization checks need to be made, +as all requests requiring _authorization_ must first complete _authentication_. + +Identity Providers +****************** + +The :class:`.IdentityProvider` class is responsible for the "authentication" step, +identifying the user making the request, +and constructing information about them. + +It principally implements two methods. + +.. autoclass:: jupyter_server.auth.IdentityProvider + + .. automethod:: get_user + .. automethod:: identity_model + +The first is :meth:`.IdentityProvider.get_user`. +This method is given a RequestHandler, and is responsible for deciding whether there is an authenticated user making the request. +If the request is authenticated, it should return a :class:`.jupyter_server.auth.User` object representing the authenticated user. +It should return None if the request is not authenticated. + +The default implementation accepts token or password authentication. + +This User object will be available as `self.current_user` in any request handler. +Request methods decorated with tornado's `@web.authenticated` decorator +will only be allowed if this method returns something. + +The User object will be a Python :py:class:`dataclasses.dataclass`, `jupyter_server.auth.User`: + +.. autoclass:: jupyter_server.auth.User + +A custom IdentityProvider _may_ return a custom subclass. + + +The next method an identity provider has is :meth:`~.IdentityProvider.identity_model`. +`identity_model(user)` is responsible for transforming the user object returned from `.get_user()` +into a standard identity model dictionary, +for use in the `/api/me` endpoint. + +If your user object is a simple username string or a dict with a `username` field, +you may not need to implement this method, as the default implementation will suffice. + +Any required fields missing from the dict returned by this method will be filled-out with defaults. 
+Only `username` is strictly required, if that is all the information the identity provider has available. + +Missing fields will be derived according to: + +- if `name` is missing, use `username` +- if `display_name` is missing, use `name` + +Other required fields will be filled with `None`. + + +Identity Model +^^^^^^^^^^^^^^ + +The identity model is the model accessed at `/api/me`, +and describes the currently authenticated user. + +It has the following fields: + +username + (string) + Unique string identifying the user. + Must be non-empty. +name + (string) + For-humans name of the user. + May be the same as `username` in systems where only usernames are available. +display_name + (string) + Alternate rendering of name for display, such as a nickname. + Often the same as `name`. +initials + (string or null) + Short string of initials. + Initials should not be derived automatically due to localization issues. + May be `null` if unavailable. +avatar_url + (string or null) + URL of an avatar image to be used for the user. + May be `null` if unavailable. +color + (string or null) + A CSS color string to use as a preferred color, + such as for collaboration cursors. + May be `null` if unavailable. + +Authorization +************* + +Authorization is the second step in allowing an action, +after a user has been _authenticated_ by the IdentityProvider. + Authorization in Jupyter Server serves to provide finer grained control of access to its API resources. With authentication, requests are accepted if the current user is known by the server. Thus it can restrain access to specific users, but there is no way to give allowed users more or less permissions. Jupyter Server provides a thin and extensible authorization layer which checks if the current user is authorized to make a specific request. +.. autoclass:: jupyter_server.auth.Authorizer + + .. 
automethod:: is_authorized + This is done by calling a ``is_authorized(handler, user, action, resource)`` method before each request handler. Each request is labeled as either a "read", "write", or "execute" ``action``: @@ -233,6 +345,7 @@ The ``is_authorized()`` method will automatically be called whenever a handler i ``@authorized`` (from ``jupyter_server.auth``), similarly to the ``@authenticated`` decorator for authorization (from ``tornado.web``). + Security in notebook documents ============================== diff --git a/jupyter_server/auth/__init__.py b/jupyter_server/auth/__init__.py index 54477ffd1b..77a2599560 100644 --- a/jupyter_server/auth/__init__.py +++ b/jupyter_server/auth/__init__.py @@ -1,3 +1,4 @@ from .authorizer import * # noqa from .decorator import authorized # noqa +from .identity import * # noqa from .security import passwd # noqa diff --git a/jupyter_server/auth/authorizer.py b/jupyter_server/auth/authorizer.py index 952cb0278d..50c04f8a2e 100644 --- a/jupyter_server/auth/authorizer.py +++ b/jupyter_server/auth/authorizer.py @@ -11,6 +11,8 @@ from jupyter_server.base.handlers import JupyterHandler +from .identity import User + class Authorizer(LoggingConfigurable): """Base class for authorizing access to resources @@ -32,23 +34,28 @@ class Authorizer(LoggingConfigurable): .. versionadded:: 2.0 """ - def is_authorized(self, handler: JupyterHandler, user: str, action: str, resource: str) -> bool: + def is_authorized( + self, handler: JupyterHandler, user: User, action: str, resource: str + ) -> bool: """A method to determine if `user` is authorized to perform `action` (read, write, or execute) on the `resource` type. Parameters ---------- - user : usually a dict or string - A truthy model representing the authenticated user. - A username string by default, - but usually a dict when integrating with an auth provider. 
+ user : jupyter_server.auth.User + An object representing the authenticated user, + as returned by :meth:`.IdentityProvider.get_user`. + action : str the category of action for the current request: read, write, or execute. resource : str the type of resource (i.e. contents, kernels, files, etc.) the user is requesting. - Returns True if user authorized to make request; otherwise, returns False. + Returns + ------- + bool + True if user authorized to make request; False, otherwise """ raise NotImplementedError() @@ -61,7 +68,9 @@ class AllowAllAuthorizer(Authorizer): .. versionadded:: 2.0 """ - def is_authorized(self, handler: JupyterHandler, user: str, action: str, resource: str) -> bool: + def is_authorized( + self, handler: JupyterHandler, user: User, action: str, resource: str + ) -> bool: """This method always returns True. All authenticated users are allowed to do anything in the Jupyter Server. diff --git a/jupyter_server/auth/decorator.py b/jupyter_server/auth/decorator.py index 72a489dbe9..930d79be47 100644 --- a/jupyter_server/auth/decorator.py +++ b/jupyter_server/auth/decorator.py @@ -8,7 +8,7 @@ from tornado.log import app_log from tornado.web import HTTPError -from .utils import HTTP_METHOD_TO_AUTH_ACTION, warn_disabled_authorization +from .utils import HTTP_METHOD_TO_AUTH_ACTION def authorized( @@ -57,18 +57,13 @@ def inner(self, *args, **kwargs): if not user: app_log.warning("Attempting to authorize request without authentication!") raise HTTPError(status_code=403, log_message=message) - - # Handle the case where an authorizer wasn't attached to the handler. - if not self.authorizer: - warn_disabled_authorization() - return method(self, *args, **kwargs) - - # Only return the method if the action is authorized. + # If the user is allowed to do this action, + # call the method. if self.authorizer.is_authorized(self, user, action, resource): return method(self, *args, **kwargs) - - # Raise an exception if the method wasn't returned (i.e. 
not authorized) - raise HTTPError(status_code=403, log_message=message) + # else raise an exception. + else: + raise HTTPError(status_code=403, log_message=message) return inner diff --git a/jupyter_server/auth/identity.py b/jupyter_server/auth/identity.py new file mode 100644 index 0000000000..d3cca77911 --- /dev/null +++ b/jupyter_server/auth/identity.py @@ -0,0 +1,141 @@ +"""Identity Provider interface + +This defines the _authentication_ layer of Jupyter Server, +to be used in combination with Authorizer for _authorization_. + +.. versionadded:: 2.0 +""" +from dataclasses import asdict, dataclass +from typing import Any, Optional + +from tornado.web import RequestHandler +from traitlets.config import LoggingConfigurable + +# from dataclasses import field + + +@dataclass +class User: + """Object representing a User + + This or a subclass should be returned from IdentityProvider.get_user + """ + + username: str # the only truly required field + + # these fields are filled from username if not specified + # name is the 'real' name of the user + name: str = "" + # display_name is a shorter name for us in UI, + # if different from name. e.g. a nickname + display_name: str = "" + + # these fields are left as None if undefined + initials: Optional[str] = None + avatar_url: Optional[str] = None + color: Optional[str] = None + + # TODO: extension fields? 
+ # ext: Dict[str, Dict[str, Any]] = field(default_factory=dict) + + def __post_init__(self): + self.fill_defaults() + + def fill_defaults(self): + """Fill out default fields in the identity model + + - Ensures all values are defined + - Fills out derivative values for name fields fields + - Fills out null values for optional fields + """ + + # username is the only truly required field + if not self.username: + raise ValueError(f"user.username must not be empty: {self}") + + # derive name fields from username -> name -> display name + if not self.name: + self.name = self.username + if not self.display_name: + self.display_name = self.name + + def to_dict(self): + pass + + +def _backward_compat_user(got_user: Any) -> User: + """Backward-compatibility for LoginHandler.get_user + + Prior to 2.0, LoginHandler.get_user could return anything truthy. + + Typically, this was either a simple string username, + or a simple dict. + + Make some effort to allow common patterns to keep working. + """ + if isinstance(got_user, str): + return User(username=got_user) + elif isinstance(got_user, dict): + kwargs = {} + if "username" not in got_user: + if "name" in got_user: + kwargs["username"] = got_user["name"] + for field in User.__dataclass_fields__: + if field in got_user: + kwargs[field] = got_user[field] + try: + return User(**kwargs) + except TypeError: + raise ValueError(f"Unrecognized user: {got_user}") + else: + raise ValueError(f"Unrecognized user: {got_user}") + + +class IdentityProvider(LoggingConfigurable): + """ + Interface for providing identity + + _may_ be a coroutine. + + Two principle methods: + + - :meth:`~.IdentityProvider.get_user` returns a :class:`~.User` object + for successful authentication, or None for no-identity-found. + - :meth:`~.IdentityProvider.identity_model` turns a :class:`~.User` into a JSONable dict. + The default is to use :py:meth:`dataclasses.asdict`, + and usually shouldn't need override. + + .. 
versionadded:: 2.0 + """ + + def get_user(self, handler: RequestHandler) -> User: + """Get the authenticated user for a request + + Must return a :class:`.jupyter_server.auth.User`, + though it may be a subclass. + + Return None if the request is not authenticated. + """ + + if handler.login_handler is None: + return User("anonymous") + + # The default: call LoginHandler.get_user for backward-compatibility + # TODO: move default implementation to this class, + # deprecate `LoginHandler.get_user` + user = handler.login_handler.get_user(handler) + if user and not isinstance(user, User): + return _backward_compat_user(user) + return user + + def identity_model(self, user: User) -> dict: + """Return a User as an Identity model""" + # TODO: validate? + return asdict(user) + + def get_handlers(self) -> list: + """Return list of additional handlers for this identity provider + + For example, an OAuth callback handler. + """ + return [] diff --git a/jupyter_server/auth/login.py b/jupyter_server/auth/login.py index 382077d9e0..6eb07e5748 100644 --- a/jupyter_server/auth/login.py +++ b/jupyter_server/auth/login.py @@ -152,7 +152,7 @@ def is_token_authenticated(cls, handler): """ if getattr(handler, "_user_id", None) is None: # ensure get_user has been called, so we know if we're token-authenticated - handler.get_current_user() + handler.current_user return getattr(handler, "_token_authenticated", False) @classmethod @@ -165,17 +165,20 @@ def get_user(cls, handler): # called on LoginHandler itself. 
if getattr(handler, "_user_id", None): return handler._user_id - user_id = cls.get_user_token(handler) - if user_id is None: - get_secure_cookie_kwargs = handler.settings.get("get_secure_cookie_kwargs", {}) - user_id = handler.get_secure_cookie(handler.cookie_name, **get_secure_cookie_kwargs) - if user_id: - user_id = user_id.decode() - else: - cls.set_login_cookie(handler, user_id) + token_user_id = cls.get_user_token(handler) + cookie_user_id = cls.get_user_cookie(handler) + # prefer token to cookie if both given, + # because token is always explicit + user_id = token_user_id or cookie_user_id + if token_user_id: + # if token-authenticated, persist user_id in cookie + # if it hasn't already been stored there + if user_id != cookie_user_id: + cls.set_login_cookie(handler, user_id) # Record that the current request has been authenticated with a token. # Used in is_token_authenticated above. handler._token_authenticated = True + if user_id is None: # If an invalid cookie was sent, clear it to prevent unnecessary # extra warnings. But don't do this on a request with *no* cookie, @@ -192,6 +195,15 @@ def get_user(cls, handler): handler._user_id = user_id return user_id + @classmethod + def get_user_cookie(cls, handler): + """Get user-id from a cookie""" + get_secure_cookie_kwargs = handler.settings.get("get_secure_cookie_kwargs", {}) + user_id = handler.get_secure_cookie(handler.cookie_name, **get_secure_cookie_kwargs) + if user_id: + user_id = user_id.decode() + return user_id + @classmethod def get_user_token(cls, handler): """Identify the user based on a token in the URL or Authorization header @@ -215,7 +227,17 @@ def get_user_token(cls, handler): authenticated = True if authenticated: - return uuid.uuid4().hex + # token does not correspond to user-id, + # which is stored in a cookie. 
+ # still check the cookie for the user id + user_id = cls.get_user_cookie(handler) + if user_id is None: + # no cookie, generate new random user_id + user_id = uuid.uuid4().hex + handler.log.info( + f"Generating new user_id for token-authenticated request: {user_id}" + ) + return user_id else: return None diff --git a/jupyter_server/auth/utils.py b/jupyter_server/auth/utils.py index b939b87ae0..3f129dce63 100644 --- a/jupyter_server/auth/utils.py +++ b/jupyter_server/auth/utils.py @@ -8,16 +8,11 @@ def warn_disabled_authorization(): + """DEPRECATED, does nothing""" warnings.warn( - "The Tornado web application does not have an 'authorizer' defined " - "in its settings. In future releases of jupyter_server, this will " - "be a required key for all subclasses of `JupyterHandler`. For an " - "example, see the jupyter_server source code for how to " - "add an authorizer to the tornado settings: " - "https://github.com/jupyter-server/jupyter_server/blob/" - "653740cbad7ce0c8a8752ce83e4d3c2c754b13cb/jupyter_server/serverapp.py" - "#L234-L256", - FutureWarning, + "jupyter_server.auth.utils.warn_disabled_authorization is deprecated", + DeprecationWarning, + stacklevel=2, ) diff --git a/jupyter_server/base/handlers.py b/jupyter_server/base/handlers.py index 42f7fb3d5e..b3c827bc6d 100644 --- a/jupyter_server/base/handlers.py +++ b/jupyter_server/base/handlers.py @@ -3,6 +3,7 @@ # Distributed under the terms of the Modified BSD License. import datetime import functools +import inspect import ipaddress import json import mimetypes @@ -134,9 +135,21 @@ def clear_login_cookie(self): self.force_clear_cookie(self.cookie_name) def get_current_user(self): - if self.login_handler is None: - return "anonymous" - return self.login_handler.get_user(self) + clsname = self.__class__.__name__ + msg = ( + f"Calling `{clsname}.get_current_user()` directly is deprecated in jupyter-server 2.0." + " Use `self.current_user` instead (works in all versions)." 
+ ) + if hasattr(self, "_jupyter_current_user"): + # backward-compat: return _jupyter_current_user + warnings.warn( + msg, + DeprecationWarning, + stacklevel=2, + ) + return self._jupyter_current_user + # haven't called get_user in prepare, raise + raise RuntimeError(msg) def skip_check_origin(self): """Ask my login_handler if I should skip the origin_check @@ -166,7 +179,7 @@ def cookie_name(self): @property def logged_in(self): """Is a user currently logged in?""" - user = self.get_current_user() + user = self.current_user return user and not user == "anonymous" @property @@ -193,8 +206,46 @@ def login_available(self): @property def authorizer(self): + if "authorizer" not in self.settings: + warnings.warn( + "The Tornado web application does not have an 'authorizer' defined " + "in its settings. In future releases of jupyter_server, this will " + "be a required key for all subclasses of `JupyterHandler`. For an " + "example, see the jupyter_server source code for how to " + "add an authorizer to the tornado settings: " + "https://github.com/jupyter-server/jupyter_server/blob/" + "653740cbad7ce0c8a8752ce83e4d3c2c754b13cb/jupyter_server/serverapp.py" + "#L234-L256", + ) + from jupyter_server.auth import AllowAllAuthorizer + + self.settings["authorizer"] = AllowAllAuthorizer( + config=self.settings.get("config", None) + ) + return self.settings.get("authorizer") + @property + def identity_provider(self): + if "identity_provider" not in self.settings: + warnings.warn( + "The Tornado web application does not have an 'identity_provider' defined " + "in its settings. In future releases of jupyter_server, this will " + "be a required key for all subclasses of `JupyterHandler`. 
For an " + "example, see the jupyter_server source code for how to " + "add an identity provider to the tornado settings: " + "https://github.com/jupyter-server/jupyter_server/blob/" + "aa8fd8b3faf37466eeb99689d5555314c5bf6640/jupyter_server/serverapp.py" + "#L253", + ) + from jupyter_server.auth import IdentityProvider + + # no identity provider set, load default + self.settings["identity_provider"] = IdentityProvider( + config=self.settings.get("config", None) + ) + return self.settings["identity_provider"] + class JupyterHandler(AuthenticatedHandler): """Jupyter-specific extensions to authenticated handling @@ -310,6 +361,13 @@ def allow_credentials(self): def set_default_headers(self): """Add CORS headers, if defined""" super().set_default_headers() + + def set_cors_headers(self): + """Add CORS headers, if defined + + Now that current_user is async (jupyter-server 2.0), + must be called at the end of prepare(), instead of in set_default_headers. + """ if self.allow_origin: self.set_header("Access-Control-Allow-Origin", self.allow_origin) elif self.allow_origin_pat: @@ -448,6 +506,9 @@ def check_referer(self): def check_xsrf_cookie(self): """Bypass xsrf cookie checks when token-authenticated""" + if not hasattr(self, "_jupyter_current_user"): + # Called too early, will be checked later + return if self.token_authenticated or self.settings.get("disable_check_xsrf", False): # Token-authenticated requests do not need additional XSRF-check # Servers without authentication are vulnerable to XSRF @@ -507,9 +568,40 @@ def check_host(self): ) return allow - def prepare(self): + async def prepare(self): if not self.check_host(): raise web.HTTPError(403) + + from jupyter_server.auth import IdentityProvider + + if ( + type(self.identity_provider) is IdentityProvider + and inspect.getmodule(self.get_current_user).__name__ != __name__ + ): + # check for overridden get_current_user + default IdentityProvider + # deprecated way to override auth (e.g. 
JupyterHub < 3.0) + # allow deprecated, overridden get_current_user + warnings.warn( + "Overriding JupyterHandler.get_current_user is deprecated in jupyter-server 2.0." + " Use an IdentityProvider class.", + DeprecationWarning, + # stacklevel not useful here + ) + user = self.get_current_user() + else: + user = self.identity_provider.get_user(self) + if inspect.isawaitable(user): + # IdentityProvider.get_user _may_ be async + user = await user + + # self.current_user for tornado's @web.authenticated + # self._jupyter_current_user for backward-compat in deprecated get_current_user calls + # and our own private checks for whether .current_user has been set + self.current_user = self._jupyter_current_user = user + # complete initial steps which require auth to resolve first: + self.set_cors_headers() + if self.request.method not in {"GET", "HEAD", "OPTIONS"}: + self.check_xsrf_cookie() return super().prepare() # --------------------------------------------------------------- @@ -602,10 +694,10 @@ def write_error(self, status_code, **kwargs): class APIHandler(JupyterHandler): """Base class for API handlers""" - def prepare(self): + async def prepare(self): + await super().prepare() if not self.check_origin(): raise web.HTTPError(404) - return super().prepare() def write_error(self, status_code, **kwargs): """APIHandler errors are JSON, not human pages""" @@ -627,14 +719,6 @@ def write_error(self, status_code, **kwargs): self.log.warning(reply["message"]) self.finish(json.dumps(reply)) - def get_current_user(self): - """Raise 403 on API handlers instead of redirecting to human login page""" - # preserve _user_cache so we don't raise more than once - if hasattr(self, "_user_cache"): - return self._user_cache - self._user_cache = user = super().get_current_user() - return user - def get_login_url(self): # if get_login_url is invoked in an API handler, # that means @web.authenticated is trying to trigger a redirect. 
diff --git a/jupyter_server/base/zmqhandlers.py b/jupyter_server/base/zmqhandlers.py index 28e296c722..ad6342af85 100644 --- a/jupyter_server/base/zmqhandlers.py +++ b/jupyter_server/base/zmqhandlers.py @@ -19,8 +19,6 @@ from tornado import ioloop, web from tornado.websocket import WebSocketHandler -from jupyter_server.auth.utils import warn_disabled_authorization - from .handlers import JupyterHandler @@ -315,16 +313,13 @@ def pre_get(self): the websocket finishes completing. """ # authenticate the request before opening the websocket - user = self.get_current_user() + user = self.current_user if user is None: self.log.warning("Couldn't authenticate WebSocket connection") raise web.HTTPError(403) # authorize the user. - if not self.authorizer: - # Warn if there is not authorizer. - warn_disabled_authorization() - elif not self.authorizer.is_authorized(self, user, "execute", "kernels"): + if not self.authorizer.is_authorized(self, user, "execute", "kernels"): raise web.HTTPError(403) if self.get_argument("session_id", False): diff --git a/jupyter_server/gateway/handlers.py b/jupyter_server/gateway/handlers.py index a36f2d4faf..e31302c464 100644 --- a/jupyter_server/gateway/handlers.py +++ b/jupyter_server/gateway/handlers.py @@ -48,7 +48,7 @@ def authenticate(self): the websocket finishes completing. 
""" # authenticate the request before opening the websocket - if self.get_current_user() is None: + if self.current_user is None: self.log.warning("Couldn't authenticate WebSocket connection") raise web.HTTPError(403) diff --git a/jupyter_server/log.py b/jupyter_server/log.py index d23799456d..0a50160ecf 100644 --- a/jupyter_server/log.py +++ b/jupyter_server/log.py @@ -8,6 +8,7 @@ from tornado.log import access_log +from .auth import User from .prometheus.log_functions import prometheus_log_method @@ -44,7 +45,23 @@ def log_request(handler): uri=request.uri, request_time=request_time, ) - msg = "{status} {method} {uri} ({ip}) {request_time:.2f}ms" + # log username + # make sure we don't break anything + # in case mixins cause current_user to not be a User somehow + try: + user = handler.current_user + except Exception: + user = None + if user: + if isinstance(user, User): + username = user.username + else: + username = "unknown" + else: + username = "" + ns["username"] = username + + msg = "{status} {method} {uri} ({username}@{ip}) {request_time:.2f}ms" if status >= 400: # log bad referers ns["referer"] = request.headers.get("Referer", "None") diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py index 1a25bfb07a..964bd0b587 100644 --- a/jupyter_server/serverapp.py +++ b/jupyter_server/serverapp.py @@ -91,6 +91,7 @@ from jupyter_server._sysinfo import get_sys_info from jupyter_server._tz import utcnow from jupyter_server.auth.authorizer import AllowAllAuthorizer, Authorizer +from jupyter_server.auth.identity import IdentityProvider from jupyter_server.auth.login import LoginHandler from jupyter_server.auth.logout import LogoutHandler from jupyter_server.base.handlers import ( @@ -212,7 +213,9 @@ def __init__( default_url, settings_overrides, jinja_env_options, + *, authorizer=None, + identity_provider=None, ): if authorizer is None: warnings.warn( @@ -221,7 +224,16 @@ def __init__( RuntimeWarning, stacklevel=2, ) - authorizer = 
AllowAllAuthorizer(jupyter_app) + authorizer = AllowAllAuthorizer(parent=jupyter_app) + + if identity_provider is None: + warnings.warn( + "identity_provider unspecified. Using default IdentityProvider." + " Specify an identity_provider to avoid this message.", + RuntimeWarning, + stacklevel=2, + ) + identity_provider = IdentityProvider(parent=jupyter_app) settings = self.init_settings( jupyter_app, @@ -237,6 +249,7 @@ def __init__( settings_overrides, jinja_env_options, authorizer=authorizer, + identity_provider=identity_provider, ) handlers = self.init_handlers(default_services, settings) @@ -256,7 +269,9 @@ def init_settings( default_url, settings_overrides, jinja_env_options=None, + *, authorizer=None, + identity_provider=None, ): _template_path = settings_overrides.get( @@ -338,6 +353,7 @@ def init_settings( kernel_spec_manager=kernel_spec_manager, config_manager=config_manager, authorizer=authorizer, + identity_provider=identity_provider, # handlers extra_services=extra_services, # Jupyter stuff @@ -395,6 +411,8 @@ def init_handlers(self, default_services, settings): # Add extra handlers from contents manager. 
handlers.extend(settings["contents_manager"].get_extra_handlers()) + # And from identity provider + handlers.extend(settings["identity_provider"].get_handlers()) # If gateway mode is enabled, replace appropriate handlers to perform redirection if GatewayClient.instance().gateway_enabled: @@ -1488,6 +1506,13 @@ def _observe_contents_manager_class(self, change): help=_i18n("The authorizer class to use."), ) + identity_provider_class = Type( + default_value=IdentityProvider, + klass=IdentityProvider, + config=True, + help=_i18n("The identity provider class to use."), + ) + trust_xheaders = Bool( False, config=True, @@ -1811,6 +1836,7 @@ def init_configurables(self): log=self.log, ) self.authorizer = self.authorizer_class(parent=self, log=self.log) + self.identity_provider = self.identity_provider_class(parent=self, log=self.log) def init_logging(self): # This prevents double log messages because tornado use a root logger that @@ -1898,6 +1924,7 @@ def init_webapp(self): self.tornado_settings, self.jinja_environment_options, authorizer=self.authorizer, + identity_provider=self.identity_provider, ) if self.certfile: self.ssl_options["certfile"] = self.certfile diff --git a/jupyter_server/services/api/api.yaml b/jupyter_server/services/api/api.yaml index 844831e045..976e5726f2 100644 --- a/jupyter_server/services/api/api.yaml +++ b/jupyter_server/services/api/api.yaml @@ -33,6 +33,16 @@ parameters: in: path description: file path type: string + permissions: + name: permissions + type: string + required: false + in: query + description: | + JSON-serialized dictionary of `{"resource": ["action",]}` + (dict of lists of strings) to check. + The same dictionary structure will be returned, + containing only the actions for which the user is authorized. checkpoint_id: name: checkpoint_id required: true @@ -616,7 +626,42 @@ paths: description: Forbidden to access 404: description: Not found - + /api/me: + get: + summary: | + Get the identity of the currently authenticated user. 
+ If present, a `permissions` argument may be specified + to check what actions the user currently is authorized to take. + tags: + - identity + parameters: + - $ref: "#/parameters/permissions" + responses: + 200: + description: The user's identity and permissions + schema: + type: object + properties: + identity: + $ref: "#/definitions/Identity" + permissions: + $ref: "#/definitions/Permissions" + example: + identity: + username: minrk + name: Min Ragan-Kelley + display_name: Min RK + initials: MRK + avatar_url: null + color: null + permissions: + contents: + - read + - write + kernels: + - read + - write + - execute /api/status: get: summary: Get the current status/activity of the server. @@ -663,6 +708,53 @@ definitions: type: number description: | The total number of running kernels. + Identity: + description: The identity of the currently authenticated user + properties: + username: + type: string + description: | + Unique string identifying the user + name: + type: string + description: | + For-humans name of the user. + May be the same as `username` in systems where + only usernames are available. + display_name: + type: string + description: | + Alternate rendering of name for display. + Often the same as `name`. + initials: + type: string + description: | + Short string of initials. + Initials should not be derived automatically due to localization issues. + May be `null` if unavailable. + avatar_url: + type: string + description: | + URL of an avatar to be used for the user. + May be `null` if unavailable. + color: + type: string + description: | + A CSS color string to use as a preferred color, + such as for collaboration cursors. + May be `null` if unavailable. + Permissions: + type: object + description: | + A dict of the form: `{"resource": ["action",]}` + containing only the AUTHORIZED subset of resource+actions + from the permissions specified in the request. + If no permission checks were made in the request, + this will be empty. 
+ additionalProperties: + type: array + items: + type: string KernelSpec: description: Kernel spec (contents of kernel.json) properties: diff --git a/jupyter_server/services/api/handlers.py b/jupyter_server/services/api/handlers.py index 1c0cca5e19..b6e20866d6 100644 --- a/jupyter_server/services/api/handlers.py +++ b/jupyter_server/services/api/handlers.py @@ -3,6 +3,7 @@ # Distributed under the terms of the Modified BSD License. import json import os +from typing import Dict, List from tornado import web @@ -55,7 +56,49 @@ async def get(self): self.finish(json.dumps(model, sort_keys=True)) +class IdentityHandler(APIHandler): + """Get the current user's identity model""" + + @web.authenticated + def get(self): + permissions_json: str = self.get_argument("permissions", "") + bad_permissions_msg = f'permissions should be a JSON dict of {{"resource": ["action",]}}, got {permissions_json!r}' + if permissions_json: + try: + permissions_to_check = json.loads(permissions_json) + except ValueError: + raise web.HTTPError(400, bad_permissions_msg) + if not isinstance(permissions_to_check, dict): + raise web.HTTPError(400, bad_permissions_msg) + else: + permissions_to_check = {} + + permissions: Dict[str, List[str]] = {} + user = self.current_user + + for resource, actions in permissions_to_check.items(): + if ( + not isinstance(resource, str) + or not isinstance(actions, list) + or not all(isinstance(action, str) for action in actions) + ): + raise web.HTTPError(400, bad_permissions_msg) + + allowed = permissions[resource] = [] + for action in actions: + if self.authorizer.is_authorized(self, user=user, resource=resource, action=action): + allowed.append(action) + + identity: Dict = self.identity_provider.identity_model(user) + model = { + "identity": identity, + "permissions": permissions, + } + self.write(json.dumps(model)) + + default_handlers = [ (r"/api/spec.yaml", APISpecHandler), (r"/api/status", APIStatusHandler), + (r"/api/me", IdentityHandler), ] diff --git 
a/tests/auth/test_identity.py b/tests/auth/test_identity.py new file mode 100644 index 0000000000..e60997f2fd --- /dev/null +++ b/tests/auth/test_identity.py @@ -0,0 +1,107 @@ +import pytest + +from jupyter_server.auth import IdentityProvider, User +from jupyter_server.auth.identity import _backward_compat_user + + +class CustomUser: + def __init__(self, name): + self.name = name + + +@pytest.mark.parametrize( + "old_user, expected", + [ + ( + "str-name", + {"username": "str-name", "name": "str-name", "display_name": "str-name"}, + ), + ( + {"username": "user.username", "name": "user.name"}, + { + "username": "user.username", + "name": "user.name", + "display_name": "user.name", + }, + ), + ( + {"username": "user.username", "display_name": "display"}, + { + "username": "user.username", + "name": "user.username", + "display_name": "display", + }, + ), + ({"name": "user.name"}, {"username": "user.name", "name": "user.name"}), + ({"unknown": "value"}, ValueError), + (CustomUser("custom_name"), ValueError), + ], +) +def test_identity_model(old_user, expected): + if isinstance(expected, type) and issubclass(expected, Exception): + with pytest.raises(expected): + user = _backward_compat_user(old_user) + return + user = _backward_compat_user(old_user) + idp = IdentityProvider() + identity = idp.identity_model(user) + print(identity) + identity_subset = {key: identity[key] for key in expected} + print(type(identity), type(identity_subset), type(expected)) + assert identity_subset == expected + + +@pytest.mark.parametrize( + "fields, expected", + [ + ({"name": "user"}, TypeError), + ( + {"username": "user.username"}, + { + "username": "user.username", + "name": "user.username", + "initials": None, + "avatar_url": None, + "color": None, + }, + ), + ( + {"username": "user.username", "name": "user.name", "color": "#abcdef"}, + { + "username": "user.username", + "name": "user.name", + "display_name": "user.name", + "color": "#abcdef", + }, + ), + ( + {"username": 
"user.username", "display_name": "display"}, + { + "username": "user.username", + "name": "user.username", + "display_name": "display", + }, + ), + ], +) +def test_user_defaults(fields, expected): + if isinstance(expected, type) and issubclass(expected, Exception): + with pytest.raises(expected): + user = User(**fields) + return + user = User(**fields) + + # check expected fields + for key in expected: + assert getattr(user, key) == expected[key] + + # check types + for key in ("username", "name", "display_name"): + value = getattr(user, key) + assert isinstance(value, str) + # don't allow empty strings + assert value + + for key in ("initials", "avatar_url", "color"): + value = getattr(user, key) + assert value is None or isinstance(value, str) diff --git a/tests/auth/test_login.py b/tests/auth/test_login.py index 0b918d91d5..9f120c13ab 100644 --- a/tests/auth/test_login.py +++ b/tests/auth/test_login.py @@ -1,4 +1,5 @@ """Tests for login redirects""" +import json from functools import partial from urllib.parse import urlencode @@ -92,3 +93,26 @@ async def test_next_ok(login, jp_base_url, next_path): expected = jp_base_url + next_path actual = await login(next=expected) assert actual == expected + + +async def test_token_cookie_user_id(jp_serverapp, jp_fetch): + token = jp_serverapp.token + + # first request with token, sets cookie with user-id + resp = await jp_fetch("/") + assert resp.code == 200 + set_cookie = resp.headers["set-cookie"] + headers = {"Cookie": set_cookie} + + # subsequent requests with cookie and no token + # receive same user-id + resp = await jp_fetch("/api/me", headers=headers) + user_id = json.loads(resp.body.decode("utf8")) + resp = await jp_fetch("/api/me", headers=headers) + user_id2 = json.loads(resp.body.decode("utf8")) + assert user_id["identity"] == user_id2["identity"] + + # new request, just token -> new user_id + resp = await jp_fetch("/api/me") + user_id3 = json.loads(resp.body.decode("utf8")) + assert user_id["identity"] != 
user_id3["identity"] diff --git a/tests/services/api/test_api.py b/tests/services/api/test_api.py index c1620ff052..03bf74e97f 100644 --- a/tests/services/api/test_api.py +++ b/tests/services/api/test_api.py @@ -1,4 +1,11 @@ import json +from typing import Dict, List +from unittest import mock + +import pytest +from tornado.httpclient import HTTPError + +from jupyter_server.auth import Authorizer, IdentityProvider, User async def test_get_spec(jp_fetch): @@ -21,3 +28,151 @@ async def test_get_status(jp_fetch): assert status["kernels"] == 0 assert status["last_activity"].endswith("Z") assert status["started"].endswith("Z") + + +class MockUser(User): + permissions: Dict[str, List[str]] + + +class MockIdentityProvider(IdentityProvider): + mock_user: MockUser + + def get_user(self, handler): + # super returns a UUID + # return our mock user instead, as long as the request is authorized + authenticated = super().get_user(handler) + if isinstance(self.mock_user, dict): + self.mock_user = MockUser(**self.mock_user) + if authenticated: + return self.mock_user + + +class MockAuthorizer(Authorizer): + def is_authorized(self, handler, user, action, resource): + permissions = user.permissions + if permissions == "*": + return True + actions = permissions.get(resource, []) + return action in actions + + +@pytest.fixture +def identity_provider(jp_serverapp): + idp = MockIdentityProvider(parent=jp_serverapp) + authorizer = MockAuthorizer(parent=jp_serverapp) + with mock.patch.dict( + jp_serverapp.web_app.settings, + {"identity_provider": idp, "authorizer": authorizer}, + ): + yield idp + + +@pytest.mark.parametrize( + "identity, expected", + [ + ( + {"username": "user.username"}, + { + "username": "user.username", + "name": "user.username", + "display_name": "user.username", + }, + ), + ( + {"username": "user", "name": "name", "display_name": "display"}, + {"username": "user", "name": "name", "display_name": "display"}, + ), + ( + None, + 403, + ), + ], +) +async def 
test_identity(jp_fetch, identity, expected, identity_provider): + if identity: + identity_provider.mock_user = MockUser(**identity) + else: + identity_provider.mock_user = None + + if isinstance(expected, int): + with pytest.raises(HTTPError) as exc: + await jp_fetch("api/me") + print(exc) + assert exc.value.code == expected + return + + r = await jp_fetch("api/me") + + assert r.code == 200 + response = json.loads(r.body.decode()) + assert set(response.keys()) == {"identity", "permissions"} + identity_model = response["identity"] + print(identity_model) + for key, value in expected.items(): + assert identity_model[key] == value + + assert set(identity_model.keys()) == set(User.__dataclass_fields__) + + +@pytest.mark.parametrize( + "have_permissions, check_permissions, expected", + [ + ("*", None, {}), + ( + { + "contents": ["read"], + "kernels": ["read", "write"], + "sessions": ["write"], + }, + { + "contents": ["read", "write"], + "kernels": ["read", "write", "execute"], + "terminals": ["execute"], + }, + { + "contents": ["read"], + "kernels": ["read", "write"], + "terminals": [], + }, + ), + ("*", {"contents": ["write"]}, {"contents": ["write"]}), + ], +) +async def test_identity_permissions( + jp_fetch, have_permissions, check_permissions, expected, identity_provider +): + user = MockUser("username") + user.permissions = have_permissions + identity_provider.mock_user = user + + if check_permissions is not None: + params = {"permissions": json.dumps(check_permissions)} + else: + params = None + + r = await jp_fetch("api/me", params=params) + assert r.code == 200 + response = json.loads(r.body.decode()) + assert set(response.keys()) == {"identity", "permissions"} + assert response["permissions"] == expected + + +@pytest.mark.parametrize( + "permissions", + [ + "", + "[]", + '"abc"', + json.dumps({"resource": "action"}), + json.dumps({"resource": [5]}), + json.dumps({"resource": {}}), + ], +) +async def test_identity_bad_permissions(jp_fetch, permissions): + with 
pytest.raises(HTTPError) as exc: + await jp_fetch("api/me", params={"permissions": json.dumps(permissions)}) + + r = exc.value.response + assert r.code == 400 + reply = json.loads(r.body.decode()) + assert "permissions should be a JSON dict" in reply["message"]