Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ALMA: auth handling to login to Keycloak #2712

Merged
merged 6 commits into from
Jun 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 106 additions & 63 deletions astroquery/alma/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
from ..exceptions import LoginError
from ..utils import commons
from ..utils.process_asyncs import async_to_sync
from ..query import QueryWithLogin
from ..query import BaseQuery, QueryWithLogin
from .tapsql import _gen_pos_sql, _gen_str_sql, _gen_numeric_sql,\
_gen_band_list_sql, _gen_datetime_sql, _gen_pol_sql, _gen_pub_sql,\
_gen_science_sql, _gen_spec_res_sql, ALMA_DATE_FORMAT
Expand Down Expand Up @@ -212,6 +212,101 @@ def _gen_sql(payload):
return sql + where


class AlmaAuth(BaseQuery):
    """Authentication session information for passing credentials to an OIDC instance.

    Assumes an OIDC system like Keycloak with a preconfigured client app called "oidc" to validate against.
    This does not use Tokens in the traditional OIDC sense, but rather uses the Keycloak specific endpoint
    to validate a username and password. Password storage (e.g. in a keyring) is left to the caller;
    this class only validates the credentials it is handed.
    """

    _CLIENT_ID = 'oidc'
    _GRANT_TYPE = 'password'
    _INVALID_PASSWORD_MESSAGE = 'Invalid user credentials'
    _REALM_ENDPOINT = '/auth/realms/ALMA'
    _LOGIN_ENDPOINT = f'{_REALM_ENDPOINT}/protocol/openid-connect/token'
    _VERIFY_WELL_KNOWN_ENDPOINT = f'{_REALM_ENDPOINT}/.well-known/openid-configuration'

    def __init__(self):
        super().__init__()
        # Candidate hosts, tried in order by get_valid_host().
        self._auth_hosts = auth_urls
        # First candidate that answered the well-known probe with a 200; None until probed.
        self._auth_host = None

    @property
    def auth_hosts(self):
        """The candidate host names that `get_valid_host` will probe."""
        return self._auth_hosts

    @auth_hosts.setter
    def auth_hosts(self, auth_hosts):
        """
        Set the available hosts to check for login endpoints.

        Parameters
        ----------
        auth_hosts : array
            Available hosts name. Checking each one until one returns a 200 for
            the well-known endpoint.

        Raises
        ------
        LoginError
            If ``auth_hosts`` is None.
        """
        if auth_hosts is None:
            raise LoginError('Valid authentication hosts cannot be None')

        # A host validated against the previous candidate list must not survive
        # if it is no longer a candidate; otherwise get_valid_host() would keep
        # returning a host the caller has just removed.
        if self._auth_host is not None and self._auth_host not in auth_hosts:
            self._auth_host = None

        self._auth_hosts = auth_hosts

    def get_valid_host(self):
        """
        Return the first configured host whose OIDC well-known endpoint answers 200.

        The result is cached on the instance, so the hosts are only probed
        until one of them succeeds.

        Returns
        -------
        str
            The host name to authenticate against.

        Raises
        ------
        LoginError
            If none of the configured hosts answered with a 200.
        """
        if self._auth_host is None:
            for auth_url in self._auth_hosts:
                # Probe the realm's discovery document; a 200 is taken as
                # proof that this host serves the login endpoint.
                url_to_check = f'https://{auth_url}{self._VERIFY_WELL_KNOWN_ENDPOINT}'
                response = self._request("HEAD", url_to_check, cache=False)

                if response.status_code == 200:
                    self._auth_host = auth_url
                    log.debug(f'Set auth host to {self._auth_host}')
                    break

        if self._auth_host is None:
            raise LoginError(f'No useable hosts to login to: {self._auth_hosts}')

        return self._auth_host

    def login(self, username, password):
        """
        Authenticate to one of the configured hosts.

        Parameters
        ----------
        username : str
            The username to authenticate with
        password : str
            The user's password

        Raises
        ------
        LoginError
            If no host is usable, the server reports an error, or the response
            carries no access token.
        """
        # A cached host only means get_valid_host() succeeded at some point,
        # not that the current user is logged in, so the credentials are
        # always posted; there is no early return on a cached host.
        data = {
            'username': username,
            'password': password,
            'grant_type': self._GRANT_TYPE,
            'client_id': self._CLIENT_ID
        }

        host = self.get_valid_host()
        login_url = f'https://{host}{self._LOGIN_ENDPOINT}'
        log.info(f'Authenticating {username} on {login_url}.')
        login_response = self._request('POST', login_url, data=data, cache=False)
        json_auth = login_response.json()

        if 'error' in json_auth:
            log.debug(f'{json_auth}')
            # 'error_description' is optional in an OAuth2 error response;
            # fall back to the mandatory 'error' code instead of a KeyError.
            error_message = json_auth.get('error_description') or str(json_auth['error'])
            if self._INVALID_PASSWORD_MESSAGE not in error_message:
                raise LoginError("Could not log in to ALMA authorization portal: "
                                 f"{host} Message from server: {error_message}")
            else:
                raise LoginError(error_message)
        elif 'access_token' not in json_auth:
            raise LoginError("Could not log in to any of the known ALMA authorization portals: \n"
                             f"No error from server, but missing access token from host: {host}")
        else:
            log.info(f'Successfully logged in to {self._auth_host}')


@async_to_sync
class AlmaClass(QueryWithLogin):

Expand All @@ -228,6 +323,11 @@ def __init__(self):
self._sia_url = None
self._tap_url = None
self._datalink_url = None
self._auth = AlmaAuth()

@property
def auth(self):
    """Return the object stored in ``self._auth``."""
    return self._auth

@property
def datalink(self):
Expand Down Expand Up @@ -875,11 +975,7 @@ def _get_auth_info(self, username, *, store_password=False,
else:
username = self.USERNAME

if hasattr(self, '_auth_url'):
auth_url = self._auth_url
else:
raise LoginError("Login with .login() to acquire the appropriate"
" login URL")
auth_url = self.auth.get_valid_host()

# Get password from keyring or prompt
password, password_from_keyring = self._get_password(
Expand Down Expand Up @@ -909,69 +1005,16 @@ def _login(self, username=None, store_password=False,
on the keyring. Default is False.
"""

success = False
for auth_url in auth_urls:
# set session cookies (they do not get set otherwise)
cookiesetpage = self._request("GET",
urljoin(self._get_dataarchive_url(),
'rh/forceAuthentication'),
cache=False)
self._login_cookiepage = cookiesetpage
cookiesetpage.raise_for_status()

if (auth_url+'/cas/login' in cookiesetpage.request.url):
# we've hit a target, we're good
success = True
break
if not success:
raise LoginError("Could not log in to any of the known ALMA "
"authorization portals: {0}".format(auth_urls))

# Check if already logged in
loginpage = self._request("GET", "https://{auth_url}/cas/login".format(auth_url=auth_url),
cache=False)
root = BeautifulSoup(loginpage.content, 'html5lib')
if root.find('div', class_='success'):
log.info("Already logged in.")
return True

self._auth_url = auth_url
self.auth.auth_hosts = auth_urls

username, password = self._get_auth_info(username=username,
store_password=store_password,
reenter_password=reenter_password)

# Authenticate
log.info("Authenticating {0} on {1} ...".format(username, auth_url))
# Do not cache pieces of the login process
data = {kw: root.find('input', {'name': kw})['value']
for kw in ('execution', '_eventId')}
data['username'] = username
data['password'] = password
data['submit'] = 'LOGIN'

login_response = self._request("POST", "https://{0}/cas/login".format(auth_url),
params={'service': self._get_dataarchive_url()},
data=data,
cache=False)

# save the login response for debugging purposes
self._login_response = login_response
# do not expose password back to user
del data['password']
# but save the parameters for debug purposes
self._login_parameters = data

authenticated = ('You have successfully logged in' in
login_response.text)

if authenticated:
log.info("Authentication successful!")
self.USERNAME = username
else:
log.exception("Authentication failed!")
self.auth.login(username, password)
self.USERNAME = username

return authenticated
return True

def get_cycle0_uid_contents(self, uid):
"""
Expand Down
109 changes: 109 additions & 0 deletions astroquery/alma/tests/test_alma_auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
from ..core import AlmaAuth
from ...exceptions import LoginError

import pytest
from unittest.mock import Mock


def test_host():
    """An explicitly configured host is returned once its probe answers 200."""
    ok_reply = Mock()
    ok_reply.status_code = 200

    auth = AlmaAuth()
    auth.auth_hosts = ['almaexample.com']
    # Every request, whatever its method or URL, yields the 200 reply.
    auth._request = Mock(return_value=ok_reply)

    found = auth.get_valid_host()
    assert found == 'almaexample.com'


def test_host_default():
    """Without configuring hosts, the first default host is picked when it answers 200."""
    ok_reply = Mock()
    ok_reply.status_code = 200

    auth = AlmaAuth()
    # Every request, whatever its method or URL, yields the 200 reply.
    auth._request = Mock(return_value=ok_reply)

    found = auth.get_valid_host()
    assert found == 'asa.alma.cl'


def test_host_err():
    """A LoginError is raised when no configured host answers the probe with 200."""
    not_found_reply = Mock()
    not_found_reply.status_code = 404

    auth = AlmaAuth()
    auth.auth_hosts = ['almaexample.com']
    # Every request, whatever its method or URL, yields the 404 reply.
    auth._request = Mock(return_value=not_found_reply)

    with pytest.raises(LoginError):
        auth.get_valid_host()


def test_login_bad_error():
    """A server error other than bad credentials is wrapped in a portal-specific LoginError."""
    def _response_json():
        return {
            'error': 'Badness',
            'error_description': 'Something very bad'
        }

    def _requests_mock_err(method, url, **kwargs):
        # Answer the host probe with a 200 and the login POST with the error payload.
        response = Mock()
        if test_subject._VERIFY_WELL_KNOWN_ENDPOINT in url:
            response.status_code = 200
        elif test_subject._LOGIN_ENDPOINT in url:
            response.json = _response_json
        return response

    test_subject = AlmaAuth()
    test_subject.auth_hosts = ['almaexample.com']
    test_subject._request = Mock(side_effect=_requests_mock_err)
    with pytest.raises(LoginError) as e:
        test_subject.login('TESTUSER', 'TESTPASS')

    # Must sit outside the ``with`` block: statements after the raising call
    # inside ``pytest.raises`` never execute, so the check would be skipped.
    assert 'Could not log in to ALMA authorization portal' in e.value.args[0]


def test_login_missing_token():
    """A reply with neither an error nor an access token is reported as a LoginError."""
    def _fake_request(method, url, **kwargs):
        # Host probe succeeds; the login POST returns an unrelated payload.
        reply = Mock()
        if auth._VERIFY_WELL_KNOWN_ENDPOINT in url:
            reply.status_code = 200
        elif auth._LOGIN_ENDPOINT in url:
            reply.json = lambda: {'irrlevant': 'Weird'}
        return reply

    auth = AlmaAuth()
    auth.auth_hosts = ['almaexample.com']
    auth._request = Mock(side_effect=_fake_request)

    with pytest.raises(LoginError) as caught:
        auth.login('TESTUSER', 'TESTPASS')

    message = caught.value.args[0]
    assert 'No error from server, but missing access token from host' in message


def test_login_success():
    """A reply carrying an access token lets login() complete without raising."""
    def _response_json():
        return {
            'access_token': 'MYTOKEN'
        }

    def _requests_mock_good(method, url, **kwargs):
        # Answer the host probe with a 200 and the login POST with a token payload.
        response = Mock()
        if test_subject._VERIFY_WELL_KNOWN_ENDPOINT in url:
            response.status_code = 200
        elif test_subject._LOGIN_ENDPOINT in url:
            response.json = _response_json
        return response

    test_subject = AlmaAuth()
    test_subject.auth_hosts = ['almaexample.com']
    test_subject._request = Mock(side_effect=_requests_mock_good)
    test_subject.login('TESTUSER', 'TESTPASS')

    # The credentials must have been posted exactly once, and the probed host
    # must have been remembered as the valid one.
    post_calls = [call for call in test_subject._request.call_args_list
                  if call[0][0] == 'POST']
    assert len(post_calls) == 1
    assert test_subject.get_valid_host() == 'almaexample.com'
6 changes: 3 additions & 3 deletions docs/alma/alma.rst
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ Authentication
==============

Users can log in to acquire proprietary data products. Login is performed
via the ALMA CAS (central authentication server).
via the ALMA OIDC (OpenID Connect) service, Keycloak.

.. doctest-skip::

Expand All @@ -97,11 +97,11 @@ via the ALMA CAS (central authentication server).
ICONDOR, enter your ALMA password:
<BLANKLINE>
Authenticating ICONDOR on asa.alma.cl...
Authentication successful!
Successfully logged in to asa.alma.cl
>>> # After the first login, your password has been stored
>>> alma.login("ICONDOR")
Authenticating ICONDOR on asa.alma.cl...
Authentication successful!
Successfully logged in to asa.alma.cl

Your password will be stored by the `keyring
<https://pypi.python.org/pypi/keyring>`_ module.
Expand Down