diff --git a/.secrets.baseline b/.secrets.baseline index 8c97f29fb..9aa531c19 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -268,14 +268,14 @@ "filename": "tests/conftest.py", "hashed_secret": "1348b145fa1a555461c1b790a2f66614781091e9", "is_verified": false, - "line_number": 1561 + "line_number": 1569 }, { "type": "Base64 High Entropy String", "filename": "tests/conftest.py", "hashed_secret": "227dea087477346785aefd575f91dd13ab86c108", "is_verified": false, - "line_number": 1583 + "line_number": 1593 } ], "tests/credentials/google/test_credentials.py": [ @@ -422,5 +422,5 @@ } ] }, - "generated_at": "2024-03-16T00:09:27Z" + "generated_at": "2024-07-25T17:19:58Z" } diff --git a/README.md b/README.md index ac413b9ac..fc4ef7025 100644 --- a/README.md +++ b/README.md @@ -9,93 +9,25 @@ only trusted entities to enter. Fence is a core service of the Gen3 stack that has multiple capabilities: -1. Act as an [auth broker](#auth-broker) to integrate with one or more [IdPs](#IdP) and provide downstream authentication and authorization for Gen3 services. -2. [Manage tokens](#token-management). -3. Act as an [OIDC provider](#oidc--oauth2) to support external applications to use Gen3 services. -4. [Issue short lived, cloud native credentials to access data in various cloud storage services](#accessing-data) +1. Act as an [auth broker](docs/additional_documentation/terminology.md#auth-broker) to integrate with one +or more [IdPs](docs/additional_documentation/terminology.md#idp) and provide downstream authentication +and authorization for Gen3 services. +2. [Manage tokens](docs/additional_documentation/token_management.md). +3. Act as an [OIDC provider](README.md#oidc--oauth2) to support external +applications to use Gen3 services. +4. [Issue short-lived, cloud native credentials to access data in various cloud storage services](docs/additional_documentation/data_access.md#accessing-data) -## Contents +## Overview -1. [API Documentation](#API-documentation) -1. 
[Terminologies](#Terminologies) -1. [Identity Providers](#identity-providers) -1. [OIDC & OAuth2](#oidc--oauth2) -1. [Accessing Data](#accessing-data) -1. [Setup](#setup) -1. [Token management](#token-management) -1. [fence-create](#fence-create-automating-common-tasks-with-a-command-line-interface) -1. [Default expiration times](#default-expiration-times-in-fence) +### Identity Providers -## API Documentation - -[OpenAPI documentation available here.](http://petstore.swagger.io/?url=https://raw.githubusercontent.com/uc-cdis/fence/master/openapis/swagger.yaml) - -YAML file for the OpenAPI documentation is found in the `openapis` folder (in -the root directory); see the README in that folder for more details. - -## Terminologies - -### AuthN - -Authentication - establishes "who you are" with the application through communication with an [Identity Provider](#IdP). - -### AuthZ - -Authorization - establishes "what you can do" and "which resources you have access to" within the application. - -### IdP - -Identity Provider - the service that lets a user login and provides the identity of the user to downstream services. Examples: Google login, University login, NIH Login. - -### Auth broker - -An interface which enables a user to authenticate using any of multiple IdPs. - -### OAuth2 - -A widely used AuthZ protocol for delegating access to an application to use resources on behalf of a user. - -https://tools.ietf.org/html/rfc6749 - -https://oauth.net/2/ - -#### Client - -OAuth 2.0 Client - An application which makes requests for protected resources (on a resource server) on behalf of a resource owner (end-user) and with the resource owner's authorization. - -#### Auth Server - -OAuth 2.0 Authorization Server - A server which issues access tokens to the client after successfully authenticating the resource owner and obtaining authorization. 
- -#### Access Token - -A string, issued by the auth server to the client, representing authorization credentials used to access protected resources (on a resource server). - -### OIDC - -OpenID Connect - an extension of OAuth2 which provides an AuthN layer on top of the OAuth 2.0 AuthZ layer. It introduced a new type of token, the id token, that is specifically designed to be consumed by clients to get the identity information of the user. - -http://openid.net/specs/openid-connect-core-1_0.html - -#### OP - -OpenID Provider - an OAuth 2.0 Authentication Server which also implements OpenID Connect. - -#### RP - -Relying Party - an OAuth 2.0 Client which uses (requests) OpenID Connect. - - - -## Identity Providers - Fence can be configured to support different Identity Providers (IdPs) for AuthN. At the moment, supported IDPs include: - Google -- [Shibboleth](docs/fence_shibboleth.md) +- [Shibboleth](docs/additional_documentation/fence_shibboleth.md) - NIH iTrust - InCommon - eduGAIN @@ -106,6 +38,14 @@ At the moment, supported IDPs include: - ORCID - RAS +### API Documentation + +[OpenAPI documentation available here.](http://petstore.swagger.io/?url=https://raw.githubusercontent.com/uc-cdis/fence/master/openapis/swagger.yaml) + +YAML file for the OpenAPI documentation is found in the `openapis` folder (in +the root directory); see the README in that folder for more details. + + ## OIDC & OAuth2 Fence acts as a central broker that supports multiple IdPs. @@ -134,14 +74,14 @@ Note that the `3rd Party App` acts as the `RP` in these examples. #### Flow: Client Registration -![Client Registration](docs/images/seq_diagrams/client_registration.png) +![Client Registration](./docs/images/seq_diagrams/client_registration.png) #### Flow: OpenID Connect In the following flow, Fence and the IdP together constitute an `OP`. Fence, by itself, acts as an OAuth 2.0 Auth Server; the IdP enables the additional implementation of OIDC (by providing AuthN). 
From an OIDC viewpoint, therefore, Fence and the IdP can be abstracted into one `OP`. -![OIDC Flow](docs/images/seq_diagrams/openid_connect_flow.png) +![OIDC Flow](./docs/images/seq_diagrams/openid_connect_flow.png) If the third-party application doesn't need to use any Gen3 resources (and just wants to authenticate the user), they can just get @@ -155,479 +95,48 @@ passed in an `Authorization` header. In the following flow, `3rd Party App` is the `RP`; `Protected Endpoint` is an endpoint of a Gen3 Resource (the `microservice`), and both of these are part of a `resource server`; and `Fence` is the `OP`. Here, importantly, `Fence` may be interfacing with another IdP _or_ with another `Fence` instance in order to implement the OIDC layer. Either way, note that the `Fence` blob in this diagram actually abstracts Fence in concert with some IdP, which may or may not also be (a different instance of) Fence. -![Using Access Token](docs/images/seq_diagrams/token_use_for_access.png) +![Using Access Token](./docs/images/seq_diagrams/token_use_for_access.png) #### Flow: Refresh Token Use -![Using Refresh Token](docs/images/seq_diagrams/refresh_token_use.png) +![Using Refresh Token](./docs/images/seq_diagrams/refresh_token_use.png) #### Flow: Refresh Token Use (Token is Expired) -![Using Expired Refresh Token](docs/images/seq_diagrams/refresh_token_use_expired.png) +![Using Expired Refresh Token](./docs/images/seq_diagrams/refresh_token_use_expired.png) #### Flow: Multi-Tenant Fence The following diagram illustrates the case in which one fence instance uses another fence instance as its identity provider. -A use case for this is when we setup a fence instance that uses NIH login as the IdP. Here, we go through a detailed approval process in NIH. Therefore we would like to do it only once for a single lead Fence instance, and then allow other fence instances to simply redirect to use the lead Fence as an IdP for logging in via NIH. 
+A use case for this is when we set up a fence instance that uses NIH login as the IdP. Here, we go through a detailed approval process in NIH. Therefore, we would like to do it only once for a single lead Fence instance, and then allow other fence instances to simply redirect to use the lead Fence as an IdP for logging in via NIH. In the following flow, `Fence (Client Instance)` is an OP relative to `OAuth Client`, but an RP relative to `Fence (IDP)`. -![Multi-Tenant Flow](docs/images/seq_diagrams/multi-tenant_flow.png) +![Multi-Tenant Flow](./docs/images/seq_diagrams/multi-tenant_flow.png) #### Notes See the [OIDC specification](http://openid.net/specs/openid-connect-core-1_0.html) for more details. Additionally, see the [OAuth2 specification](https://tools.ietf.org/html/rfc6749). -## Access Control / Authz - -Currently fence works with another Gen3 service named -[arborist](https://github.com/uc-cdis/arborist) to implement attribute-based access -control for commons users. The YAML file of access control information (see -[#create-user-access-file](#create-user-access-file)) contains a section `authz` which are data sent to -arborist in order to set up the access control model. - -## Accessing Data - -Fence has multiple options that provide a mechanism to access data. The access -to data can be moderated through authorization information in a User Access File. - -Users can be provided specific `privilege`'s on `projects` in the User Access -File. A `project` is identified by a unique authorization identifier AKA `auth_id`. - -A `project` can be associated with various storage backends that store -object data for that given `project`. You can assign `read-storage` and `write-storage` -privileges to users who should have access to that stored object data. `read` and -`write` allow access to the data stored in a graph database. - -Depending on the backend, Fence can be configured to provide users access to -the data in different ways. 
- - -### Signed URLS - -Temporary signed URLs are supported in all major commercial clouds. Signed URLs are the most 'cloud agnostic' way to allow users to access data located in different platforms. - -Fence has the ability to request a specific file by its GUID (globally unique identifier) and retrieve a temporary signed URL for object data in AWS or GCP that will provide direct access to that object. ### Google Cloud Storage -Whereas pre-signed URL is a cloud agnostic solution, services and tools on Google Cloud Platform prefer to use Google's concept of a "Service Account". Because of that, Fence provides a few more methods to access data in Google. +Whereas pre-signed URL is a cloud-agnostic solution, services and tools on Google Cloud Platform prefer to use Google's concept of a "Service Account". Because of that, Fence provides a few more methods to access data in Google. -See [Fence and Google](docs/google_architecture.md) for more details on data access methods specific to Google. +See [Fence and Google](docs/additional_documentation/google_architecture.md) for more details on data access methods specific to Google. -## Setup - -### Install Requirements and Fence - -Install [Poetry](https://python-poetry.org/docs/#installation). - -```bash -# Install Fence and dependencies -poetry install -``` - -### Create Configuration File - -Fence requires a configuration file to run. We have a command line -utility to help you create one based on a default configuration. - -The configuration file itself will live outside of this repo (to -prevent accidentally checking in sensitive information like database passwords). - -To create a new configuration file from the default configuration: - -```bash -python cfg_help.py create -``` - -This file will be placed in one of the default search directories for Fence. 
- -To get the exact path where the new configuration file was created, use: - -```bash -python cfg_help.py get -``` - -The file should have detailed information about each of the configuration -variables. **Remember to fill out the new configuration file!** - -#### Other Configuration Notes - -* Fence will look for configuration files from a list of search directories ( -which are currently defined in `fence/settings.py`.) -* For more configuration options (such as having multiple different config -files for development), see the `cfg_help.py` file. - -### Set Up Databases - -The tests clear out the database every time they are run. If you want -to keep a persistent database for manual testing and general local usage, -create a second test database with a different name: - -> NOTE: Requires a minimum of Postgres v9.4 (because of `JSONB` types used) - -```bash -# Create test database(s). -# This one is for automated tests, which clear the database after running; -# `tests/test_settings.py` should have `fence_test_tmp` in the `DB` variable. -psql -U test postgres -c 'create database fence_test_tmp' -userdatamodel-init --db fence_test_tmp -# This one is for manual testing/general local usage; Your config -# should have `fence_test` in the `DB` variable. -psql -U test postgres -c 'create database fence_test' -userdatamodel-init --db fence_test --username test --password test -``` - -### Keypair Configuration - -Fence uses RSA keypairs to sign and allow verification of JWTs that it issues. -When the application is initialized, Fence loads in keypair files from the -`keys` directory. To store keypair files, use the following procedure: - - Create a subdirectory in the `fence/keys` directory, named with a - unique identifier, preferably a timestamp in ISO 8601 format of when - the keys are created. 
The name of the directory is used for the `kid` - (key ID) for those keys; the default (assuming the directory is named - with an ISO timestamp) looks like this: - - fence_key_2018-05-01T14:00:00Z - - - Generate a private and public keypair following the RSA 256 algorithm - and store those in that directory. The key files must be named - `jwt_public_key.pem` and `jwt_private_key.pem`. - -To generate a keypair using `openssl`: -```bash -# Generate the private key. -openssl genpkey -algorithm RSA -out jwt_private_key.pem -pkeyopt rsa_keygen_bits:2048 - -# Generate the public key. -openssl rsa -pubout -in jwt_private_key.pem -out jwt_public_key.pem - -# Depending on the `openssl` distribution, you may find these work instead: -# -# openssl rsa -out private_key.pem 2048 -# openssl rsa -in private_key.pem -pubout -out public_key.pem -``` -It's not a bad idea to confirm that the files actually say `RSA PRIVATE KEY` -and `PUBLIC KEY` (and in fact Fence will require that the private key files it -uses actually say "PRIVATE KEY" and that the public keys do not). - -Files containing public/private keys should have this format (the format used -by `openssl` for generating RSA keys): -``` ------BEGIN PUBLIC KEY----- -... [key is here] ... ------END PUBLIC KEY----- -``` -If a key is not in this format, then `PyJWT` will raise errors about not being -able to read the key. - -Fence will use the first keypair in the list to sign the tokens it issues -through OAuth. 
- - -### Create User Access File - -You can setup user access via admin fence script providing a user yaml file -Example user yaml: -``` -cloud_providers: {} -groups: {} -users: - userA@gmail.com: - projects: - - auth_id: project_a - privilege: [read, update, create, delete] - - auth_id: project_b - privilege: [read] - userB@gmail.com: - projects: - - auth_id: project_b - privilege: [read] -``` -Example sync command: - -```bash -fence-create sync --yaml user.yaml -``` - -### Register OAuth Client - -When you want to build an application that uses Gen3 resources on behalf of a user, you should register an OAuth client for this app. -Fence right now exposes client registration via admin CLI, because the Oauth2 client for a Gen3 commons needs approval from the sponsor of the commons. If you are an external developer, you should submit a support ticket. - -As a Gen3 commons administrator, you can run following command for an approved client: -```bash -fence-create client-create --client CLIENT_NAME --urls OAUTH_REDIRECT_URL --username USERNAME -``` -This command should output a tuple of `(client_id, client_secret)` which must be -saved by the OAuth client to use with -`fence`. - -## Quickstart with Helm - -You can now deploy individual services via Helm! -Please refer to the Helm quickstart guide HERE (https://github.com/uc-cdis/fence/blob/master/docs/quickstart_helm.md) - -## Token management - -Fence utilizes [OpenID Connect](#OIDC) to generate tokens -for clients. It can also provide tokens directly to a user. - -Clients and users may then use those tokens with other -Gen3 Data Commons services to access protected endpoints that require specific permissions. - -We use JSON Web Tokens (JWTs) as the format for all tokens of the following types: - -- OIDC ID token: this token is used by the OIDC client to get a user's identity from the token content -- OIDC access token: this token can be sent to Gen3 services via bearer header and get protected resources. 
-- OIDC refresh token: this token can be sent to fence to request a new access / id token. - - - -### JWT Information - -#### Example ID Token - -``` -{ - "sub": "7", - "azp": "test-client", - "pur": "id", - "aud": [ - "openid", - "user", - "test-client" - ], - "context": { - "user": { - "is_admin": false, - "name": "test", - "projects": { - "phs000178": [ - "read", - "update", - "create", - "delete", - "read-storage" - ] - }, - "google": { - "linked_google_account": "somebody@example.com" - } - } - }, - "iss": "https://commons.org", - "jti": "3ae2910b-0294-43dc-af2a-03fd60082aef", - "exp": 1516983302, - "iat": 1516982102, - "auth_time": 1516982102 -} -``` - -#### Example Access Token - -``` -{ - "sub": "7", - "azp": "test-client", - "pur": "access", - "aud": [ - "openid", - "user", - "test-client" - ], - "context": { - "user": { - "is_admin": false, - "name": "test", - "projects": { - "phs000178": [ - "read", - "update", - "create", - "delete", - "read-storage" - ] - }, - "google": { - "proxy_group": "abcdefgh123456", - "linked_google_account": "somebody@example.com" - } - } - }, - "iss": "https://commons.org", - "jti": "2e6ade06-5afb-4ce7-9ab5-e206225ce291", - "exp": 1516983302, - "iat": 1516982102 -} -``` - -#### Example Refresh Token - -``` -{ - "sub": "7", - "azp": "test-client", - "pur": "refresh", - "aud": [ - "openid", - "user", - "test-client" - ], - "iss": "https://commons.org", - "jti": "c72e5573-39fa-4391-a445-191e370b7cc5", - "exp": 1517010902, - "iat": 1516982102 -} -``` - -## fence-create: Automating common tasks with a command line interface - -fence-create is a command line utility that is bundled with fence and allows you to automate some commons tasks within fence. For the latest and greatest run the command `fence-create --help`. - -WARNING: fence-create directly modifies the database in some cases and may circumvent security checks (most of these utilities are used for testing). 
BE CAREFUL when you're running these commands and make sure you know what they're doing. - - -### Register Internal Oauth Client - -As a Gen3 commons administrator, if you want to create an oauth client that skips user consent step, use the following command: - -```bash -fence-create client-create --client CLIENT_NAME --urls OAUTH_REDIRECT_URL --username USERNAME --auto-approve (--expires-in 30) -``` - -The optional `--expires-in` parameter allows specifying the number of days until this client expires. - -### Register an Implicit Oauth Client - -As a Gen3 commons administrator, if you want to create an implicit oauth client for a webapp: - -```bash -fence-create client-create --client fancywebappname --urls 'https://betawebapp.example/fence -https://webapp.example/fence' --public --username fancyapp --grant-types authorization_code refresh_token implicit -``` - -If there are more than one URL to add, use space to delimit them like this: - -```bash -fence-create client-create --urls 'https://url1/' 'https://url2/' --client ... -``` - -To specify allowed scopes, use the `allowed-scopes` argument: -```bash -fence-create client-create ... --allowed-scopes openid user data -``` - -### Register an Oauth Client for a Client Credentials flow - -The OAuth2 Client Credentials flow is used for machine-to-machine communication and scenarios in which typical authentication schemes like username + password do not make sense. The system authenticates and authorizes the app rather than a user. See the [OAuth2 specification](https://www.rfc-editor.org/rfc/rfc6749#section-4.4) for more details. 
- -As a Gen3 commons administrator, if you want to create an OAuth client for a client credentials flow: - -```bash -fence-create client-create --client CLIENT_NAME --grant-types client_credentials (--expires-in 30) -``` - -This command will return a client ID and client secret, which you can then use to obtain an access token: - -```bash -curl --request POST https://FENCE_URL/oauth2/token?grant_type=client_credentials -d scope="openid user" --user CLIENT_ID:CLIENT_SECRET -``` - -The optional `--expires-in` parameter allows specifying the number of *days* until this client expires. The recommendation is to rotate credentials with the `client_credentials` grant at least once a year (see [Rotate client credentials](#rotate-client-credentials) section). -NOTE: In Gen3, you can grant specific access to a client the same way you would to a user. See the [user.yaml guide](https://github.com/uc-cdis/fence/blob/master/docs/user.yaml_guide.md) for more details. - -NOTE: Client credentials tokens are not linked to a user (the claims contain no `sub` or `context.user.name` like other tokens). Some Gen3 endpoints that assume the token is linked to a user, or whose logic require there being a user, do not support them. For an example of how to adapt an endpoint to support client credentials tokens, see [here](https://github.com/uc-cdis/requestor/commit/a5078fae27fa258ac78045cf2bb89cb2104f53cf). For an example of how to explicitly reject client credentials tokens, see [here](https://github.com/uc-cdis/requestor/commit/0f4974c25343d2185c7cdb48dcdeb58f97800672). - -### Modify OAuth Client - -```bash -fence-create client-modify --client CLIENT_NAME --urls http://localhost/api/v0/oauth2/authorize -``` - -That command should output any modifications to the client. Similarly, multiple URLs are -allowed here too. 
- -Add `--append` argument to add new callback urls or allowed scopes to existing client (instead of replacing them) using `--append --urls` or `--append --allowed-scopes` -```bash -fence-create client-modify --client CLIENT_NAME --urls http://localhost/api/v0/new/oauth2/authorize --append (--expires-in 30) -``` - -### Rotate client credentials - -Use the `client-rotate` command to receive a new set of credentials (client ID and secret) for a client. The old credentials are NOT deactivated and must be deleted or expired separately (see [Delete Expired OAuth Clients](#delete-expired-oauth-clients) section). This allows for a rotation without downtime. - -```bash -fence-create client-rotate --client CLIENT_NAME (--expires-in 30) -``` - -Note that the `usersync` job must be run after rotating the credentials so that the new client ID is granted the same access as the old one. - -### Delete OAuth Client - -```bash -fence-create client-delete --client CLIENT_NAME -``` -That command should output the result of the deletion attempt. - -### Delete Expired OAuth Clients - -```bash -fence-create client-delete-expired -``` - -To post a warning in Slack about any clients that expired or are about to expire: - -```bash -fence-create client-delete-expired --slack-webhook --warning-days -``` - - -### List OAuth Clients - -```bash -fence-create client-list -``` -That command should output the full records for any registered OAuth clients. - -### Set up for External Buckets on Google - -```bash -fence-create link-external-bucket --bucket-name demo-bucket -fence-create link-bucket-to-project --bucket_id demo-bucket --bucket_provider google --project_auth_id test-project -``` - -The link-external-bucket returns an email for a Google group which needs to be added to access to the bucket `demo-bucket`. 
- -### Notify users who are blocking service account registration - -```bash -fence-create notify-problem-users --emails ex1@gmail.com ex2@gmail.com --auth_ids test --google_project_id test-google -``` - -`notify-problem-users` emails users in the provided list (can be fence user email or linked google email) who do not have access to any of the auth_ids provided. Also accepts a `check_linking` flag to check that each user has linked their google account. - -## Default Expiration Times in Fence +## Setup -Table contains various artifacts in fence that have temporary lifetimes and their default values. +See detailed explanation [here](docs/additional_documentation/setup.md) -> NOTE: "SA" in the below table stands for Service Account +## Additional documentation -| Name | Lifetime | Extendable? | Maximum Lifetime | Details -|--------------------------------------|--------------|-------------|-----------------------|------------------------------------------------------------------------------------------| -| Access Token | 20 minutes | TRUE | Life of Refresh Token | | -| Refresh Token | 30 days | FALSE | N/A | | -| User's SA Account Access | 7 days | TRUE | N/A | Access to data (e.g. length it stays in the proxy group). Can optionally provide an expiration less than 7 days | -| User's Google Account Access | 1 day | TRUE | N/A | After AuthN, how long we associate a Google email with the given user. Can optionally provide an expiration less than 1 day | -| User's Google Account Linkage | Indefinite | N/A | N/A | Can optionally provide an expiration less than 1 hour | -| Google Signed URL | Up to 1 hour | FALSE | N/A | Can optionally provide an expiration less than 1 hour | -| AWS Signed URL | Up to 1 hour | FALSE | N/A | Obtained by an oauth client through /credentials/google | -| Client SA (for User) Key | 10 days | FALSE | N/A | Obtained by the user themselves for temp access. 
Can optionally provide an expiration less than 10 days | -| User Primary SA Key | 10 days | FALSE | N/A | Used for Google URL signing | -| User Primary SA Key for URL Signing | 30 days | FALSE | N/A | | -| Sliding Session Window | 15 minutes | TRUE | 8 hours | access_token cookies get generated automatically when expired if session is still active | +1. [Terminologies](docs/additional_documentation/terminology.md) +2. [Accessing Data](docs/additional_documentation/data_access.md#accessing-data) +3. [Token management](docs/additional_documentation/token_management.md) +4. [fence-create](docs/additional_documentation/fence_create.md) +5. [Default expiration times](docs/additional_documentation/default_expiration_times.md) diff --git a/clear_prometheus_multiproc b/clear_prometheus_multiproc index 4bb2b425f..af1ba6d18 100755 --- a/clear_prometheus_multiproc +++ b/clear_prometheus_multiproc @@ -4,6 +4,8 @@ set -ex rm -Rf $1 -mkdir $1 +mkdir -p $1 chmod 755 $1 -chown 100:101 $1 +if id -u nginx &>/dev/null; then + chown $(id -u nginx):$(id -g nginx) $1 +fi diff --git a/docs/additional_documentation/authorization.md b/docs/additional_documentation/authorization.md new file mode 100644 index 000000000..6fc31c7c1 --- /dev/null +++ b/docs/additional_documentation/authorization.md @@ -0,0 +1,8 @@ + +## Access Control / Authz + +Currently fence works with another Gen3 service named +[arborist](https://github.com/uc-cdis/arborist) to implement attribute-based access +control for commons users. The YAML file of access control information (see +[#create-user-access-file](setup.md#create-user-access-file)) contains a section `authz` which are data sent to +arborist in order to set up the access control model. 
diff --git a/docs/additional_documentation/data_access.md b/docs/additional_documentation/data_access.md new file mode 100644 index 000000000..3e5190fc9 --- /dev/null +++ b/docs/additional_documentation/data_access.md @@ -0,0 +1,22 @@ +## Accessing Data + +Fence has multiple options that provide a mechanism to access data. The access +to data can be moderated through authorization information in a User Access File. + +Users can be provided specific `privilege`s on `projects` in the User Access +File. A `project` is identified by a unique authorization identifier AKA `auth_id`. + +A `project` can be associated with various storage backends that store +object data for that given `project`. You can assign `read-storage` and `write-storage` +privileges to users who should have access to that stored object data. `read` and +`write` allow access to the data stored in a graph database. + +Depending on the backend, Fence can be configured to provide users access to +the data in different ways. + + +### Signed URLs + +Temporary signed URLs are supported in all major commercial clouds. Signed URLs are the most 'cloud agnostic' way to allow users to access data located in different platforms. + +Fence has the ability to request a specific file by its GUID (globally unique identifier) and retrieve a temporary signed URL for object data in AWS or GCP that will provide direct access to that object. 
diff --git a/docs/dbgap_info.md b/docs/additional_documentation/dbgap_info.md similarity index 100% rename from docs/dbgap_info.md rename to docs/additional_documentation/dbgap_info.md diff --git a/docs/additional_documentation/default_expiration_times.md b/docs/additional_documentation/default_expiration_times.md new file mode 100644 index 000000000..9b0432270 --- /dev/null +++ b/docs/additional_documentation/default_expiration_times.md @@ -0,0 +1,19 @@ +## Default Expiration Times in Fence + +Table contains various artifacts in fence that have temporary lifetimes and their default values. + +> NOTE: "SA" in the below table stands for Service Account + +| Name | Lifetime | Extendable? | Maximum Lifetime | Details | +|-------------------------------------|--------------|-------------|-----------------------|-----------------------------------------------------------------------------------------------------------------------------| +| Access Token | 20 minutes | TRUE | Life of Refresh Token | | +| Refresh Token | 30 days | FALSE | N/A | | +| User's SA Account Access | 7 days | TRUE | N/A | Access to data (e.g. length it stays in the proxy group). Can optionally provide an expiration less than 7 days | +| User's Google Account Access | 1 day | TRUE | N/A | After AuthN, how long we associate a Google email with the given user. Can optionally provide an expiration less than 1 day | +| User's Google Account Linkage | Indefinite | N/A | N/A | Can optionally provide an expiration less than 1 hour | +| Google Signed URL | Up to 1 hour | FALSE | N/A | Can optionally provide an expiration less than 1 hour | +| AWS Signed URL | Up to 1 hour | FALSE | N/A | Obtained by an oauth client through /credentials/google | +| Client SA (for User) Key | 10 days | FALSE | N/A | Obtained by the user themselves for temp access. 
Can optionally provide an expiration less than 10 days | +| User Primary SA Key | 10 days | FALSE | N/A | Used for Google URL signing | +| User Primary SA Key for URL Signing | 30 days | FALSE | N/A | | +| Sliding Session Window | 15 minutes | TRUE | 8 hours | access_token cookies get generated automatically when expired if session is still active | diff --git a/docs/fence-create-deprecated.md b/docs/additional_documentation/fence-create-deprecated.md similarity index 100% rename from docs/fence-create-deprecated.md rename to docs/additional_documentation/fence-create-deprecated.md diff --git a/docs/additional_documentation/fence_create.md b/docs/additional_documentation/fence_create.md new file mode 100644 index 000000000..398544f98 --- /dev/null +++ b/docs/additional_documentation/fence_create.md @@ -0,0 +1,126 @@ +## fence-create: Automating common tasks with a command line interface + +fence-create is a command line utility that is bundled with fence and allows you to automate some commons tasks within fence. For the latest and greatest run the command `fence-create --help`. + +WARNING: fence-create directly modifies the database in some cases and may circumvent security checks (most of these utilities are used for testing). BE CAREFUL when you're running these commands and make sure you know what they're doing. + + +### Register Internal Oauth Client + +As a Gen3 commons administrator, if you want to create an oauth client that skips user consent step, use the following command: + +```bash +fence-create client-create --client CLIENT_NAME --urls OAUTH_REDIRECT_URL --username USERNAME --auto-approve (--expires-in 30) +``` + +The optional `--expires-in` parameter allows specifying the number of days until this client expires. 
+ +### Register an Implicit Oauth Client + +As a Gen3 commons administrator, if you want to create an implicit oauth client for a webapp: + +```bash +fence-create client-create --client fancywebappname --urls 'https://betawebapp.example/fence +https://webapp.example/fence' --public --username fancyapp --grant-types authorization_code refresh_token implicit +``` + +If there are more than one URL to add, use space to delimit them like this: + +```bash +fence-create client-create --urls 'https://url1/' 'https://url2/' --client ... +``` + +To specify allowed scopes, use the `allowed-scopes` argument: +```bash +fence-create client-create ... --allowed-scopes openid user data +``` + +### Register an Oauth Client for a Client Credentials flow + +The OAuth2 Client Credentials flow is used for machine-to-machine communication and scenarios in which typical authentication schemes like username + password do not make sense. The system authenticates and authorizes the app rather than a user. See the [OAuth2 specification](https://www.rfc-editor.org/rfc/rfc6749#section-4.4) for more details. + +As a Gen3 commons administrator, if you want to create an OAuth client for a client credentials flow: + +```bash +fence-create client-create --client CLIENT_NAME --grant-types client_credentials (--expires-in 30) +``` + +This command will return a client ID and client secret, which you can then use to obtain an access token: + +```bash +curl --request POST https://FENCE_URL/oauth2/token?grant_type=client_credentials -d scope="openid user" --user CLIENT_ID:CLIENT_SECRET +``` + +The optional `--expires-in` parameter allows specifying the number of *days* until this client expires. The recommendation is to rotate credentials with the `client_credentials` grant at least once a year (see [Rotate client credentials](#rotate-client-credentials) section). + +NOTE: In Gen3, you can grant specific access to a client the same way you would to a user. 
See the [user.yaml guide](https://github.com/uc-cdis/fence/blob/master/docs/additional_documentation/user.yaml_guide.md) for more details. + +NOTE: Client credentials tokens are not linked to a user (the claims contain no `sub` or `context.user.name` like other tokens). Some Gen3 endpoints that assume the token is linked to a user, or whose logic requires there being a user, do not support them. For an example of how to adapt an endpoint to support client credentials tokens, see [here](https://github.com/uc-cdis/requestor/commit/a5078fae27fa258ac78045cf2bb89cb2104f53cf). For an example of how to explicitly reject client credentials tokens, see [here](https://github.com/uc-cdis/requestor/commit/0f4974c25343d2185c7cdb48dcdeb58f97800672). + +### Modify OAuth Client + +```bash +fence-create client-modify --client CLIENT_NAME --urls http://localhost/api/v0/oauth2/authorize +``` + +That command should output any modifications to the client. Similarly, multiple URLs are +allowed here too. + +Add the `--append` argument to add new callback urls or allowed scopes to an existing client (instead of replacing them) using `--append --urls` or `--append --allowed-scopes` +```bash +fence-create client-modify --client CLIENT_NAME --urls http://localhost/api/v0/new/oauth2/authorize --append (--expires-in 30) +``` + +### Rotate client credentials + +Use the `client-rotate` command to receive a new set of credentials (client ID and secret) for a client. The old credentials are NOT deactivated and must be deleted or expired separately (see [Delete Expired OAuth Clients](#delete-expired-oauth-clients) section). This allows for a rotation without downtime. + +```bash +fence-create client-rotate --client CLIENT_NAME (--expires-in 30) +``` + +Note that the `usersync` job must be run after rotating the credentials so that the new client ID is granted the same access as the old one.
+ +### Delete OAuth Client + +```bash +fence-create client-delete --client CLIENT_NAME +``` +That command should output the result of the deletion attempt. + +### Delete Expired OAuth Clients + +```bash +fence-create client-delete-expired +``` + +To post a warning in Slack about any clients that expired or are about to expire: + +```bash +fence-create client-delete-expired --slack-webhook --warning-days +``` + + +### List OAuth Clients + +```bash +fence-create client-list +``` +That command should output the full records for any registered OAuth clients. + +### Set up for External Buckets on Google + +```bash +fence-create link-external-bucket --bucket-name demo-bucket +fence-create link-bucket-to-project --bucket_id demo-bucket --bucket_provider google --project_auth_id test-project +``` + +The link-external-bucket returns an email for a Google group which needs to be added to access to the bucket `demo-bucket`. + +### Notify users who are blocking service account registration + +```bash +fence-create notify-problem-users --emails ex1@gmail.com ex2@gmail.com --auth_ids test --google_project_id test-google +``` + +`notify-problem-users` emails users in the provided list (can be fence user email or linked google email) who do not have access to any of the auth_ids provided. Also accepts a `check_linking` flag to check that each user has linked their google account. 
diff --git a/docs/fence_multifactor_authentication_guide.md b/docs/additional_documentation/fence_multifactor_authentication_guide.md similarity index 100% rename from docs/fence_multifactor_authentication_guide.md rename to docs/additional_documentation/fence_multifactor_authentication_guide.md diff --git a/docs/fence_shibboleth.md b/docs/additional_documentation/fence_shibboleth.md similarity index 95% rename from docs/fence_shibboleth.md rename to docs/additional_documentation/fence_shibboleth.md index 2cae15ef5..41db778ac 100644 --- a/docs/fence_shibboleth.md +++ b/docs/additional_documentation/fence_shibboleth.md @@ -21,7 +21,7 @@ The `/login/shib` endpoint accepts the query parameter `shib_idp`. Fence checks After the user logs in and is redirected to `/login/shib/login`, we get the `eppn` (EduPerson Principal Name) from the request headers to use as username. If the `eppn` is not available, we use the `persistent-id` (or `cn`) instead. -![Shibboleth Login Flow](images/seq_diagrams/shibboleth_flow.png) +![Shibboleth Login Flow](../images/seq_diagrams/shibboleth_flow.png) Notes about the NIH login implementation: - NIH login is used as the default when the `idp` is fence and no `shib_idp` is specified (for backwards compatibility). @@ -32,7 +32,7 @@ Notes about the NIH login implementation: ### In the multi-tenant Fence instance -The [Shibboleth dockerfile](../DockerfileShib) image is at https://quay.io/repository/cdis/fence-shib and is NOT compatible yet with python 3/the latest Fence (for now, use Fence 2.7.x). +The [Shibboleth dockerfile](../../DockerfileShib) image is at https://quay.io/repository/cdis/fence-shib and is NOT compatible yet with python 3/the latest Fence (for now, use Fence 2.7.x). The deployment only includes `revproxy` and `fenceshib`. 
The Fence configuration enables the `shibboleth` provider: diff --git a/docs/ga4gh_passports.md b/docs/additional_documentation/ga4gh_passports.md similarity index 91% rename from docs/ga4gh_passports.md rename to docs/additional_documentation/ga4gh_passports.md index 0e6f0da21..235a248cd 100644 --- a/docs/ga4gh_passports.md +++ b/docs/additional_documentation/ga4gh_passports.md @@ -25,7 +25,7 @@ References: This shows external DRS Client(s) communicating with Gen3 Framework Services (as a GA4GH DRS Server) and how G3FS interacts with Passport Brokers to validate and verify JWTs. -![Passport and Visa JWT Handling](images/ga4gh/passport_jwt_handling.png) +![Passport and Visa JWT Handling](../images/ga4gh/passport_jwt_handling.png) ## G3FS: Configurable Roles for Data Access @@ -33,11 +33,11 @@ Gen3 Framework Services are capable of acting in many different roles. As data r In order to describe the role of the passport in these various configurations, the following diagrams may help. -![Gen3 as DRS Server](images/ga4gh/gen3_as_drs.png) +![Gen3 as DRS Server](../images/ga4gh/gen3_as_drs.png) -![Gen3 as Client](images/ga4gh/gen3_as_client.png) +![Gen3 as Client](../images/ga4gh/gen3_as_client.png) -![Gen3 as Both](images/ga4gh/gen3_as_client_and_drs_server.png) +![Gen3 as Both](../images/ga4gh/gen3_as_client_and_drs_server.png) ## Performance Improvements @@ -52,15 +52,15 @@ We added a number of things to mitigate the performance impact on researchers' w To illustrate the need for such a cache, see the images below for before and after. -![Before Caching](images/ga4gh/caching_before.png) +![Before Caching](../images/ga4gh/caching_before.png) -![After Caching](images/ga4gh/caching_after.png) +![After Caching](../images/ga4gh/caching_after.png) ## User Identities Different GA4GH Visas may refer to the same subject differently. 
In order to maintain the known mappings between different representations of the same identity, we are creating an Issuer+Subject to User mapping table. The primary key on this table is the combination of the `iss` and `sub` from JWTs. -![User Identities](images/ga4gh/users.png) +![User Identities](../images/ga4gh/users.png) ## Backend Updates and Expiration @@ -68,6 +68,6 @@ In order to ensure the removal of access at the right time, the cronjobs we have There is an argument here for event-based architecture, but Gen3 does not currently support such an architecture. We are instead extending the support of our cronjobs to ensure expirations occur at the right time. -![Cronjobs and Expirations](images/ga4gh/expiration.png) +![Cronjobs and Expirations](../images/ga4gh/expiration.png) > _All diagrams are originally from an **internal** CTDS Document. The link to that document is [here](https://lucid.app/lucidchart/5c52b868-5cd2-4c6e-b53b-de2981f7da98/edit?invitationId=inv_9a757cb1-fc81-4189-934d-98c3db06d2fc) for internal people who need to edit the above diagrams._ diff --git a/docs/google_architecture.md b/docs/additional_documentation/google_architecture.md similarity index 98% rename from docs/google_architecture.md rename to docs/additional_documentation/google_architecture.md index b00012ff0..65bc8d7d1 100644 --- a/docs/google_architecture.md +++ b/docs/additional_documentation/google_architecture.md @@ -30,7 +30,7 @@ To support the 3 methods of access mentioned above, we have a generic architectu That architecture involves Google's concept of **groups** and use of their **IAM Policies** in the Google Cloud Platform. The following diagram shows the layers between the user themselves and the bucket. 
-![Google Access Architecture](images/g_architecture.png) +![Google Access Architecture](../images/g_architecture.png) Working backwards from the Google Bucket itself, we have a **Google Bucket Access Group**, which, as you probably guessed, is a Google Group that provides access to the bucket. That group is assigned a **role** on the Google **resource** (the Google Bucket). **Roles** provide a set of permissions (like read privileges). The combinations of those roles on the bucket become the bucket's **Policy**. You can read more about Google's IAM terms and concepts in [their docs](https://cloud.google.com/iam/docs). @@ -46,7 +46,7 @@ Google groups contain **members** (another Google term) and a Google group can b A more representative diagram of the structures that allow users to get access to the buckets may look something like this: -![Representative Google Access Architecture](images/rep_g_architecture.png) +![Representative Google Access Architecture](../images/rep_g_architecture.png) #### User's Proxy Group @@ -169,7 +169,7 @@ In the above script, `google-project-to-bill` is either the `userProject` provid Fence facilitates the creation of Signed URLs to access Google Storage objects. These URLs provide temporary, authenticated, access to anyone with the URL but must be generated by someone who has access. -![Signed URLs](images/signed_urls.png) +![Signed URLs](../images/signed_urls.png) Design Requirements: @@ -195,7 +195,7 @@ This allows clients to manage their temporary credentials without the chance of Each Client Service Account is a member in the User's Proxy Group, meaning it has the same access that the user themselves have. -![Temporary Service Account Credentials](images/g_sa_creds.png) +![Temporary Service Account Credentials](../images/g_sa_creds.png) > WARNING: By default, Google Service Account Keys have an expiration of 10 years. 
To create a more manageable and secure expiration you must manually "expire" the keys by deleting them with a cronjob (once they are alive longer than a configured expiration). Fence's command line tool `fence-create` has a function for expiring keys that you should run on a schedule. Check out `fence-create google-manage-keys --help` @@ -229,7 +229,7 @@ A user logs into fence with their eRA Commons ID. To get access to data through Google Account Linking is achieved by sending the user through the beginning of the OIDC flow with Google. The user is redirected to a Google Login page and whichever account they successfully log in to becomes linked to their fence identity. -![Google Account Linking](images/g_accnt_link.png) +![Google Account Linking](../images/g_accnt_link.png) We require the user to log in so that we can authenticate them and only link an account they actually own. @@ -239,7 +239,7 @@ Once linked, the user's Google Account is then placed *temporarily* inside their At the moment, the *link* between the User and their Google Account does not expire. The access to data *does* expire though. Explicit refreshing of access must be done by an authenticated user or valid client with those permissions through Fence's API. -![Google Account Linking After Expiration](images/g_accnt_link_2.png) +![Google Account Linking After Expiration](../images/g_accnt_link_2.png) #### Service Account Registration @@ -312,7 +312,7 @@ The Service Accounts are validated first in the cronjob so that if multiple SA's This diagram shows a single Google Project with 3 users (`UserA`, `UserB`, and `UserC`). All of them have already gone through the linking process with fence to associate their Google Account with their fence identity. -![Service Account Registration](images/sa_reg.png) +![Service Account Registration](../images/sa_reg.png) The project service account, `Service Account A`, has been registered for access to a fence `Project` which has data in `Bucket Y`. 
The service account is given access by placing it *directly in the Google Bucket Access Group*. @@ -326,6 +326,6 @@ The user must request fence `Projects` that the service account should have acce If someone attempting to register `Service Account A` with fence `Projects` that have data in *both* `Bucket X` and `Bucket Y`, registration will fail. Why? Because not every user in the Google Project have access to that data. -![Service Account Registration](images/sa_invalid_reg.png) +![Service Account Registration](../images/sa_invalid_reg.png) --- diff --git a/docs/local_multi_fence.md b/docs/additional_documentation/local_multi_fence.md similarity index 100% rename from docs/local_multi_fence.md rename to docs/additional_documentation/local_multi_fence.md diff --git a/docs/quickstart_helm.md b/docs/additional_documentation/quickstart_helm.md similarity index 100% rename from docs/quickstart_helm.md rename to docs/additional_documentation/quickstart_helm.md diff --git a/docs/register.md b/docs/additional_documentation/register.md similarity index 100% rename from docs/register.md rename to docs/additional_documentation/register.md diff --git a/docs/additional_documentation/setup.md b/docs/additional_documentation/setup.md new file mode 100644 index 000000000..5dcafc37b --- /dev/null +++ b/docs/additional_documentation/setup.md @@ -0,0 +1,157 @@ + +## Setup + +### Install Requirements and Fence + +Install [Poetry](https://python-poetry.org/docs/#installation). + +```bash +# Install Fence and dependencies +poetry install +``` + +### Create Configuration File + +Fence requires a configuration file to run. We have a command line +utility to help you create one based on a default configuration. + +The configuration file itself will live outside of this repo (to +prevent accidentally checking in sensitive information like database passwords). 
+ +To create a new configuration file from the default configuration: + +```bash +python cfg_help.py create +``` + +This file will be placed in one of the default search directories for Fence. + +To get the exact path where the new configuration file was created, use: + +```bash +python cfg_help.py get +``` + +The file should have detailed information about each of the configuration +variables. **Remember to fill out the new configuration file!** + +Once you have done so, you can run `alembic upgrade head` to generate the tables needed +to run fence. + +#### Other Configuration Notes + +* Fence will look for configuration files from a list of search directories ( +which are currently defined in `fence/settings.py`.) +* For more configuration options (such as having multiple different config +files for development), see the `cfg_help.py` file. + +### Set Up Databases + +The tests clear out the database every time they are run. If you want +to keep a persistent database for manual testing and general local usage, +create a second test database with a different name: + +> NOTE: Requires a minimum of Postgres v9.4 (because of `JSONB` types used) + +```bash +# Create test database(s). +# This one is for automated tests, which clear the database after running; +# `tests/test_settings.py` should have `fence_test_tmp` in the `DB` variable. +psql -U test postgres -c 'create database fence_test_tmp' +userdatamodel-init --db fence_test_tmp +# This one is for manual testing/general local usage; Your config +# should have `fence_test` in the `DB` variable. +psql -U test postgres -c 'create database fence_test' +userdatamodel-init --db fence_test --username test --password test +``` + +### Keypair Configuration + +Fence uses RSA keypairs to sign and allow verification of JWTs that it issues. +When the application is initialized, Fence loads in keypair files from the +`keys` directory. 
To store keypair files, use the following procedure: + - Create a subdirectory in the `fence/keys` directory, named with a + unique identifier, preferably a timestamp in ISO 8601 format of when + the keys are created. The name of the directory is used for the `kid` + (key ID) for those keys; the default (assuming the directory is named + with an ISO timestamp) looks like this: + + fence_key_2018-05-01T14:00:00Z + + - Generate a private and public keypair following the RS256 (RSA with SHA-256) algorithm + and store those in that directory. The key files must be named + `jwt_public_key.pem` and `jwt_private_key.pem`. + +To generate a keypair using `openssl`: +```bash +# Generate the private key. +openssl genpkey -algorithm RSA -out jwt_private_key.pem -pkeyopt rsa_keygen_bits:2048 + +# Generate the public key. +openssl rsa -pubout -in jwt_private_key.pem -out jwt_public_key.pem + +# Depending on the `openssl` distribution, you may find these work instead: +# +# openssl genrsa -out private_key.pem 2048 +# openssl rsa -in private_key.pem -pubout -out public_key.pem +``` +It's not a bad idea to confirm that the files actually say `RSA PRIVATE KEY` +and `PUBLIC KEY` (and in fact Fence will require that the private key files it +uses actually say "PRIVATE KEY" and that the public keys do not). + +Files containing public/private keys should have this format (the format used +by `openssl` for generating RSA keys): +``` +-----BEGIN PUBLIC KEY----- +... [key is here] ... +-----END PUBLIC KEY----- +``` +If a key is not in this format, then `PyJWT` will raise errors about not being +able to read the key. + +Fence will use the first keypair in the list to sign the tokens it issues +through OAuth. + + +### Create User Access File + +You can set up user access via the fence admin script by providing a user yaml file. +Example user yaml: +``` +cloud_providers: {} +groups: {} +users: + userA@gmail.com: + projects: + - auth_id: project_a + privilege: [read, update, create, delete] + - auth_id: project_b + privilege: [read] + userB@gmail.com: + projects: + - auth_id: project_b + privilege: [read] +``` +Example sync command: + +```bash +fence-create sync --yaml user.yaml +``` + +### Register OAuth Client + +When you want to build an application that uses Gen3 resources on behalf of a user, you should register an OAuth client for this app. +Fence right now exposes client registration via admin CLI, because the Oauth2 client for a Gen3 commons needs approval from the sponsor of the commons. If you are an external developer, you should submit a support ticket. + +As a Gen3 commons administrator, you can run the following command for an approved client: +```bash +fence-create client-create --client CLIENT_NAME --urls OAUTH_REDIRECT_URL --username USERNAME +``` +This command should output a tuple of `(client_id, client_secret)` which must be +saved by the OAuth client to use with +`fence`. + +## Quickstart with Helm + +You can now deploy individual services via Helm! +Please refer to the [Helm quickstart guide](https://github.com/uc-cdis/fence/blob/master/docs/additional_documentation/quickstart_helm.md) diff --git a/docs/additional_documentation/terminology.md b/docs/additional_documentation/terminology.md new file mode 100644 index 000000000..8e3ffd812 --- /dev/null +++ b/docs/additional_documentation/terminology.md @@ -0,0 +1,51 @@ +## Terminologies + +### AuthN + +Authentication - establishes "who you are" with the application through communication with an [Identity Provider](#IdP). + +### AuthZ + +Authorization - establishes "what you can do" and "which resources you have access to" within the application.
+ +### IdP + +Identity Provider - the service that lets a user login and provides the identity of the user to downstream services. Examples: Google login, University login, NIH Login. + +### Auth broker + +An interface which enables a user to authenticate using any of multiple IdPs. + +### OAuth2 + +A widely used AuthZ protocol for delegating access to an application to use resources on behalf of a user. + +https://tools.ietf.org/html/rfc6749 + +https://oauth.net/2/ + +#### Client + +OAuth 2.0 Client - An application which makes requests for protected resources (on a resource server) on behalf of a resource owner (end-user) and with the resource owner's authorization. + +#### Auth Server + +OAuth 2.0 Authorization Server - A server which issues access tokens to the client after successfully authenticating the resource owner and obtaining authorization. + +#### Access Token + +A string, issued by the auth server to the client, representing authorization credentials used to access protected resources (on a resource server). + +### OIDC + +OpenID Connect - an extension of OAuth2 which provides an AuthN layer on top of the OAuth 2.0 AuthZ layer. It introduced a new type of token, the id token, that is specifically designed to be consumed by clients to get the identity information of the user. + +http://openid.net/specs/openid-connect-core-1_0.html + +#### OP + +OpenID Provider - an OAuth 2.0 Authentication Server which also implements OpenID Connect. + +#### RP + +Relying Party - an OAuth 2.0 Client which uses (requests) OpenID Connect. diff --git a/docs/additional_documentation/token_management.md b/docs/additional_documentation/token_management.md new file mode 100644 index 000000000..c8c39eba5 --- /dev/null +++ b/docs/additional_documentation/token_management.md @@ -0,0 +1,113 @@ + +## Token management + +Fence utilizes [OpenID Connect](terminology.md#oidc) to generate tokens +for clients. It can also provide tokens directly to a user. 
+ +Clients and users may then use those tokens with other +Gen3 Data Commons services to access protected endpoints that require specific permissions. + +We use JSON Web Tokens (JWTs) as the format for all tokens of the following types: + +- OIDC ID token: this token is used by the OIDC client to get a user's identity from the token content +- OIDC access token: this token can be sent to Gen3 services via bearer header and get protected resources. +- OIDC refresh token: this token can be sent to fence to request a new access / id token. + + + +### JWT Information + +#### Example ID Token + +``` +{ + "sub": "7", + "azp": "test-client", + "pur": "id", + "aud": [ + "openid", + "user", + "test-client" + ], + "context": { + "user": { + "is_admin": false, + "name": "test", + "projects": { + "phs000178": [ + "read", + "update", + "create", + "delete", + "read-storage" + ] + }, + "google": { + "linked_google_account": "somebody@example.com" + } + } + }, + "iss": "https://commons.org", + "jti": "3ae2910b-0294-43dc-af2a-03fd60082aef", + "exp": 1516983302, + "iat": 1516982102, + "auth_time": 1516982102 +} +``` + +#### Example Access Token + +``` +{ + "sub": "7", + "azp": "test-client", + "pur": "access", + "aud": [ + "openid", + "user", + "test-client" + ], + "context": { + "user": { + "is_admin": false, + "name": "test", + "projects": { + "phs000178": [ + "read", + "update", + "create", + "delete", + "read-storage" + ] + }, + "google": { + "proxy_group": "abcdefgh123456", + "linked_google_account": "somebody@example.com" + } + } + }, + "iss": "https://commons.org", + "jti": "2e6ade06-5afb-4ce7-9ab5-e206225ce291", + "exp": 1516983302, + "iat": 1516982102 +} +``` + +#### Example Refresh Token + +``` +{ + "sub": "7", + "azp": "test-client", + "pur": "refresh", + "aud": [ + "openid", + "user", + "test-client" + ], + "iss": "https://commons.org", + "jti": "c72e5573-39fa-4391-a445-191e370b7cc5", + "exp": 1517010902, + "iat": 1516982102 +} +``` \ No newline at end of file diff 
--git a/docs/user.yaml_guide.md b/docs/additional_documentation/user.yaml_guide.md similarity index 98% rename from docs/user.yaml_guide.md rename to docs/additional_documentation/user.yaml_guide.md index ec9e583eb..893d32045 100644 --- a/docs/user.yaml_guide.md +++ b/docs/additional_documentation/user.yaml_guide.md @@ -29,13 +29,13 @@ In a fully deployed Gen3 Commons using [Cloud Automation](https://github.com/uc- } ``` -A template, ready-to-use `user.yaml` file can be found [here](base_user.yaml). +A template, ready-to-use `user.yaml` file can be found [here](../base_user.yaml). When updating your `user.yaml` file, you should use the [`gen3users` CLI](https://github.com/uc-cdis/gen3users#gen3users) to validate it before use. ## Format -Note that the `user.yaml` example below is minimal, as the goal is only to describe its structure. For a working `user.yaml` file that contains everything needed to get started, refer to the [base user.yaml](base_user.yaml) instead. +Note that the `user.yaml` example below is minimal, as the goal is only to describe its structure. For a working `user.yaml` file that contains everything needed to get started, refer to the [base user.yaml](../base_user.yaml) instead. ``` authz: diff --git a/docs/usersync.md b/docs/additional_documentation/usersync.md similarity index 99% rename from docs/usersync.md rename to docs/additional_documentation/usersync.md index ef896c37b..8c0a5d79a 100644 --- a/docs/usersync.md +++ b/docs/additional_documentation/usersync.md @@ -6,7 +6,7 @@ Usersync is a script that parses user access information from multiple sources ( ## Usersync flow -![Usersync Flow](images/usersync.png) +![Usersync Flow](../images/usersync.png) > The access from the user.yaml file and the dbGaP authorization files is combined (see example below), but the user.yaml file overrides the user information (such as email) obtained from the dbGaP authorization files. 
diff --git a/docs/azure_architecture.md b/docs/azure/azure_architecture.md similarity index 75% rename from docs/azure_architecture.md rename to docs/azure/azure_architecture.md index b47c19a41..acf15ff55 100755 --- a/docs/azure_architecture.md +++ b/docs/azure/azure_architecture.md @@ -11,7 +11,7 @@ You can review how `fence` works with [Azure Blob Storage](#Azure-Blob-Storage) ### Azure Blob Storage -![Azure Blob Storage with Fence](./images/m_fence_azure_blob_storage.png) +![Azure Blob Storage with Fence](../images/m_fence_azure_blob_storage.png) The diagram shows 2 separate workflows in order for `fence` to interact with [Azure Blob Storage](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction): @@ -26,7 +26,7 @@ Also note that there's alternatives that could be considered for [future develop You can provision an Azure Storage Account with [Blob Storage as a one-time setup](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-portal). You can further make the blobs [public read only](https://docs.microsoft.com/en-us/azure/storage/blobs/anonymous-read-access-configure?tabs=portal) for dev / test purposes, but it would be advisable to avoid this setup in a production scenario and consider using non-public ones instead. -With the Azure Blob Storage Account setup, you can further upload files into your Blob Storage using [Azure Storage Explorer](https://azure.microsoft.com/en-us/features/storage-explorer/) for manual testing, or you can automate loading data files into Azure Blob Storage (e.g. using [Azure Blob Storage Python SDK](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-blob)). +With the Azure Blob Storage Account setup, you can further upload files into your Blob Storage using [Azure Storage Explorer](https://azure.microsoft.com/en-us/products/storage/storage-explorer/) for manual testing, or you can automate loading data files into Azure Blob Storage (e.g. 
using [Azure Blob Storage Python SDK](https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/storage/azure-storage-blob)). Assuming that you have preexisting files in an Azure Blob Storage Account, you can work through the following steps to index the files: @@ -44,12 +44,12 @@ You can use the Azure Blob Storage client to connect to Azure Blob Storage, and #### Configuration Details -You can update the [Fence config.yaml](../fence/config-default.yaml) to include the following values: +You can update the [Fence config.yaml](../../fence/config-default.yaml) to include the following values: -Name | Value | Description ------- | ------|---------- -`AZ_BLOB_CREDENTIALS` | DefaultEndpointsProtocol=https;AccountName=somestorageaccount;AccountKey=storageaccountkey;BlobEndpoint=`https://somestorageaccount.blob.core.windows.net/`; | This is the [Azure Blob Storage Connection String](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&tabs=azure-portal#view-account-access-keys). You can also set this to `'*'` if you are indexing URLs for [public read access Azure Blob Storage containers](https://docs.microsoft.com/en-us/azure/storage/blobs/anonymous-read-access-configure?tabs=portal). Note that if you're using the URL for a public read access Azure Blob Storage container, then operations such as `delete` and `upload` will not work. -`AZ_BLOB_CONTAINER_URL` | `https://storageaccountname.blob.core.windows.net/storage-container` | This is the destination container for uploading with a given SAS token. You can set this value to designate a pre-existing storage container to upload indexed files, for example the new files could sit in `https://storageaccountname.blob.core.windows.net/storage-container/someguid/some/blob/file.txt`. If the storage account doesn't align with the indexed URL (e.g. you're using a public url or the storage account doesn't match), the upload will not work. 
If `AZ_BLOB_CREDENTIALS` is `'*'` then uploads from an indexed file using a public URL will not work. This value should be associated with the same Azure Blob Storage account used with the [Azure Blob Storage Connection String](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&tabs=azure-portal#view-account-access-keys) for `AZ_BLOB_CREDENTIALS`. +| Name | Value | Description | +|-------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `AZ_BLOB_CREDENTIALS` | DefaultEndpointsProtocol=https;AccountName=somestorageaccount;AccountKey=storageaccountkey;BlobEndpoint=`https://somestorageaccount.blob.core.windows.net/`; | This is the [Azure Blob Storage Connection String](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&tabs=azure-portal#view-account-access-keys). 
You can also set this to `'*'` if you are indexing URLs for [public read access Azure Blob Storage containers](https://docs.microsoft.com/en-us/azure/storage/blobs/anonymous-read-access-configure?tabs=portal). Note that if you're using the URL for a public read access Azure Blob Storage container, then operations such as `delete` and `upload` will not work. | +| `AZ_BLOB_CONTAINER_URL` | `https://storageaccountname.blob.core.windows.net/storage-container` | This is the destination container for uploading with a given SAS token. You can set this value to designate a pre-existing storage container to upload indexed files, for example the new files could sit in `https://storageaccountname.blob.core.windows.net/storage-container/someguid/some/blob/file.txt`. If the storage account doesn't align with the indexed URL (e.g. you're using a public url or the storage account doesn't match), the upload will not work. If `AZ_BLOB_CREDENTIALS` is `'*'` then uploads from an indexed file using a public URL will not work. This value should be associated with the same Azure Blob Storage account used with the [Azure Blob Storage Connection String](https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&tabs=azure-portal#view-account-access-keys) for `AZ_BLOB_CREDENTIALS`. | Using pre-signed urls for download is implemented; it's currently using a [SAS Token](https://docs.microsoft.com/en-us/azure/storage/common/storage-sas-overview). @@ -64,7 +64,7 @@ For example, when you index the file (e.g. using the [gen3sdk](https://github.co So if you navigate to `https://mydatacommons/files/guid` (assuming that the metadata is already setup), you can click on the file to download which will make the call to get the appropriate signed URL. 
-![Presigned URL](./images/m_fence_presigned_url.png) +![Presigned URL](../images/m_fence_presigned_url.png) * If you index the file with a URL for a blob in a **public** Azure Blob Storage Container and the `AZ_BLOB_CREDENTIALS` are set to `'*'`, then the **non-signed** converted indexed URL will be used (e.g. `https://.blob.core.windows.net//some/path/to/file.txt`) > You need to replace the URL such as `https://.blob.core.windows.net//some/path/to/file.txt` with `az://.blob.core.windows.net//some/path/to/file.txt` upon submitting the record to `indexd`. @@ -83,7 +83,7 @@ You can use [user delegation SAS tokens](https://docs.microsoft.com/en-us/rest/a ### Azure Active Directory -![Azure AD with Fence](./images/m_fence_azure_AD.png) +![Azure AD with Fence](../images/m_fence_azure_AD.png) The diagram shows 3 separate workflows in order for `fence` to interact with Azure AD: @@ -104,16 +104,16 @@ Also note that there's alternatives that could be considered for [future develop 1. [Create](https://docs.microsoft.com/en-us/azure/data-explorer/provision-azure-ad-app) AAD Application 2. Add a redirect URL * The application needs to have redirect URL that is the FDQN of commons appended with `(commons fdqn)/user/login/microsoft/login`. -![Add Redirect URI](./images/m_fence_azure_AD_app_registration_1.png) +![Add Redirect URI](../images/m_fence_azure_AD_app_registration_1.png) 3. Set a secret for the AAD application -![Set the Client Secret](./images/m_fence_azure_AD_app_registration_2.png) +![Set the Client Secret](../images/m_fence_azure_AD_app_registration_2.png) 4. Retrieve the `client id` of the AAD application -![Retrieve client ID](./images/m_fence_azure_AD_app_registration_3.png) -5. Update [fence-config.yaml](../fence/config-default.yaml) +![Retrieve client ID](../images/m_fence_azure_AD_app_registration_3.png) +5. Update [fence-config.yaml](../../fence/config-default.yaml) * Set the `microsoft_client_id` to be the `client_id` in step 4. 
* Set the `microsoft_client_secret` to be the secret value in step 3. - * Make sure the `BASE_URL` in [fence-config.yaml](../fence/config-default.yaml) is correct. - * Make sure the `redirect_url` in [fence-config.yaml](../fence/config-default.yaml) is `{{BASE_URL}}/login/microsoft/login/` is matches the redirect URL (`(commons fdqn)/user/login/microsoft/login`) in step 2 + * Make sure the `BASE_URL` in [fence-config.yaml](../../fence/config-default.yaml) is correct. + * Make sure the `redirect_url` in [fence-config.yaml](../../fence/config-default.yaml) is `{{BASE_URL}}/login/microsoft/login/` and matches the redirect URL (`(commons fdqn)/user/login/microsoft/login`) in step 2 6. Restart `fence` service with the updated secrets #### User Yaml Setup @@ -122,7 +122,7 @@ Also note that there's alternatives that could be considered for [future develop It's helpful to understand some of the [Arborist terms and definitions](https://github.com/uc-cdis/arborist#terminology-and-definitions), which covers **action**, **permission**, **role**, **resource**, **policy**, and **group**. -Further, it's helpful to understand the Arborist options for [configuring access](https://github.com/uc-cdis/arborist#configuring-access). You can see an example of granting **users** and **groups** access and more details in the [user.yaml guide](./user.yaml_guide.md). +Further, it's helpful to understand the Arborist options for [configuring access](https://github.com/uc-cdis/arborist#configuring-access). You can see an example of granting **users** and **groups** access and more details in the [user.yaml guide](../additional_documentation/user.yaml_guide.md). 
At a high level, this setup involves a couple steps: diff --git a/docs/azure_devops_pipeline.md b/docs/azure/azure_devops_pipeline.md similarity index 87% rename from docs/azure_devops_pipeline.md rename to docs/azure/azure_devops_pipeline.md index b0ff95d82..7e7be9fd8 100755 --- a/docs/azure_devops_pipeline.md +++ b/docs/azure/azure_devops_pipeline.md @@ -1,12 +1,12 @@ # Azure DevOps Build Pipeline -The purpose of this [Azure DevOps Pipeline](../azure-devops-pipeline.yaml) is to build `fence`, run a test suite, and then push the `fence` container into an [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-get-started-portal). +The purpose of this [Azure DevOps Pipeline](../../azure-devops-pipeline.yaml) is to build `fence`, run a test suite, and then push the `fence` container into an [Azure Container Registry](https://docs.microsoft.com/en-us/azure/container-registry/container-registry-get-started-portal). ## Getting Started If you don't already have access, you can use the free sign up with [Azure Devops](https://docs.microsoft.com/en-us/azure/devops/pipelines/get-started/pipelines-sign-up?view=azure-devops). -You can also import the [pipeline](../azure-devops-pipeline.yaml), see these [doc notes](https://docs.microsoft.com/en-us/azure/devops/pipelines/get-started/clone-import-pipeline?view=azure-devops&tabs=yaml#export-and-import-a-pipeline) as a guide. +You can also import the [pipeline](../../azure-devops-pipeline.yaml), see these [doc notes](https://docs.microsoft.com/en-us/azure/devops/pipelines/get-started/clone-import-pipeline?view=azure-devops&tabs=yaml#export-and-import-a-pipeline) as a guide. 
### Setup Azure Container Registry @@ -103,7 +103,7 @@ First, make sure you have already [imported your Azure DevOps Pipeline](https:// Click on the pipeline and then click edit, which will let you update the variables in the Azure DevOps pipeline: -![Click on Variables](./azure_devops_pipeline_config_1.png) +![Click on Variables](azure_devops_pipeline_config_1.png) Variable Name | Description ------ | ------ @@ -117,8 +117,8 @@ GIT_REPO_TAG | This is the tag to use for the `fence` git repository, with a def After updating the variables, be sure to click **save**: -![Save updated variables](./azure_devops_pipeline_config_2.png) +![Save updated variables](azure_devops_pipeline_config_2.png) You can run the pipeline to validate the `fence` build and push to ACR. -![Run the pipeline](./azure_devops_pipeline_config_3.png) \ No newline at end of file +![Run the pipeline](azure_devops_pipeline_config_3.png) \ No newline at end of file diff --git a/docs/azure_devops_pipeline_config_1.png b/docs/azure/azure_devops_pipeline_config_1.png similarity index 100% rename from docs/azure_devops_pipeline_config_1.png rename to docs/azure/azure_devops_pipeline_config_1.png diff --git a/docs/azure_devops_pipeline_config_2.png b/docs/azure/azure_devops_pipeline_config_2.png similarity index 100% rename from docs/azure_devops_pipeline_config_2.png rename to docs/azure/azure_devops_pipeline_config_2.png diff --git a/docs/azure_devops_pipeline_config_3.png b/docs/azure/azure_devops_pipeline_config_3.png similarity index 100% rename from docs/azure_devops_pipeline_config_3.png rename to docs/azure/azure_devops_pipeline_config_3.png diff --git a/fence/__init__.py b/fence/__init__.py index 050a4f82d..e1aec601d 100755 --- a/fence/__init__.py +++ b/fence/__init__.py @@ -1,21 +1,17 @@ from collections import OrderedDict import os -import tempfile from urllib.parse import urljoin -import flask -from flask_cors import CORS -from sqlalchemy.orm import scoped_session -from flask import current_app 
-from werkzeug.local import LocalProxy from authutils.oauth2.client import OAuthClient -from cdislogging import get_logger -from gen3authz.client.arborist.client import ArboristClient -from flask_wtf.csrf import validate_csrf -from werkzeug.middleware.dispatcher import DispatcherMiddleware from azure.storage.blob import BlobServiceClient from azure.core.exceptions import ResourceNotFoundError -from urllib.parse import urlparse +from cdislogging import get_logger +import flask +from flask_cors import CORS +from flask_wtf.csrf import validate_csrf +from gen3authz.client.arborist.client import ArboristClient +from sqlalchemy.orm import scoped_session + # Can't read config yet. Just set to debug for now, else no handlers. # Later, in app_config(), will actually set level based on config @@ -31,6 +27,7 @@ ) from fence.auth import logout, build_redirect_url +from fence.metrics import metrics from fence.blueprints.data.indexd import S3IndexedFileLocation from fence.blueprints.login.utils import allowed_login_redirects, domain from fence.errors import UserError @@ -67,11 +64,6 @@ import fence.blueprints.ga4gh -# for some reason the temp dir does not get created properly if we move -# this statement to `_setup_prometheus()` -PROMETHEUS_TMP_COUNTER_DIR = tempfile.TemporaryDirectory() - - app = flask.Flask(__name__) CORS(app=app, headers=["content-type", "accept"], expose_headers="*") @@ -102,6 +94,9 @@ def app_init( app_sessions(app) app_register_blueprints(app) server.init_app(app, query_client=query_client) + logger.info( + f"Prometheus metrics are{'' if config['ENABLE_PROMETHEUS_METRICS'] else ' NOT'} enabled." + ) def app_sessions(app): @@ -206,6 +201,15 @@ def public_keys(): {"keys": [(keypair.kid, keypair.public_key) for keypair in app.keypairs]} ) + @app.route("/metrics") + def metrics_endpoint(): + """ + /!\ There is no authz control on this endpoint! + In cloud-automation setups, access to this endpoint is blocked at the revproxy level. 
+ """ + data, content_type = metrics.get_latest_metrics() + return flask.Response(data, content_type=content_type) + def _check_azure_storage(app): """ @@ -365,13 +369,6 @@ def app_config( _setup_data_endpoint_and_boto(app) _load_keys(app, root_dir) - app.prometheus_counters = {} - if config["ENABLE_PROMETHEUS_METRICS"]: - logger.info("Enabling Prometheus metrics...") - _setup_prometheus(app) - else: - logger.info("Prometheus metrics are NOT enabled.") - app.storage_manager = StorageManager(config["STORAGE_CREDENTIALS"], logger=logger) app.debug = config["DEBUG"] @@ -496,27 +493,6 @@ def _setup_audit_service_client(app): ) -def _setup_prometheus(app): - # This environment variable MUST be declared before importing the - # prometheus modules (or unit tests fail) - # More details on this awkwardness: https://github.com/prometheus/client_python/issues/250 - os.environ["prometheus_multiproc_dir"] = PROMETHEUS_TMP_COUNTER_DIR.name - - from prometheus_client import ( - CollectorRegistry, - multiprocess, - make_wsgi_app, - ) - - app.prometheus_registry = CollectorRegistry() - multiprocess.MultiProcessCollector(app.prometheus_registry) - - # Add prometheus wsgi middleware to route /metrics requests - app.wsgi_app = DispatcherMiddleware( - app.wsgi_app, {"/metrics": make_wsgi_app(registry=app.prometheus_registry)} - ) - - @app.errorhandler(Exception) def handle_error(error): """ diff --git a/fence/blueprints/data/indexd.py b/fence/blueprints/data/indexd.py index f7b9488f6..380fcd43e 100755 --- a/fence/blueprints/data/indexd.py +++ b/fence/blueprints/data/indexd.py @@ -49,6 +49,7 @@ from fence.resources.ga4gh.passports import sync_gen3_users_authz_from_ga4gh_passports from fence.resources.audit.utils import enable_audit_logging from fence.utils import get_valid_expiration_from_request +from fence.metrics import metrics from . 
import multipart_upload from ...models import AssumeRoleCacheAWS, query_for_user, query_for_user_by_id @@ -77,6 +78,7 @@ def get_signed_url_for_file( ga4gh_passports=None, db_session=None, bucket=None, + drs="False", ): requested_protocol = requested_protocol or flask.request.args.get("protocol", None) r_pays_project = flask.request.args.get("userProject", None) @@ -164,12 +166,33 @@ def get_signed_url_for_file( user_sub=flask.g.audit_data.get("sub", ""), client_id=_get_client_id(), requested_protocol=requested_protocol, + action=action, + drs=drs, ) return {"url": signed_url} -def _log_signed_url_data_info(indexed_file, user_sub, client_id, requested_protocol): +def get_bucket_from_urls(urls, protocol): + """ + Return the bucket name from the first of the provided URLs that starts with the given protocol (usually `gs`, `s3`, `az`...) + """ + bucket = "" + for url in urls: + if "://" in url: + # Extract the protocol and the rest of the URL + bucket_protocol, rest_of_url = url.split("://", 1) + + if bucket_protocol == protocol: + # Extract bucket name + bucket = f"{bucket_protocol}://{rest_of_url.split('/')[0]}" + break + return bucket + + +def _log_signed_url_data_info( + indexed_file, user_sub, client_id, requested_protocol, action, drs="False" +): size_in_kibibytes = (indexed_file.index_document.get("size") or 0) / 1024 acl = indexed_file.index_document.get("acl") authz = indexed_file.index_document.get("authz") @@ -180,23 +203,25 @@ def _log_signed_url_data_info(indexed_file, user_sub, client_id, requested_proto protocol = indexed_file.indexed_file_locations[0].protocol # figure out which bucket was used based on the protocol - bucket = "" - for url in indexed_file.index_document.get("urls", []): - bucket_name = None - if "://" in url: - # Extract the protocol and the rest of the URL - bucket_protocol, rest_of_url = url.split("://", 1) - - if bucket_protocol == protocol: - # Extract bucket name - bucket = f"{bucket_protocol}://{rest_of_url.split('/')[0]}" - 
break + bucket = get_bucket_from_urls(indexed_file.index_document.get("urls", []), protocol) logger.info( - f"Signed URL Generated. size_in_kibibytes={size_in_kibibytes} " + f"Signed URL Generated. action={action} size_in_kibibytes={size_in_kibibytes} " f"acl={acl} authz={authz} bucket={bucket} user_sub={user_sub} client_id={client_id}" ) + metrics.add_signed_url_event( + action, + protocol, + acl, + authz, + bucket, + user_sub, + client_id, + drs, + size_in_kibibytes, + ) + def _get_client_id(): client_id = "Unknown Client" @@ -208,6 +233,7 @@ def _get_client_id(): return client_id + def prepare_presigned_url_audit_log(protocol, indexed_file): """ Store in `flask.g.audit_data` the data needed to record an audit log. diff --git a/fence/blueprints/ga4gh.py b/fence/blueprints/ga4gh.py index 7b4ef0603..890da83f6 100644 --- a/fence/blueprints/ga4gh.py +++ b/fence/blueprints/ga4gh.py @@ -41,5 +41,7 @@ def get_ga4gh_signed_url(object_id, access_id): object_id, requested_protocol=access_id, ga4gh_passports=ga4gh_passports, + drs="True", ) + return flask.jsonify(result) diff --git a/fence/blueprints/login/base.py b/fence/blueprints/login/base.py index 0b6ae3f95..08fcab61d 100644 --- a/fence/blueprints/login/base.py +++ b/fence/blueprints/login/base.py @@ -7,6 +7,7 @@ from fence.blueprints.login.redirect import validate_redirect from fence.config import config from fence.errors import UserError +from fence.metrics import metrics logger = get_logger(__name__) @@ -133,6 +134,14 @@ def get(self): def post_login(self, user=None, token_result=None, **kwargs): prepare_login_log(self.idp_name) + metrics.add_login_event( + user_sub=flask.g.user.id, + idp=self.idp_name, + fence_idp=flask.session.get("fence_idp"), + shib_idp=flask.session.get("shib_idp"), + client_id=flask.session.get("client_id"), + ) + if token_result: username = token_result.get(self.username_field) if self.is_mfa_enabled: diff --git a/fence/blueprints/login/google.py b/fence/blueprints/login/google.py index 
0fa3e4cb5..2c7570795 100644 --- a/fence/blueprints/login/google.py +++ b/fence/blueprints/login/google.py @@ -25,4 +25,5 @@ def get(self): config.get("BASE_URL", "") + "/link/google/callback?code={}".format(flask.request.args.get("code")) ) + return super(GoogleCallback, self).get() diff --git a/fence/config-default.yaml b/fence/config-default.yaml index a73ebb976..5e43e21dc 100755 --- a/fence/config-default.yaml +++ b/fence/config-default.yaml @@ -62,6 +62,7 @@ MOCK_STORAGE: true # WARNING: ONLY set to true when fence will be deployed in such a way that it will # ONLY receive traffic from internal clients and can safely use HTTP. AUTHLIB_INSECURE_TRANSPORT: true + # enable Prometheus Metrics for observability purposes # # WARNING: Any counters, gauges, histograms, etc. should be carefully diff --git a/fence/metrics.py b/fence/metrics.py new file mode 100644 index 000000000..acdb200a9 --- /dev/null +++ b/fence/metrics.py @@ -0,0 +1,199 @@ +""" +Metrics are collected by the Prometheus client and exposed at the `/metrics` endpoint. + +To add a new metric: +- Add a new method to the `Metrics` class below (see `add_login_event` and `add_signed_url_event` +for example). +- The new method should call the `_increment_counter` and/or `_set_gauge` methods with the +appropriate metric name and labels. +- Call the new method from the code where relevant, for example: + from fence.metrics import metrics + metrics.add_login_event(...) +- Add unit tests to the `tests/test_metrics` file. 
+""" + + +import os +import pathlib + +from cdislogging import get_logger +from prometheus_client import ( + CollectorRegistry, + multiprocess, + Counter, + Gauge, + generate_latest, + CONTENT_TYPE_LATEST, +) + +from fence.config import config + + +logger = get_logger(__name__) + + +class Metrics: + """ + Class to handle Prometheus metrics + Attributes: + registry (CollectorRegistry): Prometheus registry + metrics (dict): Dictionary to store Prometheus metrics + """ + + def __init__(self, prometheus_dir="/var/tmp/uwsgi_flask_metrics"): + pathlib.Path(prometheus_dir).mkdir(parents=True, exist_ok=True) + os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_dir + + self._registry = CollectorRegistry() + multiprocess.MultiProcessCollector(self._registry) + self._metrics = {} + + # set the descriptions of new metrics here. Descriptions not specified here + # will default to the metric name. + self._counter_descriptions = { + "gen3_fence_presigned_url": "Fence presigned urls", + "gen3_fence_login": "Fence logins", + } + self._gauge_descriptions = { + "gen3_fence_presigned_url_size": "Fence presigned urls", + } + + def get_latest_metrics(self): + """ + Generate the latest Prometheus metrics + Returns: + str: Latest Prometheus metrics + str: Content type of the latest Prometheus metrics + """ + # When metrics gathering is not enabled, the metrics endpoint should not error, but it should + # not return any data. + if not config["ENABLE_PROMETHEUS_METRICS"]: + return "", None + + return generate_latest(self._registry), CONTENT_TYPE_LATEST + + def _increment_counter(self, name, labels): + """ + Increment a Prometheus counter metric. + Note that this function should not be called directly - implement a function like + `add_login_event` instead. A metric's labels should always be consistent. 
+ Args: + name (str): Name of the metric + labels (dict): Dictionary of labels for the metric + """ + # create the counter if it doesn't already exist + if name not in self._metrics: + description = self._counter_descriptions.get(name, name) + logger.info( + f"Creating counter '{name}' with description '{description}' and labels: {labels}" + ) + self._metrics[name] = Counter(name, description, [*labels.keys()]) + elif type(self._metrics[name]) != Counter: + raise ValueError( + f"Trying to create counter '{name}' but a {type(self._metrics[name])} with this name already exists" + ) + + logger.debug(f"Incrementing counter '{name}' with labels: {labels}") + self._metrics[name].labels(*labels.values()).inc() + + def _set_gauge(self, name, labels, value): + """ + Set a Prometheus gauge metric. + Note that this function should not be called directly - implement a function like + `add_signed_url_event` instead. A metric's labels should always be consistent. + Args: + name (str): Name of the metric + labels (dict): Dictionary of labels for the metric + value (int): Value to set the metric to + """ + # create the gauge if it doesn't already exist + if name not in self._metrics: + description = self._gauge_descriptions.get(name, name) + logger.info( + f"Creating gauge '{name}' with description '{description}' and labels: {labels}" + ) + self._metrics[name] = Gauge(name, description, [*labels.keys()]) + elif type(self._metrics[name]) != Gauge: + raise ValueError( + f"Trying to create gauge '{name}' but a {type(self._metrics[name])} with this name already exists" + ) + + logger.debug(f"Setting gauge '{name}' with labels: {labels}") + self._metrics[name].labels(*labels.values()).set(value) + + def add_login_event(self, user_sub, idp, fence_idp, shib_idp, client_id): + """ + Record a login event + """ + if not config["ENABLE_PROMETHEUS_METRICS"]: + return + self._increment_counter( + "gen3_fence_login", + { + "user_sub": user_sub, + "idp": idp, + "client_id": client_id, + 
"fence_idp": fence_idp, + "shib_idp": shib_idp, + }, + ) + self._increment_counter( + "gen3_fence_login", + { + "user_sub": user_sub, + "idp": "all", + "client_id": client_id, + # when counting all IDPs, we don't care about the fence and shib IDP values + "fence_idp": None, + "shib_idp": None, + }, + ) + + def add_signed_url_event( + self, + action, + protocol, + acl, + authz, + bucket, + user_sub, + client_id, + drs, + size_in_kibibytes, + ): + """ + Record a signed URL event + """ + if not config["ENABLE_PROMETHEUS_METRICS"]: + return + self._increment_counter( + "gen3_fence_presigned_url", + { + "action": action, + "protocol": protocol, + "acl": acl, + "authz": authz, + "bucket": bucket, + "user_sub": user_sub, + "client_id": client_id, + "drs": drs, + }, + ) + self._set_gauge( + "gen3_fence_presigned_url_size", + { + "action": action, + "protocol": protocol, + "acl": acl, + "authz": authz, + "bucket": bucket, + "user_sub": user_sub, + "client_id": client_id, + "drs": drs, + }, + size_in_kibibytes, + ) + + +# Initialize the Metrics instance +metrics = Metrics() diff --git a/fence/sync/sync_users.py b/fence/sync/sync_users.py index 4320e58ab..7b1b164ea 100644 --- a/fence/sync/sync_users.py +++ b/fence/sync/sync_users.py @@ -507,7 +507,7 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True): """ user_projects = dict() - user_info = dict() + user_info = defaultdict(dict) # parse dbGaP sftp server information dbgap_key = dbgap_config.get("decrypt_key", None) @@ -542,6 +542,7 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True): ] # when converting the YAML from fence-config, python reads it as Python string literal. 
So "\" turns into "\\" which messes with the regex match project_id_patterns += patterns + self.logger.info(f"Using these file paths: {file_dict.items()}") for filepath, privileges in file_dict.items(): self.logger.info("Reading file {}".format(filepath)) if os.stat(filepath).st_size == 0: @@ -657,9 +658,9 @@ def _parse_csv(self, file_dict, sess, dbgap_config={}, encrypted=True): tags["pi"] = row["downloader for names"] user_info[username] = { - "email": row.get("email") or "", + "email": row.get("email") or user_info[username].get('email') or "", "display_name": display_name, - "phone_number": row.get("phone") or "", + "phone_number": row.get("phone") or user_info[username].get('phone_number') or "", "tags": tags, } @@ -1570,6 +1571,8 @@ def _sync(self, sess): local_csv_file_list = glob.glob( os.path.join(self.sync_from_local_csv_dir, "*") ) + # Sort the list so the order of files is consistent across platforms + local_csv_file_list.sort() user_projects_csv, user_info_csv = self._merge_multiple_local_csv_files( local_csv_file_list, diff --git a/openapis/swagger.yaml b/openapis/swagger.yaml index 70d1f8e1a..0336fec54 100644 --- a/openapis/swagger.yaml +++ b/openapis/swagger.yaml @@ -68,6 +68,18 @@ paths: description: successful operation schema: $ref: '#/definitions/SystemVersionOutputRef' + /metrics: + get: + tags: + - system + summary: Get Prometheus metrics + description: >- + Returns Prometheus metrics if the `ENABLE_PROMETHEUS_METRICS` setting is `True`. + By default, this endpoint is public. Authorization controls can be setup externally; + in cloud-automation setups, access to this endpoint is blocked at the revproxy level. + responses: + '200': + description: successful operation /oauth2/authorize: get: tags: diff --git a/poetry.lock b/poetry.lock index 4f2a43732..b3ea2a211 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "addict" @@ -13,13 +13,13 @@ files = [ [[package]] name = "alembic" -version = "1.13.1" +version = "1.13.2" description = "A database migration tool for SQLAlchemy." optional = false python-versions = ">=3.8" files = [ - {file = "alembic-1.13.1-py3-none-any.whl", hash = "sha256:2edcc97bed0bd3272611ce3a98d98279e9c209e7186e43e75bbb1b2bdfdbcc43"}, - {file = "alembic-1.13.1.tar.gz", hash = "sha256:4932c8558bf68f2ee92b9bbcb8218671c627064d5b08939437af6d77dc05e595"}, + {file = "alembic-1.13.2-py3-none-any.whl", hash = "sha256:6b8733129a6224a9a711e17c99b08462dbf7cc9670ba8f2e2ae9af860ceb1953"}, + {file = "alembic-1.13.2.tar.gz", hash = "sha256:1ff0ae32975f4fd96028c39ed9bb3c867fe3af956bd7bb37343b54c9fe7445ef"}, ] [package.dependencies] @@ -170,13 +170,13 @@ aio = ["aiohttp (>=3.0)"] [[package]] name = "azure-storage-blob" -version = "12.20.0" +version = "12.21.0" description = "Microsoft Azure Blob Storage Client Library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "azure-storage-blob-12.20.0.tar.gz", hash = "sha256:eeb91256e41d4b5b9bad6a87fd0a8ade07dd58aa52344e2c8d2746e27a017d3b"}, - {file = "azure_storage_blob-12.20.0-py3-none-any.whl", hash = "sha256:de6b3bf3a90e9341a6bcb96a2ebe981dffff993e9045818f6549afea827a52a9"}, + {file = "azure-storage-blob-12.21.0.tar.gz", hash = "sha256:b9722725072f5b7373c0f4dd6d78fbae2bb37bffc5c3e01731ab8c750ee8dd7e"}, + {file = "azure_storage_blob-12.21.0-py3-none-any.whl", hash = "sha256:f9ede187dd5a0ef296b583a7c1861c6938ddd6708d6e70f4203a163c2ab42d43"}, ] [package.dependencies] @@ -250,17 +250,17 @@ files = [ [[package]] name = "boto3" -version = "1.34.128" +version = "1.34.147" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.34.128-py3-none-any.whl", hash = "sha256:a048ff980a81cd652724a73bc496c519b336fabe19cc8bfc6c53b2ff6eb22c7b"}, - {file = 
"boto3-1.34.128.tar.gz", hash = "sha256:43a6e99f53a8d34b3b4dbe424dbcc6b894350dc41a85b0af7c7bc24a7ec2cead"}, + {file = "boto3-1.34.147-py3-none-any.whl", hash = "sha256:e1cef9a1a301866bcdee32ae0c699465eb2345f9a8e613a5835821430165ff6d"}, + {file = "boto3-1.34.147.tar.gz", hash = "sha256:9ec1c6ab22588242a47549f51a63dfc7c21fdf95a94820fc6e629ab060c38bd9"}, ] [package.dependencies] -botocore = ">=1.34.128,<1.35.0" +botocore = ">=1.34.147,<1.35.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -269,13 +269,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.34.128" +version = "1.34.147" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.128-py3-none-any.whl", hash = "sha256:db67fda136c372ab3fa432580c819c89ba18d28a6152a4d2a7ea40d44082892e"}, - {file = "botocore-1.34.128.tar.gz", hash = "sha256:8d8e03f7c8c080ecafda72036eb3b482d649f8417c90b5dca33b7c2c47adb0c9"}, + {file = "botocore-1.34.147-py3-none-any.whl", hash = "sha256:be94a2f4874b1d1705cae2bd512c475047497379651678593acb6c61c50d91de"}, + {file = "botocore-1.34.147.tar.gz", hash = "sha256:2e8f000b77e4ca345146cb2edab6403769a517b564f627bb084ab335417f3dbe"}, ] [package.dependencies] @@ -313,13 +313,13 @@ files = [ [[package]] name = "cachetools" -version = "5.3.3" +version = "5.4.0" description = "Extensible memoizing collections and decorators" optional = false python-versions = ">=3.7" files = [ - {file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"}, - {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"}, + {file = "cachetools-5.4.0-py3-none-any.whl", hash = "sha256:3ae3b49a3d5e28a77a0be2b37dbcb89005058959cb2323858c2657c4a8cab474"}, + {file = "cachetools-5.4.0.tar.gz", hash = "sha256:b8adc2e7c07f105ced7bc56dbb6dfbe7c4a00acce20e2227b3f355be89bc6827"}, 
] [[package]] @@ -384,13 +384,13 @@ resolved_reference = "74a607736ca4af5ec35f17830ab9b78b5db15837" [[package]] name = "certifi" -version = "2024.6.2" +version = "2024.7.4" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2024.6.2-py3-none-any.whl", hash = "sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56"}, - {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, + {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, + {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, ] [[package]] @@ -684,43 +684,38 @@ yaml = ["PyYAML (>=3.10)"] [[package]] name = "cryptography" -version = "42.0.8" +version = "43.0.0" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-42.0.8-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e"}, - {file = "cryptography-42.0.8-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949"}, - {file = "cryptography-42.0.8-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b"}, - {file = "cryptography-42.0.8-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7"}, - {file = "cryptography-42.0.8-cp37-abi3-win32.whl", hash = "sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2"}, - {file = "cryptography-42.0.8-cp37-abi3-win_amd64.whl", hash = "sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba"}, - {file = "cryptography-42.0.8-cp39-abi3-macosx_10_12_universal2.whl", hash = 
"sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c"}, - {file = "cryptography-42.0.8-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1"}, - {file = "cryptography-42.0.8-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14"}, - {file = "cryptography-42.0.8-cp39-abi3-win32.whl", hash = "sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c"}, - {file = "cryptography-42.0.8-cp39-abi3-win_amd64.whl", hash = "sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a"}, - {file = "cryptography-42.0.8-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe"}, - {file = "cryptography-42.0.8-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c"}, - {file = 
"cryptography-42.0.8-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71"}, - {file = "cryptography-42.0.8-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648"}, - {file = "cryptography-42.0.8-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad"}, - {file = "cryptography-42.0.8.tar.gz", hash = "sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2"}, + {file = "cryptography-43.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:64c3f16e2a4fc51c0d06af28441881f98c5d91009b8caaff40cf3548089e9c74"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3dcdedae5c7710b9f97ac6bba7e1052b95c7083c9d0e9df96e02a1932e777895"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d9a1eca329405219b605fac09ecfc09ac09e595d6def650a437523fcd08dd22"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ea9e57f8ea880eeea38ab5abf9fbe39f923544d7884228ec67d666abd60f5a47"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9a8d6802e0825767476f62aafed40532bd435e8a5f7d23bd8b4f5fd04cc80ecf"}, + {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = 
"sha256:cc70b4b581f28d0a254d006f26949245e3657d40d8857066c2ae22a61222ef55"}, + {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4a997df8c1c2aae1e1e5ac49c2e4f610ad037fc5a3aadc7b64e39dea42249431"}, + {file = "cryptography-43.0.0-cp37-abi3-win32.whl", hash = "sha256:6e2b11c55d260d03a8cf29ac9b5e0608d35f08077d8c087be96287f43af3ccdc"}, + {file = "cryptography-43.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:31e44a986ceccec3d0498e16f3d27b2ee5fdf69ce2ab89b52eaad1d2f33d8778"}, + {file = "cryptography-43.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:7b3f5fe74a5ca32d4d0f302ffe6680fcc5c28f8ef0dc0ae8f40c0f3a1b4fca66"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac1955ce000cb29ab40def14fd1bbfa7af2017cca696ee696925615cafd0dce5"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:299d3da8e00b7e2b54bb02ef58d73cd5f55fb31f33ebbf33bd00d9aa6807df7e"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ee0c405832ade84d4de74b9029bedb7b31200600fa524d218fc29bfa371e97f5"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cb013933d4c127349b3948aa8aaf2f12c0353ad0eccd715ca789c8a0f671646f"}, + {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fdcb265de28585de5b859ae13e3846a8e805268a823a12a4da2597f1f5afc9f0"}, + {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2905ccf93a8a2a416f3ec01b1a7911c3fe4073ef35640e7ee5296754e30b762b"}, + {file = "cryptography-43.0.0-cp39-abi3-win32.whl", hash = "sha256:47ca71115e545954e6c1d207dd13461ab81f4eccfcb1345eac874828b5e3eaaf"}, + {file = "cryptography-43.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:0663585d02f76929792470451a5ba64424acc3cd5227b03921dab0e2f27b1709"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:2c6d112bf61c5ef44042c253e4859b3cbbb50df2f78fa8fae6747a7814484a70"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:844b6d608374e7d08f4f6e6f9f7b951f9256db41421917dfb2d003dde4cd6b66"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:51956cf8730665e2bdf8ddb8da0056f699c1a5715648c1b0144670c1ba00b48f"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:aae4d918f6b180a8ab8bf6511a419473d107df4dbb4225c7b48c5c9602c38c7f"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:232ce02943a579095a339ac4b390fbbe97f5b5d5d107f8a08260ea2768be8cc2"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5bcb8a5620008a8034d39bce21dc3e23735dfdb6a33a06974739bfa04f853947"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:08a24a7070b2b6804c1940ff0f910ff728932a9d0e80e7814234269f9d46d069"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e9c5266c432a1e23738d178e51c2c7a5e2ddf790f248be939448c0ba2021f9d1"}, + {file = "cryptography-43.0.0.tar.gz", hash = "sha256:b88075ada2d51aa9f18283532c9f60e72170041bba88d7f37e49cbb10275299e"}, ] [package.dependencies] @@ -733,7 +728,7 @@ nox = ["nox"] pep8test = ["check-sdist", "click", "mypy", "ruff"] sdist = ["build"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test = ["certifi", "cryptography-vectors (==43.0.0)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] [[package]] @@ -834,13 +829,13 @@ idna = ">=2.0.0" [[package]] name = "exceptiongroup" -version = "1.2.1" +version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = 
"exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, - {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, ] [package.extras] @@ -988,36 +983,37 @@ six = "*" [[package]] name = "gen3users" -version = "1.0.3" +version = "1.1.1" description = "Utils for Gen3 Commons user management" optional = false -python-versions = ">=3.9,<4.0" +python-versions = "<4.0,>=3.9" files = [ - {file = "gen3users-1.0.3-py3-none-any.whl", hash = "sha256:faf07717b7df28ea2c25a308e49c65d8ed69e14945c6f36e99deb697240bb8bb"}, - {file = "gen3users-1.0.3.tar.gz", hash = "sha256:a2269433ab886c23db37050144821405c7d5dfcbbadccc43302611aad9e34525"}, + {file = "gen3users-1.1.1-py3-none-any.whl", hash = "sha256:5a38ba90c8cef5f7c4ed6ae2f1f1d733524d48b1b2c60e66db8537e36194faab"}, + {file = "gen3users-1.1.1.tar.gz", hash = "sha256:6636ff127ce145f9104fc72358dd17de54b19be19ae45b89e13876c0adcf4ba0"}, ] [package.dependencies] cdislogging = ">=1,<2" click = "*" pyyaml = ">=6,<7" +requests = "*" [[package]] name = "google-api-core" -version = "2.19.0" +version = "2.19.1" description = "Google API client core library" optional = false python-versions = ">=3.7" files = [ - {file = "google-api-core-2.19.0.tar.gz", hash = "sha256:cf1b7c2694047886d2af1128a03ae99e391108a08804f87cfd35970e49c9cd10"}, - {file = "google_api_core-2.19.0-py3-none-any.whl", hash = "sha256:8661eec4078c35428fd3f69a2c7ee29e342896b70f01d1a1cbcb334372dd6251"}, + {file = "google-api-core-2.19.1.tar.gz", hash = "sha256:f4695f1e3650b316a795108a76a1c416e6afb036199d1c1f1f110916df479ffd"}, + {file = "google_api_core-2.19.1-py3-none-any.whl", hash = 
"sha256:f12a9b8309b5e21d92483bbd47ce2c445861ec7d269ef6784ecc0ea8c1fa6125"}, ] [package.dependencies] google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" proto-plus = ">=1.22.3,<2.0.0dev" -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" +protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" [package.extras] @@ -1027,13 +1023,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-api-python-client" -version = "2.133.0" +version = "2.137.0" description = "Google API Client Library for Python" optional = false python-versions = ">=3.7" files = [ - {file = "google-api-python-client-2.133.0.tar.gz", hash = "sha256:293092905b66a046d3187a99ac454e12b00cc2c70444f26eb2f1f9c1a82720b4"}, - {file = "google_api_python_client-2.133.0-py2.py3-none-any.whl", hash = "sha256:396fe676ea0dfed066654dcf9f8dea77a1342f9d9bb23bb88e45b7b81e773926"}, + {file = "google_api_python_client-2.137.0-py2.py3-none-any.whl", hash = "sha256:a8b5c5724885e5be9f5368739aa0ccf416627da4ebd914b410a090c18f84d692"}, + {file = "google_api_python_client-2.137.0.tar.gz", hash = "sha256:e739cb74aac8258b1886cb853b0722d47c81fe07ad649d7f2206f06530513c04"}, ] [package.dependencies] @@ -1045,13 +1041,13 @@ uritemplate = ">=3.0.1,<5" [[package]] name = "google-auth" -version = "2.30.0" +version = "2.32.0" description = "Google Authentication Library" optional = false python-versions = ">=3.7" files = [ - {file = "google-auth-2.30.0.tar.gz", hash = "sha256:ab630a1320f6720909ad76a7dbdb6841cdf5c66b328d690027e4867bdfb16688"}, - {file = "google_auth-2.30.0-py2.py3-none-any.whl", hash = "sha256:8df7da660f62757388b8a7f249df13549b3373f24388cb5d2f1dd91cc18180b5"}, + {file = 
"google_auth-2.32.0-py2.py3-none-any.whl", hash = "sha256:53326ea2ebec768070a94bee4e1b9194c9646ea0c2bd72422785bd0f9abfad7b"}, + {file = "google_auth-2.32.0.tar.gz", hash = "sha256:49315be72c55a6a37d62819e3573f6b416aca00721f7e3e31a008d928bf64022"}, ] [package.dependencies] @@ -1101,13 +1097,13 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"] [[package]] name = "google-cloud-storage" -version = "2.17.0" +version = "2.18.0" description = "Google Cloud Storage API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google-cloud-storage-2.17.0.tar.gz", hash = "sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388"}, - {file = "google_cloud_storage-2.17.0-py2.py3-none-any.whl", hash = "sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1"}, + {file = "google_cloud_storage-2.18.0-py2.py3-none-any.whl", hash = "sha256:e8e1a9577952143c3fca8163005ecfadd2d70ec080fa158a8b305000e2c22fbb"}, + {file = "google_cloud_storage-2.18.0.tar.gz", hash = "sha256:0aa3f7c57f3632f81b455d91558d2b27ada96eee2de3aaa17f689db1470d9578"}, ] [package.dependencies] @@ -1119,7 +1115,8 @@ google-resumable-media = ">=2.6.0" requests = ">=2.18.0,<3.0.0dev" [package.extras] -protobuf = ["protobuf (<5.0.0dev)"] +protobuf = ["protobuf (<6.0.0dev)"] +tracing = ["opentelemetry-api (>=1.1.0)"] [[package]] name = "google-crc32c" @@ -1221,17 +1218,17 @@ requests = ["requests (>=2.18.0,<3.0.0dev)"] [[package]] name = "googleapis-common-protos" -version = "1.63.1" +version = "1.63.2" description = "Common protobufs used in Google APIs" optional = false python-versions = ">=3.7" files = [ - {file = "googleapis-common-protos-1.63.1.tar.gz", hash = "sha256:c6442f7a0a6b2a80369457d79e6672bb7dcbaab88e0848302497e3ec80780a6a"}, - {file = "googleapis_common_protos-1.63.1-py2.py3-none-any.whl", hash = "sha256:0e1c2cdfcbc354b76e4a211a35ea35d6926a835cba1377073c4861db904a1877"}, + {file = "googleapis-common-protos-1.63.2.tar.gz", hash 
= "sha256:27c5abdffc4911f28101e635de1533fb4cfd2c37fbaa9174587c799fac90aa87"}, + {file = "googleapis_common_protos-1.63.2-py2.py3-none-any.whl", hash = "sha256:27a2499c7e8aff199665b22741997e485eccc8645aa9176c7c988e6fae507945"}, ] [package.dependencies] -protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" +protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" [package.extras] grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] @@ -1379,33 +1376,33 @@ socks = ["socksio (==1.*)"] [[package]] name = "idna" -version = "2.10" +version = "3.7" description = "Internationalized Domain Names in Applications (IDNA)" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = ">=3.5" files = [ - {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, - {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, ] [[package]] name = "importlib-metadata" -version = "7.1.0" +version = "8.1.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"}, - {file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"}, + {file = "importlib_metadata-8.1.0-py3-none-any.whl", hash = "sha256:3cd29f739ed65973840b068e3132135ce954c254d48b5b640484467ef7ab3c8c"}, + {file = 
"importlib_metadata-8.1.0.tar.gz", hash = "sha256:fcdcb1d5ead7bdf3dd32657bb94ebe9d2aabfe89a19782ddc32da5041d6ebfb4"}, ] [package.dependencies] zipp = ">=0.5" [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] perf = ["ipython"] -testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] +test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"] [[package]] name = "isodate" @@ -1472,13 +1469,13 @@ files = [ [[package]] name = "jsonpickle" -version = "3.2.1" +version = "3.2.2" description = "Python library for serializing arbitrary object graphs into JSON" optional = false python-versions = ">=3.7" files = [ - {file = "jsonpickle-3.2.1-py3-none-any.whl", hash = "sha256:ec291e4719674dd35d390fbdb521ac6517fbe9f541d361c8bffc8131133b1661"}, - {file = "jsonpickle-3.2.1.tar.gz", hash = "sha256:4b6d7640974199f7acf9035295365b5a1a71a91109effa15ba170fbb48cf871c"}, + {file = "jsonpickle-3.2.2-py3-none-any.whl", hash = "sha256:87cd82d237fd72c5a34970e7222dddc0accc13fddf49af84111887ed9a9445aa"}, + {file = "jsonpickle-3.2.2.tar.gz", hash = "sha256:d425fd2b8afe9f5d7d57205153403fbf897782204437882a477e8eed60930f8c"}, ] [package.extras] @@ -1733,13 +1730,13 @@ dev = ["pre-commit", "tox"] [[package]] name = "prometheus-client" -version = "0.9.0" +version = "0.20.0" description = "Python client for the Prometheus monitoring system." 
optional = false -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "prometheus_client-0.9.0-py2.py3-none-any.whl", hash = "sha256:b08c34c328e1bf5961f0b4352668e6c8f145b4a087e09b7296ef62cbe4693d35"}, - {file = "prometheus_client-0.9.0.tar.gz", hash = "sha256:9da7b32f02439d8c04f7777021c304ed51d9ec180604700c1ba72a4d44dceb03"}, + {file = "prometheus_client-0.20.0-py3-none-any.whl", hash = "sha256:cde524a85bce83ca359cc837f28b8c0db5cac7aa653a588fd7e84ba061c329e7"}, + {file = "prometheus_client-0.20.0.tar.gz", hash = "sha256:287629d00b147a32dcb2be0b9df905da599b2d82f80377083ec8463309a4bb89"}, ] [package.extras] @@ -1747,39 +1744,39 @@ twisted = ["twisted"] [[package]] name = "proto-plus" -version = "1.23.0" +version = "1.24.0" description = "Beautiful, Pythonic protocol buffers." optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "proto-plus-1.23.0.tar.gz", hash = "sha256:89075171ef11988b3fa157f5dbd8b9cf09d65fffee97e29ce403cd8defba19d2"}, - {file = "proto_plus-1.23.0-py3-none-any.whl", hash = "sha256:a829c79e619e1cf632de091013a4173deed13a55f326ef84f05af6f50ff4c82c"}, + {file = "proto-plus-1.24.0.tar.gz", hash = "sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445"}, + {file = "proto_plus-1.24.0-py3-none-any.whl", hash = "sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12"}, ] [package.dependencies] -protobuf = ">=3.19.0,<5.0.0dev" +protobuf = ">=3.19.0,<6.0.0dev" [package.extras] -testing = ["google-api-core[grpc] (>=1.31.5)"] +testing = ["google-api-core (>=1.31.5)"] [[package]] name = "protobuf" -version = "4.25.3" +version = "5.27.2" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "protobuf-4.25.3-cp310-abi3-win32.whl", hash = "sha256:d4198877797a83cbfe9bffa3803602bbe1625dc30d8a097365dbc762e5790faa"}, - {file = "protobuf-4.25.3-cp310-abi3-win_amd64.whl", hash = "sha256:209ba4cc916bab46f64e56b85b090607a676f66b473e6b762e6f1d9d591eb2e8"}, - 
{file = "protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:f1279ab38ecbfae7e456a108c5c0681e4956d5b1090027c1de0f934dfdb4b35c"}, - {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:e7cb0ae90dd83727f0c0718634ed56837bfeeee29a5f82a7514c03ee1364c019"}, - {file = "protobuf-4.25.3-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:7c8daa26095f82482307bc717364e7c13f4f1c99659be82890dcfc215194554d"}, - {file = "protobuf-4.25.3-cp38-cp38-win32.whl", hash = "sha256:f4f118245c4a087776e0a8408be33cf09f6c547442c00395fbfb116fac2f8ac2"}, - {file = "protobuf-4.25.3-cp38-cp38-win_amd64.whl", hash = "sha256:c053062984e61144385022e53678fbded7aea14ebb3e0305ae3592fb219ccfa4"}, - {file = "protobuf-4.25.3-cp39-cp39-win32.whl", hash = "sha256:19b270aeaa0099f16d3ca02628546b8baefe2955bbe23224aaf856134eccf1e4"}, - {file = "protobuf-4.25.3-cp39-cp39-win_amd64.whl", hash = "sha256:e3c97a1555fd6388f857770ff8b9703083de6bf1f9274a002a332d65fbb56c8c"}, - {file = "protobuf-4.25.3-py3-none-any.whl", hash = "sha256:f0700d54bcf45424477e46a9f0944155b46fb0639d69728739c0e47bab83f2b9"}, - {file = "protobuf-4.25.3.tar.gz", hash = "sha256:25b5d0b42fd000320bd7830b349e3b696435f3b329810427a6bcce6a5492cc5c"}, + {file = "protobuf-5.27.2-cp310-abi3-win32.whl", hash = "sha256:354d84fac2b0d76062e9b3221f4abbbacdfd2a4d8af36bab0474f3a0bb30ab38"}, + {file = "protobuf-5.27.2-cp310-abi3-win_amd64.whl", hash = "sha256:0e341109c609749d501986b835f667c6e1e24531096cff9d34ae411595e26505"}, + {file = "protobuf-5.27.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:a109916aaac42bff84702fb5187f3edadbc7c97fc2c99c5ff81dd15dcce0d1e5"}, + {file = "protobuf-5.27.2-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:176c12b1f1c880bf7a76d9f7c75822b6a2bc3db2d28baa4d300e8ce4cde7409b"}, + {file = "protobuf-5.27.2-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:b848dbe1d57ed7c191dfc4ea64b8b004a3f9ece4bf4d0d80a367b76df20bf36e"}, + {file = "protobuf-5.27.2-cp38-cp38-win32.whl", hash = 
"sha256:4fadd8d83e1992eed0248bc50a4a6361dc31bcccc84388c54c86e530b7f58863"}, + {file = "protobuf-5.27.2-cp38-cp38-win_amd64.whl", hash = "sha256:610e700f02469c4a997e58e328cac6f305f649826853813177e6290416e846c6"}, + {file = "protobuf-5.27.2-cp39-cp39-win32.whl", hash = "sha256:9e8f199bf7f97bd7ecebffcae45ebf9527603549b2b562df0fbc6d4d688f14ca"}, + {file = "protobuf-5.27.2-cp39-cp39-win_amd64.whl", hash = "sha256:7fc3add9e6003e026da5fc9e59b131b8f22b428b991ccd53e2af8071687b4fce"}, + {file = "protobuf-5.27.2-py3-none-any.whl", hash = "sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470"}, + {file = "protobuf-5.27.2.tar.gz", hash = "sha256:f3ecdef226b9af856075f28227ff2c90ce3a594d092c39bee5513573f25e2714"}, ] [[package]] @@ -1793,6 +1790,8 @@ files = [ {file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"}, {file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"}, {file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"}, + {file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"}, + {file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"}, {file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"}, {file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"}, {file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"}, @@ -1815,13 +1814,13 @@ files = [ [[package]] name = "pyaml" -version = "24.4.0" +version = "24.7.0" description = "PyYAML-based module to produce a bit more 
pretty and readable YAML-serialized data" optional = false python-versions = ">=3.8" files = [ - {file = "pyaml-24.4.0-py3-none-any.whl", hash = "sha256:acc2b39c55cb0cbe4f694a6d3886f89ad3d2a5b3efcece526202f8de9a6b27de"}, - {file = "pyaml-24.4.0.tar.gz", hash = "sha256:0e483d9289010e747a325dc43171bcc39d6562dd1dd4719e8cc7e7c96c99fce6"}, + {file = "pyaml-24.7.0-py3-none-any.whl", hash = "sha256:6b06596cb5ac438a3fad1e1bf5775088c4d3afb927e2b03a29305d334835deb2"}, + {file = "pyaml-24.7.0.tar.gz", hash = "sha256:5d0fdf9e681036fb263a783d0298fc3af580a6e2a6cf1a3314ffc48dc3d91ccb"}, ] [package.dependencies] @@ -2124,6 +2123,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -2229,13 +2229,13 @@ pyasn1 = ">=0.1.3" [[package]] name = "s3transfer" -version = "0.10.1" +version = "0.10.2" description = "An Amazon S3 Transfer Manager" optional = false -python-versions = ">= 3.8" +python-versions = ">=3.8" files = [ - {file = "s3transfer-0.10.1-py3-none-any.whl", hash = 
"sha256:ceb252b11bcf87080fb7850a224fb6e05c8a776bab8f2b64b7f25b969464839d"}, - {file = "s3transfer-0.10.1.tar.gz", hash = "sha256:5683916b4c724f799e600f41dd9e10a9ff19871bf87623cc8f491cb4f5fa0a19"}, + {file = "s3transfer-0.10.2-py3-none-any.whl", hash = "sha256:eca1c20de70a39daee580aef4986996620f365c4e0fda6a86100231d62f1bf69"}, + {file = "s3transfer-0.10.2.tar.gz", hash = "sha256:0711534e9356d3cc692fdde846b4a1e4b0cb6519971860796e6bc4c7aea00ef6"}, ] [package.dependencies] @@ -2578,4 +2578,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0.0" -content-hash = "3d8d2736573ee2b07bc32fb6a4ad3d84366be2e4d5f9b4fe0dfd18286bc58c57" +content-hash = "d003418dcc0d68257a215186d21776941f8739bd3b4f898762a93e7895d7c89e" diff --git a/pyproject.toml b/pyproject.toml old mode 100755 new mode 100644 index 742a05c43..143ca2940 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "fence" -version = "10.0.0" +version = "10.1.0" description = "Gen3 AuthN/AuthZ OIDC Service" authors = ["CTDS UChicago "] license = "Apache-2.0" @@ -31,14 +31,14 @@ gen3authz = "^1.5.1" gen3cirrus = ">=3.0.1" gen3config = ">=1.1.0" gen3users = "^1.0.2" -idna = "^2.10" # https://github.com/python-poetry/poetry/issues/3555 +idna = "^3.7" markdown = "^3.1.1" # this markupsafe pin is due to an error somewhere between Python 3.9.6 and 3.9.16 markupsafe = "^2.0.1" paramiko = ">=2.6.0" -prometheus-client = "^0.9.0" +prometheus-client = "<1" psycopg2 = "^2.8.3" PyJWT = "^2.4.0" python_dateutil = "^2.6.1" diff --git a/tests/ci_commands_script.sh b/tests/ci_commands_script.sh index 5ab1d6c6c..fef98a668 100755 --- a/tests/ci_commands_script.sh +++ b/tests/ci_commands_script.sh @@ -1,3 +1,5 @@ #!/usr/bin/env bash +mkdir -p /var/tmp/uwsgi_flask_metrics/ || true +export PROMETHEUS_MULTIPROC_DIR="/var/tmp/uwsgi_flask_metrics/" poetry run pytest -vv --cov=fence --cov-report xml tests diff --git 
a/tests/conftest.py b/tests/conftest.py index f7b9d432d..273f4a496 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -552,6 +552,13 @@ def drop_all(): return app.db +@pytest.fixture +def prometheus_metrics_before(client): + resp = client.get("/metrics") + assert resp.status_code == 200, "Could not get prometheus metrics initial state" + yield resp.text + + @fence.app.route("/protected") @fence.auth.login_required({"access"}) def protected_endpoint(methods=["GET"]): @@ -966,6 +973,7 @@ def do_patch(authz): "mocker": mocker, # only gs or s3 for location, ignore specifiers after the _ "indexed_file_location": protocol.split("_")[0], + "record": record, } return output @@ -1734,3 +1742,37 @@ def get_all_shib_idps_patcher(): yield mock get_all_shib_idps_patch.stop() + + +@pytest.fixture(scope="function") +def mock_authn_user_flask_context(app): + """ + Mock g and session to simulate a simple user who has authenticated. + + This is primarily to ensure that tests which mock the start of authN where sessions get set can still + test the callbacks (where metrics logging rely on session data). 
+ """ + from flask import g + from flask import session + + g_before = copy.deepcopy(g) + session_before = copy.deepcopy(session) + + user_mock = MagicMock() + user_mock.id = 1 + + user_mocker = MagicMock() + user_mocker.return_value = user_mock + g.user = user_mocker + + session = MagicMock() + session.return_value = { + "fence_idp": "google", + "shib_idp": "shib_idp_foobar", + "client_id": "client_id_foobar", + } + + yield + + g = g_before + session = session_before diff --git a/tests/login/test_base.py b/tests/login/test_base.py index a9bfff7ec..a32452b2c 100644 --- a/tests/login/test_base.py +++ b/tests/login/test_base.py @@ -4,7 +4,7 @@ @patch("fence.blueprints.login.base.prepare_login_log") -def test_post_login_set_mfa(app, monkeypatch): +def test_post_login_set_mfa(app, monkeypatch, mock_authn_user_flask_context): """ Verifies the arborist is called with the mfa_policy if a given token contains the claims found in the configured multifactor_auth_claim_info @@ -37,7 +37,7 @@ def test_post_login_set_mfa(app, monkeypatch): @patch("fence.blueprints.login.base.prepare_login_log") -def test_post_login_no_mfa_enabled(app, monkeypatch): +def test_post_login_no_mfa_enabled(app, monkeypatch, mock_authn_user_flask_context): """ Verifies arborist is not called when there is no multifactor_auth_claim_info defined for the given IDP. 
""" diff --git a/tests/test_app_config.py b/tests/test_app_config.py index 3c3af9fd0..ec7b7b8b7 100755 --- a/tests/test_app_config.py +++ b/tests/test_app_config.py @@ -81,7 +81,6 @@ def test_app_config(): {"patch_name": "fence.app_sessions"}, {"patch_name": "fence.app_register_blueprints"}, {"patch_name": "fence.oidc.oidc_server.OIDCServer.init_app"}, - {"patch_name": "fence._setup_prometheus"}, { "patch_name": "fence.resources.storage.StorageManager.__init__", "return_value": None, diff --git a/tests/test_audit_service.py b/tests/test_metrics.py similarity index 71% rename from tests/test_audit_service.py rename to tests/test_metrics.py index cbf5c8cf2..be7d6b2ab 100644 --- a/tests/test_audit_service.py +++ b/tests/test_metrics.py @@ -1,9 +1,15 @@ """ +Tests for the metrics features (Audit Service and Prometheus) + Tests for the Audit Service integration: - test the creation of presigned URL audit logs - test the creation of login audit logs - test the SQS flow +In Audit Service tests where it makes sense, we also test that Prometheus +metrics are created as expected. The last section tests Prometheus metrics +independently. + Note 1: there is no test for the /oauth2 endpoint: the /oauth2 endpoint should redirect the user to the /login endpoint (tested in `test_redirect_oauth2_authorize`), and the login endpoint should @@ -16,7 +22,6 @@ tests looking at users are not affected. 
""" - import boto3 import flask import json @@ -27,11 +32,17 @@ from unittest.mock import ANY, MagicMock, patch import fence +from fence.metrics import metrics from fence.config import config +from fence.blueprints.data.indexd import get_bucket_from_urls +from fence.models import User from fence.resources.audit.utils import _clean_authorization_request_url from tests import utils from tests.conftest import LOGIN_IDPS +# `reset_prometheus_metrics` must be imported even if not used so the autorun fixture gets triggered +from tests.utils.metrics import assert_prometheus_metrics, reset_prometheus_metrics + def test_clean_authorization_request_url(): """ @@ -111,6 +122,7 @@ def __init__(self, data, status_code=200): @pytest.mark.parametrize("protocol", ["gs", None]) def test_presigned_url_log( endpoint, + prometheus_metrics_before, protocol, client, user_client, @@ -126,7 +138,7 @@ def test_presigned_url_log( """ Get a presigned URL from Fence and make sure a call to the Audit Service was made to create an audit log. Test with and without a requested - protocol. + protocol. Also check that a prometheus metric is created. 
""" mock_arborist_requests({"arborist/auth/request": {"POST": ({"auth": True}, 200)}}) audit_service_mocker = mock.patch( @@ -142,7 +154,7 @@ def test_presigned_url_log( else: path = f"/ga4gh/drs/v1/objects/{guid}/access/{protocol or 's3'}" resource_paths = ["/my/resource/path1", "/path2"] - indexd_client_with_arborist(resource_paths) + record = indexd_client_with_arborist(resource_paths)["record"] headers = { "Authorization": "Bearer " + jwt.encode( @@ -183,6 +195,39 @@ def test_presigned_url_log( }, ) + # check prometheus metrics + resp = client.get("/metrics") + assert resp.status_code == 200 + bucket = get_bucket_from_urls(record["urls"], expected_protocol) + size_in_kibibytes = record["size"] / 1024 + expected_metrics = [ + { + "name": "gen3_fence_presigned_url_total", + "labels": { + "action": "download", + "authz": resource_paths, + "bucket": bucket, + "drs": endpoint == "ga4gh-drs", + "protocol": expected_protocol, + "user_sub": user_client.user_id, + }, + "value": 1.0, + }, + { + "name": "gen3_fence_presigned_url_size", + "labels": { + "action": "download", + "authz": resource_paths, + "bucket": bucket, + "drs": endpoint == "ga4gh-drs", + "protocol": expected_protocol, + "user_sub": user_client.user_id, + }, + "value": size_in_kibibytes, + }, + ] + assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics) + @pytest.mark.parametrize( "indexd_client_with_arborist", ["s3_and_gs_acl_no_authz"], indirect=True @@ -411,10 +456,11 @@ def test_login_log_login_endpoint( rsa_private_key, db_session, # do not remove :-) See note at top of file monkeypatch, + prometheus_metrics_before, ): """ Test that logging in via any of the existing IDPs triggers the creation - of a login audit log. + of a login audit log and of a prometheus metric. 
""" mock_arborist_requests() audit_service_mocker = mock.patch( @@ -493,13 +539,14 @@ def test_login_log_login_endpoint( path = f"/login/{idp}/{callback_endpoint}" # SEE fence/blueprints/login/fence_login.py L91 response = client.get(path, headers=headers) assert response.status_code == 200, response + user_sub = db_session.query(User).filter(User.username == username).first().id audit_service_requests.post.assert_called_once_with( "http://audit-service/log/login", json={ "request_url": path, "status_code": 200, "username": username, - "sub": ANY, + "sub": user_sub, "idp": idp_name, "fence_idp": None, "shib_idp": None, @@ -510,10 +557,27 @@ def test_login_log_login_endpoint( if get_auth_info_patch: get_auth_info_patch.stop() + # check prometheus metrics + resp = client.get("/metrics") + assert resp.status_code == 200 + expected_metrics = [ + { + "name": "gen3_fence_login_total", + "labels": {"idp": "all", "user_sub": user_sub}, + "value": 1.0, + }, + { + "name": "gen3_fence_login_total", + "labels": {"idp": idp_name, "user_sub": user_sub}, + "value": 1.0, + }, + ] + assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics) + -########################## -# Push audit logs to SQS # -########################## +########################################## +# Audit Service - Push audit logs to SQS # +########################################## def mock_audit_service_sqs(app): @@ -638,3 +702,171 @@ def test_login_log_push_to_sqs( mocked_sqs.send_message.assert_called_once() get_auth_info_patch.stop() + + +###################### +# Prometheus metrics # +###################### + + +def test_disabled_prometheus_metrics(client, monkeypatch): + """ + When metrics gathering is not enabled, the metrics endpoint should not error, but it should + not return any data. 
+ """ + monkeypatch.setitem(config, "ENABLE_PROMETHEUS_METRICS", False) + metrics.add_login_event( + user_sub="123", + idp="test_idp", + fence_idp="shib", + shib_idp="university", + client_id="test_azp", + ) + resp = client.get("/metrics") + assert resp.status_code == 200 + assert resp.text == "" + + +def test_record_prometheus_events(prometheus_metrics_before, client): + """ + Validate the returned value of the metrics endpoint before any event is logged, after an event + is logged, and after more events (one identical to the 1st one, and two different) are logged. + """ + # NOTE: To update later. The metrics utils don't support this yet. The gauges are not handled correctly. + # resp = client.get("/metrics") + # assert resp.status_code == 200 + # # no metrics have been recorded yet + # assert_prometheus_metrics(prometheus_metrics_before, resp.text, []) + + # record a login event and check that we get both a metric for the specific IDP, and an + # IDP-agnostic metric for the total number of login events. The latter should have no IDP + # information (no `fence_idp` or `shib_idp`). 
+ metrics.add_login_event( + user_sub="123", + idp="test_idp", + fence_idp="shib", + shib_idp="university", + client_id="test_azp", + ) + resp = client.get("/metrics") + assert resp.status_code == 200 + expected_metrics = [ + { + "name": "gen3_fence_login_total", + "labels": { + "user_sub": "123", + "idp": "test_idp", + "fence_idp": "shib", + "shib_idp": "university", + "client_id": "test_azp", + }, + "value": 1.0, + }, + { + "name": "gen3_fence_login_total", + "labels": { + "user_sub": "123", + "idp": "all", + "fence_idp": "None", + "shib_idp": "None", + "client_id": "test_azp", + }, + "value": 1.0, + }, + ] + assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics) + + # same login: should increase the existing counter by 1 + metrics.add_login_event( + user_sub="123", + idp="test_idp", + fence_idp="shib", + shib_idp="university", + client_id="test_azp", + ) + # login with different IDP labels: should create a new metric + metrics.add_login_event( + user_sub="123", + idp="another_idp", + fence_idp=None, + shib_idp=None, + client_id="test_azp", + ) + # new signed URL event: should create a new metric + metrics.add_signed_url_event( + action="upload", + protocol="s3", + acl=None, + authz=["/test/path"], + bucket="s3://test-bucket", + user_sub="123", + client_id="test_azp", + drs=True, + size_in_kibibytes=1.2, + ) + resp = client.get("/metrics") + assert resp.status_code == 200 + expected_metrics = [ + { + "name": "gen3_fence_login_total", + "labels": { + "user_sub": "123", + "idp": "all", + "fence_idp": "None", + "shib_idp": "None", + "client_id": "test_azp", + }, + "value": 3.0, # recorded login events since the beginning of the test + }, + { + "name": "gen3_fence_login_total", + "labels": { + "user_sub": "123", + "idp": "test_idp", + "fence_idp": "shib", + "shib_idp": "university", + "client_id": "test_azp", + }, + "value": 2.0, # recorded login events for this idp, fence_idp and shib_idp combo + }, + { + "name": "gen3_fence_login_total", 
+ "labels": { + "user_sub": "123", + "idp": "another_idp", + "fence_idp": "None", + "shib_idp": "None", + "client_id": "test_azp", + }, + "value": 1.0, # recorded login events for the different idp + }, + { + "name": "gen3_fence_presigned_url_total", + "labels": { + "user_sub": "123", + "action": "upload", + "protocol": "s3", + "authz": ["/test/path"], + "bucket": "s3://test-bucket", + "user_sub": "123", + "client_id": "test_azp", + "drs": True, + }, + "value": 1.0, # recorded presigned URL events + }, + { + "name": "gen3_fence_presigned_url_size", + "labels": { + "user_sub": "123", + "action": "upload", + "protocol": "s3", + "authz": ["/test/path"], + "bucket": "s3://test-bucket", + "user_sub": "123", + "client_id": "test_azp", + "drs": True, + }, + "value": 1.2, # presigned URL gauge with the file size as value + }, + ] + assert_prometheus_metrics(prometheus_metrics_before, resp.text, expected_metrics) diff --git a/tests/utils/metrics.py b/tests/utils/metrics.py new file mode 100644 index 000000000..0443589a0 --- /dev/null +++ b/tests/utils/metrics.py @@ -0,0 +1,223 @@ +""" +At the time of writing, Prometheus metrics out of the box can't be reset between each +unit test. To be able to write independent unit tests, we have to manually save the "previous +state" (see `prometheus_metrics_before` fixture) and compare it to the new state. This involves +manually parsing the "previous state" (a python object) and the "current state" (raw text) into +the same format so they can be compared: +{ "name": "", "labels": {}, "value": 0 } + +The utility functions below can be used to check that the expected metrics have been recorded, +while discarding any previous metrics. 
+ +https://stackoverflow.com/questions/73198616/how-do-i-reset-a-prometheus-python-client-python-runtime-between-pytest-test-fun +""" + + +import os +import shutil + +import pytest + + +@pytest.fixture(autouse=True, scope="session") +def reset_prometheus_metrics(): + """ + Delete the prometheus files after all the tests have run. + Without this, when running the tests locally, we would keep reading the metrics from + previous test runs. + So why not run this in-between the unit tests instead of the `assert_prometheus_metrics` + logic? Because it doesn't work, the prometheus client also keeps the state, and the mismatch + causes errors. This only works when the client is reset too (new process) + """ + yield + + folder = os.environ["PROMETHEUS_MULTIPROC_DIR"] + for filename in os.listdir(folder): + file_path = os.path.join(folder, filename) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except Exception as e: + print(f"Failed to delete Prometheus metrics file '{file_path}': {e}") + + +def _diff_new_metrics_from_old_metrics(new_metrics, old_metrics): + """ + Return a list of "current metrics" by comparing the "new metrics" (current state) to the "old metrics" (previous state). 
+
+    Input metric format example: {
+        'gen3_fence_login_total{client_id="test_azp",fence_idp="shib",idp="test_idp",shib_idp="university",user_sub="123"}': 2.0,
+        'gen3_fence_login_total{client_id="test_azp",fence_idp="None",idp="all",shib_idp="None",user_sub="123"}': 3.0,
+    }
+
+    Functionality example:
+        old_metrics = { 'counter1': 2, 'counter2': 2, 'gauge1': 1 }
+        new_metrics = { 'counter1': 1, 'counter3': 1 }
+        Returned value = [
+            ('counter1', 1) (difference between 2 and 1),
+            ('counter3', 1)
+        ] (counter2 and gauge1 omitted since they are not part of the current state)
+
+    Args:
+        new_metrics (dict): format { <metric name and labels>: <value> }
+        old_metrics (dict): format { <metric name and labels>: <value> }
+        }
+
+    Return:
+        list of (<metric name and labels>, <value>) tuples
+    """
+
+    def metric_is_gauge(metric_name):
+        return not metric_name.endswith("_total") and not metric_name.endswith(
+            "_created"
+        )
+
+    diff = []
+    for long_metric_name, old_value in old_metrics.items():
+        # long_metric_name = metric name + labels (see example in docstring)
+        metric_name = long_metric_name.split("{")[0]
+        if long_metric_name not in new_metrics or metric_is_gauge(metric_name):
+            # ignore all old metrics that are not also present in the new metrics
+            continue
+        # the metric value generated by the current test is the difference between the previous
+        # value and the current value
+        val = new_metrics[long_metric_name] - old_value
+        if val != 0:
+            diff.append((long_metric_name, val))
+    for long_metric_name, new_value in new_metrics.items():
+        metric_name = long_metric_name.split("{")[0]
+        if metric_is_gauge(metric_name):  # all gauge metrics must be listed
+            diff.append((long_metric_name, new_value))
+        elif long_metric_name not in old_metrics:
+            diff.append((long_metric_name, new_value))
+    return diff
+
+
+def _parse_raw_metrics_to_dict(text_metric):
+    """
+    Parse raw text metrics into a dictionary of metric (metric name + labels) to value,
+    ignoring lines that are not metrics.
+ + Args: + text_metric (str) + Example: + # TYPE gen3_fence_login_total counter + gen3_fence_login_total{idp="test_idp",shib_idp="university",user_sub="123"} 2.0 + # HELP gen3_fence_presigned_url_total Fence presigned urls + # TYPE gen3_fence_presigned_url_total counter + gen3_fence_presigned_url_total{client_id="test_azp",drs="True",user_sub="123"} 1.0 + + Return: + dict + Example: + { + "gen3_fence_login_total{idp="test_idp",shib_idp="university",user_sub="123"}": 2.0, + "gen3_fence_presigned_url_total{client_id="test_azp",drs="True",user_sub="123"}": 1.0, + } + """ + if not text_metric: + return {} + return { + " ".join(m.split(" ")[:-1]): float(m.split(" ")[-1]) + for m in text_metric.strip().split("\n") + if not m.startswith("#") + } + + +def _parse_raw_name_to_labels(text_metric_name): + """ + Parse a raw metric name into a name and a dict of labels. + + Example: + text_metric_name = `metric_name{param1="None",param2="upload",param3="['/test/path']"` + Returned value = { + "name": "metric_name", + "labels": { "param1": "None", "param2": "upload", "param3": "['/test/path']" } + } + + Args: + text_metric (str) + + Returns: + dict + """ + name = text_metric_name.split("{")[0] + labels = text_metric_name.split("{")[1].split("}")[0].split('",') + labels = {l.split("=")[0]: l.split("=")[1].strip('"') for l in labels} + return {"name": name, "labels": labels} + + +def assert_prometheus_metrics( + previous_text_metrics, current_text_metrics, expected_metrics +): + """ + Compare the previous state and the current state of prometheus metrics, and checks if the difference between the 2 is the same as the new metrics a test expects to have recorded. 
+ + Expected: only provide labels we need to check for, the rest will be ignored + + Args: + previous_text_metrics (str): previous state of prometheus metrics + current_text_metrics (str): current state + Example `previous_text_metrics` or `current_text_metrics`: + # TYPE gen3_fence_login_total counter + gen3_fence_login_total{idp="test_idp",shib_idp="university",user_sub="123"} 2.0 + # HELP gen3_fence_presigned_url_total Fence presigned urls + # TYPE gen3_fence_presigned_url_total counter + gen3_fence_presigned_url_total{acl="None",action="upload",authz="['/test/path']",bucket="s3://test-bucket",client_id="test_azp",drs="True",protocol="s3",user_sub="123"} 1.0 + expected_metrics (list): the expected difference between previous state and current state. + Only provide the labels we need to check; omitted labels will be ignored even if they + are present in the current state. + Example: [ + { + 'name': 'gen3_fence_login_total', + 'labels': { + 'idp': 'test_idp', 'shib_idp': 'university', 'user_sub': '123' + }, + 'value': 2.0 + } + ] + """ + old_metrics = _parse_raw_metrics_to_dict(previous_text_metrics) + print("Old metrics:") + for k, v in old_metrics.items(): + print(f"- {k} = {v}") + + new_metrics = _parse_raw_metrics_to_dict(current_text_metrics) + print("Received metrics:") + for k, v in new_metrics.items(): + print(f"- {k} = {v}") + + diff_metrics = _diff_new_metrics_from_old_metrics(new_metrics, old_metrics) + current_metrics = [] + print("Diff:") + for (metric_name, val) in diff_metrics: + parsed_m = _parse_raw_name_to_labels(metric_name) + parsed_m["value"] = val + current_metrics.append(parsed_m) + print(f"- {parsed_m}") + + print("Expecting metrics:") + # check that for each metric+label combination, the value is identical to the expected value + for expected_m in expected_metrics: + found = False + print(f"- {expected_m}") + for current_m in current_metrics: # look for the right metric + if current_m["name"] != expected_m["name"]: + continue + # if the 
metric name is identical, check the labels + right_labels = True + for label_k, label_v in expected_m["labels"].items(): + if current_m["labels"].get(label_k) != str(label_v): + right_labels = False + break + # if both the name and the labels are identical, this is the right metric: + # check that the value is the same as expected + if right_labels: + assert ( + current_m["value"] == expected_m["value"] + ), f"Missing metric: {expected_m}" + found = True + break # we found the right metric and it has the right value: moving on + assert found, f"Missing metric: {expected_m}"