diff --git a/.env.example b/.env.example index c3e07dee..40909b65 100644 --- a/.env.example +++ b/.env.example @@ -27,12 +27,6 @@ AWS_SECRET_ACCESS_KEY='' AWS_REGION='' AWS_STORAGE_BUCKET_NAME='' -# bigquery credentials -BIGQUERY_ENABLED=False -BIGQUERY_PROJECT_ID='' -BIGQUERY_LOCATION='' -BIGQUERY_CREDENTIALS='' - # redis details REDIS_HOSTNAME='redis' REDIS_PORT=6379 @@ -44,13 +38,6 @@ DEFAULT_TENANT_NAME=Plio DEFAULT_TENANT_SHORTCODE=plio DEFAULT_TENANT_DOMAIN=0.0.0.0 -# Analytics Identity Provider (IDP) configurations -ANALYTICS_IDP_TYPE='' # possible values are `cognito` or `auth0` -ANALYTICS_IDP_TOKEN_URL='' -ANALYTICS_IDP_CLIENT_ID='' -ANALYTICS_IDP_CLIENT_SECRET='' -ANALYTICS_IDP_AUDIENCE='' # not needed when IDP is `cognito` - # The driver for sending SMSs. Possible values are `sns` or `log`. # Use `sns` to have AWS SNS support. The AWS credentials must be present for this. # Use an empty string to log SMSs into a file instead. Recommended for development mode. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 888299f7..814d147e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,10 +63,6 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.AWS_REGION }} AWS_STORAGE_BUCKET_NAME: ${{ secrets.AWS_STORAGE_BUCKET_NAME }} - ANALYTICS_IDP_TYPE: ${{ secrets.ANALYTICS_IDP_TYPE }} - ANALYTICS_IDP_TOKEN_URL: ${{ secrets.ANALYTICS_IDP_TOKEN_URL }} - ANALYTICS_IDP_CLIENT_ID: ${{ secrets.ANALYTICS_IDP_CLIENT_ID }} - ANALYTICS_IDP_CLIENT_SECRET: ${{ secrets.ANALYTICS_IDP_CLIENT_SECRET }} REDIS_HOSTNAME: 127.0.0.1 REDIS_PORT: 6379 # command to run tests and generate coverage metrics @@ -74,133 +70,3 @@ jobs: - name: Upload coverage to Codecov uses: codecov/codecov-action@v1 - - integration-tests: - name: Integration tests - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - - name: Set up Plio Frontend - run: | - # clone the project - mkdir -p projects/ && cd projects/ - git clone https://github.com/avantifellows/plio-frontend/ - cd plio-frontend/ - - # check branch and switch to branch if exists - if [ `git branch --list --remote origin/${{ github.head_ref }}` ] - then - echo "Switching to branch ${{ github.head_ref }}." - git checkout ${{ github.head_ref }} - git pull origin ${{ github.head_ref }} - else - echo "Branch not found. Going with default branch." - fi - - # create the env file - cp .env.example .env - - # add env secrets - echo 'VUE_APP_GOOGLE_CLIENT_ID=${{ secrets.GOOGLE_OAUTH2_CLIENT_ID }}' >> .env - echo 'VUE_APP_BACKEND_API_CLIENT_ID=${{ secrets.DJANGO_DEFAULT_OAUTH2_CLIENT_ID }}' >> .env - echo 'VUE_APP_BACKEND_API_CLIENT_SECRET=${{ secrets.DJANGO_DEFAULT_OAUTH2_CLIENT_SECRET }}' >> .env - - # setup docker containers - docker-compose up -d --build - - - name: Set up Plio Backend - run: | - # clone the project - mkdir -p projects/ && cd projects/ - git clone https://github.com/avantifellows/plio-backend - cd plio-backend/ - - # check branch and switch to branch if exists - if [ `git branch --list --remote origin/${{ github.head_ref }}` ] - then - echo "Switching to branch ${{ github.head_ref }}." - git checkout ${{ github.head_ref }} - git pull origin ${{ github.head_ref }} - else - echo "Branch not found. Going with default branch." 
- fi - - # create the env file - cp .env.example .env - - # add env secrets - echo 'SECRET_KEY=${{ secrets.DJANGO_SECRET_KEY }}' >> .env - echo 'DEFAULT_OAUTH2_CLIENT_SETUP=${{ secrets.DJANGO_DEFAULT_OAUTH2_CLIENT_SETUP }}' >> .env - echo 'DEFAULT_OAUTH2_CLIENT_ID=${{ secrets.DJANGO_DEFAULT_OAUTH2_CLIENT_ID }}' >> .env - echo 'DEFAULT_OAUTH2_CLIENT_SECRET=${{ secrets.DJANGO_DEFAULT_OAUTH2_CLIENT_SECRET }}' >> .env - echo 'GOOGLE_OAUTH2_CLIENT_ID=${{ secrets.GOOGLE_OAUTH2_CLIENT_ID }}' >> .env - echo 'GOOGLE_OAUTH2_CLIENT_SECRET=${{ secrets.GOOGLE_OAUTH2_CLIENT_SECRET }}' >> .env - - # setup docker containers - docker-compose up -d --build - - - name: Set up Plio Analytics - run: | - # clone the project - mkdir -p projects/ && cd projects/ - git clone https://github.com/avantifellows/plio-analytics - cd plio-analytics/ - - # check branch and switch to branch if exists - if [ `git branch --list --remote origin/${{ github.head_ref }}` ] - then - echo "Switching to branch ${{ github.head_ref }}." - git checkout ${{ github.head_ref }} - git pull origin ${{ github.head_ref }} - else - echo "Branch not found. Going with default branch." - fi - - # create the env file - cp .env.example .env - - # add env secrets - echo 'CUBEJS_API_SECRET=${{ secrets.ANALYTICS_CUBEJS_API_SECRET }}' >> .env - - # setup docker containers - docker-compose up -d --build - - - name: Run cypress - run: | - cd projects/plio-frontend/ - - # delete the node_modules created by the docker - echo 'deleting node modules' - rm -rf node_modules/ - - # install dependencies from the current shell user - echo 'running npm' - npm install - echo 'finished npm install' - - # setup env secrets - export cypress_backend_convert_social_auth_token_url=${{ secrets.CYPRESS_PLIO_BACKEND_CONVERT_SOCIAL_AUTH_TOKEN_URL }} - export cypress_backend_client_id=${{ secrets.DJANGO_DEFAULT_OAUTH2_CLIENT_ID }} - export cypress_backend_client_secret=${{ secrets.DJANGO_DEFAULT_OAUTH2_CLIENT_SECRET }} - export cypress_auth_google_refresh_token=${{ secrets.CYPRESS_AUTH_GOOGLE_REFRESH_TOKEN }} - export cypress_auth_google_client_id=${{ secrets.GOOGLE_OAUTH2_CLIENT_ID }} - export cypress_auth_google_client_secret=${{ secrets.GOOGLE_OAUTH2_CLIENT_SECRET }} - - # run cypress test cases - npx cypress run --record --key ${{ secrets.CYPRESS_RECORD_KEY }} - - - name: Coveralls - uses: coverallsapp/github-action@master - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - path-to-lcov: ./projects/plio-frontend/coverage/lcov.info - - - name: Stop the containers - if: always() - run: | - cd projects/ - cd plio-frontend/ && docker-compose down - cd ../plio-backend/ && docker-compose down - cd ../plio-analytics/ && docker-compose down diff --git a/docs/DEPLOYMENT.md b/docs/DEPLOYMENT.md index 19bc5378..2b058856 100644 --- a/docs/DEPLOYMENT.md +++ b/docs/DEPLOYMENT.md @@ -19,7 +19,6 @@ Deploying on AWS requires a basic understanding of the following tools and servi 9. AWS Elastic IPs 10. AWS Identity and Access Management (IAM) 11. AWS Relational Database Service (RDS) -12. Google BigQuery ## Staging @@ -245,11 +244,6 @@ Follow the steps below to set up the staging environment on AWS. - AWS_SECRET_ACCESS_KEY - AWS_REGION - AWS_STORAGE_BUCKET_NAME - - ANALYTICS_IDP_TYPE - - ANALYTICS_IDP_TOKEN_URL - - ANALYTICS_IDP_CLIENT_ID - - ANALYTICS_IDP_CLIENT_SECRET - - ANALYTICS_IDP_AUDIENCE (optional) 14. We are using Github Actions to trigger deployments. You can find the workflow defined in `.github/workflows/deploy_to_ecs_staging.yml`. 
It defines a target branch such that a deployment is initiated whenever a change is pushed to the target branch. @@ -296,6 +290,3 @@ Setting up a production environment on AWS is almost the same as staging. Additi 14. Save the scaling policy. 15. Create or update the service name. 16. Use [k6.io](https://k6.io/) or other load testing tool to check if auto-scaling is working fine or not. You can lower down the target threshold for testing purposes. -5. If you're setting up [Plio Analytics](https://github.com/avantifellows/plio-analytics), also make sure to configure the required environment variables: - 1. [Identity Provider for Plio Analytics](./ENV.md#identity-provider-for-plio-analytics). - 2. [BigQuery configurations](./ENV.md#bigquery-configurations). diff --git a/docs/ENV.md b/docs/ENV.md index 57d49c6c..bf55d1fa 100644 --- a/docs/ENV.md +++ b/docs/ENV.md @@ -103,53 +103,6 @@ Shortcode for the default tenant (e.g. plio) The domain for the default tenant (e.g. 0.0.0.0 locally, plio.in on production) -### Identity Provider for Plio Analytics -While setting up Plio analytics, you need to make sure the following variables are also updated. These are responsible to fetch an access token from the configured Identity Provider. - -#### `ANALYTICS_IDP_TYPE` -Plio Analytics supports two identity providers. The possible values for this variable are `cognito` (AWS Cognito) and `auth0` (Auth0). - -#### `ANALYTICS_IDP_TOKEN_URL` -The url to request access token from the Identity Provider. Generally looks like: -1. When type is `cognito`: `https://.auth..amazoncognito.com/oauth2/token`. This is the same as the Amazon Cognito domain you have configured. -2. When type is `auth0`: `https://..auth0.com/oauth/token` - -#### `ANALYTICS_IDP_CLIENT_ID` -The client id for your identity provider app. -1. When type is `cognito`: Retrieve this from your User pool's "App clients" page. -2. When type is `auth0`: Retrieve from Auth0 Application settings page. - -#### `ANALYTICS_IDP_CLIENT_SECRET` -The client secret for your identity provider app. -1. When type is `cognito`: Retrieve this from your User pool's "App clients" page. -2. When type is `auth0`: Retrieve from Auth0 Application settings page. - -#### `ANALYTICS_IDP_AUDIENCE` -Unique Identifier for your Auth0 API. -1. When type is `cognito`: Not needed. -2. When type is `auth0`: Retrieve from Auth0 API settings. - - -### BigQuery configurations -BigQuery settings are needed if Plio Analytics is configured to use BigQuery. We recommended using BigQuery for staging/production set ups. - -#### `BIGQUERY_ENABLED` -Boolean value. Defaults to `False` if not set. - -#### `BIGQUERY_PROJECT_ID` -The BigQuery project id that contains the datasets. - -#### `BIGQUERY_LOCATION` -The location of the BigQuery project. All datasets must be in the same location. - -#### `BIGQUERY_CREDENTIALS` -This is a base64 encoded value of your Google Cloud Platform's service account. You can learn more about acquiring service account credentials [here](https://cloud.google.com/docs/authentication/getting-started) and [here](https://console.cloud.google.com/projectselector2/iam-admin/serviceaccounts?supportedpurview=project). The service account must have BigQuery admin permissions. - -Once you have downloaded the JSON file, run the following commands and use the output for this environment variable: -```sh -cat /path/to/gcp-service-account-filename.json | base64 -``` - ### Error monitoring Plio supports error monitoring on your app with [Sentry](https://sentry.io/). 
Visit our guide on [Error Monitoring](./docs/../ERROR-MONITORING.md) to enable it for your Plio setup. diff --git a/organizations/middleware.py b/organizations/middleware.py index d56a0000..765a3eba 100644 --- a/organizations/middleware.py +++ b/organizations/middleware.py @@ -37,7 +37,6 @@ def get_schema(self, request): tenant = self.get_tenant(request) if tenant: return tenant.schema_name - # as get_schema is being used when querying BigQuery datasets, we explicity need to mention `public` return "public" def process_request(self, request): diff --git a/plio/migrations/0029_auto_20220124_2224.py b/plio/migrations/0029_auto_20220124_2224.py new file mode 100644 index 00000000..94986dca --- /dev/null +++ b/plio/migrations/0029_auto_20220124_2224.py @@ -0,0 +1,17 @@ +# Generated by Django 3.1.1 on 2022-01-24 22:24 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("plio", "0028_auto_20210902_1120"), + ] + + operations = [ + migrations.AlterModelOptions( + name="plio", + options={"ordering": ["-updated_at"]}, + ), + ] diff --git a/plio/migrations/0030_merge_20220125_0510.py b/plio/migrations/0030_merge_20220125_0510.py new file mode 100644 index 00000000..14692acd --- /dev/null +++ b/plio/migrations/0030_merge_20220125_0510.py @@ -0,0 +1,13 @@ +# Generated by Django 3.1.1 on 2022-01-25 05:10 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("plio", "0029_auto_20220124_2224"), + ("plio", "0029_auto_20220110_1044"), + ] + + operations = [] diff --git a/plio/models.py b/plio/models.py index 40762dd3..dc0983af 100644 --- a/plio/models.py +++ b/plio/models.py @@ -81,6 +81,7 @@ class Plio(SafeDeleteModel): class Meta: db_table = "plio" + ordering = ["-updated_at"] def __str__(self): return "%d: %s" % (self.id, self.name) diff --git a/plio/ordering.py b/plio/ordering.py index 8c6e5cac..5e35c987 100644 --- a/plio/ordering.py +++ b/plio/ordering.py @@ -1,7 +1,4 @@ from rest_framework.filters import OrderingFilter -from django.db.models import OuterRef, Subquery, Count -from entries.models import Session -from django.db.models.functions import Coalesce class CustomOrderingFilter(OrderingFilter): @@ -50,23 +47,6 @@ def filter_queryset(self, request, queryset, view): ordering = self.get_ordering(request, queryset, view) if ordering: - # if the ordering fields contain "unique_viewers" - if any("unique_viewers" in order_by for order_by in ordering): - # prepare a session queryset which has an annotated field "count_unique_users" - # that holds the count of unique users for every plio in the plio's queryset - plio_session_group = Session.objects.filter( - plio__uuid=OuterRef("uuid") - ).values("plio__uuid") - - plios_unique_users_count = plio_session_group.annotate( - count_unique_users=Count("user__id", distinct=True) - ).values("count_unique_users") - - # annotate the plio's queryset with the count of unique users - queryset = queryset.annotate( - unique_viewers=Coalesce(Subquery(plios_unique_users_count), 0) - ) - return queryset.order_by(*ordering) return queryset diff --git a/plio/queries.py b/plio/queries.py index 1fec1f7e..0d543c89 100644 --- a/plio/queries.py +++ b/plio/queries.py @@ -1,12 +1,74 @@ -from plio.settings import BIGQUERY +from typing import Tuple + + +def get_plio_latest_sessions_query(plio_uuid: str, schema: str): + """Returns the most recent sessions for each user for the given plio + + :param plio_uuid: The plio to fetch the details for + :type plio_uuid: str + :param schema: 
The schema from which the tables are to be accessed
+    :type schema: str
+    """
+    return f"""
+        WITH summary AS (
+            SELECT
+                session.id,
+                plio.uuid as plio_uuid,
+                session.watch_time,
+                session.retention,
+                ROW_NUMBER() OVER(PARTITION BY session.user_id, session.plio_id
+                                        ORDER BY session.id DESC) AS rank
+            FROM {schema}.session
+            INNER JOIN {schema}.plio AS plio ON plio.id = session.plio_id
+        )
+        SELECT id, watch_time, retention
+        FROM summary
+        WHERE rank = 1 AND plio_uuid = '{plio_uuid}'"""
+
+
+def get_plio_latest_responses_query(schema: str, session_ids: Tuple[int]):
+    """
+    Returns the responses of each user to the given plio based on
+    their most recent session.
+
+    :param schema: The schema from which the tables are to be accessed
+    :type schema: str
+    :param session_ids: The database ids corresponding to the most recent session by each user
+    :type session_ids: Tuple[int]
+    """
+    query = f"""
+        SELECT
+            sessionAnswer.id,
+            session.user_id,
+            sessionAnswer.answer,
+            item.type AS item_type,
+            question.type AS question_type,
+            question.correct_answer AS question_correct_answer
+        FROM {schema}.session AS session
+        INNER JOIN {schema}.session_answer AS sessionAnswer
+            ON session.id = sessionAnswer.session_id
+        INNER JOIN {schema}.item AS item
+            ON item.id=sessionAnswer.item_id
+        INNER JOIN {schema}.question AS question ON question.item_id = item.id """
+
+    # a single-element Python tuple renders as `(5,)` and the trailing
+    # comma is invalid SQL inside an IN clause, so fall back to equality
+    if len(session_ids) == 1:
+        query += f"WHERE session.id = {session_ids[0]}"
+    else:
+        query += f"WHERE session.id IN {session_ids}"
+
+    return query
 
 
 def get_plio_details_query(plio_uuid: str, schema: str, **kwargs):
     """
     Returns the details for the given plio
 
-    plio_uuid: The plio to fetch the details for.
-    schema: The schema from which the tables are to be accessed.
+    :param plio_uuid: The plio to fetch the details for
+    :type plio_uuid: str
+    :param schema: The schema from which the tables are to be accessed
+    :type schema: str
     """
     return f"""
     SELECT
@@ -27,9 +89,12 @@ def get_sessions_dump_query(plio_uuid: str, schema: str, mask_user_id: bool = Tr
     """
     Returns the dump of all the sessions for the given plio
 
-    plio_uuid: The plio to fetch the details for.
-    schema: The schema from which the tables are to be accessed.
- mask_user_id: whether the user id should be masked + :param plio_uuid: The plio to fetch the details for + :type plio_uuid: str + :param schema: The schema from which the tables are to be accessed + :type schema: str + :param mask_user_id: whether the user id should be masked, defaults to True + :type mask_user_id: bool """ return f""" SELECT @@ -38,11 +103,11 @@ def get_sessions_dump_query(plio_uuid: str, schema: str, mask_user_id: bool = Tr CASE WHEN {str(mask_user_id).lower()} THEN COALESCE(users.email, users.mobile, CONCAT('unique_id:', users.unique_id)) ELSE - {'TO_HEX(MD5(CAST(session.user_id as STRING)))' if BIGQUERY['enabled'] else 'MD5(session.user_id::varchar(255))'} + {'MD5(session.user_id::varchar(255))'} END AS user_identifier FROM {schema}.session AS session INNER JOIN {schema}.plio AS plio ON plio.id = session.plio_id - INNER JOIN {schema if BIGQUERY['enabled'] else 'public'}.user AS users ON session.user_id = users.id + INNER JOIN public.user AS users ON session.user_id = users.id WHERE plio.uuid = '{plio_uuid}'""" @@ -50,9 +115,12 @@ def get_responses_dump_query(plio_uuid: str, schema: str, mask_user_id: bool = T """ Returns the dump of all the session responses for the given plio - plio_uuid: The plio to fetch the details for. - schema: The schema from which the tables are to be accessed. - mask_user_id: whether the user id should be masked + :param plio_uuid: The plio to fetch the details for + :type plio_uuid: str + :param schema: The schema from which the tables are to be accessed + :type schema: str + :param mask_user_id: whether the user id should be masked, defaults to True + :type mask_user_id: bool """ return f""" SELECT @@ -60,7 +128,7 @@ def get_responses_dump_query(plio_uuid: str, schema: str, mask_user_id: bool = T CASE WHEN {str(mask_user_id).lower()} THEN COALESCE(users.email, users.mobile, CONCAT('unique_id:', users.unique_id)) ELSE - {'TO_HEX(MD5(CAST(session.user_id as STRING)))' if BIGQUERY['enabled'] else 'MD5(session.user_id::varchar(255))'} + {'MD5(session.user_id::varchar(255))'} END AS user_identifier, sessionAnswer.answer, sessionAnswer.item_id, @@ -68,7 +136,7 @@ def get_responses_dump_query(plio_uuid: str, schema: str, mask_user_id: bool = T FROM {schema}.session AS session INNER JOIN {schema}.session_answer sessionAnswer ON session.id = sessionAnswer.session_id INNER JOIN {schema}.plio AS plio ON plio.id = session.plio_id - INNER JOIN {schema if BIGQUERY['enabled'] else 'public'}.user AS users ON session.user_id = users.id + INNER JOIN public.user AS users ON session.user_id = users.id INNER JOIN {schema}.item item ON item.id = sessionAnswer.item_id INNER JOIN {schema}.question question ON question.item_id = item.id WHERE plio.uuid = '{plio_uuid}'""" @@ -78,9 +146,12 @@ def get_events_query(plio_uuid: str, schema: str, mask_user_id: bool = True): """ Returns the dump of all events across all sessions for the given plio - plio_uuid: The plio to fetch the details for. - schema: The schema from which the tables are to be accessed. 
- mask_user_id: whether the user id should be masked + :param plio_uuid: The plio to fetch the details for + :type plio_uuid: str + :param schema: The schema from which the tables are to be accessed + :type schema: str + :param mask_user_id: whether the user id should be masked, defaults to True + :type mask_user_id: bool """ return f""" SELECT @@ -88,7 +159,7 @@ def get_events_query(plio_uuid: str, schema: str, mask_user_id: bool = True): CASE WHEN {str(mask_user_id).lower()} THEN COALESCE(users.email, users.mobile, CONCAT('unique_id:', users.unique_id)) ELSE - {'TO_HEX(MD5(CAST(session.user_id as STRING)))' if BIGQUERY['enabled'] else 'MD5(session.user_id::varchar(255))'} + {'MD5(session.user_id::varchar(255))'} END AS user_identifier, event.type AS event_type, event.player_time AS event_player_time, @@ -97,5 +168,5 @@ def get_events_query(plio_uuid: str, schema: str, mask_user_id: bool = True): FROM {schema}.session AS session INNER JOIN {schema}.event AS event ON session.id = event.session_id INNER JOIN {schema}.plio AS plio ON plio.id = session.plio_id - INNER JOIN {schema if BIGQUERY['enabled'] else 'public'}.user AS users ON session.user_id = users.id + INNER JOIN public.user AS users ON session.user_id = users.id WHERE plio.uuid = '{plio_uuid}'""" diff --git a/plio/serializers.py b/plio/serializers.py index 6ce00140..3fcb6a40 100644 --- a/plio/serializers.py +++ b/plio/serializers.py @@ -63,9 +63,7 @@ def to_representation(self, instance): response = super().to_representation(instance) response["video"] = VideoSerializer(instance.video).data response["created_by"] = UserSerializer(instance.created_by).data - response["items"] = ItemSerializer( - instance.item_set.order_by("time"), many=True - ).data + response["items"] = ItemSerializer(instance.item_set, many=True).data cache.set(cache_key, response) # set a cached version return response diff --git a/plio/settings.py b/plio/settings.py index 843627d2..9e88cd4a 100644 --- a/plio/settings.py +++ b/plio/settings.py @@ -292,15 +292,6 @@ } } -# authentication -ANALYTICS_IDP = { - "type": os.environ.get("ANALYTICS_IDP_TYPE"), - "token_url": os.environ.get("ANALYTICS_IDP_TOKEN_URL"), - "client_id": os.environ.get("ANALYTICS_IDP_CLIENT_ID"), - "client_secret": os.environ.get("ANALYTICS_IDP_CLIENT_SECRET"), - "audience": os.environ.get("ANALYTICS_IDP_AUDIENCE", ""), -} - SMS_DRIVER = os.environ.get("SMS_DRIVER") # file storage for uploaded images @@ -309,13 +300,6 @@ AWS_QUERYSTRING_AUTH = False DATA_UPLOAD_MAX_MEMORY_SIZE = 10485760 # 10 mb -BIGQUERY = { - "enabled": True if os.environ.get("BIGQUERY_ENABLED") == "True" else False, - "project_id": os.environ.get("BIGQUERY_PROJECT_ID", ""), - "location": os.environ.get("BIGQUERY_LOCATION", ""), - "credentials": os.environ.get("BIGQUERY_CREDENTIALS", ""), -} - SENTRY_DSN = os.environ.get("SENTRY_DSN", None) if APP_ENV in ["staging", "production"] and SENTRY_DSN is not None: diff --git a/plio/tests.py b/plio/tests.py index 98d6e2ac..4816d683 100644 --- a/plio/tests.py +++ b/plio/tests.py @@ -19,12 +19,16 @@ from organizations.models import Organization from plio.settings import API_APPLICATION_NAME, OAUTH2_PROVIDER from plio.models import Plio, Video, Item, Question, Image -from entries.models import Session +from entries.models import Session, SessionAnswer from plio.views import StandardResultsSetPagination from plio.cache import get_cache_key from plio.serializers import ImageSerializer +def get_uuid_list(plio_list): + return [plio["uuid"] for plio in plio_list] + + class 
BaseTestCase(APITestCase): """Base class that sets up generic pre-requisites for all further test classes""" @@ -102,12 +106,12 @@ def test_guest_cannot_list_plios(self): # unset the credentials self.client.credentials() # get plios - response = self.client.get("/api/v1/plios/list_uuid/") + response = self.client.get("/api/v1/plios/") self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) def test_user_can_list_plios(self): # get plios - response = self.client.get("/api/v1/plios/list_uuid/") + response = self.client.get("/api/v1/plios/") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.data["count"], 2) @@ -117,7 +121,7 @@ def test_user_list_own_plios(self): Plio.objects.create(name="Plio 1", video=self.video, created_by=self.user_2) # get plios - response = self.client.get("/api/v1/plios/list_uuid/") + response = self.client.get("/api/v1/plios/") self.assertEqual(response.status_code, status.HTTP_200_OK) # the count should remain 2 as the new plio was created with user 2 self.assertEqual(response.data["count"], 2) @@ -149,11 +153,11 @@ def test_user_list_own_plios_in_org(self): ) # get plios - response = self.client.get("/api/v1/plios/list_uuid/") + response = self.client.get("/api/v1/plios/") # the plio created above should be listed self.assertEqual(len(response.data["results"]), 1) - self.assertEqual(response.data["results"][0], plio_org.uuid) + self.assertEqual(response.data["results"][0]["uuid"], plio_org.uuid) # set db connection back to public (default) schema connection.set_schema_to_public() @@ -189,11 +193,11 @@ def test_user_list_other_plios_in_org(self): ) # get plios - response = self.client.get("/api/v1/plios/list_uuid/") + response = self.client.get("/api/v1/plios/") # the plio created above should be listed self.assertEqual(len(response.data["results"]), 1) - self.assertEqual(response.data["results"][0], plio_org.uuid) + self.assertEqual(response.data["results"][0]["uuid"], plio_org.uuid) # set db connection back to public (default) schema connection.set_schema_to_public() @@ -218,7 +222,7 @@ def test_non_org_user_cannot_list_plios_in_org(self): ) # get plios - response = self.client.get("/api/v1/plios/list_uuid/") + response = self.client.get("/api/v1/plios/") # no plios should be listed self.assertEqual(len(response.data["results"]), 0) @@ -229,18 +233,16 @@ def test_non_org_user_cannot_list_plios_in_org(self): def test_guest_cannot_list_plio_uuids(self): # unset the credentials self.client.credentials() - # get plio uuids - response = self.client.get("/api/v1/plios/list_uuid/") + response = self.client.get("/api/v1/plios/") self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) - def test_user_list_empty_plio_uuids(self): + def test_user_list_empty_plio(self): """Tests that a user with no plios receives an empty list of uuids""" # change user self.client.credentials( HTTP_AUTHORIZATION="Bearer " + self.access_token_2.token ) - # get plio uuids - response = self.client.get("/api/v1/plios/list_uuid/") + response = self.client.get("/api/v1/plios/") self.assertEqual( response.data, { @@ -253,10 +255,17 @@ def test_user_list_empty_plio_uuids(self): }, ) - def test_user_list_plio_uuids(self): + def test_user_list_with_plios(self): """Test valid user listing plio uuids when they have plios""" - # get plio uuids - response = self.client.get("/api/v1/plios/list_uuid/") + response = self.client.get("/api/v1/plios/") + + expected_results = list( + Plio.objects.filter(id__in=[self.plio_2.id, self.plio_1.id]).values() + 
) + + for index, _ in enumerate(expected_results): + expected_results[index]["unique_viewers"] = 0 + expected_results[index]["items"] = [] self.assertEqual( response.data, @@ -265,11 +274,26 @@ def test_user_list_plio_uuids(self): "page_size": StandardResultsSetPagination.page_size, "next": None, "previous": None, - "results": [self.plio_2.uuid, self.plio_1.uuid], + "results": expected_results, "raw_count": 2, }, ) + def test_listing_plios_returns_unique_num_views(self): + # create some sessions - 2 sessions for one user and one more session for another user + Session.objects.create(plio=self.plio_1, user=self.user) + Session.objects.create(plio=self.plio_1, user=self.user) + Session.objects.create(plio=self.plio_1, user=self.user_2) + + response = self.client.get("/api/v1/plios/") + plios = response.data["results"] + + # plio 2 will be listed first because it was created later + expected_num_unique_viewers = [0, 2] + self.assertEqual( + [plio["unique_viewers"] for plio in plios], expected_num_unique_viewers + ) + def test_guest_can_play_plio(self): # unset the credentials self.client.credentials() @@ -335,15 +359,16 @@ def test_default_ordering_when_no_ordering_specified(self): # make a request to list the plio uuids without specifying any order # NOTE: default ordering should come out to be '-updated_at' # order of plios should be [plio_3, plio_2, plio_1] - response = self.client.get("/api/v1/plios/list_uuid/") + response = self.client.get("/api/v1/plios/") + results = get_uuid_list(response.data["results"]) self.assertListEqual( [plio.uuid for plio in Plio.objects.order_by("-updated_at")], - response.data["results"], + results, ) # also manually checking the order self.assertListEqual( [plio_3.uuid, self.plio_2.uuid, self.plio_1.uuid], - response.data["results"], + results, ) # update the first plio - ordering should change according to 'updated_at' @@ -353,15 +378,16 @@ def test_default_ordering_when_no_ordering_specified(self): # make a request to list the plio uuids # NOTE: default ordering should come out to be '-updated_at' # order of plios should be [plio_1, plio_3, plio_2] - response = self.client.get("/api/v1/plios/list_uuid/") + response = self.client.get("/api/v1/plios/") + results = get_uuid_list(response.data["results"]) self.assertListEqual( [plio.uuid for plio in Plio.objects.order_by("-updated_at")], - response.data["results"], + results, ) # also manually checking the order self.assertListEqual( [self.plio_1.uuid, plio_3.uuid, self.plio_2.uuid], - response.data["results"], + results, ) def test_ordering_applied_as_specified(self): @@ -379,47 +405,48 @@ def test_ordering_applied_as_specified(self): plio_3.name = "C_plio" plio_3.save() - # `list_uuid` should give the result ordered as [plio_1, plio_2, plio_3] + # listing plios should give the result ordered as [plio_1, plio_2, plio_3] # when "name" ordering is specified - response = self.client.get("/api/v1/plios/list_uuid/", {"ordering": "name"}) + response = self.client.get("/api/v1/plios/", {"ordering": "name"}) + results = get_uuid_list(response.data["results"]) self.assertListEqual( [plio.uuid for plio in Plio.objects.order_by("name")], - response.data["results"], + results, ) # also manually checking the order self.assertListEqual( [self.plio_1.uuid, self.plio_2.uuid, plio_3.uuid], - response.data["results"], + results, ) # ordering by "-name" - # 'list_uuid` should give the result ordered as [plio_3, plio_2, plio_1] + # listing plios should give the result ordered as [plio_3, plio_2, plio_1] # when "-name" ordering 
is specified - response = self.client.get("/api/v1/plios/list_uuid/", {"ordering": "-name"}) + response = self.client.get("/api/v1/plios/", {"ordering": "-name"}) + results = get_uuid_list(response.data["results"]) self.assertListEqual( [plio.uuid for plio in Plio.objects.order_by("-name")], - response.data["results"], + results, ) # also manually checking the order self.assertListEqual( [plio_3.uuid, self.plio_2.uuid, self.plio_1.uuid], - response.data["results"], + results, ) # ordering by 'created_at' - # 'list_uuid` should give the result ordered as [plio_1, plio_2, plio_3] + # listing plios should give the result ordered as [plio_1, plio_2, plio_3] # when "created_at" ordering is specified - response = self.client.get( - "/api/v1/plios/list_uuid/", {"ordering": "created_at"} - ) + response = self.client.get("/api/v1/plios/", {"ordering": "created_at"}) + results = get_uuid_list(response.data["results"]) self.assertListEqual( [plio.uuid for plio in Plio.objects.order_by("created_at")], - response.data["results"], + results, ) # also manually checking the order self.assertListEqual( [self.plio_1.uuid, self.plio_2.uuid, plio_3.uuid], - response.data["results"], + results, ) # ordering by "-unique_viewers" @@ -438,27 +465,27 @@ def test_ordering_applied_as_specified(self): Session.objects.create(plio=self.plio_1, user=self.user) - # 'list_uuid` should give the result ordered as [plio_3, plio_2, plio_1] + # listing plios should give the result ordered as [plio_3, plio_2, plio_1] # when "-unique_viewers" ordering is specified - response = self.client.get( - "/api/v1/plios/list_uuid/", {"ordering": "-unique_viewers"} - ) + response = self.client.get("/api/v1/plios/", {"ordering": "-unique_viewers"}) self.assertListEqual( - [plio_3.uuid, self.plio_2.uuid, self.plio_1.uuid], response.data["results"] + [plio_3.uuid, self.plio_2.uuid, self.plio_1.uuid], + get_uuid_list(response.data["results"]), ) # ordering by "-unique_viewers" and "name" - # 'list_uuid` should give the result ordered as [plio_3, plio_1, plio_2] + # listing plios should give the result ordered as [plio_3, plio_1, plio_2] # when ordering is specified as "-unique_viewers" and "name" # add one more unique_view to plio_1 so that plio_1 and plio_2 both have 2 views each # that way, the second ordering will be done using the "name" Session.objects.create(plio=self.plio_1, user=self.user_2) response = self.client.get( - "/api/v1/plios/list_uuid/", {"ordering": "-unique_viewers,name"} + "/api/v1/plios/", {"ordering": "-unique_viewers,name"} ) self.assertListEqual( - [plio_3.uuid, self.plio_1.uuid, self.plio_2.uuid], response.data["results"] + [plio_3.uuid, self.plio_1.uuid, self.plio_2.uuid], + get_uuid_list(response.data["results"]), ) def test_invalid_ordering_results_in_default_ordering(self): @@ -466,12 +493,12 @@ def test_invalid_ordering_results_in_default_ordering(self): Plio.objects.create(name="Plio 3", video=self.video, created_by=self.user) # order by some invalid ordering string - "xyz" - # `list_uuid` should give the result ordered as [plio_3, plio_2, plio_1] + # listing plios should give the result ordered as [plio_3, plio_2, plio_1] # because an invalid ordering field will result in the default ordering - response = self.client.get("/api/v1/plios/list_uuid/", {"ordering": "xyz"}) + response = self.client.get("/api/v1/plios/", {"ordering": "xyz"}) self.assertListEqual( [plio.uuid for plio in Plio.objects.order_by("-updated_at")], - response.data["results"], + get_uuid_list(response.data["results"]), ) def 
test_delete(self): @@ -661,6 +688,205 @@ def test_copying_to_workspace_with_no_video(self): # set db connection back to public (default) schema connection.set_schema_to_public() + def test_metrics_returns_empty_if_no_sessions(self): + response = self.client.get( + f"/api/v1/plios/{self.plio_1.uuid}/metrics/", + ) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.data, {}) + + def test_metrics_num_views_and_average_watch_time(self): + # seed some sessions + Session.objects.create(plio=self.plio_1, user=self.user, watch_time=10) + Session.objects.create(plio=self.plio_1, user=self.user, watch_time=20) + Session.objects.create(plio=self.plio_1, user=self.user_2, watch_time=50) + + response = self.client.get( + f"/api/v1/plios/{self.plio_1.uuid}/metrics/", + ) + self.assertEqual(response.data["unique_viewers"], 2) + self.assertEqual(response.data["average_watch_time"], 35.0) + self.assertEqual(response.data["percent_one_minute_retention"], None) + self.assertEqual(response.data["accuracy"], None) + self.assertEqual(response.data["average_num_answered"], None) + self.assertEqual(response.data["percent_completed"], None) + + def test_metrics_video_duration_valid_no_valid_retention(self): + # make the video's duration a valid one for calculating retention + response = self.client.put( + f"/api/v1/videos/{self.video.id}/", {"url": self.video.url, "duration": 100} + ) + + # seed some sessions + Session.objects.create( + plio=self.plio_1, user=self.user, watch_time=20, retention="NaN,NaN" + ) + Session.objects.create(plio=self.plio_1, user=self.user_2, watch_time=50) + + response = self.client.get( + f"/api/v1/plios/{self.plio_1.uuid}/metrics/", + ) + + # retention at 60 seconds should still be None + self.assertEqual(response.data["percent_one_minute_retention"], 0) + + def test_metrics_video_duration_valid_no_valid_retention_has_questions(self): + # seed an item + item = Item.objects.create(type="question", plio=self.plio_1, time=1) + + # seed a question + Question.objects.create(type="mcq", item=item, text="test") + + # make the video's duration a valid one for calculating retention + response = self.client.put( + f"/api/v1/videos/{self.video.id}/", {"url": self.video.url, "duration": 100} + ) + + # seed a session and session answer + session = Session.objects.create( + plio=self.plio_1, user=self.user, watch_time=20, retention="NaN,NaN" + ) + SessionAnswer.objects.create(session=session, item=item) + + response = self.client.get( + f"/api/v1/plios/{self.plio_1.uuid}/metrics/", + ) + + # retention at 60 seconds should still be None + self.assertEqual(response.data["percent_one_minute_retention"], 0) + self.assertEqual(response.data["accuracy"], None) + self.assertEqual(response.data["average_num_answered"], 0) + self.assertEqual(response.data["percent_completed"], 0) + + def test_metrics_valid_retention_values(self): + # make the video's duration a valid one for calculating retention + import numpy as np + + video_duration = 100 + response = self.client.put( + f"/api/v1/videos/{self.video.id}/", + {"url": self.video.url, "duration": video_duration}, + ) + + user_3 = User.objects.create(mobile="+919998776655") + + # seed some sessions with valid retention values + retention_user_1 = [0] * video_duration + retention_user_1[59:] = np.random.randint(0, 4, video_duration - 59) + retention_user_1 = ",".join(list(map(str, retention_user_1))) + + retention_user_2 = [0] * video_duration + retention_user_2[59:] = np.random.randint(0, 4, video_duration - 59) + 
retention_user_2 = ",".join(list(map(str, retention_user_2))) + + retention_user_3 = ",".join(["0"] * video_duration) + + Session.objects.create( + plio=self.plio_1, user=self.user, watch_time=20, retention=retention_user_1 + ) + Session.objects.create( + plio=self.plio_1, + user=self.user_2, + watch_time=50, + retention=retention_user_2, + ) + Session.objects.create( + plio=self.plio_1, user=user_3, watch_time=100, retention=retention_user_3 + ) + + response = self.client.get( + f"/api/v1/plios/{self.plio_1.uuid}/metrics/", + ) + + # retention at 60 seconds should still be None + self.assertEqual(response.data["percent_one_minute_retention"], 66.67) + + def test_question_metrics_with_single_session_no_answer(self): + # seed an item + item = Item.objects.create(type="question", plio=self.plio_1, time=1) + + # seed a question + Question.objects.create(type="mcq", item=item, text="test") + + # seed a session and session answer + session = Session.objects.create( + plio=self.plio_1, user=self.user, watch_time=20 + ) + SessionAnswer.objects.create(session=session, item=item) + + response = self.client.get( + f"/api/v1/plios/{self.plio_1.uuid}/metrics/", + ) + self.assertEqual(response.data["average_num_answered"], 0) + self.assertEqual(response.data["percent_completed"], 0) + self.assertEqual(response.data["accuracy"], None) + + def test_question_metrics_with_multiple_sessions_no_answer(self): + # seed an item + item = Item.objects.create(type="question", plio=self.plio_1, time=1) + + # seed a question + Question.objects.create(type="mcq", item=item, text="test") + + # seed a few session and session answer objects with empty answers + session = Session.objects.create( + plio=self.plio_1, user=self.user, watch_time=20 + ) + SessionAnswer.objects.create(session=session, item=item) + + session_2 = Session.objects.create( + plio=self.plio_1, user=self.user_2, watch_time=40 + ) + SessionAnswer.objects.create(session=session_2, item=item) + + response = self.client.get( + f"/api/v1/plios/{self.plio_1.uuid}/metrics/", + ) + self.assertEqual(response.data["average_num_answered"], 0) + self.assertEqual(response.data["percent_completed"], 0) + self.assertEqual(response.data["accuracy"], None) + + def test_question_metrics_answers_provided(self): + # seed items + item_1 = Item.objects.create(type="question", plio=self.plio_1, time=1) + item_2 = Item.objects.create(type="question", plio=self.plio_1, time=10) + item_3 = Item.objects.create(type="question", plio=self.plio_1, time=20) + + # seed questions + Question.objects.create( + type="mcq", item=item_1, text="test", options=["", ""], correct_answer=0 + ) + Question.objects.create( + type="checkbox", + item=item_2, + text="test", + options=["", ""], + correct_answer=[0, 1], + ) + Question.objects.create(type="subjective", item=item_3, text="test") + + # seed a few session and session answer objects with answers + session = Session.objects.create( + plio=self.plio_1, user=self.user, watch_time=20 + ) + SessionAnswer.objects.create(session=session, item=item_1, answer=0) + SessionAnswer.objects.create(session=session, item=item_2, answer=[1]) + SessionAnswer.objects.create(session=session, item=item_3, answer="abcd") + + session_2 = Session.objects.create( + plio=self.plio_1, user=self.user_2, watch_time=40 + ) + SessionAnswer.objects.create(session=session_2, item=item_1, answer=1) + SessionAnswer.objects.create(session=session_2, item=item_2, answer=[0, 1]) + SessionAnswer.objects.create(session=session_2, item=item_3) + + response = self.client.get( + 
f"/api/v1/plios/{self.plio_1.uuid}/metrics/", + ) + self.assertEqual(response.data["average_num_answered"], 2) + self.assertEqual(response.data["percent_completed"], 50) + self.assertEqual(response.data["accuracy"], 58.33) + class PlioDownloadTestCase(BaseTestCase): def setUp(self): diff --git a/plio/urls.py b/plio/urls.py index 15a10a24..dd5858ea 100644 --- a/plio/urls.py +++ b/plio/urls.py @@ -26,7 +26,6 @@ request_otp, verify_otp, get_by_access_token, - retrieve_analytics_app_access_token, generate_external_auth_access_token, ) from organizations.views import OrganizationViewSet @@ -96,11 +95,6 @@ ), path("api/v1/", include(api_router.urls)), path("api-auth/", include("rest_framework.urls", namespace="rest_framework")), - path( - "auth/cubejs-token/", - retrieve_analytics_app_access_token, - name="get-analytics-token", - ), url(r"^auth/", include("rest_framework_social_oauth2.urls")), url( r"^api/v1/docs/$", diff --git a/plio/views.py b/plio/views.py index d3f1dbc6..0bb06c76 100644 --- a/plio/views.py +++ b/plio/views.py @@ -1,7 +1,6 @@ import os import shutil from copy import deepcopy -import base64 import json from rest_framework import viewsets, status, filters @@ -10,7 +9,8 @@ from rest_framework.permissions import IsAuthenticated from rest_framework.pagination import PageNumberPagination from django.db import connection -from django.db.models import Q +from django.db.models import Q, Count, OuterRef, Subquery +from django.db.models.functions import Coalesce from django.http import FileResponse from django_tenants.utils import get_tenant_model @@ -18,13 +18,11 @@ import pandas as pd from storages.backends.s3boto3 import S3Boto3Storage -from google.cloud import bigquery -from google.oauth2 import service_account - from organizations.middleware import OrganizationTenantMiddleware from organizations.models import Organization from users.models import OrganizationUser from plio.models import Video, Plio, Item, Question, Image +from entries.models import Session from plio.serializers import ( VideoSerializer, PlioSerializer, @@ -35,13 +33,14 @@ from plio.settings import ( DEFAULT_TENANT_SHORTCODE, AWS_STORAGE_BUCKET_NAME, - BIGQUERY, ) from plio.queries import ( get_plio_details_query, get_sessions_dump_query, get_responses_dump_query, get_events_query, + get_plio_latest_sessions_query, + get_plio_latest_responses_query, ) from plio.permissions import PlioPermission from plio.ordering import CustomOrderingFilter @@ -158,9 +157,7 @@ def organization_shortcode(self): def is_organizational_workspace(self): return self.organization_shortcode != DEFAULT_TENANT_SHORTCODE - @action(detail=False, permission_classes=[IsAuthenticated]) - def list_uuid(self, request): - """Retrieves a list of UUIDs for all the plios""" + def list(self, request): queryset = self.get_queryset() # personal workspace @@ -182,18 +179,38 @@ def list_uuid(self, request): queryset = Plio.objects.none() num_plios = queryset.count() - queryset = self.filter_queryset(queryset) - uuid_list = queryset.values_list("uuid", flat=True) - page = self.paginate_queryset(uuid_list) + # add the number of unique viewers to the queryset + plio_session_group = Session.objects.filter(plio__uuid=OuterRef("uuid")).values( + "plio__uuid" + ) + + plios_unique_users_count = plio_session_group.annotate( + count_unique_users=Count("user__id", distinct=True) + ).values("count_unique_users") + + # annotate the plio's queryset with the count of unique users + queryset = queryset.annotate( + unique_viewers=Coalesce(Subquery(plios_unique_users_count), 
0) + ) + + queryset = self.filter_queryset(queryset) + page = self.paginate_queryset(queryset.values()) if page is not None: + # add the items corresponding to the plio in each plio object + for index, _ in enumerate(page): + page[index]["items"] = ItemSerializer( + queryset[index].item_set, many=True + ).data + return self.get_paginated_response({"data": page, "raw_count": num_plios}) # return an empty response in the paginated format if pagination fails return Response( { "count": 0, + "raw_count": 0, "page_size": self.get_page_size(self.request), "next": None, "previous": None, @@ -335,6 +352,169 @@ def copy(self, request, uuid): return Response(self.get_serializer(plio).data) + @action( + methods=["get"], + detail=True, + permission_classes=[IsAuthenticated, PlioPermission], + ) + def metrics(self, request, uuid): + """Returns usage metrics for the plio""" + # return 404 if user cannot access the object + # else fetch the object + plio = self.get_object() + + # no sessions have been created for the plio: return + if not Session.objects.filter(plio=plio.id): + return Response({}) + + import numpy as np + + with connection.cursor() as cursor: + cursor.execute( + get_plio_latest_sessions_query(plio.uuid, connection.schema_name) + ) + results = cursor.fetchall() + + df = pd.DataFrame(results, columns=["id", "watch_time", "retention"]) + + # number of unique viewers and average watch time + num_unique_viewers = len(df) + average_watch_time = df["watch_time"].mean() + + # retention at one minute + if plio.video.duration is None or plio.video.duration < 60: + # the metric is not applicable in this case + percent_one_minute_retention = None + else: + # convert "0,1,0" to ["0", "1", "0"] + df["retention"] = df["retention"].apply(lambda row: row.split(",")) + + # remove entries where the retention is either empty or has NaN values + df["is_retention_valid"] = df["retention"].apply( + lambda row: ("NaN" not in row and len(row) == plio.video.duration) + ) + + valid_retention_df = df[df["is_retention_valid"]] + + if not len(valid_retention_df): + percent_one_minute_retention = 0 + else: + # convert ["0", "1", "0"] to [0, 1, 0] + retention = ( + valid_retention_df["retention"] + .apply(lambda row: list(map(int, row))) + .values + ) + # create an array out of all the retention values and only + # retain the values after one minute + retention = np.vstack(retention)[:, 59:] + + # checks if a given user has crossed the second mark + percent_one_minute_retention = np.round( + ((retention.sum(axis=1) > 0).sum() / num_unique_viewers) * 100, 2 + ) + + # question-based metrics + questions = Question.objects.filter(item__plio=plio.id) + + # if the plio does not have any questions, these metrics are not applicable + if not questions: + accuracy = None + average_num_answered = None + percent_completed = None + + else: + with connection.cursor() as cursor: + cursor.execute( + get_plio_latest_responses_query( + connection.schema_name, tuple(df["id"]) + ) + ) + results = cursor.fetchall() + + df = pd.DataFrame( + results, + columns=[ + "id", + "user_id", + "answer", + "item_type", + "question_type", + "correct_answer", + ], + ) + + # retain only the responses to items which are questions + question_df = df[df["item_type"] == "question"].reset_index(drop=True) + num_questions = len(questions) + + def is_answer_correct(row): + """Whether the answer corresponding to the given row is correct""" + if row["question_type"] in ["mcq", "checkbox"]: + return row["answer"] == row["correct_answer"] + return row["answer"] is 
not None
+
+            # holds the number of questions answered for each viewer
+            num_answered_list = []
+            # holds the number of questions correctly answered for each viewer
+            num_correct_list = []
+
+            user_grouping = question_df.groupby("user_id")
+            for group in user_grouping.groups:
+                # get the responses for a given user
+                group_df = user_grouping.get_group(group)
+
+                # sanity check
+                assert num_questions == len(
+                    group_df
+                ), "Inconsistency in the number of questions"
+
+                num_answered = sum(
+                    group_df["answer"].apply(lambda value: value is not None)
+                )
+
+                num_answered_list.append(num_answered)
+
+                if not num_answered:
+                    num_correct_list.append(None)
+                else:
+                    num_correct_list.append(
+                        sum(group_df.apply(is_answer_correct, axis=1))
+                    )
+
+            # converting to numpy arrays enables us to use vectorization
+            # to speed up the computation manyfold
+            num_answered_list = np.array(num_answered_list)
+            num_correct_list = np.array(num_correct_list)
+            average_num_answered = round(num_answered_list.mean())
+            percent_completed = np.round(
+                100 * (sum(num_answered_list == num_questions) / num_unique_viewers), 2
+            )
+
+            # only use the responses from viewers who have answered at least
+            # one question to compute the accuracy
+            answered_at_least_one_index = num_answered_list > 0
+            num_answered_list = num_answered_list[answered_at_least_one_index]
+            num_correct_list = num_correct_list[answered_at_least_one_index]
+
+            if not len(num_correct_list):
+                accuracy = None
+            else:
+                accuracy = np.round(
+                    (num_correct_list / num_answered_list).mean() * 100, 2
+                )
+
+        return Response(
+            {
+                "unique_viewers": num_unique_viewers,
+                "average_watch_time": average_watch_time,
+                "percent_one_minute_retention": percent_one_minute_retention,
+                "accuracy": accuracy,
+                "average_num_answered": average_num_answered,
+                "percent_completed": percent_completed,
+            }
+        )
+
     @action(
         methods=["get"],
         detail=True,
@@ -343,7 +523,6 @@ def download_data(self, request, uuid):
         """
         Downloads a zip file containing various CSVs for a Plio.
-        If BigQuery is enabled, the report data is fetch from BigQuery dataset.
 
         request: HTTP request.
         uuid: UUID of the plio for which report needs to be downloaded.
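The question metrics in the new `metrics` action above boil down to a short pandas/numpy aggregation. The following standalone sketch condenses that logic over a toy DataFrame that stands in for the rows returned by `get_plio_latest_responses_query`; the values are illustrative, chosen to reproduce the expectations in `test_question_metrics_answers_provided` (two viewers, three questions).

```python
import numpy as np
import pandas as pd

num_questions = 3
# one row per (viewer, question) from each viewer's latest session
responses = pd.DataFrame(
    {
        "user_id": [1, 1, 1, 2, 2, 2],
        "answer": [0, [1], "abcd", 1, [0, 1], None],
        "question_type": ["mcq", "checkbox", "subjective"] * 2,
        "correct_answer": [0, [0, 1], None] * 2,
    }
)

def is_answer_correct(row):
    # graded questions must match the correct answer exactly;
    # a subjective question only needs to have been answered at all
    if row["question_type"] in ["mcq", "checkbox"]:
        return row["answer"] == row["correct_answer"]
    return row["answer"] is not None

num_answered, num_correct = [], []
for _, group in responses.groupby("user_id"):
    answered = group["answer"].notna().sum()
    num_answered.append(answered)
    num_correct.append(group.apply(is_answer_correct, axis=1).sum())

num_answered = np.array(num_answered)  # [3, 2]
num_viewers = len(num_answered)

average_num_answered = round(num_answered.mean())  # 2.5 -> 2
percent_completed = np.round(
    100 * (num_answered == num_questions).sum() / num_viewers, 2
)  # 50.0

# accuracy only counts viewers who answered at least one question
answered_any = num_answered > 0
accuracy = np.round(
    (np.array(num_correct)[answered_any] / num_answered[answered_any]).mean() * 100,
    2,
)  # 58.33

print(average_num_answered, percent_completed, accuracy)  # 2 50.0 58.33
```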
@@ -377,41 +556,15 @@ def download_data(self, request, uuid): organization.id ) - if BIGQUERY["enabled"]: - gcp_service_account_file = "/tmp/gcp-service-account.json" - with open(gcp_service_account_file, "wb") as file: - file.write(base64.b64decode(BIGQUERY["credentials"])) - - # retrieve credentials from BigQuery credentials file - credentials = service_account.Credentials.from_service_account_file( - gcp_service_account_file - ) - # create bigquery client - client = bigquery.Client( - credentials=credentials, - project=BIGQUERY["project_id"], - location=BIGQUERY["location"], - ) - def run_query(cursor, query_method): - if BIGQUERY["enabled"]: - # execute the sql query using BigQuery client and create a dataframe - df = client.query( - query_method( - uuid, schema=schema_name, mask_user_id=is_user_org_admin - ) - ).to_dataframe() - else: - # execute the sql query using postgres DB connection cursor - cursor.execute( - query_method( - uuid, schema=schema_name, mask_user_id=is_user_org_admin - ) - ) - # extract column names as cursor.description returns a tuple - columns = [col[0] for col in cursor.description] - # create a dataframe from the rows and the columns - df = pd.DataFrame(cursor.fetchall(), columns=columns) + # execute the sql query + cursor.execute( + query_method(uuid, schema=schema_name, mask_user_id=is_user_org_admin) + ) + # extract column names as cursor.description returns a tuple + columns = [col[0] for col in cursor.description] + # create a dataframe from the rows and the columns + df = pd.DataFrame(cursor.fetchall(), columns=columns) return df diff --git a/requirements.txt b/requirements.txt index eea0f9cf..b3fc3b7b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,7 +36,6 @@ django-safedelete==1.0.0 django-oauth2-provider==0.2.6.1 django-rest-framework-social-oauth2==1.1.0 coverage==5.5 -google-cloud-bigquery==2.20.0 pyarrow==4.0.1 sentry-sdk==1.1.0 django-silk==4.1.0 diff --git a/users/tests.py b/users/tests.py index bf70274d..4c592cc4 100644 --- a/users/tests.py +++ b/users/tests.py @@ -335,11 +335,6 @@ def test_get_by_access_token_valid_user(self): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.json()["id"], self.user.id) - def test_get_analytics_app_access_token(self): - response = self.client.post(reverse("get-analytics-token")) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertIn("access_token", response.json()) - def test_updating_user_recreates_instance_cache(self): # verify cache data doesn't exist cache_key_name = get_cache_key(self.user) diff --git a/users/views.py b/users/views.py index 233c71d9..7e0fda86 100644 --- a/users/views.py +++ b/users/views.py @@ -1,7 +1,6 @@ import datetime import string import random -import requests from django.utils import timezone from django.contrib.auth import login @@ -17,7 +16,6 @@ API_APPLICATION_NAME, OAUTH2_PROVIDER, OTP_EXPIRE_SECONDS, - ANALYTICS_IDP, SMS_DRIVER, ) from users.models import User, OneTimePassword, OrganizationUser @@ -271,19 +269,3 @@ def generate_external_auth_access_token(request): # login the user, get the new access token and return token = login_user_and_get_access_token(user, request) return Response(token, status=status.HTTP_200_OK) - - -@api_view(["POST"]) -def retrieve_analytics_app_access_token(request): - """Requests the configured identity provider to retrieve an access token.""" - - payload = { - "grant_type": "client_credentials", - "client_id": ANALYTICS_IDP["client_id"], - "client_secret": 
ANALYTICS_IDP["client_secret"], - } - if ANALYTICS_IDP["type"] == "auth0": - payload["audience"] = ANALYTICS_IDP["audience"] - - response = requests.post(ANALYTICS_IDP["token_url"], data=payload) - return Response(response.json(), status=status.HTTP_200_OK)