diff --git a/plio/queries.py b/plio/queries.py index 030391e8..1fec1f7e 100644 --- a/plio/queries.py +++ b/plio/queries.py @@ -1,13 +1,12 @@ from plio.settings import BIGQUERY -def get_plio_details_query(plio_uuid: str, schema: str, extra_data: dict): +def get_plio_details_query(plio_uuid: str, schema: str, **kwargs): """ Returns the details for the given plio plio_uuid: The plio to fetch the details for. schema: The schema from which the tables are to be accessed. - extra_data: Any extra data that's needed to complete the query. (not required here) """ return f""" SELECT @@ -24,24 +23,20 @@ def get_plio_details_query(plio_uuid: str, schema: str, extra_data: dict): WHERE plio.uuid = '{plio_uuid}'""" -def get_sessions_dump_query(plio_uuid: str, schema: str, extra_data: dict): +def get_sessions_dump_query(plio_uuid: str, schema: str, mask_user_id: bool = True): """ Returns the dump of all the sessions for the given plio plio_uuid: The plio to fetch the details for. schema: The schema from which the tables are to be accessed. - extra_data: Any extra data that's needed to complete the query. - `is_org_plio`: If the `plio_uuid` belongs to an org workspace - `is_user_org_admin`: If the user making this query is that - org's admin + mask_user_id: whether the user id should be masked """ return f""" SELECT session.id as session_id, - session.retention, session.watch_time, CASE - WHEN {str(extra_data["is_user_org_admin"]).lower()} THEN COALESCE(users.email, users.mobile, users.unique_id) + WHEN {str(mask_user_id).lower()} THEN COALESCE(users.email, users.mobile, CONCAT('unique_id:', users.unique_id)) ELSE {'TO_HEX(MD5(CAST(session.user_id as STRING)))' if BIGQUERY['enabled'] else 'MD5(session.user_id::varchar(255))'} END AS user_identifier @@ -51,22 +46,19 @@ def get_sessions_dump_query(plio_uuid: str, schema: str, extra_data: dict): WHERE plio.uuid = '{plio_uuid}'""" -def get_responses_dump_query(plio_uuid: str, schema: str, extra_data: dict): +def get_responses_dump_query(plio_uuid: str, schema: str, mask_user_id: bool = True): """ Returns the dump of all the session responses for the given plio plio_uuid: The plio to fetch the details for. schema: The schema from which the tables are to be accessed. - extra_data: Any extra data that's needed to complete the query. - `is_org_plio`: If the `plio_uuid` belongs to an org workspace - `is_user_org_admin`: If the user making this query is that - org's admin + mask_user_id: whether the user id should be masked """ return f""" SELECT session.id as session_id, CASE - WHEN {str(extra_data["is_user_org_admin"]).lower()} THEN COALESCE(users.email, users.mobile, users.unique_id) + WHEN {str(mask_user_id).lower()} THEN COALESCE(users.email, users.mobile, CONCAT('unique_id:', users.unique_id)) ELSE {'TO_HEX(MD5(CAST(session.user_id as STRING)))' if BIGQUERY['enabled'] else 'MD5(session.user_id::varchar(255))'} END AS user_identifier, @@ -82,22 +74,19 @@ def get_responses_dump_query(plio_uuid: str, schema: str, extra_data: dict): WHERE plio.uuid = '{plio_uuid}'""" -def get_events_query(plio_uuid: str, schema: str, extra_data: dict): +def get_events_query(plio_uuid: str, schema: str, mask_user_id: bool = True): """ Returns the dump of all events across all sessions for the given plio plio_uuid: The plio to fetch the details for. schema: The schema from which the tables are to be accessed. - extra_data: Any extra data that's needed to complete the query. - `is_org_plio`: If the `plio_uuid` belongs to an org workspace - `is_user_org_admin`: If the user making this query is that - org's admin + mask_user_id: whether the user id should be masked """ return f""" SELECT session.id as session_id, CASE - WHEN {str(extra_data["is_user_org_admin"]).lower()} THEN COALESCE(users.email, users.mobile, users.unique_id) + WHEN {str(mask_user_id).lower()} THEN COALESCE(users.email, users.mobile, CONCAT('unique_id:', users.unique_id)) ELSE {'TO_HEX(MD5(CAST(session.user_id as STRING)))' if BIGQUERY['enabled'] else 'MD5(session.user_id::varchar(255))'} END AS user_identifier, diff --git a/plio/static/plio/docs/download_csv_README.md b/plio/static/plio/docs/download_csv_README.md index 9dd2a10c..1d0efcef 100644 --- a/plio/static/plio/docs/download_csv_README.md +++ b/plio/static/plio/docs/download_csv_README.md @@ -59,7 +59,7 @@ This section will clarify what each of the `.csv` files in the folder contains: - `session_id`: the unique identifier for the session - - `user_identifier`: the unique identifier for the user associated with this session. To preserve user privacy, this field would not contain any Personally Identifiable Information (PII) and would instead be a hashed value. However, the same user will have the same hashed value across different plios. So, you can still safely identify user trends across plios without harming user privacy. + - `user_identifier`: the unique identifier for the user associated with this session. To preserve user privacy, this field would not contain any Personally Identifiable Information (PII) and would instead be a hashed value. However, the same user will have the same hashed value across different plios. So, you can still safely identify user trends across plios without harming user privacy. However, if you are on the organisational plan and want to map the user ids to your beneficiaries, we provide a way for you to access the true identities of the users interacting with your plio. Read about it [here](https://docs.plio.in/plio-for-teams/). - `answer`: the user's actual answer to the interaction. For `question_type = mcq`, it represents the index of the option selected by the user. In this case, `answer = 1` indicates that the first option has been submitted as the answer. @@ -81,23 +81,20 @@ This section will clarify what each of the `.csv` files in the folder contains: - `session_id`: the unique identifier for the session - - `user_identifier`: the unique identifier for the user associated with this session + - `user_identifier`: the unique identifier for the user associated with this session. To preserve user privacy, this field would not contain any Personally Identifiable Information (PII) and would instead be a hashed value. However, the same user will have the same hashed value across different plios. So, you can still safely identify user trends across plios without harming user privacy. However, if you are on the organisational plan and want to map the user ids to your beneficiaries, we provide a way for you to access the true identities of the users interacting with your plio. Read about it [here](https://docs.plio.in/plio-for-teams/). - `watch_time`: the amount of time the user has watched the video (in seconds) - the most recent session of any user includes the total time across all previous sessions by that user. - - `retention`: the retention array over the video for the given user. The length of the array is the number of seconds of the video and each value represents how many times the user has visited that particular second of the video while watching the plio. - - For example, if your video is 4 minutes long, each row of this column will have 240 values (one for each second). If a user did not reach the end of the video, the values towards the end would be 0. If a user has rewatched the first 10 seconds 5 times, the first 10 values would be 5. Ignore this field if it contains something like `NaN, NaN, ...`. Example: - | session_id | retention | watch_time | user_identifier | - | ---------- | ------------------------------------------ | ---------- | -------------------------------- | - | 1951 | 1,1,1,10,1,2,...,0,0,0,0,0,1,1,1,1,0,0,0,0 | 50 | addfa9b7e234254d26e9c7f2af1005cb | + | session_id | watch_time | user_identifier | + | ---------- | ---------- | -------------------------------- | + | 1951 | 50 | addfa9b7e234254d26e9c7f2af1005cb | - `events.csv`: contains the details of each event in each session of every user. The columns represent the following: - `session_id`: the unique identifier for the session - - `user_identifier`: the unique identifier for the user associated with this session + - `user_identifier`: the unique identifier for the user associated with this session. To preserve user privacy, this field would not contain any Personally Identifiable Information (PII) and would instead be a hashed value. However, the same user will have the same hashed value across different plios. So, you can still safely identify user trends across plios without harming user privacy. However, if you are on the organisational plan and want to map the user ids to your beneficiaries, we provide a way for you to access the true identities of the users interacting with your plio. Read about it [here](https://docs.plio.in/plio-for-teams/). - `event_type`: the type of the event (e.g. `played`, `paused`, etc.). The full list of event types and their meanings can be found in the [Events](#events) section. - `event_player_time`: the current time in the video when the event was triggered (in seconds) - `event_details`: further details for the event based on the event type (e.g. question number for events related to questions, etc.) diff --git a/plio/static/plio/docs/download_csv_README.pdf b/plio/static/plio/docs/download_csv_README.pdf index 53bc6948..30e5e862 100644 Binary files a/plio/static/plio/docs/download_csv_README.pdf and b/plio/static/plio/docs/download_csv_README.pdf differ diff --git a/plio/tests.py b/plio/tests.py index e9c81cb4..8b1e3a6f 100644 --- a/plio/tests.py +++ b/plio/tests.py @@ -197,6 +197,34 @@ def test_user_list_other_plios_in_org(self): # set db connection back to public (default) schema connection.set_schema_to_public() + def test_non_org_user_cannot_list_plios_in_org(self): + """A user who is not in an org should not be able to list plios in org workspace""" + # set db connection to organization schema + connection.set_schema(self.organization.schema_name) + + # create video in the org workspace + video_org = Video.objects.create( + title="Video 1", url="https://www.youtube.com/watch?v=vnISjBbrMUM" + ) + + # create plio within the org workspace by user 1 + Plio.objects.create(name="Plio 1", video=video_org, created_by=self.user) + + # set organization in request and access token for user 2 + self.client.credentials( + HTTP_ORGANIZATION=self.organization.shortcode, + HTTP_AUTHORIZATION="Bearer " + self.access_token_2.token, + ) + + # get plios + response = self.client.get("/api/v1/plios/list_uuid/") + + # no plios should be listed + self.assertEqual(len(response.data["results"]), 0) + + # set db connection back to public (default) schema + connection.set_schema_to_public() + def test_guest_cannot_list_plio_uuids(self): # unset the credentials self.client.credentials() diff --git a/plio/views.py b/plio/views.py index 4cc8dc53..1db3858f 100644 --- a/plio/views.py +++ b/plio/views.py @@ -19,7 +19,8 @@ from google.oauth2 import service_account from organizations.middleware import OrganizationTenantMiddleware -from users.models import OrganizationUser, Role +from organizations.models import Organization +from users.models import OrganizationUser from plio.models import Video, Plio, Item, Question, Image from plio.serializers import ( VideoSerializer, @@ -127,29 +128,36 @@ def perform_create(self, serializer): def perform_update(self, serializer): serializer.save(created_by=self.get_object().created_by) + @property + def organization_shortcode(self): + return OrganizationTenantMiddleware.get_organization_shortcode(self.request) + + @property + def is_organizational_workspace(self): + return self.organization_shortcode != DEFAULT_TENANT_SHORTCODE + @action(detail=False, permission_classes=[IsAuthenticated]) def list_uuid(self, request): """Retrieves a list of UUIDs for all the plios""" queryset = self.get_queryset() - organization_shortcode = ( - OrganizationTenantMiddleware.get_organization_shortcode(self.request) - ) - # personal workspace - if organization_shortcode == DEFAULT_TENANT_SHORTCODE: + if not self.is_organizational_workspace: queryset = queryset.filter(created_by=self.request.user) - - # organizational workspace - if OrganizationUser.objects.filter( - organization__shortcode=organization_shortcode, - user=self.request.user.id, - ).exists(): - # user should be authenticated and a part of the org - queryset = queryset.filter( - Q(is_public=True) - | (Q(is_public=False) & Q(created_by=self.request.user)) - ) + else: + # organizational workspace + if OrganizationUser.objects.filter( + organization__shortcode=self.organization_shortcode, + user=self.request.user.id, + ).exists(): + # user should be a part of the org + queryset = queryset.filter( + Q(is_public=True) + | (Q(is_public=False) & Q(created_by=self.request.user)) + ) + else: + # otherwise, they don't have access to any plio + queryset = Plio.objects.none() num_plios = queryset.count() queryset = self.filter_queryset(queryset) @@ -242,27 +250,12 @@ def download_data(self, request, uuid): # schema name to query in schema_name = OrganizationTenantMiddleware().get_schema(self.request) - # if the plio belongs to an org schema - is_org_plio = False if (schema_name == "public") else True - - # if the plio belongs to an org, is the user requesting the data - # an org admin or not - is_user_org_admin = False - if is_org_plio: - organization_shortcode = ( - OrganizationTenantMiddleware.get_organization_shortcode(self.request) - ) - is_user_org_admin = OrganizationUser.objects.filter( - organization__shortcode=organization_shortcode, - user=self.request.user.id, - role=Role.objects.filter(name="org-admin").first().id, - ).exists() - - # extra data to be passed to the queries - extra_data = { - "is_org_plio": is_org_plio, - "is_user_org_admin": is_user_org_admin, - } + organization = Organization.objects.filter( + shortcode=self.organization_shortcode, + ).first() + is_user_org_admin = organization is not None and request.user.is_org_admin( + organization.id + ) if BIGQUERY["enabled"]: gcp_service_account_file = "/tmp/gcp-service-account.json" @@ -284,12 +277,16 @@ def run_query(cursor, query_method): if BIGQUERY["enabled"]: # execute the sql query using BigQuery client and create a dataframe df = client.query( - query_method(uuid, schema=schema_name, extra_data=extra_data) + query_method( + uuid, schema=schema_name, mask_user_id=is_user_org_admin + ) ).to_dataframe() else: # execute the sql query using postgres DB connection cursor cursor.execute( - query_method(uuid, schema=schema_name, extra_data=extra_data) + query_method( + uuid, schema=schema_name, mask_user_id=is_user_org_admin + ) ) # extract column names as cursor.description returns a tuple columns = [col[0] for col in cursor.description] diff --git a/users/config.py b/users/config.py index 9fbc36bb..e19cbd8d 100644 --- a/users/config.py +++ b/users/config.py @@ -1,2 +1,3 @@ user_status_choices = [("waitlist", "Added to Waitlist"), ("approved", "Approved")] required_third_party_auth_keys = ["unique_id", "api_key"] +org_admin_roles = ["org-admin", "super-admin"] diff --git a/users/models.py b/users/models.py index f3ee03c5..0df0c830 100644 --- a/users/models.py +++ b/users/models.py @@ -3,7 +3,7 @@ from django.contrib.auth.models import AbstractUser from organizations.models import Organization from safedelete.models import SafeDeleteModel, SafeDeleteManager, SOFT_DELETE -from .config import user_status_choices +from .config import user_status_choices, org_admin_roles class UserManager(SafeDeleteManager): @@ -102,7 +102,7 @@ def name(self): def __str__(self): return "%d: %s" % (self.id, self.name) - def get_role_for_organization(self, organization_id): + def get_role_for_organization(self, organization_id: int): """Returns the user's role within the organization provided (None if the user is not a part)""" organization_user = OrganizationUser.objects.filter( organization_id=organization_id, user_id=self.id @@ -112,6 +112,18 @@ def get_role_for_organization(self, organization_id): return organization_user.role + def is_org_admin(self, organization_id: int, return_role: bool = False): + """Whether the user has the privileges of an organisation's admin""" + user_organization_role = self.get_role_for_organization(organization_id) + has_org_admin_access = ( + user_organization_role is not None + and user_organization_role.name in org_admin_roles + ) + if not return_role: + return has_org_admin_access + + return has_org_admin_access, user_organization_role + class UserMeta(SafeDeleteModel): _safedelete_policy = SOFT_DELETE diff --git a/users/permissions.py b/users/permissions.py index d897a9e5..88bbf849 100644 --- a/users/permissions.py +++ b/users/permissions.py @@ -36,13 +36,11 @@ def has_permission(self, request, view): if view.action in ["list", "retrieve", "destroy"]: return True - user_organization_role = request.user.get_role_for_organization( - request.data["organization"] + has_org_admin_access, user_organization_role = request.user.is_org_admin( + organization_id=request.data["organization"], return_role=True ) - if not user_organization_role or user_organization_role.name not in [ - "org-admin", - "super-admin", - ]: + + if not has_org_admin_access: # user doesn't belong to the queried organization # or doesn't have sufficient role within organization return False