Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enables super-admin of an org to view true user identity + code cleanups #273

Merged
merged 9 commits into from
Dec 21, 2021
31 changes: 10 additions & 21 deletions plio/queries.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
from plio.settings import BIGQUERY


def get_plio_details_query(plio_uuid: str, schema: str, extra_data: dict):
def get_plio_details_query(plio_uuid: str, schema: str, **kwargs):
"""
Returns the details for the given plio

plio_uuid: The plio to fetch the details for.
schema: The schema from which the tables are to be accessed.
extra_data: Any extra data that's needed to complete the query. (not required here)
"""
return f"""
SELECT
Expand All @@ -24,24 +23,20 @@ def get_plio_details_query(plio_uuid: str, schema: str, extra_data: dict):
WHERE plio.uuid = '{plio_uuid}'"""


def get_sessions_dump_query(plio_uuid: str, schema: str, extra_data: dict):
def get_sessions_dump_query(plio_uuid: str, schema: str, mask_user_id: bool = True):
"""
Returns the dump of all the sessions for the given plio

plio_uuid: The plio to fetch the details for.
schema: The schema from which the tables are to be accessed.
extra_data: Any extra data that's needed to complete the query.
`is_org_plio`: If the `plio_uuid` belongs to an org workspace
`is_user_org_admin`: If the user making this query is that
org's admin
mask_user_id: whether the user id should be masked
"""
return f"""
SELECT
session.id as session_id,
session.retention,
session.watch_time,
CASE
WHEN {str(extra_data["is_user_org_admin"]).lower()} THEN COALESCE(users.email, users.mobile, users.unique_id)
WHEN {str(mask_user_id).lower()} THEN COALESCE(users.email, users.mobile, CONCAT('unique_id:', users.unique_id))
ELSE
{'TO_HEX(MD5(CAST(session.user_id as STRING)))' if BIGQUERY['enabled'] else 'MD5(session.user_id::varchar(255))'}
END AS user_identifier
Expand All @@ -51,22 +46,19 @@ def get_sessions_dump_query(plio_uuid: str, schema: str, extra_data: dict):
WHERE plio.uuid = '{plio_uuid}'"""


def get_responses_dump_query(plio_uuid: str, schema: str, extra_data: dict):
def get_responses_dump_query(plio_uuid: str, schema: str, mask_user_id: bool = True):
"""
Returns the dump of all the session responses for the given plio

plio_uuid: The plio to fetch the details for.
schema: The schema from which the tables are to be accessed.
extra_data: Any extra data that's needed to complete the query.
`is_org_plio`: If the `plio_uuid` belongs to an org workspace
`is_user_org_admin`: If the user making this query is that
org's admin
mask_user_id: whether the user id should be masked
"""
return f"""
SELECT
session.id as session_id,
CASE
WHEN {str(extra_data["is_user_org_admin"]).lower()} THEN COALESCE(users.email, users.mobile, users.unique_id)
WHEN {str(mask_user_id).lower()} THEN COALESCE(users.email, users.mobile, CONCAT('unique_id:', users.unique_id))
ELSE
{'TO_HEX(MD5(CAST(session.user_id as STRING)))' if BIGQUERY['enabled'] else 'MD5(session.user_id::varchar(255))'}
END AS user_identifier,
Expand All @@ -82,22 +74,19 @@ def get_responses_dump_query(plio_uuid: str, schema: str, extra_data: dict):
WHERE plio.uuid = '{plio_uuid}'"""


def get_events_query(plio_uuid: str, schema: str, extra_data: dict):
def get_events_query(plio_uuid: str, schema: str, mask_user_id: bool = True):
"""
Returns the dump of all events across all sessions for the given plio

plio_uuid: The plio to fetch the details for.
schema: The schema from which the tables are to be accessed.
extra_data: Any extra data that's needed to complete the query.
`is_org_plio`: If the `plio_uuid` belongs to an org workspace
`is_user_org_admin`: If the user making this query is that
org's admin
mask_user_id: whether the user id should be masked
"""
return f"""
SELECT
session.id as session_id,
CASE
WHEN {str(extra_data["is_user_org_admin"]).lower()} THEN COALESCE(users.email, users.mobile, users.unique_id)
WHEN {str(mask_user_id).lower()} THEN COALESCE(users.email, users.mobile, CONCAT('unique_id:', users.unique_id))
ELSE
{'TO_HEX(MD5(CAST(session.user_id as STRING)))' if BIGQUERY['enabled'] else 'MD5(session.user_id::varchar(255))'}
END AS user_identifier,
Expand Down
15 changes: 6 additions & 9 deletions plio/static/plio/docs/download_csv_README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ This section will clarify what each of the `.csv` files in the folder contains:

- `session_id`: the unique identifier for the session

- `user_identifier`: the unique identifier for the user associated with this session. To preserve user privacy, this field would not contain any Personally Identifiable Information (PII) and would instead be a hashed value. However, the same user will have the same hashed value across different plios. So, you can still safely identify user trends across plios without harming user privacy.
- `user_identifier`: the unique identifier for the user associated with this session. To preserve user privacy, this field would not contain any Personally Identifiable Information (PII) and would instead be a hashed value. However, the same user will have the same hashed value across different plios. So, you can still safely identify user trends across plios without harming user privacy. However, if you are on the organisational plan and want to map the user ids to your beneficiaries, we provide a way for you to access the true identities of the users interacting with your plio. Read about it [here](https://docs.plio.in/plio-for-teams/).

- `answer`: the user's actual answer to the interaction. For `question_type = mcq`, it represents the index of the option selected by the user. In this case, `answer = 1` indicates that the first option has been submitted as the answer.

Expand All @@ -81,23 +81,20 @@ This section will clarify what each of the `.csv` files in the folder contains:

- `session_id`: the unique identifier for the session

- `user_identifier`: the unique identifier for the user associated with this session
- `user_identifier`: the unique identifier for the user associated with this session. To preserve user privacy, this field would not contain any Personally Identifiable Information (PII) and would instead be a hashed value. However, the same user will have the same hashed value across different plios. So, you can still safely identify user trends across plios without harming user privacy. However, if you are on the organisational plan and want to map the user ids to your beneficiaries, we provide a way for you to access the true identities of the users interacting with your plio. Read about it [here](https://docs.plio.in/plio-for-teams/).
dalmia marked this conversation as resolved.
Show resolved Hide resolved

- `watch_time`: the amount of time the user has watched the video (in seconds) - the most recent session of any user includes the total time across all previous sessions by that user.

- `retention`: the retention array over the video for the given user. The length of the array is the number of seconds of the video and each value represents how many times the user has visited that particular second of the video while watching the plio.

For example, if your video is 4 minutes long, each row of this column will have 240 values (one for each second). If a user did not reach the end of the video, the values towards the end would be 0. If a user has rewatched the first 10 seconds 5 times, the first 10 values would be 5. Ignore this field if it contains something like `NaN, NaN, ...`.

Example:
| session_id | retention | watch_time | user_identifier |
| ---------- | ------------------------------------------ | ---------- | -------------------------------- |
| 1951 | 1,1,1,10,1,2,...,0,0,0,0,0,1,1,1,1,0,0,0,0 | 50 | addfa9b7e234254d26e9c7f2af1005cb |
| session_id | watch_time | user_identifier |
| ---------- | ---------- | -------------------------------- |
| 1951 | 50 | addfa9b7e234254d26e9c7f2af1005cb |

- `events.csv`: contains the details of each event in each session of every user. The columns represent the following:

- `session_id`: the unique identifier for the session
- `user_identifier`: the unique identifier for the user associated with this session
- `user_identifier`: the unique identifier for the user associated with this session. To preserve user privacy, this field would not contain any Personally Identifiable Information (PII) and would instead be a hashed value. However, the same user will have the same hashed value across different plios. So, you can still safely identify user trends across plios without harming user privacy. However, if you are on the organisational plan and want to map the user ids to your beneficiaries, we provide a way for you to access the true identities of the users interacting with your plio. Read about it [here](https://docs.plio.in/plio-for-teams/).
- `event_type`: the type of the event (e.g. `played`, `paused`, etc.). The full list of event types and their meanings can be found in the [Events](#events) section.
- `event_player_time`: the current time in the video when the event was triggered (in seconds)
- `event_details`: further details for the event based on the event type (e.g. question number for events related to questions, etc.)
Expand Down
Binary file modified plio/static/plio/docs/download_csv_README.pdf
Binary file not shown.
28 changes: 28 additions & 0 deletions plio/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,34 @@ def test_user_list_other_plios_in_org(self):
# set db connection back to public (default) schema
connection.set_schema_to_public()

def test_non_org_user_cannot_list_plios_in_org(self):
"""A user who is not in an org should not be able to list plios in org workspace"""
# set db connection to organization schema
connection.set_schema(self.organization.schema_name)

# create video in the org workspace
video_org = Video.objects.create(
title="Video 1", url="https://www.youtube.com/watch?v=vnISjBbrMUM"
)

# create plio within the org workspace by user 1
Plio.objects.create(name="Plio 1", video=video_org, created_by=self.user)

# set organization in request and access token for user 2
self.client.credentials(
HTTP_ORGANIZATION=self.organization.shortcode,
HTTP_AUTHORIZATION="Bearer " + self.access_token_2.token,
)

# get plios
response = self.client.get("/api/v1/plios/list_uuid/")

# no plios should be listed
self.assertEqual(len(response.data["results"]), 0)

# set db connection back to public (default) schema
connection.set_schema_to_public()

def test_guest_cannot_list_plio_uuids(self):
# unset the credentials
self.client.credentials()
Expand Down
77 changes: 37 additions & 40 deletions plio/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
from google.oauth2 import service_account

from organizations.middleware import OrganizationTenantMiddleware
from users.models import OrganizationUser, Role
from organizations.models import Organization
from users.models import OrganizationUser
from plio.models import Video, Plio, Item, Question, Image
from plio.serializers import (
VideoSerializer,
Expand Down Expand Up @@ -127,29 +128,36 @@ def perform_create(self, serializer):
def perform_update(self, serializer):
serializer.save(created_by=self.get_object().created_by)

@property
def organization_shortcode(self):
return OrganizationTenantMiddleware.get_organization_shortcode(self.request)

@property
def is_organizational_workspace(self):
return self.organization_shortcode != DEFAULT_TENANT_SHORTCODE

@action(detail=False, permission_classes=[IsAuthenticated])
def list_uuid(self, request):
"""Retrieves a list of UUIDs for all the plios"""
queryset = self.get_queryset()

organization_shortcode = (
OrganizationTenantMiddleware.get_organization_shortcode(self.request)
)

# personal workspace
if organization_shortcode == DEFAULT_TENANT_SHORTCODE:
if not self.is_organizational_workspace:
queryset = queryset.filter(created_by=self.request.user)

# organizational workspace
if OrganizationUser.objects.filter(
organization__shortcode=organization_shortcode,
user=self.request.user.id,
).exists():
# user should be authenticated and a part of the org
queryset = queryset.filter(
Q(is_public=True)
| (Q(is_public=False) & Q(created_by=self.request.user))
)
else:
# organizational workspace
if OrganizationUser.objects.filter(
organization__shortcode=self.organization_shortcode,
user=self.request.user.id,
).exists():
# user should be a part of the org
queryset = queryset.filter(
Q(is_public=True)
| (Q(is_public=False) & Q(created_by=self.request.user))
)
else:
# otherwise, they don't have access to any plio
queryset = Plio.objects.none()

num_plios = queryset.count()
queryset = self.filter_queryset(queryset)
Expand Down Expand Up @@ -242,27 +250,12 @@ def download_data(self, request, uuid):
# schema name to query in
schema_name = OrganizationTenantMiddleware().get_schema(self.request)

# if the plio belongs to an org schema
is_org_plio = False if (schema_name == "public") else True

# if the plio belongs to an org, is the user requesting the data
# an org admin or not
is_user_org_admin = False
if is_org_plio:
organization_shortcode = (
OrganizationTenantMiddleware.get_organization_shortcode(self.request)
)
is_user_org_admin = OrganizationUser.objects.filter(
organization__shortcode=organization_shortcode,
user=self.request.user.id,
role=Role.objects.filter(name="org-admin").first().id,
).exists()

# extra data to be passed to the queries
extra_data = {
"is_org_plio": is_org_plio,
"is_user_org_admin": is_user_org_admin,
}
organization = Organization.objects.filter(
shortcode=self.organization_shortcode,
).first()
is_user_org_admin = organization is not None and request.user.is_org_admin(
organization.id
)

if BIGQUERY["enabled"]:
gcp_service_account_file = "/tmp/gcp-service-account.json"
Expand All @@ -284,12 +277,16 @@ def run_query(cursor, query_method):
if BIGQUERY["enabled"]:
# execute the sql query using BigQuery client and create a dataframe
df = client.query(
query_method(uuid, schema=schema_name, extra_data=extra_data)
query_method(
uuid, schema=schema_name, mask_user_id=is_user_org_admin
)
).to_dataframe()
else:
# execute the sql query using postgres DB connection cursor
cursor.execute(
query_method(uuid, schema=schema_name, extra_data=extra_data)
query_method(
uuid, schema=schema_name, mask_user_id=is_user_org_admin
)
)
# extract column names as cursor.description returns a tuple
columns = [col[0] for col in cursor.description]
Expand Down
1 change: 1 addition & 0 deletions users/config.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
user_status_choices = [("waitlist", "Added to Waitlist"), ("approved", "Approved")]
required_third_party_auth_keys = ["unique_id", "api_key"]
org_admin_roles = ["org-admin", "super-admin"]
16 changes: 14 additions & 2 deletions users/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from django.contrib.auth.models import AbstractUser
from organizations.models import Organization
from safedelete.models import SafeDeleteModel, SafeDeleteManager, SOFT_DELETE
from .config import user_status_choices
from .config import user_status_choices, org_admin_roles


class UserManager(SafeDeleteManager):
Expand Down Expand Up @@ -102,7 +102,7 @@ def name(self):
def __str__(self):
return "%d: %s" % (self.id, self.name)

def get_role_for_organization(self, organization_id):
def get_role_for_organization(self, organization_id: int):
"""Returns the user's role within the organization provided (None if the user is not a part)"""
organization_user = OrganizationUser.objects.filter(
organization_id=organization_id, user_id=self.id
Expand All @@ -112,6 +112,18 @@ def get_role_for_organization(self, organization_id):

return organization_user.role

def is_org_admin(self, organization_id: int, return_role: bool = False):
"""Whether the user has the privileges of an organisation's admin"""
user_organization_role = self.get_role_for_organization(organization_id)
has_org_admin_access = (
user_organization_role is not None
and user_organization_role.name in org_admin_roles
)
if not return_role:
return has_org_admin_access

return has_org_admin_access, user_organization_role


class UserMeta(SafeDeleteModel):
_safedelete_policy = SOFT_DELETE
Expand Down
10 changes: 4 additions & 6 deletions users/permissions.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,11 @@ def has_permission(self, request, view):
if view.action in ["list", "retrieve", "destroy"]:
return True

user_organization_role = request.user.get_role_for_organization(
request.data["organization"]
has_org_admin_access, user_organization_role = request.user.is_org_admin(
organization_id=request.data["organization"], return_role=True
)
if not user_organization_role or user_organization_role.name not in [
"org-admin",
"super-admin",
]:

if not has_org_admin_access:
# user doesn't belong to the queried organization
# or doesn't have sufficient role within organization
return False
Expand Down