From 53e44fff938d50ed3a5845c7edb386a8a95031f8 Mon Sep 17 00:00:00 2001 From: masterlittle Date: Fri, 10 Mar 2023 11:59:06 +0530 Subject: [PATCH 01/11] Add models for pii data --- redash/models/__init__.py | 92 ++++++++++++++++++++++++++----- redash/query_runner/big_query.py | 27 +++++---- redash/tasks/queries/execution.py | 3 +- 3 files changed, 95 insertions(+), 27 deletions(-) diff --git a/redash/models/__init__.py b/redash/models/__init__.py index cfd081ae44..a45705d109 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -3,13 +3,16 @@ import logging import time import numbers +from typing import List + import pytz -from sqlalchemy import distinct, or_, and_, UniqueConstraint, cast +from sqlalchemy import distinct, or_, and_, UniqueConstraint, ForeignKey from sqlalchemy.dialects import postgresql +from sqlalchemy.dialects.postgresql import ARRAY from sqlalchemy.event import listens_for from sqlalchemy.ext.hybrid import hybrid_property -from sqlalchemy.orm import backref, contains_eager, joinedload, subqueryload, load_only +from sqlalchemy.orm import backref, contains_eager, joinedload, load_only from sqlalchemy.orm.exc import NoResultFound # noqa: F401 from sqlalchemy import func from sqlalchemy_utils import generic_relationship @@ -27,9 +30,6 @@ with_ssh_tunnel, get_configuration_schema_for_query_runner_type, get_query_runner, - TYPE_BOOLEAN, - TYPE_DATE, - TYPE_DATETIME, BaseQueryRunner) from redash.utils import ( generate_token, @@ -48,7 +48,6 @@ from .organizations import Organization from .types import ( EncryptedConfiguration, - Configuration, MutableDict, MutableList, PseudoJSON, @@ -82,6 +81,69 @@ def get(self, query_id): scheduled_queries_executions = ScheduledQueriesExecutions() +class PolicyTag(db.Model): + policy_id = Column(db.String(100)) + policy_name = Column(db.String(50), primary_key=True) + created_at = Column(db.DateTime(True), server_default=func.now()) + updated_at = Column(db.DateTime(True), server_default=func.now(), onupdate=func.now()) + + +class QueryIdPolicyTagMapping(db.Model): + query_id = Column(db.String(100), primary_key=True) + policy_name = Column(db.String(50), ForeignKey(PolicyTag.policy_name), primary_key=True) + created_at = Column(db.DateTime(True), server_default=func.now()) + updated_at = Column(db.DateTime(True), server_default=func.now(), onupdate=func.now()) + + @classmethod + def get_by_query_id(cls, query_id: int): + return cls.policy_name \ + .filter(cls.query_id == query_id) \ + .all() + + @classmethod + def update_table(cls, query_hash, values): + QueryIdPolicyTagMapping.query.filter(QueryIdPolicyTagMapping.query_id == query_hash).delete() + db.session.add_all(values) + db.session.commit() + + +class UserGroupMemberList(db.Model): + user_emails = Column(ARRAY(db.String)) + policy_name = Column(db.String(50), ForeignKey(PolicyTag.policy_name), primary_key=True) + created_at = Column(db.DateTime(True), server_default=func.now()) + updated_at = Column(db.DateTime(True), server_default=func.now(), onupdate=func.now()) + + @classmethod + def get_by_policy_name(cls, policy_names: List[str], user_email: str): + return cls.policy_name \ + .filter(cls.policy_name.in_(policy_names)) \ + .filter(cls.user_emails.any(user_email)) \ + .one() + + +class TableColumnPolicyTagMapping(db.Model): + project_id = Column(db.String(50), primary_key=True) + dataset_id = Column(db.String(50), primary_key=True) + table_name = Column(db.String(100), primary_key=True) + policy_tag = Column(db.String(50), ForeignKey(PolicyTag.policy_name)) + column_name = Column(db.String(50), primary_key=True) + created_at = Column(db.DateTime(True), server_default=func.now()) + updated_at = Column(db.DateTime(True), server_default=func.now(), onupdate=func.now()) + + @classmethod + def get_policy_name(cls, project_name: str, dataset_id: str, table_name: str, column_name: str): + return cls.policy_tag \ + .filter(cls.project_id == project_name) \ + .filter(cls.dataset_id == dataset_id) \ + .filter(cls.table_name == table_name) \ + .filter(cls.column_name == column_name) \ + .one() + + @classmethod + def get_all(cls): + return db.session.query(TableColumnPolicyTagMapping) + + @generic_repr("id", "name", "type", "org_id", "created_at") class DataSource(BelongsToOrgMixin, db.Model): id = primary_key("DataSource") @@ -625,10 +687,10 @@ def past_scheduled_queries(cls): query for query in queries if query.schedule["until"] is not None - and pytz.utc.localize( + and pytz.utc.localize( datetime.datetime.strptime(query.schedule["until"], "%Y-%m-%d") ) - <= now + <= now ] @classmethod @@ -736,11 +798,11 @@ def recent(cls, group_ids, user_id=None, limit=20): Event.action.in_( ["edit", "execute", "edit_name", "edit_description", "view_source"] ), - Event.object_id != None, + Event.object_id is not None, Event.object_type == "query", DataSourceGroup.group_id.in_(group_ids), - or_(Query.is_draft == False, Query.user_id == user_id), - Query.is_archived == False, + or_(Query.is_draft is False, Query.user_id == user_id), + Query.is_archived is False, ) .group_by(Event.object_id, Query.id) .order_by(db.desc(db.func.count(0))) @@ -1128,7 +1190,7 @@ def all(cls, org, group_ids, user_id): DataSourceGroup, Query.data_source_id == DataSourceGroup.data_source_id ) .filter( - Dashboard.is_archived == False, + Dashboard.is_archived is False, ( DataSourceGroup.group_id.in_(group_ids) | (Dashboard.user_id == user_id) @@ -1138,7 +1200,7 @@ def all(cls, org, group_ids, user_id): ) query = query.filter( - or_(Dashboard.user_id == user_id, Dashboard.is_draft == False) + or_(Dashboard.user_id == user_id, Dashboard.is_draft is False) ) return query @@ -1335,14 +1397,14 @@ class ApiKey(TimestampMixin, GFKBase, db.Model): @classmethod def get_by_api_key(cls, api_key): - return cls.query.filter(cls.api_key == api_key, cls.active == True).one() + return cls.query.filter(cls.api_key == api_key, cls.active is True).one() @classmethod def get_by_object(cls, object): return cls.query.filter( cls.object_type == object.__class__.__tablename__, cls.object_id == object.id, - cls.active == True, + cls.active is True, ).first() @classmethod diff --git a/redash/query_runner/big_query.py b/redash/query_runner/big_query.py index 3a3edf35c9..47d1f0e1a9 100644 --- a/redash/query_runner/big_query.py +++ b/redash/query_runner/big_query.py @@ -1,7 +1,6 @@ import datetime import logging import os -import sys import time from base64 import b64decode from collections import defaultdict @@ -9,11 +8,11 @@ from typing import Dict, List, Set, Tuple import httplib2 -import requests from cachetools import cached, TTLCache from redash import settings -from redash.models import User +from redash.models import UserGroupMemberList, TableColumnPolicyTagMapping, QueryIdPolicyTagMapping +# from redash.models import User, TableColumnPolicyTagMapping, UserGroupMemberList from redash.query_runner import * from redash.settings import parse_boolean from redash.utils import json_dumps, json_loads @@ -235,9 +234,9 @@ def _get_columns_used_in_query(self, query_plan): @cached(cache=TTLCache(maxsize=3000, ttl=600)) def _get_policy_tags(self): - return {"table": ["c1", "c2"]} + return TableColumnPolicyTagMapping.get_all() - def _get_query_result(self, jobs, query): + def _get_query_result(self, jobs, query, user): project_id = self._get_project_id() job_data = self._get_job_data(query) insert_response = jobs.insert(projectId=project_id, body=job_data).execute() @@ -258,7 +257,7 @@ def _get_query_result(self, jobs, query): table_column_map: Dict[str, Set[str]] = self._get_columns_used_in_query( job_stats["statistics"]["query"]["queryPlan"]) get_policy_tag_mapping: Dict[str, List[Tuple[str, str]]] = self._get_policy_tags() - + logger.debug(get_policy_tag_mapping) results = defaultdict(set) policy_tags_accessed = set() for table_name, column_list in table_column_map.items(): @@ -269,8 +268,16 @@ def _get_query_result(self, jobs, query): policy_tags_accessed.add(tag) if policy_tags_accessed: # Check if user email has access - raise ValueError("PII") - + access = UserGroupMemberList.get_by_policy_name(policy_names=list(policy_tags_accessed), + user_email=user.email) + if len(access) != len(policy_tags_accessed): + missing_access = policy_tags_accessed - set(access) + raise Exception(f"User does not have permission to query {missing_access}") + + query_hash = self.gen_query_hash(query) + query_policy_tags = [QueryIdPolicyTagMapping(query_id=query_hash, policy_tag=tag) for tag in + policy_tags_accessed] + QueryIdPolicyTagMapping.update_table(query_hash, query_policy_tags) rows = [] while ("rows" in query_reply) and current_row < int(query_reply["totalRows"]): @@ -360,7 +367,7 @@ def get_schema(self, get_stats=False): return list(schema.values()) - def run_query(self, query, user: User): + def run_query(self, query, user): logger.debug("BigQuery got query: %s", query) bigquery_service = self._get_bigquery_service() @@ -379,7 +386,7 @@ def run_query(self, query, user: User): % (limitMB, processedMB), ) - data = self._get_query_result(jobs, query) + data = self._get_query_result(jobs, query, user) error = None json_data = json_dumps(data, ignore_nan=True) diff --git a/redash/tasks/queries/execution.py b/redash/tasks/queries/execution.py index 2fe592801f..4b02e00d7c 100644 --- a/redash/tasks/queries/execution.py +++ b/redash/tasks/queries/execution.py @@ -12,7 +12,7 @@ from redash.tasks.worker import Queue, Job from redash.tasks.alerts import check_alerts_for_query from redash.tasks.failure_report import track_failure -from redash.utils import gen_query_hash, json_dumps, utcnow +from redash.utils import gen_query_hash, utcnow from redash.worker import get_job_logger logger = get_job_logger(__name__) @@ -183,7 +183,6 @@ def run(self): query_runner = self.data_source.query_runner annotated_query = self._annotate_query(query_runner) - try: data, error = query_runner.run_query(annotated_query, self.user) except Exception as e: From 55acd0cbf4f48cbf3d342aa365ce62fb3deaefde Mon Sep 17 00:00:00 2001 From: masterlittle Date: Mon, 13 Mar 2023 19:26:33 +0530 Subject: [PATCH 02/11] Add policy tag checks and additional tables in redash db --- redash/handlers/query_results.py | 18 ++++++ redash/models/__init__.py | 23 +++---- redash/query_runner/athena.py | 4 +- redash/query_runner/big_query.py | 66 ++++++++++++++------- redash/serializers/__init__.py | 2 +- redash/utils/permission_denied_exception.py | 2 + tests/handlers/test_visualizations.py | 4 +- tests/tasks/test_alerts.py | 6 +- 8 files changed, 83 insertions(+), 42 deletions(-) create mode 100644 redash/utils/permission_denied_exception.py diff --git a/redash/handlers/query_results.py b/redash/handlers/query_results.py index cb8b8d14f2..d8f5514d36 100644 --- a/redash/handlers/query_results.py +++ b/redash/handlers/query_results.py @@ -2,12 +2,15 @@ import time import unicodedata +from typing import List + from flask import make_response, request from flask_login import current_user from flask_restful import abort from werkzeug.urls import url_quote from redash import models, settings from redash.handlers.base import BaseResource, get_object_or_404, record_event +from redash.models import UserGroupMemberList from redash.permissions import ( has_access, not_view_only, @@ -36,6 +39,7 @@ serialize_query_result_to_xlsx, serialize_job, ) +from redash.utils.permission_denied_exception import PermissionDeniedException def error_response(message, http_status=400): @@ -319,6 +323,18 @@ def post(self, query_id): else: return error_messages["no_permission"] + def check_for_policy_tag_permissions(self, query_hash: str): + query_policy_tags: List[str] = get_object_or_404(models.QueryIdPolicyTagMapping.get_by_query_id, + query_hash + ) + if query_policy_tags: + access: List[str] = UserGroupMemberList.get_by_policy_name(policy_names=list(query_policy_tags), + user_email=self.current_user.email) + if len(access) != len(query_policy_tags): + missing_access_policy_tags = set(query_policy_tags) - set(access) + abort(400, + message=f"User {self.current_user.email} does not have permission to query {missing_access_policy_tags}") + @require_any_of_permission(("view_query", "execute_query")) def get(self, query_id=None, query_result_id=None, filetype="json"): """ @@ -349,11 +365,13 @@ def get(self, query_id=None, query_result_id=None, filetype="json"): query_result = get_object_or_404( models.QueryResult.get_by_id_and_org, query_result_id, self.current_org ) + self.check_for_policy_tag_permissions(query_result.query_hash) if query_id is not None: query = get_object_or_404( models.Query.get_by_id_and_org, query_id, self.current_org ) + self.check_for_policy_tag_permissions(query.query_hash) if ( query_result is None diff --git a/redash/models/__init__.py b/redash/models/__init__.py index a45705d109..d2eb65ac07 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -89,20 +89,21 @@ class PolicyTag(db.Model): class QueryIdPolicyTagMapping(db.Model): - query_id = Column(db.String(100), primary_key=True) + query_hash = Column(db.String(100), primary_key=True) policy_name = Column(db.String(50), ForeignKey(PolicyTag.policy_name), primary_key=True) - created_at = Column(db.DateTime(True), server_default=func.now()) + created_at = Column(db.DateTime(True), server_default=func.now(), index=True) updated_at = Column(db.DateTime(True), server_default=func.now(), onupdate=func.now()) @classmethod - def get_by_query_id(cls, query_id: int): - return cls.policy_name \ - .filter(cls.query_id == query_id) \ + def get_by_query_id(cls, query_id: str): + return db.session.query(QueryIdPolicyTagMapping.policy_name) \ + .filter(QueryIdPolicyTagMapping.query_hash == query_id)\ + .distinct()\ .all() @classmethod def update_table(cls, query_hash, values): - QueryIdPolicyTagMapping.query.filter(QueryIdPolicyTagMapping.query_id == query_hash).delete() + QueryIdPolicyTagMapping.query.filter(QueryIdPolicyTagMapping.query_hash == query_hash).delete() db.session.add_all(values) db.session.commit() @@ -115,10 +116,10 @@ class UserGroupMemberList(db.Model): @classmethod def get_by_policy_name(cls, policy_names: List[str], user_email: str): - return cls.policy_name \ - .filter(cls.policy_name.in_(policy_names)) \ - .filter(cls.user_emails.any(user_email)) \ - .one() + return db.session.query(UserGroupMemberList.policy_name) \ + .filter(UserGroupMemberList.policy_name.in_(policy_names)) \ + .filter(UserGroupMemberList.user_emails.any(user_email)) \ + .all() class TableColumnPolicyTagMapping(db.Model): @@ -141,7 +142,7 @@ def get_policy_name(cls, project_name: str, dataset_id: str, table_name: str, co @classmethod def get_all(cls): - return db.session.query(TableColumnPolicyTagMapping) + return db.session.query(TableColumnPolicyTagMapping).all() @generic_repr("id", "name", "type", "org_id", "created_at") diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index 2fbd962490..321080766e 100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -245,7 +245,7 @@ def run_query(self, query, user): except AttributeError as e: logger.debug("Athena Upstream can't get data_scanned_in_bytes: %s", e) try: - athena_query_id = cursor.query_id + athena_query_id = cursor.query_hash except AttributeError as e: logger.debug("Athena Upstream can't get query_id: %s", e) @@ -263,7 +263,7 @@ def run_query(self, query, user): json_data = json_dumps(data, ignore_nan=True) error = None except Exception: - if cursor.query_id: + if cursor.query_hash: cursor.cancel() raise diff --git a/redash/query_runner/big_query.py b/redash/query_runner/big_query.py index 47d1f0e1a9..388772ba65 100644 --- a/redash/query_runner/big_query.py +++ b/redash/query_runner/big_query.py @@ -16,6 +16,7 @@ from redash.query_runner import * from redash.settings import parse_boolean from redash.utils import json_dumps, json_loads +from redash.utils.permission_denied_exception import PermissionDeniedException logger = logging.getLogger(__name__) @@ -233,8 +234,13 @@ def _get_columns_used_in_query(self, query_plan): return results @cached(cache=TTLCache(maxsize=3000, ttl=600)) - def _get_policy_tags(self): - return TableColumnPolicyTagMapping.get_all() + def _get_policy_tags(self) -> Dict[str, List[Tuple[str, str]]]: + all_policy_tag_mappings: List[TableColumnPolicyTagMapping] = TableColumnPolicyTagMapping.get_all() + grouped_map = defaultdict(list) + for policy_tags in all_policy_tag_mappings: + policy_tag_table_name = f"{policy_tags.project_id}.{policy_tags.dataset_id}.{policy_tags.table_name}" + grouped_map[policy_tag_table_name].append((policy_tags.column_name, policy_tags.policy_tag)) + return grouped_map def _get_query_result(self, jobs, query, user): project_id = self._get_project_id() @@ -256,28 +262,13 @@ def _get_query_result(self, jobs, query, user): **{"projectId": project_id, "jobId": self.current_job_id, "location": self._get_location()}).execute() table_column_map: Dict[str, Set[str]] = self._get_columns_used_in_query( job_stats["statistics"]["query"]["queryPlan"]) - get_policy_tag_mapping: Dict[str, List[Tuple[str, str]]] = self._get_policy_tags() - logger.debug(get_policy_tag_mapping) - results = defaultdict(set) - policy_tags_accessed = set() - for table_name, column_list in table_column_map.items(): - if table_name in get_policy_tag_mapping: - for column, tag in get_policy_tag_mapping[table_name]: - if column in column_list: - results[table_name].add(column) - policy_tags_accessed.add(tag) - if policy_tags_accessed: - # Check if user email has access - access = UserGroupMemberList.get_by_policy_name(policy_names=list(policy_tags_accessed), - user_email=user.email) - if len(access) != len(policy_tags_accessed): - missing_access = policy_tags_accessed - set(access) - raise Exception(f"User does not have permission to query {missing_access}") + get_policy_tag_mapping: Dict[str, Tuple[str, str]] = self._get_policy_tags() + policy_tags_accessed = self.get_policy_tags_in_query(get_policy_tag_mapping, table_column_map) + + self.update_query_policy_tags(policy_tags_accessed, query) + + self._check_for_policy_tags_in_query(policy_tags_accessed, user) - query_hash = self.gen_query_hash(query) - query_policy_tags = [QueryIdPolicyTagMapping(query_id=query_hash, policy_tag=tag) for tag in - policy_tags_accessed] - QueryIdPolicyTagMapping.update_table(query_hash, query_policy_tags) rows = [] while ("rows" in query_reply) and current_row < int(query_reply["totalRows"]): @@ -320,6 +311,35 @@ def _get_query_result(self, jobs, query, user): return data + def _check_for_policy_tags_in_query(self, policy_tags_accessed, user): + if policy_tags_accessed: + # Check if user email has access + access: List[str] = UserGroupMemberList.get_by_policy_name(policy_names=list(policy_tags_accessed.keys()), + user_email=user.email) + if len(access) != len(policy_tags_accessed): + missing_access_policy_tags = set(policy_tags_accessed.keys()) - set(access) + filtered_results: dict = {k: v for k, v in policy_tags_accessed.items() if + k in missing_access_policy_tags} + f = [f"Policy tag- {k}, Columns accessed- {v}" for k, v in filtered_results.items()] + + raise PermissionDeniedException( + "User does not have permission to query columns. \n{} ".format('\n'.join(f))) + + def update_query_policy_tags(self, policy_tags_accessed, query): + query_hash = self.gen_query_hash(query) + query_policy_tags = [QueryIdPolicyTagMapping(query_id=query_hash, policy_name=tag) for tag in + policy_tags_accessed.keys()] + QueryIdPolicyTagMapping.update_table(query_hash, query_policy_tags) + + def get_policy_tags_in_query(self, get_policy_tag_mapping, table_column_map): + policy_tags_accessed = defaultdict(set) + for table_name, column_list in table_column_map.items(): + if table_name in get_policy_tag_mapping.keys(): + for column, tag in get_policy_tag_mapping[table_name]: + if column in column_list: + policy_tags_accessed[tag].add((table_name, column)) + return policy_tags_accessed + def _get_project_datasets(self, project_id): result = [] service = self._get_bigquery_service() diff --git a/redash/serializers/__init__.py b/redash/serializers/__init__.py index 6105364c49..19a30e19b4 100644 --- a/redash/serializers/__init__.py +++ b/redash/serializers/__init__.py @@ -209,7 +209,7 @@ def serialize_alert(alert, full=True): d["query"] = serialize_query(alert.query_rel) d["user"] = alert.user.to_dict() else: - d["query_id"] = alert.query_id + d["query_id"] = alert.query_hash d["user_id"] = alert.user_id return d diff --git a/redash/utils/permission_denied_exception.py b/redash/utils/permission_denied_exception.py new file mode 100644 index 0000000000..e51660f678 --- /dev/null +++ b/redash/utils/permission_denied_exception.py @@ -0,0 +1,2 @@ +class PermissionDeniedException(Exception): + pass diff --git a/tests/handlers/test_visualizations.py b/tests/handlers/test_visualizations.py index 42a5b61668..bf5d6d5097 100644 --- a/tests/handlers/test_visualizations.py +++ b/tests/handlers/test_visualizations.py @@ -105,7 +105,7 @@ def test_only_owner_collaborator_or_admin_can_edit_visualization(self): self.make_request( "post", - "/api/queries/{}/acl".format(vis.query_id), + "/api/queries/{}/acl".format(vis.query_hash), data={"access_type": "modify", "user_id": other_user.id}, ) rv = self.make_request("post", path, user=other_user, data=data) @@ -139,7 +139,7 @@ def test_only_owner_collaborator_or_admin_can_delete_visualization(self): self.make_request( "post", - "/api/queries/{}/acl".format(vis.query_id), + "/api/queries/{}/acl".format(vis.query_hash), data={"access_type": "modify", "user_id": other_user.id}, ) diff --git a/tests/tasks/test_alerts.py b/tests/tasks/test_alerts.py index 305e782436..49c3fd8234 100644 --- a/tests/tasks/test_alerts.py +++ b/tests/tasks/test_alerts.py @@ -16,7 +16,7 @@ def test_notifies_subscribers_when_should(self): Alert.evaluate = MagicMock(return_value=Alert.TRIGGERED_STATE) alert = self.factory.create_alert() - check_alerts_for_query(alert.query_id) + check_alerts_for_query(alert.query_hash) self.assertTrue(redash.tasks.alerts.notify_subscriptions.called) @@ -25,7 +25,7 @@ def test_doesnt_notify_when_nothing_changed(self): Alert.evaluate = MagicMock(return_value=Alert.OK_STATE) alert = self.factory.create_alert() - check_alerts_for_query(alert.query_id) + check_alerts_for_query(alert.query_hash) self.assertFalse(redash.tasks.alerts.notify_subscriptions.called) @@ -34,7 +34,7 @@ def test_doesnt_notify_when_muted(self): Alert.evaluate = MagicMock(return_value=Alert.TRIGGERED_STATE) alert = self.factory.create_alert(options={"muted": True}) - check_alerts_for_query(alert.query_id) + check_alerts_for_query(alert.query_hash) self.assertFalse(redash.tasks.alerts.notify_subscriptions.called) From a4ab72f5025e740325133a53db71a6623c857627 Mon Sep 17 00:00:00 2001 From: masterlittle Date: Wed, 15 Mar 2023 14:30:21 +0530 Subject: [PATCH 03/11] Fix user permissions --- redash/models/__init__.py | 1 - redash/query_runner/big_query.py | 14 ++++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/redash/models/__init__.py b/redash/models/__init__.py index d2eb65ac07..ba7d502792 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -82,7 +82,6 @@ def get(self, query_id): class PolicyTag(db.Model): - policy_id = Column(db.String(100)) policy_name = Column(db.String(50), primary_key=True) created_at = Column(db.DateTime(True), server_default=func.now()) updated_at = Column(db.DateTime(True), server_default=func.now(), onupdate=func.now()) diff --git a/redash/query_runner/big_query.py b/redash/query_runner/big_query.py index 388772ba65..c85b1ff17e 100644 --- a/redash/query_runner/big_query.py +++ b/redash/query_runner/big_query.py @@ -265,7 +265,7 @@ def _get_query_result(self, jobs, query, user): get_policy_tag_mapping: Dict[str, Tuple[str, str]] = self._get_policy_tags() policy_tags_accessed = self.get_policy_tags_in_query(get_policy_tag_mapping, table_column_map) - self.update_query_policy_tags(policy_tags_accessed, query) + self._update_query_policy_tags(policy_tags_accessed, query) self._check_for_policy_tags_in_query(policy_tags_accessed, user) @@ -314,10 +314,11 @@ def _get_query_result(self, jobs, query, user): def _check_for_policy_tags_in_query(self, policy_tags_accessed, user): if policy_tags_accessed: # Check if user email has access - access: List[str] = UserGroupMemberList.get_by_policy_name(policy_names=list(policy_tags_accessed.keys()), + access_results: List[Tuple[str]] = UserGroupMemberList.get_by_policy_name(policy_names=list(policy_tags_accessed.keys()), user_email=user.email) + access = {a[0] for a in access_results} if len(access) != len(policy_tags_accessed): - missing_access_policy_tags = set(policy_tags_accessed.keys()) - set(access) + missing_access_policy_tags = set(policy_tags_accessed.keys()) - access filtered_results: dict = {k: v for k, v in policy_tags_accessed.items() if k in missing_access_policy_tags} f = [f"Policy tag- {k}, Columns accessed- {v}" for k, v in filtered_results.items()] @@ -325,9 +326,9 @@ def _check_for_policy_tags_in_query(self, policy_tags_accessed, user): raise PermissionDeniedException( "User does not have permission to query columns. \n{} ".format('\n'.join(f))) - def update_query_policy_tags(self, policy_tags_accessed, query): + def _update_query_policy_tags(self, policy_tags_accessed, query): query_hash = self.gen_query_hash(query) - query_policy_tags = [QueryIdPolicyTagMapping(query_id=query_hash, policy_name=tag) for tag in + query_policy_tags = [QueryIdPolicyTagMapping(query_hash=query_hash, policy_name=tag) for tag in policy_tags_accessed.keys()] QueryIdPolicyTagMapping.update_table(query_hash, query_policy_tags) @@ -372,8 +373,9 @@ def get_schema(self, get_stats=False): dataset_id = dataset["datasetReference"]["datasetId"] query = query_base.format(schema_project_id=schema_project_id, dataset_id=dataset_id) queries.append(query) - + logger.debug(datasets) query = '\nUNION ALL\n'.join(queries) + logger.debug(queries) results, error = self.run_query(query, None) if error is not None: self._handle_run_query_error(error) From 0fa0bf40ed5295afdc53957f058ff741758b6ea6 Mon Sep 17 00:00:00 2001 From: masterlittle Date: Thu, 16 Mar 2023 12:07:31 +0530 Subject: [PATCH 04/11] Fix query_id change --- .github/create_image.yml | 43 +++++++++++++++++++++++++++ Dockerfile | 1 + redash/query_runner/athena.py | 4 +-- redash/serializers/__init__.py | 2 +- tests/handlers/test_visualizations.py | 4 +-- tests/tasks/test_alerts.py | 6 ++-- 6 files changed, 52 insertions(+), 8 deletions(-) create mode 100644 .github/create_image.yml diff --git a/.github/create_image.yml b/.github/create_image.yml new file mode 100644 index 0000000000..0adc442e21 --- /dev/null +++ b/.github/create_image.yml @@ -0,0 +1,43 @@ +name: Build custom redash image + +on: + push: + tags: + - '*' + +jobs: + deploy: + permissions: + contents: 'read' + id-token: 'write' + environment: + name: Prod + url: "app.{{ vars.BASE_DOMAIN }}" + runs-on: ubuntu-latest + env: + IMAGE_NAME: asia-south1-python.pkg.dev/prod-data-platform/redash + + steps: + - uses: actions/checkout@v3 + - id: 'auth' + name: 'Authenticate to Google Cloud' + uses: 'google-github-actions/auth@v1' + with: + workload_identity_provider: 'projects/1027534050611/locations/global/workloadIdentityPools/github-pool/providers/github' + service_account: 'prod-data-eng-deployment@prod-data-platform.iam.gserviceaccount.com' + token_format: 'access_token' + + - uses: olegtarasov/get-tag@v2.1 + id: tag_name + + - # Add support for more platforms with QEMU (optional) + # https://github.com/docker/setup-qemu-action + name: Set up QEMU + uses: docker/setup-qemu-action@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + platforms: linux/amd64,linux/arm64 + + - name: Build & push custom image + run: DOCKER_BUILDKIT=1 docker buildx build -t ${{ vars.IMAGE_NAME }}:${{ steps.tag_name.outputs.tag }} . --push diff --git a/Dockerfile b/Dockerfile index 5a80b67990..c361316e76 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,3 +1,4 @@ +# syntax=docker/dockerfile:1.3 FROM node:14.17 as frontend-builder RUN npm install --global --force yarn@1.22.10 diff --git a/redash/query_runner/athena.py b/redash/query_runner/athena.py index 321080766e..2fbd962490 100644 --- a/redash/query_runner/athena.py +++ b/redash/query_runner/athena.py @@ -245,7 +245,7 @@ def run_query(self, query, user): except AttributeError as e: logger.debug("Athena Upstream can't get data_scanned_in_bytes: %s", e) try: - athena_query_id = cursor.query_hash + athena_query_id = cursor.query_id except AttributeError as e: logger.debug("Athena Upstream can't get query_id: %s", e) @@ -263,7 +263,7 @@ def run_query(self, query, user): json_data = json_dumps(data, ignore_nan=True) error = None except Exception: - if cursor.query_hash: + if cursor.query_id: cursor.cancel() raise diff --git a/redash/serializers/__init__.py b/redash/serializers/__init__.py index 19a30e19b4..6105364c49 100644 --- a/redash/serializers/__init__.py +++ b/redash/serializers/__init__.py @@ -209,7 +209,7 @@ def serialize_alert(alert, full=True): d["query"] = serialize_query(alert.query_rel) d["user"] = alert.user.to_dict() else: - d["query_id"] = alert.query_hash + d["query_id"] = alert.query_id d["user_id"] = alert.user_id return d diff --git a/tests/handlers/test_visualizations.py b/tests/handlers/test_visualizations.py index bf5d6d5097..42a5b61668 100644 --- a/tests/handlers/test_visualizations.py +++ b/tests/handlers/test_visualizations.py @@ -105,7 +105,7 @@ def test_only_owner_collaborator_or_admin_can_edit_visualization(self): self.make_request( "post", - "/api/queries/{}/acl".format(vis.query_hash), + "/api/queries/{}/acl".format(vis.query_id), data={"access_type": "modify", "user_id": other_user.id}, ) rv = self.make_request("post", path, user=other_user, data=data) @@ -139,7 +139,7 @@ def test_only_owner_collaborator_or_admin_can_delete_visualization(self): self.make_request( "post", - "/api/queries/{}/acl".format(vis.query_hash), + "/api/queries/{}/acl".format(vis.query_id), data={"access_type": "modify", "user_id": other_user.id}, ) diff --git a/tests/tasks/test_alerts.py b/tests/tasks/test_alerts.py index 49c3fd8234..305e782436 100644 --- a/tests/tasks/test_alerts.py +++ b/tests/tasks/test_alerts.py @@ -16,7 +16,7 @@ def test_notifies_subscribers_when_should(self): Alert.evaluate = MagicMock(return_value=Alert.TRIGGERED_STATE) alert = self.factory.create_alert() - check_alerts_for_query(alert.query_hash) + check_alerts_for_query(alert.query_id) self.assertTrue(redash.tasks.alerts.notify_subscriptions.called) @@ -25,7 +25,7 @@ def test_doesnt_notify_when_nothing_changed(self): Alert.evaluate = MagicMock(return_value=Alert.OK_STATE) alert = self.factory.create_alert() - check_alerts_for_query(alert.query_hash) + check_alerts_for_query(alert.query_id) self.assertFalse(redash.tasks.alerts.notify_subscriptions.called) @@ -34,7 +34,7 @@ def test_doesnt_notify_when_muted(self): Alert.evaluate = MagicMock(return_value=Alert.TRIGGERED_STATE) alert = self.factory.create_alert(options={"muted": True}) - check_alerts_for_query(alert.query_hash) + check_alerts_for_query(alert.query_id) self.assertFalse(redash.tasks.alerts.notify_subscriptions.called) From 777bab6775adbe17574f44e2187bd1ebb8efd603 Mon Sep 17 00:00:00 2001 From: masterlittle Date: Thu, 16 Mar 2023 13:55:35 +0530 Subject: [PATCH 05/11] Add DDL statement file --- redash/models/privacy_models.sql | 109 +++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 redash/models/privacy_models.sql diff --git a/redash/models/privacy_models.sql b/redash/models/privacy_models.sql new file mode 100644 index 0000000000..e1faab1d82 --- /dev/null +++ b/redash/models/privacy_models.sql @@ -0,0 +1,109 @@ + +-- +-- Name: policy_tag; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.policy_tag ( + policy_name character varying(50) NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + +-- +-- Name: policy_tag policy_tag_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.policy_tag + ADD CONSTRAINT policy_tag_pkey PRIMARY KEY (policy_name); + +-- +-- Name: query_id_policy_tag_mapping; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.query_id_policy_tag_mapping ( + query_hash character varying(100) NOT NULL, + policy_name character varying(50) NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + +-- +-- Name: query_id_policy_tag_mapping query_id_policy_tag_mapping_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.query_id_policy_tag_mapping + ADD CONSTRAINT query_id_policy_tag_mapping_pkey PRIMARY KEY (query_hash, policy_name); + + +-- +-- Name: ix_query_id_policy_tag_mapping_created_at; Type: INDEX; Schema: public; Owner: postgres +-- + +CREATE INDEX ix_query_id_policy_tag_mapping_created_at ON public.query_id_policy_tag_mapping USING btree (created_at); + + +-- +-- Name: query_id_policy_tag_mapping query_id_policy_tag_mapping_policy_name_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.query_id_policy_tag_mapping + ADD CONSTRAINT query_id_policy_tag_mapping_policy_name_fkey FOREIGN KEY (policy_name) REFERENCES public.policy_tag(policy_name); + + +-- +-- Name: table_column_policy_tag_mapping; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.table_column_policy_tag_mapping ( + project_id character varying(50) NOT NULL, + dataset_id character varying(50) NOT NULL, + table_name character varying(100) NOT NULL, + policy_tag character varying(50) NOT NULL, + column_name character varying(50) NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + +-- +-- Name: table_column_policy_tag_mapping table_column_policy_tag_mapping_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.table_column_policy_tag_mapping + ADD CONSTRAINT table_column_policy_tag_mapping_pkey PRIMARY KEY (project_id, dataset_id, table_name, column_name); + + +-- +-- Name: table_column_policy_tag_mapping table_column_policy_tag_mapping_policy_tag_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.table_column_policy_tag_mapping + ADD CONSTRAINT table_column_policy_tag_mapping_policy_tag_fkey FOREIGN KEY (policy_tag) REFERENCES public.policy_tag(policy_name); + + +-- +-- Name: user_group_member_list; Type: TABLE; Schema: public; Owner: postgres +-- + +CREATE TABLE public.user_group_member_list ( + user_emails character varying[] NOT NULL, + policy_name character varying(50) NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: user_group_member_list user_group_member_list_pkey; Type: CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.user_group_member_list + ADD CONSTRAINT user_group_member_list_pkey PRIMARY KEY (policy_name); + + +-- +-- Name: user_group_member_list user_group_member_list_policy_name_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres +-- + +ALTER TABLE ONLY public.user_group_member_list + ADD CONSTRAINT user_group_member_list_policy_name_fkey FOREIGN KEY (policy_name) REFERENCES public.policy_tag(policy_name); + From 0c1ccaf15556e9aeae6b3259cd3090aef4495b7b Mon Sep 17 00:00:00 2001 From: masterlittle Date: Mon, 20 Mar 2023 15:08:49 +0530 Subject: [PATCH 06/11] Remove foreign key constraints from privacy tables --- redash/models/__init__.py | 6 +++--- redash/models/privacy_models.sql | 24 ------------------------ 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/redash/models/__init__.py b/redash/models/__init__.py index ba7d502792..7fc8238376 100644 --- a/redash/models/__init__.py +++ b/redash/models/__init__.py @@ -89,7 +89,7 @@ class PolicyTag(db.Model): class QueryIdPolicyTagMapping(db.Model): query_hash = Column(db.String(100), primary_key=True) - policy_name = Column(db.String(50), ForeignKey(PolicyTag.policy_name), primary_key=True) + policy_name = Column(db.String(50), primary_key=True) created_at = Column(db.DateTime(True), server_default=func.now(), index=True) updated_at = Column(db.DateTime(True), server_default=func.now(), onupdate=func.now()) @@ -109,7 +109,7 @@ def update_table(cls, query_hash, values): class UserGroupMemberList(db.Model): user_emails = Column(ARRAY(db.String)) - policy_name = Column(db.String(50), ForeignKey(PolicyTag.policy_name), primary_key=True) + policy_name = Column(db.String(50), primary_key=True) created_at = Column(db.DateTime(True), server_default=func.now()) updated_at = Column(db.DateTime(True), server_default=func.now(), onupdate=func.now()) @@ -125,7 +125,7 @@ class TableColumnPolicyTagMapping(db.Model): project_id = Column(db.String(50), primary_key=True) dataset_id = Column(db.String(50), primary_key=True) table_name = Column(db.String(100), primary_key=True) - policy_tag = Column(db.String(50), ForeignKey(PolicyTag.policy_name)) + policy_tag = Column(db.String(50)) column_name = Column(db.String(50), primary_key=True) created_at = Column(db.DateTime(True), server_default=func.now()) updated_at = Column(db.DateTime(True), server_default=func.now(), onupdate=func.now()) diff --git a/redash/models/privacy_models.sql b/redash/models/privacy_models.sql index e1faab1d82..389b9ee678 100644 --- a/redash/models/privacy_models.sql +++ b/redash/models/privacy_models.sql @@ -42,14 +42,6 @@ ALTER TABLE ONLY public.query_id_policy_tag_mapping CREATE INDEX ix_query_id_policy_tag_mapping_created_at ON public.query_id_policy_tag_mapping USING btree (created_at); --- --- Name: query_id_policy_tag_mapping query_id_policy_tag_mapping_policy_name_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.query_id_policy_tag_mapping - ADD CONSTRAINT query_id_policy_tag_mapping_policy_name_fkey FOREIGN KEY (policy_name) REFERENCES public.policy_tag(policy_name); - - -- -- Name: table_column_policy_tag_mapping; Type: TABLE; Schema: public; Owner: postgres -- @@ -72,13 +64,6 @@ ALTER TABLE ONLY public.table_column_policy_tag_mapping ADD CONSTRAINT table_column_policy_tag_mapping_pkey PRIMARY KEY (project_id, dataset_id, table_name, column_name); --- --- Name: table_column_policy_tag_mapping table_column_policy_tag_mapping_policy_tag_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.table_column_policy_tag_mapping - ADD CONSTRAINT table_column_policy_tag_mapping_policy_tag_fkey FOREIGN KEY (policy_tag) REFERENCES public.policy_tag(policy_name); - -- -- Name: user_group_member_list; Type: TABLE; Schema: public; Owner: postgres @@ -98,12 +83,3 @@ CREATE TABLE public.user_group_member_list ( ALTER TABLE ONLY public.user_group_member_list ADD CONSTRAINT user_group_member_list_pkey PRIMARY KEY (policy_name); - - --- --- Name: user_group_member_list user_group_member_list_policy_name_fkey; Type: FK CONSTRAINT; Schema: public; Owner: postgres --- - -ALTER TABLE ONLY public.user_group_member_list - ADD CONSTRAINT user_group_member_list_policy_name_fkey FOREIGN KEY (policy_name) REFERENCES public.policy_tag(policy_name); - From b7300a2e70963bf45ba567248484101390babdfd Mon Sep 17 00:00:00 2001 From: masterlittle Date: Thu, 23 Mar 2023 13:39:20 +0530 Subject: [PATCH 07/11] Update docker compose file --- client/app/pages/queries/QueryView.jsx | 241 ------------------------- docker-compose-f.yml | 106 ++++++++--- 2 files changed, 84 insertions(+), 263 deletions(-) delete mode 100644 client/app/pages/queries/QueryView.jsx diff --git a/client/app/pages/queries/QueryView.jsx b/client/app/pages/queries/QueryView.jsx deleted file mode 100644 index cbd582b480..0000000000 --- a/client/app/pages/queries/QueryView.jsx +++ /dev/null @@ -1,241 +0,0 @@ -import React, { useState, useEffect, useCallback } from "react"; -import PropTypes from "prop-types"; -import cx from "classnames"; -import useMedia from "use-media"; -import Button from "antd/lib/button"; - -import FullscreenOutlinedIcon from "@ant-design/icons/FullscreenOutlined"; -import FullscreenExitOutlinedIcon from "@ant-design/icons/FullscreenExitOutlined"; - -import routeWithUserSession from "@/components/ApplicationArea/routeWithUserSession"; -import EditInPlace from "@/components/EditInPlace"; -import Parameters from "@/components/Parameters"; -import DynamicComponent from "@/components/DynamicComponent"; -import PlainButton from "@/components/PlainButton"; - -import DataSource from "@/services/data-source"; -import { ExecutionStatus } from "@/services/query-result"; -import routes from "@/services/routes"; -import { policy } from "@/services/policy"; - -import useQueryResultData from "@/lib/useQueryResultData"; - -import QueryPageHeader from "./components/QueryPageHeader"; -import QueryVisualizationTabs from "./components/QueryVisualizationTabs"; -import QueryExecutionStatus from "./components/QueryExecutionStatus"; -import QueryMetadata from "./components/QueryMetadata"; -import wrapQueryPage from "./components/wrapQueryPage"; -import QueryViewButton from "./components/QueryViewButton"; -import QueryExecutionMetadata from "./components/QueryExecutionMetadata"; - -import useVisualizationTabHandler from "./hooks/useVisualizationTabHandler"; -import useQueryExecute from "./hooks/useQueryExecute"; -import useUpdateQueryDescription from "./hooks/useUpdateQueryDescription"; -import useQueryFlags from "./hooks/useQueryFlags"; -import useQueryParameters from "./hooks/useQueryParameters"; -import useEditScheduleDialog from "./hooks/useEditScheduleDialog"; -import useEditVisualizationDialog from "./hooks/useEditVisualizationDialog"; -import useDeleteVisualization from "./hooks/useDeleteVisualization"; -import useFullscreenHandler from "../../lib/hooks/useFullscreenHandler"; - -import "./QueryView.less"; - -function QueryView(props) { - const [query, setQuery] = useState(props.query); - const [dataSource, setDataSource] = useState(); - const queryFlags = useQueryFlags(query, dataSource); - const [parameters, areParametersDirty, updateParametersDirtyFlag] = useQueryParameters(query); - const [selectedVisualization, setSelectedVisualization] = useVisualizationTabHandler(query.visualizations); - const isDesktop = useMedia({ minWidth: 768 }); - const isFixedLayout = useMedia({ minHeight: 500 }) && isDesktop; - const [fullscreen, toggleFullscreen] = useFullscreenHandler(isDesktop); - const [addingDescription, setAddingDescription] = useState(false); - - const { - queryResult, - loadedInitialResults, - isExecuting, - executionStatus, - executeQuery, - error: executionError, - cancelCallback: cancelExecution, - isCancelling: isExecutionCancelling, - updatedAt, - } = useQueryExecute(query); - - const queryResultData = useQueryResultData(queryResult); - - const updateQueryDescription = useUpdateQueryDescription(query, setQuery); - const editSchedule = useEditScheduleDialog(query, setQuery); - const addVisualization = useEditVisualizationDialog(query, queryResult, (newQuery, visualization) => { - setQuery(newQuery); - setSelectedVisualization(visualization.id); - }); - const editVisualization = useEditVisualizationDialog(query, queryResult, newQuery => setQuery(newQuery)); - const deleteVisualization = useDeleteVisualization(query, setQuery); - - const doExecuteQuery = useCallback( - (skipParametersDirtyFlag = false) => { - if (!queryFlags.canExecute || (!skipParametersDirtyFlag && (areParametersDirty || isExecuting))) { - return; - } - executeQuery(); - }, - [areParametersDirty, executeQuery, isExecuting, queryFlags.canExecute] - ); - - useEffect(() => { - document.title = query.name; - }, [query.name]); - - useEffect(() => { - DataSource.get({ id: query.data_source_id }).then(setDataSource); - }, [query.data_source_id]); - - return ( -
-
- - {policy.canRun(query) && ( - - Refresh - - )} - - } - tagsExtra={ - !query.description && - queryFlags.canEdit && - !addingDescription && - !fullscreen && ( - setAddingDescription(true)}> -