Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CT-808 more grant adapter tests #5452

Merged
merged 18 commits into from
Jul 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions core/dbt/adapters/base/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,17 +547,24 @@ def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
"""Translate the result of `show grants` (or equivalent) to match the
grants which a user would configure in their project.

If relevant -- filter down to grants made BY the current user role,
and filter OUT any grants TO the current user/role (e.g. OWNERSHIP).
Ideally, the SQL to show grants should also be filtering:
filter OUT any grants TO the current user/role (e.g. OWNERSHIP).
If that's not possible in SQL, it can be done in this method instead.

:param grants_table: An agate table containing the query result of
the SQL returned by get_show_grant_sql
:return: A standardized dictionary matching the `grants` config
:rtype: dict
"""
raise NotImplementedException(
"`standardize_grants_dict` is not implemented for this adapter!"
)
grants_dict: Dict[str, List[str]] = {}
for row in grants_table:
grantee = row["grantee"]
privilege = row["privilege_type"]
if privilege in grants_dict.keys():
grants_dict[privilege].append(grantee)
else:
grants_dict.update({privilege: [grantee]})
return grants_dict

###
# Provided methods about relations
Expand Down
29 changes: 17 additions & 12 deletions core/dbt/context/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import os
from typing import Any, Dict, NoReturn, Optional, Mapping, Iterable, Set
from typing import Any, Dict, NoReturn, Optional, Mapping, Iterable, Set, List

from dbt import flags
from dbt import tracking
Expand Down Expand Up @@ -659,22 +659,27 @@ def print(msg: str) -> str:

@contextmember
@staticmethod
def diff_of_two_dicts(dict_a, dict_b):
def diff_of_two_dicts(
dict_a: Dict[str, List[str]], dict_b: Dict[str, List[str]]
) -> Dict[str, List[str]]:
"""
Given two dictionaries:
dict_a: {'key_x': ['value_1', 'value_2'], 'key_y': ['value_3']}
dict_b: {'key_x': ['value_1'], 'key_z': ['value_4']}
Return the same dictionary representation of dict_a MINUS dict_b
Given two dictionaries of type Dict[str, List[str]]:
dict_a = {'key_x': ['value_1', 'VALUE_2'], 'KEY_Y': ['value_3']}
dict_b = {'key_x': ['value_1'], 'key_z': ['value_4']}
Return the same dictionary representation of dict_a MINUS dict_b,
performing a case-insensitive comparison between the strings in each.
All keys returned will be in the original case of dict_a.
returns {'key_x': ['VALUE_2'], 'KEY_Y': ['value_3']}
"""

dict_diff = {}
dict_a = {k.lower(): v for k, v in dict_a.items()}
dict_b = {k.lower(): v for k, v in dict_b.items()}
dict_b_lowered = {k.casefold(): [x.casefold() for x in v] for k, v in dict_b.items()}
for k in dict_a:
if k in dict_b:
a_lowered = map(lambda x: x.lower(), dict_a[k])
b_lowered = map(lambda x: x.lower(), dict_b[k])
diff = list(set(a_lowered) - set(b_lowered))
if k.casefold() in dict_b_lowered.keys():
diff = []
for v in dict_a[k]:
if v.casefold() not in dict_b_lowered[k.casefold()]:
diff.append(v)
Comment on lines +676 to +682
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

casefold, FTW 💪

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh wow thats super cool haven't heard of casefold before.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nathaniel-may is going to take a look at refactoring this for performance / Pythonic-ness. Not going to consider it a blocker for merging this PR in the meantime.

if diff:
dict_diff.update({k: diff})
else:
Expand Down
154 changes: 117 additions & 37 deletions core/dbt/include/global_project/macros/adapters/apply_grants.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
{# ------- BOOLEAN MACROS --------- #}

{#
-- COPY GRANTS
-- When a relational object (view or table) is replaced in this database,
-- do previous grants carry over to the new object? This may depend on:
-- whether we use alter-rename-swap versus CREATE OR REPLACE
-- user-supplied configuration (e.g. copy_grants on Snowflake)
-- By default, play it safe, assume TRUE: that grants ARE copied over.
-- This means dbt will first "show" current grants and then calculate diffs.
-- It may require an additional query than is strictly necessary,
-- but better safe than sorry.
#}

{% macro copy_grants() %}
{{ return(adapter.dispatch('copy_grants', 'dbt')()) }}
{% endmacro %}
Expand All @@ -6,6 +20,25 @@
{{ return(True) }}
{% endmacro %}


{#
-- SUPPORT MULTIPLE GRANTEES PER DCL STATEMENT
-- Does this database support 'grant {privilege} to {grantee_1}, {grantee_2}, ...'
-- Or must these be separate statements:
-- `grant {privilege} to {grantee_1}`;
-- `grant {privilege} to {grantee_2}`;
-- By default, pick the former, because it's what we prefer when available.
#}

{% macro support_multiple_grantees_per_dcl_statement() %}
{{ return(adapter.dispatch('support_multiple_grantees_per_dcl_statement', 'dbt')()) }}
{% endmacro %}

{%- macro default__support_multiple_grantees_per_dcl_statement() -%}
{{ return(True) }}
{%- endmacro -%}


{% macro should_revoke(existing_relation, full_refresh_mode=True) %}

{% if not existing_relation %}
Expand All @@ -21,6 +54,8 @@

{% endmacro %}

{# ------- DCL STATEMENT TEMPLATES --------- #}

{% macro get_show_grant_sql(relation) %}
{{ return(adapter.dispatch("get_show_grant_sql", "dbt")(relation)) }}
{% endmacro %}
Expand All @@ -29,59 +64,104 @@
show grants on {{ relation }}
{% endmacro %}

{% macro get_grant_sql(relation, grant_config) %}
{{ return(adapter.dispatch('get_grant_sql', 'dbt')(relation, grant_config)) }}

{% macro get_grant_sql(relation, privilege, grantees) %}
{{ return(adapter.dispatch('get_grant_sql', 'dbt')(relation, privilege, grantees)) }}
{% endmacro %}

{%- macro default__get_grant_sql(relation, privilege, grantees) -%}
grant {{ privilege }} on {{ relation }} to {{ grantees | join(', ') }}
{%- endmacro -%}


{% macro get_revoke_sql(relation, privilege, grantees) %}
{{ return(adapter.dispatch('get_revoke_sql', 'dbt')(relation, privilege, grantees)) }}
{% endmacro %}

{%- macro default__get_revoke_sql(relation, privilege, grantees) -%}
revoke {{ privilege }} on {{ relation }} from {{ grantees | join(', ') }}
{%- endmacro -%}


{# ------- RUNTIME APPLICATION --------- #}

{% macro get_dcl_statement_list(relation, grant_config, get_dcl_macro) %}
{{ return(adapter.dispatch('get_dcl_statement_list', 'dbt')(relation, grant_config, get_dcl_macro)) }}
{% endmacro %}

{%- macro default__get_grant_sql(relation, grant_config) -%}
{%- for privilege in grant_config.keys() -%}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't have to adjust in this PR, but have we considered move all these kind of logic to python vs do them in Jinja

Copy link
Contributor

@jtcohen6 jtcohen6 Jul 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking about this yesterday. Both get_grant_sql and get_revoke_sql now have the same boilerplate, which is unpacking the grant config and looping over them. The piece of this that really wants to be in Jinja is really more like:

{% macro get_grant_sql() %}
    grant {{ privilege }} on {{ relation }} to {{ grantee }}
{% endmacro %}

I could see refactoring to call this simpler macro instead, and move the unpacking/looping/array-building logic elsewhere.

For what it's worth, most of the heavier lifting is really happening in apply_grants. We could reimplemented some pieces of it in Python. Keeping the top-level macro call in Jinja (=user space) does allow people to override its behavior for themselves, though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I acted on this feedback in c763601, by moving the grant_config unpacking / looping logic into their own separate reusable macro, so that get_grant_sql + get_revoke_sql can be dead simple

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jtcohen6 solid move! I also like @ChenyuLInx's notion of putting more looping logic into Python to make the Jinja simpler.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Totally. It can be fairly tricky today to call macros from within Python models (adapter.execute_macro is the sort-of state-of-art), so when a macro needs to call other macros (as this one does), Jinja can still be easier. But if we can split out a pure function, as we did with diff_of_two_dicts and standardize_grants_dict, Python is wayyyy better for code clarity and to unit testing.

{%- set grantees = grant_config[privilege] -%}
{%- if grantees -%}
{%- for grantee in grantees -%}
grant {{ privilege }} on {{ relation }} to {{ grantee }};
{%- endfor -%}
{%- macro default__get_dcl_statement_list(relation, grant_config, get_dcl_macro) -%}
{#
-- Unpack grant_config into specific privileges and the set of users who need them granted/revoked.
-- Depending on whether this database supports multiple grantees per statement, pass in the list of
-- all grantees per privilege, or (if not) template one statement per privilege-grantee pair.
-- `get_dcl_macro` will be either `get_grant_sql` or `get_revoke_sql`
#}
{%- set dcl_statements = [] -%}
{%- for privilege, grantees in grant_config.items() %}
{%- if support_multiple_grantees_per_dcl_statement() and grantees -%}
{%- set dcl = get_dcl_macro(relation, privilege, grantees) -%}
{%- do dcl_statements.append(dcl) -%}
{%- else -%}
{%- for grantee in grantees -%}
{% set dcl = get_dcl_macro(relation, privilege, [grantee]) %}
{%- do dcl_statements.append(dcl) -%}
{% endfor -%}
{%- endif -%}
{%- endfor -%}
{{ return(dcl_statements) }}
{%- endmacro %}

{% macro get_revoke_sql(relation, grant_config) %}
{{ return(adapter.dispatch("get_revoke_sql", "dbt")(relation, grant_config)) }}

{% macro call_dcl_statements(dcl_statement_list) %}
{{ return(adapter.dispatch("call_dcl_statements", "dbt")(dcl_statement_list)) }}
{% endmacro %}

{% macro default__call_dcl_statements(dcl_statement_list) %}
{#
-- By default, supply all grant + revoke statements in a single semicolon-separated block,
-- so that they're all processed together.

-- Some databases do not support this. Those adapters will need to override this macro
-- to run each statement individually.
#}
{% call statement('grants') %}
{% for dcl_statement in dcl_statement_list %}
{{ dcl_statement }};
{% endfor %}
{% endcall %}
{% endmacro %}

{% macro default__get_revoke_sql(relation, grant_config) %}
{%- for privilege in grant_config.keys() -%}
{%- set grantees = grant_config[privilege] -%}
{%- if grantees -%}
{%- for grantee in grantees -%}
revoke {{ privilege }} on {{ relation }} from {{ grantee }};
{% endfor -%}
{%- endif -%}
{%- endfor -%}
{%- endmacro -%}

{% macro apply_grants(relation, grant_config, should_revoke) %}
{{ return(adapter.dispatch("apply_grants", "dbt")(relation, grant_config, should_revoke)) }}
{% endmacro %}

{% macro default__apply_grants(relation, grant_config, should_revoke=True) %}
{#-- If grant_config is {} or None, this is a no-op --#}
{% if grant_config %}
{% if should_revoke %}
{% set current_grants_table = run_query(get_show_grant_sql(relation)) %}
{% set current_grants_dict = adapter.standardize_grants_dict(current_grants_table) %}
{% set needs_granting = diff_of_two_dicts(grant_config, current_grants_dict) %}
{% set needs_revoking = diff_of_two_dicts(current_grants_dict, grant_config) %}
{% if not (needs_granting or needs_revoking) %}
{{ log('On ' ~ relation ~': All grants are in place, no revocation or granting needed.')}}
{% endif %}
{% else %}
{% set needs_revoking = {} %}
{% set needs_granting = grant_config %}
{% if should_revoke %}
{#-- We think previous grants may have carried over --#}
{#-- Show current grants and calculate diffs --#}
{% set current_grants_table = run_query(get_show_grant_sql(relation)) %}
{% set current_grants_dict = adapter.standardize_grants_dict(current_grants_table) %}
{% set needs_granting = diff_of_two_dicts(grant_config, current_grants_dict) %}
{% set needs_revoking = diff_of_two_dicts(current_grants_dict, grant_config) %}
{% if not (needs_granting or needs_revoking) %}
{{ log('On ' ~ relation ~': All grants are in place, no revocation or granting needed.')}}
{% endif %}
{% if needs_granting or needs_revoking %}
{% call statement('grants') %}
{{ get_revoke_sql(relation, needs_revoking) }}
{{ get_grant_sql(relation, needs_granting) }}
{% endcall %}
{% else %}
{#-- We don't think there's any chance of previous grants having carried over. --#}
{#-- Jump straight to granting what the user has configured. --#}
{% set needs_revoking = {} %}
{% set needs_granting = grant_config %}
{% endif %}
{% if needs_granting or needs_revoking %}
{% set revoke_statement_list = get_dcl_statement_list(relation, needs_revoking, get_revoke_sql) %}
{% set grant_statement_list = get_dcl_statement_list(relation, needs_granting, get_grant_sql) %}
{% set dcl_statement_list = revoke_statement_list + grant_statement_list %}
{% if dcl_statement_list %}
{{ call_dcl_statements(dcl_statement_list) }}
{% endif %}
{% endif %}
{% endif %}
{% endmacro %}
13 changes: 0 additions & 13 deletions plugins/postgres/dbt/adapters/postgres/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from dbt.dataclass_schema import dbtClassMixin, ValidationError
import dbt.exceptions
import dbt.utils
import agate


# note that this isn't an adapter macro, so just a single underscore
Expand Down Expand Up @@ -86,18 +85,6 @@ def verify_database(self, database):
def parse_index(self, raw_index: Any) -> Optional[PostgresIndexConfig]:
return PostgresIndexConfig.parse(raw_index)

@available
def standardize_grants_dict(self, grants_table: agate.Table) -> dict:
grants_dict = {}
for row in grants_table:
grantee = row["grantee"]
privilege = row["privilege_type"]
if privilege in grants_dict.keys():
grants_dict[privilege].append(grantee)
else:
grants_dict.update({privilege: [grantee]})
return grants_dict

def _link_cached_database_relations(self, schemas: Set[str]):
"""
:param schemas: The set of schemas that should have links added.
Expand Down
4 changes: 2 additions & 2 deletions plugins/postgres/dbt/include/postgres/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,13 @@

{%- macro postgres__get_show_grant_sql(relation) -%}
select grantee, privilege_type
from information_schema.role_table_grants
from {{ relation.information_schema('role_table_grants') }}
where grantor = current_role
and grantee != current_role
and table_schema = '{{ relation.schema }}'
and table_name = '{{ relation.identifier }}'
{%- endmacro -%}

{% macro postgres__are_grants_copied_over_when_replaced() %}
{% macro postgres__copy_grants() %}
{{ return(False) }}
{% endmacro %}
58 changes: 58 additions & 0 deletions tests/adapter/dbt/tests/adapter/grants/base_grants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import pytest
import os
from dbt.tests.util import (
relation_from_name,
get_connection,
)
from dbt.context.base import BaseContext # diff_of_two_dicts only

TEST_USER_ENV_VARS = ["DBT_TEST_USER_1", "DBT_TEST_USER_2", "DBT_TEST_USER_3"]


def replace_all(text, dic):
for i, j in dic.items():
text = text.replace(i, j)
return text


class BaseGrants:
def privilege_grantee_name_overrides(self):
# these privilege and grantee names are valid on most databases, but not all!
# looking at you, BigQuery
# optionally use this to map from "select" --> "other_select_name", "insert" --> ...
return {
"select": "select",
"insert": "insert",
"fake_privilege": "fake_privilege",
"invalid_user": "invalid_user",
}

def interpolate_name_overrides(self, yaml_text):
return replace_all(yaml_text, self.privilege_grantee_name_overrides())

@pytest.fixture(scope="class", autouse=True)
def get_test_users(self, project):
test_users = []
for env_var in TEST_USER_ENV_VARS:
user_name = os.getenv(env_var)
if user_name:
test_users.append(user_name)
return test_users

def get_grants_on_relation(self, project, relation_name):
relation = relation_from_name(project.adapter, relation_name)
adapter = project.adapter
with get_connection(adapter):
kwargs = {"relation": relation}
show_grant_sql = adapter.execute_macro("get_show_grant_sql", kwargs=kwargs)
_, grant_table = adapter.execute(show_grant_sql, fetch=True)
actual_grants = adapter.standardize_grants_dict(grant_table)
return actual_grants

def assert_expected_grants_match_actual(self, project, relation_name, expected_grants):
actual_grants = self.get_grants_on_relation(project, relation_name)
# need a case-insensitive comparison
# so just a simple "assert expected == actual_grants" won't work
diff_a = BaseContext.diff_of_two_dicts(actual_grants, expected_grants)
diff_b = BaseContext.diff_of_two_dicts(expected_grants, actual_grants)
assert diff_a == diff_b == {}
Loading