Skip to content

Commit

Permalink
chore: improve mask/unmask encrypted_extra (#29943)
Browse files Browse the repository at this point in the history
(cherry picked from commit 4b59e42)
  • Loading branch information
betodealmeida authored and sadpandajoe committed Aug 27, 2024
1 parent fe33689 commit d488c78
Show file tree
Hide file tree
Showing 13 changed files with 490 additions and 151 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ dependencies = [
"humanize",
"importlib_metadata",
"isodate",
"jsonpath-ng>=1.6.1, <2",
"Mako>=1.2.2",
"markdown>=3.0",
"msgpack>=1.0.0, <1.1",
Expand Down
8 changes: 5 additions & 3 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,7 @@ geopy==2.4.1
google-auth==2.29.0
# via shillelagh
greenlet==3.0.3
# via
# shillelagh
# sqlalchemy
# via shillelagh
gunicorn==22.0.0
# via apache-superset
hashids==1.3.1
Expand All @@ -173,6 +171,8 @@ jinja2==3.1.4
# via
# flask
# flask-babel
jsonpath-ng==1.6.1
# via apache-superset
jsonschema==4.17.3
# via flask-appbuilder
kombu==5.3.7
Expand Down Expand Up @@ -249,6 +249,8 @@ pgsanity==0.2.9
# via apache-superset
platformdirs==3.8.1
# via requests-cache
ply==3.11
# via jsonpath-ng
polyline==2.0.2
# via apache-superset
prison==0.2.1
Expand Down
24 changes: 5 additions & 19 deletions requirements/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
# via
# -r requirements/base.in
# -r requirements/development.in
appnope==0.1.4
# via ipython
astroid==3.1.0
# via pylint
boto3==1.34.112
# via dataflows-tabulator
# via
# apache-superset
# dataflows-tabulator
botocore==1.34.112
# via
# boto3
Expand Down Expand Up @@ -177,9 +177,7 @@ protobuf==4.23.0
psycopg2-binary==2.9.6
# via apache-superset
pure-sasl==0.6.2
# via
# pyhive
# thrift-sasl
# via thrift-sasl
pydata-google-auth==1.7.0
# via pandas-gbq
pydruid==0.6.9
Expand Down Expand Up @@ -232,18 +230,9 @@ tableschema==1.20.10
thrift==0.16.0
# via
# apache-superset
# pyhive
# thrift-sasl
thrift-sasl==0.4.3
# via
# build
# coverage
# pip-tools
# pylint
# pyproject-api
# pyproject-hooks
# pytest
# tox
# via apache-superset
tomlkit==0.12.5
# via pylint
toposort==1.10
Expand All @@ -254,9 +243,6 @@ tqdm==4.66.4
# via
# cmdstanpy
# prophet
traitlets==5.14.3
# via
# matplotlib-inline
trino==0.328.0
# via apache-superset
tzlocal==5.2
Expand Down
52 changes: 43 additions & 9 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
from superset.utils import core as utils, json
from superset.utils.core import ColumnSpec, GenericDataType
from superset.utils.hashing import md5_sha_from_str
from superset.utils.json import redact_sensitive, reveal_sensitive
from superset.utils.network import is_hostname_valid, is_port_open
from superset.utils.oauth2 import encode_oauth2_state

Expand Down Expand Up @@ -398,6 +399,11 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]
] = {}

# List of JSON path to fields in `encrypted_extra` that should be masked when the
# database is edited. By default everything is masked.
# pylint: disable=invalid-name
encrypted_extra_sensitive_fields: set[str] = {"$.*"}

# Whether the engine supports file uploads
# if True, database will be listed as option in the upload file form
supports_file_upload = True
Expand Down Expand Up @@ -2163,26 +2169,54 @@ def get_impersonation_key(cls, user: User | None) -> Any:
@classmethod
def mask_encrypted_extra(cls, encrypted_extra: str | None) -> str | None:
"""
Mask ``encrypted_extra``.
Mask `encrypted_extra`.
This is used to remove any sensitive data in ``encrypted_extra`` when presenting
it to the user. For example, a private key might be replaced with a masked value
"XXXXXXXXXX". If the masked value is changed the corresponding entry is updated,
otherwise the old value is used (see ``unmask_encrypted_extra`` below).
This is used to remove any sensitive data in `encrypted_extra` when presenting
it to the user when a database is edited. For example, a private key might be
replaced with a masked value "XXXXXXXXXX". If the masked value is changed the
corresponding entry is updated, otherwise the old value is used (see
`unmask_encrypted_extra` below).
"""
return encrypted_extra
if encrypted_extra is None or not cls.encrypted_extra_sensitive_fields:
return encrypted_extra

try:
config = json.loads(encrypted_extra)
except (TypeError, json.JSONDecodeError):
return encrypted_extra

masked_encrypted_extra = redact_sensitive(
config,
cls.encrypted_extra_sensitive_fields,
)

return json.dumps(masked_encrypted_extra)

# pylint: disable=unused-argument
@classmethod
def unmask_encrypted_extra(cls, old: str | None, new: str | None) -> str | None:
"""
Remove masks from ``encrypted_extra``.
Remove masks from `encrypted_extra`.
This method allows reusing existing values from the current encrypted extra on
updates. It's useful for reusing masked passwords, allowing keys to be updated
without having to provide sensitive data to the client.
"""
return new
if old is None or new is None:
return new

try:
old_config = json.loads(old)
new_config = json.loads(new)
except (TypeError, json.JSONDecodeError):
return new

new_config = reveal_sensitive(
old_config,
new_config,
cls.encrypted_extra_sensitive_fields,
)

return json.dumps(new_config)

@classmethod
def get_public_information(cls) -> dict[str, Any]:
Expand Down
48 changes: 5 additions & 43 deletions superset/db_engine_specs/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

from __future__ import annotations

import contextlib
import re
import urllib
from datetime import datetime
Expand All @@ -38,7 +37,7 @@
from sqlalchemy.sql import sqltypes

from superset import sql_parse
from superset.constants import PASSWORD_MASK, TimeGrain
from superset.constants import TimeGrain
from superset.databases.schemas import encrypted_field_properties, EncryptedString
from superset.databases.utils import make_url_safe
from superset.db_engine_specs.base import BaseEngineSpec, BasicPropertiesType
Expand Down Expand Up @@ -129,6 +128,10 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met

supports_catalog = supports_dynamic_catalog = True

# when editing the database, mask this field in `encrypted_extra`
# pylint: disable=invalid-name
encrypted_extra_sensitive_fields = {"$.credentials_info.private_key"}

"""
https://www.python.org/dev/peps/pep-0249/#arraysize
raw_connections bypass the sqlalchemy-bigquery query execution context and deal with
Expand Down Expand Up @@ -594,47 +597,6 @@ def get_parameters_from_uri(

raise ValidationError("Invalid service credentials")

@classmethod
def mask_encrypted_extra(cls, encrypted_extra: str | None) -> str | None:
if encrypted_extra is None:
return encrypted_extra

try:
config = json.loads(encrypted_extra)
except (json.JSONDecodeError, TypeError):
return encrypted_extra

with contextlib.suppress(KeyError):
config["credentials_info"]["private_key"] = PASSWORD_MASK
return json.dumps(config)

@classmethod
def unmask_encrypted_extra(cls, old: str | None, new: str | None) -> str | None:
"""
Reuse ``private_key`` if available and unchanged.
"""
if old is None or new is None:
return new

try:
old_config = json.loads(old)
new_config = json.loads(new)
except (TypeError, json.JSONDecodeError):
return new

if "credentials_info" not in new_config:
return new

if "private_key" not in new_config["credentials_info"]:
return new

if new_config["credentials_info"]["private_key"] == PASSWORD_MASK:
new_config["credentials_info"]["private_key"] = old_config[
"credentials_info"
]["private_key"]

return json.dumps(new_config)

@classmethod
def get_dbapi_exception_mapping(cls) -> dict[type[Exception], type[Exception]]:
# pylint: disable=import-outside-toplevel
Expand Down
51 changes: 6 additions & 45 deletions superset/db_engine_specs/gsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

from __future__ import annotations

import contextlib
import logging
import re
from re import Pattern
Expand All @@ -37,7 +36,6 @@
from sqlalchemy.engine.url import URL

from superset import db, security_manager
from superset.constants import PASSWORD_MASK
from superset.databases.schemas import encrypted_field_properties, EncryptedString
from superset.db_engine_specs.shillelagh import ShillelaghEngineSpec
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
Expand Down Expand Up @@ -93,6 +91,10 @@ class GSheetsEngineSpec(ShillelaghEngineSpec):
default_driver = "apsw"
sqlalchemy_uri_placeholder = "gsheets://"

# when editing the database, mask this field in `encrypted_extra`
# pylint: disable=invalid-name
encrypted_extra_sensitive_fields = {"$.service_account_info.private_key"}

custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
SYNTAX_ERROR_REGEX: (
__(
Expand Down Expand Up @@ -157,11 +159,11 @@ def get_extra_table_metadata(
return {"metadata": metadata["extra"]}

@classmethod
# pylint: disable=unused-argument
def build_sqlalchemy_uri(
cls,
_: GSheetsParametersType,
encrypted_extra: None # pylint: disable=unused-argument
| (dict[str, Any]) = None,
encrypted_extra: None | (dict[str, Any]) = None,
) -> str:
return "gsheets://"

Expand All @@ -177,47 +179,6 @@ def get_parameters_from_uri(

raise ValidationError("Invalid service credentials")

@classmethod
def mask_encrypted_extra(cls, encrypted_extra: str | None) -> str | None:
if encrypted_extra is None:
return encrypted_extra

try:
config = json.loads(encrypted_extra)
except (TypeError, json.JSONDecodeError):
return encrypted_extra

with contextlib.suppress(KeyError):
config["service_account_info"]["private_key"] = PASSWORD_MASK
return json.dumps(config)

@classmethod
def unmask_encrypted_extra(cls, old: str | None, new: str | None) -> str | None:
"""
Reuse ``private_key`` if available and unchanged.
"""
if old is None or new is None:
return new

try:
old_config = json.loads(old)
new_config = json.loads(new)
except (TypeError, json.JSONDecodeError):
return new

if "service_account_info" not in new_config:
return new

if "private_key" not in new_config["service_account_info"]:
return new

if new_config["service_account_info"]["private_key"] == PASSWORD_MASK:
new_config["service_account_info"]["private_key"] = old_config[
"service_account_info"
]["private_key"]

return json.dumps(new_config)

@classmethod
def parameters_json_schema(cls) -> Any:
"""
Expand Down
6 changes: 6 additions & 0 deletions superset/db_engine_specs/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ class SnowflakeEngineSpec(PostgresBaseEngineSpec):
supports_dynamic_schema = True
supports_catalog = supports_dynamic_catalog = True

# pylint: disable=invalid-name
encrypted_extra_sensitive_fields = {
"$.auth_params.privatekey_body",
"$.auth_params.privatekey_pass",
}

_time_grain_expressions = {
None: "{col}",
TimeGrain.SECOND: "DATE_TRUNC('SECOND', {col})",
Expand Down
Loading

0 comments on commit d488c78

Please sign in to comment.