Skip to content

Commit

Permalink
chore: improve mask/unmask encrypted_extra
Browse files Browse the repository at this point in the history
  • Loading branch information
betodealmeida committed Aug 14, 2024
1 parent f5d614d commit e2a8519
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 128 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ dependencies = [
"humanize",
"importlib_metadata",
"isodate",
"jsonpath-ng>=1.6.1, <2",
"Mako>=1.2.2",
"markdown>=3.0",
"msgpack>=1.0.0, <1.1",
Expand Down
8 changes: 5 additions & 3 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,7 @@ geopy==2.4.1
google-auth==2.29.0
# via shillelagh
greenlet==3.0.3
# via
# shillelagh
# sqlalchemy
# via shillelagh
gunicorn==22.0.0
# via apache-superset
hashids==1.3.1
Expand All @@ -173,6 +171,8 @@ jinja2==3.1.4
# via
# flask
# flask-babel
jsonpath-ng==1.6.1
# via apache-superset
jsonschema==4.17.3
# via flask-appbuilder
kombu==5.3.7
Expand Down Expand Up @@ -249,6 +249,8 @@ pgsanity==0.2.9
# via apache-superset
platformdirs==3.8.1
# via requests-cache
ply==3.11
# via jsonpath-ng
polyline==2.0.2
# via apache-superset
prison==0.2.1
Expand Down
24 changes: 5 additions & 19 deletions requirements/development.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
# via
# -r requirements/base.in
# -r requirements/development.in
appnope==0.1.4
# via ipython
astroid==3.1.0
# via pylint
boto3==1.34.112
# via dataflows-tabulator
# via
# apache-superset
# dataflows-tabulator
botocore==1.34.112
# via
# boto3
Expand Down Expand Up @@ -177,9 +177,7 @@ protobuf==4.23.0
psycopg2-binary==2.9.6
# via apache-superset
pure-sasl==0.6.2
# via
# pyhive
# thrift-sasl
# via thrift-sasl
pydata-google-auth==1.7.0
# via pandas-gbq
pydruid==0.6.9
Expand Down Expand Up @@ -232,18 +230,9 @@ tableschema==1.20.10
thrift==0.16.0
# via
# apache-superset
# pyhive
# thrift-sasl
thrift-sasl==0.4.3
# via
# build
# coverage
# pip-tools
# pylint
# pyproject-api
# pyproject-hooks
# pytest
# tox
# via apache-superset
tomlkit==0.12.5
# via pylint
toposort==1.10
Expand All @@ -254,9 +243,6 @@ tqdm==4.66.4
# via
# cmdstanpy
# prophet
traitlets==5.14.3
# via
# matplotlib-inline
trino==0.328.0
# via apache-superset
tzlocal==5.2
Expand Down
55 changes: 45 additions & 10 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
from flask import current_app, g, url_for
from flask_appbuilder.security.sqla.models import User
from flask_babel import gettext as __, lazy_gettext as _
from jsonpath_ng import parse
from marshmallow import fields, Schema
from marshmallow.validate import Range
from sqlalchemy import column, select, types
Expand All @@ -59,7 +60,7 @@
from sqlparse.tokens import CTE

from superset import sql_parse
from superset.constants import TimeGrain as TimeGrainConstants
from superset.constants import PASSWORD_MASK, TimeGrain as TimeGrainConstants
from superset.databases.utils import get_table_metadata, make_url_safe
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
from superset.exceptions import DisallowedSQLFunction, OAuth2Error, OAuth2RedirectError
Expand Down Expand Up @@ -2160,29 +2161,63 @@ def get_impersonation_key(cls, user: User | None) -> Any:
"""
return user.username if user else None

# list of JSON path to fields in `encrypted_extra` that should be masked when the
# database is edited
# pylint: disable=invalid-name
encrypted_extra_sensitive_fields: list[str] = []

@classmethod
def mask_encrypted_extra(cls, encrypted_extra: str | None) -> str | None:
"""
Mask ``encrypted_extra``.
Mask `encrypted_extra`.
This is used to remove any sensitive data in ``encrypted_extra`` when presenting
it to the user. For example, a private key might be replaced with a masked value
"XXXXXXXXXX". If the masked value is changed the corresponding entry is updated,
otherwise the old value is used (see ``unmask_encrypted_extra`` below).
This is used to remove any sensitive data in `encrypted_extra` when presenting
it to the user when a database is edited. For example, a private key might be
replaced with a masked value "XXXXXXXXXX". If the masked value is changed the
corresponding entry is updated, otherwise the old value is used (see
`unmask_encrypted_extra` below).
"""
return encrypted_extra
if encrypted_extra is None or not cls.encrypted_extra_sensitive_fields:
return encrypted_extra

try:
config = json.loads(encrypted_extra)
except (TypeError, json.JSONDecodeError):
return encrypted_extra

for json_path in cls.encrypted_extra_sensitive_fields:
jsonpath_expr = parse(json_path)
for match in jsonpath_expr.find(config):
match.context.value[match.path.fields[0]] = PASSWORD_MASK

return json.dumps(config)

# pylint: disable=unused-argument
@classmethod
def unmask_encrypted_extra(cls, old: str | None, new: str | None) -> str | None:
"""
Remove masks from ``encrypted_extra``.
Remove masks from `encrypted_extra`.
This method allows reusing existing values from the current encrypted extra on
updates. It's useful for reusing masked passwords, allowing keys to be updated
without having to provide sensitive data to the client.
"""
return new
if old is None or new is None:
return new

try:
old_config = json.loads(old)
new_config = json.loads(new)
except (TypeError, json.JSONDecodeError):
return new

for json_path in cls.encrypted_extra_sensitive_fields:
jsonpath_expr = parse(json_path)
for match in jsonpath_expr.find(new_config):
if match.value == PASSWORD_MASK:
old_value = jsonpath_expr.find(old_config)
match.context.value[match.path.fields[0]] = old_value[0].value

return json.dumps(new_config)

@classmethod
def get_public_information(cls) -> dict[str, Any]:
Expand Down
48 changes: 5 additions & 43 deletions superset/db_engine_specs/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

from __future__ import annotations

import contextlib
import re
import urllib
from datetime import datetime
Expand All @@ -38,7 +37,7 @@
from sqlalchemy.sql import sqltypes

from superset import sql_parse
from superset.constants import PASSWORD_MASK, TimeGrain
from superset.constants import TimeGrain
from superset.databases.schemas import encrypted_field_properties, EncryptedString
from superset.databases.utils import make_url_safe
from superset.db_engine_specs.base import BaseEngineSpec, BasicPropertiesType
Expand Down Expand Up @@ -129,6 +128,10 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met

supports_catalog = supports_dynamic_catalog = True

# when editing the database, mask this field in `encrypted_extra`
# pylint: disable=invalid-name
encrypted_extra_sensitive_fields = ["$.credentials_info.private_key"]

"""
https://www.python.org/dev/peps/pep-0249/#arraysize
raw_connections bypass the sqlalchemy-bigquery query execution context and deal with
Expand Down Expand Up @@ -594,47 +597,6 @@ def get_parameters_from_uri(

raise ValidationError("Invalid service credentials")

@classmethod
def mask_encrypted_extra(cls, encrypted_extra: str | None) -> str | None:
if encrypted_extra is None:
return encrypted_extra

try:
config = json.loads(encrypted_extra)
except (json.JSONDecodeError, TypeError):
return encrypted_extra

with contextlib.suppress(KeyError):
config["credentials_info"]["private_key"] = PASSWORD_MASK
return json.dumps(config)

@classmethod
def unmask_encrypted_extra(cls, old: str | None, new: str | None) -> str | None:
"""
Reuse ``private_key`` if available and unchanged.
"""
if old is None or new is None:
return new

try:
old_config = json.loads(old)
new_config = json.loads(new)
except (TypeError, json.JSONDecodeError):
return new

if "credentials_info" not in new_config:
return new

if "private_key" not in new_config["credentials_info"]:
return new

if new_config["credentials_info"]["private_key"] == PASSWORD_MASK:
new_config["credentials_info"]["private_key"] = old_config[
"credentials_info"
]["private_key"]

return json.dumps(new_config)

@classmethod
def get_dbapi_exception_mapping(cls) -> dict[type[Exception], type[Exception]]:
# pylint: disable=import-outside-toplevel
Expand Down
51 changes: 6 additions & 45 deletions superset/db_engine_specs/gsheets.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

from __future__ import annotations

import contextlib
import logging
import re
from re import Pattern
Expand All @@ -37,7 +36,6 @@
from sqlalchemy.engine.url import URL

from superset import db, security_manager
from superset.constants import PASSWORD_MASK
from superset.databases.schemas import encrypted_field_properties, EncryptedString
from superset.db_engine_specs.shillelagh import ShillelaghEngineSpec
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
Expand Down Expand Up @@ -93,6 +91,10 @@ class GSheetsEngineSpec(ShillelaghEngineSpec):
default_driver = "apsw"
sqlalchemy_uri_placeholder = "gsheets://"

# when editing the database, mask this field in `encrypted_extra`
# pylint: disable=invalid-name
encrypted_extra_sensitive_fields = ["$.service_account_info.private_key"]

custom_errors: dict[Pattern[str], tuple[str, SupersetErrorType, dict[str, Any]]] = {
SYNTAX_ERROR_REGEX: (
__(
Expand Down Expand Up @@ -157,11 +159,11 @@ def get_extra_table_metadata(
return {"metadata": metadata["extra"]}

@classmethod
# pylint: disable=unused-argument
def build_sqlalchemy_uri(
cls,
_: GSheetsParametersType,
encrypted_extra: None # pylint: disable=unused-argument
| (dict[str, Any]) = None,
encrypted_extra: None | (dict[str, Any]) = None,
) -> str:
return "gsheets://"

Expand All @@ -177,47 +179,6 @@ def get_parameters_from_uri(

raise ValidationError("Invalid service credentials")

@classmethod
def mask_encrypted_extra(cls, encrypted_extra: str | None) -> str | None:
if encrypted_extra is None:
return encrypted_extra

try:
config = json.loads(encrypted_extra)
except (TypeError, json.JSONDecodeError):
return encrypted_extra

with contextlib.suppress(KeyError):
config["service_account_info"]["private_key"] = PASSWORD_MASK
return json.dumps(config)

@classmethod
def unmask_encrypted_extra(cls, old: str | None, new: str | None) -> str | None:
"""
Reuse ``private_key`` if available and unchanged.
"""
if old is None or new is None:
return new

try:
old_config = json.loads(old)
new_config = json.loads(new)
except (TypeError, json.JSONDecodeError):
return new

if "service_account_info" not in new_config:
return new

if "private_key" not in new_config["service_account_info"]:
return new

if new_config["service_account_info"]["private_key"] == PASSWORD_MASK:
new_config["service_account_info"]["private_key"] = old_config[
"service_account_info"
]["private_key"]

return json.dumps(new_config)

@classmethod
def parameters_json_schema(cls) -> Any:
"""
Expand Down
Loading

0 comments on commit e2a8519

Please sign in to comment.