Skip to content

Commit

Permalink
fix: improve upload ZIP file validation (#25658)
Browse files Browse the repository at this point in the history
  • Loading branch information
dpgaspar authored and michael-s-molina committed Oct 19, 2023
1 parent b95ff2d commit b0f229e
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 0 deletions.
2 changes: 2 additions & 0 deletions superset/commands/importers/v1/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from superset.commands.importers.exceptions import IncorrectVersionError
from superset.databases.ssh_tunnel.models import SSHTunnel
from superset.models.core import Database
from superset.utils.core import check_is_safe_zip

METADATA_FILE_NAME = "metadata.yaml"
IMPORT_VERSION = "1.0.0"
Expand Down Expand Up @@ -207,6 +208,7 @@ def is_valid_config(file_name: str) -> bool:


def get_contents_from_bundle(bundle: ZipFile) -> dict[str, str]:
check_is_safe_zip(bundle)
return {
remove_root(file_name): bundle.read(file_name).decode()
for file_name in bundle.namelist()
Expand Down
5 changes: 5 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1579,6 +1579,11 @@ def EMAIL_HEADER_MUTATOR( # pylint: disable=invalid-name,unused-argument
Literal["examples", "all"] | tuple[str, list[dict[str, Any]]]
) = "all"

# Max allowed size for a zipped file
ZIPPED_FILE_MAX_SIZE = 100 * 1024 * 1024 # 100MB
# Max allowed compression ratio for a zipped file
ZIP_FILE_MAX_COMPRESS_RATIO = 200.0

# Configuration for environment tag shown on the navbar. Setting 'text' to '' will hide the tag.
# 'color' can either be a hex color code, or a dot-indexed theme color (e.g. error.base)
ENVIRONMENT_TAG_CONFIG = {
Expand Down
19 changes: 19 additions & 0 deletions superset/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1917,6 +1917,25 @@ def create_zip(files: dict[str, Any]) -> BytesIO:
return buf


def check_is_safe_zip(zip_file: ZipFile) -> None:
"""
Checks whether a ZIP file is safe, raises SupersetException if not.
:param zip_file:
:return:
"""
uncompress_size = 0
compress_size = 0
for zip_file_element in zip_file.infolist():
if zip_file_element.file_size > current_app.config["ZIPPED_FILE_MAX_SIZE"]:
raise SupersetException("Found file with size above allowed threshold")
uncompress_size += zip_file_element.file_size
compress_size += zip_file_element.compress_size
compress_ratio = uncompress_size / compress_size
if compress_ratio > current_app.config["ZIP_FILE_MAX_COMPRESS_RATIO"]:
raise SupersetException("Zip compress ratio above allowed threshold")


def remove_extra_adhoc_filters(form_data: dict[str, Any]) -> None:
"""
Remove filters from slice data that originate from a filter box or native filter
Expand Down
57 changes: 57 additions & 0 deletions tests/unit_tests/utils/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,16 @@
# specific language governing permissions and limitations
# under the License.
import os
from dataclasses import dataclass
from typing import Any, Optional
from unittest.mock import MagicMock

import pytest

from superset.exceptions import SupersetException
from superset.utils.core import (
cast_to_boolean,
check_is_safe_zip,
is_test,
parse_boolean_string,
QueryObjectFilterClause,
Expand All @@ -41,6 +45,12 @@
}


@dataclass
class MockZipInfo:
file_size: int
compress_size: int


@pytest.mark.parametrize(
"original,expected",
[
Expand Down Expand Up @@ -171,3 +181,50 @@ def test_other_values():
assert cast_to_boolean([]) is False
assert cast_to_boolean({}) is False
assert cast_to_boolean(object()) is False


def test_check_if_safe_zip_success(app_context: None) -> None:
"""
Test if ZIP files are safe
"""
ZipFile = MagicMock()
ZipFile.infolist.return_value = [
MockZipInfo(file_size=1000, compress_size=10),
MockZipInfo(file_size=1000, compress_size=10),
MockZipInfo(file_size=1000, compress_size=10),
MockZipInfo(file_size=1000, compress_size=10),
MockZipInfo(file_size=1000, compress_size=10),
]
check_is_safe_zip(ZipFile)


def test_check_if_safe_zip_high_rate(app_context: None) -> None:
"""
Test if ZIP files is not highly compressed
"""
ZipFile = MagicMock()
ZipFile.infolist.return_value = [
MockZipInfo(file_size=1000, compress_size=1),
MockZipInfo(file_size=1000, compress_size=1),
MockZipInfo(file_size=1000, compress_size=1),
MockZipInfo(file_size=1000, compress_size=1),
MockZipInfo(file_size=1000, compress_size=1),
]
with pytest.raises(SupersetException):
check_is_safe_zip(ZipFile)


def test_check_if_safe_zip_hidden_bomb(app_context: None) -> None:
"""
Test if ZIP file does not contain a big file highly compressed
"""
ZipFile = MagicMock()
ZipFile.infolist.return_value = [
MockZipInfo(file_size=1000, compress_size=100),
MockZipInfo(file_size=1000, compress_size=100),
MockZipInfo(file_size=1000, compress_size=100),
MockZipInfo(file_size=1000, compress_size=100),
MockZipInfo(file_size=1000 * (1024 * 1024), compress_size=100),
]
with pytest.raises(SupersetException):
check_is_safe_zip(ZipFile)

0 comments on commit b0f229e

Please sign in to comment.