From ebdaec17278f227506c493add634eac294e4d9b9 Mon Sep 17 00:00:00 2001 From: Mike VanDenburgh Date: Mon, 25 Apr 2022 13:29:15 -0400 Subject: [PATCH 1/4] Escape glob string This prevents postgres from executing a regex in the glob expression if the user provides one --- dandiapi/api/views/asset.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py index d430cdaa2..9665a3cbf 100644 --- a/dandiapi/api/views/asset.py +++ b/dandiapi/api/views/asset.py @@ -531,9 +531,12 @@ def list(self, request, *args, **kwargs): ) if glob_pattern is not None: - queryset = queryset.filter( - path__iregex=glob_pattern.replace('*', '.*').replace('.', '\\.') - ) + # Escape special characters in the glob pattern. This is a security precaution taken since + # we are using postgres' regex search. A malicious user who knows this could include a + # regex as part of the glob expression, which postgres would happily parse and use if it's + # not escaped. + glob_pattern = re.escape(glob_pattern) + queryset = queryset.filter(path__iregex=glob_pattern.replace('\\*', '.*')) page = self.paginate_queryset(queryset) if page is not None: From 3b3393a697785f31402510f393bb5c86fc21386e Mon Sep 17 00:00:00 2001 From: Mike VanDenburgh Date: Mon, 25 Apr 2022 13:38:48 -0400 Subject: [PATCH 2/4] Add a test for glob that was affected by the bug --- dandiapi/api/tests/test_asset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py index f9a6fc80f..44d4f774d 100644 --- a/dandiapi/api/tests/test_asset.py +++ b/dandiapi/api/tests/test_asset.py @@ -1271,6 +1271,7 @@ def test_asset_direct_info(api_client, asset): ['a/b/c/d.txt', 'a/b/c/e.txt'], ), ('a/b/d/*', ['a/b/d/e.txt']), + ('*b*e.txt', ['a/b/c/e.txt', 'a/b/d/e.txt']), ], ) def test_asset_rest_glob(api_client, asset_factory, version, glob_pattern, expected_paths): From ba4ef0421648f5bed99c9a2d0102f8e9dc516b45 Mon Sep 17 00:00:00 2001 From: Mike VanDenburgh Date: Wed, 15 Jun 2022 13:18:25 -0400 Subject: [PATCH 3/4] Remove regex search Supporting regex opens us up to the possibility of a reDOS attack. --- dandiapi/api/tests/test_asset.py | 49 ------------------------------- dandiapi/api/views/asset.py | 20 +++---------- dandiapi/api/views/serializers.py | 8 ----- 3 files changed, 4 insertions(+), 73 deletions(-) diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py index 44d4f774d..eabc7e1e3 100644 --- a/dandiapi/api/tests/test_asset.py +++ b/dandiapi/api/tests/test_asset.py @@ -1285,52 +1285,3 @@ def test_asset_rest_glob(api_client, asset_factory, version, glob_pattern, expec ) assert expected_paths == [asset['path'] for asset in resp.json()['results']] - - -@pytest.mark.django_db -@pytest.mark.parametrize( - 'regex_pattern,expected_paths', - [ - ( - '[0-9].txt', - ['1.txt', '1/2/3.txt'], - ), - ( - '[a-z].txt', - ['a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt'], - ), - ], -) -def test_asset_rest_regex_valid(api_client, asset_factory, version, regex_pattern, expected_paths): - paths = ('1.txt', '1/2/3.txt', 'a/b/c/d.txt', 'a/b/c/e.txt', 'a/b/d/e.txt') - for path in paths: - version.assets.add(asset_factory(path=path)) - - resp = api_client.get( - f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/', - {'regex': regex_pattern}, - ) - - assert expected_paths == [asset['path'] for asset in resp.json()['results']] - - -@pytest.mark.django_db -def test_asset_rest_regex_invalid(api_client, version): - resp = api_client.get( - f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/', - {'regex': '[[[[['}, # provide an invalid regex - ) - - assert resp.status_code == 400 - - -@pytest.mark.django_db -def test_asset_rest_glob_regex_together(api_client, version): - """Test that including both a glob and regex returns a 400 error.""" - resp = api_client.get( - f'/api/dandisets/{version.dandiset.identifier}/versions/{version.version}/assets/', - {'regex': '[0-9].txt', 'glob': '*.txt'}, - ) - - assert resp.status_code == 400 - assert resp.json() == {'glob': ['Cannot specify both glob and regex']} diff --git a/dandiapi/api/views/asset.py b/dandiapi/api/views/asset.py index 9665a3cbf..152c91569 100644 --- a/dandiapi/api/views/asset.py +++ b/dandiapi/api/views/asset.py @@ -517,24 +517,12 @@ def list(self, request, *args, **kwargs): queryset = self.filter_queryset(self.get_queryset()) glob_pattern: str | None = serializer.validated_data.get('glob') - regex_pattern: str | None = serializer.validated_data.get('regex') - - if regex_pattern is not None: - try: - # Validate the regex by calling re.compile on it - re.compile(regex_pattern) - queryset = queryset.filter(path__iregex=regex_pattern) - except re.error: - return Response( - data=f'{regex_pattern} is not a valid regex pattern.', - status=status.HTTP_400_BAD_REQUEST, - ) if glob_pattern is not None: - # Escape special characters in the glob pattern. This is a security precaution taken since - # we are using postgres' regex search. A malicious user who knows this could include a - # regex as part of the glob expression, which postgres would happily parse and use if it's - # not escaped. + # Escape special characters in the glob pattern. This is a security precaution taken + # since we are using postgres' regex search. A malicious user who knows this could + # include a regex as part of the glob expression, which postgres would happily parse + # and use if it's not escaped. glob_pattern = re.escape(glob_pattern) queryset = queryset.filter(path__iregex=glob_pattern.replace('\\*', '.*')) diff --git a/dandiapi/api/views/serializers.py b/dandiapi/api/views/serializers.py index 46cb9a0ca..eeb83f568 100644 --- a/dandiapi/api/views/serializers.py +++ b/dandiapi/api/views/serializers.py @@ -201,11 +201,3 @@ class AssetPathsQueryParameterSerializer(serializers.Serializer): class AssetListSerializer(serializers.Serializer): glob = serializers.CharField(required=False) - regex = serializers.CharField(required=False) - - def validate(self, attrs): - if 'glob' in attrs and 'regex' in attrs: - raise serializers.ValidationError( - {'glob': 'Cannot specify both glob and regex'}, - ) - return super().validate(attrs) From 88e301849fa9e1d1a802868e33464987d40e6b15 Mon Sep 17 00:00:00 2001 From: Mike VanDenburgh Date: Thu, 16 Jun 2022 17:35:38 -0400 Subject: [PATCH 4/4] Add test case that ensures regexes are escaped --- dandiapi/api/tests/test_asset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py index eabc7e1e3..680f9de53 100644 --- a/dandiapi/api/tests/test_asset.py +++ b/dandiapi/api/tests/test_asset.py @@ -1272,6 +1272,7 @@ def test_asset_direct_info(api_client, asset): ), ('a/b/d/*', ['a/b/d/e.txt']), ('*b*e.txt', ['a/b/c/e.txt', 'a/b/d/e.txt']), + ('.*[a|b].txt', []), # regexes shouldn't be evaluated ], ) def test_asset_rest_glob(api_client, asset_factory, version, glob_pattern, expected_paths):