From 0e511e763f3fda98c0aea4b64f3091582e572d18 Mon Sep 17 00:00:00 2001 From: Julia Lahovnik <126178122+jlahovnik@users.noreply.github.com> Date: Fri, 9 Aug 2024 15:33:25 +0200 Subject: [PATCH] fix(server): invalid characters in download urls (#1276) --- eodag/rest/stac.py | 27 +++++++++++++++++++++++++-- tests/units/test_stac_core.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/eodag/rest/stac.py b/eodag/rest/stac.py index f09e6c6ae..4f6602e61 100644 --- a/eodag/rest/stac.py +++ b/eodag/rest/stac.py @@ -22,7 +22,15 @@ from collections import defaultdict from datetime import datetime, timezone from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, cast -from urllib.parse import parse_qs, urlencode, urlparse, urlunparse +from urllib.parse import ( + parse_qs, + quote, + urlencode, + urlparse, + urlsplit, + urlunparse, + urlunsplit, +) import dateutil.parser import geojson @@ -102,6 +110,13 @@ ] +def _quote_url_path(url: str) -> str: + parsed = urlsplit(url) + path = quote(parsed.path) + components = (parsed.scheme, parsed.netloc, path, parsed.query, parsed.fragment) + return urlunsplit(components) + + class StacCommon: """Stac common object @@ -342,6 +357,10 @@ def __get_item_list( # remove empty properties product_item = self.__filter_item_properties_values(product_item) + # quote invalid characters in links + for link in product_item["links"]: + link["href"] = _quote_url_path(link["href"]) + # update item link with datacube query-string if _dc_qs or self.provider: url_parts = urlparse(str(product_item["links"][0]["href"])) @@ -378,9 +397,12 @@ def _get_assets( origin_href = product.remote_location # update download link with up-to-date query-args + quoted_href = _quote_url_path( + downloadlink_href + ) # quote invalid characters in url assets["downloadLink"] = { "title": "Download link", - "href": downloadlink_href, + "href": quoted_href, "type": "application/zip", } @@ -424,6 +446,7 @@ def _get_assets( assets[asset_key]["type"] = asset_type if origin := assets[asset_key].get("alternate", {}).get("origin"): origin["type"] = asset_type + asset_value["href"] = _quote_url_path(asset_value["href"]) if thumbnail_url := product.properties.get( "quicklook", product.properties.get("thumbnail", None) diff --git a/tests/units/test_stac_core.py b/tests/units/test_stac_core.py index d6d704918..93f2d2793 100644 --- a/tests/units/test_stac_core.py +++ b/tests/units/test_stac_core.py @@ -352,6 +352,41 @@ async def test_search_stac_items_post(self, mock__request: Mock): }, ) + @mock.patch( + "eodag.plugins.search.qssearch.QueryStringSearch._request", + autospec=True, + ) + def test_search_stac_items_special_characters(self, mock__request: Mock): + """search_stac_items runs without any error with non-stac providers""" + # mock the QueryStringSearch request with the S2_MSI_L1C peps response search dictionary + mock__request.return_value = mock.Mock() + res = self.peps_resp_search_json + res["features"][0]["properties"]["productIdentifier"] = "id,with,commas" + res["features"][1]["properties"]["productIdentifier"] = "star*in*id" + mock__request.return_value.json.return_value = res + + response = self.rest_core.search_stac_items( + request=mock_request("http://foo/search"), + search_request=SearchPostRequest.model_validate({"provider": "peps"}), + catalogs=["S2_MSI_L1C"], + ) + + mock__request.assert_called() + + # check that default assets have been added to the response + self.assertTrue( + "downloadLink", "thumbnail" in response["features"][0]["assets"].keys() + ) + # check that invalid characters have been quoted + self.assertIn(",", response["features"][0]["id"]) + self.assertNotIn(",", response["features"][0]["assets"]["downloadLink"]["href"]) + self.assertNotIn(",", response["features"][0]["links"][0]["href"]) + self.assertIn("*", response["features"][1]["id"]) + self.assertNotIn("*", response["features"][1]["assets"]["downloadLink"]["href"]) + self.assertNotIn("*", response["features"][1]["links"][0]["href"]) + # check that no other asset have also been added to the response + self.assertEqual(len(response["features"][0]["assets"]), 2) + def test_get_templates_path(self): """get_templates_path returns an existing dir path""" with pytest.warns(