From b6e7b0da254ab09a456cd3223dc2ce5a0058c85f Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 13 Jun 2024 22:21:35 -0400 Subject: [PATCH 1/2] fix WPS owslib/pywps mixup between default/data_type as default format of an optional ComplexData input --- CHANGES.rst | 5 +- tests/functional/test_wps_provider.py | 77 +++++++++++++++++++ tests/processes/test_convert.py | 82 +++++++++++++++++++++ tests/resources/__init__.py | 1 + tests/resources/wps_complex_optional_io.xml | 23 ++++++ weaver/processes/convert.py | 24 ++++-- 6 files changed, 206 insertions(+), 6 deletions(-) create mode 100644 tests/resources/wps_complex_optional_io.xml diff --git a/CHANGES.rst b/CHANGES.rst index 76d38d483..a8612519e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -16,7 +16,10 @@ Changes: Fixes: ------ -- No change. +- Fix invalid ``default`` attribute resolution of an optional `WPS` ``ComplexData`` (i.e.: ``minOccurs: 0``) that also + provides a ``Default/Format`` in the `XML` process description. When that input was omitted (as permitted) from the + execution request, parsing of the `XML` would incorrectly inject the `JSON` representation of the ``Default/Format`` + as a substitute for the ``default`` value. See ``weaver.processes.convert.ows2json_io`` implementation for details. .. _changes_5.6.0: diff --git a/tests/functional/test_wps_provider.py b/tests/functional/test_wps_provider.py index 143404517..463859305 100644 --- a/tests/functional/test_wps_provider.py +++ b/tests/functional/test_wps_provider.py @@ -246,3 +246,80 @@ def test_register_describe_execute_ncdump(self, mock_responses): with open(output_path, mode="r", encoding="utf-8") as out_file: data = out_file.read() assert data == ncdump_data + + @mocked_remote_server_requests_wps1([ + resources.TEST_REMOTE_SERVER_URL, + resources.TEST_HUMMINGBIRD_WPS1_GETCAP_XML, + [resources.TEST_HUMMINGBIRD_DESCRIBE_WPS1_XML], + ]) + def test_register_describe_execute_ncdump_no_default_format(self, mock_responses): + """ + Test a remote :term:`WPS` provider that defines an optional input of ``ComplexData`` type with a default format. + + When the ``ComplexData`` input is omitted (allowed by ``minOccurs=0``), the execution parsing must **NOT** + inject an input due to the detection of the default **format** (rather than default data/reference). If an + input is erroneously injected, :mod:`pywps` tends to auto-generate an empty file (from the storage linked to + the input), which yields an explicitly provided empty string value as ``ComplexData`` input. + + An example of such process is the ``ncdump`` that + takes [0-100] ``ComplexData`` AND/OR [0-100] NetCDF OpenDAP URL strings as ``LiteralData``. + """ + self.service_store.clear_services() + + # register the provider + remote_provider_name = "test-wps-remote-provider-hummingbird" + path = "/providers" + data = {"id": remote_provider_name, "url": resources.TEST_REMOTE_SERVER_URL} + resp = self.app.post_json(path, params=data, headers=self.json_headers) + assert resp.status_code == 201 + + exec_path = f"{self.app_url}/providers/{remote_provider_name}/processes/ncdump/execution" + exec_file = "http://localhost.com/dont/care.nc" + exec_body = { + "mode": ExecuteMode.ASYNC, + "response": ExecuteResponse.DOCUMENT, + "inputs": [{"id": "dataset_opendap", "data": exec_file}], + "outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}] + } + status_url = f"{resources.TEST_REMOTE_SERVER_URL}/status.xml" + output_url = f"{resources.TEST_REMOTE_SERVER_URL}/output.txt" + with open(resources.TEST_HUMMINGBIRD_STATUS_WPS1_XML, mode="r", encoding="utf-8") as status_file: + status = status_file.read().format( + TEST_SERVER_URL=resources.TEST_REMOTE_SERVER_URL, + LOCATION_XML=status_url, + OUTPUT_FILE=output_url, + ) + + ncdump_data = "Fake NetCDF Data" + with contextlib.ExitStack() as stack_exec: + # mock direct execution bypassing celery + for mock_exec in mocked_execute_celery(): + stack_exec.enter_context(mock_exec) + # mock responses expected by "remote" WPS-1 Execute request and relevant documents + mock_responses.add("GET", exec_file, body=ncdump_data, headers={"Content-Type": ContentType.APP_NETCDF}) + mock_responses.add("POST", resources.TEST_REMOTE_SERVER_URL, body=status, headers=self.xml_headers) + mock_responses.add("GET", status_url, body=status, headers=self.xml_headers) + mock_responses.add("GET", output_url, body=ncdump_data, headers={"Content-Type": ContentType.TEXT_PLAIN}) + + # add reference to specific provider class 'dispatch' to validate it was called with expected inputs + # (whole procedure must run even though a lot of parts are mocked) + # use the last possible method before sendoff to WPS, since filtering of omitted defaults can happen + # at the very last moment to accommodate for CWL needing 'null' inputs explicitly in jobs submission + real_wps1_process_dispatch = Wps1Process.dispatch + handle_wps1_process_dispatch = stack_exec.enter_context( + mock.patch.object(Wps1Process, "dispatch", side_effect=real_wps1_process_dispatch, autospec=True) + ) # type: MockPatch + + # launch job execution and validate + resp = mocked_sub_requests(self.app, "post_json", exec_path, timeout=5, + data=exec_body, headers=self.json_headers, only_local=True) + assert resp.status_code in [200, 201], f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}" + assert handle_wps1_process_dispatch.called, "WPS-1 handler should have been called by CWL runner context" + + wps_exec_params = handle_wps1_process_dispatch.call_args_list[0].args + wps_exec_inputs = wps_exec_params[1] + assert isinstance(wps_exec_inputs, list), "WPS Inputs should have been passed for dispatch as 1st argument." + wps_exec_inputs = dict(wps_exec_inputs) + assert "dataset_opendap" in wps_exec_inputs, "Explicitly provided WPS inputs should be present." + assert "dataset" not in wps_exec_inputs, "Omitted WPS input should not be injected in the request." + assert wps_exec_inputs["dataset_opendap"] == exec_file diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py index c567101fa..8cec95b04 100644 --- a/tests/processes/test_convert.py +++ b/tests/processes/test_convert.py @@ -25,6 +25,7 @@ from tests import resources from tests.utils import MockedResponse, assert_equal_any_order, mocked_remote_server_requests_wps1 +from weaver import xml_util from weaver.exceptions import PackageTypeError from weaver.formats import ( EDAM_MAPPING, @@ -69,6 +70,7 @@ get_io_type_category, is_cwl_complex_type, json2oas_io, + json2wps_io, json2wps_allowed_values, json2wps_datatype, json2wps_supported_uoms, @@ -2341,6 +2343,86 @@ def test_ows2json_io_convert_literal_uom(ows_io, json_io): assert result == json_io +def test_ows_wps_json_default_complex_format(): + xml_io = xml_util.parse(resources.WPS_COMPLEX_OPTIONAL_IO_XML).xpath("//DataInputs/Input")[0] + ows_io = OWSInput(xml_io) + res_io = ows2json_io(ows_io) + assert res_io == { + "id": "test", + "title": "test", + "min_occurs": 0, + "max_occurs": 100, + "any_value": False, + "type": WPS_COMPLEX_DATA, + "data_type": WPS_COMPLEX_DATA, + "data_format": { + "mimeType": ContentType.APP_NETCDF, + "encoding": "base64", + "maximumMegabytes": 200, + "schema": None, + "default": True, + }, + "formats": [ + { + "mimeType": ContentType.APP_NETCDF, + "encoding": "base64", + "maximumMegabytes": 200, + "schema": None, + "default": True, + }, + { + "mimeType": ContentType.APP_JSON, + "encoding": None, + "maximumMegabytes": 200, + "schema": None, + "default": False, + }, + ], + } + + wps_io = json2wps_io(res_io, "input") + assert wps_io.identifier == "test" + assert wps_io.title == "test" + assert wps_io.min_occurs == 0 + assert wps_io.max_occurs == 100 + assert wps_io.data_format == Format(ContentType.APP_NETCDF, encoding="base64") + assert all(wps_fmt == val_fmt for wps_fmt, val_fmt in zip( + wps_io.supported_formats, + [ + Format(ContentType.APP_NETCDF, encoding="base64"), + Format(ContentType.APP_JSON), + ], + )) + + # ensure no defaults applied + assert wps_io.data is None + assert wps_io._default is None + + res_io = wps2json_io(wps_io) + assert res_io == { + "id": "test", + "title": "test", + "description": "", + "keywords": [], + "metadata": [], + "translations": None, + "asreference": False, + "workdir": None, + "minOccurs": "0", + "maxOccurs": "100", + "mode": MODE.NONE, + "type": WPS_COMPLEX, + "data_format": {"mime_type": ContentType.APP_NETCDF, "encoding": "base64", "schema": "", "extension": ""}, + "formats": [ + {"mediaType": ContentType.APP_NETCDF, "encoding": "base64", "schema": "", "extension": "", "default": True}, + {"mediaType": ContentType.APP_JSON, "encoding": "", "schema": "", "extension": "", "default": False} + ], + # from default data_format + "mimetype": ContentType.APP_NETCDF, + "encoding": "base64", + } + + @pytest.mark.parametrize( ["value", "dtype", "ctype", "expect"], [ diff --git a/tests/resources/__init__.py b/tests/resources/__init__.py index 5e4357b10..db9b94e7f 100644 --- a/tests/resources/__init__.py +++ b/tests/resources/__init__.py @@ -49,6 +49,7 @@ WPS_ENUM_ARRAY_IO_ID = "subset_countries" WPS_ENUM_ARRAY_IO_XML = os.path.join(RESOURCES_PATH, "wps_enum_array_io.xml") +WPS_COMPLEX_OPTIONAL_IO_XML = os.path.join(RESOURCES_PATH, "wps_complex_optional_io.xml") WPS_LITERAL_COMPLEX_IO_ID = "ice_days" WPS_LITERAL_COMPLEX_IO_XML = os.path.join(RESOURCES_PATH, "wps_literal_complex_io.xml") WPS_LITERAL_VALUES_IO_ID = "ensemble_grid_point_cold_spell_duration_index" diff --git a/tests/resources/wps_complex_optional_io.xml b/tests/resources/wps_complex_optional_io.xml new file mode 100644 index 000000000..5211637ec --- /dev/null +++ b/tests/resources/wps_complex_optional_io.xml @@ -0,0 +1,23 @@ + + + test + test + + + + application/x-netcdf + base64 + + + + + application/x-netcdf + base64 + + + application/json + + + + + diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index f625b0acf..5551f6b25 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -397,10 +397,22 @@ def ows2json_io(ows_io): # add 'format' if missing, derived from other variants if io_type == WPS_COMPLEX_DATA: - fmt_default = False + fmt_default_found = False if "default" in json_io and isinstance(json_io["default"], dict): - json_io["default"]["default"] = True # provide for workflow extension (internal), schema drops it (API) - fmt_default = True + # NOTE: + # When parsing between OWSLib and PyWPS for corresponding 'ComplexData' structures, there is an + # ambiguity in the field names. The OWSLib 'defaultValue' attribute is employed as the "default format" + # to resolve if the **format** was omitted (but only when the input reference/data must is provided!), + # whereas PyWPS uses the attribute named 'data_format' for the applied (or default) format of the input + # reference/data. However, both of these libraries also reuse the 'defaultValue' and 'default' attributes + # respectively for the 'LiteralData' type, where the value is the **actual data** used by default. + # The conversion between those libraries must therefore be careful about these specific field combinations + # to avoid misbehavior when resolving some form of "default" by omission of data/reference/format. + # To be sure, swap the ambiguous 'default' for explicit 'data_format' (where 'data=None' if I/O omitted). + fmt_default_value = json_io.pop("default") + fmt_default_value["default"] = True # provide for workflow extension (internal), schema drops it (API) + json_io["data_format"] = fmt_default_value + fmt_default_found = True # retrieve alternate format definitions if "formats" not in json_io: @@ -430,13 +442,15 @@ def ows2json_io(ows_io): # apply the default flag for fmt in json_io["formats"]: - fmt["default"] = fmt_default and is_equal_formats(json_io["default"], fmt) + fmt_default_value = get_field(json_io, "data_format", default={}) + fmt["default"] = fmt_default_found and is_equal_formats(fmt_default_value, fmt) if fmt["default"]: break # NOTE: - # Don't apply 'minOccurs=0' as in below literal case because default 'format' does not imply that unspecified + # Don't force 'minOccurs=0' as in below literal case because default 'format' does not imply that unspecified # input is valid, but rather that given an input without explicit 'format' specified, that 'default' is used. + # If the I/O is optional, it should have indicated explicitly the 'minOccurs=0' attribute for 'ComplexData'. return json_io # add value constraints in specifications From 8dffa2ebea1b665a496670342311caadb5aaade5 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 13 Jun 2024 23:36:25 -0400 Subject: [PATCH 2/2] fix lint --- tests/processes/test_convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py index 8cec95b04..9969f93b0 100644 --- a/tests/processes/test_convert.py +++ b/tests/processes/test_convert.py @@ -70,9 +70,9 @@ get_io_type_category, is_cwl_complex_type, json2oas_io, - json2wps_io, json2wps_allowed_values, json2wps_datatype, + json2wps_io, json2wps_supported_uoms, merge_io_formats, normalize_ordered_io,