Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix WPS owslib/pywps mixup between default/data_type as default format of an optional ComplexData input #666

Merged
merged 2 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ Changes:

Fixes:
------
- No change.
- Fix invalid ``default`` attribute resolution of an optional `WPS` ``ComplexData`` (i.e.: ``minOccurs: 0``) that also
provides a ``Default/Format`` in the `XML` process description. When that input was omitted (as permitted) from the
execution request, parsing of the `XML` would incorrectly inject the `JSON` representation of the ``Default/Format``
as a substitute for the ``default`` value. See ``weaver.processes.convert.ows2json_io`` implementation for details.

.. _changes_5.6.0:

Expand Down
77 changes: 77 additions & 0 deletions tests/functional/test_wps_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,80 @@ def test_register_describe_execute_ncdump(self, mock_responses):
with open(output_path, mode="r", encoding="utf-8") as out_file:
data = out_file.read()
assert data == ncdump_data

@mocked_remote_server_requests_wps1([
resources.TEST_REMOTE_SERVER_URL,
resources.TEST_HUMMINGBIRD_WPS1_GETCAP_XML,
[resources.TEST_HUMMINGBIRD_DESCRIBE_WPS1_XML],
])
def test_register_describe_execute_ncdump_no_default_format(self, mock_responses):
"""
Test a remote :term:`WPS` provider that defines an optional input of ``ComplexData`` type with a default format.

When the ``ComplexData`` input is omitted (allowed by ``minOccurs=0``), the execution parsing must **NOT**
inject an input due to the detection of the default **format** (rather than default data/reference). If an
input is erroneously injected, :mod:`pywps` tends to auto-generate an empty file (from the storage linked to
the input), which yields an explicitly provided empty string value as ``ComplexData`` input.

An example of such process is the ``ncdump`` that
takes [0-100] ``ComplexData`` AND/OR [0-100] NetCDF OpenDAP URL strings as ``LiteralData``.
"""
self.service_store.clear_services()

# register the provider
remote_provider_name = "test-wps-remote-provider-hummingbird"
path = "/providers"
data = {"id": remote_provider_name, "url": resources.TEST_REMOTE_SERVER_URL}
resp = self.app.post_json(path, params=data, headers=self.json_headers)
assert resp.status_code == 201

exec_path = f"{self.app_url}/providers/{remote_provider_name}/processes/ncdump/execution"
exec_file = "http://localhost.com/dont/care.nc"
exec_body = {
"mode": ExecuteMode.ASYNC,
"response": ExecuteResponse.DOCUMENT,
"inputs": [{"id": "dataset_opendap", "data": exec_file}],
"outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}]
}
status_url = f"{resources.TEST_REMOTE_SERVER_URL}/status.xml"
output_url = f"{resources.TEST_REMOTE_SERVER_URL}/output.txt"
with open(resources.TEST_HUMMINGBIRD_STATUS_WPS1_XML, mode="r", encoding="utf-8") as status_file:
status = status_file.read().format(
TEST_SERVER_URL=resources.TEST_REMOTE_SERVER_URL,
LOCATION_XML=status_url,
OUTPUT_FILE=output_url,
)

ncdump_data = "Fake NetCDF Data"
with contextlib.ExitStack() as stack_exec:
# mock direct execution bypassing celery
for mock_exec in mocked_execute_celery():
stack_exec.enter_context(mock_exec)
# mock responses expected by "remote" WPS-1 Execute request and relevant documents
mock_responses.add("GET", exec_file, body=ncdump_data, headers={"Content-Type": ContentType.APP_NETCDF})
mock_responses.add("POST", resources.TEST_REMOTE_SERVER_URL, body=status, headers=self.xml_headers)
mock_responses.add("GET", status_url, body=status, headers=self.xml_headers)
mock_responses.add("GET", output_url, body=ncdump_data, headers={"Content-Type": ContentType.TEXT_PLAIN})

# add reference to specific provider class 'dispatch' to validate it was called with expected inputs
# (whole procedure must run even though a lot of parts are mocked)
# use the last possible method before sendoff to WPS, since filtering of omitted defaults can happen
# at the very last moment to accommodate for CWL needing 'null' inputs explicitly in jobs submission
real_wps1_process_dispatch = Wps1Process.dispatch
handle_wps1_process_dispatch = stack_exec.enter_context(
mock.patch.object(Wps1Process, "dispatch", side_effect=real_wps1_process_dispatch, autospec=True)
) # type: MockPatch

# launch job execution and validate
resp = mocked_sub_requests(self.app, "post_json", exec_path, timeout=5,
data=exec_body, headers=self.json_headers, only_local=True)
assert resp.status_code in [200, 201], f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}"
assert handle_wps1_process_dispatch.called, "WPS-1 handler should have been called by CWL runner context"

wps_exec_params = handle_wps1_process_dispatch.call_args_list[0].args
wps_exec_inputs = wps_exec_params[1]
assert isinstance(wps_exec_inputs, list), "WPS Inputs should have been passed for dispatch as 1st argument."
wps_exec_inputs = dict(wps_exec_inputs)
assert "dataset_opendap" in wps_exec_inputs, "Explicitly provided WPS inputs should be present."
assert "dataset" not in wps_exec_inputs, "Omitted WPS input should not be injected in the request."
assert wps_exec_inputs["dataset_opendap"] == exec_file
82 changes: 82 additions & 0 deletions tests/processes/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from tests import resources
from tests.utils import MockedResponse, assert_equal_any_order, mocked_remote_server_requests_wps1
from weaver import xml_util
from weaver.exceptions import PackageTypeError
from weaver.formats import (
EDAM_MAPPING,
Expand Down Expand Up @@ -71,6 +72,7 @@
json2oas_io,
json2wps_allowed_values,
json2wps_datatype,
json2wps_io,
json2wps_supported_uoms,
merge_io_formats,
normalize_ordered_io,
Expand Down Expand Up @@ -2341,6 +2343,86 @@ def test_ows2json_io_convert_literal_uom(ows_io, json_io):
assert result == json_io


def test_ows_wps_json_default_complex_format():
xml_io = xml_util.parse(resources.WPS_COMPLEX_OPTIONAL_IO_XML).xpath("//DataInputs/Input")[0]
ows_io = OWSInput(xml_io)
res_io = ows2json_io(ows_io)
assert res_io == {
"id": "test",
"title": "test",
"min_occurs": 0,
"max_occurs": 100,
"any_value": False,
"type": WPS_COMPLEX_DATA,
"data_type": WPS_COMPLEX_DATA,
"data_format": {
"mimeType": ContentType.APP_NETCDF,
"encoding": "base64",
"maximumMegabytes": 200,
"schema": None,
"default": True,
},
"formats": [
{
"mimeType": ContentType.APP_NETCDF,
"encoding": "base64",
"maximumMegabytes": 200,
"schema": None,
"default": True,
},
{
"mimeType": ContentType.APP_JSON,
"encoding": None,
"maximumMegabytes": 200,
"schema": None,
"default": False,
},
],
}

wps_io = json2wps_io(res_io, "input")
assert wps_io.identifier == "test"
assert wps_io.title == "test"
assert wps_io.min_occurs == 0
assert wps_io.max_occurs == 100
assert wps_io.data_format == Format(ContentType.APP_NETCDF, encoding="base64")
assert all(wps_fmt == val_fmt for wps_fmt, val_fmt in zip(
wps_io.supported_formats,
[
Format(ContentType.APP_NETCDF, encoding="base64"),
Format(ContentType.APP_JSON),
],
))

# ensure no defaults applied
assert wps_io.data is None
assert wps_io._default is None

res_io = wps2json_io(wps_io)
assert res_io == {
"id": "test",
"title": "test",
"description": "",
"keywords": [],
"metadata": [],
"translations": None,
"asreference": False,
"workdir": None,
"minOccurs": "0",
"maxOccurs": "100",
"mode": MODE.NONE,
"type": WPS_COMPLEX,
"data_format": {"mime_type": ContentType.APP_NETCDF, "encoding": "base64", "schema": "", "extension": ""},
"formats": [
{"mediaType": ContentType.APP_NETCDF, "encoding": "base64", "schema": "", "extension": "", "default": True},
{"mediaType": ContentType.APP_JSON, "encoding": "", "schema": "", "extension": "", "default": False}
],
# from default data_format
"mimetype": ContentType.APP_NETCDF,
"encoding": "base64",
}


@pytest.mark.parametrize(
["value", "dtype", "ctype", "expect"],
[
Expand Down
1 change: 1 addition & 0 deletions tests/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@

WPS_ENUM_ARRAY_IO_ID = "subset_countries"
WPS_ENUM_ARRAY_IO_XML = os.path.join(RESOURCES_PATH, "wps_enum_array_io.xml")
WPS_COMPLEX_OPTIONAL_IO_XML = os.path.join(RESOURCES_PATH, "wps_complex_optional_io.xml")
WPS_LITERAL_COMPLEX_IO_ID = "ice_days"
WPS_LITERAL_COMPLEX_IO_XML = os.path.join(RESOURCES_PATH, "wps_literal_complex_io.xml")
WPS_LITERAL_VALUES_IO_ID = "ensemble_grid_point_cold_spell_duration_index"
Expand Down
23 changes: 23 additions & 0 deletions tests/resources/wps_complex_optional_io.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<DataInputs xmlns:ows="http://www.opengis.net/ows/1.1">
<Input minOccurs="0" maxOccurs="100">
<ows:Identifier>test</ows:Identifier>
<ows:Title>test</ows:Title>
<ComplexData maximumMegabytes="200">
<Default>
<Format>
<MimeType>application/x-netcdf</MimeType>
<Encoding>base64</Encoding>
</Format>
</Default>
<Supported>
<Format>
<MimeType>application/x-netcdf</MimeType>
<Encoding>base64</Encoding>
</Format>
<Format>
<MimeType>application/json</MimeType>
</Format>
</Supported>
</ComplexData>
</Input>
</DataInputs>
24 changes: 19 additions & 5 deletions weaver/processes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,22 @@ def ows2json_io(ows_io):

# add 'format' if missing, derived from other variants
if io_type == WPS_COMPLEX_DATA:
fmt_default = False
fmt_default_found = False
if "default" in json_io and isinstance(json_io["default"], dict):
json_io["default"]["default"] = True # provide for workflow extension (internal), schema drops it (API)
fmt_default = True
# NOTE:
# When parsing between OWSLib and PyWPS for corresponding 'ComplexData' structures, there is an
# ambiguity in the field names. The OWSLib 'defaultValue' attribute is employed as the "default format"
# to resolve if the **format** was omitted (but only when the input reference/data must is provided!),
# whereas PyWPS uses the attribute named 'data_format' for the applied (or default) format of the input
# reference/data. However, both of these libraries also reuse the 'defaultValue' and 'default' attributes
# respectively for the 'LiteralData' type, where the value is the **actual data** used by default.
# The conversion between those libraries must therefore be careful about these specific field combinations
# to avoid misbehavior when resolving some form of "default" by omission of data/reference/format.
# To be sure, swap the ambiguous 'default' for explicit 'data_format' (where 'data=None' if I/O omitted).
fmt_default_value = json_io.pop("default")
fmt_default_value["default"] = True # provide for workflow extension (internal), schema drops it (API)
json_io["data_format"] = fmt_default_value
fmt_default_found = True

# retrieve alternate format definitions
if "formats" not in json_io:
Expand Down Expand Up @@ -430,13 +442,15 @@ def ows2json_io(ows_io):

# apply the default flag
for fmt in json_io["formats"]:
fmt["default"] = fmt_default and is_equal_formats(json_io["default"], fmt)
fmt_default_value = get_field(json_io, "data_format", default={})
fmt["default"] = fmt_default_found and is_equal_formats(fmt_default_value, fmt)
if fmt["default"]:
break

# NOTE:
# Don't apply 'minOccurs=0' as in below literal case because default 'format' does not imply that unspecified
# Don't force 'minOccurs=0' as in below literal case because default 'format' does not imply that unspecified
# input is valid, but rather that given an input without explicit 'format' specified, that 'default' is used.
# If the I/O is optional, it should have indicated explicitly the 'minOccurs=0' attribute for 'ComplexData'.
return json_io

# add value constraints in specifications
Expand Down
Loading