Skip to content

Commit

Permalink
Merge pull request #666 from crim-ca/fix-ows-pywps-default-format
Browse files Browse the repository at this point in the history
  • Loading branch information
fmigneault authored Jun 14, 2024
2 parents 2230182 + 8dffa2e commit b4050b6
Show file tree
Hide file tree
Showing 6 changed files with 206 additions and 6 deletions.
5 changes: 4 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ Changes:

Fixes:
------
- No change.
- Fix invalid ``default`` attribute resolution of an optional `WPS` ``ComplexData`` (i.e.: ``minOccurs: 0``) that also
provides a ``Default/Format`` in the `XML` process description. When that input was omitted (as permitted) from the
execution request, parsing of the `XML` would incorrectly inject the `JSON` representation of the ``Default/Format``
as a substitute for the ``default`` value. See ``weaver.processes.convert.ows2json_io`` implementation for details.

.. _changes_5.6.0:

Expand Down
77 changes: 77 additions & 0 deletions tests/functional/test_wps_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,80 @@ def test_register_describe_execute_ncdump(self, mock_responses):
with open(output_path, mode="r", encoding="utf-8") as out_file:
data = out_file.read()
assert data == ncdump_data

@mocked_remote_server_requests_wps1([
resources.TEST_REMOTE_SERVER_URL,
resources.TEST_HUMMINGBIRD_WPS1_GETCAP_XML,
[resources.TEST_HUMMINGBIRD_DESCRIBE_WPS1_XML],
])
def test_register_describe_execute_ncdump_no_default_format(self, mock_responses):
"""
Test a remote :term:`WPS` provider that defines an optional input of ``ComplexData`` type with a default format.
When the ``ComplexData`` input is omitted (allowed by ``minOccurs=0``), the execution parsing must **NOT**
inject an input due to the detection of the default **format** (rather than default data/reference). If an
input is erroneously injected, :mod:`pywps` tends to auto-generate an empty file (from the storage linked to
the input), which yields an explicitly provided empty string value as ``ComplexData`` input.
An example of such process is the ``ncdump`` that
takes [0-100] ``ComplexData`` AND/OR [0-100] NetCDF OpenDAP URL strings as ``LiteralData``.
"""
self.service_store.clear_services()

# register the provider
remote_provider_name = "test-wps-remote-provider-hummingbird"
path = "/providers"
data = {"id": remote_provider_name, "url": resources.TEST_REMOTE_SERVER_URL}
resp = self.app.post_json(path, params=data, headers=self.json_headers)
assert resp.status_code == 201

exec_path = f"{self.app_url}/providers/{remote_provider_name}/processes/ncdump/execution"
exec_file = "http://localhost.com/dont/care.nc"
exec_body = {
"mode": ExecuteMode.ASYNC,
"response": ExecuteResponse.DOCUMENT,
"inputs": [{"id": "dataset_opendap", "data": exec_file}],
"outputs": [{"id": "output", "transmissionMode": ExecuteTransmissionMode.VALUE}]
}
status_url = f"{resources.TEST_REMOTE_SERVER_URL}/status.xml"
output_url = f"{resources.TEST_REMOTE_SERVER_URL}/output.txt"
with open(resources.TEST_HUMMINGBIRD_STATUS_WPS1_XML, mode="r", encoding="utf-8") as status_file:
status = status_file.read().format(
TEST_SERVER_URL=resources.TEST_REMOTE_SERVER_URL,
LOCATION_XML=status_url,
OUTPUT_FILE=output_url,
)

ncdump_data = "Fake NetCDF Data"
with contextlib.ExitStack() as stack_exec:
# mock direct execution bypassing celery
for mock_exec in mocked_execute_celery():
stack_exec.enter_context(mock_exec)
# mock responses expected by "remote" WPS-1 Execute request and relevant documents
mock_responses.add("GET", exec_file, body=ncdump_data, headers={"Content-Type": ContentType.APP_NETCDF})
mock_responses.add("POST", resources.TEST_REMOTE_SERVER_URL, body=status, headers=self.xml_headers)
mock_responses.add("GET", status_url, body=status, headers=self.xml_headers)
mock_responses.add("GET", output_url, body=ncdump_data, headers={"Content-Type": ContentType.TEXT_PLAIN})

# add reference to specific provider class 'dispatch' to validate it was called with expected inputs
# (whole procedure must run even though a lot of parts are mocked)
# use the last possible method before sendoff to WPS, since filtering of omitted defaults can happen
# at the very last moment to accommodate for CWL needing 'null' inputs explicitly in jobs submission
real_wps1_process_dispatch = Wps1Process.dispatch
handle_wps1_process_dispatch = stack_exec.enter_context(
mock.patch.object(Wps1Process, "dispatch", side_effect=real_wps1_process_dispatch, autospec=True)
) # type: MockPatch

# launch job execution and validate
resp = mocked_sub_requests(self.app, "post_json", exec_path, timeout=5,
data=exec_body, headers=self.json_headers, only_local=True)
assert resp.status_code in [200, 201], f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}"
assert handle_wps1_process_dispatch.called, "WPS-1 handler should have been called by CWL runner context"

wps_exec_params = handle_wps1_process_dispatch.call_args_list[0].args
wps_exec_inputs = wps_exec_params[1]
assert isinstance(wps_exec_inputs, list), "WPS Inputs should have been passed for dispatch as 1st argument."
wps_exec_inputs = dict(wps_exec_inputs)
assert "dataset_opendap" in wps_exec_inputs, "Explicitly provided WPS inputs should be present."
assert "dataset" not in wps_exec_inputs, "Omitted WPS input should not be injected in the request."
assert wps_exec_inputs["dataset_opendap"] == exec_file
82 changes: 82 additions & 0 deletions tests/processes/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

from tests import resources
from tests.utils import MockedResponse, assert_equal_any_order, mocked_remote_server_requests_wps1
from weaver import xml_util
from weaver.exceptions import PackageTypeError
from weaver.formats import (
EDAM_MAPPING,
Expand Down Expand Up @@ -71,6 +72,7 @@
json2oas_io,
json2wps_allowed_values,
json2wps_datatype,
json2wps_io,
json2wps_supported_uoms,
merge_io_formats,
normalize_ordered_io,
Expand Down Expand Up @@ -2341,6 +2343,86 @@ def test_ows2json_io_convert_literal_uom(ows_io, json_io):
assert result == json_io


def test_ows_wps_json_default_complex_format():
xml_io = xml_util.parse(resources.WPS_COMPLEX_OPTIONAL_IO_XML).xpath("//DataInputs/Input")[0]
ows_io = OWSInput(xml_io)
res_io = ows2json_io(ows_io)
assert res_io == {
"id": "test",
"title": "test",
"min_occurs": 0,
"max_occurs": 100,
"any_value": False,
"type": WPS_COMPLEX_DATA,
"data_type": WPS_COMPLEX_DATA,
"data_format": {
"mimeType": ContentType.APP_NETCDF,
"encoding": "base64",
"maximumMegabytes": 200,
"schema": None,
"default": True,
},
"formats": [
{
"mimeType": ContentType.APP_NETCDF,
"encoding": "base64",
"maximumMegabytes": 200,
"schema": None,
"default": True,
},
{
"mimeType": ContentType.APP_JSON,
"encoding": None,
"maximumMegabytes": 200,
"schema": None,
"default": False,
},
],
}

wps_io = json2wps_io(res_io, "input")
assert wps_io.identifier == "test"
assert wps_io.title == "test"
assert wps_io.min_occurs == 0
assert wps_io.max_occurs == 100
assert wps_io.data_format == Format(ContentType.APP_NETCDF, encoding="base64")
assert all(wps_fmt == val_fmt for wps_fmt, val_fmt in zip(
wps_io.supported_formats,
[
Format(ContentType.APP_NETCDF, encoding="base64"),
Format(ContentType.APP_JSON),
],
))

# ensure no defaults applied
assert wps_io.data is None
assert wps_io._default is None

res_io = wps2json_io(wps_io)
assert res_io == {
"id": "test",
"title": "test",
"description": "",
"keywords": [],
"metadata": [],
"translations": None,
"asreference": False,
"workdir": None,
"minOccurs": "0",
"maxOccurs": "100",
"mode": MODE.NONE,
"type": WPS_COMPLEX,
"data_format": {"mime_type": ContentType.APP_NETCDF, "encoding": "base64", "schema": "", "extension": ""},
"formats": [
{"mediaType": ContentType.APP_NETCDF, "encoding": "base64", "schema": "", "extension": "", "default": True},
{"mediaType": ContentType.APP_JSON, "encoding": "", "schema": "", "extension": "", "default": False}
],
# from default data_format
"mimetype": ContentType.APP_NETCDF,
"encoding": "base64",
}


@pytest.mark.parametrize(
["value", "dtype", "ctype", "expect"],
[
Expand Down
1 change: 1 addition & 0 deletions tests/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@

WPS_ENUM_ARRAY_IO_ID = "subset_countries"
WPS_ENUM_ARRAY_IO_XML = os.path.join(RESOURCES_PATH, "wps_enum_array_io.xml")
WPS_COMPLEX_OPTIONAL_IO_XML = os.path.join(RESOURCES_PATH, "wps_complex_optional_io.xml")
WPS_LITERAL_COMPLEX_IO_ID = "ice_days"
WPS_LITERAL_COMPLEX_IO_XML = os.path.join(RESOURCES_PATH, "wps_literal_complex_io.xml")
WPS_LITERAL_VALUES_IO_ID = "ensemble_grid_point_cold_spell_duration_index"
Expand Down
23 changes: 23 additions & 0 deletions tests/resources/wps_complex_optional_io.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<DataInputs xmlns:ows="http://www.opengis.net/ows/1.1">
<Input minOccurs="0" maxOccurs="100">
<ows:Identifier>test</ows:Identifier>
<ows:Title>test</ows:Title>
<ComplexData maximumMegabytes="200">
<Default>
<Format>
<MimeType>application/x-netcdf</MimeType>
<Encoding>base64</Encoding>
</Format>
</Default>
<Supported>
<Format>
<MimeType>application/x-netcdf</MimeType>
<Encoding>base64</Encoding>
</Format>
<Format>
<MimeType>application/json</MimeType>
</Format>
</Supported>
</ComplexData>
</Input>
</DataInputs>
24 changes: 19 additions & 5 deletions weaver/processes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,10 +397,22 @@ def ows2json_io(ows_io):

# add 'format' if missing, derived from other variants
if io_type == WPS_COMPLEX_DATA:
fmt_default = False
fmt_default_found = False
if "default" in json_io and isinstance(json_io["default"], dict):
json_io["default"]["default"] = True # provide for workflow extension (internal), schema drops it (API)
fmt_default = True
# NOTE:
# When parsing between OWSLib and PyWPS for corresponding 'ComplexData' structures, there is an
# ambiguity in the field names. The OWSLib 'defaultValue' attribute is employed as the "default format"
# to resolve if the **format** was omitted (but only when the input reference/data must is provided!),
# whereas PyWPS uses the attribute named 'data_format' for the applied (or default) format of the input
# reference/data. However, both of these libraries also reuse the 'defaultValue' and 'default' attributes
# respectively for the 'LiteralData' type, where the value is the **actual data** used by default.
# The conversion between those libraries must therefore be careful about these specific field combinations
# to avoid misbehavior when resolving some form of "default" by omission of data/reference/format.
# To be sure, swap the ambiguous 'default' for explicit 'data_format' (where 'data=None' if I/O omitted).
fmt_default_value = json_io.pop("default")
fmt_default_value["default"] = True # provide for workflow extension (internal), schema drops it (API)
json_io["data_format"] = fmt_default_value
fmt_default_found = True

# retrieve alternate format definitions
if "formats" not in json_io:
Expand Down Expand Up @@ -430,13 +442,15 @@ def ows2json_io(ows_io):

# apply the default flag
for fmt in json_io["formats"]:
fmt["default"] = fmt_default and is_equal_formats(json_io["default"], fmt)
fmt_default_value = get_field(json_io, "data_format", default={})
fmt["default"] = fmt_default_found and is_equal_formats(fmt_default_value, fmt)
if fmt["default"]:
break

# NOTE:
# Don't apply 'minOccurs=0' as in below literal case because default 'format' does not imply that unspecified
# Don't force 'minOccurs=0' as in below literal case because default 'format' does not imply that unspecified
# input is valid, but rather that given an input without explicit 'format' specified, that 'default' is used.
# If the I/O is optional, it should have indicated explicitly the 'minOccurs=0' attribute for 'ComplexData'.
return json_io

# add value constraints in specifications
Expand Down

0 comments on commit b4050b6

Please sign in to comment.