Skip to content

Commit

Permalink
fix parsing of CWL I/O with 'format' as JS expression used to handle …
Browse files Browse the repository at this point in the history
…multiple format/media-type combinations (relates to common-workflow-language/cwl-v1.3#52)
  • Loading branch information
fmigneault committed Aug 22, 2024
1 parent c7ae658 commit b504128
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 28 deletions.
5 changes: 4 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ Changes:

Fixes:
------
- No change.
- Fix `CWL` I/O with ``format`` defined as a `JavaScript Expression` being incorrectly parsed by the conversion
operations that extract applicable media-types. These cases will be ignored, since media-types cannot be inferred
from them. The `WPS` or `OAS` I/O definitions should instead provide the applicable media-types
(relates to `common-workflow-language/cwl-v1.3#52 <https://github.com/common-workflow-language/cwl-v1.3/issues/52>`_).

.. _changes_5.7.0:

Expand Down
34 changes: 20 additions & 14 deletions tests/functional/application-packages/EchoFeatures/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,40 @@ processDescription:
id: EchoFeatures
title: Test Echo Features
version: "1.0" # must be string, avoid interpretation as float
description: Dummy process that simply echo's back the input bbox for testing purposes.
description: Dummy process that simply echo's back the input features for testing purposes.
keywords:
- test
inputs:
features:
description: "Collection of features, either provided directly as FeatureCollection or an array of geometries."
schema:
- oneOf:
- allOf:
- format: geojson-feature-collection
- $ref: "https://geojson.org/schema/FeatureCollection.json"
- type: array
items:
allOf:
- format: geojson-geometry
- $ref: "http://schemas.opengis.net/ogcapi/features/part1/1.0/openapi/schemas/geometryGeoJSON.yaml"
oneOf:
- allOf:
- type: string
format: geojson-feature-collection
contentMediaType: application/geo+json
- $ref: "https://geojson.org/schema/FeatureCollection.json"
- type: array
items:
allOf:
- type: string
format: geojson-geometry
contentMediaType: application/geo+json
- $ref: "http://schemas.opengis.net/ogcapi/features/part1/1.0/openapi/schemas/geometryGeoJSON.yaml"
outputs:
features:
schema:
- allOf:
- format: geojson-feature-collection
- $ref: "https://geojson.org/schema/FeatureCollection.json"
allOf:
- type: string
format: geojson-feature-collection
contentMediaType: application/geo+json
- $ref: "https://geojson.org/schema/FeatureCollection.json"
jobControlOptions:
- async-execute
- sync-execute
outputTransmission:
- reference
executionUnit:
# note: This does not work by itself! The test suite injects the file dynamically.
- href: "tests/functional/application-packages/EchoBoundingBox/echo_bbox.cwl"
- href: "tests/functional/application-packages/EchoFeatures/echo_features.cwl"
deploymentProfileName: "http://www.opengis.net/profiles/eoc/dockerizedApplication"
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,16 @@ requirements:
inputs:
features:
type:
- type: File
format: "oap:geojson-feature-collection"
- "File"
- type: array
items:
type: File
format: "oap:geojson-feature"
items: File
format: |
${
if (Array.isArray(inputs.features)) {
return "iana:application/geo+json";
}
return "http://www.opengis.net/def/glossary/term/FeatureCollection";
}
inputBinding:
valueFrom: |
${
Expand All @@ -29,10 +33,9 @@ inputs:
outputs:
features:
type: File
format: "oap:geojson-feature-collection"
format: "http://www.opengis.net/def/glossary/term/FeatureCollection"
outputBinding:
glob: "features.json"
stdout: "features.json"
$namespaces:
iana: "https://www.iana.org/assignments/media-types/"
oap: "http://www.opengis.net/def/format/ogcapi-processes/0/"
13 changes: 9 additions & 4 deletions tests/functional/test_wps_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
mocked_wps_output,
setup_aws_s3_bucket
)
from weaver.execute import ExecuteMode, ExecuteResponse, ExecuteTransmissionMode
from weaver.execute import ExecuteCollectionFormat, ExecuteMode, ExecuteResponse, ExecuteTransmissionMode
from weaver.formats import (
EDAM_MAPPING,
EDAM_NAMESPACE,
Expand Down Expand Up @@ -2267,14 +2267,17 @@ def test_execute_job_with_bbox(self):
)

def test_execute_job_with_collection_input(self):
proc = "EchoFeatures"
name = "EchoFeatures"
body = self.retrieve_payload(name, "deploy", local=True)
proc = self.fully_qualified_test_process_name(self._testMethodName)
self.deploy_process(body, describe_schema=ProcessSchema.OGC, process_id=proc)

with contextlib.ExitStack() as stack:
tmp_dir = stack.enter_context(tempfile.TemporaryDirectory()) # pylint: disable=R1732
tmp_feature_collection_geojson = stack.enter_context(
tempfile.NamedTemporaryFile(suffix=".geojson", mode="w", dir=tmp_dir) # pylint: disable=R1732
)
exec_body_val = self.retrieve_payload(proc, "execute", local=True)
exec_body_val = self.retrieve_payload(name, "execute", local=True)
json.dump(
exec_body_val["inputs"]["features"]["value"],
tmp_feature_collection_geojson,
Expand All @@ -2288,6 +2291,8 @@ def test_execute_job_with_collection_input(self):
"inputs": {
"features": {
"collection": "https://mocked-file-server.com/collections/test",
"format": ExecuteCollectionFormat.GEOJSON,
"type": ContentType.APP_GEOJSON,
"filter-lang": "cql2-text",
"filter": "properties.name = test"
}
Expand All @@ -2296,7 +2301,7 @@ def test_execute_job_with_collection_input(self):

for mock_exec in mocked_execute_celery():
stack.enter_context(mock_exec)
proc_url = f"/processes/{proc}/jobs"
proc_url = f"/processes/{proc}/execution"
resp = mocked_sub_requests(self.app, "post_json", proc_url, timeout=5,
data=exec_body_col, headers=self.json_headers, only_local=True)
assert resp.status_code in [200, 201], f"Failed with: [{resp.status_code}]\nReason:\n{resp.json}"
Expand Down
16 changes: 14 additions & 2 deletions weaver/processes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -1726,22 +1726,34 @@ def cwl2wps_io(io_info, io_select):
"title": io_info.get("label", io_def.name),
"abstract": io_info.get("doc", ""),
}
# format can represent either a Media-Type or a schema reference
# format can represent either a Media-Type, a schema reference, or a CWL expression
# - format as Media-Type is useful for WPS Complex
# - format as schema is useful for WPS BoundingBox JSON/YAML structure
# - format as CWL expression is ignored since it cannot be easily interpreted
io_formats = []
if "format" in io_info:
io_fmt = io_info["format"]
io_formats = [io_fmt] if isinstance(io_fmt, str) else io_fmt
io_formats = [get_format(fmt) for fmt in io_formats]
io_formats = [
get_format(fmt) for fmt in io_formats
if (
fmt and isinstance(fmt, str)
and not any(fmt.strip().startswith(fmt_s) for fmt_s in ["$", "{", "("])
and not any(fmt.strip().endswith(fmt_e) for fmt_e in ["}", ")"])
)
]
for i, io_format in enumerate(list(io_formats)):
# when CWL namespaced formats are not resolved, the full path URI to the schema is expected
# because of full URI, should have lots of '/' (including protocol separator),
# use this to detect content schema reference vs content media-type reference
if io_format and len(io_format.mime_type.split("/")) > 2:
io_ext = os.path.splitext(io_format.mime_type)[-1]
io_typ = get_content_type(io_ext)
if not io_typ: # could not resolve (eg: schema reference or unknown type)
continue
io_format = Format(io_typ, extension=io_ext, schema=io_format.mime_type)
io_formats[i] = io_format
if io_formats:
kw["supported_formats"] = io_formats
kw["mode"] = MODE.SIMPLE # only validate the extension (not file contents)
else:
Expand Down

0 comments on commit b504128

Please sign in to comment.