Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Job output transform #548

Open
wants to merge 16 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ DOCKER_REPO ?= pavics/weaver
# guess OS (Linux, Darwin,...)
OS_NAME := $(shell uname -s 2>/dev/null || echo "unknown")
CPU_ARCH := $(shell uname -m 2>/dev/null || uname -p 2>/dev/null || echo "unknown")
SUDO ?=

# conda
CONDA_CMD ?= __EMPTY__
Expand Down Expand Up @@ -228,10 +229,10 @@ conda-env-export: ## export the conda environment
install: install-all ## alias for 'install-all' target

.PHONY: install-run
install-run: conda-install install-sys install-pkg install-raw ## install requirements and application to run locally
install-run: conda-install install-sys install-pkg install-raw install-dev install-transform ## install requirements and application to run locally

.PHONY: install-all
install-all: conda-install install-sys install-pkg install-pip install-dev ## install application with all dependencies
install-all: conda-install install-sys install-pkg install-pip install-dev install-transform ## install application with all dependencies

.PHONY: install-doc
install-doc: install-pip ## install documentation dependencies
Expand Down Expand Up @@ -274,7 +275,7 @@ install-raw: ## install without any requirements or dependencies (suppose everyt
install-npm: ## install npm package manager and dependencies if they cannot be found
@[ -f "$(shell which npm)" ] || ( \
echo "Binary package manager npm not found. Attempting to install it."; \
apt-get install npm \
$(SUDO) apt-get install npm \
)

.PHONY: install-npm-stylelint
Expand All @@ -291,6 +292,16 @@ install-npm-remarklint: install-npm ## install remark-lint dependency for 'chec
npm install --save-dev \
)

.PHONY: install-transform
install-transform: install-cairo-dependencies ## install system dependencies required by the transformer

.PHONY: install-cairo-dependencies
# NOTE: libpangocairo-1.0-0 is a shared library package, not an executable, so 'which' cannot detect it.
#       The package database is queried instead so that the installation is skipped when already present.
install-cairo-dependencies:	## install system libraries (Pango/Cairo) required by the transformer
	@dpkg-query -W -f='$${Status}' libpangocairo-1.0-0 2>/dev/null | grep -q "install ok installed" || ( \
		echo "System library libpangocairo-1.0-0 not found. Attempting to install it."; \
		$(SUDO) apt-get install libpangocairo-1.0-0 \
	)

.PHONY: install-dev-npm
# each lint dependency is listed once; 'install-npm-stylelint' was previously missing (remarklint was duplicated)
install-dev-npm: install-npm install-npm-stylelint install-npm-remarklint ## install all npm development dependencies

Expand Down Expand Up @@ -833,6 +844,7 @@ docker-clean: ## remove all built docker images (only matching current/latest v
docker rmi -f "$(APP_NAME):latest" || true
docker rmi -f "$(APP_NAME):base" || true


## -- Launchers targets --------------------------------------------------------------------------------------------- ##

.PHONY: start
Expand Down
2 changes: 1 addition & 1 deletion config/weaver.ini.example
Original file line number Diff line number Diff line change
Expand Up @@ -257,4 +257,4 @@ level = NOTSET
formatter = generic

[formatter_generic]
format = [%(asctime)s] %(levelname)-8.8s [%(threadName)s][%(name)s] %(message)s
format = [%(asctime)s] %(levelname)-8.8s [%(threadName)s][%(name)s] %(message)s
1 change: 1 addition & 0 deletions docker/Dockerfile-base
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
g++ \
git \
nodejs \
libpangocairo-1.0-0 \
&& pip install --no-cache-dir --upgrade -r requirements-sys.txt \
&& pip install --no-cache-dir -r requirements.txt \
&& pip install --no-cache-dir -e ${APP_DIR} \
Expand Down
10 changes: 10 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,13 @@ typing_extensions
WebTest
wsgiproxy
WSGIProxy2

# transformer
fpdf
Pillow
pandas
cairosvg
multipagetiff
# gdal
rasterio
python-magic
Comment on lines +42 to +50
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Makefile refers to a requirements-trfm.txt file. Those should be moved into that file. It can then be embedded in this file using -r requirements-trfm.txt (as done in the top).

12 changes: 12 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ gunicorn>=22
# even more reduced dependency constraints (https://github.com/vinitkumar/json2xml/pull/195)
json2xml==4.1.0
jsonschema>=3.0.1

# FIXME: kombu for pymongo>=4 not yet released as 5.3.0 (only pre-releases available)
# - https://github.com/crim-ca/weaver/issues/386
# - https://github.com/celery/kombu/pull/1536
Expand Down Expand Up @@ -121,5 +122,16 @@ urllib3==1.26.19 ; python_version < "3.10" # pyup: ignore
urlmatch
xmltodict
webob
# single werkzeug entry: the looser duplicate 'werkzeug>2' was dropped in favor of the bounded pin
werkzeug>=3.0.3,<3.1
zipp>=3.19.1 # not directly required, pinned by Snyk to avoid a vulnerability

# transformer
fpdf
Pillow
pandas
cairosvg
multipagetiff
# gdal
rasterio
python-magic
2 changes: 2 additions & 0 deletions tests/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@
TEST_REMOTE_SERVER_URL, WPS_NO_INPUTS_ID
)

# Directory of the sample files (CSV, TIFF, PNG, text) used by the transformer tests.
# NOTE(review): name mixes cases and spells "RESSOURCES"; kept as is since it may be imported elsewhere.
WPS_transform_RESSOURCES = os.path.join(RESOURCES_PATH, "transform")


def _load_path(file_path, text=False, xml=False):
# type: (str, bool, bool) -> Union[JSON, xml_util.XML, str]
Expand Down
18,250 changes: 18,250 additions & 0 deletions tests/resources/transform/avocado.csv

Large diffs are not rendered by default.

Binary file added tests/resources/transform/dubai.tif
Binary file not shown.
Binary file added tests/resources/transform/logo_crim.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added tests/resources/transform/multi.tif
Binary file not shown.
1 change: 1 addition & 0 deletions tests/resources/transform/text.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
Empty file added tests/transform/__init__.py
Empty file.
42 changes: 42 additions & 0 deletions tests/transform/test_transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import functools
import mimetypes
import os
import shutil
import tempfile

from pyramid.response import FileResponse

from weaver.transform.transform import FAMILIES, Transform


def using_mimes(func):
    """
    Decorate ``func`` so that it runs once for every media-type conversion applicable to a file.

    The wrapper guesses the media-type of the path received as first positional argument. For each family
    of ``FAMILIES`` that contains this media-type, ``func(path, current_media_type, wanted_media_type)`` is
    called with every *other* media-type of that family. Nothing is called if no family contains the guessed
    media-type (including when it cannot be guessed).

    Extra positional and keyword arguments are accepted but ignored, the results of ``func`` are discarded,
    and the wrapper always returns ``None``.
    """
    @functools.wraps(func)  # preserve the name and docstring of the decorated function
    def wrapper(*args, **kwargs):
        path = args[0]
        cmt = mimetypes.guess_type(path)[0]
        for family in FAMILIES:
            if cmt in family:
                for wmt in [f for f in family if f != cmt]:
                    func(path, cmt, wmt)

    return wrapper


@using_mimes
def transform(f, cmt="", wmt=""):
    """
    Convert a temporary copy of file ``f`` from media-type ``cmt`` to ``wmt`` and validate the result.

    The file is copied into a temporary directory, a ``Transform`` is created for the copy, and its ``get()``
    result must be a ``FileResponse``.

    :param f: path of the source file to convert.
    :param cmt: current media-type of the file.
    :param wmt: wanted media-type of the conversion.
    :returns: ``output_path`` of the transform (returned after the temporary directory was cleaned up).
    :raises AssertionError: if the response is not a ``FileResponse`` or if any step of the conversion fails.
    """
    src_name = os.path.basename(f)
    try:
        with tempfile.TemporaryDirectory() as tmp_path:
            tmp_file = os.path.join(tmp_path, src_name)
            shutil.copy(f, tmp_file)

            t = Transform(file_path=tmp_file, current_media_type=cmt, wanted_media_type=wmt)

            assert isinstance(t.get(), FileResponse), f"{cmt} -> {wmt} {str(t['error'])}"
            print(f"{cmt} -> {wmt} passed")
            return t.output_path
    except AssertionError:
        # failed check from above: report it without re-wrapping so that the original message is kept
        print(f"{cmt} -> {wmt} failed")
        raise
    except Exception as exc:
        print(f"{cmt} -> {wmt} failed")
        # explicit raise (not 'assert False') so the failure is not stripped when running with '-O'
        raise AssertionError(f"{cmt} -> {wmt} ({src_name}): {exc}") from exc


def test_transformations():
    """
    Run every applicable media-type conversion against each sample file of the transform resources.

    The resources directory is resolved from the location of this test module (``tests/resources/transform``)
    rather than from the current working directory, so the test works wherever it is invoked from.
    """
    tests_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    resources_dir = os.path.join(tests_dir, "resources", "transform")
    for fn in sorted(os.listdir(resources_dir)):  # sorted for a reproducible execution order
        transform(os.path.join(resources_dir, fn))
1 change: 1 addition & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,7 @@ class FileServer(SimpleHTTPTestServer):
This server takes more time to start than usual mocks. Use it sparingly, and consider maintaining a single
instance over multiple tests of a complete test suite rather than recreating a server for each test.
"""

def __init__(self): # pylint: disable=W0231
self._port = self.get_port()
self._uri = f"http://0.0.0.0:{self._port}"
Expand Down
112 changes: 63 additions & 49 deletions tests/wps_restapi/test_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,22 +160,22 @@ def setUp(self):
user_id=self.user_editor1_id, status=Status.STARTED, progress=99, access=Visibility.PUBLIC)

def make_job(self,
task_id, # type: str
process, # type: str
service, # type: Optional[str]
user_id, # type: Optional[int]
status, # type: AnyStatusType
progress, # type: int
access, # type: AnyVisibility
created=None, # type: Optional[Union[datetime.datetime, str]]
offset=None, # type: Optional[int]
duration=None, # type: Optional[int]
exceptions=None, # type: Optional[List[JSON]]
logs=None, # type: Optional[List[Union[str, Tuple[str, AnyLogLevel, AnyStatusType, Number]]]]
statistics=None, # type: Optional[Statistics]
tags=None, # type: Optional[List[str]]
add_info=True, # type: bool
): # type: (...) -> Job
task_id, # type: str
process, # type: str
service, # type: Optional[str]
user_id, # type: Optional[int]
status, # type: AnyStatusType
progress, # type: int
access, # type: AnyVisibility
created=None, # type: Optional[Union[datetime.datetime, str]]
offset=None, # type: Optional[int]
duration=None, # type: Optional[int]
exceptions=None, # type: Optional[List[JSON]]
logs=None, # type: Optional[List[Union[str, Tuple[str, AnyLogLevel, AnyStatusType, Number]]]]
statistics=None, # type: Optional[Statistics]
tags=None, # type: Optional[List[str]]
add_info=True, # type: bool
): # type: (...) -> Job
if isinstance(created, str):
created = date_parser.parse(created)
job = self.job_store.save_job(task_id=task_id, process=process, service=service, is_workflow=False,
Expand Down Expand Up @@ -439,7 +439,7 @@ def test_get_jobs_links_navigation(self):
base_url = self.settings["weaver.url"]
jobs_url = base_url + sd.jobs_service.path
limit = 2 # expect 11 jobs to be visible, making 6 pages of 2 each (except last that is 1)
last = 5 # zero-based index of last page
last = 5 # zero-based index of last page
last_page = f"page={last}"
prev_last_page = f"page={last - 1}"
limit_kvp = f"limit={limit}"
Expand Down Expand Up @@ -882,7 +882,7 @@ def test_get_jobs_public_service_no_processes(self):
service=self.service_public.name,
process=self.process_private.identifier)
with contextlib.ExitStack() as stack:
for patch in mocked_remote_wps([]): # process invisible (not returned by remote)
for patch in mocked_remote_wps([]): # process invisible (not returned by remote)
stack.enter_context(patch)
resp = self.app.get(path, headers=self.json_headers, expect_errors=True)
assert resp.status_code == 404
Expand Down Expand Up @@ -915,37 +915,43 @@ def filter_service(jobs): # type: (Iterable[Job]) -> List[Job]
path_jobs_user_req_tests = [
# pylint: disable=C0301,line-too-long
# URI ACCESS USER EXPECTED JOBS
(uri_direct_jobs, None, None, public_jobs), # noqa: E241,E501
(uri_direct_jobs, None, self.user_editor1_id, editor1_all_jobs), # noqa: E241,E501
(uri_direct_jobs, None, self.user_admin_id, self.job_info), # noqa: E241,E501
(uri_direct_jobs, Visibility.PRIVATE, None, public_jobs), # noqa: E241,E501
(uri_direct_jobs, Visibility.PRIVATE, self.user_editor1_id, editor1_private_jobs), # noqa: E241,E501
(uri_direct_jobs, Visibility.PRIVATE, self.user_admin_id, admin_private_jobs), # noqa: E241,E501
(uri_direct_jobs, Visibility.PUBLIC, None, public_jobs), # noqa: E241,E501
(uri_direct_jobs, Visibility.PUBLIC, self.user_editor1_id, editor1_public_jobs), # noqa: E241,E501
(uri_direct_jobs, Visibility.PUBLIC, self.user_admin_id, admin_public_jobs), # noqa: E241,E501
(uri_direct_jobs, None, None, public_jobs), # noqa: E241,E501
(uri_direct_jobs, None, self.user_editor1_id, editor1_all_jobs), # noqa: E241,E501
(uri_direct_jobs, None, self.user_admin_id, self.job_info), # noqa: E241,E501
(uri_direct_jobs, Visibility.PRIVATE, None, public_jobs), # noqa: E241,E501
(uri_direct_jobs, Visibility.PRIVATE, self.user_editor1_id, editor1_private_jobs), # noqa: E241,E501
(uri_direct_jobs, Visibility.PRIVATE, self.user_admin_id, admin_private_jobs), # noqa: E241,E501
(uri_direct_jobs, Visibility.PUBLIC, None, public_jobs), # noqa: E241,E501
(uri_direct_jobs, Visibility.PUBLIC, self.user_editor1_id, editor1_public_jobs), # noqa: E241,E501
(uri_direct_jobs, Visibility.PUBLIC, self.user_admin_id, admin_public_jobs), # noqa: E241,E501
# ---
(uri_process_jobs, None, None, filter_process(public_jobs)), # noqa: E241,E501
(uri_process_jobs, None, self.user_editor1_id, filter_process(editor1_all_jobs)), # noqa: E241,E501
(uri_process_jobs, None, self.user_admin_id, filter_process(self.job_info)), # noqa: E241,E501
(uri_process_jobs, Visibility.PRIVATE, None, filter_process(public_jobs)), # noqa: E241,E501
(uri_process_jobs, Visibility.PRIVATE, self.user_editor1_id, filter_process(editor1_private_jobs)), # noqa: E241,E501
(uri_process_jobs, Visibility.PRIVATE, self.user_admin_id, filter_process(admin_private_jobs)), # noqa: E241,E501
(uri_process_jobs, Visibility.PUBLIC, None, filter_process(public_jobs)), # noqa: E241,E501
(uri_process_jobs, Visibility.PUBLIC, self.user_editor1_id, filter_process(editor1_public_jobs)), # noqa: E241,E501
(uri_process_jobs, Visibility.PUBLIC, self.user_admin_id, filter_process(public_jobs)), # noqa: E241,E501
(uri_process_jobs, None, None, filter_process(public_jobs)), # noqa: E241,E501
(uri_process_jobs, None, self.user_editor1_id, filter_process(editor1_all_jobs)), # noqa: E241,E501
(uri_process_jobs, None, self.user_admin_id, filter_process(self.job_info)), # noqa: E241,E501
(uri_process_jobs, Visibility.PRIVATE, None, filter_process(public_jobs)), # noqa: E241,E501
(uri_process_jobs, Visibility.PRIVATE, self.user_editor1_id, filter_process(editor1_private_jobs)),
# noqa: E241,E501
(uri_process_jobs, Visibility.PRIVATE, self.user_admin_id, filter_process(admin_private_jobs)),
# noqa: E241,E501
(uri_process_jobs, Visibility.PUBLIC, None, filter_process(public_jobs)), # noqa: E241,E501
(uri_process_jobs, Visibility.PUBLIC, self.user_editor1_id, filter_process(editor1_public_jobs)),
# noqa: E241,E501
(uri_process_jobs, Visibility.PUBLIC, self.user_admin_id, filter_process(public_jobs)), # noqa: E241,E501
# ---
(uri_provider_jobs, None, None, filter_service(public_jobs)), # noqa: E241,E501
(uri_provider_jobs, None, self.user_editor1_id, filter_service(editor1_all_jobs)), # noqa: E241,E501
(uri_provider_jobs, None, self.user_admin_id, filter_service(self.job_info)), # noqa: E241,E501
(uri_provider_jobs, Visibility.PRIVATE, None, filter_service(public_jobs)), # noqa: E241,E501
(uri_provider_jobs, Visibility.PRIVATE, self.user_editor1_id, filter_service(editor1_private_jobs)), # noqa: E241,E501
(uri_provider_jobs, Visibility.PRIVATE, self.user_admin_id, filter_service(admin_private_jobs)), # noqa: E241,E501
(uri_provider_jobs, Visibility.PUBLIC, None, filter_service(public_jobs)), # noqa: E241,E501
(uri_provider_jobs, Visibility.PUBLIC, self.user_editor1_id, filter_service(editor1_public_jobs)), # noqa: E241,E501
(uri_provider_jobs, Visibility.PUBLIC, self.user_admin_id, filter_service(public_jobs)), # noqa: E241,E501

] # type: List[Tuple[str, str, Union[None, int], List[Job]]]
(uri_provider_jobs, None, None, filter_service(public_jobs)), # noqa: E241,E501
(uri_provider_jobs, None, self.user_editor1_id, filter_service(editor1_all_jobs)), # noqa: E241,E501
(uri_provider_jobs, None, self.user_admin_id, filter_service(self.job_info)), # noqa: E241,E501
(uri_provider_jobs, Visibility.PRIVATE, None, filter_service(public_jobs)), # noqa: E241,E501
(uri_provider_jobs, Visibility.PRIVATE, self.user_editor1_id, filter_service(editor1_private_jobs)),
# noqa: E241,E501
(uri_provider_jobs, Visibility.PRIVATE, self.user_admin_id, filter_service(admin_private_jobs)),
# noqa: E241,E501
(uri_provider_jobs, Visibility.PUBLIC, None, filter_service(public_jobs)), # noqa: E241,E501
(uri_provider_jobs, Visibility.PUBLIC, self.user_editor1_id, filter_service(editor1_public_jobs)),
# noqa: E241,E501
(uri_provider_jobs, Visibility.PUBLIC, self.user_admin_id, filter_service(public_jobs)), # noqa: E241,E501

] # type: List[Tuple[str, str, Union[None, int], List[Job]]]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Revert the auto-formatting applied by your IDE here and above.


for i, (path, access, user_id, expected_jobs) in enumerate(path_jobs_user_req_tests):
with contextlib.ExitStack() as stack:
Expand Down Expand Up @@ -1339,7 +1345,7 @@ def test_get_job_invalid_uuid(self):
"""
# to make sure UUID is applied, use the "same format" (8-4-4-4-12), but with invalid definitions
base_path = sd.job_service.path.format(job_id="thisisnt-some-real-uuid-allerrordata")
for sub_path in ["", "/inputs", "/outputs", "/results", "/logs", "exceptions"]:
for sub_path in ["", "/inputs", "/outputs", "/results", "/logs", "/exceptions"]:
path = f"{base_path}{sub_path}"
resp = self.app.get(path, headers=self.json_headers, expect_errors=True)
assert resp.status_code == 400
Expand Down Expand Up @@ -1524,7 +1530,7 @@ def test_job_results_errors(self):
assert resp.status_code == code, case
assert resp.json["title"] == title, case
assert resp.json["cause"] == cause, case
assert resp.json["type"].endswith(error_type), case # ignore http full reference, not always there
assert resp.json["type"].endswith(error_type), case # ignore http full reference, not always there
assert "links" in resp.json

def test_jobs_inputs_outputs_validations(self):
Expand Down Expand Up @@ -1628,6 +1634,14 @@ def test_jobs_inputs_outputs_validations(self):
with self.assertRaises(colander.Invalid):
sd.Execute().deserialize({"outputs": {"random": {"transmissionMode": "bad"}}})

path = f"/jobs/{self.job_info[0].id}/outputs"
resp = self.app.get(path, headers=self.json_headers)
for link in resp.json["links"]:
header = {"Accept": link["type"]}
resp = self.app.get(link, headers=header)
assert resp.status_code == 200
assert link["type"] in resp.content_type or "application/gzip" in resp.content_type

def test_job_logs_formats(self):
path = f"/jobs/{self.job_info[0].id}/logs"
resp = self.app.get(path, headers=self.json_headers)
Expand Down
3 changes: 3 additions & 0 deletions weaver/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2201,6 +2201,7 @@ class ValidateAuthHandlerAction(argparse.Action):
"""
Action that will validate that the input argument references an authentication handler that can be resolved.
"""

def __call__(self, parser, namespace, auth_handler_ref, option_string=None):
# type: (argparse.ArgumentParser, argparse.Namespace, Optional[str], Optional[str]) -> None
"""
Expand Down Expand Up @@ -2250,6 +2251,7 @@ class ValidateHeaderAction(argparse._AppendAction): # noqa: W0212

Header-Name: Header-Value
"""

def __call__(self, parser, namespace, values, option_string=None):
# type: (argparse.ArgumentParser, argparse.Namespace, Union[str, Sequence[Any], None], Optional[str]) -> None
"""
Expand Down Expand Up @@ -2284,6 +2286,7 @@ class ValidateNonZeroPositiveNumberAction(argparse.Action):
"""
Action that will validate that the input argument is a positive number greater than zero.
"""

def __call__(self, parser, namespace, values, option_string=None):
# type: (argparse.ArgumentParser, argparse.Namespace, Union[str, Sequence[Any], None], Optional[str]) -> None
"""
Expand Down
Loading
Loading