Skip to content

Commit

Permalink
Issue #104 drop usage of non-standard read_vector process
Browse files Browse the repository at this point in the history
instead, support reading GeoJSON from local path
  • Loading branch information
soxofaan committed Nov 27, 2024
1 parent 491143e commit 60be18f
Show file tree
Hide file tree
Showing 6 changed files with 156 additions and 55 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed

- `MultiBackendJobManager`: costs has been added as a column in tracking databases ([[#588](https://github.com/Open-EO/openeo-python-client/issues/588)])
- When passing a path/string as `geometry` to `DataCube.aggregate_spatial()`, `DataCube.mask_polygon()`, etc.: this is not translated automatically to deprecated, non-standard `read_vector` usage. Instead, if it is a local GeoJSON file, the GeoJSON data will be loaded directly client-side. ([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))

### Removed

Expand Down
50 changes: 37 additions & 13 deletions openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
from openeo.rest.service import Service
from openeo.rest.udp import RESTUserDefinedProcess
from openeo.rest.vectorcube import VectorCube
from openeo.util import dict_no_none, guess_format, normalize_crs, rfc3339
from openeo.util import dict_no_none, guess_format, load_json, normalize_crs, rfc3339

if typing.TYPE_CHECKING:
# Imports for type checking only (circular import issue at runtime).
Expand Down Expand Up @@ -609,7 +609,8 @@ def filter_spatial(
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
e.g. GeoJSON, GeoParquet, etc.
A ``load_url`` process will automatically be added to the process graph.
- a path (that is valid for the back-end) to a GeoJSON file.
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
which will be loaded automatically to get the geometries as GeoJSON construct.
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
- a :py:class:`~openeo.api.process.Parameter` instance.
Expand All @@ -619,6 +620,11 @@ def filter_spatial(
.. versionchanged:: 0.36.0
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
.. versionchanged:: 0.36.0
Support for passing a backend-side path as ``geometries`` argument was removed.
Instead, it's possible to provide a client-side path to a GeoJSON file
(which will be loaded client-side to get the geometries as GeoJSON construct).
"""
valid_geojson_types = [
"Point", "MultiPoint", "LineString", "MultiLineString",
Expand Down Expand Up @@ -1065,7 +1071,7 @@ def _get_geometry_argument(
crs: Optional[str] = None,
) -> Union[dict, Parameter, PGNode]:
"""
Convert input to a geometry as "geojson" subtype object.
Convert input to a geometry as "geojson" subtype object or vectorcube.
:param crs: value that encodes a coordinate reference system.
See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
Expand All @@ -1088,13 +1094,13 @@ def _get_geometry_argument(
}.get(suffix, suffix.split(".")[-1])
return self.connection.load_url(url=argument, format=format)

if isinstance(argument, (str, pathlib.Path)):
# Assumption: `geometry` is path to polygon is a path to vector file at backend.
# TODO #104: `read_vector` is non-standard process.
# TODO: If path exists client side: load it client side?
return PGNode(process_id="read_vector", arguments={"filename": str(argument)})

if isinstance(argument, shapely.geometry.base.BaseGeometry):
if (
isinstance(argument, (str, pathlib.Path))
and pathlib.Path(argument).is_file()
and pathlib.Path(argument).suffix.lower() in [".json", ".geojson"]
):
geometry = load_json(argument)
elif isinstance(argument, shapely.geometry.base.BaseGeometry):
geometry = mapping(argument)
elif isinstance(argument, dict):
geometry = argument
Expand Down Expand Up @@ -1149,7 +1155,8 @@ def aggregate_spatial(
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
e.g. GeoJSON, GeoParquet, etc.
A ``load_url`` process will automatically be added to the process graph.
- a path (that is valid for the back-end) to a GeoJSON file.
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
which will be loaded automatically to get the geometries as GeoJSON construct.
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
- a :py:class:`~openeo.api.process.Parameter` instance.
Expand Down Expand Up @@ -1179,6 +1186,11 @@ def aggregate_spatial(
.. versionchanged:: 0.36.0
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
.. versionchanged:: 0.36.0
Support for passing a backend-side path as ``geometries`` argument was removed.
Instead, it's possible to provide a client-side path to a GeoJSON file
(which will be loaded client-side to get the geometries as GeoJSON construct).
"""
valid_geojson_types = [
"Point", "MultiPoint", "LineString", "MultiLineString",
Expand Down Expand Up @@ -1504,7 +1516,8 @@ def apply_polygon(
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
e.g. GeoJSON, GeoParquet, etc.
A ``load_url`` process will automatically be added to the process graph.
- a path (that is valid for the back-end) to a GeoJSON file.
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
which will be loaded automatically to get the geometries as GeoJSON construct.
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
- a :py:class:`~openeo.api.process.Parameter` instance.
Expand All @@ -1521,6 +1534,11 @@ def apply_polygon(
.. versionchanged:: 0.36.0
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
.. versionchanged:: 0.36.0
Support for passing a backend-side path as ``geometries`` argument was removed.
Instead, it's possible to provide a client-side path to a GeoJSON file
(which will be loaded client-side to get the geometries as GeoJSON construct).
"""
# TODO drop support for legacy `polygons` argument:
# remove `kwargs, remove default `None` value for `geometries` and `process`
Expand Down Expand Up @@ -2013,7 +2031,8 @@ def mask_polygon(
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
e.g. GeoJSON, GeoParquet, etc.
A ``load_url`` process will automatically be added to the process graph.
- a path (that is valid for the back-end) to a GeoJSON file.
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
which will be loaded automatically to get the geometries as GeoJSON construct.
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
- a :py:class:`~openeo.api.process.Parameter` instance.
Expand All @@ -2026,6 +2045,11 @@ def mask_polygon(
.. versionchanged:: 0.36.0
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
.. versionchanged:: 0.36.0
Support for passing a backend-side path as ``geometries`` argument was removed.
Instead, it's possible to provide a client-side path to a GeoJSON file
(which will be loaded client-side to get the geometries as GeoJSON construct).
"""
valid_geojson_types = ["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"]
mask = self._get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, crs=srs)
Expand Down
10 changes: 5 additions & 5 deletions tests/data/1.0.0/aggregate_zonal_path.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
}
}
},
"readvector1": {
"process_id": "read_vector",
"loadurl1": {
"process_id": "load_url",
"arguments": {
"filename": "/some/path/to/GeometryCollection.geojson"
}
"url": "https://example.com/geometries.geojson",
"format": "GeoJSON"}
},
"aggregatespatial1": {
"process_id": "aggregate_spatial",
Expand All @@ -34,7 +34,7 @@
"from_node": "filterbbox1"
},
"geometries": {
"from_node": "readvector1"
"from_node": "loadurl1"
},
"reducer": {
"process_graph": {
Expand Down
23 changes: 23 additions & 0 deletions tests/data/geojson/polygon02.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"type": "Polygon",
"coordinates": [
[
[
3,
50
],
[
4,
50
],
[
4,
51
],
[
3,
50
]
]
]
}
113 changes: 87 additions & 26 deletions tests/rest/datacube/test_datacube100.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,25 @@ def test_filter_spatial(con100: Connection):
}


@pytest.mark.parametrize("path_factory", [str, pathlib.Path])
def test_filter_spatial_local_path(con100: Connection, path_factory, test_data):
path = path_factory(test_data.get_path("geojson/polygon02.json"))
cube = con100.load_collection("S2")
masked = cube.filter_spatial(geometries=path)
assert get_download_graph(masked, drop_save_result=True, drop_load_collection=True) == {
"filterspatial1": {
"process_id": "filter_spatial",
"arguments": {
"data": {"from_node": "loadcollection1"},
"geometries": {
"type": "Polygon",
"coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]],
},
},
}
}


@pytest.mark.parametrize(
["url", "expected_format"],
[
Expand Down Expand Up @@ -659,6 +678,38 @@ def test_aggregate_spatial_geometry_url(con100: Connection, url, expected_format
}


@pytest.mark.parametrize("path_factory", [str, pathlib.Path])
def test_aggregate_spatial_geometry_local_path(con100: Connection, path_factory, test_data):
cube = con100.load_collection("S2")
path = path_factory(test_data.get_path("geojson/polygon02.json"))
result = cube.aggregate_spatial(geometries=path, reducer="mean")
assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == {
"aggregatespatial1": {
"process_id": "aggregate_spatial",
"arguments": {
"data": {"from_node": "loadcollection1"},
"geometries": {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]},
"reducer": {
"process_graph": {
"mean1": {
"process_id": "mean",
"arguments": {"data": {"from_parameter": "data"}},
"result": True,
}
}
},
},
},
}


def test_aggregate_spatial_geometry_local_path_invalid(con100: Connection):
path = "nope/invalid:path%here.json"
cube = con100.load_collection("S2")
with pytest.raises(OpenEoClientException, match="Invalid geometry argument"):
_ = cube.aggregate_spatial(geometries=path, reducer="mean")


def test_aggregate_spatial_window(con100: Connection):
img = con100.load_collection("S2")
size = [5, 3]
Expand Down Expand Up @@ -827,24 +878,29 @@ def test_mask_polygon_parameter(con100: Connection):
}


def test_mask_polygon_path(con100: Connection):
img = con100.load_collection("S2")
masked = img.mask_polygon(mask="path/to/polygon.json")
assert sorted(masked.flat_graph().keys()) == ["loadcollection1", "maskpolygon1", "readvector1"]
assert masked.flat_graph()["maskpolygon1"] == {
"process_id": "mask_polygon",
"arguments": {
"data": {"from_node": "loadcollection1"},
"mask": {"from_node": "readvector1"},
@pytest.mark.parametrize("path_factory", [str, pathlib.Path])
def test_mask_polygon_geometry_local_path(con100: Connection, path_factory, test_data):
path = path_factory(test_data.get_path("geojson/polygon02.json"))
cube = con100.load_collection("S2")
masked = cube.mask_polygon(mask=path)
assert get_download_graph(masked, drop_save_result=True, drop_load_collection=True) == {
"maskpolygon1": {
"process_id": "mask_polygon",
"arguments": {
"data": {"from_node": "loadcollection1"},
"mask": {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]},
},
},
"result": True,
}
assert masked.flat_graph()["readvector1"] == {
"process_id": "read_vector",
"arguments": {"filename": "path/to/polygon.json"},
}


def test_mask_polygon_geometry_local_path_invalid(con100: Connection):
path = "nope/invalid:path%here.json"
cube = con100.load_collection("S2")
with pytest.raises(OpenEoClientException, match="Invalid geometry argument"):
_ = cube.mask_polygon(mask=path)


@pytest.mark.parametrize("get_geometries", [
lambda c: PGNode("load_vector", url="https://geo.test/features.json"),
lambda c: openeo.processes.process("load_vector", url="https://geo.test/features.json"),
Expand Down Expand Up @@ -1583,18 +1639,19 @@ def test_chunk_polygon_parameter(con100: Connection):
}


def test_chunk_polygon_path(con100: Connection):
@pytest.mark.parametrize("path_factory", [str, pathlib.Path])
def test_chunk_polygon_path(con100: Connection, test_data, path_factory):
path = path_factory(test_data.get_path("geojson/polygon02.json"))
cube = con100.load_collection("S2")
process = lambda data: data.run_udf(udf="myfancycode", runtime="Python")
with pytest.warns(UserDeprecationWarning, match="Use `apply_polygon`"):
result = cube.chunk_polygon(chunks="path/to/polygon.json", process=process)
result = cube.chunk_polygon(chunks=path, process=process)
assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == {
"readvector1": {"process_id": "read_vector", "arguments": {"filename": "path/to/polygon.json"}},
"chunkpolygon1": {
"process_id": "chunk_polygon",
"arguments": {
"data": {"from_node": "loadcollection1"},
"chunks": {"from_node": "readvector1"},
"chunks": {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]},
"process": {
"process_graph": {
"runudf1": {
Expand Down Expand Up @@ -1797,21 +1854,17 @@ def test_apply_polygon_parameter(con100: Connection, geometries_argument, geomet
("geometries", "geometries"),
],
)
def test_apply_polygon_path(con100: Connection, geometries_argument, geometries_parameter):
def test_apply_polygon_local_path(con100: Connection, geometries_argument, geometries_parameter, test_data):
path = test_data.get_path("geojson/polygon02.json")
cube = con100.load_collection("S2")
process = UDF(code="myfancycode", runtime="Python")
result = cube.apply_polygon(**{geometries_argument: "path/to/polygon.json"}, process=process)
result = cube.apply_polygon(**{geometries_argument: path}, process=process)
assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == {
"readvector1": {
# TODO #104 #457 get rid of non-standard read_vector
"process_id": "read_vector",
"arguments": {"filename": "path/to/polygon.json"},
},
"applypolygon1": {
"process_id": "apply_polygon",
"arguments": {
"data": {"from_node": "loadcollection1"},
geometries_parameter: {"from_node": "readvector1"},
geometries_parameter: {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]},
"process": {
"process_graph": {
"runudf1": {
Expand All @@ -1830,6 +1883,14 @@ def test_apply_polygon_path(con100: Connection, geometries_argument, geometries_
}


def test_apply_polygon_local_path_invalid(con100: Connection):
path = "nope/invalid:path%here.json"
cube = con100.load_collection("S2")
process = UDF(code="myfancycode", runtime="Python")
with pytest.raises(OpenEoClientException, match="Invalid geometry argument"):
_ = cube.apply_polygon(geometries=path, process=process)


@pytest.mark.parametrize(
["geometries_argument", "geometries_parameter"],
[
Expand Down
14 changes: 3 additions & 11 deletions tests/rest/datacube/test_zonal_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,13 @@ def test_aggregate_spatial(connection, api_version, reducer, test_data):
assert get_execute_graph(res) == test_data.load_json("%s/aggregate_zonal_polygon.json" % api_version)


def test_polygon_timeseries_path(connection, api_version, test_data):
res = (
connection.load_collection('S2')
.filter_bbox(west=3, east=6, north=52, south=50)
.polygonal_mean_timeseries(polygon="/some/path/to/GeometryCollection.geojson")
)
assert get_execute_graph(res) == test_data.load_json("%s/aggregate_zonal_path.json" % api_version)


@pytest.mark.parametrize("reducer", ["mean", openeo.processes.mean, lambda x: x.mean()])
def test_aggregate_spatial_read_vector(connection, api_version, reducer, test_data):
def test_aggregate_spatial_with_geometry_url(connection, api_version, reducer, test_data):
res = (
connection.load_collection("S2")
.filter_bbox(3, 6, 52, 50)
.aggregate_spatial(geometries="/some/path/to/GeometryCollection.geojson", reducer=reducer)
.filter_bbox(3, 6, 52, 50)
.aggregate_spatial(geometries="https://example.com/geometries.geojson", reducer=reducer)
)
assert get_execute_graph(res) == test_data.load_json("%s/aggregate_zonal_path.json" % api_version)

Expand Down

0 comments on commit 60be18f

Please sign in to comment.