From e8f0fd6901bce5e3689d42a92453793c71b1f8e6 Mon Sep 17 00:00:00 2001 From: Vincent Sarago Date: Mon, 2 Aug 2021 17:56:46 +0200 Subject: [PATCH] add statistics endpoints (#347) * add statistics endpoints * update stats * support FeatureCollection * update docs --- CHANGES.md | 1 + docs/endpoints/cog.md | 31 ++- docs/endpoints/stac.md | 31 ++- src/titiler/core/tests/test_factories.py | 270 ++++++++++++++++++++--- src/titiler/core/titiler/core/factory.py | 128 ++++++++++- src/titiler/core/titiler/core/utils.py | 65 +++++- 6 files changed, 490 insertions(+), 36 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 8fe82e407..9b7db9488 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,6 +5,7 @@ ### titiler.core * add `/crop` POST endpoint to return an image from a GeoJSON feature (https://github.com/developmentseed/titiler/pull/339) +* add `/statistics` (GET and POST) endpoints to return advanced images statistics (https://github.com/developmentseed/titiler/pull/347) ### titiler.application diff --git a/docs/endpoints/cog.md b/docs/endpoints/cog.md index 2471612f0..86a10e9d3 100644 --- a/docs/endpoints/cog.md +++ b/docs/endpoints/cog.md @@ -20,6 +20,8 @@ Read Info/Metadata and create Web map Tiles from a **single** COG. The `cog` rou | `GET` | `/cog/preview[.{format}]` | image/bin | create a preview image from a dataset | `GET` | `/cog/crop/{minx},{miny},{maxx},{maxy}[/{width}x{height}].{format}` | image/bin | create an image from part of a dataset | `POST` | `/cog/crop[/{width}x{height}][].{format}]` | image/bin | create an image from a geojson covering a dataset +| `GET` | `/cog/statistics` | JSON | Return advanced statistics from a dataset +| `POST` | `/cog/statistics` | GeoJSON | Return zonal statistics from a dataset for a geosjon Feature or FeatureCollection | `GET` | `/cog/validate` | JSON | validate a COG and return dataset info | `GET` | `/cog/viewer` | HTML | demo webpage @@ -116,7 +118,7 @@ Example: `:endpoint:/cog/crop[/{width}x{height}][].{format}] - [POST]` - Body: - - **feature**: A valida GeoJSON feature (Polygon or MultiPolygon) + - **feature**: A valid GeoJSON feature (Polygon or MultiPolygon) - PathParams: - **height**: Force output image height. OPTIONAL @@ -231,6 +233,33 @@ Example: - `https://myendpoint/cog/metadata?url=https://somewhere.com/mycog.tif&bidx=1,2,3` +### Statistics + +Advanced raster statistics + +`:endpoint:/cog/statistics - [GET|POST]` + +- QueryParams: + - **url**: Cloud Optimized GeoTIFF URL. **REQUIRED** + - **bidx**: Comma (',') delimited band indexes. OPTIONAL + - **expression**: rio-tiler's band math expression (e.g B1/B2). OPTIONAL + - **nodata**: Overwrite internal Nodata value. OPTIONAL + - **max_size**: Max image size from which to calculate statistics, default is 1024. OPTIONAL + - **height**: Force image height. OPTIONAL + - **width**: Force image width. OPTIONAL + - **unscale**: Apply internal Scale/Offset. OPTIONAL + - **resampling_method**: rasterio resampling method. Default is `nearest`. + - **categorical**: Return statistics for categorical dataset. + - **c** (multiple): Pixels values for categories. + - **p** (multiple): Percentile values. + +- Body (for POST endpoint): + - **features**: A valid GeoJSON feature or FeatureCollection (Polygon or MultiPolygon). + +Example: + +- `https://myendpoint/cog/statistics?url=https://somewhere.com/mycog.tif&bidx=1,2,3&categorical=true&c=1&c=2&c=3&p=2&p98` + ### Demo `:endpoint:/cog/viewer` - COG Viewer diff --git a/docs/endpoints/stac.md b/docs/endpoints/stac.md index d656be383..86636d756 100644 --- a/docs/endpoints/stac.md +++ b/docs/endpoints/stac.md @@ -261,7 +261,36 @@ Example: Example: -- `https://myendpoint/stac/metadata?https://somewhere.com/item.json&assets=B01` +- `https://myendpoint/stac/metadata?url=https://somewhere.com/item.json&assets=B01` + + +### Statistics + +Advanced raster statistics + +`:endpoint:/stac/statistics - [GET|POST]` + +- QueryParams: + - **url**: STAC Item URL. **REQUIRED** + - **bidx**: Comma (',') delimited band indexes. OPTIONAL + - **assets**: Comma (',') delimited asset names. **REQUIRED** + - **expression**: rio-tiler's band math expression (e.g B1/B2). OPTIONAL + - **nodata**: Overwrite internal Nodata value. OPTIONAL + - **max_size**: Max image size from which to calculate statistics, default is 1024. OPTIONAL + - **height**: Force image height. OPTIONAL + - **width**: Force image width. OPTIONAL + - **unscale**: Apply internal Scale/Offset. OPTIONAL + - **resampling_method**: rasterio resampling method. Default is `nearest`. + - **categorical**: Return statistics for categorical dataset. + - **c** (multiple): Pixels values for categories. + - **p** (multiple): Percentile values. + +- Body (for POST endpoint): + - **features**: A valid GeoJSON feature or FeatureCollection (Polygon or MultiPolygon). + +Example: + +- `https://myendpoint/cog/statistics?url=https://somewhere.com/item.json&assets=B01&categorical=true&c=1&c=2&c=3&p=2&p98` ### Demo diff --git a/src/titiler/core/tests/test_factories.py b/src/titiler/core/tests/test_factories.py index f49f48b6c..73b33dc4e 100644 --- a/src/titiler/core/tests/test_factories.py +++ b/src/titiler/core/tests/test_factories.py @@ -31,7 +31,7 @@ def test_TilerFactory(): """Test TilerFactory class.""" cog = TilerFactory() - assert len(cog.router.routes) == 24 + assert len(cog.router.routes) == 26 assert cog.tms_dependency == TMSParams cog = TilerFactory(router_prefix="something", tms_dependency=WebMercatorTMSParams) @@ -47,7 +47,7 @@ def test_TilerFactory(): response = client.get(f"/something/NZTM2000/tilejson.json?url={DATA_DIR}/cog.tif") assert response.status_code == 422 - cog = TilerFactory(add_preview=False, add_part=False) + cog = TilerFactory(add_preview=False, add_part=False, add_statistics=False) assert len(cog.router.routes) == 17 app = FastAPI() @@ -272,47 +272,209 @@ def test_TilerFactory(): assert meta["dtype"] == "int16" assert meta["count"] == 1 - feature = json.dumps( - { - "type": "Feature", - "properties": {}, - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [-59.23828124999999, 74.16408546675687], - [-59.83154296874999, 73.15680773175981], - [-58.73291015624999, 72.88087095711504], - [-56.62353515625, 73.06104462497655], - [-55.17333984375, 73.41588526207096], - [-55.2392578125, 74.09799577518739], - [-56.88720703125, 74.2895142503942], - [-57.23876953124999, 74.30735341486248], - [-59.23828124999999, 74.16408546675687], - ] - ], - }, - } - ) - - response = client.post(f"/crop?url={DATA_DIR}/cog.tif", data=feature) + feature = { + "type": "Feature", + "properties": {}, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-59.23828124999999, 74.16408546675687], + [-59.83154296874999, 73.15680773175981], + [-58.73291015624999, 72.88087095711504], + [-56.62353515625, 73.06104462497655], + [-55.17333984375, 73.41588526207096], + [-55.2392578125, 74.09799577518739], + [-56.88720703125, 74.2895142503942], + [-57.23876953124999, 74.30735341486248], + [-59.23828124999999, 74.16408546675687], + ] + ], + }, + } + + feature_collection = {"type": "FeatureCollection", "features": [feature]} + + response = client.post(f"/crop?url={DATA_DIR}/cog.tif", json=feature) assert response.status_code == 200 assert response.headers["content-type"] == "image/png" - response = client.post(f"/crop.tif?url={DATA_DIR}/cog.tif", data=feature) + response = client.post(f"/crop.tif?url={DATA_DIR}/cog.tif", json=feature) assert response.status_code == 200 assert response.headers["content-type"] == "image/tiff; application=geotiff" meta = parse_img(response.content) assert meta["dtype"] == "uint16" assert meta["count"] == 2 - response = client.post(f"/crop/100x100.jpeg?url={DATA_DIR}/cog.tif", data=feature) + response = client.post(f"/crop/100x100.jpeg?url={DATA_DIR}/cog.tif", json=feature) assert response.status_code == 200 assert response.headers["content-type"] == "image/jpeg" meta = parse_img(response.content) assert meta["width"] == 100 assert meta["height"] == 100 + # GET - statistics + response = client.get(f"/statistics?url={DATA_DIR}/cog.tif&bidx=1,1,1") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + resp = response.json() + assert len(resp) == 3 + assert list(resp[0]) == [ + "min", + "max", + "mean", + "count", + "sum", + "std", + "median", + "majority", + "minority", + "unique", + "percentile_2", + "percentile_98", + "valid_pixels", + "masked_pixels", + "valid_percent", + ] + + response = client.get(f"/statistics?url={DATA_DIR}/cog.tif&bidx=1,1,1&p=4") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + resp = response.json() + assert len(resp) == 3 + assert list(resp[0]) == [ + "min", + "max", + "mean", + "count", + "sum", + "std", + "median", + "majority", + "minority", + "unique", + "percentile_4", + "valid_pixels", + "masked_pixels", + "valid_percent", + ] + + response = client.get(f"/statistics?url={DATA_DIR}/cog.tif&categorical=true") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + resp = response.json() + assert len(resp) == 1 + assert list(resp[0]) == [ + "categories", + "valid_pixels", + "masked_pixels", + "valid_percent", + ] + assert len(resp[0]["categories"]) == 15 + + response = client.get( + f"/statistics?url={DATA_DIR}/cog.tif&categorical=true&c=1&c=2&c=3&c=4" + ) + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + resp = response.json() + assert len(resp) == 1 + assert list(resp[0]) == [ + "categories", + "valid_pixels", + "masked_pixels", + "valid_percent", + ] + assert len(resp[0]["categories"]) == 4 + assert resp[0]["categories"]["4"] == 0 + + # POST - statistics + response = client.post( + f"/statistics?url={DATA_DIR}/cog.tif&bidx=1,1,1", json=feature + ) + assert response.status_code == 200 + assert response.headers["content-type"] == "application/geo+json" + resp = response.json() + assert resp["type"] == "Feature" + assert len(resp["properties"]["statistics"]) == 3 + assert list(resp["properties"]["statistics"][0]) == [ + "min", + "max", + "mean", + "count", + "sum", + "std", + "median", + "majority", + "minority", + "unique", + "percentile_2", + "percentile_98", + "valid_pixels", + "masked_pixels", + "valid_percent", + ] + + response = client.post( + f"/statistics?url={DATA_DIR}/cog.tif&bidx=1,1,1", json=feature_collection + ) + assert response.status_code == 200 + assert response.headers["content-type"] == "application/geo+json" + resp = response.json() + assert resp["type"] == "FeatureCollection" + assert len(resp["features"][0]["properties"]["statistics"]) == 3 + assert list(resp["features"][0]["properties"]["statistics"][0]) == [ + "min", + "max", + "mean", + "count", + "sum", + "std", + "median", + "majority", + "minority", + "unique", + "percentile_2", + "percentile_98", + "valid_pixels", + "masked_pixels", + "valid_percent", + ] + + response = client.post( + f"/statistics?url={DATA_DIR}/cog.tif&categorical=true", json=feature + ) + assert response.status_code == 200 + assert response.headers["content-type"] == "application/geo+json" + resp = response.json() + assert resp["type"] == "Feature" + assert len(resp["properties"]["statistics"]) == 1 + assert list(resp["properties"]["statistics"][0]) == [ + "categories", + "valid_pixels", + "masked_pixels", + "valid_percent", + ] + assert len(resp["properties"]["statistics"][0]["categories"]) == 12 + + response = client.post( + f"/statistics?url={DATA_DIR}/cog.tif&categorical=true&c=1&c=2&c=3&c=4", + json=feature, + ) + assert response.status_code == 200 + assert response.headers["content-type"] == "application/geo+json" + resp = response.json() + assert resp["type"] == "Feature" + assert len(resp["properties"]["statistics"]) == 1 + assert list(resp["properties"]["statistics"][0]) == [ + "categories", + "valid_pixels", + "masked_pixels", + "valid_percent", + ] + assert len(resp["properties"]["statistics"][0]["categories"]) == 4 + assert resp["properties"]["statistics"][0]["categories"]["4"] == 0 + @patch("rio_tiler.io.cogeo.rasterio") def test_MultiBaseTilerFactory(rio): @@ -320,7 +482,7 @@ def test_MultiBaseTilerFactory(rio): rio.open = mock_rasterio_open stac = MultiBaseTilerFactory(reader=STACReader) - assert len(stac.router.routes) == 25 + assert len(stac.router.routes) == 27 app = FastAPI() app.include_router(stac.router) @@ -371,6 +533,30 @@ def test_MultiBaseTilerFactory(rio): assert meta["dtype"] == "int32" assert meta["count"] == 3 + # GET - statistics + response = client.get(f"/statistics?url={DATA_DIR}/item.json&assets=B01,B09") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + resp = response.json() + assert len(resp) == 2 + assert list(resp[0]) == [ + "min", + "max", + "mean", + "count", + "sum", + "std", + "median", + "majority", + "minority", + "unique", + "percentile_2", + "percentile_98", + "valid_pixels", + "masked_pixels", + "valid_percent", + ] + @attr.s class BandFileReader(MultiBandReader): @@ -400,7 +586,7 @@ def test_MultiBandTilerFactory(): """test MultiBandTilerFactory.""" bands = MultiBandTilerFactory(reader=BandFileReader) - assert len(bands.router.routes) == 25 + assert len(bands.router.routes) == 27 app = FastAPI() app.include_router(bands.router) @@ -446,6 +632,30 @@ def test_MultiBandTilerFactory(): assert meta["dtype"] == "int32" assert meta["count"] == 3 + # GET - statistics + response = client.get(f"/statistics?url={DATA_DIR}&bands=B01,B09") + assert response.status_code == 200 + assert response.headers["content-type"] == "application/json" + resp = response.json() + assert len(resp) == 2 + assert list(resp[0]) == [ + "min", + "max", + "mean", + "count", + "sum", + "std", + "median", + "majority", + "minority", + "unique", + "percentile_2", + "percentile_98", + "valid_pixels", + "masked_pixels", + "valid_percent", + ] + def test_TMSFactory(): """test TMSFactory.""" diff --git a/src/titiler/core/titiler/core/factory.py b/src/titiler/core/titiler/core/factory.py index 37625169d..86efe6cd0 100644 --- a/src/titiler/core/titiler/core/factory.py +++ b/src/titiler/core/titiler/core/factory.py @@ -2,11 +2,11 @@ import abc from dataclasses import dataclass, field -from typing import Any, Callable, Dict, List, Optional, Type +from typing import Any, Callable, Dict, List, Optional, Type, Union from urllib.parse import urlencode, urlparse import rasterio -from geojson_pydantic.features import Feature +from geojson_pydantic.features import Feature, FeatureCollection from morecantile import TileMatrixSet from rio_tiler.io import BaseReader, COGReader, MultiBandReader, MultiBaseReader from rio_tiler.models import Bounds, Info, Metadata @@ -33,7 +33,7 @@ from titiler.core.models.OGC import TileMatrixSetList from titiler.core.resources.enums import ImageType, MediaType, OptionalHeader from titiler.core.resources.responses import GeoJSONResponse, XMLResponse -from titiler.core.utils import Timer, bbox_to_feature +from titiler.core.utils import Timer, bbox_to_feature, data_stats from fastapi import APIRouter, Body, Depends, Path, Query @@ -146,6 +146,7 @@ class TilerFactory(BaseTilerFactory): # Add/Remove some endpoints add_preview: bool = True add_part: bool = True + add_statistics: bool = True def register_routes(self): """ @@ -172,6 +173,9 @@ def register_routes(self): if self.add_part: self.part() + if self.add_statistics: + self.statistics() + ############################################################################ # /bounds ############################################################################ @@ -772,6 +776,124 @@ def geojson_crop( return Response(content, media_type=format.mediatype, headers=headers) + ############################################################################ + # /statistics (Optional) + ############################################################################ + def statistics(self): + """add statistics endpoints.""" + + @self.router.get( + "/statistics", + responses={ + 200: { + "content": {"application/json": {}}, + "description": "Return dataset's statistics.", + } + }, + ) + def statistics( + src_path=Depends(self.path_dependency), + layer_params=Depends(self.layer_dependency), + image_params=Depends(self.img_dependency), + dataset_params=Depends(self.dataset_dependency), + categorical: bool = Query( + False, description="Return statistics for categorical dataset." + ), + c: List[Union[float, int]] = Query( + None, description="Pixels values for categories." + ), + p: List[int] = Query([2, 98], description="Percentile values."), + kwargs: Dict = Depends(self.additional_dependency), + ): + """Create image from a geojson feature.""" + with rasterio.Env(**self.gdal_config): + with self.reader(src_path, **self.reader_options) as src_dst: + data = src_dst.preview( + **layer_params.kwargs, + **image_params.kwargs, + **dataset_params.kwargs, + **kwargs, + ).as_masked() + + return data_stats( + data, categorical=categorical, categories=c, percentiles=p + ) + + @self.router.post( + "/statistics", + response_model=Union[Feature, FeatureCollection], + response_model_exclude_none=True, + response_class=GeoJSONResponse, + responses={ + 200: { + "content": {"application/json": {}}, + "description": "Return dataset's statistics.", + } + }, + ) + def geojson_statistics( + features: Union[FeatureCollection, Feature] = Body( + ..., descriptiom="GeoJSON Feature or FeatureCollection." + ), + src_path=Depends(self.path_dependency), + layer_params=Depends(self.layer_dependency), + image_params=Depends(self.img_dependency), + dataset_params=Depends(self.dataset_dependency), + categorical: bool = Query( + False, description="Return statistics for categorical dataset." + ), + c: List[Union[float, int]] = Query( + None, description="Pixels values for categories." + ), + p: List[int] = Query([2, 98], description="Percentile values."), + kwargs: Dict = Depends(self.additional_dependency), + ): + """Create image from a geojson feature.""" + if isinstance(features, FeatureCollection): + feat = [] + for feature in features: + with rasterio.Env(**self.gdal_config): + with self.reader(src_path, **self.reader_options) as src_dst: + data = src_dst.feature( + feature.dict(exclude_none=True), + **layer_params.kwargs, + **image_params.kwargs, + **dataset_params.kwargs, + **kwargs, + ).as_masked() + + feature.properties.update( + { + "statistics": data_stats( + data, + categorical=categorical, + categories=c, + percentiles=p, + ) + } + ) + feat.append(feature) + return FeatureCollection(features=feat) + else: + with rasterio.Env(**self.gdal_config): + with self.reader(src_path, **self.reader_options) as src_dst: + data = src_dst.feature( + features.dict(exclude_none=True), + **layer_params.kwargs, + **image_params.kwargs, + **dataset_params.kwargs, + **kwargs, + ).as_masked() + + features.properties.update( + { + "statistics": data_stats( + data, categorical=categorical, categories=c, percentiles=p, + ) + } + ) + return features + @dataclass class MultiBaseTilerFactory(TilerFactory): diff --git a/src/titiler/core/titiler/core/utils.py b/src/titiler/core/titiler/core/utils.py index 7b66e8b46..a9d616cd0 100644 --- a/src/titiler/core/titiler/core/utils.py +++ b/src/titiler/core/titiler/core/utils.py @@ -1,8 +1,9 @@ """TiTiler utility functions.""" import time -from typing import Dict, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple +import numpy from geojson_pydantic.features import Feature @@ -55,3 +56,65 @@ def bbox_to_feature( "type": "Feature", } ) + + +def data_stats( + data: numpy.ma.array, + categorical: bool = False, + categories: Optional[List[float]] = None, + percentiles: List[int] = [2, 98], +) -> List[Dict[Any, Any]]: + """Returns statistics.""" + output: List[Dict[Any, Any]] = [] + percentiles_names = [f"percentile_{int(p)}" for p in percentiles] + for b in range(data.shape[0]): + keys, counts = numpy.unique(data[b].compressed(), return_counts=True) + + valid_pixels = float(numpy.ma.count(data[b])) + masked_pixels = float(numpy.ma.count_masked(data[b])) + valid_percent = round((valid_pixels / data[b].size) * 100, 2) + info_px = { + "valid_pixels": valid_pixels, + "masked_pixels": masked_pixels, + "valid_percent": valid_percent, + } + + if categorical: + # if input categories we make sure to use the same type as the data + out_keys = ( + numpy.array(categories).astype(keys.dtype) if categories else keys + ) + out_dict = dict(zip(keys.tolist(), counts.tolist())) + output.append( + { + "categories": {k: out_dict.get(k, 0) for k in out_keys.tolist()}, + **info_px, + }, + ) + else: + percentiles_values = numpy.percentile( + data[b].compressed(), percentiles + ).tolist() + + output.append( + { + "min": float(data[b].min()), + "max": float(data[b].max()), + "mean": float(data[b].mean()), + "count": float(data[b].count()), + "sum": float(data[b].sum()), + "std": float(data[b].std()), + "median": float(numpy.ma.median(data[b])), + "majority": float( + keys[counts.tolist().index(counts.max())].tolist() + ), + "minority": float( + keys[counts.tolist().index(counts.min())].tolist() + ), + "unique": float(counts.size), + **dict(zip(percentiles_names, percentiles_values)), + **info_px, + } + ) + + return output