diff --git a/notebooks/space2stats_api_adm_example.ipynb b/notebooks/space2stats_api_adm_example.ipynb new file mode 100644 index 0000000..5e8a083 --- /dev/null +++ b/notebooks/space2stats_api_adm_example.ipynb @@ -0,0 +1,319 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "import json\n", + "from shapely.geometry import shape\n", + "import numpy as np\n", + "from lonboard import Map, ScatterplotLayer" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "BASE_URL = \"http://127.0.0.1:8000\"\n", + "FIELDS_ENDPOINT = f\"{BASE_URL}/fields\"\n", + "SUMMARY_ENDPOINT = f\"{BASE_URL}/summary\"" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [], + "source": [ + "def fetch_admin_boundaries(iso3: str, adm: str) -> gpd.GeoDataFrame:\n", + " \"\"\"Fetch administrative boundaries from GeoBoundaries API.\"\"\"\n", + " url = f'https://www.geoboundaries.org/api/current/gbOpen/{iso3}/{adm}/'\n", + " res = requests.get(url).json()\n", + " return gpd.read_file(res['gjDownloadURL'])\n", + "\n", + "def fetch_summary_data(feature: Dict) -> pd.DataFrame:\n", + " \"\"\"Fetch summary data for each administrative feature.\"\"\"\n", + " request_payload = {\n", + " \"aoi\": feature,\n", + " \"spatial_join_method\": \"touches\",\n", + " \"fields\": [\"sum_pop_2020\"], \n", + " \"geometry\": \"point\"\n", + " }\n", + " response = requests.post(SUMMARY_ENDPOINT, json=request_payload)\n", + " if response.status_code != 200:\n", + " raise Exception(f\"Failed to get summary: {response.text}\")\n", + " \n", + " summary_data = response.json()\n", + " if not summary_data:\n", + " print(f\"Failed to get summary for {feature['id']}\")\n", + " return pd.DataFrame() # Return an empty DataFrame if no data\n", + " \n", + " df = pd.DataFrame(summary_data)\n", + " df['adm_id'] = int(feature['id'])\n", + " df['adm_name'] = feature['properties']['shapeName']\n", + " df['geometry'] = df['geometry'].apply(lambda geom: shape(geom))\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Turkana 0\n", + "Marsabit 1\n", + "Mandera 2\n", + "Wajir 3\n", + "West Pokot 4\n", + "Samburu 5\n", + "Isiolo 6\n", + "Baringo 7\n", + "Elgeyo-Marakwet 8\n", + "Trans Nzoia 9\n", + "Bungoma 10\n", + "Garissa 11\n", + "Uasin Gishu 12\n", + "Kakamega 13\n", + "Laikipia 14\n", + "Busia 15\n", + "Meru 16\n", + "Nandi 17\n", + "Siaya 18\n", + "Nakuru 19\n", + "Vihiga 20\n", + "Nyandarua 21\n", + "Tharaka 22\n", + "Kericho 23\n", + "Kisumu 24\n", + "Nyeri 25\n", + "Tana River 26\n", + "Kitui 27\n", + "Kirinyaga 28\n", + "Embu 29\n", + "Homa Bay 30\n", + "Bomet 31\n", + "Nyamira 32\n", + "Narok 33\n", + "Kisii 34\n", + "Murang'a 35\n", + "Migori 36\n", + "Kiambu 37\n", + "Machakos 38\n", + "Kajiado 39\n", + "Nairobi 40\n", + "Makueni 41\n", + "Lamu 42\n", + "Kilifi 43\n", + "Taita Taveta 44\n", + "Kwale 45\n", + "Mombasa 46\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
hex_idgeometrysum_pop_2020adm_idadm_name
0866a4a407ffffffPOINT (35.68873 4.94892)554.6550210Turkana
1866a4a417ffffffPOINT (35.59512 1.81273)207.9770460Turkana
2866a4a41fffffffPOINT (35.17288 5.20383)637.2188170Turkana
3866a4a427ffffffPOINT (35.61627 2.62098)948.8767300Turkana
4866a4a437ffffffPOINT (35.89027 3.48441)112.8057800Turkana
\n", + "
" + ], + "text/plain": [ + " hex_id geometry sum_pop_2020 adm_id adm_name\n", + "0 866a4a407ffffff POINT (35.68873 4.94892) 554.655021 0 Turkana\n", + "1 866a4a417ffffff POINT (35.59512 1.81273) 207.977046 0 Turkana\n", + "2 866a4a41fffffff POINT (35.17288 5.20383) 637.218817 0 Turkana\n", + "3 866a4a427ffffff POINT (35.61627 2.62098) 948.876730 0 Turkana\n", + "4 866a4a437ffffff POINT (35.89027 3.48441) 112.805780 0 Turkana" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ISO3 = 'KEN'\n", + "ADM = 'ADM1'\n", + "adm_boundaries = fetch_admin_boundaries(ISO3, ADM)\n", + "geojson_str = adm_boundaries.to_json()\n", + "adm_geojson = json.loads(geojson_str)\n", + "adm_features = adm_geojson['features']\n", + "\n", + "gdfs = []\n", + "for i, feature in enumerate(adm_features):\n", + " df = fetch_summary_data(feature)\n", + " if not df.empty:\n", + " gdfs.append(gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326'))\n", + " print(feature['properties']['shapeName'], i)\n", + "\n", + "# Concatenate all GeoDataFrames into a single GeoDataFrame\n", + "gdf = pd.concat(gdfs, ignore_index=True)\n", + "\n", + "# Display the GeoDataFrame structure\n", + "gdf.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [], + "source": [ + "# Define custom breaks and corresponding RGBA colors for visualization\n", + "breaks = [0, 1, 1000, 10000, 50000, 100000, 200000, gdf[\"sum_pop_2020\"].max()]\n", + "colors = np.array([\n", + " [211, 211, 211, 255], # Light gray for 0\n", + " [255, 255, 0, 255], # Yellow for 1-1000\n", + " [255, 165, 0, 255], # Orange for 1000-10000\n", + " [255, 0, 0, 255], # Red for 10000-50000\n", + " [128, 0, 128, 255], # Purple for 50000-100000\n", + " [0, 0, 255, 255], # Blue for 100000-200000\n", + " [0, 0, 139, 255], # Dark blue for 200000+\n", + "])\n", + "\n", + "def assign_color(value: float) -> list:\n", + " \"\"\"Assign colors based on population value.\"\"\"\n", + " for i in range(len(breaks) - 1):\n", + " if breaks[i] <= value < breaks[i + 1]:\n", + " return colors[i].tolist() # Convert to list\n", + " return colors[-1].tolist() # In case value exceeds all breaks" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "63b834db2cde4dea96705c9b656cea45", + "version_major": 2, + "version_minor": 1 + }, + "text/plain": [ + "Map(layers=[ScatterplotLayer(get_fill_color=\n", + "[\n", + " [\n", + " 2…" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Map sum_pop_2020 values to colors using the custom function\n", + "gdf['color'] = gdf[\"sum_pop_2020\"].apply(assign_color)\n", + "\n", + "# Flatten the color list into a 2D array for ScatterplotLayer\n", + "color_list = np.array(gdf['color'].tolist(), dtype=np.uint8)\n", + "\n", + "# Create the scatterplot layer with the assigned colors\n", + "layer = ScatterplotLayer.from_geopandas(gdf, get_radius=2000, get_fill_color=color_list)\n", + "\n", + "# Create the map with the scatterplot layer\n", + "m = Map(layer)\n", + "m" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "spacestats", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/space2stats_api/src/space2stats/h3_utils.py b/space2stats_api/src/space2stats/h3_utils.py index 639d27b..de597c4 100644 --- a/space2stats_api/src/space2stats/h3_utils.py +++ b/space2stats_api/src/space2stats/h3_utils.py @@ -1,15 +1,91 @@ -from itertools import chain -from typing import Any, Dict, List, Optional +from typing import Any, Dict, Iterable, List, Literal, Optional, Set import h3 from shapely.geometry import MultiPolygon, Point, Polygon, mapping, shape +# https://h3geo.org/docs/core-library/restable +MAX_RESOLUTION = 15 + + +def _recursive_polyfill( + aoi_geojson: Dict[str, Any], resolution: int, original_resolution: int +) -> Set[str]: + # Attempt to get H3 IDs at the current resolution + h3_ids = h3.polyfill(aoi_geojson, resolution, geo_json_conformant=True) + + # If valid H3 IDs are found, return them + if h3_ids: + return {h3.h3_to_parent(h3_id, original_resolution) for h3_id in h3_ids} + + # If we haven't reached the maximum resolution, try the next higher resolution + if resolution < MAX_RESOLUTION: + return _recursive_polyfill(aoi_geojson, resolution + 1, original_resolution) + + return set() + + +def _find_within(aoi_geojson: Dict[str, Any], h3_ids: Iterable[str]) -> Set[str]: + # Convert AOI GeoJSON to Shapely geometry + aoi_shape = shape(aoi_geojson) + + # Find H3 IDs that are within the polygon + contained_h3_ids = [ + h3_id + for h3_id in h3_ids + if aoi_shape.contains(Polygon(h3.h3_to_geo_boundary(h3_id, geo_json=True))) + ] + + return set(contained_h3_ids) + + +def _find_touches(aoi_geojson: Dict[str, Any], h3_ids: Iterable[str]) -> Set[str]: + aoi_shape = shape(aoi_geojson) + + neighbors = set() + for h3_id in h3_ids: + parent_id = h3.h3_to_parent(h3_id, h3.h3_get_resolution(h3_id) - 1) + + for n_h3_id in h3.h3_to_children( + parent_id, h3.h3_get_resolution(parent_id) + 1 + ): + boundary = Polygon(h3.h3_to_geo_boundary(n_h3_id, geo_json=True)) + if h3.h3_indexes_are_neighbors(h3_id, n_h3_id) and aoi_shape.intersects( + boundary + ): + neighbors.add(n_h3_id) + + return neighbors.union(h3_ids) + + +def _generate_h3_ids( + aoi_geojson: Dict[str, Any], + resolution: int, + spatial_join_method: Literal["touches", "within", "centroid"], +) -> Set[str]: + if spatial_join_method not in ["touches", "within", "centroid"]: + raise ValueError(f"Invalid spatial join method: {spatial_join_method}") + + # Generate H3 hexagons covering the AOI + # Polyfill defines containment based on centroid: + # https://h3geo.org/docs/3.x/api/regions/#polyfill + h3_ids = _recursive_polyfill(aoi_geojson, resolution, resolution) + + if spatial_join_method == "within": + h3_ids = _find_within(aoi_geojson, h3_ids) + + if spatial_join_method == "touches": + h3_ids = _find_touches(aoi_geojson, h3_ids) + + return h3_ids + def generate_h3_ids( - aoi_geojson: Dict[str, Any], resolution: int, spatial_join_method: str -) -> List[str]: + aoi_geojson: Dict[str, Any], + resolution: int, + spatial_join_method: Literal["touches", "within", "centroid"], +) -> Set[str]: """ - Generate H3 hexagon IDs for a given AOI GeoJSON object. + Generate H3 hexagon IDs for a given AOI GeoJSON object, supporting both Polygon and MultiPolygon. Parameters: aoi_geojson (Dict[str, Any]): The AOI GeoJSON object. @@ -17,51 +93,24 @@ def generate_h3_ids( spatial_join_method (str): The spatial join method to use. Returns: - List[str]: A list of H3 hexagon IDs. + Set[str]: A set of H3 hexagon IDs. """ if spatial_join_method not in ["touches", "within", "centroid"]: raise ValueError("Invalid spatial join method") - # Convert GeoJSON to Shapely geometry aoi_shape = shape(aoi_geojson) - # Generate H3 hexagons covering the AOI - geoms = ( - [mapping(geom) for geom in aoi_shape.geoms] - if isinstance(aoi_shape, MultiPolygon) - else [aoi_geojson] - ) - h3_ids = list( - # Use set to remove duplicates - set( - # Treat list of sets as single iterable - chain( - *[ - # Generate H3 hexagons for each geometry - h3.polyfill(geom, resolution, geo_json_conformant=True) - for geom in geoms - ] - ) - ) - ) - - # Filter hexagons based on spatial join method - # Touches method returns plain h3_ids - if spatial_join_method == "within": - h3_ids = [ - h3_id - for h3_id in h3_ids - if aoi_shape.contains(Polygon(h3.h3_to_geo_boundary(h3_id, geo_json=True))) - ] - - elif spatial_join_method == "centroid": - h3_ids = [ - h3_id - for h3_id in h3_ids - if aoi_shape.contains( - Polygon(h3.h3_to_geo_boundary(h3_id, geo_json=True)).centroid + if isinstance(aoi_shape, MultiPolygon): + h3_ids = set() + for geom in aoi_shape.geoms: + # Convert each geometry to GeoJSON + geom_geojson = mapping(geom) + h3_ids.update( + _generate_h3_ids(geom_geojson, resolution, spatial_join_method) ) - ] + else: + # Single polygon case + h3_ids = _generate_h3_ids(aoi_geojson, resolution, spatial_join_method) return h3_ids diff --git a/space2stats_api/src/space2stats/lib.py b/space2stats_api/src/space2stats/lib.py index 689a101..f814ceb 100644 --- a/space2stats_api/src/space2stats/lib.py +++ b/space2stats_api/src/space2stats/lib.py @@ -84,13 +84,15 @@ def summaries( return [] # Get Summaries from H3 ids - rows, colnames = self._get_summaries(fields=fields, h3_ids=h3_ids) + rows, colnames = self._get_summaries(fields=fields, h3_ids=list(h3_ids)) if not rows: return [] # Format Summaries summaries: List[Dict] = [] - geometries = generate_h3_geometries(h3_ids, geometry) if geometry else None + geometries = ( + generate_h3_geometries(list(h3_ids), geometry) if geometry else None + ) for idx, row in enumerate(rows): summary = {"hex_id": row[0]} diff --git a/space2stats_api/src/tests/test_h3_utils.py b/space2stats_api/src/tests/test_h3_utils.py index 50d03eb..8603178 100644 --- a/space2stats_api/src/tests/test_h3_utils.py +++ b/space2stats_api/src/tests/test_h3_utils.py @@ -66,5 +66,67 @@ def test_generate_h3_geometries_point_multipolygon(): assert geom["type"] == "Point", "Expected Point geometry for MultiPolygon" +def test_generate_h3_sliver_polygon_touches(): + data = generate_h3_ids( + { + "type": "Polygon", + "coordinates": [ + [ + [41.14127371265408, -2.1034653113510444], + [41.140645873470845, -2.104696345752785], + [41.14205369446421, -2.104701102391104], + [41.14127371265408, -2.1034653113510444], + ] + ], + }, + 6, + "touches", + ) + + for h in ["867a74817ffffff", "867a74807ffffff"]: + assert h in data, f"Missing {h} in generated hexagons" + assert len(data) == 2 + + +def test_generate_h3_sliver_polygon_within(): + data = generate_h3_ids( + { + "type": "Polygon", + "coordinates": [ + [ + [41.14127371265408, -2.1034653113510444], + [41.140645873470845, -2.104696345752785], + [41.14205369446421, -2.104701102391104], + [41.14127371265408, -2.1034653113510444], + ] + ], + }, + 6, + "within", + ) + assert len(data) == 0, "Expected no hexagons to match" + + +def test_generate_h3_sliver_polygon_centroid(): + data = generate_h3_ids( + { + "type": "Polygon", + "coordinates": [ + [ + [41.14127371265408, -2.1034653113510444], + [41.140645873470845, -2.104696345752785], + [41.14205369446421, -2.104701102391104], + [41.14127371265408, -2.1034653113510444], + ] + ], + }, + 6, + "centroid", + ) + h = "867a74817ffffff" + assert "867a74817ffffff" in data, f"{h} not in generated hexagons" + assert len(data) == 1 + + if __name__ == "__main__": pytest.main()