diff --git a/notebooks/space2stats_api_adm_example.ipynb b/notebooks/space2stats_api_adm_example.ipynb
new file mode 100644
index 0000000..5e8a083
--- /dev/null
+++ b/notebooks/space2stats_api_adm_example.ipynb
@@ -0,0 +1,319 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import requests\n",
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
+ "import json\n",
+ "from shapely.geometry import shape\n",
+ "import numpy as np\n",
+ "from lonboard import Map, ScatterplotLayer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "BASE_URL = \"http://127.0.0.1:8000\"\n",
+ "FIELDS_ENDPOINT = f\"{BASE_URL}/fields\"\n",
+ "SUMMARY_ENDPOINT = f\"{BASE_URL}/summary\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def fetch_admin_boundaries(iso3: str, adm: str) -> gpd.GeoDataFrame:\n",
+ " \"\"\"Fetch administrative boundaries from GeoBoundaries API.\"\"\"\n",
+ " url = f'https://www.geoboundaries.org/api/current/gbOpen/{iso3}/{adm}/'\n",
+ " res = requests.get(url).json()\n",
+ " return gpd.read_file(res['gjDownloadURL'])\n",
+ "\n",
+ "def fetch_summary_data(feature: Dict) -> pd.DataFrame:\n",
+ " \"\"\"Fetch summary data for each administrative feature.\"\"\"\n",
+ " request_payload = {\n",
+ " \"aoi\": feature,\n",
+ " \"spatial_join_method\": \"touches\",\n",
+ " \"fields\": [\"sum_pop_2020\"], \n",
+ " \"geometry\": \"point\"\n",
+ " }\n",
+ " response = requests.post(SUMMARY_ENDPOINT, json=request_payload)\n",
+ " if response.status_code != 200:\n",
+ " raise Exception(f\"Failed to get summary: {response.text}\")\n",
+ " \n",
+ " summary_data = response.json()\n",
+ " if not summary_data:\n",
+ " print(f\"Failed to get summary for {feature['id']}\")\n",
+ " return pd.DataFrame() # Return an empty DataFrame if no data\n",
+ " \n",
+ " df = pd.DataFrame(summary_data)\n",
+ " df['adm_id'] = int(feature['id'])\n",
+ " df['adm_name'] = feature['properties']['shapeName']\n",
+ " df['geometry'] = df['geometry'].apply(lambda geom: shape(geom))\n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Turkana 0\n",
+ "Marsabit 1\n",
+ "Mandera 2\n",
+ "Wajir 3\n",
+ "West Pokot 4\n",
+ "Samburu 5\n",
+ "Isiolo 6\n",
+ "Baringo 7\n",
+ "Elgeyo-Marakwet 8\n",
+ "Trans Nzoia 9\n",
+ "Bungoma 10\n",
+ "Garissa 11\n",
+ "Uasin Gishu 12\n",
+ "Kakamega 13\n",
+ "Laikipia 14\n",
+ "Busia 15\n",
+ "Meru 16\n",
+ "Nandi 17\n",
+ "Siaya 18\n",
+ "Nakuru 19\n",
+ "Vihiga 20\n",
+ "Nyandarua 21\n",
+ "Tharaka 22\n",
+ "Kericho 23\n",
+ "Kisumu 24\n",
+ "Nyeri 25\n",
+ "Tana River 26\n",
+ "Kitui 27\n",
+ "Kirinyaga 28\n",
+ "Embu 29\n",
+ "Homa Bay 30\n",
+ "Bomet 31\n",
+ "Nyamira 32\n",
+ "Narok 33\n",
+ "Kisii 34\n",
+ "Murang'a 35\n",
+ "Migori 36\n",
+ "Kiambu 37\n",
+ "Machakos 38\n",
+ "Kajiado 39\n",
+ "Nairobi 40\n",
+ "Makueni 41\n",
+ "Lamu 42\n",
+ "Kilifi 43\n",
+ "Taita Taveta 44\n",
+ "Kwale 45\n",
+ "Mombasa 46\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " hex_id | \n",
+ " geometry | \n",
+ " sum_pop_2020 | \n",
+ " adm_id | \n",
+ " adm_name | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 866a4a407ffffff | \n",
+ " POINT (35.68873 4.94892) | \n",
+ " 554.655021 | \n",
+ " 0 | \n",
+ " Turkana | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 866a4a417ffffff | \n",
+ " POINT (35.59512 1.81273) | \n",
+ " 207.977046 | \n",
+ " 0 | \n",
+ " Turkana | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 866a4a41fffffff | \n",
+ " POINT (35.17288 5.20383) | \n",
+ " 637.218817 | \n",
+ " 0 | \n",
+ " Turkana | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 866a4a427ffffff | \n",
+ " POINT (35.61627 2.62098) | \n",
+ " 948.876730 | \n",
+ " 0 | \n",
+ " Turkana | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 866a4a437ffffff | \n",
+ " POINT (35.89027 3.48441) | \n",
+ " 112.805780 | \n",
+ " 0 | \n",
+ " Turkana | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " hex_id geometry sum_pop_2020 adm_id adm_name\n",
+ "0 866a4a407ffffff POINT (35.68873 4.94892) 554.655021 0 Turkana\n",
+ "1 866a4a417ffffff POINT (35.59512 1.81273) 207.977046 0 Turkana\n",
+ "2 866a4a41fffffff POINT (35.17288 5.20383) 637.218817 0 Turkana\n",
+ "3 866a4a427ffffff POINT (35.61627 2.62098) 948.876730 0 Turkana\n",
+ "4 866a4a437ffffff POINT (35.89027 3.48441) 112.805780 0 Turkana"
+ ]
+ },
+ "execution_count": 55,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ISO3 = 'KEN'\n",
+ "ADM = 'ADM1'\n",
+ "adm_boundaries = fetch_admin_boundaries(ISO3, ADM)\n",
+ "geojson_str = adm_boundaries.to_json()\n",
+ "adm_geojson = json.loads(geojson_str)\n",
+ "adm_features = adm_geojson['features']\n",
+ "\n",
+ "gdfs = []\n",
+ "for i, feature in enumerate(adm_features):\n",
+ " df = fetch_summary_data(feature)\n",
+ " if not df.empty:\n",
+ " gdfs.append(gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326'))\n",
+ " print(feature['properties']['shapeName'], i)\n",
+ "\n",
+ "# Concatenate all GeoDataFrames into a single GeoDataFrame\n",
+ "gdf = pd.concat(gdfs, ignore_index=True)\n",
+ "\n",
+ "# Display the GeoDataFrame structure\n",
+ "gdf.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Define custom breaks and corresponding RGBA colors for visualization\n",
+ "breaks = [0, 1, 1000, 10000, 50000, 100000, 200000, gdf[\"sum_pop_2020\"].max()]\n",
+ "colors = np.array([\n",
+ " [211, 211, 211, 255], # Light gray for 0\n",
+ " [255, 255, 0, 255], # Yellow for 1-1000\n",
+ " [255, 165, 0, 255], # Orange for 1000-10000\n",
+ " [255, 0, 0, 255], # Red for 10000-50000\n",
+ " [128, 0, 128, 255], # Purple for 50000-100000\n",
+ " [0, 0, 255, 255], # Blue for 100000-200000\n",
+ " [0, 0, 139, 255], # Dark blue for 200000+\n",
+ "])\n",
+ "\n",
+ "def assign_color(value: float) -> list:\n",
+ " \"\"\"Assign colors based on population value.\"\"\"\n",
+ " for i in range(len(breaks) - 1):\n",
+ " if breaks[i] <= value < breaks[i + 1]:\n",
+ " return colors[i].tolist() # Convert to list\n",
+ " return colors[-1].tolist() # In case value exceeds all breaks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "63b834db2cde4dea96705c9b656cea45",
+ "version_major": 2,
+ "version_minor": 1
+ },
+ "text/plain": [
+ "Map(layers=[ScatterplotLayer(get_fill_color=\n",
+ "[\n",
+ " [\n",
+ " 2…"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Map sum_pop_2020 values to colors using the custom function\n",
+ "gdf['color'] = gdf[\"sum_pop_2020\"].apply(assign_color)\n",
+ "\n",
+ "# Flatten the color list into a 2D array for ScatterplotLayer\n",
+ "color_list = np.array(gdf['color'].tolist(), dtype=np.uint8)\n",
+ "\n",
+ "# Create the scatterplot layer with the assigned colors\n",
+ "layer = ScatterplotLayer.from_geopandas(gdf, get_radius=2000, get_fill_color=color_list)\n",
+ "\n",
+ "# Create the map with the scatterplot layer\n",
+ "m = Map(layer)\n",
+ "m"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "spacestats",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.9"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/space2stats_api/src/space2stats/h3_utils.py b/space2stats_api/src/space2stats/h3_utils.py
index 639d27b..de597c4 100644
--- a/space2stats_api/src/space2stats/h3_utils.py
+++ b/space2stats_api/src/space2stats/h3_utils.py
@@ -1,15 +1,91 @@
-from itertools import chain
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, Iterable, List, Literal, Optional, Set
import h3
from shapely.geometry import MultiPolygon, Point, Polygon, mapping, shape
+# https://h3geo.org/docs/core-library/restable
+MAX_RESOLUTION = 15
+
+
+def _recursive_polyfill(
+ aoi_geojson: Dict[str, Any], resolution: int, original_resolution: int
+) -> Set[str]:
+ # Attempt to get H3 IDs at the current resolution
+ h3_ids = h3.polyfill(aoi_geojson, resolution, geo_json_conformant=True)
+
+ # If valid H3 IDs are found, return them
+ if h3_ids:
+ return {h3.h3_to_parent(h3_id, original_resolution) for h3_id in h3_ids}
+
+ # If we haven't reached the maximum resolution, try the next higher resolution
+ if resolution < MAX_RESOLUTION:
+ return _recursive_polyfill(aoi_geojson, resolution + 1, original_resolution)
+
+ return set()
+
+
+def _find_within(aoi_geojson: Dict[str, Any], h3_ids: Iterable[str]) -> Set[str]:
+ # Convert AOI GeoJSON to Shapely geometry
+ aoi_shape = shape(aoi_geojson)
+
+ # Find H3 IDs that are within the polygon
+ contained_h3_ids = [
+ h3_id
+ for h3_id in h3_ids
+ if aoi_shape.contains(Polygon(h3.h3_to_geo_boundary(h3_id, geo_json=True)))
+ ]
+
+ return set(contained_h3_ids)
+
+
+def _find_touches(aoi_geojson: Dict[str, Any], h3_ids: Iterable[str]) -> Set[str]:
+ aoi_shape = shape(aoi_geojson)
+
+ neighbors = set()
+ for h3_id in h3_ids:
+ parent_id = h3.h3_to_parent(h3_id, h3.h3_get_resolution(h3_id) - 1)
+
+ for n_h3_id in h3.h3_to_children(
+ parent_id, h3.h3_get_resolution(parent_id) + 1
+ ):
+ boundary = Polygon(h3.h3_to_geo_boundary(n_h3_id, geo_json=True))
+ if h3.h3_indexes_are_neighbors(h3_id, n_h3_id) and aoi_shape.intersects(
+ boundary
+ ):
+ neighbors.add(n_h3_id)
+
+ return neighbors.union(h3_ids)
+
+
+def _generate_h3_ids(
+ aoi_geojson: Dict[str, Any],
+ resolution: int,
+ spatial_join_method: Literal["touches", "within", "centroid"],
+) -> Set[str]:
+ if spatial_join_method not in ["touches", "within", "centroid"]:
+ raise ValueError(f"Invalid spatial join method: {spatial_join_method}")
+
+ # Generate H3 hexagons covering the AOI
+ # Polyfill defines containment based on centroid:
+ # https://h3geo.org/docs/3.x/api/regions/#polyfill
+ h3_ids = _recursive_polyfill(aoi_geojson, resolution, resolution)
+
+ if spatial_join_method == "within":
+ h3_ids = _find_within(aoi_geojson, h3_ids)
+
+ if spatial_join_method == "touches":
+ h3_ids = _find_touches(aoi_geojson, h3_ids)
+
+ return h3_ids
+
def generate_h3_ids(
- aoi_geojson: Dict[str, Any], resolution: int, spatial_join_method: str
-) -> List[str]:
+ aoi_geojson: Dict[str, Any],
+ resolution: int,
+ spatial_join_method: Literal["touches", "within", "centroid"],
+) -> Set[str]:
"""
- Generate H3 hexagon IDs for a given AOI GeoJSON object.
+ Generate H3 hexagon IDs for a given AOI GeoJSON object, supporting both Polygon and MultiPolygon.
Parameters:
aoi_geojson (Dict[str, Any]): The AOI GeoJSON object.
@@ -17,51 +93,24 @@ def generate_h3_ids(
spatial_join_method (str): The spatial join method to use.
Returns:
- List[str]: A list of H3 hexagon IDs.
+ Set[str]: A set of H3 hexagon IDs.
"""
if spatial_join_method not in ["touches", "within", "centroid"]:
raise ValueError("Invalid spatial join method")
- # Convert GeoJSON to Shapely geometry
aoi_shape = shape(aoi_geojson)
- # Generate H3 hexagons covering the AOI
- geoms = (
- [mapping(geom) for geom in aoi_shape.geoms]
- if isinstance(aoi_shape, MultiPolygon)
- else [aoi_geojson]
- )
- h3_ids = list(
- # Use set to remove duplicates
- set(
- # Treat list of sets as single iterable
- chain(
- *[
- # Generate H3 hexagons for each geometry
- h3.polyfill(geom, resolution, geo_json_conformant=True)
- for geom in geoms
- ]
- )
- )
- )
-
- # Filter hexagons based on spatial join method
- # Touches method returns plain h3_ids
- if spatial_join_method == "within":
- h3_ids = [
- h3_id
- for h3_id in h3_ids
- if aoi_shape.contains(Polygon(h3.h3_to_geo_boundary(h3_id, geo_json=True)))
- ]
-
- elif spatial_join_method == "centroid":
- h3_ids = [
- h3_id
- for h3_id in h3_ids
- if aoi_shape.contains(
- Polygon(h3.h3_to_geo_boundary(h3_id, geo_json=True)).centroid
+ if isinstance(aoi_shape, MultiPolygon):
+ h3_ids = set()
+ for geom in aoi_shape.geoms:
+ # Convert each geometry to GeoJSON
+ geom_geojson = mapping(geom)
+ h3_ids.update(
+ _generate_h3_ids(geom_geojson, resolution, spatial_join_method)
)
- ]
+ else:
+ # Single polygon case
+ h3_ids = _generate_h3_ids(aoi_geojson, resolution, spatial_join_method)
return h3_ids
diff --git a/space2stats_api/src/space2stats/lib.py b/space2stats_api/src/space2stats/lib.py
index 689a101..f814ceb 100644
--- a/space2stats_api/src/space2stats/lib.py
+++ b/space2stats_api/src/space2stats/lib.py
@@ -84,13 +84,15 @@ def summaries(
return []
# Get Summaries from H3 ids
- rows, colnames = self._get_summaries(fields=fields, h3_ids=h3_ids)
+ rows, colnames = self._get_summaries(fields=fields, h3_ids=list(h3_ids))
if not rows:
return []
# Format Summaries
summaries: List[Dict] = []
- geometries = generate_h3_geometries(h3_ids, geometry) if geometry else None
+ geometries = (
+ generate_h3_geometries(list(h3_ids), geometry) if geometry else None
+ )
for idx, row in enumerate(rows):
summary = {"hex_id": row[0]}
diff --git a/space2stats_api/src/tests/test_h3_utils.py b/space2stats_api/src/tests/test_h3_utils.py
index 50d03eb..8603178 100644
--- a/space2stats_api/src/tests/test_h3_utils.py
+++ b/space2stats_api/src/tests/test_h3_utils.py
@@ -66,5 +66,67 @@ def test_generate_h3_geometries_point_multipolygon():
assert geom["type"] == "Point", "Expected Point geometry for MultiPolygon"
+def test_generate_h3_sliver_polygon_touches():
+ data = generate_h3_ids(
+ {
+ "type": "Polygon",
+ "coordinates": [
+ [
+ [41.14127371265408, -2.1034653113510444],
+ [41.140645873470845, -2.104696345752785],
+ [41.14205369446421, -2.104701102391104],
+ [41.14127371265408, -2.1034653113510444],
+ ]
+ ],
+ },
+ 6,
+ "touches",
+ )
+
+ for h in ["867a74817ffffff", "867a74807ffffff"]:
+ assert h in data, f"Missing {h} in generated hexagons"
+ assert len(data) == 2
+
+
+def test_generate_h3_sliver_polygon_within():
+ data = generate_h3_ids(
+ {
+ "type": "Polygon",
+ "coordinates": [
+ [
+ [41.14127371265408, -2.1034653113510444],
+ [41.140645873470845, -2.104696345752785],
+ [41.14205369446421, -2.104701102391104],
+ [41.14127371265408, -2.1034653113510444],
+ ]
+ ],
+ },
+ 6,
+ "within",
+ )
+ assert len(data) == 0, "Expected no hexagons to match"
+
+
+def test_generate_h3_sliver_polygon_centroid():
+ data = generate_h3_ids(
+ {
+ "type": "Polygon",
+ "coordinates": [
+ [
+ [41.14127371265408, -2.1034653113510444],
+ [41.140645873470845, -2.104696345752785],
+ [41.14205369446421, -2.104701102391104],
+ [41.14127371265408, -2.1034653113510444],
+ ]
+ ],
+ },
+ 6,
+ "centroid",
+ )
+ h = "867a74817ffffff"
+ assert "867a74817ffffff" in data, f"{h} not in generated hexagons"
+ assert len(data) == 1
+
+
if __name__ == "__main__":
pytest.main()