diff --git a/space2stats_api/src/space2stats_ingest/METADATA/Space2Stats Metadata Content.xlsx b/space2stats_api/src/space2stats_ingest/METADATA/Space2Stats Metadata Content.xlsx new file mode 100644 index 0000000..c3df1c7 Binary files /dev/null and b/space2stats_api/src/space2stats_ingest/METADATA/Space2Stats Metadata Content.xlsx differ diff --git a/space2stats_api/src/space2stats_ingest/METADATA/metadata.ipynb b/space2stats_api/src/space2stats_ingest/METADATA/metadata.ipynb new file mode 100644 index 0000000..01808b1 --- /dev/null +++ b/space2stats_api/src/space2stats_ingest/METADATA/metadata.ipynb @@ -0,0 +1,3586 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Metadata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook creates a SpatioTemporal Asset Catalog (STAC) catalog and item for the Space2Stats database. It reads the source parquet file and an additional metadata spreadsheet to create STAC compliant metadata." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Dict\n", + "import requests\n", + "import pandas as pd\n", + "import geopandas as gpd\n", + "from shapely.geometry import shape, Polygon\n", + "import h3\n", + "\n", + "import shutil\n", + "import tempfile\n", + "from pathlib import Path\n", + "\n", + "from pystac import Catalog, Item, Asset, CatalogType, get_stac_version\n", + "import fio_stac\n", + "from datetime import datetime, UTC\n", + "import ast\n", + "from os.path import join\n", + "\n", + "import git, os\n", + "git_repo = git.Repo(os.getcwd(), search_parent_directories=True)\n", + "git_root = git_repo.git.rev_parse(\"--show-toplevel\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Current Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "parquet_file = join(git_root, 'postgres', 'space2stats.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_parquet(parquet_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "gdf = df.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "14117882" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(gdf)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "gdf['geometry'] = gdf['hex_id'].map(lambda hex_id: Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True))) # column-wise map: only hex_id is needed, so avoid building a row Series for each of the ~14M hexagons" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "gdf = gpd.GeoDataFrame(gdf, geometry='geometry', crs='EPSG:4326')" + ] + }, + { + "cell_type": "code", + 
"execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-179.99999562, -89.98750455, 179.99999096, 89.98750455])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf.total_bounds" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create STAC" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.0.0\n" + ] + } + ], + "source": [ + "print(get_stac_version())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Content" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For now, metadata fields are managed through an Excel Spreadsheet." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "overview = pd.read_excel(\"Space2Stats Metadata Content.xlsx\", sheet_name=\"DDH Dataset\", index_col=\"Field\")\n", + "nada = pd.read_excel(\"Space2Stats Metadata Content.xlsx\", sheet_name=\"NADA\", index_col=\"Field\")\n", + "feature_catalog = pd.read_excel(\"Space2Stats Metadata Content.xlsx\", sheet_name=\"Feature Catalog\")\n", + "sources = pd.read_excel(\"Space2Stats Metadata Content.xlsx\", sheet_name=\"Sources\")\n", + "sources.loc[:, \"Variables\"] = sources[\"Variables\"].map(ast.literal_eval) # the sheet stores each variable list as Python-literal text; parse it into a real list" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Value
Field
TitleSpace2Stats Database
DescriptionA global dataset of geospatial variables at th...
TTLBen Stewart
Business UnitDECSC
CollaboratorAndres Chamorro
\n", + "
" + ], + "text/plain": [ + " Value\n", + "Field \n", + "Title Space2Stats Database\n", + "Description A global dataset of geospatial variables at th...\n", + "TTL Ben Stewart\n", + "Business Unit DECSC\n", + "Collaborator Andres Chamorro" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "overview.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
GroupValue
Field
TitleIdentificationSpace2Stats Database
IdentifierIdentificationGLO_2024_SPACE2STATS_GEO_v01
Hierarchy levelIdentificationdataset
EditionIdentificationv.1
Edition DateIdentification2024-09-06 00:00:00
\n", + "
" + ], + "text/plain": [ + " Group Value\n", + "Field \n", + "Title Identification Space2Stats Database\n", + "Identifier Identification GLO_2024_SPACE2STATS_GEO_v01\n", + "Hierarchy level Identification dataset\n", + "Edition Identification v.1\n", + "Edition Date Identification 2024-09-06 00:00:00" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nada.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
variabledescriptiontypenodata
0hex_idH3 unique identifierstringNaN
1ogc_fidFeature unique identifiernumericNaN
2sum_pop_2020Total population, 2020numericNaN
3sum_pop_f_0_2020Total population female, ages 0 to 1, 2020numericNaN
4sum_pop_f_10_2020Total population female, ages 10 to 15, 2020numericNaN
\n", + "
" + ], + "text/plain": [ + " variable description type \\\n", + "0 hex_id H3 unique identifier string \n", + "1 ogc_fid Feature unique identifier numeric \n", + "2 sum_pop_2020 Total population, 2020 numeric \n", + "3 sum_pop_f_0_2020 Total population female, ages 0 to 1, 2020 numeric \n", + "4 sum_pop_f_10_2020 Total population female, ages 10 to 15, 2020 numeric \n", + "\n", + " nodata \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature_catalog.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ThemeNameDescriptionMethodological NotesVariablesSource DataCitation sourceOrganizationMethodResolution
0DemographicsPopulationGridded population disaggregated by gender.Global raster files are processed for each hex...[sum_pop_2020, sum_pop_f_0_2020, sum_pop_f_10_...WorldPop gridded population, 2020, Unconstrain...Stevens FR, Gaughan AE, Linard C, Tatem AJ (20...World Pop, https://www.worldpop.org/methods/po...sum100 mts
1Socio-economicNighttime LightsSum of luminosity values measured by monthly c...Monthly composites generated by NASA through t...[ntl_sum_yyyymm]World Bank - Light Every Night, https://regist...NaNNASA, World Banksum500 mts
2ExposureFlood AreaArea where flood depth is greater than 50 cm, ...Flood data combines fluvial, pluvial, and coas...[flood_area_100, flood_area_1000]Fathom 3.0 High Resolution Global Flood Maps I...Wing et al. (2024) A 30 m Global Flood Inundat...Fathom, https://www.fathom.global/sum30 mts
3ExposurePopulation Exposed to FloodsPopulation where flood depth is greater than 5...Flood data is intersected with population grid...[flood_pop_100, flood_pop_1000]Fathom 3.0 High Resolution Global Flood Maps I...Wing et al. (2024) A 30 m Global Flood Inundat...Fathom, https://www.fathom.global/sum of intersect30 mts and 100 mts
4ConflictNumber of Conflict EventsSum of conflict events (ACLED).Conflict data is filtered for event types and ...[acled_events_yyyy]Armed Conflict Location and Event Data (ACLED)...https://acleddata.com/article-categories/gener...ACLED, https://acleddata.com/countpoint data
\n", + "
" + ], + "text/plain": [ + " Theme Name \\\n", + "0 Demographics Population \n", + "1 Socio-economic Nighttime Lights \n", + "2 Exposure Flood Area \n", + "3 Exposure Population Exposed to Floods \n", + "4 Conflict Number of Conflict Events \n", + "\n", + " Description \\\n", + "0 Gridded population disaggregated by gender. \n", + "1 Sum of luminosity values measured by monthly c... \n", + "2 Area where flood depth is greater than 50 cm, ... \n", + "3 Population where flood depth is greater than 5... \n", + "4 Sum of conflict events (ACLED). \n", + "\n", + " Methodological Notes \\\n", + "0 Global raster files are processed for each hex... \n", + "1 Monthly composites generated by NASA through t... \n", + "2 Flood data combines fluvial, pluvial, and coas... \n", + "3 Flood data is intersected with population grid... \n", + "4 Conflict data is filtered for event types and ... \n", + "\n", + " Variables \\\n", + "0 [sum_pop_2020, sum_pop_f_0_2020, sum_pop_f_10_... \n", + "1 [ntl_sum_yyyymm] \n", + "2 [flood_area_100, flood_area_1000] \n", + "3 [flood_pop_100, flood_pop_1000] \n", + "4 [acled_events_yyyy] \n", + "\n", + " Source Data \\\n", + "0 WorldPop gridded population, 2020, Unconstrain... \n", + "1 World Bank - Light Every Night, https://regist... \n", + "2 Fathom 3.0 High Resolution Global Flood Maps I... \n", + "3 Fathom 3.0 High Resolution Global Flood Maps I... \n", + "4 Armed Conflict Location and Event Data (ACLED)... \n", + "\n", + " Citation source \\\n", + "0 Stevens FR, Gaughan AE, Linard C, Tatem AJ (20... \n", + "1 NaN \n", + "2 Wing et al. (2024) A 30 m Global Flood Inundat... \n", + "3 Wing et al. (2024) A 30 m Global Flood Inundat... \n", + "4 https://acleddata.com/article-categories/gener... \n", + "\n", + " Organization Method \\\n", + "0 World Pop, https://www.worldpop.org/methods/po... 
sum \n", + "1 NASA, World Bank sum \n", + "2 Fathom, https://www.fathom.global/ sum \n", + "3 Fathom, https://www.fathom.global/ sum of intersect \n", + "4 ACLED, https://acleddata.com/ count \n", + "\n", + " Resolution \n", + "0 100 mts \n", + "1 500 mts \n", + "2 30 mts \n", + "3 30 mts and 100 mts \n", + "4 point data " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sources.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Catalog \n", + "\n", + "Basic description of project and dataset. \n", + "Can link to World Bank metadata page with appropriate schema (DDH or NADA). \n", + "See for example, https://nada-demo.ihsn.org/index.php/catalog/55/ or https://datacatalog.worldbank.org/search/dataset/0064614/Harmonized-Sub-National-Food-Security-Data" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "catalog = Catalog(\n", + " id=\"space2stats-catalog\", \n", + " description=overview.loc[\"Description Resource\", \"Value\"], # select the Value column by label (as the nada lookups below do) rather than by position\n", + " title=overview.loc[\"Title\", \"Value\"],\n", + " extra_fields={\n", + " \"License\": overview.loc[\"License\", \"Value\"],\n", + " \"Responsible Party\": nada.loc[\"Responsible party\", \"Value\"],\n", + " \"Purpose\": nada.loc[\"Purpose\", \"Value\"],\n", + " \"Keywords\": [\"space2stats\", \"sub-national\", \"h3\", \"hexagons\", \"global\"]\n", + " },\n", + " href=\"https://worldbank.github.io/DECAT_Space2Stats/stac/catalog.json\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### STAC Item\n", + "\n", + "Represent the global H3 parquet file with column descriptions for each variable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "# Look descriptions up by variable name once; if a variable is listed twice the first entry wins.\n", + "descriptions = feature_catalog.drop_duplicates('variable').set_index('variable')['description']\n", + "missing = [column for column in gdf.columns if column != 'geometry' and column not in descriptions.index]\n", + "if missing:\n", + "    raise KeyError(f\"Columns missing from the Feature Catalog sheet: {missing}\")\n", + "data_dict = [\n", + "    {\"name\": column, \"description\": descriptions.loc[column], \"type\": str(gdf[column].dtype)}\n", + "    for column in gdf.columns\n", + "    if column != 'geometry'\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "gdf_types = {name: str(dtype) for name, dtype in gdf.dtypes.items()}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the [table](https://github.com/stac-extensions/table) extension here. Fio-stac also builds `vector:layers` property, not sure if it's necessary." + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bb = gdf.total_bounds.tolist() # [minx, miny, maxx, maxy] over every hexagon\n", + "geom = Polygon.from_bounds(*bb)\n", + "\n", + "item = Item(\n", + " id=\"space2stats\",\n", + " geometry=geom.__geo_interface__,\n", + " bbox=bb,\n", + " datetime=datetime.now(UTC), # timezone-aware: a naive now() is local wall-clock time, which would be written out as though it were UTC\n", + " properties={\n", + " \"name\": \"Space2Stats H3 Data\",\n", + " \"description\": \"GeoParquet dataset with h3 hexagons (level 6) covering the globe. Users can access data through an API, specifying variables and areas of interest.\", \n", + " \"table:primary_geometry\" : \"geometry\",\n", + " \"table:columns\" : data_dict,\n", + " \"vector:layers\" : {\n", + " \"space2stats\": gdf_types,\n", + " }\n", + " },\n", + " stac_extensions = ['https://stac-extensions.github.io/table/v1.2.0/schema.json']\n", + " # assets={\n", + " # \"data\": Asset(href=out_file, media_type=\"application/geo+json\")\n", + " # } \n", + ")\n", + "item" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
    \n", + " \n", + " \n", + " \n", + "
  • \n", + " rel\n", + " \"item\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " href\n", + " \"https://worldbank.github.io/DECAT_Space2Stats/stac/space2stats/space2stats.json\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " type\n", + " \"application/json\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " title\n", + " \"Space2Stats Item\"\n", + "
  • \n", + " \n", + " \n", + " \n", + "
\n", + "
\n", + "
" + ], + "text/plain": [ + ">" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "catalog.add_item(item, title=\"Space2Stats Item\")" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n", + "[]\n" + ] + } + ], + "source": [ + "print(list(catalog.get_children()))\n", + "print(list(catalog.get_items()))" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "* \n", + " * \n" + ] + } + ], + "source": [ + "catalog.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Assets\n", + "\n", + "Can store additional information about authors, the source for input data, how it was processed etc. \n", + "Add another asset for API docs." + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
    \n", + " \n", + " \n", + " \n", + "
  • \n", + " href\n", + " \"./sources.json\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " type\n", + " \"application/json\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " title\n", + " \"Sources Metadata\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " roles[] 1 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + " \"metadata\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + "
\n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sources_path = join(\".\", \"stac\", \"space2stats\", \"sources.json\") # \"space2stats\"\n", + "asset = Asset(\n", + " href=\"./sources.json\",\n", + " title=\"Sources Metadata\",\n", + " media_type=\"application/json\",\n", + " roles=[\"metadata\"]\n", + " )\n", + "asset" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ + "item.add_asset(\"sources-metadata\", asset)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "
    \n", + " \n", + " \n", + " \n", + "
  • \n", + " href\n", + " \"https://space2stats.ds.io/docs\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " type\n", + " \"text/html\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " title\n", + " \"API Documentation\"\n", + "
  • \n", + " \n", + " \n", + " \n", + " \n", + "
  • \n", + " roles[] 1 items\n", + " \n", + "
      \n", + " \n", + " \n", + " \n", + "
    • \n", + " 0\n", + " \"metadata\"\n", + "
    • \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + "
  • \n", + " \n", + " \n", + "
\n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "asset_api = Asset(\n", + " href=\"https://space2stats.ds.io/docs\",\n", + " title=\"API Documentation\",\n", + " media_type=\"text/html\",\n", + " roles=[\"metadata\"]\n", + " )\n", + "asset_api" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "item.add_asset(\"api-docs\", asset_api)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save Demo" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n", + "False\n" + ] + } + ], + "source": [ + "print(catalog.get_self_href() is None)\n", + "print(item.get_self_href() is None)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [], + "source": [ + "# catalog.normalize_hrefs(join(\".\", \"stac\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://worldbank.github.io/DECAT_Space2Stats/stac/catalog.json\n", + "https://worldbank.github.io/DECAT_Space2Stats/stac/space2stats/space2stats.json\n" + ] + } + ], + "source": [ + "print(catalog.get_self_href())\n", + "print(item.get_self_href())" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [], + "source": [ + "catalog.save(catalog_type=CatalogType.RELATIVE_PUBLISHED, dest_href=join(\".\", \"stac\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [], + "source": [ + "sources.to_json(\n", + " sources_path, \n", + " orient = 'records',\n", + " indent = 4\n", + " )" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "titi", + "language": "python", + "name": "python3" + }, + 
"language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/space2stats_api/src/space2stats_ingest/METADATA/stac/catalog.json b/space2stats_api/src/space2stats_ingest/METADATA/stac/catalog.json new file mode 100644 index 0000000..0076c8e --- /dev/null +++ b/space2stats_api/src/space2stats_ingest/METADATA/stac/catalog.json @@ -0,0 +1,36 @@ +{ + "type": "Catalog", + "id": "space2stats-catalog", + "stac_version": "1.0.0", + "description": "This database contains geospatial statistics for the entire globe standardized to a hexagonal grid. The spatial unit of the dataset is the H3 level 6 (approximately 36 sq. km. per cell). The variables cover a wide range of geographic themes relevant to international development, including demographic, socio-economic, environmental, climate, and infrastructure. An API enables users to query, access, and aggregate statistics from the Space2Stats database. 
The purpose of this API is to facilitate the generation of sub-national geospatial aggregates for any administrative boundary set.", + "links": [ + { + "rel": "root", + "href": "./catalog.json", + "type": "application/json", + "title": "Space2Stats Database" + }, + { + "rel": "self", + "href": "https://worldbank.github.io/DECAT_Space2Stats/stac/catalog.json", + "type": "application/json" + }, + { + "rel": "item", + "href": "./space2stats/space2stats.json", + "type": "application/json", + "title": "Space2Stats Item" + } + ], + "License": "Creative Commons Attribution 4.0", + "Responsible Party": "Ben Stewart (Task Leader), Andres Chamorro (Collaborator), Development Data Group (DECDG), World Bank", + "Purpose": "The purpose of this API is to facilitate the generation of sub-national geospatial aggregates for any administrative boundary set.", + "Keywords": [ + "space2stats", + "sub-national", + "h3", + "hexagons", + "global" + ], + "title": "Space2Stats Database" +} \ No newline at end of file diff --git a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/sources.json b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/sources.json new file mode 100644 index 0000000..f346e74 --- /dev/null +++ b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/sources.json @@ -0,0 +1,126 @@ +[ + { + "Theme":"Demographics", + "Name":"Population", + "Description":"Gridded population disaggregated by gender.", + "Methodological Notes":"Global raster files are processed for each hexagonal grid using zonal statistics.", + "Variables":[ + "sum_pop_2020", + "sum_pop_f_0_2020", + "sum_pop_f_10_2020", + "sum_pop_f_15_2020", + "sum_pop_f_1_2020", + "sum_pop_f_20_2020", + "sum_pop_f_25_2020", + "sum_pop_f_30_2020", + "sum_pop_f_35_2020", + "sum_pop_f_40_2020", + "sum_pop_f_45_2020", + "sum_pop_f_50_2020", + "sum_pop_f_55_2020", + "sum_pop_f_5_2020", + "sum_pop_f_60_2020", + "sum_pop_f_65_2020", + "sum_pop_f_70_2020", + "sum_pop_f_75_2020", + 
"sum_pop_f_80_2020", + "sum_pop_m_0_2020", + "sum_pop_m_10_2020", + "sum_pop_m_15_2020", + "sum_pop_m_1_2020", + "sum_pop_m_20_2020", + "sum_pop_m_25_2020", + "sum_pop_m_30_2020", + "sum_pop_m_35_2020", + "sum_pop_m_40_2020", + "sum_pop_m_45_2020", + "sum_pop_m_50_2020", + "sum_pop_m_55_2020", + "sum_pop_m_5_2020", + "sum_pop_m_60_2020", + "sum_pop_m_65_2020", + "sum_pop_m_70_2020", + "sum_pop_m_75_2020", + "sum_pop_m_80_2020", + "sum_pop_m_2020", + "sum_pop_f_2020" + ], + "Source Data":"WorldPop gridded population, 2020, Unconstrained, UN-Adjusted, https:\/\/www.worldpop.org\/methods\/top_down_constrained_vs_unconstrained\/", + "Citation source":"Stevens FR, Gaughan AE, Linard C, Tatem AJ (2015) Disaggregating Census Data for Population Mapping Using Random Forests with Remotely-Sensed and Ancillary Data. ", + "Organization":"World Pop, https:\/\/www.worldpop.org\/methods\/populations", + "Method":"sum", + "Resolution":"100 mts" + }, + { + "Theme":"Socio-economic", + "Name":"Nighttime Lights", + "Description":"Sum of luminosity values measured by monthly composites from VIIRS satellite.", + "Methodological Notes":"Monthly composites generated by NASA through the Lights Every Night partnership.", + "Variables":[ + "ntl_sum_yyyymm" + ], + "Source Data":"World Bank - Light Every Night, https:\/\/registry.opendata.aws\/wb-light-every-night\/", + "Citation source":null, + "Organization":"NASA, World Bank", + "Method":"sum", + "Resolution":"500 mts" + }, + { + "Theme":"Exposure", + "Name":"Flood Area", + "Description":"Area where flood depth is greater than 50 cm, 1-in-100 or 1000 return period.", + "Methodological Notes":"Flood data combines fluvial, pluvial, and coastal flood exposure using the maximum value. 
Return period indicates likelihood of disaster (1 in 100 years).", + "Variables":[ + "flood_area_100", + "flood_area_1000" + ], + "Source Data":"Fathom 3.0 High Resolution Global Flood Maps Including Climate Scenarios, https:\/\/datacatalog.worldbank.org\/search\/dataset\/0065653\/Fathom-3-0---High-Resolution-Global-Flood-Maps-Including-Climate-Scenarios", + "Citation source":"Wing et al. (2024) A 30 m Global Flood Inundation Model for Any Climate Scenario. https:\/\/doi.org\/10.1029\/2023WR036460", + "Organization":"Fathom, https:\/\/www.fathom.global\/", + "Method":"sum", + "Resolution":"30 mts" + }, + { + "Theme":"Exposure", + "Name":"Population Exposed to Floods", + "Description":"Population where flood depth is greater than 50 cm, 1-in-100 or 1000 return period.", + "Methodological Notes":"Flood data is intersected with population grid to estimate population exposed.", + "Variables":[ + "flood_pop_100", + "flood_pop_1000" + ], + "Source Data":"Fathom 3.0 High Resolution Global Flood Maps Including Climate Scenarios, https:\/\/datacatalog.worldbank.org\/search\/dataset\/0065653\/Fathom-3-0---High-Resolution-Global-Flood-Maps-Including-Climate-Scenarios", + "Citation source":"Wing et al. (2024) A 30 m Global Flood Inundation Model for Any Climate Scenario. 
https:\/\/doi.org\/10.1029\/2023WR036460", + "Organization":"Fathom, https:\/\/www.fathom.global\/", + "Method":"sum of intersect", + "Resolution":"30 mts and 100 mts" + }, + { + "Theme":"Conflict", + "Name":"Number of Conflict Events", + "Description":"Sum of conflict events (ACLED).", + "Methodological Notes":"Conflict data is filtered for event types and then aggregated by hexagon (count).", + "Variables":[ + "acled_events_yyyy" + ], + "Source Data":"Armed Conflict Location and Event Data (ACLED), https:\/\/acleddata.com\/data\/", + "Citation source":"https:\/\/acleddata.com\/article-categories\/general-methodology\/", + "Organization":"ACLED, https:\/\/acleddata.com\/", + "Method":"count", + "Resolution":"point data" + }, + { + "Theme":"Conflict", + "Name":"Number of Conflict Fatalities", + "Description":"Sum of estimated fatalities from conflict events (ACLED).", + "Methodological Notes":"Conflict data is filtered for event types and then aggregated by hexagon (sum of fatalities).", + "Variables":[ + "acled_fatalities_yyyy" + ], + "Source Data":"Armed Conflict Location and Event Data (ACLED), https:\/\/acleddata.com\/data\/", + "Citation source":"https:\/\/acleddata.com\/article-categories\/general-methodology\/", + "Organization":"ACLED, https:\/\/acleddata.com\/", + "Method":"sum", + "Resolution":"point data" + } +] \ No newline at end of file diff --git a/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/space2stats.json b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/space2stats.json new file mode 100644 index 0000000..c135088 --- /dev/null +++ b/space2stats_api/src/space2stats_ingest/METADATA/stac/space2stats/space2stats.json @@ -0,0 +1,308 @@ +{ + "type": "Feature", + "stac_version": "1.0.0", + "id": "space2stats", + "properties": { + "name": "Space2Stats H3 Data", + "description": "GeoParquet dataset with h3 hexagons (level 6) covering the globe. 
Users can access data through an API, specifying variables and areas of interest.", + "table:primary_geometry": "geometry", + "table:columns": [ + { + "name": "hex_id", + "description": "H3 unique identifier", + "type": "object" + }, + { + "name": "sum_pop_f_0_2020", + "description": "Total population female, ages 0 to 1, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_10_2020", + "description": "Total population female, ages 10 to 15, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_15_2020", + "description": "Total population female, ages 15 to 20, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_1_2020", + "description": "Total population female, ages 1 to 5, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_20_2020", + "description": "Total population female, ages 20 to 25, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_25_2020", + "description": "Total population female, ages 25 to 30, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_30_2020", + "description": "Total population female, ages 30 to 35, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_35_2020", + "description": "Total population female, ages 35 to 40, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_40_2020", + "description": "Total population female, ages 40 to 45, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_45_2020", + "description": "Total population female, ages 45 to 50, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_50_2020", + "description": "Total population female, ages 50 to 55, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_55_2020", + "description": "Total population female, ages 55 to 60, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_5_2020", + "description": "Total population female, ages 5 to 10, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_60_2020", + "description": "Total population female, ages 60 to 65, 2020", + "type": "float64" + }, + { + "name": 
"sum_pop_f_65_2020", + "description": "Total population female, ages 65 to 70, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_70_2020", + "description": "Total population female, ages 70 to 75, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_75_2020", + "description": "Total population female, ages 75 to 80, 2020", + "type": "float64" + }, + { + "name": "sum_pop_f_80_2020", + "description": "Total population female, ages 80 and above, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_0_2020", + "description": "Total population male, ages 0 to 1, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_10_2020", + "description": "Total population male, ages 10 to 15, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_15_2020", + "description": "Total population male, ages 15 to 20, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_1_2020", + "description": "Total population male, ages 1 to 5, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_20_2020", + "description": "Total population male, ages 20 to 25, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_25_2020", + "description": "Total population male, ages 25 to 30, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_30_2020", + "description": "Total population male, ages 30 to 35, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_35_2020", + "description": "Total population male, ages 35 to 40, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_40_2020", + "description": "Total population male, ages 40 to 45, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_45_2020", + "description": "Total population male, ages 45 to 50, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_50_2020", + "description": "Total population male, ages 50 to 55, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_55_2020", + "description": "Total population male, ages 55 to 60, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_5_2020", + 
"description": "Total population male, ages 5 to 10, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_60_2020", + "description": "Total population male, ages 60 to 65, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_65_2020", + "description": "Total population male, ages 65 to 70, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_70_2020", + "description": "Total population male, ages 70 to 75, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_75_2020", + "description": "Total population male, ages 75 to 80, 2020", + "type": "float64" + }, + { + "name": "sum_pop_m_80_2020", + "description": "Total population male, ages 80 and above, 2020", + "type": "float64" + } + ], + "vector:layers": { + "space2stats": { + "hex_id": "object", + "sum_pop_f_0_2020": "float64", + "sum_pop_f_10_2020": "float64", + "sum_pop_f_15_2020": "float64", + "sum_pop_f_1_2020": "float64", + "sum_pop_f_20_2020": "float64", + "sum_pop_f_25_2020": "float64", + "sum_pop_f_30_2020": "float64", + "sum_pop_f_35_2020": "float64", + "sum_pop_f_40_2020": "float64", + "sum_pop_f_45_2020": "float64", + "sum_pop_f_50_2020": "float64", + "sum_pop_f_55_2020": "float64", + "sum_pop_f_5_2020": "float64", + "sum_pop_f_60_2020": "float64", + "sum_pop_f_65_2020": "float64", + "sum_pop_f_70_2020": "float64", + "sum_pop_f_75_2020": "float64", + "sum_pop_f_80_2020": "float64", + "sum_pop_m_0_2020": "float64", + "sum_pop_m_10_2020": "float64", + "sum_pop_m_15_2020": "float64", + "sum_pop_m_1_2020": "float64", + "sum_pop_m_20_2020": "float64", + "sum_pop_m_25_2020": "float64", + "sum_pop_m_30_2020": "float64", + "sum_pop_m_35_2020": "float64", + "sum_pop_m_40_2020": "float64", + "sum_pop_m_45_2020": "float64", + "sum_pop_m_50_2020": "float64", + "sum_pop_m_55_2020": "float64", + "sum_pop_m_5_2020": "float64", + "sum_pop_m_60_2020": "float64", + "sum_pop_m_65_2020": "float64", + "sum_pop_m_70_2020": "float64", + "sum_pop_m_75_2020": "float64", + "sum_pop_m_80_2020": "float64", + 
"geometry": "geometry" + } + }, + "datetime": "2024-10-07T11:21:25.944150Z" + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + -179.99999561620714, + -89.98750455101016 + ], + [ + -179.99999561620714, + 89.98750455101016 + ], + [ + 179.99999096313272, + 89.98750455101016 + ], + [ + 179.99999096313272, + -89.98750455101016 + ], + [ + -179.99999561620714, + -89.98750455101016 + ] + ] + ] + }, + "links": [ + { + "rel": "root", + "href": "../catalog.json", + "type": "application/json", + "title": "Space2Stats Database" + }, + { + "rel": "parent", + "href": "../catalog.json", + "type": "application/json", + "title": "Space2Stats Database" + } + ], + "assets": { + "sources-metadata": { + "href": "./sources.json", + "type": "application/json", + "title": "Sources Metadata", + "roles": [ + "metadata" + ] + }, + "api-docs": { + "href": "https://space2stats.ds.io/docs", + "type": "text/html", + "title": "API Documentation", + "roles": [ + "metadata" + ] + } + }, + "bbox": [ + -179.99999561620714, + -89.98750455101016, + 179.99999096313272, + 89.98750455101016 + ], + "stac_extensions": [ + "https://stac-extensions.github.io/table/v1.2.0/schema.json" + ] +} \ No newline at end of file