Skip to content

Commit

Permalink
Merge pull request #107 from worldbank/viirs_metadata
Browse files Browse the repository at this point in the history
Adding NTL Metadata, seriously
  • Loading branch information
Gabe-Levin authored Dec 18, 2024
2 parents 30543c4 + 7e2bcd7 commit a81439f
Show file tree
Hide file tree
Showing 14 changed files with 4,050 additions and 748 deletions.
1,382 changes: 1,382 additions & 0 deletions docs/user-docs/space2stats_nighttime_lights.ipynb

Large diffs are not rendered by default.

741 changes: 741 additions & 0 deletions notebooks/MP_SCRIPTS/NighttimeLights/Combine_CSV_into_parquet.ipynb

Large diffs are not rendered by default.

519 changes: 519 additions & 0 deletions notebooks/MP_SCRIPTS/NighttimeLights/TEST_zonal_stats_data_NTL.ipynb

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,7 @@ def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False):
data_prefix = "VIIRS_ANNUAL_EOG"

# Get list of nighttime lights VIIRS data
# ntl_files = ntl.aws_search_ntl()
ntl_folder = "/home/public/Data/GLOBAL/NighttimeLights/VIIRS_ANNUAL_EOG_V21"
ntl_files = [
os.path.join(ntl_folder, x)
for x in os.listdir(ntl_folder)
if x.endswith(".tif")
]
ntl_files = ntl.aws_search_ntl()

# h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False)
admin_bounds = "/home/wb411133/data/Global/ADMIN/Admin2_Polys.shp"
Expand Down
247 changes: 247 additions & 0 deletions notebooks/MP_SCRIPTS/Urbanization/Combine_CSV_into_parquet.ipynb

Large diffs are not rendered by default.

771 changes: 771 additions & 0 deletions notebooks/MP_SCRIPTS/Urbanization/TEST_zonal_stats_data.ipynb

Large diffs are not rendered by default.

173 changes: 173 additions & 0 deletions notebooks/MP_SCRIPTS/Urbanization/zonal_urbanization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
import multiprocessing
import os
import sys

import GOSTrocks.ntlMisc as ntl
import GOSTrocks.rasterMisc as rMisc
import pandas as pd
from GOSTrocks.misc import tPrint
from h3 import h3

# import geopandas as gpd
# import numpy as np


sys.path.append("../../src")
import global_zonal
import h3_helper

AWS_S3_BUCKET = "wbg-geography01"
# Credentials are read from the environment; each is None when the variable
# is unset.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_SESSION_TOKEN = os.getenv("AWS_SESSION_TOKEN")


def s3_output_path(prefix, h3_key, filename):
    """Return the ``s3://`` URL of one per-tile output CSV.

    Args:
        prefix: Dataset folder name under ``Space2Stats/h3_stats_data/GLOBAL``.
        h3_key: Key of the tile as it appears in the level-1 H3 dictionary.
        filename: Name of the CSV file to write.

    Returns:
        The full URL inside ``AWS_S3_BUCKET``.
    """
    key = f"Space2Stats/h3_stats_data/GLOBAL/{prefix}/{h3_key}/{filename}"
    # Built with "/" explicitly: this is a URL, not a local filesystem path.
    return f"s3://{AWS_S3_BUCKET}/{key}"


def output_exists(path):
    """Return True when a CSV can already be read from *path*.

    This is a best-effort probe used to skip tiles that were processed in an
    earlier run. Any failure to read (missing object, bad credentials, empty
    file, ...) is treated as "not done yet", so the tile is queued again.
    """
    try:
        pd.read_csv(path)
    except Exception:
        return False
    return True


def write_result(combo):
    """Write one zonal result to the location named by its own key.

    Args:
        combo: Mapping whose first key is the output path and whose value is
            an object exposing ``to_csv`` (presumably a pandas DataFrame --
            confirm against ``global_zonal``).

    Returns:
        The output path that was written.
    """
    out_file = next(iter(combo))
    combo[out_file].to_csv(
        out_file,
        storage_options={
            "key": AWS_ACCESS_KEY_ID,
            "secret": AWS_SECRET_ACCESS_KEY,
            "token": AWS_SESSION_TOKEN,
        },
    )
    return out_file


def run_pool(func, job_args, max_procs):
    """Run ``func(*args)`` for every entry of *job_args* in a process pool.

    Args:
        func: Picklable callable to execute in the workers.
        job_args: List of positional-argument lists, one per job.
        max_procs: Upper bound on the number of worker processes.

    Returns:
        List of results in the same order as *job_args*; an empty list when
        there is nothing to do.
    """
    if not job_args:
        # multiprocessing.Pool rejects processes=0, so short-circuit here.
        return []
    n_procs = min(max_procs, len(job_args))
    with multiprocessing.Pool(processes=n_procs) as pool:
        return pool.starmap(func, job_args)


if __name__ == "__main__":
    # Run switches. ``multiprocess`` was referenced but never defined in the
    # previous revision (NameError); set it to False to process tiles one at
    # a time, which is easier to debug.
    multiprocess = True
    verbose = True
    run_urban = False
    run_urban_pop = False
    tPrint("Starting")
    h3_level = 6
    data_prefix = "Urbanization"
    data_prefix_pop = "Urbanization_Pop"

    # Urbanization layers
    unq_urban = [11, 12, 13, 21, 22, 23, 30]
    ghsl_folder = "/home/public/Data/GLOBAL/GHSL/"
    ghs_smod = os.path.join(
        ghsl_folder, "SMOD", "GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif"
    )
    ghs_pop = os.path.join(
        ghsl_folder, "POP", "GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif"
    )

    h3_1_list = h3_helper.generate_lvl1_lists(
        h3_level, return_gdf=True, buffer0=True, read_pickle=True, write_pickle=False
    )
    if verbose:
        tPrint("H3_1 list generated")

    # Build one argument list per tile. Each tile is queued at most once, and
    # only for the calculations that are switched on.
    urban_pop_args = []
    urban_args = []
    for h3_1_key, cur_gdf in h3_1_list.items():
        if run_urban_pop:
            pop_full_path = s3_output_path(
                data_prefix_pop, h3_1_key, "GHS_POP_2020_Urban_Breakdown.csv"
            )
            # Skip tiles whose output is already on S3 so that an interrupted
            # run can be resumed.
            if not output_exists(pop_full_path):
                urban_pop_args.append(
                    [cur_gdf, "shape_id", ghs_pop, ghs_smod, pop_full_path, unq_urban]
                )
        if run_urban:
            urban_full_path = s3_output_path(
                data_prefix, h3_1_key, "GHS_SMOD_2020.csv"
            )
            urban_args.append(
                [cur_gdf, "shape_id", ghs_smod, unq_urban, urban_full_path]
            )

    if run_urban:
        tPrint(f"Running calculations on urban: {len(urban_args)} processes")
        if multiprocess:
            results = run_pool(global_zonal.zonal_stats_categories, urban_args, 70)
            tPrint(f"Finished urban calculations: {len(results)}")
            for combo in results:
                write_result(combo)
        else:
            for a in urban_args:
                combo = global_zonal.zonal_stats_categories(*a)
                out_file = write_result(combo)
                tPrint(f"Finished {out_file}")

    if run_urban_pop:
        tPrint(
            f"Running calculations on urban population: {len(urban_pop_args)} processes"
        )
        if multiprocess:
            results = run_pool(
                global_zonal.zonal_stats_categorical, urban_pop_args, 40
            )
            tPrint(f"Finished multiprocessing urban pop calculations: {len(results)}")
            for combo in results:
                write_result(combo)
        else:
            for a in urban_pop_args:
                # NOTE(review): verbose/minVal are only passed on this
                # sequential path; the pool path relies on the function's
                # defaults. Confirm both paths are meant to agree.
                combo = global_zonal.zonal_stats_categorical(
                    *a, verbose=verbose, minVal=0
                )
                tPrint(f"Completed {next(iter(combo))}")
                write_result(combo)
    tPrint("Finished")
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,17 @@
},
{
"rel": "item",
"href": "./nighttime_lights_2013/nighttime_lights_2013.json",
"href": "./urbanization_ghssmod/urbanization_ghssmod.json",
"type": "application/json",
"title": "Nighttime Lights"
"title": "Urbanization by population and by area"
},
{
"rel": "item",
"href": "./urbanization_ghssmod/urbanization_ghssmod.json",
"href": "./nighttime_lights/nighttime_lights.json",
"type": "application/json",
"title": "Urbanization by population and by area"
}
"title": "Nighttime Lights"
},

],
"Title": "Space2Stats Database",
"Description": "This database contains geospatial statistics for the entire globe standardized to a hexagonal grid. The spatial unit of the dataset is the H3 level 6 (approximately 36 sq. km. per cell). The variables cover a wide range of geographic themes relevant to international development, including demographic, socio-economic, environmental, climate, and infrastructure. An API enables users to query, access, and aggregate statistics from the Space2Stats database. The purpose of this API is to facilitate the generation of sub-national geospatial aggregates for any administrative boundary set.",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
{
"type": "Feature",
"stac_version": "1.0.0",
"stac_extensions": [
"https://stac-extensions.github.io/table/v1.2.0/schema.json",
"https://stac-extensions.github.io/scientific/v1.0.0/schema.json"
],
"id": "nighttime_lights",
"geometry": {
"type": "Polygon",
"coordinates": [
[
[
-179.99999561620714,
-89.98750455101016
],
[
-179.99999561620714,
89.98750455101016
],
[
179.99999096313272,
89.98750455101016
],
[
179.99999096313272,
-89.98750455101016
],
[
-179.99999561620714,
-89.98750455101016
]
]
]
},
"bbox": [
-179.99999561620714,
-89.98750455101016,
179.99999096313272,
89.98750455101016
],
"properties": {
"name": "Nighttime Lights",
"description": "Sum of luminosity values measured by monthly composites from VIIRS satellite.",
"methodological_notes": "Monthly composites generated by NASA through the Light Every Night partnership.",
"source_data": "World Bank - Light Every Night, https://registry.opendata.aws/wb-light-every-night/",
"sci:citation": "tbd",
"method": "sum",
"resolution": "500 mts",
"themes": "Socio-economic",
"table:columns": [
{
"name": "sum_viirs_ntl_2012",
"description": "Sum of VIIRS nighttime lights brightness for 2012",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2013",
"description": "Sum of VIIRS nighttime lights brightness for 2013",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2014",
"description": "Sum of VIIRS nighttime lights brightness for 2014",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2015",
"description": "Sum of VIIRS nighttime lights brightness for 2015",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2016",
"description": "Sum of VIIRS nighttime lights brightness for 2016",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2017",
"description": "Sum of VIIRS nighttime lights brightness for 2017",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2018",
"description": "Sum of VIIRS nighttime lights brightness for 2018",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2019",
"description": "Sum of VIIRS nighttime lights brightness for 2019",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2020",
"description": "Sum of VIIRS nighttime lights brightness for 2020",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2021",
"description": "Sum of VIIRS nighttime lights brightness for 2021",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2022",
"description": "Sum of VIIRS nighttime lights brightness for 2022",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2023",
"description": "Sum of VIIRS nighttime lights brightness for 2023",
"type": "float64"
},
{
"name": "sum_viirs_ntl_2024",
"description": "Sum of VIIRS nighttime lights brightness for 2024",
"type": "float64"
},
{
"name": "hex_id",
"description": "H3 unique identifier",
"type": "object"
}
],
"datetime": "2024-12-17T09:05:44.687946Z"
},
"links": [
{
"rel": "root",
"href": "../../catalog.json",
"type": "application/json",
"title": "Space2Stats Database"
},
{
"rel": "collection",
"href": "../collection.json",
"type": "application/json",
"title": "Space2Stats Collection"
},
{
"rel": "parent",
"href": "../collection.json",
"type": "application/json",
"title": "Space2Stats Collection"
}
],
"assets": {
"api-docs": {
"href": "https://space2stats.ds.io/docs",
"type": "text/html",
"title": "API Documentation",
"roles": [
"metadata"
]
}
},
"collection": "space2stats-collection"
}
Loading

0 comments on commit a81439f

Please sign in to comment.