From 6a9274df43b6c35eb3488d15b096d4912cd89e99 Mon Sep 17 00:00:00 2001 From: "Benjamin P. Stewart" Date: Tue, 2 Apr 2024 15:43:33 -0400 Subject: [PATCH] Updating multiprocessing for urbanization --- notebooks/S2S_Multiprocessing.ipynb | 249 ++++++++++++++++++++-------- src/h3_helper.py | 12 +- 2 files changed, 191 insertions(+), 70 deletions(-) diff --git a/notebooks/S2S_Multiprocessing.ipynb b/notebooks/S2S_Multiprocessing.ipynb index 6b68ca3..85d4d0f 100644 --- a/notebooks/S2S_Multiprocessing.ipynb +++ b/notebooks/S2S_Multiprocessing.ipynb @@ -9,15 +9,15 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 12, "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "/home/wb411133/.conda/envs/ee/lib/python3.9/site-packages/geopandas/_compat.py:106: UserWarning: The Shapely GEOS version (3.9.1-CAPI-1.14.2) is incompatible with the GEOS version PyGEOS was compiled with (3.10.4-CAPI-1.16.2). Conversions between both will be slow.\n", - " warnings.warn(\n" + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" ] } ], @@ -34,14 +34,18 @@ "from shapely.geometry import Polygon\n", "\n", "sys.path.insert(0, \"/home/wb411133/Code/gostrocks/src\")\n", - "import GOSTRocks.rasterMisc as rMisc\n", - "import GOSTRocks.ntlMisc as ntl\n", - "import GOSTRocks.mapMisc as mapMisc\n", - "from GOSTRocks.misc import tPrint\n", + "import GOSTrocks.rasterMisc as rMisc\n", + "import GOSTrocks.ntlMisc as ntl\n", + "import GOSTrocks.mapMisc as mapMisc\n", + "from GOSTrocks.misc import tPrint\n", "\n", "sys.path.append(\"../src\")\n", "import h3_helper\n", - "import country_zonal\n", + "\n", + "AWS_S3_BUCKET = 'wbg-geography01'\n", + "AWS_ACCESS_KEY_ID = os.getenv(\"AWS_ACCESS_KEY_ID\")\n", + "AWS_SECRET_ACCESS_KEY = os.getenv(\"AWS_SECRET_ACCESS_KEY\")\n", + "AWS_SESSION_TOKEN = os.getenv(\"AWS_SESSION_TOKEN\")\n", "\n", "%load_ext autoreload\n", "%autoreload 2" @@ -49,87 +53,202 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "ename": "RasterioIOError", + "evalue": "J:/Data/GLOBAL/GHSL/Pop/GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif: Permission denied", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mCPLE_OpenFailedError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32mrasterio\\\\_base.pyx:310\u001b[0m, in \u001b[0;36mrasterio._base.DatasetBase.__init__\u001b[1;34m()\u001b[0m\n", + "File \u001b[1;32mrasterio\\\\_base.pyx:221\u001b[0m, in \u001b[0;36mrasterio._base.open_dataset\u001b[1;34m()\u001b[0m\n", + "File \u001b[1;32mrasterio\\\\_err.pyx:221\u001b[0m, in \u001b[0;36mrasterio._err.exc_wrap_pointer\u001b[1;34m()\u001b[0m\n", + "\u001b[1;31mCPLE_OpenFailedError\u001b[0m: J:/Data/GLOBAL/GHSL/Pop/GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif: Permission denied", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mRasterioIOError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[18], line 12\u001b[0m\n\u001b[0;32m 9\u001b[0m global_pop_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mJ:/Data/GLOBAL/GHSL/Pop/GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 11\u001b[0m ghs_smod \u001b[38;5;241m=\u001b[39m rasterio\u001b[38;5;241m.\u001b[39mopen(global_urban_file)\n\u001b[1;32m---> 12\u001b[0m ghs_pop \u001b[38;5;241m=\u001b[39m rasterio\u001b[38;5;241m.\u001b[39mopen(global_pop_file)\n", + "File \u001b[1;32mc:\\wbg\\Anaconda3\\envs\\urban_test\\Lib\\site-packages\\rasterio\\env.py:451\u001b[0m, in \u001b[0;36mensure_env_with_credentials..wrapper\u001b[1;34m(*args, **kwds)\u001b[0m\n\u001b[0;32m 448\u001b[0m session \u001b[38;5;241m=\u001b[39m DummySession()\n\u001b[0;32m 450\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m env_ctor(session\u001b[38;5;241m=\u001b[39msession):\n\u001b[1;32m--> 451\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m f(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n", + "File \u001b[1;32mc:\\wbg\\Anaconda3\\envs\\urban_test\\Lib\\site-packages\\rasterio\\__init__.py:304\u001b[0m, in \u001b[0;36mopen\u001b[1;34m(fp, mode, driver, width, height, count, crs, transform, dtype, nodata, sharing, **kwargs)\u001b[0m\n\u001b[0;32m 301\u001b[0m path \u001b[38;5;241m=\u001b[39m _parse_path(raw_dataset_path)\n\u001b[0;32m 303\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m--> 304\u001b[0m dataset \u001b[38;5;241m=\u001b[39m DatasetReader(path, driver\u001b[38;5;241m=\u001b[39mdriver, sharing\u001b[38;5;241m=\u001b[39msharing, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 305\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mr+\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m 306\u001b[0m dataset \u001b[38;5;241m=\u001b[39m get_writer_for_path(path, driver\u001b[38;5;241m=\u001b[39mdriver)(\n\u001b[0;32m 307\u001b[0m path, mode, driver\u001b[38;5;241m=\u001b[39mdriver, sharing\u001b[38;5;241m=\u001b[39msharing, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[0;32m 308\u001b[0m )\n", + "File \u001b[1;32mrasterio\\\\_base.pyx:312\u001b[0m, in \u001b[0;36mrasterio._base.DatasetBase.__init__\u001b[1;34m()\u001b[0m\n", + "\u001b[1;31mRasterioIOError\u001b[0m: J:/Data/GLOBAL/GHSL/Pop/GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif: Permission denied" + ] + } + ], + "source": [ + "h3_level = 6\n", + "multiprocess=True\n", + "verbose = True\n", + "h3_level = 6\n", + "data_prefix = \"Urbanization\"\n", + "\n", + "admin_bounds = \"J:/Data/GLOBAL/ADMIN/ADMIN2/HighRes_20230328/shp/WB_GAD_ADM2.shp\"\n", + "global_urban_file = \"J:/Data/GLOBAL/GHSL/SMOD/GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif\"\n", + "global_pop_file = \"J:/Data/GLOBAL/GHSL/Pop/GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif\"\n", + "\n", + "ghs_smod = rasterio.open(global_urban_file)\n", + "ghs_pop = rasterio.open(global_pop_file)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "admin_bounds = \"/home/public/Data/GLOBAL/ADMIN/ADMIN2/HighRes_20230328/shp/WB_GAD_ADM2.shp\"\n", - "global_urban_file = \"/home/public/Data/GLOBAL/GHSL/SMOD/GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif\"\n", - "global_pop_file = \"/home/public/Data/GLOBAL/GHSL/Pop/GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif\"" + "h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1;31mSignature:\u001b[0m\n", + "\u001b[0mrMisc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mzonalStats\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0minShp\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0minRaster\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0mbandNum\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0mmask_A\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0mreProj\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0mminVal\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m''\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0mmaxVal\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m''\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0mverbose\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0mrastType\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'N'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0munqVals\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0mweighted\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0mallTouched\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0mcalc_sd\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m \u001b[0mreturn_df\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\n", + "\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mDocstring:\u001b[0m\n", + "Run zonal statistics against an input shapefile. Returns array of SUM, MIN, MAX, and MEAN\n", + "\n", + ":param inShp: input geospatial data to summarize raster\n", + ":type inShp: string path to file of gpd.GeoDataFrame\n", + ":param inRaster: input raster to summarize\n", + ":type inRaster: string path to file or rasterio.DatasetReader\n", + ":param bandNum: band in raster to analyze, defaults to 1\n", + ":type bandNum: int, optional\n", + ":param mask_A: mask the raster data using an identical shaped boolean mask, defaults to None\n", + ":type mask_A: np.array, optional\n", + ":param reProj: whether to reproject data to match, if not, raise a ValueError if CRS mismatch between inShp and inRaster, defaults to False\n", + ":type reProj: bool, optional\n", + ":param minVal: if defined, will only calculate statistics on values above this number, defaults to ''\n", + ":type minVal: number, optional\n", + ":param maxVal: if defined, will only calculate statistics on values below this number, defaults to ''\n", + ":type maxVal: number, optional\n", + ":param verbose: provide additional text updates, defaults to False\n", + ":type verbose: bool, optional\n", + ":param rastType: Type of raster, defaults to 'N' as numerical or 'C' as categorical. If 'C' is used, you should provide unqVals\n", + ":type rastType: str, optional\n", + ":param unqVals: List of unique values to search for in raster, defaults to []\n", + ":type unqVals: list of int, optional\n", + ":param weighted: apply weighted zonal calculations. This will determine the % overlap for each\n", + " raster cell in the defined AOI. Will apply weights in calculations of numerical statistics, defaults to False\n", + ":type weighted: bool, optional\n", + ":param allTouched: whether to include all cells touched in raster calculation, passed to rasterio rasterize function, defaults to False\n", + ":type allTouched: bool, optional\n", + ":param calc_sd: include the standard deviation in calculation, defaults to False\n", + ":type calc_sd: bool, optional\n", + ":param return_df: if true, return result as data frame; defaults to False\n", + ":type return_df: boolean, optional\n", + ":raises ValueError: If CRS mismatch between inShp and inRaster\n", + ":return: array of zonal results - one entry for every feature in inShp. Each entry is SUM, MIN, MAX, MEAN, SD (optional)\n", + ":rtype: array\n", + "\u001b[1;31mFile:\u001b[0m c:\\wbg\\anaconda3\\envs\\urban_test\\lib\\site-packages\\gostrocks\\rastermisc.py\n", + "\u001b[1;31mType:\u001b[0m function" + ] + } + ], + "source": [ + "rMisc.zonalStats?" + ] + }, + { + "cell_type": "code", + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ - "inA = gpd.read_file(admin_bounds)\n", - "inA['ID'] = inA.index #Create ID for indexing" + "def run_zonal_cat(gdf, cur_raster_file, out_file, unq_vals=None, buffer0=False, verbose=False):\n", + " cName = out_file\n", + " if verbose:\n", + " tPrint(f'Starting {cName}')\n", + " if buffer0:\n", + " gdf['geometry'] = gdf['geometry'].buffer(0) \n", + " if not unq_vals is None:\n", + " res = rMisc.zonalStats(gdf, cur_raster_file, rastType='C', unqVals=unq_vals, verbose=False)\n", + " res = pd.DataFrame(res, columns=[f'c_{x}' for x in unq_vals])\n", + " else:\n", + " res = rMisc.zonalStats(gdf, cur_raster_file, minVal=0, verbose=False)\n", + " res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])\n", + " res['id'] = gdf['shape_id'].values\n", + " if verbose:\n", + " tPrint(f'**** finished {cName}')\n", + " return({out_file:res})" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "sel_iso3 = set(inA.loc[inA['WB_REGION'] == \"LCR\"]['WB_A3'].unique())\n", - "sel_iso3 = [x for x in sel_iso3 if x != None]" + "# get counts of urban categorties\n", + "unq_urban = [11,12,13,21,22,23,30]\n", + "processed_list = []\n", + "for h3_0_key, hexes in h3_0_list.items():\n", + " break" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 22, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/wb411133/projects/Space2Stats/COUNTRIES/ECU\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/PAN\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/COL\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/CHL\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/NIC\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/VCT\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/TTO\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/ARG\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/PER\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/HND\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/URY\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/PRY\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/SLV\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/GTM\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/GRD\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/BHS\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/VEN\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/BRB\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/CRI\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/BOL\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/KNA\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/GUY\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/DMA\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/DOM\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/MEX\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/LCA\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/SUR\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/BLZ\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/BRA\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/ATG\n", - "/home/wb411133/projects/Space2Stats/COUNTRIES/JAM\n" + "ename": "TypeError", + "evalue": "expected str, bytes or os.PathLike object, not DatasetReader", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[22], line 11\u001b[0m\n\u001b[0;32m 3\u001b[0m full_path \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124ms3://\u001b[39m\u001b[38;5;124m\"\u001b[39m, AWS_S3_BUCKET, out_s3_key)\n\u001b[0;32m 4\u001b[0m \u001b[38;5;124;03m'''\u001b[39;00m\n\u001b[0;32m 5\u001b[0m \u001b[38;5;124;03mtry:\u001b[39;00m\n\u001b[0;32m 6\u001b[0m \u001b[38;5;124;03m tempPD = pd.read_csv(full_path)\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 9\u001b[0m \n\u001b[0;32m 10\u001b[0m \u001b[38;5;124;03m'''\u001b[39;00m\n\u001b[1;32m---> 11\u001b[0m res \u001b[38;5;241m=\u001b[39m run_zonal_cat(hexes, ghs_smod, full_path, unq_vals\u001b[38;5;241m=\u001b[39munq_urban, buffer0\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 12\u001b[0m res\u001b[38;5;241m.\u001b[39mto_csv(\n\u001b[0;32m 13\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124ms3://\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mAWS_S3_BUCKET\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mout_s3_key\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 14\u001b[0m index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 19\u001b[0m },\n\u001b[0;32m 20\u001b[0m )\n", + "Cell \u001b[1;32mIn[20], line 2\u001b[0m, in \u001b[0;36mrun_zonal_cat\u001b[1;34m(gdf, cur_raster_file, out_file, unq_vals, buffer0, verbose)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_zonal_cat\u001b[39m(gdf, cur_raster_file, out_file, unq_vals\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, buffer0\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, verbose\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[1;32m----> 2\u001b[0m cName \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mos\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mbasename(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mdirname(out_file))\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m-\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mos\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mbasename(cur_raster_file)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m verbose:\n\u001b[0;32m 4\u001b[0m tPrint(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mStarting \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcName\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[1;32m:270\u001b[0m, in \u001b[0;36mbasename\u001b[1;34m(p)\u001b[0m\n", + "File \u001b[1;32m:241\u001b[0m, in \u001b[0;36msplit\u001b[1;34m(p)\u001b[0m\n", + "\u001b[1;31mTypeError\u001b[0m: expected str, bytes or os.PathLike object, not DatasetReader" ] } ], "source": [ - "arg_list = []\n", - "for iso3 in sel_iso3:\n", - " selA = inA.loc[inA['WB_A3'] == iso3].copy()\n", - " out_folder = f\"/home/wb411133/projects/Space2Stats/COUNTRIES/{iso3}\"\n", - " print(out_folder)\n", - " if not os.path.exists(out_folder):\n", - " os.makedirs(out_folder)\n", - " arg_list.append([iso3, selA, 6, global_pop_file, global_urban_file, out_folder])" + "filename = 'GHS_SMOD_2020_counts.csv' \n", + "out_s3_key = f'Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_0_key}/{filename}'\n", + "full_path = os.path.join(\"s3://\", AWS_S3_BUCKET, out_s3_key)\n", + "'''\n", + "try:\n", + " tempPD = pd.read_csv(full_path)\n", + " processed_list.append(filename)\n", + "except:\n", + "\n", + "'''\n", + "res = run_zonal_cat(hexes, ghs_smod, full_path, unq_vals=unq_urban, buffer0=True, verbose=True)\n", + "res.to_csv(\n", + " f\"s3://{AWS_S3_BUCKET}/{out_s3_key}\",\n", + " index=False,\n", + " storage_options={\n", + " \"key\": AWS_ACCESS_KEY_ID,\n", + " \"secret\": AWS_SECRET_ACCESS_KEY,\n", + " \"token\": AWS_SESSION_TOKEN,\n", + " },\n", + " )" ] }, { @@ -218,9 +337,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Earth Engine", + "display_name": "urban_test", "language": "python", - "name": "ee" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -232,7 +351,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.4" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/src/h3_helper.py b/src/h3_helper.py index f34338e..71108d9 100755 --- a/src/h3_helper.py +++ b/src/h3_helper.py @@ -10,14 +10,12 @@ from rasterio.crs import CRS from mpl_toolkits.axes_grid1 import make_axes_locatable from h3 import h3 -from shapely.geometry import Polygon, Point, mapping +from shapely.geometry import Polygon, mapping from shapely.ops import unary_union from urllib.request import urlopen from tqdm import tqdm -import GOSTRocks.rasterMisc as rMisc -import GOSTRocks.ntlMisc as ntl -from GOSTRocks.misc import tPrint +from GOSTrocks.misc import tPrint def generate_h3_gdf(in_gdf, h3_level=7): ''' Generate a GeoDataFrame of h3 grid cells from an input geodataframe @@ -55,11 +53,15 @@ def generate_lvl0_lists(h3_lvl, return_gdf=False, buffer0=False): ---------- h3_lvl : int h3 level to generate children of h0 parents + return_gdf : bool, optional + return a GeoDataFrame instead of a dictionary, by default False + buffer0 : bool, optional + buffer the h3 lvl 0 cells by 0 to fix inherent topological errors, by default False Returns ------- dict - dictionary with keys as lvl0 codes with all children at h3_lvl level as values + dictionary with keys as lvl0 codes with all children at h3_lvl level as values; returns a GeoDataFrame if return_gdf is True """ # Get list of all h3 lvl 0 cells h3_lvl0 = list(h3.get_res0_indexes())