Skip to content

Commit

Permalink
merge resolution on cre-arrow branch
Browse files Browse the repository at this point in the history
  • Loading branch information
jpswinski committed Mar 6, 2024
2 parents bd59e5a + d996e49 commit 78f3531
Show file tree
Hide file tree
Showing 110 changed files with 7,758 additions and 1,448 deletions.
9 changes: 4 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,6 @@ if (${ENABLE_H5CORO_ATTRIBUTE_SUPPORT})
target_compile_definitions (slideruleLib PUBLIC H5CORO_ATTRIBUTE_SUPPORT)
endif ()

if (${ENABLE_APACHE_ARROW_10_COMPAT})
message (STATUS "Enabling Apache Arrow 10 compatibility")
target_compile_definitions (slideruleLib PUBLIC APACHE_ARROW_10_COMPAT)
endif ()

if (${ENABLE_BEST_EFFORT_CONDA_ENV})
message (STATUS "Attempting best effort at running in a mixed system and conda environment")
target_compile_definitions (slideruleLib PUBLIC BEST_EFFORT_CONDA_ENV)
Expand Down Expand Up @@ -121,6 +116,10 @@ if(${USE_CCSDS_PACKAGE})
add_subdirectory (packages/ccsds)
endif()

if(${USE_CRE_PACKAGE})
add_subdirectory (packages/cre)
endif()

if(${USE_GEO_PACKAGE})
add_subdirectory (packages/geo)
endif()
Expand Down
22 changes: 22 additions & 0 deletions clients/python/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# conda env create -f environment.yml
name: sliderule
channels:
- conda-forge
dependencies:
- geopandas
- numpy
- pandas
- pip
- pyarrow
- pyproj
- pytables
- pytest
- python
- requests
- scikit-learn
- scipy
- setuptools_scm
- shapely
- tk
- xyzservices
- sliderule
71 changes: 71 additions & 0 deletions clients/python/sliderule/container.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Copyright (c) 2021, University of Washington
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the University of Washington nor the names of its
# contributors may be used to endorse or promote products derived from this
# software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON AND CONTRIBUTORS
# “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import time
import logging
import sliderule
from sliderule import logger

###############################################################################
# GLOBALS
###############################################################################

# profiling times for each major function
profiles = {}

###############################################################################
# LOCAL FUNCTIONS
###############################################################################

###############################################################################
# APIs
###############################################################################


#
# Execute Container
#
def execute (parm):
    '''
    Executes a containerized application using SlideRule

    Parameters
    ----------
    parm: dict
        parameters used to configure the container runtime environment

    Returns
    -------
    str
        json response from the server
    '''
    # time the request for the module-level profiling table
    begin = time.perf_counter()
    response = sliderule.source("cre", {"parms": parm})
    profiles[execute.__name__] = time.perf_counter() - begin
    return response
2 changes: 1 addition & 1 deletion clients/python/sliderule/gedi.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def __flattenbatches(rsps, rectype, batch_column, parm, keep_id, as_numpy_array,

# Check for Output Options
if "output" in parm:
gdf = sliderule.procoutputfile(parm)
gdf = sliderule.procoutputfile(parm, rsps)
profiles["flatten"] = time.perf_counter() - tstart_flatten
return gdf

Expand Down
33 changes: 26 additions & 7 deletions clients/python/sliderule/icesat2.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@
# profiling times for each major function
profiles = {}

# whether exceptions should be rethrown
rethrow_exceptions = False

# icesat2 parameters
CNF_POSSIBLE_TEP = -2
CNF_NOT_CONSIDERED = -1
Expand Down Expand Up @@ -148,7 +151,7 @@ def __flattenbatches(rsps, rectype, batch_column, parm, keep_id, as_numpy_array,

# Check for Output Options
if "output" in parm:
gdf = sliderule.procoutputfile(parm)
gdf = sliderule.procoutputfile(parm, rsps)
profiles["flatten"] = time.perf_counter() - tstart_flatten
return gdf

Expand Down Expand Up @@ -285,7 +288,7 @@ def __build_request(parm, resources, default_asset='icesat2'):
#
# Initialize
#
def init (url=sliderule.service_url, verbose=False, max_resources=earthdata.DEFAULT_MAX_REQUESTED_RESOURCES, loglevel=logging.CRITICAL, organization=sliderule.service_org, desired_nodes=None, time_to_live=60, bypass_dns=False):
def init (url=sliderule.service_url, verbose=False, max_resources=earthdata.DEFAULT_MAX_REQUESTED_RESOURCES, loglevel=logging.CRITICAL, organization=sliderule.service_org, desired_nodes=None, time_to_live=60, bypass_dns=False, rethrow=False):
'''
Initializes the Python client for use with SlideRule and should be called before other ICESat-2 API calls.
This function is a wrapper for the `sliderule.init(...) function </web/rtds/api_reference/sliderule.html#init>`_.
Expand All @@ -300,8 +303,10 @@ def init (url=sliderule.service_url, verbose=False, max_resources=earthdata.DEFA
>>> from sliderule import icesat2
>>> icesat2.init()
'''
global rethrow_exceptions
sliderule.init(url, verbose, loglevel, organization, desired_nodes, time_to_live, bypass_dns, plugins=['icesat2'])
earthdata.set_max_resources(max_resources) # set maximum number of resources allowed per request
rethrow_exceptions = rethrow

#
# ATL06
Expand Down Expand Up @@ -403,7 +408,11 @@ def atl06p(parm, callbacks={}, resources=None, keep_id=False, as_numpy_array=Fal
# Handle Runtime Errors
except RuntimeError as e:
logger.critical(e)
return sliderule.emptyframe()
if rethrow_exceptions:
raise

# Error Case
return sliderule.emptyframe()

#
# Subsetted ATL06
Expand Down Expand Up @@ -480,7 +489,11 @@ def atl06sp(parm, callbacks={}, resources=None, keep_id=False, as_numpy_array=Fa
# Handle Runtime Errors
except RuntimeError as e:
logger.critical(e)
return sliderule.emptyframe()
if rethrow_exceptions:
raise

# Error Case
return sliderule.emptyframe()

#
# Subsetted ATL03
Expand Down Expand Up @@ -548,7 +561,7 @@ def atl03sp(parm, callbacks={}, resources=None, keep_id=False, height_key=None):
# Check for Output Options
if "output" in parm:
profiles[atl03sp.__name__] = time.perf_counter() - tstart
return sliderule.procoutputfile(parm)
return sliderule.procoutputfile(parm, rsps)
else: # Native Output
# Flatten Responses
tstart_flatten = time.perf_counter()
Expand Down Expand Up @@ -639,8 +652,10 @@ def atl03sp(parm, callbacks={}, resources=None, keep_id=False, height_key=None):
# Handle Runtime Errors
except RuntimeError as e:
logger.critical(e)
if rethrow_exceptions:
raise

# Error or No Data
# Error Case
return sliderule.emptyframe()

#
Expand Down Expand Up @@ -719,4 +734,8 @@ def atl08p(parm, callbacks={}, resources=None, keep_id=False, as_numpy_array=Fal
# Handle Runtime Errors
except RuntimeError as e:
logger.critical(e)
return sliderule.emptyframe()
if rethrow_exceptions:
raise

# Error Case
return sliderule.emptyframe()
51 changes: 47 additions & 4 deletions clients/python/sliderule/sliderule.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
MAX_PS_CLUSTER_WAIT_SECS = 600

request_timeout = (10, 120) # (connection, read) in seconds
decode_aux = True

logger = logging.getLogger(__name__)
console = None
Expand Down Expand Up @@ -232,6 +233,10 @@ def __decode_native(rectype, rawdata):
if "PTR" in flags:
continue

# skip auxiliary ("AUX") fields when aux decoding is disabled
if not decode_aux and "AUX" in flags:
continue

# get endianness
if "LE" in flags:
endian = '<'
Expand Down Expand Up @@ -542,18 +547,25 @@ def emptyframe(**kwargs):
#
# Process Output File
#
def procoutputfile(parm, rsps):
    '''
    Resolve the output file produced by a processing request and, when
    requested, open it as a (Geo)DataFrame.

    Parameters
    ----------
    parm: dict
        request parameters; must contain an "output" dict with at least "path"
    rsps: list
        response records from the server; a record of type "arrowrec.remote"
        overrides the local path with a remote url (e.g. a file staged in S3)

    Returns
    -------
    DataFrame, GeoDataFrame, or str
        opened dataframe when output["open_on_complete"] is truthy,
        otherwise the resolved file path/url
    '''
    output = parm["output"]

    # default to the locally requested path
    path = output["path"]

    # a remote record in the responses overrides the local path;
    # .get() tolerates records without a '__rectype' key
    for rsp in rsps:
        if rsp.get('__rectype') == 'arrowrec.remote':
            path = rsp['url']
            break

    if output.get("open_on_complete"):
        if "as_geo" in output and not output["as_geo"]:
            # Return Parquet File as DataFrame
            return geopandas.pd.read_parquet(path)
        else:
            # Return GeoParquet File as GeoDataFrame
            return geopandas.read_parquet(path)
    else:
        # Return Parquet Filename
        return path

#
# Get Values from Raw Buffer
Expand Down Expand Up @@ -883,6 +895,30 @@ def set_rqst_timeout (timeout):
else:
raise FatalError('timeout must be a tuple (<connection timeout>, <read timeout>)')

#
# set_processing_flags
#
def set_processing_flags (aux=True):
    '''
    Sets flags used when processing the record definitions

    Parameters
    ----------
    aux: bool
        decode auxiliary fields (record fields flagged "AUX")

    Raises
    ------
    FatalError
        if aux is not a boolean

    Examples
    --------
    >>> import sliderule
    >>> sliderule.set_processing_flags(aux=False)
    '''
    global decode_aux
    # isinstance is the idiomatic type check (PEP 8) vs. comparing type() directly
    if isinstance(aux, bool):
        decode_aux = aux
    else:
        raise FatalError('aux must be a boolean')


#
# update_available_servers
#
Expand Down Expand Up @@ -1319,6 +1355,13 @@ def toregion(source, tolerance=0.0, cellsize=0.01, n_clusters=1):
datafile = file.read()
os.remove(tempfile)

elif isinstance(source, Polygon):
gdf = geopandas.GeoDataFrame(geometry=[source], crs=EPSG_WGS84)
gdf.to_file(tempfile, driver="GeoJSON")
with open(tempfile, mode='rt') as file:
datafile = file.read()
os.remove(tempfile)

elif isinstance(source, list) and (len(source) >= 4) and (len(source) % 2 == 0):
# create lat/lon lists
if len(source) == 4: # bounding box
Expand Down
Binary file not shown.
4 changes: 2 additions & 2 deletions clients/python/tests/test_ancillary.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def test_atl08_phoreal(self, init):
gdf = icesat2.atl08(parms, "ATL03_20181017222812_02950102_006_02.h5")
assert init
assert len(gdf) == 819
assert abs(gdf["h_dif_ref"].quantile(q=.50) - -0.9146728515625) < 0.000001
assert abs(gdf["h_dif_ref"].quantile(q=.50) - -0.9443359375) < 0.000001
assert abs(gdf["rgt_y"].quantile(q=.50) - 295.0) < 0.000001
assert abs(gdf["sigma_atlas_land%"].quantile(q=.50) - 0.2402431309223175) < 0.000001
assert abs(gdf["sigma_atlas_land%"].quantile(q=.50) - 0.24470525979995728) < 0.000001
assert abs(gdf["cloud_flag_atm%"].quantile(q=.50) - 1.0) < 0.000001
14 changes: 7 additions & 7 deletions clients/python/tests/test_arcticdem.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ def test_nearestneighbour(self, init):
gdf = icesat2.atl06p(parms, resources=[resource])
assert init
assert len(gdf) == 957
assert len(gdf.keys()) == 19
assert gdf["rgt"][0] == 1160
assert gdf["cycle"][0] == 2
assert len(gdf.keys()) == 20
assert gdf["rgt"].iloc[0] == 1160
assert gdf["cycle"].iloc[0] == 2
assert gdf['segment_id'].describe()["min"] == 405231
assert gdf['segment_id'].describe()["max"] == 405902
assert abs(gdf["mosaic.value"].describe()["min"] - 600.4140625) < sigma
Expand All @@ -82,15 +82,15 @@ def test_zonal_stats(self, init):
gdf = icesat2.atl06p(parms, resources=[resource])
assert init
assert len(gdf) == 957
assert len(gdf.keys()) == 26
assert gdf["rgt"][0] == 1160
assert gdf["cycle"][0] == 2
assert len(gdf.keys()) == 27
assert gdf["rgt"].iloc[0] == 1160
assert gdf["cycle"].iloc[0] == 2
assert gdf['segment_id'].describe()["min"] == 405231
assert gdf['segment_id'].describe()["max"] == 405902
assert abs(gdf["mosaic.value"].describe()["min"] - 600.4140625) < sigma
assert gdf["mosaic.count"].describe()["max"] == 81
assert gdf["mosaic.stdev"].describe()["count"] == 957
assert gdf["mosaic.time"][0] == vrtFileTime
assert gdf["mosaic.time"].iloc[0] == vrtFileTime

@pytest.mark.network
class TestStrips:
Expand Down
2 changes: 1 addition & 1 deletion clients/python/tests/test_meritdem.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ def test_sample(self, init):
gdf = raster.sample("merit-dem", [[-172, 51.7], [-172, 51.71], [-172, 51.72], [-172, 51.73], [-172, 51.74]])
sliderule.set_rqst_timeout(default_request_timeout)
assert init
assert gdf["value"][0] == -99990000
assert gdf["value"].iloc[0] == -99990000
assert len(gdf) == 5
Loading

0 comments on commit 78f3531

Please sign in to comment.