Skip to content

Commit

Permalink
Add dtype validation when rescale=True (#209)
Browse files Browse the repository at this point in the history
  • Loading branch information
RSchueder authored Sep 19, 2023
1 parent 30e7c5b commit 705fd70
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 7 deletions.
1 change: 1 addition & 0 deletions .pdm-python
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/Users/gabe/dev/stackstac/.venv/bin/python
37 changes: 32 additions & 5 deletions stackstac/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@
from .stac_types import ItemSequence
from . import accumulate_metadata, geom_utils

ASSET_TABLE_DT = np.dtype([("url", object), ("bounds", "float64", 4), ("scale_offset", "float64", 2)])
ASSET_TABLE_DT = np.dtype(
[("url", object), ("bounds", "float64", 4), ("scale_offset", "float64", 2)]
)


class Mimetype(NamedTuple):
Expand Down Expand Up @@ -64,6 +66,8 @@ def prepare_items(
bounds: Optional[Bbox] = None,
bounds_latlon: Optional[Bbox] = None,
snap_bounds: bool = True,
rescale: bool = True,
dtype: np.dtype = np.dtype("float64"),
) -> Tuple[np.ndarray, RasterSpec, List[str], ItemSequence]:

if bounds is not None and bounds_latlon is not None:
Expand Down Expand Up @@ -144,7 +148,7 @@ def prepare_items(
asset_bbox = asset.get("proj:bbox", item_bbox)
asset_shape = asset.get("proj:shape", item_shape)
asset_transform = asset.get("proj:transform", item_transform)
raster_bands = asset.get('raster:bands')
raster_bands = asset.get("raster:bands")

if raster_bands is not None:
if len(raster_bands) != 1:
Expand All @@ -154,12 +158,31 @@ def prepare_items(
"If you don't care about this asset, you can skip it by giving a list "
"of asset IDs you *do* want in `assets=`, and leaving this one out."
)
asset_scale = raster_bands[0].get('scale', 1)
asset_offset = raster_bands[0].get('offset', 0)
asset_scale = raster_bands[0].get("scale", 1)
asset_offset = raster_bands[0].get("offset", 0)
else:
asset_scale = 1
asset_offset = 0

if rescale:
if not np.can_cast(asset_scale, dtype):
raise ValueError(
f"`rescale=True`, but safe casting cannot be completed between "
f"asset scale value {asset_scale} and output dtype {dtype}.\n"
f"To continue using `{dtype=}`, pass `rescale=False` to retrieve "
"data in its raw, unscaled values. Or, if you want rescaled "
"values, pass a different `dtype=` (typically `float`)."
)

if not np.can_cast(asset_offset, dtype):
raise ValueError(
f"`rescale=True`, but safe casting cannot be completed between "
f"asset offset value {asset_offset} and output dtype {dtype}.\n"
f"To continue using `{dtype=}`, pass `rescale=False` to retrieve "
"data in its raw, unscaled values. Or, if you want rescaled "
"values, pass a different `dtype=` (typically `float`)."
)

asset_affine = None

# Auto-compute CRS
Expand Down Expand Up @@ -339,7 +362,11 @@ def prepare_items(
continue

# Phew, we figured out all the spatial stuff! Now actually store the information we care about.
asset_table[item_i, asset_i] = (asset["href"], asset_bbox_proj, (asset_scale, asset_offset))
asset_table[item_i, asset_i] = (
asset["href"],
asset_bbox_proj,
(asset_scale, asset_offset),
)
# ^ NOTE: If `asset_bbox_proj` is None, NumPy automatically converts it to NaNs

# At this point, everything has been set (or there was as error)
Expand Down
8 changes: 6 additions & 2 deletions stackstac/stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,10 @@ def stack(
Whether to rescale pixel values by the scale and offset present in the ``raster:bands`` metadata
for each asset.
Default: True. Note that this could produce floating-point data when the
original values are ints, so set ``dtype`` accordingly. You will NOT be warned
if the cast to ``dtype`` is losing information!
original values are ints, so set ``dtype`` accordingly. Raises `ValueError` if the
``dtype`` specified can't hold the data after rescaling: for example, if loading
data with ``dtype=int, rescale=True`` where the scaling factor is 1.5, the rescaled
data would be floating-point, and couldn't be stored as an integer.
sortby_date:
Whether to pre-sort the items by date (from the ``properties["datetime"]`` field).
One of ``"asc"``, ``"desc"``, or False to disable sorting. Default: ``"asc"``.
Expand Down Expand Up @@ -293,6 +295,8 @@ def stack(
bounds=bounds,
bounds_latlon=bounds_latlon,
snap_bounds=snap_bounds,
rescale=rescale,
dtype=dtype,
)
arr = items_to_dask(
asset_table,
Expand Down

0 comments on commit 705fd70

Please sign in to comment.