Skip to content

Commit

Permalink
Scale offset from item asset (#202)
Browse files Browse the repository at this point in the history
  • Loading branch information
RSchueder authored Jul 23, 2023
1 parent acaa55a commit 7836a36
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 20 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ It's a good idea to use `conda` to handle installing rasterio on Windows. There'
* Transfer the STAC metadata into [xarray coordinates](http://xarray.pydata.org/en/stable/data-structures.html#coordinates) for easy indexing, filtering, and provenance of metadata.
* Efficiently generate a Dask graph for loading the data in parallel.
* Mediate between Dask's parallelism and GDAL's aversion to it, allowing for fast, multi-threaded reads when possible, and at least preventing segfaults when not.
* Mask nodata and rescale by dataset-level scales/offsets.
* Mask nodata and rescale by STAC item asset scales/offsets.
* Display data in interactive maps in a notebook, computed on the fly by Dask.

## Limitations:
Expand Down
21 changes: 19 additions & 2 deletions stackstac/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@
import xarray as xr

from .raster_spec import IntFloat, Bbox, Resolutions, RasterSpec

from .stac_types import ItemSequence
from . import accumulate_metadata, geom_utils

ASSET_TABLE_DT = np.dtype([("url", object), ("bounds", "float64", 4)])
ASSET_TABLE_DT = np.dtype([("url", object), ("bounds", "float64", 4), ("scale_offset", "float64", 2)])


class Mimetype(NamedTuple):
Expand Down Expand Up @@ -143,6 +144,22 @@ def prepare_items(
asset_bbox = asset.get("proj:bbox", item_bbox)
asset_shape = asset.get("proj:shape", item_shape)
asset_transform = asset.get("proj:transform", item_transform)
raster_bands = asset.get('raster:bands')

if raster_bands is not None:
if len(raster_bands) != 1:
raise ValueError(
f"raster:bands has {len(raster_bands)} elements for asset {asset_id!r}. "
"Multi-band rasters are not currently supported.\n"
"If you don't care about this asset, you can skip it by giving a list "
"of asset IDs you *do* want in `assets=`, and leaving this one out."
)
asset_scale = raster_bands[0].get('scale', 1)
asset_offset = raster_bands[0].get('offset', 0)
else:
asset_scale = 1
asset_offset = 0

asset_affine = None

# Auto-compute CRS
Expand Down Expand Up @@ -322,7 +339,7 @@ def prepare_items(
continue

# Phew, we figured out all the spatial stuff! Now actually store the information we care about.
asset_table[item_i, asset_i] = (asset["href"], asset_bbox_proj)
asset_table[item_i, asset_i] = (asset["href"], asset_bbox_proj, (asset_scale, asset_offset))
# ^ NOTE: If `asset_bbox_proj` is None, NumPy automatically converts it to NaNs

# At this point, everything has been set (or there was an error)
Expand Down
4 changes: 1 addition & 3 deletions stackstac/reader_protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def __init__(
resampling: Resampling,
dtype: np.dtype,
fill_value: Union[int, float],
rescale: bool,
scale_offset: Tuple[Union[int, float], Union[int, float]],
gdal_env: Optional[LayeredEnv],
errors_as_nodata: Tuple[Exception, ...] = (),
) -> None:
Expand All @@ -55,8 +55,6 @@ def __init__(
fill_value:
Fill nodata pixels in the output array with this value.
If None, whatever nodata value is set in the asset will be used.
rescale:
Rescale the output array according to any scales and offsets set in the asset.
gdal_env:
A `~.LayeredEnv` of GDAL configuration options to use while opening
and reading datasets. If None (default), `~.DEFAULT_GDAL_ENV` is used.
Expand Down
24 changes: 12 additions & 12 deletions stackstac/rio_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def _curthread():

# /TODO


# Default GDAL configuration options
DEFAULT_GDAL_ENV = LayeredEnv(
always=dict(
Expand Down Expand Up @@ -64,11 +63,12 @@ def _curthread():
# See `ThreadLocalRioDataset` for more.
# https://github.com/pangeo-data/pangeo-example-notebooks/issues/21#issuecomment-432457955
# https://gdal.org/drivers/raster/vrt.html#multi-threading-issues

MULTITHREADED_DRIVER_ALLOWLIST = {"GTiff"}


class ThreadsafeRioDataset(Protocol):
scale_offset: Tuple[float, float]
scale_offset: Tuple[Union[int, float], Union[int, float]]

def read(self, window: Window, **kwargs) -> np.ndarray:
...
Expand Down Expand Up @@ -280,7 +280,7 @@ class PickleState(TypedDict):
resampling: Resampling
dtype: np.dtype
fill_value: Union[int, float]
rescale: bool
scale_offset: Tuple[Union[int, float], Union[int, float]]
gdal_env: Optional[LayeredEnv]
errors_as_nodata: Tuple[Exception, ...]

Expand All @@ -303,16 +303,16 @@ def __init__(
resampling: Resampling,
dtype: np.dtype,
fill_value: Union[int, float],
rescale: bool,
scale_offset: Tuple[Union[int, float], Union[int, float]],
gdal_env: Optional[LayeredEnv] = None,
errors_as_nodata: Tuple[Exception, ...] = (),
) -> None:
self.url = url
self.spec = spec
self.resampling = resampling
self.dtype = dtype
self.rescale = rescale
self.fill_value = fill_value
self.scale_offset = scale_offset
self.gdal_env = gdal_env or DEFAULT_GDAL_ENV
self.errors_as_nodata = errors_as_nodata

Expand Down Expand Up @@ -398,12 +398,12 @@ def read(self, window: Window, **kwargs) -> np.ndarray:

raise RuntimeError(msg) from e

if self.rescale:
scale, offset = reader.scale_offset
if scale != 1:
result *= scale
if offset != 0:
result += offset
scale, offset = self.scale_offset

if scale != 1:
result *= scale
if offset != 0:
result += offset

result = np.ma.filled(result, fill_value=self.fill_value)
assert np.issubdtype(result.dtype, self.dtype), (
Expand Down Expand Up @@ -436,7 +436,7 @@ def __getstate__(
"resampling": self.resampling,
"dtype": self.dtype,
"fill_value": self.fill_value,
"rescale": self.rescale,
"scale_offset": self.scale_offset,
"gdal_env": self.gdal_env,
"errors_as_nodata": self.errors_as_nodata,
}
Expand Down
3 changes: 2 additions & 1 deletion stackstac/stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,8 @@ def stack(
be represented in a smaller data type (like ``uint16``), using a different ``fill_value``
(like 0) and managing it yourself could save a lot of memory.
rescale:
Whether to rescale pixel values by the scale and offset set on the dataset.
Whether to rescale pixel values by the scale and offset present in the ``raster:bands`` metadata
for each asset.
Default: True. Note that this could produce floating-point data when the
original values are ints, so set ``dtype`` accordingly. You will NOT be warned
if the cast to ``dtype`` is losing information!
Expand Down
6 changes: 5 additions & 1 deletion stackstac/to_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,10 @@ def asset_table_to_reader_and_window(
if url:
asset_bounds: Bbox = asset_entry["bounds"]
asset_window = windows.from_bounds(*asset_bounds, spec.transform)
if rescale:
asset_scale_offset = asset_entry["scale_offset"]
else:
asset_scale_offset = (1, 0)

entry: ReaderTableEntry = (
reader(
Expand All @@ -140,7 +144,7 @@ def asset_table_to_reader_and_window(
resampling=resampling,
dtype=dtype,
fill_value=fill_value,
rescale=rescale,
scale_offset=asset_scale_offset,
gdal_env=gdal_env,
errors_as_nodata=errors_as_nodata,
),
Expand Down

0 comments on commit 7836a36

Please sign in to comment.