-
Notifications
You must be signed in to change notification settings - Fork 51
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Scale offset from item asset #202
Changes from 4 commits
36e07d3
dae0481
1b93db6
1dcc7b3
5dbf54c
1179df4
559b03f
8bf5839
4f1d951
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -26,7 +26,7 @@ | |||||||||||||||||||||||||||||||
from .stac_types import ItemSequence | ||||||||||||||||||||||||||||||||
from . import accumulate_metadata, geom_utils | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
ASSET_TABLE_DT = np.dtype([("url", object), ("bounds", "float64", 4)]) | ||||||||||||||||||||||||||||||||
ASSET_TABLE_DT = np.dtype([("url", object), ("bounds", "float64", 4), ("scale_offset", "float64", 2)]) | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
class Mimetype(NamedTuple): | ||||||||||||||||||||||||||||||||
|
@@ -143,6 +143,15 @@ def prepare_items( | |||||||||||||||||||||||||||||||
asset_bbox = asset.get("proj:bbox", item_bbox) | ||||||||||||||||||||||||||||||||
asset_shape = asset.get("proj:shape", item_shape) | ||||||||||||||||||||||||||||||||
asset_transform = asset.get("proj:transform", item_transform) | ||||||||||||||||||||||||||||||||
raster_bands = asset.get('raster:bands') | ||||||||||||||||||||||||||||||||
gjoseph92 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||||||||
if raster_bands is not None: | ||||||||||||||||||||||||||||||||
try: | ||||||||||||||||||||||||||||||||
assert len(raster_bands) == 1 | ||||||||||||||||||||||||||||||||
asset_scale = raster_bands[0].get('scale', np.nan) | ||||||||||||||||||||||||||||||||
asset_offset = raster_bands[0].get('offset', np.nan) | ||||||||||||||||||||||||||||||||
except AssertionError: | ||||||||||||||||||||||||||||||||
raise ValueError(f'raster:bands has more than one element for asset {asset_id}.') | ||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
No need to catch our own |
||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
asset_affine = None | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
# Auto-compute CRS | ||||||||||||||||||||||||||||||||
|
@@ -322,7 +331,7 @@ def prepare_items( | |||||||||||||||||||||||||||||||
continue | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
# Phew, we figured out all the spatial stuff! Now actually store the information we care about. | ||||||||||||||||||||||||||||||||
asset_table[item_i, asset_i] = (asset["href"], asset_bbox_proj) | ||||||||||||||||||||||||||||||||
asset_table[item_i, asset_i] = (asset["href"], asset_bbox_proj, (asset_scale, asset_offset)) | ||||||||||||||||||||||||||||||||
gjoseph92 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||||||||||||||||||||
# ^ NOTE: If `asset_bbox_proj` is None, NumPy automatically converts it to NaNs | ||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||
# At this point, everything has been set (or there was as error) | ||||||||||||||||||||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,6 @@ def _curthread(): | |
|
||
# /TODO | ||
|
||
|
||
# Default GDAL configuration options | ||
DEFAULT_GDAL_ENV = LayeredEnv( | ||
always=dict( | ||
|
@@ -65,6 +64,7 @@ def _curthread(): | |
# https://github.com/pangeo-data/pangeo-example-notebooks/issues/21#issuecomment-432457955 | ||
# https://gdal.org/drivers/raster/vrt.html#multi-threading-issues | ||
MULTITHREADED_DRIVER_ALLOWLIST = {"GTiff"} | ||
INTEGER_DTYPES = ['int', 'uint8', 'int8', 'uint16', 'int16'] | ||
|
||
|
||
class ThreadsafeRioDataset(Protocol): | ||
|
@@ -304,6 +304,7 @@ def __init__( | |
dtype: np.dtype, | ||
fill_value: Union[int, float], | ||
rescale: bool, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It feels a little weird to have both Then, for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have made the recommended changes, happy to get feedback to see if it makes sense how I use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, the docstring for the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||
scale_offset: Tuple[float, float], | ||
gjoseph92 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
gdal_env: Optional[LayeredEnv] = None, | ||
errors_as_nodata: Tuple[Exception, ...] = (), | ||
) -> None: | ||
|
@@ -312,6 +313,7 @@ def __init__( | |
self.resampling = resampling | ||
self.dtype = dtype | ||
self.rescale = rescale | ||
self.scale_offset = scale_offset | ||
self.fill_value = fill_value | ||
self.gdal_env = gdal_env or DEFAULT_GDAL_ENV | ||
self.errors_as_nodata = errors_as_nodata | ||
|
@@ -399,14 +401,23 @@ def read(self, window: Window, **kwargs) -> np.ndarray: | |
|
||
raise RuntimeError(msg) from e | ||
|
||
if result.dtype != self.dtype: | ||
result = result.astype(self.dtype, copy=False) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am happy to remove and delegate to #208 as it is authored by you. What is your timeline for merging that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was hoping to hear confirmation from @Berhinj that it solves the problem before merging, since I don't have a reproducer. However, I'm quite confident it'll work, so I'll just merge it now so we can get these both in. I think I'd slightly prefer that approach to what you have here, just because it maybe saves one copy (letting GDAL read the data into an array of the desired output dtype, vs copying into a new array). There are so many copies internally already though, I doubt it matters much. |
||
|
||
if self.rescale: | ||
scale, offset = reader.scale_offset | ||
scale, offset = self.scale_offset | ||
|
||
if np.isnan(scale) or np.isnan(offset): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that it seems to be valid for only one of scale and offset to be set in In that case, it seems like it would still be appropriate to apply the rescaling from STAC metadata? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah very interesting! yes I agree. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have enabled rescaling to be used even if only |
||
scale, offset = reader.scale_offset | ||
|
||
if scale != 1: | ||
if self.dtype in INTEGER_DTYPES: | ||
raise ValueError(f'Requested asset dtype ({self.dtype}) is not compatible with ' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the check here would need to be more sophisticated. The goal of this check is to prevent silent information loss, and alert users that they need to either not rescale, or change the output data type to something else that can fit the rescaled values. But just checking if the output dtype is integral is too strict. Rescaling in an integer dtype could be perfectly fine, if you're scaling by an integer, and the resulting values still fit in the new dtype. Rescaling from float to int could even be fine; maybe the values are all integers, but unnecessarily stored in float64 on disk. Ideally we'd probably temporarily set NumPy to raise on overflow and just try to do the rescale, and handle that error if it occurs, but Note that right now, the docs for
which makes me feel that for this first PR, maybe it's better to not validate at all than to have overly strict validation. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm yes I see how it is overly strict. I will remove the validation. |
||
f'asset scale value dtype ({scale.dtype}).') | ||
result *= scale | ||
if offset != 0: | ||
result += offset | ||
|
||
result = result.astype(self.dtype, copy=False) | ||
result = np.ma.filled(result, fill_value=self.fill_value) | ||
return result | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer to do the version bump as a separate PR, like #176 for example.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds good