-
Notifications
You must be signed in to change notification settings - Fork 94
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #553 from djhoese/cache-area-slices
Add optional caching to AreaDefinition.get_area_slices
- Loading branch information
Showing
8 changed files
with
411 additions
and
148 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
"""Various tools for caching. | ||
These tools are rarely needed by users and are used where they make sense | ||
throughout pyresample. | ||
""" | ||
from __future__ import annotations | ||
|
||
import hashlib | ||
import json | ||
import os | ||
import warnings | ||
from functools import update_wrapper | ||
from glob import glob | ||
from pathlib import Path | ||
from typing import Any, Callable | ||
|
||
import pyresample | ||
|
||
|
||
class JSONCacheHelper: | ||
"""Decorator class to cache results to a JSON file on-disk.""" | ||
|
||
def __init__( | ||
self, | ||
func: Callable, | ||
cache_config_key: str, | ||
cache_version: int = 1, | ||
): | ||
self._callable = func | ||
self._cache_config_key = cache_config_key | ||
self._cache_version = cache_version | ||
self._uncacheable_arg_type_names = ("",) | ||
|
||
@staticmethod | ||
def cache_clear(cache_dir: str | None = None): | ||
"""Remove all on-disk files associated with this function. | ||
Intended to mimic the :func:`functools.cache` behavior. | ||
""" | ||
cache_dir = _get_cache_dir_from_config(cache_dir=cache_dir, cache_version="*") | ||
for json_file in glob(str(cache_dir / "*.json")): | ||
os.remove(json_file) | ||
|
||
def __call__(self, *args): | ||
"""Call decorated function and cache the result to JSON.""" | ||
should_cache = pyresample.config.get(self._cache_config_key, False) | ||
if not should_cache: | ||
return self._callable(*args) | ||
|
||
try: | ||
arg_hash = _hash_args(args) | ||
except TypeError as err: | ||
warnings.warn("Cannot cache function due to unhashable argument: " + str(err), | ||
stacklevel=2) | ||
return self._callable(*args) | ||
|
||
return self._run_and_cache(arg_hash, args) | ||
|
||
def _run_and_cache(self, arg_hash: str, args: tuple[Any]) -> Any: | ||
base_cache_dir = _get_cache_dir_from_config(cache_version=self._cache_version) | ||
json_path = base_cache_dir / f"{arg_hash}.json" | ||
if not json_path.is_file(): | ||
res = self._callable(*args) | ||
json_path.parent.mkdir(exist_ok=True) | ||
with open(json_path, "w") as json_cache: | ||
json.dump(res, json_cache, cls=_JSONEncoderWithSlice) | ||
|
||
# for consistency, always load the cached result | ||
with open(json_path, "r") as json_cache: | ||
res = json.load(json_cache, object_hook=_object_hook) | ||
return res | ||
|
||
|
||
def _get_cache_dir_from_config(cache_dir: str | None = None, cache_version: int | str = 1) -> Path: | ||
cache_dir = cache_dir or pyresample.config.get("cache_dir") | ||
if cache_dir is None: | ||
raise RuntimeError("Can't use JSON caching. No 'cache_dir' configured.") | ||
subdir = f"geometry_slices_v{cache_version}" | ||
return Path(cache_dir) / subdir | ||
|
||
|
||
def _hash_args(args: tuple[Any]) -> str: | ||
from pyresample.future.geometry import AreaDefinition, SwathDefinition | ||
from pyresample.geometry import AreaDefinition as LegacyAreaDefinition | ||
from pyresample.geometry import SwathDefinition as LegacySwathDefinition | ||
|
||
hashable_args = [] | ||
for arg in args: | ||
if isinstance(arg, (SwathDefinition, LegacySwathDefinition)): | ||
raise TypeError(f"Unhashable type ({type(arg)})") | ||
if isinstance(arg, (AreaDefinition, LegacyAreaDefinition)): | ||
arg = hash(arg) | ||
hashable_args.append(arg) | ||
arg_hash = hashlib.sha1() # nosec | ||
arg_hash.update(json.dumps(tuple(hashable_args)).encode("utf8")) | ||
return arg_hash.hexdigest() | ||
|
||
|
||
class _JSONEncoderWithSlice(json.JSONEncoder): | ||
def default(self, obj: Any) -> Any: | ||
if isinstance(obj, slice): | ||
return {"__slice__": True, "start": obj.start, "stop": obj.stop, "step": obj.step} | ||
return super().default(obj) | ||
|
||
|
||
def _object_hook(obj: object) -> Any: | ||
if isinstance(obj, dict) and obj.get("__slice__", False): | ||
return slice(obj["start"], obj["stop"], obj["step"]) | ||
return obj | ||
|
||
|
||
def cache_to_json_if(cache_config_key: str) -> Callable: | ||
"""Decorate a function and cache the results to a JSON file on disk. | ||
This caching only happens if the ``pyresample.config`` boolean value for | ||
the provided key is ``True`` as well as some other conditions. See | ||
:class:`JSONCacheHelper` for more information. Most importantly this | ||
decorator does not limit how many items can be cached and does not clear | ||
out old entries. It is up to the user to manage the size of the cache. | ||
""" | ||
def _decorator(func: Callable) -> Callable: | ||
zarr_cacher = JSONCacheHelper(func, cache_config_key) | ||
wrapper = update_wrapper(zarr_cacher, func) | ||
return wrapper | ||
|
||
return _decorator |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
"""Functions and tools for subsetting a geometry object.""" | ||
from __future__ import annotations | ||
|
||
import math | ||
from typing import TYPE_CHECKING, Any | ||
|
||
import numpy as np | ||
|
||
# this caching module imports the geometries so this subset module | ||
# must be imported inside functions in the geometry modules if needed | ||
# to avoid circular dependencies | ||
from pyresample._caching import cache_to_json_if | ||
from pyresample.boundary import Boundary | ||
from pyresample.geometry import get_geostationary_bounding_box_in_lonlats, logger | ||
from pyresample.utils import check_slice_orientation | ||
|
||
if TYPE_CHECKING: | ||
from pyresample import AreaDefinition | ||
|
||
|
||
@cache_to_json_if("cache_geometry_slices") | ||
def get_area_slices( | ||
src_area: AreaDefinition, | ||
area_to_cover: AreaDefinition, | ||
shape_divisible_by: int | None, | ||
) -> tuple[slice, slice]: | ||
"""Compute the slice to read based on an `area_to_cover`.""" | ||
if not _is_area_like(src_area): | ||
raise NotImplementedError(f"Only AreaDefinitions are supported, not {type(src_area)}") | ||
if not _is_area_like(area_to_cover): | ||
raise NotImplementedError(f"Only AreaDefinitions are supported, not {type(area_to_cover)}") | ||
|
||
# Intersection only required for two different projections | ||
proj_def_to_cover = area_to_cover.crs | ||
proj_def = src_area.crs | ||
if proj_def_to_cover == proj_def: | ||
logger.debug('Projections for data and slice areas are identical: %s', | ||
proj_def_to_cover) | ||
# Get slice parameters | ||
xstart, xstop, ystart, ystop = _get_slice_starts_stops(src_area, area_to_cover) | ||
|
||
x_slice = check_slice_orientation(slice(xstart, xstop)) | ||
y_slice = check_slice_orientation(slice(ystart, ystop)) | ||
x_slice = _ensure_integer_slice(x_slice) | ||
y_slice = _ensure_integer_slice(y_slice) | ||
return x_slice, y_slice | ||
|
||
data_boundary = _get_area_boundary(src_area) | ||
area_boundary = _get_area_boundary(area_to_cover) | ||
intersection = data_boundary.contour_poly.intersection( | ||
area_boundary.contour_poly) | ||
if intersection is None: | ||
logger.debug('Cannot determine appropriate slicing. ' | ||
"Data and projection area do not overlap.") | ||
raise NotImplementedError | ||
x, y = src_area.get_array_indices_from_lonlat( | ||
np.rad2deg(intersection.lon), np.rad2deg(intersection.lat)) | ||
x_slice = slice(np.ma.min(x), np.ma.max(x) + 1) | ||
y_slice = slice(np.ma.min(y), np.ma.max(y) + 1) | ||
x_slice = _ensure_integer_slice(x_slice) | ||
y_slice = _ensure_integer_slice(y_slice) | ||
if shape_divisible_by is not None: | ||
x_slice = _make_slice_divisible(x_slice, src_area.width, | ||
factor=shape_divisible_by) | ||
y_slice = _make_slice_divisible(y_slice, src_area.height, | ||
factor=shape_divisible_by) | ||
|
||
return (check_slice_orientation(x_slice), | ||
check_slice_orientation(y_slice)) | ||
|
||
|
||
def _is_area_like(area_obj: Any) -> bool: | ||
return hasattr(area_obj, "crs") and hasattr(area_obj, "area_extent") | ||
|
||
|
||
def _get_slice_starts_stops(src_area, area_to_cover): | ||
"""Get x and y start and stop points for slicing.""" | ||
llx, lly, urx, ury = area_to_cover.area_extent | ||
x, y = src_area.get_array_coordinates_from_projection_coordinates([llx, urx], [lly, ury]) | ||
|
||
# we use `round` because we want the *exterior* of the pixels to contain the area_to_cover's area extent. | ||
if (src_area.area_extent[0] > src_area.area_extent[2]) ^ (llx > urx): | ||
xstart = max(0, round(x[1])) | ||
xstop = min(src_area.width, round(x[0]) + 1) | ||
else: | ||
xstart = max(0, round(x[0])) | ||
xstop = min(src_area.width, round(x[1]) + 1) | ||
if (src_area.area_extent[1] > src_area.area_extent[3]) ^ (lly > ury): | ||
ystart = max(0, round(y[0])) | ||
ystop = min(src_area.height, round(y[1]) + 1) | ||
else: | ||
ystart = max(0, round(y[1])) | ||
ystop = min(src_area.height, round(y[0]) + 1) | ||
|
||
return xstart, xstop, ystart, ystop | ||
|
||
|
||
def _get_area_boundary(area_to_cover: AreaDefinition) -> Boundary: | ||
try: | ||
if area_to_cover.is_geostationary: | ||
return Boundary(*get_geostationary_bounding_box_in_lonlats(area_to_cover)) | ||
boundary_shape = max(max(*area_to_cover.shape) // 100 + 1, 3) | ||
return area_to_cover.boundary(frequency=boundary_shape, force_clockwise=True) | ||
except ValueError: | ||
raise NotImplementedError("Can't determine boundary of area to cover") | ||
|
||
|
||
def _make_slice_divisible(sli, max_size, factor=2): | ||
"""Make the given slice even in size.""" | ||
rem = (sli.stop - sli.start) % factor | ||
if rem != 0: | ||
adj = factor - rem | ||
if sli.stop + 1 + rem < max_size: | ||
sli = slice(sli.start, sli.stop + adj) | ||
elif sli.start > 0: | ||
sli = slice(sli.start - adj, sli.stop) | ||
else: | ||
sli = slice(sli.start, sli.stop - rem) | ||
|
||
return sli | ||
|
||
|
||
def _ensure_integer_slice(sli): | ||
start = sli.start | ||
stop = sli.stop | ||
step = sli.step | ||
return slice( | ||
math.floor(start) if start is not None else None, | ||
math.ceil(stop) if stop is not None else None, | ||
math.floor(step) if step is not None else None | ||
) |
Oops, something went wrong.