Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: adds "UnitsDataArray.open" and "UnitsDataArray.create" class methods #334

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion podpac/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def makedirs(name, mode=511, exist_ok=False):
from podpac.core.node import Node, NodeException
import podpac.core.authentication as authentication
from podpac.core.utils import NodeTrait
from podpac.core.units import ureg as units
from podpac.core.units import ureg as units, UnitsDataArray

# Organized submodules
# These files are simply wrappers to create a curated namespace of podpac modules
Expand Down
2 changes: 1 addition & 1 deletion podpac/core/managers/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -1604,7 +1604,7 @@ def create_function(
elif function_source_dist_zip is not None:
with open(function_source_dist_zip, "rb") as f:
lambda_config["Code"] = {} # reset the code dict to make sure S3Bucket and S3Key are overridden
lambda_config["Code"]["ZipFile"]: f.read()
lambda_config["Code"]["ZipFile"] = f.read()

else:
raise ValueError("Function source is not defined")
Expand Down
4 changes: 2 additions & 2 deletions podpac/core/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import traitlets as tl

from podpac.core.settings import settings
from podpac.core.units import ureg, UnitsDataArray, create_data_array
from podpac.core.units import ureg, UnitsDataArray
from podpac.core.utils import common_doc
from podpac.core.utils import JSONEncoder, is_json_serializable
from podpac.core.utils import _get_query_params_from_url, _get_from_url, _get_param
Expand Down Expand Up @@ -252,7 +252,7 @@ def create_output_array(self, coords, data=np.nan, **kwargs):
if self.units is not None:
attrs["units"] = ureg.Unit(self.units)

return create_data_array(coords, data=data, dtype=self.dtype, attrs=attrs, **kwargs)
return UnitsDataArray.create(coords, data=data, dtype=self.dtype, attrs=attrs, **kwargs)

# -----------------------------------------------------------------------------------------------------------------
# Serialization
Expand Down
80 changes: 67 additions & 13 deletions podpac/core/test/test_units.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@

from podpac.core.units import ureg
from podpac.core.units import UnitsDataArray
from podpac.core.units import create_data_array
from podpac.core.units import get_image
from podpac.core.units import create_dataarray # DEPRECATED

from podpac.data import Array


class TestUnitDataArray(object):
Expand Down Expand Up @@ -331,77 +333,129 @@ def setup_class(cls):
cls.coords = Coordinates([[0, 1, 2], [0, 1, 2, 3]], dims=["lat", "lon"])

def test_default(self):
a = create_data_array(self.coords)
a = UnitsDataArray.create(self.coords)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert np.all(np.isnan(a))

def test_empty(self):
a = create_data_array(self.coords, data=None)
a = UnitsDataArray.create(self.coords, data=None)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == float

a = create_data_array(self.coords, data=None, dtype=bool)
a = UnitsDataArray.create(self.coords, data=None, dtype=bool)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == bool

def test_zeros(self):
a = create_data_array(self.coords, data=0)
a = UnitsDataArray.create(self.coords, data=0)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == float
assert np.all(a == 0.0)

a = create_data_array(self.coords, data=0, dtype=bool)
a = UnitsDataArray.create(self.coords, data=0, dtype=bool)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == bool
assert np.all(~a)

def test_ones(self):
a = create_data_array(self.coords, data=1)
a = UnitsDataArray.create(self.coords, data=1)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == float
assert np.all(a == 1.0)

a = create_data_array(self.coords, data=1, dtype=bool)
a = UnitsDataArray.create(self.coords, data=1, dtype=bool)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == bool
assert np.all(a)

def test_full(self):
a = create_data_array(self.coords, data=10)
a = UnitsDataArray.create(self.coords, data=10)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == float
assert np.all(a == 10)

a = create_data_array(self.coords, data=10, dtype=int)
a = UnitsDataArray.create(self.coords, data=10, dtype=int)
assert isinstance(a, UnitsDataArray)
assert a.shape == self.coords.shape
assert a.dtype == int
assert np.all(a == 10)

def test_array(self):
data = np.random.random(self.coords.shape)
a = create_data_array(self.coords, data=data)
a = UnitsDataArray.create(self.coords, data=data)
assert isinstance(a, UnitsDataArray)
assert a.dtype == float
np.testing.assert_equal(a.data, data)

data = np.round(10 * np.random.random(self.coords.shape))
a = create_data_array(self.coords, data=data, dtype=int)
a = UnitsDataArray.create(self.coords, data=data, dtype=int)
assert isinstance(a, UnitsDataArray)
assert a.dtype == int
np.testing.assert_equal(a.data, data.astype(int))

def test_invalid_coords(self):
with pytest.raises(TypeError):
create_data_array((3, 4))
UnitsDataArray.create((3, 4))

def test_deprecate_create_dataarray(self):
with pytest.deprecated_call():
create_dataarray(self.coords, data=10)


class TestOpenDataArray(object):
    """Round-trip tests: write a UnitsDataArray with ``to_netcdf`` and read it back with ``UnitsDataArray.open``."""

    def test_open_after_create(self):
        """Data values are preserved through create -> to_netcdf -> open."""
        coords = Coordinates([[0, 1, 2], [0, 1, 2, 3]], dims=["lat", "lon"])
        original = UnitsDataArray.create(coords, data=np.random.rand(3, 4))
        reopened = UnitsDataArray.open(original.to_netcdf())

        assert isinstance(reopened, UnitsDataArray)
        assert np.all(reopened.data == original.data)

    def test_open_after_create_with_attrs(self):
        """Custom attrs and the crs attr are preserved through the round trip."""
        coords = Coordinates([[0, 1, 2], [0, 1, 2, 3]], dims=["lat", "lon"], crs="EPSG:4193")
        original = UnitsDataArray.create(coords, data=np.random.rand(3, 4), attrs={"some_attr": 5})
        reopened = UnitsDataArray.open(original.to_netcdf())

        assert isinstance(reopened, UnitsDataArray)
        assert np.all(reopened.data == original.data)

        # each attribute must be present after reopening and equal to the value before serialization
        for key in ("some_attr", "crs"):
            assert key in reopened.attrs
            assert reopened.attrs.get(key) == original.attrs.get(key)

    def test_open_after_eval(self):
        """The output of a node evaluation can be serialized and reopened."""

        # mock node
        data = np.random.rand(5, 5)
        lat = np.linspace(-10, 10, 5)
        lon = np.linspace(-10, 10, 5)
        native_coords = Coordinates([lat, lon], ["lat", "lon"])
        node = Array(source=data, native_coordinates=native_coords)
        evaluated = node.eval(node.native_coordinates)

        reopened = UnitsDataArray.open(evaluated.to_netcdf())

        assert isinstance(reopened, UnitsDataArray)
        assert np.all(reopened.data == evaluated.data)

        # the layer style is compared through its json representation
        assert "layer_style" in reopened.attrs
        assert reopened.attrs.get("layer_style").json == evaluated.attrs.get("layer_style").json

        assert "crs" in reopened.attrs
        assert reopened.attrs.get("crs") == evaluated.attrs.get("crs")


class TestGetImage(object):
Expand Down
104 changes: 75 additions & 29 deletions podpac/core/units.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import operator
from six import string_types
import json
import warnings

from io import BytesIO
import base64
Expand All @@ -29,6 +30,7 @@
ureg = UnitRegistry()

import podpac
from podpac import Coordinates
from podpac.core.settings import settings
from podpac.core.utils import JSONEncoder
from podpac.core.style import Style
Expand Down Expand Up @@ -283,6 +285,71 @@ def set(self, value, mask):
# set self to have the same dims (and same order) as when first started
self = self.transpose(*orig_dims)

@classmethod
def open(cls, *args, **kwargs):
    """
    Open a :class:`podpac.UnitsDataArray` from a file or file-like object containing a single data variable.

    This is a wrapper around :func:`xarray.open_dataarray`; every positional and keyword argument is
    forwarded to it unchanged.
    See http://xarray.pydata.org/en/stable/generated/xarray.open_dataarray.html#xarray.open_dataarray.

    The DataArray returned by :func:`xarray.open_dataarray` is converted with :meth:`UnitsDataArray.create`:
    its coordinates become :class:`podpac.Coordinates` (using the ``"crs"`` attribute when one is stored)
    and its attrs are carried over.

    Parameters
    ----------
    *args
        Positional arguments for :func:`xarray.open_dataarray`.
    **kwargs
        Keyword arguments for :func:`xarray.open_dataarray`.

    Returns
    -------
    :class:`podpac.UnitsDataArray`

    Notes
    -----
    No ``dtype`` is passed to :meth:`UnitsDataArray.create`, so the data is cast to that method's
    default dtype. The object returned by xarray is not explicitly closed here.
    """
    da = xr.open_dataarray(*args, **kwargs)
    crs = da.attrs.get("crs")
    coords = Coordinates.from_xarray(da.coords, crs=crs)

    # hand the stored attributes straight to the constructor shortcut
    return cls.create(coords, data=da.data, attrs=da.attrs)

@classmethod
def create(cls, coords, data=np.nan, dtype=float, **kwargs):
    """Shortcut to create a :class:`podpac.UnitsDataArray` from PODPAC coordinates.

    Parameters
    ----------
    coords : :class:`podpac.Coordinates`
        PODPAC Coordinates
    data : None, scalar, or array-like, optional
        Data to fill in. Defaults to np.nan.
        ``None`` allocates an uninitialized array, a scalar fills the whole array with that value,
        and anything else is used as the array contents (cast to `dtype`).
    dtype : type, optional
        Data type. Defaults to float.
    **kwargs
        keyword arguments to pass to :class:`podpac.UnitsDataArray` constructor.
        If ``attrs`` is given it is copied (not modified in place) and a ``"crs"`` entry is added
        from `coords` unless one is already present.

    Returns
    -------
    :class:`podpac.UnitsDataArray`

    Raises
    ------
    TypeError
        If `coords` is not a :class:`podpac.Coordinates` object.
    """
    if not isinstance(coords, podpac.Coordinates):
        raise TypeError("`UnitsDataArray.create` expected Coordinates object, not '%s'" % type(coords))

    if data is None:
        data = np.empty(coords.shape, dtype=dtype)
    elif np.shape(data) == ():
        if data == 0:
            data = np.zeros(coords.shape, dtype=dtype)
        elif data == 1:
            data = np.ones(coords.shape, dtype=dtype)
        else:
            data = np.full(coords.shape, data, dtype=dtype)
    else:
        # plain sequences (e.g. nested lists) have no `astype`; convert them first
        if not hasattr(data, "astype"):
            data = np.asarray(data)
        data = data.astype(dtype)

    # add crs attr on a copy so the caller's attrs mapping is never mutated;
    # an explicit `attrs=None` is treated the same as no attrs at all
    attrs = dict(kwargs.pop("attrs", None) or {})
    attrs.setdefault("crs", coords.crs)

    return cls(data, coords=coords.coords, dims=coords.idims, attrs=attrs, **kwargs)


for tp in ("mul", "matmul", "truediv", "div"):
meth = "__{:s}__".format(tp)
Expand Down Expand Up @@ -346,35 +413,14 @@ def func(self, *args, **kwargs):
del func


# ---------------------------------------------------------------------------------------------------------------------
# Utility functions
# ---------------------------------------------------------------------------------------------------------------------


def create_data_array(coords, data=np.nan, dtype=float, **kwargs):
if not isinstance(coords, podpac.Coordinates):
raise TypeError("create_data_array expected Coordinates object, not '%s'" % type(coords))

if data is None:
data = np.empty(coords.shape, dtype=dtype)
elif np.shape(data) == ():
if data == 0:
data = np.zeros(coords.shape, dtype=dtype)
elif data == 1:
data = np.ones(coords.shape, dtype=dtype)
else:
data = np.full(coords.shape, data, dtype=dtype)
else:
data = data.astype(dtype)

# add crs attr
if "attrs" in kwargs:
if "crs" not in kwargs["attrs"]:
kwargs["attrs"]["crs"] = coords.crs
else:
kwargs["attrs"] = {"crs": coords.crs}

return UnitsDataArray(data, coords=coords.coords, dims=coords.idims, **kwargs)
def create_dataarray(coords, data=np.nan, dtype=float, **kwargs):
    """Deprecated. Use `UnitsDataArray.create()` instead.

    Emits a ``DeprecationWarning`` and forwards all arguments to
    :meth:`UnitsDataArray.create`, returning its result.
    """
    warnings.warn(
        "The `create_dataarray` function is deprecated and will be removed in podpac 2.0. Use the classmethod `UnitsDataArray.create()` instead.",
        DeprecationWarning,
        # attribute the warning to the caller's line rather than to this shim
        stacklevel=2,
    )
    return UnitsDataArray.create(coords, data=data, dtype=dtype, **kwargs)


def get_image(data, format="png", vmin=None, vmax=None, return_base64=False):
Expand Down
4 changes: 2 additions & 2 deletions podpac/datalib/egi.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from podpac.data import DataSource
from podpac import authentication
from podpac import settings
from podpac.core.units import UnitsDataArray, create_data_array
from podpac.core.units import UnitsDataArray
from podpac.core.node import node_eval

# Set up logging
Expand Down Expand Up @@ -225,7 +225,7 @@ def eval(self, coordinates, output=None):
)
raise e
# Force update on native_coordinates (in case of multiple evals)
self.set_trait('native_coordinates', self.get_native_coordinates())
self.set_trait("native_coordinates", self.get_native_coordinates())

# run normal eval once self.data is prepared
return super(EGI, self).eval(coordinates, output)
Expand Down
5 changes: 2 additions & 3 deletions podpac/datalib/smap_egi.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def isnat(a):
import podpac.datalib
from podpac.core.coordinates import Coordinates
from podpac.datalib import EGI
from podpac.core.units import create_data_array
from podpac.core.units import UnitsDataArray

SMAP_PRODUCT_DICT = {
#'shortname': ['lat_key', 'lon_key', 'data_key', 'quality_flag', 'default_version']
Expand Down Expand Up @@ -143,7 +143,6 @@ def coverage(self):
else:
return (self.data_key, self.lat_key, self.lon_key)


@tl.default("version")
def _version_default(self):
return SMAP_PRODUCT_DICT[self.product][4]
Expand Down Expand Up @@ -219,7 +218,7 @@ def read_file(self, filelike):
c = Coordinates([time, lon, lat], dims=["time", "lon", "lat"], crs="epsg:6933")

# make units data array with coordinates and data
return create_data_array(c, data=data)
return UnitsDataArray.create(c, data=data)

def append_file(self, all_data, data):
"""Append data
Expand Down